epeg 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (504) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/MANIFEST +5 -0
  4. data/TODO +1 -0
  5. data/epeg/.dockerignore +4 -0
  6. data/epeg/.gitignore +5 -0
  7. data/epeg/CMakeLists.txt +30 -0
  8. data/epeg/Dockerfile +23 -0
  9. data/epeg/Epeg.h +90 -0
  10. data/epeg/README.md +42 -0
  11. data/epeg/epeg_main.c +1642 -0
  12. data/epeg/epeg_private.h +85 -0
  13. data/epeg/example/.gitignore +1 -0
  14. data/epeg/example/CMakeLists.txt +20 -0
  15. data/epeg/example/example.jpg +0 -0
  16. data/epeg/example/rotatetest.c +29 -0
  17. data/epeg/example/scaletest.c +48 -0
  18. data/epeg/vendor/libjpeg-turbo-2.0.4/BUILDING.md +828 -0
  19. data/epeg/vendor/libjpeg-turbo-2.0.4/CMakeLists.txt +1420 -0
  20. data/epeg/vendor/libjpeg-turbo-2.0.4/ChangeLog.md +1494 -0
  21. data/epeg/vendor/libjpeg-turbo-2.0.4/LICENSE.md +132 -0
  22. data/epeg/vendor/libjpeg-turbo-2.0.4/README.ijg +277 -0
  23. data/epeg/vendor/libjpeg-turbo-2.0.4/README.md +356 -0
  24. data/epeg/vendor/libjpeg-turbo-2.0.4/cderror.h +137 -0
  25. data/epeg/vendor/libjpeg-turbo-2.0.4/cdjpeg.c +145 -0
  26. data/epeg/vendor/libjpeg-turbo-2.0.4/cdjpeg.h +157 -0
  27. data/epeg/vendor/libjpeg-turbo-2.0.4/change.log +315 -0
  28. data/epeg/vendor/libjpeg-turbo-2.0.4/cjpeg.1 +354 -0
  29. data/epeg/vendor/libjpeg-turbo-2.0.4/cjpeg.c +695 -0
  30. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/BuildPackages.cmake +182 -0
  31. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/GNUInstallDirs.cmake +416 -0
  32. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/cmake_uninstall.cmake.in +24 -0
  33. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/testclean.cmake +41 -0
  34. data/epeg/vendor/libjpeg-turbo-2.0.4/cmyk.h +61 -0
  35. data/epeg/vendor/libjpeg-turbo-2.0.4/coderules.txt +78 -0
  36. data/epeg/vendor/libjpeg-turbo-2.0.4/djpeg.1 +296 -0
  37. data/epeg/vendor/libjpeg-turbo-2.0.4/djpeg.c +822 -0
  38. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/annotated.html +104 -0
  39. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/bc_s.png +0 -0
  40. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/bdwn.png +0 -0
  41. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/classes.html +106 -0
  42. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/closed.png +0 -0
  43. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen-extra.css +3 -0
  44. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen.css +1184 -0
  45. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen.png +0 -0
  46. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/dynsections.js +97 -0
  47. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2blank.png +0 -0
  48. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2cl.png +0 -0
  49. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2doc.png +0 -0
  50. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2folderclosed.png +0 -0
  51. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2folderopen.png +0 -0
  52. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2lastnode.png +0 -0
  53. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2link.png +0 -0
  54. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mlastnode.png +0 -0
  55. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mnode.png +0 -0
  56. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mo.png +0 -0
  57. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2node.png +0 -0
  58. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2ns.png +0 -0
  59. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2plastnode.png +0 -0
  60. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2pnode.png +0 -0
  61. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2splitbar.png +0 -0
  62. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2vertline.png +0 -0
  63. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/functions.html +134 -0
  64. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/functions_vars.html +134 -0
  65. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/group___turbo_j_p_e_g.html +2775 -0
  66. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/index.html +90 -0
  67. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/jquery.js +8 -0
  68. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/modules.html +95 -0
  69. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_f.png +0 -0
  70. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_g.png +0 -0
  71. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_h.png +0 -0
  72. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/open.png +0 -0
  73. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_63.html +26 -0
  74. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_63.js +4 -0
  75. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_64.html +26 -0
  76. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_64.js +5 -0
  77. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_68.html +26 -0
  78. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_68.js +4 -0
  79. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6e.html +26 -0
  80. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6e.js +4 -0
  81. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6f.html +26 -0
  82. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6f.js +5 -0
  83. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_72.html +26 -0
  84. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_72.js +4 -0
  85. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_74.html +26 -0
  86. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_74.js +102 -0
  87. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_77.html +26 -0
  88. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_77.js +4 -0
  89. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_78.html +26 -0
  90. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_78.js +4 -0
  91. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_79.html +26 -0
  92. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_79.js +4 -0
  93. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/classes_74.html +26 -0
  94. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/classes_74.js +6 -0
  95. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/close.png +0 -0
  96. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enums_74.html +26 -0
  97. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enums_74.js +8 -0
  98. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enumvalues_74.html +26 -0
  99. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enumvalues_74.js +37 -0
  100. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/functions_74.html +26 -0
  101. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/functions_74.js +31 -0
  102. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/groups_74.html +26 -0
  103. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/groups_74.js +4 -0
  104. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/mag_sel.png +0 -0
  105. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/nomatches.html +12 -0
  106. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search.css +271 -0
  107. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search.js +809 -0
  108. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_l.png +0 -0
  109. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_m.png +0 -0
  110. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_r.png +0 -0
  111. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/typedefs_74.html +26 -0
  112. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/typedefs_74.js +5 -0
  113. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_63.html +26 -0
  114. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_63.js +4 -0
  115. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_64.html +26 -0
  116. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_64.js +5 -0
  117. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_68.html +26 -0
  118. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_68.js +4 -0
  119. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6e.html +26 -0
  120. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6e.js +4 -0
  121. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6f.html +26 -0
  122. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6f.js +5 -0
  123. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_72.html +26 -0
  124. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_72.js +4 -0
  125. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_74.html +26 -0
  126. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_74.js +10 -0
  127. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_77.html +26 -0
  128. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_77.js +4 -0
  129. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_78.html +26 -0
  130. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_78.js +4 -0
  131. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_79.html +26 -0
  132. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_79.js +4 -0
  133. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjregion.html +186 -0
  134. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjscalingfactor.html +148 -0
  135. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjtransform.html +212 -0
  136. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/sync_off.png +0 -0
  137. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/sync_on.png +0 -0
  138. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_a.png +0 -0
  139. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_b.png +0 -0
  140. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_h.png +0 -0
  141. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_s.png +0 -0
  142. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tabs.css +60 -0
  143. data/epeg/vendor/libjpeg-turbo-2.0.4/doxygen-extra.css +3 -0
  144. data/epeg/vendor/libjpeg-turbo-2.0.4/doxygen.config +16 -0
  145. data/epeg/vendor/libjpeg-turbo-2.0.4/example.txt +464 -0
  146. data/epeg/vendor/libjpeg-turbo-2.0.4/jaricom.c +157 -0
  147. data/epeg/vendor/libjpeg-turbo-2.0.4/java/CMakeLists.txt +88 -0
  148. data/epeg/vendor/libjpeg-turbo-2.0.4/java/MANIFEST.MF +2 -0
  149. data/epeg/vendor/libjpeg-turbo-2.0.4/java/README +52 -0
  150. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJBench.java +1021 -0
  151. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJExample.java +405 -0
  152. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJUnitTest.java +960 -0
  153. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/allclasses-frame.html +24 -0
  154. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/allclasses-noframe.html +24 -0
  155. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/constant-values.html +532 -0
  156. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/deprecated-list.html +252 -0
  157. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/help-doc.html +210 -0
  158. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/index-all.html +1029 -0
  159. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/index.html +71 -0
  160. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJ.html +1356 -0
  161. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html +926 -0
  162. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html +241 -0
  163. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html +1255 -0
  164. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJException.html +340 -0
  165. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html +343 -0
  166. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html +751 -0
  167. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html +421 -0
  168. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html +765 -0
  169. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-frame.html +31 -0
  170. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-summary.html +202 -0
  171. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-tree.html +160 -0
  172. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/overview-tree.html +164 -0
  173. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/package-list +1 -0
  174. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/background.gif +0 -0
  175. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/tab.gif +0 -0
  176. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/titlebar.gif +0 -0
  177. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/titlebar_end.gif +0 -0
  178. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/script.js +30 -0
  179. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/serialized-form.html +176 -0
  180. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/stylesheet.css +474 -0
  181. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJ.java +584 -0
  182. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJCompressor.java +677 -0
  183. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java +76 -0
  184. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJDecompressor.java +931 -0
  185. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJException.java +78 -0
  186. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJLoader-unix.java.in +59 -0
  187. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJLoader-win.java.in +35 -0
  188. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java +115 -0
  189. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJTransform.java +227 -0
  190. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJTransformer.java +163 -0
  191. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/YUVImage.java +445 -0
  192. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJ.h +129 -0
  193. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJCompressor.h +101 -0
  194. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJDecompressor.h +101 -0
  195. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJTransformer.h +29 -0
  196. data/epeg/vendor/libjpeg-turbo-2.0.4/jcapimin.c +295 -0
  197. data/epeg/vendor/libjpeg-turbo-2.0.4/jcapistd.c +162 -0
  198. data/epeg/vendor/libjpeg-turbo-2.0.4/jcarith.c +932 -0
  199. data/epeg/vendor/libjpeg-turbo-2.0.4/jccoefct.c +449 -0
  200. data/epeg/vendor/libjpeg-turbo-2.0.4/jccolext.c +144 -0
  201. data/epeg/vendor/libjpeg-turbo-2.0.4/jccolor.c +710 -0
  202. data/epeg/vendor/libjpeg-turbo-2.0.4/jcdctmgr.c +721 -0
  203. data/epeg/vendor/libjpeg-turbo-2.0.4/jchuff.c +1096 -0
  204. data/epeg/vendor/libjpeg-turbo-2.0.4/jchuff.h +42 -0
  205. data/epeg/vendor/libjpeg-turbo-2.0.4/jcicc.c +105 -0
  206. data/epeg/vendor/libjpeg-turbo-2.0.4/jcinit.c +77 -0
  207. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmainct.c +162 -0
  208. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmarker.c +664 -0
  209. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmaster.c +640 -0
  210. data/epeg/vendor/libjpeg-turbo-2.0.4/jcomapi.c +109 -0
  211. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfig.h.in +73 -0
  212. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfig.txt +143 -0
  213. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfigint.h.in +31 -0
  214. data/epeg/vendor/libjpeg-turbo-2.0.4/jcparam.c +541 -0
  215. data/epeg/vendor/libjpeg-turbo-2.0.4/jcphuff.c +1105 -0
  216. data/epeg/vendor/libjpeg-turbo-2.0.4/jcprepct.c +351 -0
  217. data/epeg/vendor/libjpeg-turbo-2.0.4/jcsample.c +539 -0
  218. data/epeg/vendor/libjpeg-turbo-2.0.4/jcstest.c +126 -0
  219. data/epeg/vendor/libjpeg-turbo-2.0.4/jctrans.c +400 -0
  220. data/epeg/vendor/libjpeg-turbo-2.0.4/jdapimin.c +407 -0
  221. data/epeg/vendor/libjpeg-turbo-2.0.4/jdapistd.c +639 -0
  222. data/epeg/vendor/libjpeg-turbo-2.0.4/jdarith.c +773 -0
  223. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatadst-tj.c +203 -0
  224. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatadst.c +293 -0
  225. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatasrc-tj.c +194 -0
  226. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatasrc.c +295 -0
  227. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcoefct.c +692 -0
  228. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcoefct.h +82 -0
  229. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcol565.c +384 -0
  230. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcolext.c +143 -0
  231. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcolor.c +883 -0
  232. data/epeg/vendor/libjpeg-turbo-2.0.4/jdct.h +208 -0
  233. data/epeg/vendor/libjpeg-turbo-2.0.4/jddctmgr.c +352 -0
  234. data/epeg/vendor/libjpeg-turbo-2.0.4/jdhuff.c +831 -0
  235. data/epeg/vendor/libjpeg-turbo-2.0.4/jdhuff.h +238 -0
  236. data/epeg/vendor/libjpeg-turbo-2.0.4/jdicc.c +171 -0
  237. data/epeg/vendor/libjpeg-turbo-2.0.4/jdinput.c +408 -0
  238. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmainct.c +460 -0
  239. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmainct.h +71 -0
  240. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmarker.c +1377 -0
  241. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmaster.c +737 -0
  242. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmaster.h +28 -0
  243. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmerge.c +617 -0
  244. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmrg565.c +354 -0
  245. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmrgext.c +184 -0
  246. data/epeg/vendor/libjpeg-turbo-2.0.4/jdphuff.c +687 -0
  247. data/epeg/vendor/libjpeg-turbo-2.0.4/jdpostct.c +294 -0
  248. data/epeg/vendor/libjpeg-turbo-2.0.4/jdsample.c +518 -0
  249. data/epeg/vendor/libjpeg-turbo-2.0.4/jdsample.h +50 -0
  250. data/epeg/vendor/libjpeg-turbo-2.0.4/jdtrans.c +155 -0
  251. data/epeg/vendor/libjpeg-turbo-2.0.4/jerror.c +251 -0
  252. data/epeg/vendor/libjpeg-turbo-2.0.4/jerror.h +316 -0
  253. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctflt.c +169 -0
  254. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctfst.c +227 -0
  255. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctint.c +288 -0
  256. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctflt.c +240 -0
  257. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctfst.c +371 -0
  258. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctint.c +2627 -0
  259. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctred.c +409 -0
  260. data/epeg/vendor/libjpeg-turbo-2.0.4/jinclude.h +88 -0
  261. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemmgr.c +1179 -0
  262. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemnobs.c +115 -0
  263. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemsys.h +178 -0
  264. data/epeg/vendor/libjpeg-turbo-2.0.4/jmorecfg.h +421 -0
  265. data/epeg/vendor/libjpeg-turbo-2.0.4/jpeg_nbits_table.h +4098 -0
  266. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegcomp.h +31 -0
  267. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegint.h +368 -0
  268. data/epeg/vendor/libjpeg-turbo-2.0.4/jpeglib.h +1132 -0
  269. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegtran.1 +295 -0
  270. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegtran.c +601 -0
  271. data/epeg/vendor/libjpeg-turbo-2.0.4/jquant1.c +859 -0
  272. data/epeg/vendor/libjpeg-turbo-2.0.4/jquant2.c +1285 -0
  273. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimd.h +117 -0
  274. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimd_none.c +418 -0
  275. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimddct.h +70 -0
  276. data/epeg/vendor/libjpeg-turbo-2.0.4/jstdhuff.c +143 -0
  277. data/epeg/vendor/libjpeg-turbo-2.0.4/jutils.c +133 -0
  278. data/epeg/vendor/libjpeg-turbo-2.0.4/jversion.h +52 -0
  279. data/epeg/vendor/libjpeg-turbo-2.0.4/libjpeg.map.in +11 -0
  280. data/epeg/vendor/libjpeg-turbo-2.0.4/libjpeg.txt +3144 -0
  281. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/CMakeLists.txt +1 -0
  282. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5.c +275 -0
  283. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5.h +57 -0
  284. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5cmp.c +59 -0
  285. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5hl.c +125 -0
  286. data/epeg/vendor/libjpeg-turbo-2.0.4/rdbmp.c +689 -0
  287. data/epeg/vendor/libjpeg-turbo-2.0.4/rdcolmap.c +254 -0
  288. data/epeg/vendor/libjpeg-turbo-2.0.4/rdgif.c +39 -0
  289. data/epeg/vendor/libjpeg-turbo-2.0.4/rdjpgcom.1 +63 -0
  290. data/epeg/vendor/libjpeg-turbo-2.0.4/rdjpgcom.c +510 -0
  291. data/epeg/vendor/libjpeg-turbo-2.0.4/rdppm.c +766 -0
  292. data/epeg/vendor/libjpeg-turbo-2.0.4/rdrle.c +389 -0
  293. data/epeg/vendor/libjpeg-turbo-2.0.4/rdswitch.c +424 -0
  294. data/epeg/vendor/libjpeg-turbo-2.0.4/rdtarga.c +509 -0
  295. data/epeg/vendor/libjpeg-turbo-2.0.4/release/Distribution.xml.in +24 -0
  296. data/epeg/vendor/libjpeg-turbo-2.0.4/release/License.rtf +20 -0
  297. data/epeg/vendor/libjpeg-turbo-2.0.4/release/ReadMe.txt +5 -0
  298. data/epeg/vendor/libjpeg-turbo-2.0.4/release/Welcome.rtf +17 -0
  299. data/epeg/vendor/libjpeg-turbo-2.0.4/release/deb-control.in +31 -0
  300. data/epeg/vendor/libjpeg-turbo-2.0.4/release/installer.nsi.in +191 -0
  301. data/epeg/vendor/libjpeg-turbo-2.0.4/release/libjpeg.pc.in +10 -0
  302. data/epeg/vendor/libjpeg-turbo-2.0.4/release/libturbojpeg.pc.in +10 -0
  303. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makecygwinpkg.in +66 -0
  304. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makedpkg.in +115 -0
  305. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makemacpkg.in +284 -0
  306. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makerpm.in +30 -0
  307. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makesrpm.in +48 -0
  308. data/epeg/vendor/libjpeg-turbo-2.0.4/release/maketarball.in +51 -0
  309. data/epeg/vendor/libjpeg-turbo-2.0.4/release/rpm.spec.in +221 -0
  310. data/epeg/vendor/libjpeg-turbo-2.0.4/release/uninstall.in +113 -0
  311. data/epeg/vendor/libjpeg-turbo-2.0.4/sharedlib/CMakeLists.txt +99 -0
  312. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/CMakeLists.txt +385 -0
  313. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm/jsimd.c +721 -0
  314. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm/jsimd_neon.S +2878 -0
  315. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm64/jsimd.c +798 -0
  316. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm64/jsimd_neon.S +3433 -0
  317. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/gas-preprocessor.in +1 -0
  318. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-avx2.asm +578 -0
  319. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-mmx.asm +476 -0
  320. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-sse2.asm +503 -0
  321. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-avx2.asm +121 -0
  322. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-mmx.asm +121 -0
  323. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-sse2.asm +120 -0
  324. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-avx2.asm +113 -0
  325. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-mmx.asm +113 -0
  326. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-sse2.asm +112 -0
  327. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-avx2.asm +457 -0
  328. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-mmx.asm +355 -0
  329. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-sse2.asm +382 -0
  330. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jchuff-sse2.asm +424 -0
  331. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcphuff-sse2.asm +660 -0
  332. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-avx2.asm +388 -0
  333. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-mmx.asm +324 -0
  334. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-sse2.asm +351 -0
  335. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-avx2.asm +515 -0
  336. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-mmx.asm +404 -0
  337. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-sse2.asm +458 -0
  338. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-avx2.asm +118 -0
  339. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-mmx.asm +117 -0
  340. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-sse2.asm +117 -0
  341. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-avx2.asm +136 -0
  342. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-mmx.asm +123 -0
  343. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-sse2.asm +135 -0
  344. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-avx2.asm +575 -0
  345. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-mmx.asm +460 -0
  346. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-sse2.asm +517 -0
  347. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-avx2.asm +760 -0
  348. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-mmx.asm +731 -0
  349. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-sse2.asm +724 -0
  350. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctflt-3dn.asm +318 -0
  351. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctflt-sse.asm +369 -0
  352. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctfst-mmx.asm +395 -0
  353. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctfst-sse2.asm +403 -0
  354. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-avx2.asm +331 -0
  355. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-mmx.asm +620 -0
  356. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-sse2.asm +633 -0
  357. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-3dn.asm +451 -0
  358. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-sse.asm +571 -0
  359. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-sse2.asm +497 -0
  360. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctfst-mmx.asm +499 -0
  361. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctfst-sse2.asm +501 -0
  362. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-avx2.asm +453 -0
  363. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-mmx.asm +851 -0
  364. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-sse2.asm +858 -0
  365. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctred-mmx.asm +704 -0
  366. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctred-sse2.asm +592 -0
  367. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-3dn.asm +230 -0
  368. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-mmx.asm +276 -0
  369. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-sse.asm +208 -0
  370. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquantf-sse2.asm +168 -0
  371. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquanti-avx2.asm +188 -0
  372. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquanti-sse2.asm +201 -0
  373. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jsimd.c +1253 -0
  374. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jsimdcpu.asm +135 -0
  375. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/jsimd.h +1083 -0
  376. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jccolext-mmi.c +483 -0
  377. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jccolor-mmi.c +148 -0
  378. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jcsample-mmi.c +100 -0
  379. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jcsample.h +28 -0
  380. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdcolext-mmi.c +424 -0
  381. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdcolor-mmi.c +139 -0
  382. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdsample-mmi.c +245 -0
  383. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jfdctint-mmi.c +398 -0
  384. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jidctint-mmi.c +571 -0
  385. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jquanti-mmi.c +130 -0
  386. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jsimd.c +610 -0
  387. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jsimd_mmi.h +57 -0
  388. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/loongson-mmintrin.h +1324 -0
  389. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd.c +1123 -0
  390. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd_dspr2.S +4479 -0
  391. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd_dspr2_asm.h +292 -0
  392. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jcolsamp.inc +135 -0
  393. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jdct.inc +31 -0
  394. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jpeg_nbits_table.inc +4097 -0
  395. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdcfg.inc +93 -0
  396. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdcfg.inc.h +131 -0
  397. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdext.inc +479 -0
  398. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jccolext-altivec.c +269 -0
  399. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jccolor-altivec.c +116 -0
  400. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcgray-altivec.c +111 -0
  401. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcgryext-altivec.c +228 -0
  402. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcsample-altivec.c +159 -0
  403. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcsample.h +28 -0
  404. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdcolext-altivec.c +276 -0
  405. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdcolor-altivec.c +106 -0
  406. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdmerge-altivec.c +130 -0
  407. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdmrgext-altivec.c +329 -0
  408. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdsample-altivec.c +400 -0
  409. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jfdctfst-altivec.c +154 -0
  410. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jfdctint-altivec.c +258 -0
  411. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jidctfst-altivec.c +255 -0
  412. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jidctint-altivec.c +357 -0
  413. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jquanti-altivec.c +250 -0
  414. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jsimd.c +872 -0
  415. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jsimd_altivec.h +98 -0
  416. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolext-avx2.asm +558 -0
  417. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolext-sse2.asm +483 -0
  418. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolor-avx2.asm +121 -0
  419. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolor-sse2.asm +120 -0
  420. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgray-avx2.asm +113 -0
  421. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgray-sse2.asm +112 -0
  422. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgryext-avx2.asm +437 -0
  423. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgryext-sse2.asm +362 -0
  424. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jchuff-sse2.asm +346 -0
  425. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcphuff-sse2.asm +637 -0
  426. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcsample-avx2.asm +366 -0
  427. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcsample-sse2.asm +329 -0
  428. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolext-avx2.asm +495 -0
  429. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolext-sse2.asm +438 -0
  430. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolor-avx2.asm +118 -0
  431. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolor-sse2.asm +117 -0
  432. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmerge-avx2.asm +136 -0
  433. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmerge-sse2.asm +135 -0
  434. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmrgext-avx2.asm +593 -0
  435. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmrgext-sse2.asm +535 -0
  436. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdsample-avx2.asm +695 -0
  437. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdsample-sse2.asm +664 -0
  438. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctflt-sse.asm +355 -0
  439. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctfst-sse2.asm +389 -0
  440. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctint-avx2.asm +320 -0
  441. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctint-sse2.asm +619 -0
  442. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctflt-sse2.asm +481 -0
  443. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctfst-sse2.asm +490 -0
  444. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctint-avx2.asm +417 -0
  445. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctint-sse2.asm +846 -0
  446. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctred-sse2.asm +573 -0
  447. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquantf-sse2.asm +154 -0
  448. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquanti-avx2.asm +162 -0
  449. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquanti-sse2.asm +187 -0
  450. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jsimd.c +1076 -0
  451. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jsimdcpu.asm +86 -0
  452. data/epeg/vendor/libjpeg-turbo-2.0.4/structure.txt +904 -0
  453. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/nightshot_iso_100.bmp +0 -0
  454. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/nightshot_iso_100.txt +25 -0
  455. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test.scan +5 -0
  456. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test1.icc +0 -0
  457. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test1.icc.txt +20 -0
  458. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test2.icc +0 -0
  459. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test2.icc.txt +20 -0
  460. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testimgari.jpg +0 -0
  461. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testimgint.jpg +0 -0
  462. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig.jpg +0 -0
  463. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig.ppm +4 -0
  464. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig12.jpg +0 -0
  465. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_5674_0098.bmp +0 -0
  466. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_6434_0018a.bmp +0 -0
  467. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_6548_0026a.bmp +0 -0
  468. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbench.c +1031 -0
  469. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbenchtest.in +256 -0
  470. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbenchtest.java.in +215 -0
  471. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexample.c +396 -0
  472. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexampletest.in +149 -0
  473. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexampletest.java.in +151 -0
  474. data/epeg/vendor/libjpeg-turbo-2.0.4/tjunittest.c +931 -0
  475. data/epeg/vendor/libjpeg-turbo-2.0.4/tjutil.c +70 -0
  476. data/epeg/vendor/libjpeg-turbo-2.0.4/tjutil.h +47 -0
  477. data/epeg/vendor/libjpeg-turbo-2.0.4/transupp.c +1628 -0
  478. data/epeg/vendor/libjpeg-turbo-2.0.4/transupp.h +210 -0
  479. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-jni.c +1246 -0
  480. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-mapfile +65 -0
  481. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-mapfile.jni +101 -0
  482. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg.c +2152 -0
  483. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg.h +1744 -0
  484. data/epeg/vendor/libjpeg-turbo-2.0.4/usage.txt +635 -0
  485. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jconfig.h.in +34 -0
  486. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg62-memsrcdst.def +108 -0
  487. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg62.def +106 -0
  488. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg7-memsrcdst.def +110 -0
  489. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg7.def +108 -0
  490. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg8.def +111 -0
  491. data/epeg/vendor/libjpeg-turbo-2.0.4/wizard.txt +212 -0
  492. data/epeg/vendor/libjpeg-turbo-2.0.4/wrbmp.c +558 -0
  493. data/epeg/vendor/libjpeg-turbo-2.0.4/wrgif.c +413 -0
  494. data/epeg/vendor/libjpeg-turbo-2.0.4/wrjpgcom.1 +103 -0
  495. data/epeg/vendor/libjpeg-turbo-2.0.4/wrjpgcom.c +591 -0
  496. data/epeg/vendor/libjpeg-turbo-2.0.4/wrppm.c +365 -0
  497. data/epeg/vendor/libjpeg-turbo-2.0.4/wrrle.c +309 -0
  498. data/epeg/vendor/libjpeg-turbo-2.0.4/wrtarga.c +261 -0
  499. data/epeg.c +131 -0
  500. data/epeg.gemspec +18 -0
  501. data/extconf.rb +80 -0
  502. data/test.jpg +0 -0
  503. data/test.rb +42 -0
  504. metadata +546 -0
@@ -0,0 +1,760 @@
1
+ ;
2
+ ; jdsample.asm - upsampling (AVX2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2015, Intel Corporation.
6
+ ; Copyright (C) 2016, D. R. Commander.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler); it
13
+ ; can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+
18
+ %include "jsimdext.inc"
19
+
20
+ ; --------------------------------------------------------------------------
21
+ SECTION SEG_CONST ; constants used by the two fancy upsampling routines in this file
22
+
23
+ alignz 32
24
+ GLOBAL_DATA(jconst_fancy_upsample_avx2)
25
+
26
+ EXTN(jconst_fancy_upsample_avx2):
27
+
28
+ PW_ONE times 16 dw 1 ; 16 words of 1: added to the left-neighbor term in h2v1 fancy (before >> 2)
29
+ PW_TWO times 16 dw 2 ; 16 words of 2: added to the right-neighbor term in h2v1 fancy (before >> 2)
30
+ PW_THREE times 16 dw 3 ; 16 words of 3: vpmullw weight applied to the center sample
31
+ PW_SEVEN times 16 dw 7 ; 16 words of 7: added to the right-neighbor term in h2v2 fancy (before >> 4)
32
+ PW_EIGHT times 16 dw 8 ; 16 words of 8: added to the left-neighbor term in h2v2 fancy (before >> 4)
33
+
34
+ alignz 32
35
+
36
+ ; --------------------------------------------------------------------------
37
+ SECTION SEG_TEXT
38
+ BITS 32
39
+ ;
40
+ ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
41
+ ;
42
+ ; The upsampling algorithm is linear interpolation between pixel centers,
43
+ ; also known as a "triangle filter". This is a good compromise between
44
+ ; speed and visual quality. The centers of the output pixels are 1/4 and 3/4
45
+ ; of the way between input pixel centers.
46
+ ; As coded: out[2i] = (3*in[i] + in[i-1] + 1) >> 2, out[2i+1] = (3*in[i] + in[i+1] + 2) >> 2.
47
+ ; GLOBAL(void)
48
+ ; jsimd_h2v1_fancy_upsample_avx2(int max_v_samp_factor,
49
+ ; JDIMENSION downsampled_width,
50
+ ; JSAMPARRAY input_data,
51
+ ; JSAMPARRAY *output_data_ptr);
52
+ ; Register roles: eax=colctr, ecx=rowctr, esi=inptr, edi=outptr, ebx=GOT address, dl=scratch.
53
+
54
+ %define max_v_samp(b) (b) + 8 ; int max_v_samp_factor
55
+ %define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width
56
+ %define input_data(b) (b) + 16 ; JSAMPARRAY input_data
57
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
58
+
59
+ align 32
60
+ GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
61
+
62
+ EXTN(jsimd_h2v1_fancy_upsample_avx2):
63
+ push ebp
64
+ mov ebp, esp ; arguments are addressed as ebp+8 .. ebp+20
65
+ pushpic ebx ; macro not visible here; presumably saves ebx only in PIC builds
66
+ ; push ecx ; need not be preserved
67
+ ; push edx ; need not be preserved
68
+ push esi
69
+ push edi
70
+
71
+ get_GOT ebx ; get GOT address
72
+
73
+ mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
74
+ test eax, eax ; zero width: nothing to do
75
+ jz near .return
76
+
77
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
78
+ test ecx, ecx ; zero rows: nothing to do
79
+ jz near .return
80
+
81
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
82
+ mov edi, POINTER [output_data_ptr(ebp)]
83
+ mov edi, JSAMPARRAY [edi] ; output_data
84
+ alignx 16, 7
85
+ .rowloop:
86
+ push eax ; colctr
87
+ push edi
88
+ push esi
89
+
90
+ mov esi, JSAMPROW [esi] ; inptr
91
+ mov edi, JSAMPROW [edi] ; outptr
92
+
93
+ test eax, SIZEOF_YMMWORD-1 ; is the width a multiple of SIZEOF_YMMWORD?
94
+ jz short .skip
95
+ mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] ; dl = last real sample of the row
96
+ mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample (copy of the last one) just past the row end
97
+ .skip:
98
+ vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's)
99
+ vpcmpeqb xmm7, xmm7, xmm7
100
+ vpsrldq xmm7, xmm7, (SIZEOF_XMMWORD-1) ; (ff -- -- -- ... -- --) LSB is ff
101
+ vpand ymm7, ymm7, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm7=( 0 -- -- ... -- --): sample 0 acts as its own left neighbor
102
+
103
+ add eax, byte SIZEOF_YMMWORD-1
104
+ and eax, byte -SIZEOF_YMMWORD ; colctr rounded up to a multiple of SIZEOF_YMMWORD
105
+ cmp eax, byte SIZEOF_YMMWORD
106
+ ja short .columnloop ; more than one block left: take the right neighbor from the next block
107
+ alignx 16, 7
108
+
109
+ .columnloop_last: ; final block: the right neighbor of the last sample is taken from this block
110
+ vpcmpeqb xmm6, xmm6, xmm6
111
+ vpslldq xmm6, xmm6, (SIZEOF_XMMWORD-1)
112
+ vperm2i128 ymm6, ymm6, ymm6, 1 ; (---- ---- ... ---- ---- ff) MSB is ff
113
+ vpand ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm6=(-- -- ... -- -- 31)
114
+ jmp short .upsample
115
+ alignx 16, 7
116
+
117
+ .columnloop:
118
+ vmovdqu ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD] ; load the next input block
119
+ vperm2i128 ymm6, ymm0, ymm6, 0x20 ; low lane = 0, high lane = low lane of the next block
120
+ vpslldq ymm6, ymm6, 15 ; ymm6=(-- -- ... -- -- 32): first sample of the next block
121
+
122
+ .upsample:
123
+ vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm1=( 0 1 2 ... 29 30 31)
124
+
125
+ vperm2i128 ymm2, ymm0, ymm1, 0x20
126
+ vpalignr ymm2, ymm1, ymm2, 15 ; ymm2=(-- 0 1 ... 28 29 30)
127
+ vperm2i128 ymm4, ymm0, ymm1, 0x03
128
+ vpalignr ymm3, ymm4, ymm1, 1 ; ymm3=( 1 2 3 ... 30 31 --)
129
+
130
+ vpor ymm2, ymm2, ymm7 ; ymm2=(-1 0 1 ... 28 29 30)
131
+ vpor ymm3, ymm3, ymm6 ; ymm3=( 1 2 3 ... 30 31 32)
132
+
133
+ vpsrldq ymm7, ymm4, (SIZEOF_XMMWORD-1) ; ymm7=(31 -- -- ... -- -- --), left neighbor for the next block
134
+
135
+ vpunpckhbw ymm4, ymm1, ymm0 ; ymm4=( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
136
+ vpunpcklbw ymm5, ymm1, ymm0 ; ymm5=( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
137
+ vperm2i128 ymm1, ymm5, ymm4, 0x20 ; ymm1=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
138
+ vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
139
+
140
+ vpunpckhbw ymm5, ymm2, ymm0 ; ymm5=( 7 8 9 10 11 12 13 14 23 24 25 26 27 28 29 30)
141
+ vpunpcklbw ymm6, ymm2, ymm0 ; ymm6=(-1 0 1 2 3 4 5 6 15 16 17 18 19 20 21 22)
142
+ vperm2i128 ymm2, ymm6, ymm5, 0x20 ; ymm2=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
143
+ vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
144
+
145
+ vpunpckhbw ymm6, ymm3, ymm0 ; ymm6=( 9 10 11 12 13 14 15 16 25 26 27 28 29 30 31 32)
146
+ vpunpcklbw ymm0, ymm3, ymm0 ; ymm0=( 1 2 3 4 5 6 7 8 17 18 19 20 21 22 23 24)
147
+ vperm2i128 ymm3, ymm0, ymm6, 0x20 ; ymm3=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)
148
+ vperm2i128 ymm6, ymm0, ymm6, 0x31 ; ymm6=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32)
149
+
150
+ vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's)
151
+
152
+ vpmullw ymm1, ymm1, [GOTOFF(ebx,PW_THREE)] ; 3 * center sample
153
+ vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)]
154
+ vpaddw ymm2, ymm2, [GOTOFF(ebx,PW_ONE)] ; left neighbor + 1
155
+ vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_ONE)]
156
+ vpaddw ymm3, ymm3, [GOTOFF(ebx,PW_TWO)] ; right neighbor + 2
157
+ vpaddw ymm6, ymm6, [GOTOFF(ebx,PW_TWO)]
158
+
159
+ vpaddw ymm2, ymm2, ymm1
160
+ vpaddw ymm5, ymm5, ymm4
161
+ vpsrlw ymm2, ymm2, 2 ; ymm2=OutLE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30)
162
+ vpsrlw ymm5, ymm5, 2 ; ymm5=OutHE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62)
163
+ vpaddw ymm3, ymm3, ymm1
164
+ vpaddw ymm6, ymm6, ymm4
165
+ vpsrlw ymm3, ymm3, 2 ; ymm3=OutLO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31)
166
+ vpsrlw ymm6, ymm6, 2 ; ymm6=OutHO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63)
167
+
168
+ vpsllw ymm3, ymm3, BYTE_BIT ; move the odd outputs into the high byte of each word
169
+ vpsllw ymm6, ymm6, BYTE_BIT
170
+ vpor ymm2, ymm2, ymm3 ; ymm2=OutL=( 0 1 2 ... 29 30 31)
171
+ vpor ymm5, ymm5, ymm6 ; ymm5=OutH=(32 33 34 ... 61 62 63)
172
+
173
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm2
174
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm5
175
+
176
+ sub eax, byte SIZEOF_YMMWORD ; one input block consumed
177
+ add esi, byte 1*SIZEOF_YMMWORD ; inptr
178
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
179
+ cmp eax, byte SIZEOF_YMMWORD
180
+ ja near .columnloop ; more than one block still to go
181
+ test eax, eax
182
+ jnz near .columnloop_last ; exactly one block left
183
+
184
+ pop esi
185
+ pop edi
186
+ pop eax
187
+
188
+ add esi, byte SIZEOF_JSAMPROW ; input_data
189
+ add edi, byte SIZEOF_JSAMPROW ; output_data
190
+ dec ecx ; rowctr
191
+ jg near .rowloop
192
+
193
+ .return:
194
+ vzeroupper ; zero the upper halves of the YMM registers before returning
195
+ pop edi
196
+ pop esi
197
+ ; pop edx ; need not be preserved
198
+ ; pop ecx ; need not be preserved
199
+ poppic ebx
200
+ pop ebp
201
+ ret
202
+
203
+ ; --------------------------------------------------------------------------
204
+ ;
205
+ ; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
206
+ ; Again a triangle filter; see comments for h2v1 case, above.
207
+ ; Vertical pass (16-bit): Int0 = 3*row[0] + row[-1], Int1 = 3*row[0] + row[+1].
208
+ ; GLOBAL(void)
209
+ ; jsimd_h2v2_fancy_upsample_avx2(int max_v_samp_factor,
210
+ ; JDIMENSION downsampled_width,
211
+ ; JSAMPARRAY input_data,
212
+ ; JSAMPARRAY *output_data_ptr);
213
+ ; Horizontal pass: out[2i] = (3*Int[i] + Int[i-1] + 8) >> 4, out[2i+1] = (3*Int[i] + Int[i+1] + 7) >> 4.
214
+
215
+ %define max_v_samp(b) (b) + 8 ; int max_v_samp_factor
216
+ %define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width
217
+ %define input_data(b) (b) + 16 ; JSAMPARRAY input_data
218
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
219
+
220
+ %define original_ebp ebp + 0 ; slot at the aligned ebp that holds the pre-alignment frame base
221
+ %define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; i-th YMM-sized scratch slot below the aligned ebp
222
+ ; ymmword wk[WK_NUM]: wk(0)/wk(1) = left-neighbor word for Int0/Int1, wk(2)/wk(3) = right-neighbor word
223
+ %define WK_NUM 4
224
+ %define gotptr wk(0) - SIZEOF_POINTER ; void *gotptr
225
+
226
+ align 32
227
+ GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
228
+
229
+ EXTN(jsimd_h2v2_fancy_upsample_avx2):
230
+ push ebp
231
+ mov eax, esp ; eax = original ebp (address of the saved ebp; arguments start at eax+8)
232
+ sub esp, byte 4
233
+ and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
234
+ mov [esp], eax ; remember the unaligned frame base for the epilogue
235
+ mov ebp, esp ; ebp = aligned ebp
236
+ lea esp, [wk(0)] ; reserve wk[0..WK_NUM-1] below the aligned ebp
237
+ pushpic eax ; make a room for GOT address
238
+ push ebx
239
+ ; push ecx ; need not be preserved
240
+ ; push edx ; need not be preserved
241
+ push esi
242
+ push edi
243
+
244
+ get_GOT ebx ; get GOT address
245
+ movpic POINTER [gotptr], ebx ; save GOT address
246
+
247
+ mov edx, eax ; edx = original ebp
248
+ mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
249
+ test eax, eax ; zero width: nothing to do
250
+ jz near .return
251
+
252
+ mov ecx, INT [max_v_samp(edx)] ; rowctr
253
+ test ecx, ecx ; zero rows: nothing to do
254
+ jz near .return
255
+
256
+ mov esi, JSAMPARRAY [input_data(edx)] ; input_data
257
+ mov edi, POINTER [output_data_ptr(edx)]
258
+ mov edi, JSAMPARRAY [edi] ; output_data
259
+ alignx 16, 7
260
+ .rowloop:
261
+ push eax ; colctr
262
+ push ecx
263
+ push edi
264
+ push esi
265
+
266
+ mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above)
267
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
268
+ mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below)
269
+ mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
270
+ mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
271
+
272
+ test eax, SIZEOF_YMMWORD-1 ; is the width a multiple of SIZEOF_YMMWORD?
273
+ jz short .skip
274
+ push edx ; dl is used as scratch below, so keep outptr0 safe
275
+ mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
276
+ mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
277
+ mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
278
+ mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
279
+ mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
280
+ mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample (copy of the last one) past the end of each of the three input rows
281
+ pop edx
282
+ .skip:
283
+ ; -- process the first column block
284
+
285
+ vmovdqu ymm0, YMMWORD [ebx+0*SIZEOF_YMMWORD] ; ymm0=row[ 0][0]
286
+ vmovdqu ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD] ; ymm1=row[-1][0]
287
+ vmovdqu ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm2=row[+1][0]
288
+
289
+ pushpic ebx ; ebx (inptr0) is saved while it holds the GOT address; pushpic itself is defined elsewhere
290
+ movpic ebx, POINTER [gotptr] ; load GOT address
291
+
292
+ vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
293
+
294
+ vpunpckhbw ymm4, ymm0, ymm3 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
295
+ vpunpcklbw ymm5, ymm0, ymm3 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
296
+ vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
297
+ vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
298
+
299
+ vpunpckhbw ymm5, ymm1, ymm3 ; ymm5=row[-1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
300
+ vpunpcklbw ymm6, ymm1, ymm3 ; ymm6=row[-1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
301
+ vperm2i128 ymm1, ymm6, ymm5, 0x20 ; ymm1=row[-1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
302
+ vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=row[-1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
303
+
304
+ vpunpckhbw ymm6, ymm2, ymm3 ; ymm6=row[+1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
305
+ vpunpcklbw ymm3, ymm2, ymm3 ; ymm3=row[+1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
306
+ vperm2i128 ymm2, ymm3, ymm6, 0x20 ; ymm2=row[+1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
307
+ vperm2i128 ymm6, ymm3, ymm6, 0x31 ; ymm6=row[+1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
308
+
309
+ vpmullw ymm0, ymm0, [GOTOFF(ebx,PW_THREE)] ; 3 * row[0]
310
+ vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)]
311
+
312
+ vpcmpeqb xmm7, xmm7, xmm7
313
+ vpsrldq xmm7, xmm7, (SIZEOF_XMMWORD-2) ; (ffff ---- ---- ... ---- ----) LSB is ffff
314
+
315
+ vpaddw ymm1, ymm1, ymm0 ; ymm1=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
316
+ vpaddw ymm5, ymm5, ymm4 ; ymm5=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
317
+ vpaddw ymm2, ymm2, ymm0 ; ymm2=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
318
+ vpaddw ymm6, ymm6, ymm4 ; ymm6=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
319
+
320
+ vmovdqu YMMWORD [edx+0*SIZEOF_YMMWORD], ymm1 ; temporarily save
321
+ vmovdqu YMMWORD [edx+1*SIZEOF_YMMWORD], ymm5 ; the intermediate data
322
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm2 ; (the output rows double as scratch for the 16-bit intermediates)
323
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm6
324
+
325
+ vpand ymm1, ymm1, ymm7 ; ymm1=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
326
+ vpand ymm2, ymm2, ymm7 ; ymm2=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
327
+
328
+ vmovdqa YMMWORD [wk(0)], ymm1 ; column 0 acts as its own left neighbor (upper row)
329
+ vmovdqa YMMWORD [wk(1)], ymm2 ; column 0 acts as its own left neighbor (lower row)
330
+
331
+ poppic ebx
332
+
333
+ add eax, byte SIZEOF_YMMWORD-1
334
+ and eax, byte -SIZEOF_YMMWORD ; colctr rounded up to a multiple of SIZEOF_YMMWORD
335
+ cmp eax, byte SIZEOF_YMMWORD
336
+ ja short .columnloop ; more than one block: fetch the right neighbor from the next block
337
+ alignx 16, 7
338
+
339
+ .columnloop_last:
340
+ ; -- process the last column block
341
+
342
+ pushpic ebx
343
+ movpic ebx, POINTER [gotptr] ; load GOT address
344
+
345
+ vpcmpeqb xmm1, xmm1, xmm1
346
+ vpslldq xmm1, xmm1, (SIZEOF_XMMWORD-2)
347
+ vperm2i128 ymm1, ymm1, ymm1, 1 ; (---- ---- ... ---- ---- ffff) MSB is ffff
348
+
349
+ vpand ymm2, ymm1, YMMWORD [edi+1*SIZEOF_YMMWORD] ; keep only word 31 of the saved Int1H
350
+ vpand ymm1, ymm1, YMMWORD [edx+1*SIZEOF_YMMWORD] ; keep only word 31 of the saved Int0H
351
+
352
+ vmovdqa YMMWORD [wk(2)], ymm1 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
353
+ vmovdqa YMMWORD [wk(3)], ymm2 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
354
+
355
+ jmp near .upsample
356
+ alignx 16, 7
357
+
358
+ .columnloop:
359
+ ; -- process the next column block
360
+
361
+ vmovdqu ymm0, YMMWORD [ebx+1*SIZEOF_YMMWORD] ; ymm0=row[ 0][1]
362
+ vmovdqu ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD] ; ymm1=row[-1][1]
363
+ vmovdqu ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD] ; ymm2=row[+1][1]
364
+
365
+ pushpic ebx
366
+ movpic ebx, POINTER [gotptr] ; load GOT address
367
+
368
+ vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
369
+
370
+ vpunpckhbw ymm4, ymm0, ymm3 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
371
+ vpunpcklbw ymm5, ymm0, ymm3 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
372
+ vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
373
+ vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
374
+
375
+ vpunpckhbw ymm5, ymm1, ymm3 ; ymm5=row[-1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
376
+ vpunpcklbw ymm6, ymm1, ymm3 ; ymm6=row[-1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
377
+ vperm2i128 ymm1, ymm6, ymm5, 0x20 ; ymm1=row[-1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
378
+ vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=row[-1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
379
+
380
+ vpunpckhbw ymm6, ymm2, ymm3 ; ymm6=row[+1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
381
+ vpunpcklbw ymm7, ymm2, ymm3 ; ymm7=row[+1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
382
+ vperm2i128 ymm2, ymm7, ymm6, 0x20 ; ymm2=row[+1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
383
+ vperm2i128 ymm6, ymm7, ymm6, 0x31 ; ymm6=row[+1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
384
+
385
+ vpmullw ymm0, ymm0, [GOTOFF(ebx,PW_THREE)] ; 3 * row[0]
386
+ vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)]
387
+
388
+ vpaddw ymm1, ymm1, ymm0 ; ymm1=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
389
+ vpaddw ymm5, ymm5, ymm4 ; ymm5=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
390
+ vpaddw ymm2, ymm2, ymm0 ; ymm2=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
391
+ vpaddw ymm6, ymm6, ymm4 ; ymm6=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
392
+
393
+ vmovdqu YMMWORD [edx+2*SIZEOF_YMMWORD], ymm1 ; temporarily save
394
+ vmovdqu YMMWORD [edx+3*SIZEOF_YMMWORD], ymm5 ; the intermediate data
395
+ vmovdqu YMMWORD [edi+2*SIZEOF_YMMWORD], ymm2 ; (stored one block ahead of the block being finished below)
396
+ vmovdqu YMMWORD [edi+3*SIZEOF_YMMWORD], ymm6
397
+
398
+ vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm3 is still all 0's here
399
+ vpslldq ymm1, ymm1, 14 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 0)
400
+ vperm2i128 ymm2, ymm3, ymm2, 0x20
401
+ vpslldq ymm2, ymm2, 14 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 0)
402
+
403
+ vmovdqa YMMWORD [wk(2)], ymm1 ; right neighbor for the upper row = first word of the next block
404
+ vmovdqa YMMWORD [wk(3)], ymm2 ; right neighbor for the lower row = first word of the next block
405
+
406
+ .upsample:
407
+ ; -- process the upper row
408
+
409
+ vmovdqu ymm7, YMMWORD [edx+0*SIZEOF_YMMWORD] ; ymm7=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
410
+ vmovdqu ymm3, YMMWORD [edx+1*SIZEOF_YMMWORD] ; ymm3=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
411
+
412
+ vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's)
413
+
414
+ vperm2i128 ymm0, ymm1, ymm7, 0x03
415
+ vpalignr ymm0, ymm0, ymm7, 2 ; ymm0=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 --)
416
+ vperm2i128 ymm4, ymm1, ymm3, 0x20
417
+ vpslldq ymm4, ymm4, 14 ; ymm4=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 16)
418
+
419
+ vperm2i128 ymm5, ymm1, ymm7, 0x03
420
+ vpsrldq ymm5, ymm5, 14 ; ymm5=(15 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
421
+ vperm2i128 ymm6, ymm1, ymm3, 0x20
422
+ vpalignr ymm6, ymm3, ymm6, 14 ; ymm6=(-- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
423
+
424
+ vpor ymm0, ymm0, ymm4 ; ymm0=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)
425
+ vpor ymm5, ymm5, ymm6 ; ymm5=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
426
+
427
+ vperm2i128 ymm2, ymm1, ymm3, 0x03
428
+ vpalignr ymm2, ymm2, ymm3, 2 ; ymm2=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 --)
429
+ vperm2i128 ymm4, ymm1, ymm3, 0x03
430
+ vpsrldq ymm4, ymm4, 14 ; ymm4=(31 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
431
+ vperm2i128 ymm1, ymm1, ymm7, 0x20
432
+ vpalignr ymm1, ymm7, ymm1, 14 ; ymm1=(-- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
433
+
434
+ vpor ymm1, ymm1, YMMWORD [wk(0)] ; ymm1=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
435
+ vpor ymm2, ymm2, YMMWORD [wk(2)] ; ymm2=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32)
436
+
437
+ vmovdqa YMMWORD [wk(0)], ymm4 ; word 31 becomes the left neighbor for the next block
438
+
439
+ vpmullw ymm7, ymm7, [GOTOFF(ebx,PW_THREE)] ; 3 * center
440
+ vpmullw ymm3, ymm3, [GOTOFF(ebx,PW_THREE)]
441
+ vpaddw ymm1, ymm1, [GOTOFF(ebx,PW_EIGHT)] ; left neighbor + 8
442
+ vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_EIGHT)]
443
+ vpaddw ymm0, ymm0, [GOTOFF(ebx,PW_SEVEN)] ; right neighbor + 7
444
+ vpaddw ymm2, [GOTOFF(ebx,PW_SEVEN)] ; NOTE(review): only two-operand form in this file; presumably equivalent to "vpaddw ymm2, ymm2, [...]" - confirm
445
+
446
+ vpaddw ymm1, ymm1, ymm7
447
+ vpaddw ymm5, ymm5, ymm3
448
+ vpsrlw ymm1, ymm1, 4 ; ymm1=Out0LE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30)
449
+ vpsrlw ymm5, ymm5, 4 ; ymm5=Out0HE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62)
450
+ vpaddw ymm0, ymm0, ymm7
451
+ vpaddw ymm2, ymm2, ymm3
452
+ vpsrlw ymm0, ymm0, 4 ; ymm0=Out0LO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31)
453
+ vpsrlw ymm2, ymm2, 4 ; ymm2=Out0HO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63)
454
+
455
+ vpsllw ymm0, ymm0, BYTE_BIT ; move the odd outputs into the high byte of each word
456
+ vpsllw ymm2, ymm2, BYTE_BIT
457
+ vpor ymm1, ymm1, ymm0 ; ymm1=Out0L=( 0 1 2 ... 29 30 31)
458
+ vpor ymm5, ymm5, ymm2 ; ymm5=Out0H=(32 33 34 ... 61 62 63)
459
+
460
+ vmovdqu YMMWORD [edx+0*SIZEOF_YMMWORD], ymm1 ; final bytes overwrite the saved intermediates
461
+ vmovdqu YMMWORD [edx+1*SIZEOF_YMMWORD], ymm5
462
+
463
+ ; -- process the lower row
464
+
465
+ vmovdqu ymm6, YMMWORD [edi+0*SIZEOF_YMMWORD] ; ymm6=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
466
+ vmovdqu ymm4, YMMWORD [edi+1*SIZEOF_YMMWORD] ; ymm4=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
467
+
468
+ vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's)
469
+
470
+ vperm2i128 ymm7, ymm1, ymm6, 0x03
471
+ vpalignr ymm7, ymm7, ymm6, 2 ; ymm7=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 --)
472
+ vperm2i128 ymm3, ymm1, ymm4, 0x20
473
+ vpslldq ymm3, ymm3, 14 ; ymm3=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 16)
474
+
475
+ vperm2i128 ymm0, ymm1, ymm6, 0x03
476
+ vpsrldq ymm0, ymm0, 14 ; ymm0=(15 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
477
+ vperm2i128 ymm2, ymm1, ymm4, 0x20
478
+ vpalignr ymm2, ymm4, ymm2, 14 ; ymm2=(-- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
479
+
480
+ vpor ymm7, ymm7, ymm3 ; ymm7=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)
481
+ vpor ymm0, ymm0, ymm2 ; ymm0=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
482
+
483
+ vperm2i128 ymm5, ymm1, ymm4, 0x03
484
+ vpalignr ymm5, ymm5, ymm4, 2 ; ymm5=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 --)
485
+ vperm2i128 ymm3, ymm1, ymm4, 0x03
486
+ vpsrldq ymm3, ymm3, 14 ; ymm3=(31 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
487
+ vperm2i128 ymm1, ymm1, ymm6, 0x20
488
+ vpalignr ymm1, ymm6, ymm1, 14 ; ymm1=(-- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
489
+
490
+ vpor ymm1, ymm1, YMMWORD [wk(1)] ; ymm1=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
491
+ vpor ymm5, ymm5, YMMWORD [wk(3)] ; ymm5=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32)
492
+
493
+ vmovdqa YMMWORD [wk(1)], ymm3 ; word 31 becomes the left neighbor for the next block
494
+
495
+ vpmullw ymm6, ymm6, [GOTOFF(ebx,PW_THREE)] ; 3 * center
496
+ vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)]
497
+ vpaddw ymm1, ymm1, [GOTOFF(ebx,PW_EIGHT)] ; left neighbor + 8
498
+ vpaddw ymm0, ymm0, [GOTOFF(ebx,PW_EIGHT)]
499
+ vpaddw ymm7, ymm7, [GOTOFF(ebx,PW_SEVEN)] ; right neighbor + 7
500
+ vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_SEVEN)]
501
+
502
+ vpaddw ymm1, ymm1, ymm6
503
+ vpaddw ymm0, ymm0, ymm4
504
+ vpsrlw ymm1, ymm1, 4 ; ymm1=Out1LE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30)
505
+ vpsrlw ymm0, ymm0, 4 ; ymm0=Out1HE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62)
506
+ vpaddw ymm7, ymm7, ymm6
507
+ vpaddw ymm5, ymm5, ymm4
508
+ vpsrlw ymm7, ymm7, 4 ; ymm7=Out1LO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31)
509
+ vpsrlw ymm5, ymm5, 4 ; ymm5=Out1HO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63)
510
+
511
+ vpsllw ymm7, ymm7, BYTE_BIT ; move the odd outputs into the high byte of each word
512
+ vpsllw ymm5, ymm5, BYTE_BIT
513
+ vpor ymm1, ymm1, ymm7 ; ymm1=Out1L=( 0 1 2 ... 29 30 31)
514
+ vpor ymm0, ymm0, ymm5 ; ymm0=Out1H=(32 33 34 ... 61 62 63)
515
+
516
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1 ; final bytes overwrite the saved intermediates
517
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0
518
+
519
+ poppic ebx
520
+
521
+ sub eax, byte SIZEOF_YMMWORD ; one input block consumed
522
+ add ecx, byte 1*SIZEOF_YMMWORD ; inptr1(above)
523
+ add ebx, byte 1*SIZEOF_YMMWORD ; inptr0
524
+ add esi, byte 1*SIZEOF_YMMWORD ; inptr1(below)
525
+ add edx, byte 2*SIZEOF_YMMWORD ; outptr0
526
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr1
527
+ cmp eax, byte SIZEOF_YMMWORD
528
+ ja near .columnloop ; more than one block still to go
529
+ test eax, eax
530
+ jnz near .columnloop_last ; exactly one block left
531
+
532
+ pop esi
533
+ pop edi
534
+ pop ecx
535
+ pop eax
536
+
537
+ add esi, byte 1*SIZEOF_JSAMPROW ; input_data
538
+ add edi, byte 2*SIZEOF_JSAMPROW ; output_data
539
+ sub ecx, byte 2 ; rowctr
540
+ jg near .rowloop
541
+
542
+ .return:
543
+ vzeroupper ; zero the upper halves of the YMM registers before returning
544
+ pop edi
545
+ pop esi
546
+ ; pop edx ; need not be preserved
547
+ ; pop ecx ; need not be preserved
548
+ pop ebx
549
+ mov esp, ebp ; esp <- aligned ebp
550
+ pop esp ; esp <- original ebp
551
+ pop ebp
552
+ ret
553
+
554
+ ; --------------------------------------------------------------------------
555
+ ;
556
+ ; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
557
+ ; It's still a box filter.
558
+ ; Each input sample is written twice in a row (byte-unpacking a register with itself).
559
+ ; GLOBAL(void)
560
+ ; jsimd_h2v1_upsample_avx2(int max_v_samp_factor, JDIMENSION output_width,
561
+ ; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr);
562
+ ; output_width is rounded up to a multiple of SIZEOF_YMMWORD, so stores can extend past output_width.
563
+
564
+ %define max_v_samp(b) (b) + 8 ; int max_v_samp_factor
565
+ %define output_width(b) (b) + 12 ; JDIMENSION output_width
566
+ %define input_data(b) (b) + 16 ; JSAMPARRAY input_data
567
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
568
+
569
+ align 32
570
+ GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
571
+
572
+ EXTN(jsimd_h2v1_upsample_avx2):
573
+ push ebp
574
+ mov ebp, esp ; arguments are addressed as ebp+8 .. ebp+20
575
+ ; push ebx ; unused
576
+ ; push ecx ; need not be preserved
577
+ ; push edx ; need not be preserved
578
+ push esi
579
+ push edi
580
+
581
+ mov edx, JDIMENSION [output_width(ebp)]
582
+ add edx, byte (SIZEOF_YMMWORD-1)
583
+ and edx, -SIZEOF_YMMWORD ; edx = output_width rounded up to a multiple of SIZEOF_YMMWORD
584
+ jz short .return ; zero width: nothing to do
585
+
586
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
587
+ test ecx, ecx ; zero rows: nothing to do
588
+ jz short .return
589
+
590
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
591
+ mov edi, POINTER [output_data_ptr(ebp)]
592
+ mov edi, JSAMPARRAY [edi] ; output_data
593
+ alignx 16, 7
594
+ .rowloop:
595
+ push edi
596
+ push esi
597
+
598
+ mov esi, JSAMPROW [esi] ; inptr
599
+ mov edi, JSAMPROW [edi] ; outptr
600
+ mov eax, edx ; colctr
601
+ alignx 16, 7
602
+ .columnloop:
603
+
604
+ cmp eax, byte SIZEOF_YMMWORD
605
+ ja near .above_16 ; more than SIZEOF_YMMWORD output bytes left: take the 32-sample path
606
+
607
+ vmovdqu xmm0, XMMWORD [esi+0*SIZEOF_YMMWORD] ; tail: load the last 16 input samples
608
+ vpunpckhbw xmm1, xmm0, xmm0 ; xmm1 = samples 8..15, each doubled
609
+ vpunpcklbw xmm0, xmm0, xmm0 ; xmm0 = samples 0..7, each doubled
610
+
611
+ vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
612
+ vmovdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
613
+
614
+ jmp short .nextrow
615
+
616
+ .above_16: ; reached when colctr > SIZEOF_YMMWORD (see the compare above)
617
+ vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD] ; load 32 input samples
618
+
619
+ vpermq ymm0, ymm0, 0xd8 ; reorder qwords to (0 2 1 3) so the per-lane unpacks come out in sample order
620
+ vpunpckhbw ymm1, ymm0, ymm0 ; ymm1 = samples 16..31, each doubled
621
+ vpunpcklbw ymm0, ymm0, ymm0 ; ymm0 = samples 0..15, each doubled
622
+
623
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm0
624
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm1
625
+
626
+ sub eax, byte 2*SIZEOF_YMMWORD ; 2*SIZEOF_YMMWORD output bytes were just written
627
+ jz short .nextrow
628
+
629
+ add esi, byte SIZEOF_YMMWORD ; inptr
630
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
631
+ jmp short .columnloop
632
+ alignx 16, 7
633
+
634
+ .nextrow:
635
+ pop esi
636
+ pop edi
637
+
638
+ add esi, byte SIZEOF_JSAMPROW ; input_data
639
+ add edi, byte SIZEOF_JSAMPROW ; output_data
640
+ dec ecx ; rowctr
641
+ jg short .rowloop
642
+
643
+ .return:
644
+ vzeroupper ; zero the upper halves of the YMM registers before returning
645
+ pop edi
646
+ pop esi
647
+ ; pop edx ; need not be preserved
648
+ ; pop ecx ; need not be preserved
649
+ ; pop ebx ; unused
650
+ pop ebp
651
+ ret
652
+
653
+ ; --------------------------------------------------------------------------
654
+ ;
655
+ ; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
656
+ ; It's still a box filter.
657
+ ; Each input sample is doubled horizontally and the result is stored to two output rows.
658
+ ; GLOBAL(void)
659
+ ; jsimd_h2v2_upsample_avx2(int max_v_samp_factor, JDIMENSION output_width,
660
+ ; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr);
661
+ ; output_width is rounded up to a multiple of SIZEOF_YMMWORD, so stores can extend past output_width.
662
+
663
+ %define max_v_samp(b) (b) + 8 ; int max_v_samp_factor
664
+ %define output_width(b) (b) + 12 ; JDIMENSION output_width
665
+ %define input_data(b) (b) + 16 ; JSAMPARRAY input_data
666
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
667
+
668
+ align 32
669
+ GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
670
+
671
+ EXTN(jsimd_h2v2_upsample_avx2):
672
+ push ebp
673
+ mov ebp, esp ; arguments are addressed as ebp+8 .. ebp+20
674
+ push ebx ; ebx is used below as outptr0
675
+ ; push ecx ; need not be preserved
676
+ ; push edx ; need not be preserved
677
+ push esi
678
+ push edi
679
+
680
+ mov edx, JDIMENSION [output_width(ebp)]
681
+ add edx, byte (SIZEOF_YMMWORD-1)
682
+ and edx, -SIZEOF_YMMWORD ; edx = output_width rounded up to a multiple of SIZEOF_YMMWORD
683
+ jz near .return ; zero width: nothing to do
684
+
685
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
686
+ test ecx, ecx ; zero rows: nothing to do
687
+ jz near .return
688
+
689
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
690
+ mov edi, POINTER [output_data_ptr(ebp)]
691
+ mov edi, JSAMPARRAY [edi] ; output_data
692
+ alignx 16, 7
693
+ .rowloop:
694
+ push edi
695
+ push esi
696
+
697
+ mov esi, JSAMPROW [esi] ; inptr
698
+ mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
699
+ mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
700
+ mov eax, edx ; colctr
701
+ alignx 16, 7
702
+ .columnloop:
703
+
704
+ cmp eax, byte SIZEOF_YMMWORD
705
+ ja short .above_16 ; more than SIZEOF_YMMWORD output bytes left: take the 32-sample path
706
+
707
+ vmovdqu xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] ; tail: load the last 16 input samples
708
+ vpunpckhbw xmm1, xmm0, xmm0 ; xmm1 = samples 8..15, each doubled
709
+ vpunpcklbw xmm0, xmm0, xmm0 ; xmm0 = samples 0..7, each doubled
710
+
711
+ vmovdqu XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0 ; same data goes to both output rows
712
+ vmovdqu XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1
713
+ vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
714
+ vmovdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
715
+
716
+ jmp near .nextrow
717
+
718
+ .above_16: ; reached when colctr > SIZEOF_YMMWORD (see the compare above)
719
+ vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD] ; load 32 input samples
720
+
721
+ vpermq ymm0, ymm0, 0xd8 ; reorder qwords to (0 2 1 3) so the per-lane unpacks come out in sample order
722
+ vpunpckhbw ymm1, ymm0, ymm0 ; ymm1 = samples 16..31, each doubled
723
+ vpunpcklbw ymm0, ymm0, ymm0 ; ymm0 = samples 0..15, each doubled
724
+
725
+ vmovdqu YMMWORD [ebx+0*SIZEOF_YMMWORD], ymm0 ; same data goes to both output rows
726
+ vmovdqu YMMWORD [ebx+1*SIZEOF_YMMWORD], ymm1
727
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm0
728
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm1
729
+
730
+ sub eax, byte 2*SIZEOF_YMMWORD ; 2*SIZEOF_YMMWORD output bytes per row were just written
731
+ jz short .nextrow
732
+
733
+ add esi, byte SIZEOF_YMMWORD ; inptr
734
+ add ebx, 2*SIZEOF_YMMWORD ; outptr0
735
+ add edi, 2*SIZEOF_YMMWORD ; outptr1
736
+ jmp short .columnloop
737
+ alignx 16, 7
738
+
739
+ .nextrow:
740
+ pop esi
741
+ pop edi
742
+
743
+ add esi, byte 1*SIZEOF_JSAMPROW ; input_data
744
+ add edi, byte 2*SIZEOF_JSAMPROW ; output_data
745
+ sub ecx, byte 2 ; rowctr
746
+ jg near .rowloop
747
+
748
+ .return:
749
+ vzeroupper ; zero the upper halves of the YMM registers before returning
750
+ pop edi
751
+ pop esi
752
+ ; pop edx ; need not be preserved
753
+ ; pop ecx ; need not be preserved
754
+ pop ebx
755
+ pop ebp
756
+ ret
757
+
758
+ ; For some reason, the OS X linker does not honor the request to align the
759
+ ; segment unless we do this.
760
+ align 32