epeg 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (504) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/MANIFEST +5 -0
  4. data/TODO +1 -0
  5. data/epeg/.dockerignore +4 -0
  6. data/epeg/.gitignore +5 -0
  7. data/epeg/CMakeLists.txt +30 -0
  8. data/epeg/Dockerfile +23 -0
  9. data/epeg/Epeg.h +90 -0
  10. data/epeg/README.md +42 -0
  11. data/epeg/epeg_main.c +1642 -0
  12. data/epeg/epeg_private.h +85 -0
  13. data/epeg/example/.gitignore +1 -0
  14. data/epeg/example/CMakeLists.txt +20 -0
  15. data/epeg/example/example.jpg +0 -0
  16. data/epeg/example/rotatetest.c +29 -0
  17. data/epeg/example/scaletest.c +48 -0
  18. data/epeg/vendor/libjpeg-turbo-2.0.4/BUILDING.md +828 -0
  19. data/epeg/vendor/libjpeg-turbo-2.0.4/CMakeLists.txt +1420 -0
  20. data/epeg/vendor/libjpeg-turbo-2.0.4/ChangeLog.md +1494 -0
  21. data/epeg/vendor/libjpeg-turbo-2.0.4/LICENSE.md +132 -0
  22. data/epeg/vendor/libjpeg-turbo-2.0.4/README.ijg +277 -0
  23. data/epeg/vendor/libjpeg-turbo-2.0.4/README.md +356 -0
  24. data/epeg/vendor/libjpeg-turbo-2.0.4/cderror.h +137 -0
  25. data/epeg/vendor/libjpeg-turbo-2.0.4/cdjpeg.c +145 -0
  26. data/epeg/vendor/libjpeg-turbo-2.0.4/cdjpeg.h +157 -0
  27. data/epeg/vendor/libjpeg-turbo-2.0.4/change.log +315 -0
  28. data/epeg/vendor/libjpeg-turbo-2.0.4/cjpeg.1 +354 -0
  29. data/epeg/vendor/libjpeg-turbo-2.0.4/cjpeg.c +695 -0
  30. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/BuildPackages.cmake +182 -0
  31. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/GNUInstallDirs.cmake +416 -0
  32. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/cmake_uninstall.cmake.in +24 -0
  33. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/testclean.cmake +41 -0
  34. data/epeg/vendor/libjpeg-turbo-2.0.4/cmyk.h +61 -0
  35. data/epeg/vendor/libjpeg-turbo-2.0.4/coderules.txt +78 -0
  36. data/epeg/vendor/libjpeg-turbo-2.0.4/djpeg.1 +296 -0
  37. data/epeg/vendor/libjpeg-turbo-2.0.4/djpeg.c +822 -0
  38. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/annotated.html +104 -0
  39. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/bc_s.png +0 -0
  40. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/bdwn.png +0 -0
  41. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/classes.html +106 -0
  42. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/closed.png +0 -0
  43. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen-extra.css +3 -0
  44. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen.css +1184 -0
  45. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen.png +0 -0
  46. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/dynsections.js +97 -0
  47. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2blank.png +0 -0
  48. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2cl.png +0 -0
  49. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2doc.png +0 -0
  50. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2folderclosed.png +0 -0
  51. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2folderopen.png +0 -0
  52. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2lastnode.png +0 -0
  53. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2link.png +0 -0
  54. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mlastnode.png +0 -0
  55. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mnode.png +0 -0
  56. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mo.png +0 -0
  57. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2node.png +0 -0
  58. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2ns.png +0 -0
  59. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2plastnode.png +0 -0
  60. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2pnode.png +0 -0
  61. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2splitbar.png +0 -0
  62. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2vertline.png +0 -0
  63. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/functions.html +134 -0
  64. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/functions_vars.html +134 -0
  65. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/group___turbo_j_p_e_g.html +2775 -0
  66. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/index.html +90 -0
  67. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/jquery.js +8 -0
  68. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/modules.html +95 -0
  69. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_f.png +0 -0
  70. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_g.png +0 -0
  71. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_h.png +0 -0
  72. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/open.png +0 -0
  73. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_63.html +26 -0
  74. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_63.js +4 -0
  75. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_64.html +26 -0
  76. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_64.js +5 -0
  77. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_68.html +26 -0
  78. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_68.js +4 -0
  79. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6e.html +26 -0
  80. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6e.js +4 -0
  81. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6f.html +26 -0
  82. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6f.js +5 -0
  83. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_72.html +26 -0
  84. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_72.js +4 -0
  85. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_74.html +26 -0
  86. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_74.js +102 -0
  87. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_77.html +26 -0
  88. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_77.js +4 -0
  89. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_78.html +26 -0
  90. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_78.js +4 -0
  91. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_79.html +26 -0
  92. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_79.js +4 -0
  93. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/classes_74.html +26 -0
  94. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/classes_74.js +6 -0
  95. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/close.png +0 -0
  96. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enums_74.html +26 -0
  97. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enums_74.js +8 -0
  98. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enumvalues_74.html +26 -0
  99. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enumvalues_74.js +37 -0
  100. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/functions_74.html +26 -0
  101. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/functions_74.js +31 -0
  102. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/groups_74.html +26 -0
  103. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/groups_74.js +4 -0
  104. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/mag_sel.png +0 -0
  105. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/nomatches.html +12 -0
  106. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search.css +271 -0
  107. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search.js +809 -0
  108. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_l.png +0 -0
  109. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_m.png +0 -0
  110. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_r.png +0 -0
  111. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/typedefs_74.html +26 -0
  112. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/typedefs_74.js +5 -0
  113. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_63.html +26 -0
  114. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_63.js +4 -0
  115. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_64.html +26 -0
  116. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_64.js +5 -0
  117. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_68.html +26 -0
  118. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_68.js +4 -0
  119. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6e.html +26 -0
  120. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6e.js +4 -0
  121. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6f.html +26 -0
  122. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6f.js +5 -0
  123. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_72.html +26 -0
  124. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_72.js +4 -0
  125. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_74.html +26 -0
  126. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_74.js +10 -0
  127. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_77.html +26 -0
  128. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_77.js +4 -0
  129. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_78.html +26 -0
  130. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_78.js +4 -0
  131. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_79.html +26 -0
  132. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_79.js +4 -0
  133. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjregion.html +186 -0
  134. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjscalingfactor.html +148 -0
  135. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjtransform.html +212 -0
  136. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/sync_off.png +0 -0
  137. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/sync_on.png +0 -0
  138. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_a.png +0 -0
  139. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_b.png +0 -0
  140. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_h.png +0 -0
  141. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_s.png +0 -0
  142. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tabs.css +60 -0
  143. data/epeg/vendor/libjpeg-turbo-2.0.4/doxygen-extra.css +3 -0
  144. data/epeg/vendor/libjpeg-turbo-2.0.4/doxygen.config +16 -0
  145. data/epeg/vendor/libjpeg-turbo-2.0.4/example.txt +464 -0
  146. data/epeg/vendor/libjpeg-turbo-2.0.4/jaricom.c +157 -0
  147. data/epeg/vendor/libjpeg-turbo-2.0.4/java/CMakeLists.txt +88 -0
  148. data/epeg/vendor/libjpeg-turbo-2.0.4/java/MANIFEST.MF +2 -0
  149. data/epeg/vendor/libjpeg-turbo-2.0.4/java/README +52 -0
  150. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJBench.java +1021 -0
  151. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJExample.java +405 -0
  152. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJUnitTest.java +960 -0
  153. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/allclasses-frame.html +24 -0
  154. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/allclasses-noframe.html +24 -0
  155. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/constant-values.html +532 -0
  156. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/deprecated-list.html +252 -0
  157. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/help-doc.html +210 -0
  158. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/index-all.html +1029 -0
  159. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/index.html +71 -0
  160. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJ.html +1356 -0
  161. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html +926 -0
  162. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html +241 -0
  163. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html +1255 -0
  164. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJException.html +340 -0
  165. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html +343 -0
  166. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html +751 -0
  167. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html +421 -0
  168. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html +765 -0
  169. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-frame.html +31 -0
  170. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-summary.html +202 -0
  171. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-tree.html +160 -0
  172. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/overview-tree.html +164 -0
  173. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/package-list +1 -0
  174. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/background.gif +0 -0
  175. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/tab.gif +0 -0
  176. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/titlebar.gif +0 -0
  177. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/titlebar_end.gif +0 -0
  178. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/script.js +30 -0
  179. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/serialized-form.html +176 -0
  180. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/stylesheet.css +474 -0
  181. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJ.java +584 -0
  182. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJCompressor.java +677 -0
  183. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java +76 -0
  184. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJDecompressor.java +931 -0
  185. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJException.java +78 -0
  186. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJLoader-unix.java.in +59 -0
  187. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJLoader-win.java.in +35 -0
  188. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java +115 -0
  189. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJTransform.java +227 -0
  190. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJTransformer.java +163 -0
  191. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/YUVImage.java +445 -0
  192. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJ.h +129 -0
  193. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJCompressor.h +101 -0
  194. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJDecompressor.h +101 -0
  195. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJTransformer.h +29 -0
  196. data/epeg/vendor/libjpeg-turbo-2.0.4/jcapimin.c +295 -0
  197. data/epeg/vendor/libjpeg-turbo-2.0.4/jcapistd.c +162 -0
  198. data/epeg/vendor/libjpeg-turbo-2.0.4/jcarith.c +932 -0
  199. data/epeg/vendor/libjpeg-turbo-2.0.4/jccoefct.c +449 -0
  200. data/epeg/vendor/libjpeg-turbo-2.0.4/jccolext.c +144 -0
  201. data/epeg/vendor/libjpeg-turbo-2.0.4/jccolor.c +710 -0
  202. data/epeg/vendor/libjpeg-turbo-2.0.4/jcdctmgr.c +721 -0
  203. data/epeg/vendor/libjpeg-turbo-2.0.4/jchuff.c +1096 -0
  204. data/epeg/vendor/libjpeg-turbo-2.0.4/jchuff.h +42 -0
  205. data/epeg/vendor/libjpeg-turbo-2.0.4/jcicc.c +105 -0
  206. data/epeg/vendor/libjpeg-turbo-2.0.4/jcinit.c +77 -0
  207. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmainct.c +162 -0
  208. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmarker.c +664 -0
  209. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmaster.c +640 -0
  210. data/epeg/vendor/libjpeg-turbo-2.0.4/jcomapi.c +109 -0
  211. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfig.h.in +73 -0
  212. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfig.txt +143 -0
  213. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfigint.h.in +31 -0
  214. data/epeg/vendor/libjpeg-turbo-2.0.4/jcparam.c +541 -0
  215. data/epeg/vendor/libjpeg-turbo-2.0.4/jcphuff.c +1105 -0
  216. data/epeg/vendor/libjpeg-turbo-2.0.4/jcprepct.c +351 -0
  217. data/epeg/vendor/libjpeg-turbo-2.0.4/jcsample.c +539 -0
  218. data/epeg/vendor/libjpeg-turbo-2.0.4/jcstest.c +126 -0
  219. data/epeg/vendor/libjpeg-turbo-2.0.4/jctrans.c +400 -0
  220. data/epeg/vendor/libjpeg-turbo-2.0.4/jdapimin.c +407 -0
  221. data/epeg/vendor/libjpeg-turbo-2.0.4/jdapistd.c +639 -0
  222. data/epeg/vendor/libjpeg-turbo-2.0.4/jdarith.c +773 -0
  223. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatadst-tj.c +203 -0
  224. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatadst.c +293 -0
  225. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatasrc-tj.c +194 -0
  226. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatasrc.c +295 -0
  227. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcoefct.c +692 -0
  228. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcoefct.h +82 -0
  229. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcol565.c +384 -0
  230. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcolext.c +143 -0
  231. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcolor.c +883 -0
  232. data/epeg/vendor/libjpeg-turbo-2.0.4/jdct.h +208 -0
  233. data/epeg/vendor/libjpeg-turbo-2.0.4/jddctmgr.c +352 -0
  234. data/epeg/vendor/libjpeg-turbo-2.0.4/jdhuff.c +831 -0
  235. data/epeg/vendor/libjpeg-turbo-2.0.4/jdhuff.h +238 -0
  236. data/epeg/vendor/libjpeg-turbo-2.0.4/jdicc.c +171 -0
  237. data/epeg/vendor/libjpeg-turbo-2.0.4/jdinput.c +408 -0
  238. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmainct.c +460 -0
  239. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmainct.h +71 -0
  240. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmarker.c +1377 -0
  241. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmaster.c +737 -0
  242. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmaster.h +28 -0
  243. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmerge.c +617 -0
  244. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmrg565.c +354 -0
  245. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmrgext.c +184 -0
  246. data/epeg/vendor/libjpeg-turbo-2.0.4/jdphuff.c +687 -0
  247. data/epeg/vendor/libjpeg-turbo-2.0.4/jdpostct.c +294 -0
  248. data/epeg/vendor/libjpeg-turbo-2.0.4/jdsample.c +518 -0
  249. data/epeg/vendor/libjpeg-turbo-2.0.4/jdsample.h +50 -0
  250. data/epeg/vendor/libjpeg-turbo-2.0.4/jdtrans.c +155 -0
  251. data/epeg/vendor/libjpeg-turbo-2.0.4/jerror.c +251 -0
  252. data/epeg/vendor/libjpeg-turbo-2.0.4/jerror.h +316 -0
  253. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctflt.c +169 -0
  254. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctfst.c +227 -0
  255. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctint.c +288 -0
  256. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctflt.c +240 -0
  257. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctfst.c +371 -0
  258. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctint.c +2627 -0
  259. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctred.c +409 -0
  260. data/epeg/vendor/libjpeg-turbo-2.0.4/jinclude.h +88 -0
  261. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemmgr.c +1179 -0
  262. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemnobs.c +115 -0
  263. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemsys.h +178 -0
  264. data/epeg/vendor/libjpeg-turbo-2.0.4/jmorecfg.h +421 -0
  265. data/epeg/vendor/libjpeg-turbo-2.0.4/jpeg_nbits_table.h +4098 -0
  266. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegcomp.h +31 -0
  267. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegint.h +368 -0
  268. data/epeg/vendor/libjpeg-turbo-2.0.4/jpeglib.h +1132 -0
  269. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegtran.1 +295 -0
  270. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegtran.c +601 -0
  271. data/epeg/vendor/libjpeg-turbo-2.0.4/jquant1.c +859 -0
  272. data/epeg/vendor/libjpeg-turbo-2.0.4/jquant2.c +1285 -0
  273. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimd.h +117 -0
  274. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimd_none.c +418 -0
  275. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimddct.h +70 -0
  276. data/epeg/vendor/libjpeg-turbo-2.0.4/jstdhuff.c +143 -0
  277. data/epeg/vendor/libjpeg-turbo-2.0.4/jutils.c +133 -0
  278. data/epeg/vendor/libjpeg-turbo-2.0.4/jversion.h +52 -0
  279. data/epeg/vendor/libjpeg-turbo-2.0.4/libjpeg.map.in +11 -0
  280. data/epeg/vendor/libjpeg-turbo-2.0.4/libjpeg.txt +3144 -0
  281. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/CMakeLists.txt +1 -0
  282. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5.c +275 -0
  283. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5.h +57 -0
  284. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5cmp.c +59 -0
  285. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5hl.c +125 -0
  286. data/epeg/vendor/libjpeg-turbo-2.0.4/rdbmp.c +689 -0
  287. data/epeg/vendor/libjpeg-turbo-2.0.4/rdcolmap.c +254 -0
  288. data/epeg/vendor/libjpeg-turbo-2.0.4/rdgif.c +39 -0
  289. data/epeg/vendor/libjpeg-turbo-2.0.4/rdjpgcom.1 +63 -0
  290. data/epeg/vendor/libjpeg-turbo-2.0.4/rdjpgcom.c +510 -0
  291. data/epeg/vendor/libjpeg-turbo-2.0.4/rdppm.c +766 -0
  292. data/epeg/vendor/libjpeg-turbo-2.0.4/rdrle.c +389 -0
  293. data/epeg/vendor/libjpeg-turbo-2.0.4/rdswitch.c +424 -0
  294. data/epeg/vendor/libjpeg-turbo-2.0.4/rdtarga.c +509 -0
  295. data/epeg/vendor/libjpeg-turbo-2.0.4/release/Distribution.xml.in +24 -0
  296. data/epeg/vendor/libjpeg-turbo-2.0.4/release/License.rtf +20 -0
  297. data/epeg/vendor/libjpeg-turbo-2.0.4/release/ReadMe.txt +5 -0
  298. data/epeg/vendor/libjpeg-turbo-2.0.4/release/Welcome.rtf +17 -0
  299. data/epeg/vendor/libjpeg-turbo-2.0.4/release/deb-control.in +31 -0
  300. data/epeg/vendor/libjpeg-turbo-2.0.4/release/installer.nsi.in +191 -0
  301. data/epeg/vendor/libjpeg-turbo-2.0.4/release/libjpeg.pc.in +10 -0
  302. data/epeg/vendor/libjpeg-turbo-2.0.4/release/libturbojpeg.pc.in +10 -0
  303. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makecygwinpkg.in +66 -0
  304. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makedpkg.in +115 -0
  305. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makemacpkg.in +284 -0
  306. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makerpm.in +30 -0
  307. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makesrpm.in +48 -0
  308. data/epeg/vendor/libjpeg-turbo-2.0.4/release/maketarball.in +51 -0
  309. data/epeg/vendor/libjpeg-turbo-2.0.4/release/rpm.spec.in +221 -0
  310. data/epeg/vendor/libjpeg-turbo-2.0.4/release/uninstall.in +113 -0
  311. data/epeg/vendor/libjpeg-turbo-2.0.4/sharedlib/CMakeLists.txt +99 -0
  312. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/CMakeLists.txt +385 -0
  313. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm/jsimd.c +721 -0
  314. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm/jsimd_neon.S +2878 -0
  315. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm64/jsimd.c +798 -0
  316. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm64/jsimd_neon.S +3433 -0
  317. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/gas-preprocessor.in +1 -0
  318. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-avx2.asm +578 -0
  319. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-mmx.asm +476 -0
  320. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-sse2.asm +503 -0
  321. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-avx2.asm +121 -0
  322. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-mmx.asm +121 -0
  323. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-sse2.asm +120 -0
  324. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-avx2.asm +113 -0
  325. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-mmx.asm +113 -0
  326. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-sse2.asm +112 -0
  327. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-avx2.asm +457 -0
  328. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-mmx.asm +355 -0
  329. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-sse2.asm +382 -0
  330. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jchuff-sse2.asm +424 -0
  331. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcphuff-sse2.asm +660 -0
  332. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-avx2.asm +388 -0
  333. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-mmx.asm +324 -0
  334. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-sse2.asm +351 -0
  335. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-avx2.asm +515 -0
  336. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-mmx.asm +404 -0
  337. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-sse2.asm +458 -0
  338. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-avx2.asm +118 -0
  339. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-mmx.asm +117 -0
  340. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-sse2.asm +117 -0
  341. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-avx2.asm +136 -0
  342. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-mmx.asm +123 -0
  343. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-sse2.asm +135 -0
  344. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-avx2.asm +575 -0
  345. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-mmx.asm +460 -0
  346. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-sse2.asm +517 -0
  347. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-avx2.asm +760 -0
  348. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-mmx.asm +731 -0
  349. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-sse2.asm +724 -0
  350. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctflt-3dn.asm +318 -0
  351. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctflt-sse.asm +369 -0
  352. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctfst-mmx.asm +395 -0
  353. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctfst-sse2.asm +403 -0
  354. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-avx2.asm +331 -0
  355. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-mmx.asm +620 -0
  356. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-sse2.asm +633 -0
  357. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-3dn.asm +451 -0
  358. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-sse.asm +571 -0
  359. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-sse2.asm +497 -0
  360. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctfst-mmx.asm +499 -0
  361. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctfst-sse2.asm +501 -0
  362. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-avx2.asm +453 -0
  363. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-mmx.asm +851 -0
  364. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-sse2.asm +858 -0
  365. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctred-mmx.asm +704 -0
  366. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctred-sse2.asm +592 -0
  367. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-3dn.asm +230 -0
  368. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-mmx.asm +276 -0
  369. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-sse.asm +208 -0
  370. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquantf-sse2.asm +168 -0
  371. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquanti-avx2.asm +188 -0
  372. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquanti-sse2.asm +201 -0
  373. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jsimd.c +1253 -0
  374. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jsimdcpu.asm +135 -0
  375. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/jsimd.h +1083 -0
  376. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jccolext-mmi.c +483 -0
  377. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jccolor-mmi.c +148 -0
  378. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jcsample-mmi.c +100 -0
  379. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jcsample.h +28 -0
  380. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdcolext-mmi.c +424 -0
  381. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdcolor-mmi.c +139 -0
  382. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdsample-mmi.c +245 -0
  383. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jfdctint-mmi.c +398 -0
  384. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jidctint-mmi.c +571 -0
  385. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jquanti-mmi.c +130 -0
  386. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jsimd.c +610 -0
  387. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jsimd_mmi.h +57 -0
  388. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/loongson-mmintrin.h +1324 -0
  389. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd.c +1123 -0
  390. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd_dspr2.S +4479 -0
  391. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd_dspr2_asm.h +292 -0
  392. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jcolsamp.inc +135 -0
  393. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jdct.inc +31 -0
  394. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jpeg_nbits_table.inc +4097 -0
  395. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdcfg.inc +93 -0
  396. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdcfg.inc.h +131 -0
  397. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdext.inc +479 -0
  398. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jccolext-altivec.c +269 -0
  399. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jccolor-altivec.c +116 -0
  400. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcgray-altivec.c +111 -0
  401. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcgryext-altivec.c +228 -0
  402. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcsample-altivec.c +159 -0
  403. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcsample.h +28 -0
  404. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdcolext-altivec.c +276 -0
  405. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdcolor-altivec.c +106 -0
  406. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdmerge-altivec.c +130 -0
  407. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdmrgext-altivec.c +329 -0
  408. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdsample-altivec.c +400 -0
  409. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jfdctfst-altivec.c +154 -0
  410. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jfdctint-altivec.c +258 -0
  411. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jidctfst-altivec.c +255 -0
  412. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jidctint-altivec.c +357 -0
  413. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jquanti-altivec.c +250 -0
  414. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jsimd.c +872 -0
  415. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jsimd_altivec.h +98 -0
  416. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolext-avx2.asm +558 -0
  417. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolext-sse2.asm +483 -0
  418. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolor-avx2.asm +121 -0
  419. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolor-sse2.asm +120 -0
  420. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgray-avx2.asm +113 -0
  421. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgray-sse2.asm +112 -0
  422. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgryext-avx2.asm +437 -0
  423. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgryext-sse2.asm +362 -0
  424. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jchuff-sse2.asm +346 -0
  425. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcphuff-sse2.asm +637 -0
  426. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcsample-avx2.asm +366 -0
  427. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcsample-sse2.asm +329 -0
  428. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolext-avx2.asm +495 -0
  429. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolext-sse2.asm +438 -0
  430. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolor-avx2.asm +118 -0
  431. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolor-sse2.asm +117 -0
  432. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmerge-avx2.asm +136 -0
  433. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmerge-sse2.asm +135 -0
  434. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmrgext-avx2.asm +593 -0
  435. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmrgext-sse2.asm +535 -0
  436. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdsample-avx2.asm +695 -0
  437. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdsample-sse2.asm +664 -0
  438. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctflt-sse.asm +355 -0
  439. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctfst-sse2.asm +389 -0
  440. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctint-avx2.asm +320 -0
  441. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctint-sse2.asm +619 -0
  442. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctflt-sse2.asm +481 -0
  443. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctfst-sse2.asm +490 -0
  444. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctint-avx2.asm +417 -0
  445. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctint-sse2.asm +846 -0
  446. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctred-sse2.asm +573 -0
  447. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquantf-sse2.asm +154 -0
  448. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquanti-avx2.asm +162 -0
  449. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquanti-sse2.asm +187 -0
  450. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jsimd.c +1076 -0
  451. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jsimdcpu.asm +86 -0
  452. data/epeg/vendor/libjpeg-turbo-2.0.4/structure.txt +904 -0
  453. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/nightshot_iso_100.bmp +0 -0
  454. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/nightshot_iso_100.txt +25 -0
  455. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test.scan +5 -0
  456. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test1.icc +0 -0
  457. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test1.icc.txt +20 -0
  458. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test2.icc +0 -0
  459. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test2.icc.txt +20 -0
  460. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testimgari.jpg +0 -0
  461. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testimgint.jpg +0 -0
  462. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig.jpg +0 -0
  463. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig.ppm +4 -0
  464. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig12.jpg +0 -0
  465. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_5674_0098.bmp +0 -0
  466. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_6434_0018a.bmp +0 -0
  467. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_6548_0026a.bmp +0 -0
  468. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbench.c +1031 -0
  469. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbenchtest.in +256 -0
  470. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbenchtest.java.in +215 -0
  471. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexample.c +396 -0
  472. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexampletest.in +149 -0
  473. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexampletest.java.in +151 -0
  474. data/epeg/vendor/libjpeg-turbo-2.0.4/tjunittest.c +931 -0
  475. data/epeg/vendor/libjpeg-turbo-2.0.4/tjutil.c +70 -0
  476. data/epeg/vendor/libjpeg-turbo-2.0.4/tjutil.h +47 -0
  477. data/epeg/vendor/libjpeg-turbo-2.0.4/transupp.c +1628 -0
  478. data/epeg/vendor/libjpeg-turbo-2.0.4/transupp.h +210 -0
  479. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-jni.c +1246 -0
  480. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-mapfile +65 -0
  481. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-mapfile.jni +101 -0
  482. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg.c +2152 -0
  483. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg.h +1744 -0
  484. data/epeg/vendor/libjpeg-turbo-2.0.4/usage.txt +635 -0
  485. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jconfig.h.in +34 -0
  486. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg62-memsrcdst.def +108 -0
  487. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg62.def +106 -0
  488. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg7-memsrcdst.def +110 -0
  489. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg7.def +108 -0
  490. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg8.def +111 -0
  491. data/epeg/vendor/libjpeg-turbo-2.0.4/wizard.txt +212 -0
  492. data/epeg/vendor/libjpeg-turbo-2.0.4/wrbmp.c +558 -0
  493. data/epeg/vendor/libjpeg-turbo-2.0.4/wrgif.c +413 -0
  494. data/epeg/vendor/libjpeg-turbo-2.0.4/wrjpgcom.1 +103 -0
  495. data/epeg/vendor/libjpeg-turbo-2.0.4/wrjpgcom.c +591 -0
  496. data/epeg/vendor/libjpeg-turbo-2.0.4/wrppm.c +365 -0
  497. data/epeg/vendor/libjpeg-turbo-2.0.4/wrrle.c +309 -0
  498. data/epeg/vendor/libjpeg-turbo-2.0.4/wrtarga.c +261 -0
  499. data/epeg.c +131 -0
  500. data/epeg.gemspec +18 -0
  501. data/extconf.rb +80 -0
  502. data/test.jpg +0 -0
  503. data/test.rb +42 -0
  504. metadata +546 -0
@@ -0,0 +1,760 @@
1
+ ;
2
+ ; jdsample.asm - upsampling (AVX2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2015, Intel Corporation.
6
+ ; Copyright (C) 2016, D. R. Commander.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+
18
+ %include "jsimdext.inc"
19
+
20
+ ; --------------------------------------------------------------------------
21
+ SECTION SEG_CONST
22
+
23
+ alignz 32
24
+ GLOBAL_DATA(jconst_fancy_upsample_avx2)
25
+
26
+ EXTN(jconst_fancy_upsample_avx2):
27
+
28
+ PW_ONE times 16 dw 1
29
+ PW_TWO times 16 dw 2
30
+ PW_THREE times 16 dw 3
31
+ PW_SEVEN times 16 dw 7
32
+ PW_EIGHT times 16 dw 8
33
+
34
+ alignz 32
35
+
36
+ ; --------------------------------------------------------------------------
37
+ SECTION SEG_TEXT
38
+ BITS 32
39
+ ;
40
+ ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
41
+ ;
42
+ ; The upsampling algorithm is linear interpolation between pixel centers,
43
+ ; also known as a "triangle filter". This is a good compromise between
44
+ ; speed and visual quality. The centers of the output pixels are 1/4 and 3/4
45
+ ; of the way between input pixel centers.
46
+ ;
47
+ ; GLOBAL(void)
48
+ ; jsimd_h2v1_fancy_upsample_avx2(int max_v_samp_factor,
49
+ ; JDIMENSION downsampled_width,
50
+ ; JSAMPARRAY input_data,
51
+ ; JSAMPARRAY *output_data_ptr);
52
+ ;
53
+
54
+ %define max_v_samp(b) (b) + 8 ; int max_v_samp_factor
55
+ %define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width
56
+ %define input_data(b) (b) + 16 ; JSAMPARRAY input_data
57
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
58
+
59
+ align 32
60
+ GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
61
+
62
+ EXTN(jsimd_h2v1_fancy_upsample_avx2):
63
+ push ebp
64
+ mov ebp, esp
65
+ pushpic ebx
66
+ ; push ecx ; need not be preserved
67
+ ; push edx ; need not be preserved
68
+ push esi
69
+ push edi
70
+
71
+ get_GOT ebx ; get GOT address
72
+
73
+ mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
74
+ test eax, eax
75
+ jz near .return
76
+
77
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
78
+ test ecx, ecx
79
+ jz near .return
80
+
81
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
82
+ mov edi, POINTER [output_data_ptr(ebp)]
83
+ mov edi, JSAMPARRAY [edi] ; output_data
84
+ alignx 16, 7
85
+ .rowloop:
86
+ push eax ; colctr
87
+ push edi
88
+ push esi
89
+
90
+ mov esi, JSAMPROW [esi] ; inptr
91
+ mov edi, JSAMPROW [edi] ; outptr
92
+
93
+ test eax, SIZEOF_YMMWORD-1
94
+ jz short .skip
95
+ mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
96
+ mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample
97
+ .skip:
98
+ vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's)
99
+ vpcmpeqb xmm7, xmm7, xmm7
100
+ vpsrldq xmm7, xmm7, (SIZEOF_XMMWORD-1) ; (ff -- -- -- ... -- --) LSB is ff
101
+ vpand ymm7, ymm7, YMMWORD [esi+0*SIZEOF_YMMWORD]
102
+
103
+ add eax, byte SIZEOF_YMMWORD-1
104
+ and eax, byte -SIZEOF_YMMWORD
105
+ cmp eax, byte SIZEOF_YMMWORD
106
+ ja short .columnloop
107
+ alignx 16, 7
108
+
109
+ .columnloop_last:
110
+ vpcmpeqb xmm6, xmm6, xmm6
111
+ vpslldq xmm6, xmm6, (SIZEOF_XMMWORD-1)
112
+ vperm2i128 ymm6, ymm6, ymm6, 1 ; (---- ---- ... ---- ---- ff) MSB is ff
113
+ vpand ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD]
114
+ jmp short .upsample
115
+ alignx 16, 7
116
+
117
+ .columnloop:
118
+ vmovdqu ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD]
119
+ vperm2i128 ymm6, ymm0, ymm6, 0x20
120
+ vpslldq ymm6, ymm6, 15
121
+
122
+ .upsample:
123
+ vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm1=( 0 1 2 ... 29 30 31)
124
+
125
+ vperm2i128 ymm2, ymm0, ymm1, 0x20
126
+ vpalignr ymm2, ymm1, ymm2, 15 ; ymm2=(-- 0 1 ... 28 29 30)
127
+ vperm2i128 ymm4, ymm0, ymm1, 0x03
128
+ vpalignr ymm3, ymm4, ymm1, 1 ; ymm3=( 1 2 3 ... 30 31 --)
129
+
130
+ vpor ymm2, ymm2, ymm7 ; ymm2=(-1 0 1 ... 28 29 30)
131
+ vpor ymm3, ymm3, ymm6 ; ymm3=( 1 2 3 ... 30 31 32)
132
+
133
+ vpsrldq ymm7, ymm4, (SIZEOF_XMMWORD-1) ; ymm7=(31 -- -- ... -- -- --)
134
+
135
+ vpunpckhbw ymm4, ymm1, ymm0 ; ymm4=( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
136
+ vpunpcklbw ymm5, ymm1, ymm0 ; ymm5=( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
137
+ vperm2i128 ymm1, ymm5, ymm4, 0x20 ; ymm1=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
138
+ vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
139
+
140
+ vpunpckhbw ymm5, ymm2, ymm0 ; ymm5=( 7 8 9 10 11 12 13 14 23 24 25 26 27 28 29 30)
141
+ vpunpcklbw ymm6, ymm2, ymm0 ; ymm6=(-1 0 1 2 3 4 5 6 15 16 17 18 19 20 21 22)
142
+ vperm2i128 ymm2, ymm6, ymm5, 0x20 ; ymm2=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
143
+ vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
144
+
145
+ vpunpckhbw ymm6, ymm3, ymm0 ; ymm6=( 1 2 3 4 5 6 7 8 17 18 19 20 21 22 23 24)
146
+ vpunpcklbw ymm0, ymm3, ymm0 ; ymm0=( 9 10 11 12 13 14 15 16 25 26 27 28 29 30 31 32)
147
+ vperm2i128 ymm3, ymm0, ymm6, 0x20 ; ymm3=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)
148
+ vperm2i128 ymm6, ymm0, ymm6, 0x31 ; ymm6=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32)
149
+
150
+ vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's)
151
+
152
+ vpmullw ymm1, ymm1, [GOTOFF(ebx,PW_THREE)]
153
+ vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)]
154
+ vpaddw ymm2, ymm2, [GOTOFF(ebx,PW_ONE)]
155
+ vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_ONE)]
156
+ vpaddw ymm3, ymm3, [GOTOFF(ebx,PW_TWO)]
157
+ vpaddw ymm6, ymm6, [GOTOFF(ebx,PW_TWO)]
158
+
159
+ vpaddw ymm2, ymm2, ymm1
160
+ vpaddw ymm5, ymm5, ymm4
161
+ vpsrlw ymm2, ymm2, 2 ; ymm2=OutLE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30)
162
+ vpsrlw ymm5, ymm5, 2 ; ymm5=OutHE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62)
163
+ vpaddw ymm3, ymm3, ymm1
164
+ vpaddw ymm6, ymm6, ymm4
165
+ vpsrlw ymm3, ymm3, 2 ; ymm3=OutLO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31)
166
+ vpsrlw ymm6, ymm6, 2 ; ymm6=OutHO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63)
167
+
168
+ vpsllw ymm3, ymm3, BYTE_BIT
169
+ vpsllw ymm6, ymm6, BYTE_BIT
170
+ vpor ymm2, ymm2, ymm3 ; ymm2=OutL=( 0 1 2 ... 29 30 31)
171
+ vpor ymm5, ymm5, ymm6 ; ymm5=OutH=(32 33 34 ... 61 62 63)
172
+
173
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm2
174
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm5
175
+
176
+ sub eax, byte SIZEOF_YMMWORD
177
+ add esi, byte 1*SIZEOF_YMMWORD ; inptr
178
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
179
+ cmp eax, byte SIZEOF_YMMWORD
180
+ ja near .columnloop
181
+ test eax, eax
182
+ jnz near .columnloop_last
183
+
184
+ pop esi
185
+ pop edi
186
+ pop eax
187
+
188
+ add esi, byte SIZEOF_JSAMPROW ; input_data
189
+ add edi, byte SIZEOF_JSAMPROW ; output_data
190
+ dec ecx ; rowctr
191
+ jg near .rowloop
192
+
193
+ .return:
194
+ vzeroupper
195
+ pop edi
196
+ pop esi
197
+ ; pop edx ; need not be preserved
198
+ ; pop ecx ; need not be preserved
199
+ poppic ebx
200
+ pop ebp
201
+ ret
202
+
203
+ ; --------------------------------------------------------------------------
204
+ ;
205
+ ; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
206
+ ; Again a triangle filter; see comments for h2v1 case, above.
207
+ ;
208
+ ; GLOBAL(void)
209
+ ; jsimd_h2v2_fancy_upsample_avx2(int max_v_samp_factor,
210
+ ; JDIMENSION downsampled_width,
211
+ ; JSAMPARRAY input_data,
212
+ ; JSAMPARRAY *output_data_ptr);
213
+ ;
214
+
215
+ %define max_v_samp(b) (b) + 8 ; int max_v_samp_factor
216
+ %define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width
217
+ %define input_data(b) (b) + 16 ; JSAMPARRAY input_data
218
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
219
+
220
+ %define original_ebp ebp + 0
221
+ %define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_YMMWORD
222
+ ; ymmword wk[WK_NUM]
223
+ %define WK_NUM 4
224
+ %define gotptr wk(0) - SIZEOF_POINTER ; void *gotptr
225
+
226
+ align 32
227
+ GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
228
+
229
+ EXTN(jsimd_h2v2_fancy_upsample_avx2):
230
+ push ebp
231
+ mov eax, esp ; eax = original ebp
232
+ sub esp, byte 4
233
+ and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
234
+ mov [esp], eax
235
+ mov ebp, esp ; ebp = aligned ebp
236
+ lea esp, [wk(0)]
237
+ pushpic eax ; make a room for GOT address
238
+ push ebx
239
+ ; push ecx ; need not be preserved
240
+ ; push edx ; need not be preserved
241
+ push esi
242
+ push edi
243
+
244
+ get_GOT ebx ; get GOT address
245
+ movpic POINTER [gotptr], ebx ; save GOT address
246
+
247
+ mov edx, eax ; edx = original ebp
248
+ mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
249
+ test eax, eax
250
+ jz near .return
251
+
252
+ mov ecx, INT [max_v_samp(edx)] ; rowctr
253
+ test ecx, ecx
254
+ jz near .return
255
+
256
+ mov esi, JSAMPARRAY [input_data(edx)] ; input_data
257
+ mov edi, POINTER [output_data_ptr(edx)]
258
+ mov edi, JSAMPARRAY [edi] ; output_data
259
+ alignx 16, 7
260
+ .rowloop:
261
+ push eax ; colctr
262
+ push ecx
263
+ push edi
264
+ push esi
265
+
266
+ mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above)
267
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
268
+ mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below)
269
+ mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
270
+ mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
271
+
272
+ test eax, SIZEOF_YMMWORD-1
273
+ jz short .skip
274
+ push edx
275
+ mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
276
+ mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
277
+ mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
278
+ mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
279
+ mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
280
+ mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample
281
+ pop edx
282
+ .skip:
283
+ ; -- process the first column block
284
+
285
+ vmovdqu ymm0, YMMWORD [ebx+0*SIZEOF_YMMWORD] ; ymm0=row[ 0][0]
286
+ vmovdqu ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD] ; ymm1=row[-1][0]
287
+ vmovdqu ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm2=row[+1][0]
288
+
289
+ pushpic ebx
290
+ movpic ebx, POINTER [gotptr] ; load GOT address
291
+
292
+ vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
293
+
294
+ vpunpckhbw ymm4, ymm0, ymm3 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
295
+ vpunpcklbw ymm5, ymm0, ymm3 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
296
+ vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
297
+ vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
298
+
299
+ vpunpckhbw ymm5, ymm1, ymm3 ; ymm5=row[-1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
300
+ vpunpcklbw ymm6, ymm1, ymm3 ; ymm6=row[-1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
301
+ vperm2i128 ymm1, ymm6, ymm5, 0x20 ; ymm1=row[-1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
302
+ vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=row[-1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
303
+
304
+ vpunpckhbw ymm6, ymm2, ymm3 ; ymm6=row[+1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
305
+ vpunpcklbw ymm3, ymm2, ymm3 ; ymm3=row[+1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
306
+ vperm2i128 ymm2, ymm3, ymm6, 0x20 ; ymm2=row[+1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
307
+ vperm2i128 ymm6, ymm3, ymm6, 0x31 ; ymm6=row[+1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
308
+
309
+ vpmullw ymm0, ymm0, [GOTOFF(ebx,PW_THREE)]
310
+ vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)]
311
+
312
+ vpcmpeqb xmm7, xmm7, xmm7
313
+ vpsrldq xmm7, xmm7, (SIZEOF_XMMWORD-2) ; (ffff ---- ---- ... ---- ----) LSB is ffff
314
+
315
+ vpaddw ymm1, ymm1, ymm0 ; ymm1=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
316
+ vpaddw ymm5, ymm5, ymm4 ; ymm5=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
317
+ vpaddw ymm2, ymm2, ymm0 ; ymm2=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
318
+ vpaddw ymm6, ymm6, ymm4 ; ymm6=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
319
+
320
+ vmovdqu YMMWORD [edx+0*SIZEOF_YMMWORD], ymm1 ; temporarily save
321
+ vmovdqu YMMWORD [edx+1*SIZEOF_YMMWORD], ymm5 ; the intermediate data
322
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm2
323
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm6
324
+
325
+ vpand ymm1, ymm1, ymm7 ; ymm1=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
326
+ vpand ymm2, ymm2, ymm7 ; ymm2=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
327
+
328
+ vmovdqa YMMWORD [wk(0)], ymm1
329
+ vmovdqa YMMWORD [wk(1)], ymm2
330
+
331
+ poppic ebx
332
+
333
+ add eax, byte SIZEOF_YMMWORD-1
334
+ and eax, byte -SIZEOF_YMMWORD
335
+ cmp eax, byte SIZEOF_YMMWORD
336
+ ja short .columnloop
337
+ alignx 16, 7
338
+
339
+ .columnloop_last:
340
+ ; -- process the last column block
341
+
342
+ pushpic ebx
343
+ movpic ebx, POINTER [gotptr] ; load GOT address
344
+
345
+ vpcmpeqb xmm1, xmm1, xmm1
346
+ vpslldq xmm1, xmm1, (SIZEOF_XMMWORD-2)
347
+ vperm2i128 ymm1, ymm1, ymm1, 1 ; (---- ---- ... ---- ---- ffff) MSB is ffff
348
+
349
+ vpand ymm2, ymm1, YMMWORD [edi+1*SIZEOF_YMMWORD]
350
+ vpand ymm1, ymm1, YMMWORD [edx+1*SIZEOF_YMMWORD]
351
+
352
+ vmovdqa YMMWORD [wk(2)], ymm1 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
353
+ vmovdqa YMMWORD [wk(3)], ymm2 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
354
+
355
+ jmp near .upsample
356
+ alignx 16, 7
357
+
358
+ .columnloop:
359
+ ; -- process the next column block
360
+
361
+ vmovdqu ymm0, YMMWORD [ebx+1*SIZEOF_YMMWORD] ; ymm0=row[ 0][1]
362
+ vmovdqu ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD] ; ymm1=row[-1][1]
363
+ vmovdqu ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD] ; ymm2=row[+1][1]
364
+
365
+ pushpic ebx
366
+ movpic ebx, POINTER [gotptr] ; load GOT address
367
+
368
+ vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
369
+
370
+ vpunpckhbw ymm4, ymm0, ymm3 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
371
+ vpunpcklbw ymm5, ymm0, ymm3 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
372
+ vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
373
+ vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
374
+
375
+ vpunpckhbw ymm5, ymm1, ymm3 ; ymm5=row[-1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
376
+ vpunpcklbw ymm6, ymm1, ymm3 ; ymm6=row[-1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
377
+ vperm2i128 ymm1, ymm6, ymm5, 0x20 ; ymm1=row[-1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
378
+ vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=row[-1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
379
+
380
+ vpunpckhbw ymm6, ymm2, ymm3 ; ymm6=row[+1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
381
+ vpunpcklbw ymm7, ymm2, ymm3 ; ymm7=row[+1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
382
+ vperm2i128 ymm2, ymm7, ymm6, 0x20 ; ymm2=row[+1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
383
+ vperm2i128 ymm6, ymm7, ymm6, 0x31 ; ymm6=row[+1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
384
+
385
+ vpmullw ymm0, ymm0, [GOTOFF(ebx,PW_THREE)]
386
+ vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)]
387
+
388
+ vpaddw ymm1, ymm1, ymm0 ; ymm1=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
389
+ vpaddw ymm5, ymm5, ymm4 ; ymm5=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
390
+ vpaddw ymm2, ymm2, ymm0 ; ymm2=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
391
+ vpaddw ymm6, ymm6, ymm4 ; ymm6=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
392
+
393
+ vmovdqu YMMWORD [edx+2*SIZEOF_YMMWORD], ymm1 ; temporarily save
394
+ vmovdqu YMMWORD [edx+3*SIZEOF_YMMWORD], ymm5 ; the intermediate data
395
+ vmovdqu YMMWORD [edi+2*SIZEOF_YMMWORD], ymm2
396
+ vmovdqu YMMWORD [edi+3*SIZEOF_YMMWORD], ymm6
397
+
398
+ vperm2i128 ymm1, ymm3, ymm1, 0x20
399
+ vpslldq ymm1, ymm1, 14 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 0)
400
+ vperm2i128 ymm2, ymm3, ymm2, 0x20
401
+ vpslldq ymm2, ymm2, 14 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 0)
402
+
403
+ vmovdqa YMMWORD [wk(2)], ymm1
404
+ vmovdqa YMMWORD [wk(3)], ymm2
405
+
406
+ .upsample:
407
+ ; -- process the upper row
408
+
409
+ vmovdqu ymm7, YMMWORD [edx+0*SIZEOF_YMMWORD] ; ymm7=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
410
+ vmovdqu ymm3, YMMWORD [edx+1*SIZEOF_YMMWORD] ; ymm3=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
411
+
412
+ vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's)
413
+
414
+ vperm2i128 ymm0, ymm1, ymm7, 0x03
415
+ vpalignr ymm0, ymm0, ymm7, 2 ; ymm0=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 --)
416
+ vperm2i128 ymm4, ymm1, ymm3, 0x20
417
+ vpslldq ymm4, ymm4, 14 ; ymm4=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 16)
418
+
419
+ vperm2i128 ymm5, ymm1, ymm7, 0x03
420
+ vpsrldq ymm5, ymm5, 14 ; ymm5=(15 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
421
+ vperm2i128 ymm6, ymm1, ymm3, 0x20
422
+ vpalignr ymm6, ymm3, ymm6, 14 ; ymm6=(-- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
423
+
424
+ vpor ymm0, ymm0, ymm4 ; ymm0=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)
425
+ vpor ymm5, ymm5, ymm6 ; ymm5=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
426
+
427
+ vperm2i128 ymm2, ymm1, ymm3, 0x03
428
+ vpalignr ymm2, ymm2, ymm3, 2 ; ymm2=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 --)
429
+ vperm2i128 ymm4, ymm1, ymm3, 0x03
430
+ vpsrldq ymm4, ymm4, 14 ; ymm4=(31 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
431
+ vperm2i128 ymm1, ymm1, ymm7, 0x20
432
+ vpalignr ymm1, ymm7, ymm1, 14 ; ymm1=(-- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
433
+
434
+ vpor ymm1, ymm1, YMMWORD [wk(0)] ; ymm1=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
435
+ vpor ymm2, ymm2, YMMWORD [wk(2)] ; ymm2=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32)
436
+
437
+ vmovdqa YMMWORD [wk(0)], ymm4
438
+
439
+ vpmullw ymm7, ymm7, [GOTOFF(ebx,PW_THREE)]
440
+ vpmullw ymm3, ymm3, [GOTOFF(ebx,PW_THREE)]
441
+ vpaddw ymm1, ymm1, [GOTOFF(ebx,PW_EIGHT)]
442
+ vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_EIGHT)]
443
+ vpaddw ymm0, ymm0, [GOTOFF(ebx,PW_SEVEN)]
444
+ vpaddw ymm2, [GOTOFF(ebx,PW_SEVEN)]
445
+
446
+ vpaddw ymm1, ymm1, ymm7
447
+ vpaddw ymm5, ymm5, ymm3
448
+ vpsrlw ymm1, ymm1, 4 ; ymm1=Out0LE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30)
449
+ vpsrlw ymm5, ymm5, 4 ; ymm5=Out0HE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62)
450
+ vpaddw ymm0, ymm0, ymm7
451
+ vpaddw ymm2, ymm2, ymm3
452
+ vpsrlw ymm0, ymm0, 4 ; ymm0=Out0LO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31)
453
+ vpsrlw ymm2, ymm2, 4 ; ymm2=Out0HO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63)
454
+
455
+ vpsllw ymm0, ymm0, BYTE_BIT
456
+ vpsllw ymm2, ymm2, BYTE_BIT
457
+ vpor ymm1, ymm1, ymm0 ; ymm1=Out0L=( 0 1 2 ... 29 30 31)
458
+ vpor ymm5, ymm5, ymm2 ; ymm5=Out0H=(32 33 34 ... 61 62 63)
459
+
460
+ vmovdqu YMMWORD [edx+0*SIZEOF_YMMWORD], ymm1
461
+ vmovdqu YMMWORD [edx+1*SIZEOF_YMMWORD], ymm5
462
+
463
+ ; -- process the lower row
464
+
465
+ vmovdqu ymm6, YMMWORD [edi+0*SIZEOF_YMMWORD] ; ymm6=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
466
+ vmovdqu ymm4, YMMWORD [edi+1*SIZEOF_YMMWORD] ; ymm4=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
467
+
468
+ vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's)
469
+
470
+ vperm2i128 ymm7, ymm1, ymm6, 0x03
471
+ vpalignr ymm7, ymm7, ymm6, 2 ; ymm7=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 --)
472
+ vperm2i128 ymm3, ymm1, ymm4, 0x20
473
+ vpslldq ymm3, ymm3, 14 ; ymm3=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 16)
474
+
475
+ vperm2i128 ymm0, ymm1, ymm6, 0x03
476
+ vpsrldq ymm0, ymm0, 14 ; ymm0=(15 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
477
+ vperm2i128 ymm2, ymm1, ymm4, 0x20
478
+ vpalignr ymm2, ymm4, ymm2, 14 ; ymm2=(-- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
479
+
480
+ vpor ymm7, ymm7, ymm3 ; ymm7=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)
481
+ vpor ymm0, ymm0, ymm2 ; ymm0=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)
482
+
483
+ vperm2i128 ymm5, ymm1, ymm4, 0x03
484
+ vpalignr ymm5, ymm5, ymm4, 2 ; ymm5=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 --)
485
+ vperm2i128 ymm3, ymm1, ymm4, 0x03
486
+ vpsrldq ymm3, ymm3, 14 ; ymm3=(31 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
487
+ vperm2i128 ymm1, ymm1, ymm6, 0x20
488
+ vpalignr ymm1, ymm6, ymm1, 14 ; ymm1=(-- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
489
+
490
+ vpor ymm1, ymm1, YMMWORD [wk(1)] ; ymm1=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
491
+ vpor ymm5, ymm5, YMMWORD [wk(3)] ; ymm5=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32)
492
+
493
+ vmovdqa YMMWORD [wk(1)], ymm3
494
+
495
+ vpmullw ymm6, ymm6, [GOTOFF(ebx,PW_THREE)]
496
+ vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)]
497
+ vpaddw ymm1, ymm1, [GOTOFF(ebx,PW_EIGHT)]
498
+ vpaddw ymm0, ymm0, [GOTOFF(ebx,PW_EIGHT)]
499
+ vpaddw ymm7, ymm7, [GOTOFF(ebx,PW_SEVEN)]
500
+ vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_SEVEN)]
501
+
502
+ vpaddw ymm1, ymm1, ymm6
503
+ vpaddw ymm0, ymm0, ymm4
504
+ vpsrlw ymm1, ymm1, 4 ; ymm1=Out1LE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30)
505
+ vpsrlw ymm0, ymm0, 4 ; ymm0=Out1HE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62)
506
+ vpaddw ymm7, ymm7, ymm6
507
+ vpaddw ymm5, ymm5, ymm4
508
+ vpsrlw ymm7, ymm7, 4 ; ymm7=Out1LO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31)
509
+ vpsrlw ymm5, ymm5, 4 ; ymm5=Out1HO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63)
510
+
511
+ vpsllw ymm7, ymm7, BYTE_BIT
512
+ vpsllw ymm5, ymm5, BYTE_BIT
513
+ vpor ymm1, ymm1, ymm7 ; ymm1=Out1L=( 0 1 2 ... 29 30 31)
514
+ vpor ymm0, ymm0, ymm5 ; ymm0=Out1H=(32 33 34 ... 61 62 63)
515
+
516
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1
517
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0
518
+
519
+ poppic ebx
520
+
521
+ sub eax, byte SIZEOF_YMMWORD
522
+ add ecx, byte 1*SIZEOF_YMMWORD ; inptr1(above)
523
+ add ebx, byte 1*SIZEOF_YMMWORD ; inptr0
524
+ add esi, byte 1*SIZEOF_YMMWORD ; inptr1(below)
525
+ add edx, byte 2*SIZEOF_YMMWORD ; outptr0
526
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr1
527
+ cmp eax, byte SIZEOF_YMMWORD
528
+ ja near .columnloop
529
+ test eax, eax
530
+ jnz near .columnloop_last
531
+
532
+ pop esi
533
+ pop edi
534
+ pop ecx
535
+ pop eax
536
+
537
+ add esi, byte 1*SIZEOF_JSAMPROW ; input_data
538
+ add edi, byte 2*SIZEOF_JSAMPROW ; output_data
539
+ sub ecx, byte 2 ; rowctr
540
+ jg near .rowloop
541
+
542
+ .return:
543
+ vzeroupper
544
+ pop edi
545
+ pop esi
546
+ ; pop edx ; need not be preserved
547
+ ; pop ecx ; need not be preserved
548
+ pop ebx
549
+ mov esp, ebp ; esp <- aligned ebp
550
+ pop esp ; esp <- original ebp
551
+ pop ebp
552
+ ret
553
+
554
+ ; --------------------------------------------------------------------------
555
+ ;
556
+ ; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
557
+ ; It's still a box filter.
558
+ ;
559
+ ; GLOBAL(void)
560
+ ; jsimd_h2v1_upsample_avx2(int max_v_samp_factor, JDIMENSION output_width,
561
+ ; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr);
562
+ ;
563
+
564
+ %define max_v_samp(b) (b) + 8 ; int max_v_samp_factor
565
+ %define output_width(b) (b) + 12 ; JDIMENSION output_width
566
+ %define input_data(b) (b) + 16 ; JSAMPARRAY input_data
567
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
568
+
569
+ align 32
570
+ GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
571
+
572
+ EXTN(jsimd_h2v1_upsample_avx2):
573
+ push ebp
574
+ mov ebp, esp
575
+ ; push ebx ; unused
576
+ ; push ecx ; need not be preserved
577
+ ; push edx ; need not be preserved
578
+ push esi
579
+ push edi
580
+
581
+ mov edx, JDIMENSION [output_width(ebp)]
582
+ add edx, byte (SIZEOF_YMMWORD-1)
583
+ and edx, -SIZEOF_YMMWORD
584
+ jz short .return
585
+
586
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
587
+ test ecx, ecx
588
+ jz short .return
589
+
590
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
591
+ mov edi, POINTER [output_data_ptr(ebp)]
592
+ mov edi, JSAMPARRAY [edi] ; output_data
593
+ alignx 16, 7
594
+ .rowloop:
595
+ push edi
596
+ push esi
597
+
598
+ mov esi, JSAMPROW [esi] ; inptr
599
+ mov edi, JSAMPROW [edi] ; outptr
600
+ mov eax, edx ; colctr
601
+ alignx 16, 7
602
+ .columnloop:
603
+
604
+ cmp eax, byte SIZEOF_YMMWORD
605
+ ja near .above_16
606
+
607
+ vmovdqu xmm0, XMMWORD [esi+0*SIZEOF_YMMWORD]
608
+ vpunpckhbw xmm1, xmm0, xmm0
609
+ vpunpcklbw xmm0, xmm0, xmm0
610
+
611
+ vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
612
+ vmovdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
613
+
614
+ jmp short .nextrow
615
+
616
+ .above_16:
617
+ vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
618
+
619
+ vpermq ymm0, ymm0, 0xd8
620
+ vpunpckhbw ymm1, ymm0, ymm0
621
+ vpunpcklbw ymm0, ymm0, ymm0
622
+
623
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm0
624
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm1
625
+
626
+ sub eax, byte 2*SIZEOF_YMMWORD
627
+ jz short .nextrow
628
+
629
+ add esi, byte SIZEOF_YMMWORD ; inptr
630
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
631
+ jmp short .columnloop
632
+ alignx 16, 7
633
+
634
+ .nextrow:
635
+ pop esi
636
+ pop edi
637
+
638
+ add esi, byte SIZEOF_JSAMPROW ; input_data
639
+ add edi, byte SIZEOF_JSAMPROW ; output_data
640
+ dec ecx ; rowctr
641
+ jg short .rowloop
642
+
643
+ .return:
644
+ vzeroupper
645
+ pop edi
646
+ pop esi
647
+ ; pop edx ; need not be preserved
648
+ ; pop ecx ; need not be preserved
649
+ ; pop ebx ; unused
650
+ pop ebp
651
+ ret
652
+
653
+ ; --------------------------------------------------------------------------
654
+ ;
655
+ ; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
656
+ ; It's still a box filter.
657
+ ;
658
+ ; GLOBAL(void)
659
+ ; jsimd_h2v2_upsample_avx2(int max_v_samp_factor, JDIMENSION output_width,
660
+ ; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr);
661
+ ;
662
+
663
+ %define max_v_samp(b) (b) + 8 ; int max_v_samp_factor
664
+ %define output_width(b) (b) + 12 ; JDIMENSION output_width
665
+ %define input_data(b) (b) + 16 ; JSAMPARRAY input_data
666
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
667
+
668
+ align 32
669
+ GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
670
+
671
+ EXTN(jsimd_h2v2_upsample_avx2):
672
+ push ebp
673
+ mov ebp, esp
674
+ push ebx
675
+ ; push ecx ; need not be preserved
676
+ ; push edx ; need not be preserved
677
+ push esi
678
+ push edi
679
+
680
+ mov edx, JDIMENSION [output_width(ebp)]
681
+ add edx, byte (SIZEOF_YMMWORD-1)
682
+ and edx, -SIZEOF_YMMWORD
683
+ jz near .return
684
+
685
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
686
+ test ecx, ecx
687
+ jz near .return
688
+
689
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
690
+ mov edi, POINTER [output_data_ptr(ebp)]
691
+ mov edi, JSAMPARRAY [edi] ; output_data
692
+ alignx 16, 7
693
+ .rowloop:
694
+ push edi
695
+ push esi
696
+
697
+ mov esi, JSAMPROW [esi] ; inptr
698
+ mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
699
+ mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
700
+ mov eax, edx ; colctr
701
+ alignx 16, 7
702
+ .columnloop:
703
+
704
+ cmp eax, byte SIZEOF_YMMWORD
705
+ ja short .above_16
706
+
707
+ vmovdqu xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
708
+ vpunpckhbw xmm1, xmm0, xmm0
709
+ vpunpcklbw xmm0, xmm0, xmm0
710
+
711
+ vmovdqu XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0
712
+ vmovdqu XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1
713
+ vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
714
+ vmovdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
715
+
716
+ jmp near .nextrow
717
+
718
+ .above_16:
719
+ vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
720
+
721
+ vpermq ymm0, ymm0, 0xd8
722
+ vpunpckhbw ymm1, ymm0, ymm0
723
+ vpunpcklbw ymm0, ymm0, ymm0
724
+
725
+ vmovdqu YMMWORD [ebx+0*SIZEOF_YMMWORD], ymm0
726
+ vmovdqu YMMWORD [ebx+1*SIZEOF_YMMWORD], ymm1
727
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm0
728
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm1
729
+
730
+ sub eax, byte 2*SIZEOF_YMMWORD
731
+ jz short .nextrow
732
+
733
+ add esi, byte SIZEOF_YMMWORD ; inptr
734
+ add ebx, 2*SIZEOF_YMMWORD ; outptr0
735
+ add edi, 2*SIZEOF_YMMWORD ; outptr1
736
+ jmp short .columnloop
737
+ alignx 16, 7
738
+
739
+ .nextrow:
740
+ pop esi
741
+ pop edi
742
+
743
+ add esi, byte 1*SIZEOF_JSAMPROW ; input_data
744
+ add edi, byte 2*SIZEOF_JSAMPROW ; output_data
745
+ sub ecx, byte 2 ; rowctr
746
+ jg near .rowloop
747
+
748
+ .return:
749
+ vzeroupper
750
+ pop edi
751
+ pop esi
752
+ ; pop edx ; need not be preserved
753
+ ; pop ecx ; need not be preserved
754
+ pop ebx
755
+ pop ebp
756
+ ret
757
+
758
+ ; For some reason, the OS X linker does not honor the request to align the
759
+ ; segment unless we do this.
760
+ align 32