epeg 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (504) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/MANIFEST +5 -0
  4. data/TODO +1 -0
  5. data/epeg/.dockerignore +4 -0
  6. data/epeg/.gitignore +5 -0
  7. data/epeg/CMakeLists.txt +30 -0
  8. data/epeg/Dockerfile +23 -0
  9. data/epeg/Epeg.h +90 -0
  10. data/epeg/README.md +42 -0
  11. data/epeg/epeg_main.c +1642 -0
  12. data/epeg/epeg_private.h +85 -0
  13. data/epeg/example/.gitignore +1 -0
  14. data/epeg/example/CMakeLists.txt +20 -0
  15. data/epeg/example/example.jpg +0 -0
  16. data/epeg/example/rotatetest.c +29 -0
  17. data/epeg/example/scaletest.c +48 -0
  18. data/epeg/vendor/libjpeg-turbo-2.0.4/BUILDING.md +828 -0
  19. data/epeg/vendor/libjpeg-turbo-2.0.4/CMakeLists.txt +1420 -0
  20. data/epeg/vendor/libjpeg-turbo-2.0.4/ChangeLog.md +1494 -0
  21. data/epeg/vendor/libjpeg-turbo-2.0.4/LICENSE.md +132 -0
  22. data/epeg/vendor/libjpeg-turbo-2.0.4/README.ijg +277 -0
  23. data/epeg/vendor/libjpeg-turbo-2.0.4/README.md +356 -0
  24. data/epeg/vendor/libjpeg-turbo-2.0.4/cderror.h +137 -0
  25. data/epeg/vendor/libjpeg-turbo-2.0.4/cdjpeg.c +145 -0
  26. data/epeg/vendor/libjpeg-turbo-2.0.4/cdjpeg.h +157 -0
  27. data/epeg/vendor/libjpeg-turbo-2.0.4/change.log +315 -0
  28. data/epeg/vendor/libjpeg-turbo-2.0.4/cjpeg.1 +354 -0
  29. data/epeg/vendor/libjpeg-turbo-2.0.4/cjpeg.c +695 -0
  30. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/BuildPackages.cmake +182 -0
  31. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/GNUInstallDirs.cmake +416 -0
  32. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/cmake_uninstall.cmake.in +24 -0
  33. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/testclean.cmake +41 -0
  34. data/epeg/vendor/libjpeg-turbo-2.0.4/cmyk.h +61 -0
  35. data/epeg/vendor/libjpeg-turbo-2.0.4/coderules.txt +78 -0
  36. data/epeg/vendor/libjpeg-turbo-2.0.4/djpeg.1 +296 -0
  37. data/epeg/vendor/libjpeg-turbo-2.0.4/djpeg.c +822 -0
  38. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/annotated.html +104 -0
  39. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/bc_s.png +0 -0
  40. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/bdwn.png +0 -0
  41. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/classes.html +106 -0
  42. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/closed.png +0 -0
  43. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen-extra.css +3 -0
  44. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen.css +1184 -0
  45. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen.png +0 -0
  46. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/dynsections.js +97 -0
  47. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2blank.png +0 -0
  48. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2cl.png +0 -0
  49. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2doc.png +0 -0
  50. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2folderclosed.png +0 -0
  51. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2folderopen.png +0 -0
  52. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2lastnode.png +0 -0
  53. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2link.png +0 -0
  54. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mlastnode.png +0 -0
  55. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mnode.png +0 -0
  56. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mo.png +0 -0
  57. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2node.png +0 -0
  58. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2ns.png +0 -0
  59. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2plastnode.png +0 -0
  60. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2pnode.png +0 -0
  61. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2splitbar.png +0 -0
  62. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2vertline.png +0 -0
  63. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/functions.html +134 -0
  64. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/functions_vars.html +134 -0
  65. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/group___turbo_j_p_e_g.html +2775 -0
  66. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/index.html +90 -0
  67. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/jquery.js +8 -0
  68. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/modules.html +95 -0
  69. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_f.png +0 -0
  70. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_g.png +0 -0
  71. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_h.png +0 -0
  72. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/open.png +0 -0
  73. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_63.html +26 -0
  74. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_63.js +4 -0
  75. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_64.html +26 -0
  76. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_64.js +5 -0
  77. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_68.html +26 -0
  78. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_68.js +4 -0
  79. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6e.html +26 -0
  80. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6e.js +4 -0
  81. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6f.html +26 -0
  82. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6f.js +5 -0
  83. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_72.html +26 -0
  84. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_72.js +4 -0
  85. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_74.html +26 -0
  86. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_74.js +102 -0
  87. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_77.html +26 -0
  88. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_77.js +4 -0
  89. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_78.html +26 -0
  90. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_78.js +4 -0
  91. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_79.html +26 -0
  92. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_79.js +4 -0
  93. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/classes_74.html +26 -0
  94. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/classes_74.js +6 -0
  95. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/close.png +0 -0
  96. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enums_74.html +26 -0
  97. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enums_74.js +8 -0
  98. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enumvalues_74.html +26 -0
  99. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enumvalues_74.js +37 -0
  100. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/functions_74.html +26 -0
  101. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/functions_74.js +31 -0
  102. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/groups_74.html +26 -0
  103. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/groups_74.js +4 -0
  104. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/mag_sel.png +0 -0
  105. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/nomatches.html +12 -0
  106. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search.css +271 -0
  107. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search.js +809 -0
  108. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_l.png +0 -0
  109. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_m.png +0 -0
  110. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_r.png +0 -0
  111. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/typedefs_74.html +26 -0
  112. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/typedefs_74.js +5 -0
  113. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_63.html +26 -0
  114. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_63.js +4 -0
  115. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_64.html +26 -0
  116. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_64.js +5 -0
  117. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_68.html +26 -0
  118. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_68.js +4 -0
  119. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6e.html +26 -0
  120. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6e.js +4 -0
  121. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6f.html +26 -0
  122. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6f.js +5 -0
  123. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_72.html +26 -0
  124. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_72.js +4 -0
  125. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_74.html +26 -0
  126. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_74.js +10 -0
  127. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_77.html +26 -0
  128. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_77.js +4 -0
  129. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_78.html +26 -0
  130. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_78.js +4 -0
  131. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_79.html +26 -0
  132. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_79.js +4 -0
  133. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjregion.html +186 -0
  134. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjscalingfactor.html +148 -0
  135. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjtransform.html +212 -0
  136. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/sync_off.png +0 -0
  137. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/sync_on.png +0 -0
  138. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_a.png +0 -0
  139. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_b.png +0 -0
  140. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_h.png +0 -0
  141. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_s.png +0 -0
  142. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tabs.css +60 -0
  143. data/epeg/vendor/libjpeg-turbo-2.0.4/doxygen-extra.css +3 -0
  144. data/epeg/vendor/libjpeg-turbo-2.0.4/doxygen.config +16 -0
  145. data/epeg/vendor/libjpeg-turbo-2.0.4/example.txt +464 -0
  146. data/epeg/vendor/libjpeg-turbo-2.0.4/jaricom.c +157 -0
  147. data/epeg/vendor/libjpeg-turbo-2.0.4/java/CMakeLists.txt +88 -0
  148. data/epeg/vendor/libjpeg-turbo-2.0.4/java/MANIFEST.MF +2 -0
  149. data/epeg/vendor/libjpeg-turbo-2.0.4/java/README +52 -0
  150. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJBench.java +1021 -0
  151. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJExample.java +405 -0
  152. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJUnitTest.java +960 -0
  153. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/allclasses-frame.html +24 -0
  154. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/allclasses-noframe.html +24 -0
  155. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/constant-values.html +532 -0
  156. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/deprecated-list.html +252 -0
  157. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/help-doc.html +210 -0
  158. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/index-all.html +1029 -0
  159. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/index.html +71 -0
  160. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJ.html +1356 -0
  161. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html +926 -0
  162. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html +241 -0
  163. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html +1255 -0
  164. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJException.html +340 -0
  165. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html +343 -0
  166. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html +751 -0
  167. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html +421 -0
  168. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html +765 -0
  169. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-frame.html +31 -0
  170. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-summary.html +202 -0
  171. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-tree.html +160 -0
  172. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/overview-tree.html +164 -0
  173. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/package-list +1 -0
  174. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/background.gif +0 -0
  175. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/tab.gif +0 -0
  176. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/titlebar.gif +0 -0
  177. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/titlebar_end.gif +0 -0
  178. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/script.js +30 -0
  179. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/serialized-form.html +176 -0
  180. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/stylesheet.css +474 -0
  181. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJ.java +584 -0
  182. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJCompressor.java +677 -0
  183. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java +76 -0
  184. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJDecompressor.java +931 -0
  185. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJException.java +78 -0
  186. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJLoader-unix.java.in +59 -0
  187. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJLoader-win.java.in +35 -0
  188. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java +115 -0
  189. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJTransform.java +227 -0
  190. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJTransformer.java +163 -0
  191. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/YUVImage.java +445 -0
  192. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJ.h +129 -0
  193. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJCompressor.h +101 -0
  194. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJDecompressor.h +101 -0
  195. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJTransformer.h +29 -0
  196. data/epeg/vendor/libjpeg-turbo-2.0.4/jcapimin.c +295 -0
  197. data/epeg/vendor/libjpeg-turbo-2.0.4/jcapistd.c +162 -0
  198. data/epeg/vendor/libjpeg-turbo-2.0.4/jcarith.c +932 -0
  199. data/epeg/vendor/libjpeg-turbo-2.0.4/jccoefct.c +449 -0
  200. data/epeg/vendor/libjpeg-turbo-2.0.4/jccolext.c +144 -0
  201. data/epeg/vendor/libjpeg-turbo-2.0.4/jccolor.c +710 -0
  202. data/epeg/vendor/libjpeg-turbo-2.0.4/jcdctmgr.c +721 -0
  203. data/epeg/vendor/libjpeg-turbo-2.0.4/jchuff.c +1096 -0
  204. data/epeg/vendor/libjpeg-turbo-2.0.4/jchuff.h +42 -0
  205. data/epeg/vendor/libjpeg-turbo-2.0.4/jcicc.c +105 -0
  206. data/epeg/vendor/libjpeg-turbo-2.0.4/jcinit.c +77 -0
  207. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmainct.c +162 -0
  208. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmarker.c +664 -0
  209. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmaster.c +640 -0
  210. data/epeg/vendor/libjpeg-turbo-2.0.4/jcomapi.c +109 -0
  211. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfig.h.in +73 -0
  212. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfig.txt +143 -0
  213. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfigint.h.in +31 -0
  214. data/epeg/vendor/libjpeg-turbo-2.0.4/jcparam.c +541 -0
  215. data/epeg/vendor/libjpeg-turbo-2.0.4/jcphuff.c +1105 -0
  216. data/epeg/vendor/libjpeg-turbo-2.0.4/jcprepct.c +351 -0
  217. data/epeg/vendor/libjpeg-turbo-2.0.4/jcsample.c +539 -0
  218. data/epeg/vendor/libjpeg-turbo-2.0.4/jcstest.c +126 -0
  219. data/epeg/vendor/libjpeg-turbo-2.0.4/jctrans.c +400 -0
  220. data/epeg/vendor/libjpeg-turbo-2.0.4/jdapimin.c +407 -0
  221. data/epeg/vendor/libjpeg-turbo-2.0.4/jdapistd.c +639 -0
  222. data/epeg/vendor/libjpeg-turbo-2.0.4/jdarith.c +773 -0
  223. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatadst-tj.c +203 -0
  224. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatadst.c +293 -0
  225. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatasrc-tj.c +194 -0
  226. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatasrc.c +295 -0
  227. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcoefct.c +692 -0
  228. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcoefct.h +82 -0
  229. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcol565.c +384 -0
  230. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcolext.c +143 -0
  231. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcolor.c +883 -0
  232. data/epeg/vendor/libjpeg-turbo-2.0.4/jdct.h +208 -0
  233. data/epeg/vendor/libjpeg-turbo-2.0.4/jddctmgr.c +352 -0
  234. data/epeg/vendor/libjpeg-turbo-2.0.4/jdhuff.c +831 -0
  235. data/epeg/vendor/libjpeg-turbo-2.0.4/jdhuff.h +238 -0
  236. data/epeg/vendor/libjpeg-turbo-2.0.4/jdicc.c +171 -0
  237. data/epeg/vendor/libjpeg-turbo-2.0.4/jdinput.c +408 -0
  238. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmainct.c +460 -0
  239. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmainct.h +71 -0
  240. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmarker.c +1377 -0
  241. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmaster.c +737 -0
  242. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmaster.h +28 -0
  243. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmerge.c +617 -0
  244. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmrg565.c +354 -0
  245. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmrgext.c +184 -0
  246. data/epeg/vendor/libjpeg-turbo-2.0.4/jdphuff.c +687 -0
  247. data/epeg/vendor/libjpeg-turbo-2.0.4/jdpostct.c +294 -0
  248. data/epeg/vendor/libjpeg-turbo-2.0.4/jdsample.c +518 -0
  249. data/epeg/vendor/libjpeg-turbo-2.0.4/jdsample.h +50 -0
  250. data/epeg/vendor/libjpeg-turbo-2.0.4/jdtrans.c +155 -0
  251. data/epeg/vendor/libjpeg-turbo-2.0.4/jerror.c +251 -0
  252. data/epeg/vendor/libjpeg-turbo-2.0.4/jerror.h +316 -0
  253. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctflt.c +169 -0
  254. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctfst.c +227 -0
  255. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctint.c +288 -0
  256. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctflt.c +240 -0
  257. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctfst.c +371 -0
  258. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctint.c +2627 -0
  259. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctred.c +409 -0
  260. data/epeg/vendor/libjpeg-turbo-2.0.4/jinclude.h +88 -0
  261. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemmgr.c +1179 -0
  262. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemnobs.c +115 -0
  263. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemsys.h +178 -0
  264. data/epeg/vendor/libjpeg-turbo-2.0.4/jmorecfg.h +421 -0
  265. data/epeg/vendor/libjpeg-turbo-2.0.4/jpeg_nbits_table.h +4098 -0
  266. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegcomp.h +31 -0
  267. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegint.h +368 -0
  268. data/epeg/vendor/libjpeg-turbo-2.0.4/jpeglib.h +1132 -0
  269. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegtran.1 +295 -0
  270. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegtran.c +601 -0
  271. data/epeg/vendor/libjpeg-turbo-2.0.4/jquant1.c +859 -0
  272. data/epeg/vendor/libjpeg-turbo-2.0.4/jquant2.c +1285 -0
  273. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimd.h +117 -0
  274. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimd_none.c +418 -0
  275. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimddct.h +70 -0
  276. data/epeg/vendor/libjpeg-turbo-2.0.4/jstdhuff.c +143 -0
  277. data/epeg/vendor/libjpeg-turbo-2.0.4/jutils.c +133 -0
  278. data/epeg/vendor/libjpeg-turbo-2.0.4/jversion.h +52 -0
  279. data/epeg/vendor/libjpeg-turbo-2.0.4/libjpeg.map.in +11 -0
  280. data/epeg/vendor/libjpeg-turbo-2.0.4/libjpeg.txt +3144 -0
  281. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/CMakeLists.txt +1 -0
  282. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5.c +275 -0
  283. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5.h +57 -0
  284. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5cmp.c +59 -0
  285. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5hl.c +125 -0
  286. data/epeg/vendor/libjpeg-turbo-2.0.4/rdbmp.c +689 -0
  287. data/epeg/vendor/libjpeg-turbo-2.0.4/rdcolmap.c +254 -0
  288. data/epeg/vendor/libjpeg-turbo-2.0.4/rdgif.c +39 -0
  289. data/epeg/vendor/libjpeg-turbo-2.0.4/rdjpgcom.1 +63 -0
  290. data/epeg/vendor/libjpeg-turbo-2.0.4/rdjpgcom.c +510 -0
  291. data/epeg/vendor/libjpeg-turbo-2.0.4/rdppm.c +766 -0
  292. data/epeg/vendor/libjpeg-turbo-2.0.4/rdrle.c +389 -0
  293. data/epeg/vendor/libjpeg-turbo-2.0.4/rdswitch.c +424 -0
  294. data/epeg/vendor/libjpeg-turbo-2.0.4/rdtarga.c +509 -0
  295. data/epeg/vendor/libjpeg-turbo-2.0.4/release/Distribution.xml.in +24 -0
  296. data/epeg/vendor/libjpeg-turbo-2.0.4/release/License.rtf +20 -0
  297. data/epeg/vendor/libjpeg-turbo-2.0.4/release/ReadMe.txt +5 -0
  298. data/epeg/vendor/libjpeg-turbo-2.0.4/release/Welcome.rtf +17 -0
  299. data/epeg/vendor/libjpeg-turbo-2.0.4/release/deb-control.in +31 -0
  300. data/epeg/vendor/libjpeg-turbo-2.0.4/release/installer.nsi.in +191 -0
  301. data/epeg/vendor/libjpeg-turbo-2.0.4/release/libjpeg.pc.in +10 -0
  302. data/epeg/vendor/libjpeg-turbo-2.0.4/release/libturbojpeg.pc.in +10 -0
  303. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makecygwinpkg.in +66 -0
  304. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makedpkg.in +115 -0
  305. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makemacpkg.in +284 -0
  306. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makerpm.in +30 -0
  307. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makesrpm.in +48 -0
  308. data/epeg/vendor/libjpeg-turbo-2.0.4/release/maketarball.in +51 -0
  309. data/epeg/vendor/libjpeg-turbo-2.0.4/release/rpm.spec.in +221 -0
  310. data/epeg/vendor/libjpeg-turbo-2.0.4/release/uninstall.in +113 -0
  311. data/epeg/vendor/libjpeg-turbo-2.0.4/sharedlib/CMakeLists.txt +99 -0
  312. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/CMakeLists.txt +385 -0
  313. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm/jsimd.c +721 -0
  314. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm/jsimd_neon.S +2878 -0
  315. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm64/jsimd.c +798 -0
  316. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm64/jsimd_neon.S +3433 -0
  317. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/gas-preprocessor.in +1 -0
  318. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-avx2.asm +578 -0
  319. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-mmx.asm +476 -0
  320. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-sse2.asm +503 -0
  321. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-avx2.asm +121 -0
  322. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-mmx.asm +121 -0
  323. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-sse2.asm +120 -0
  324. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-avx2.asm +113 -0
  325. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-mmx.asm +113 -0
  326. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-sse2.asm +112 -0
  327. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-avx2.asm +457 -0
  328. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-mmx.asm +355 -0
  329. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-sse2.asm +382 -0
  330. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jchuff-sse2.asm +424 -0
  331. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcphuff-sse2.asm +660 -0
  332. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-avx2.asm +388 -0
  333. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-mmx.asm +324 -0
  334. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-sse2.asm +351 -0
  335. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-avx2.asm +515 -0
  336. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-mmx.asm +404 -0
  337. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-sse2.asm +458 -0
  338. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-avx2.asm +118 -0
  339. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-mmx.asm +117 -0
  340. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-sse2.asm +117 -0
  341. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-avx2.asm +136 -0
  342. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-mmx.asm +123 -0
  343. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-sse2.asm +135 -0
  344. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-avx2.asm +575 -0
  345. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-mmx.asm +460 -0
  346. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-sse2.asm +517 -0
  347. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-avx2.asm +760 -0
  348. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-mmx.asm +731 -0
  349. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-sse2.asm +724 -0
  350. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctflt-3dn.asm +318 -0
  351. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctflt-sse.asm +369 -0
  352. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctfst-mmx.asm +395 -0
  353. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctfst-sse2.asm +403 -0
  354. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-avx2.asm +331 -0
  355. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-mmx.asm +620 -0
  356. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-sse2.asm +633 -0
  357. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-3dn.asm +451 -0
  358. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-sse.asm +571 -0
  359. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-sse2.asm +497 -0
  360. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctfst-mmx.asm +499 -0
  361. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctfst-sse2.asm +501 -0
  362. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-avx2.asm +453 -0
  363. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-mmx.asm +851 -0
  364. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-sse2.asm +858 -0
  365. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctred-mmx.asm +704 -0
  366. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctred-sse2.asm +592 -0
  367. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-3dn.asm +230 -0
  368. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-mmx.asm +276 -0
  369. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-sse.asm +208 -0
  370. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquantf-sse2.asm +168 -0
  371. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquanti-avx2.asm +188 -0
  372. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquanti-sse2.asm +201 -0
  373. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jsimd.c +1253 -0
  374. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jsimdcpu.asm +135 -0
  375. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/jsimd.h +1083 -0
  376. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jccolext-mmi.c +483 -0
  377. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jccolor-mmi.c +148 -0
  378. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jcsample-mmi.c +100 -0
  379. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jcsample.h +28 -0
  380. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdcolext-mmi.c +424 -0
  381. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdcolor-mmi.c +139 -0
  382. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdsample-mmi.c +245 -0
  383. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jfdctint-mmi.c +398 -0
  384. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jidctint-mmi.c +571 -0
  385. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jquanti-mmi.c +130 -0
  386. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jsimd.c +610 -0
  387. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jsimd_mmi.h +57 -0
  388. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/loongson-mmintrin.h +1324 -0
  389. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd.c +1123 -0
  390. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd_dspr2.S +4479 -0
  391. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd_dspr2_asm.h +292 -0
  392. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jcolsamp.inc +135 -0
  393. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jdct.inc +31 -0
  394. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jpeg_nbits_table.inc +4097 -0
  395. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdcfg.inc +93 -0
  396. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdcfg.inc.h +131 -0
  397. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdext.inc +479 -0
  398. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jccolext-altivec.c +269 -0
  399. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jccolor-altivec.c +116 -0
  400. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcgray-altivec.c +111 -0
  401. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcgryext-altivec.c +228 -0
  402. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcsample-altivec.c +159 -0
  403. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcsample.h +28 -0
  404. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdcolext-altivec.c +276 -0
  405. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdcolor-altivec.c +106 -0
  406. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdmerge-altivec.c +130 -0
  407. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdmrgext-altivec.c +329 -0
  408. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdsample-altivec.c +400 -0
  409. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jfdctfst-altivec.c +154 -0
  410. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jfdctint-altivec.c +258 -0
  411. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jidctfst-altivec.c +255 -0
  412. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jidctint-altivec.c +357 -0
  413. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jquanti-altivec.c +250 -0
  414. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jsimd.c +872 -0
  415. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jsimd_altivec.h +98 -0
  416. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolext-avx2.asm +558 -0
  417. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolext-sse2.asm +483 -0
  418. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolor-avx2.asm +121 -0
  419. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolor-sse2.asm +120 -0
  420. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgray-avx2.asm +113 -0
  421. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgray-sse2.asm +112 -0
  422. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgryext-avx2.asm +437 -0
  423. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgryext-sse2.asm +362 -0
  424. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jchuff-sse2.asm +346 -0
  425. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcphuff-sse2.asm +637 -0
  426. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcsample-avx2.asm +366 -0
  427. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcsample-sse2.asm +329 -0
  428. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolext-avx2.asm +495 -0
  429. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolext-sse2.asm +438 -0
  430. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolor-avx2.asm +118 -0
  431. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolor-sse2.asm +117 -0
  432. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmerge-avx2.asm +136 -0
  433. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmerge-sse2.asm +135 -0
  434. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmrgext-avx2.asm +593 -0
  435. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmrgext-sse2.asm +535 -0
  436. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdsample-avx2.asm +695 -0
  437. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdsample-sse2.asm +664 -0
  438. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctflt-sse.asm +355 -0
  439. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctfst-sse2.asm +389 -0
  440. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctint-avx2.asm +320 -0
  441. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctint-sse2.asm +619 -0
  442. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctflt-sse2.asm +481 -0
  443. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctfst-sse2.asm +490 -0
  444. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctint-avx2.asm +417 -0
  445. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctint-sse2.asm +846 -0
  446. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctred-sse2.asm +573 -0
  447. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquantf-sse2.asm +154 -0
  448. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquanti-avx2.asm +162 -0
  449. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquanti-sse2.asm +187 -0
  450. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jsimd.c +1076 -0
  451. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jsimdcpu.asm +86 -0
  452. data/epeg/vendor/libjpeg-turbo-2.0.4/structure.txt +904 -0
  453. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/nightshot_iso_100.bmp +0 -0
  454. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/nightshot_iso_100.txt +25 -0
  455. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test.scan +5 -0
  456. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test1.icc +0 -0
  457. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test1.icc.txt +20 -0
  458. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test2.icc +0 -0
  459. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test2.icc.txt +20 -0
  460. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testimgari.jpg +0 -0
  461. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testimgint.jpg +0 -0
  462. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig.jpg +0 -0
  463. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig.ppm +4 -0
  464. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig12.jpg +0 -0
  465. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_5674_0098.bmp +0 -0
  466. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_6434_0018a.bmp +0 -0
  467. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_6548_0026a.bmp +0 -0
  468. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbench.c +1031 -0
  469. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbenchtest.in +256 -0
  470. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbenchtest.java.in +215 -0
  471. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexample.c +396 -0
  472. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexampletest.in +149 -0
  473. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexampletest.java.in +151 -0
  474. data/epeg/vendor/libjpeg-turbo-2.0.4/tjunittest.c +931 -0
  475. data/epeg/vendor/libjpeg-turbo-2.0.4/tjutil.c +70 -0
  476. data/epeg/vendor/libjpeg-turbo-2.0.4/tjutil.h +47 -0
  477. data/epeg/vendor/libjpeg-turbo-2.0.4/transupp.c +1628 -0
  478. data/epeg/vendor/libjpeg-turbo-2.0.4/transupp.h +210 -0
  479. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-jni.c +1246 -0
  480. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-mapfile +65 -0
  481. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-mapfile.jni +101 -0
  482. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg.c +2152 -0
  483. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg.h +1744 -0
  484. data/epeg/vendor/libjpeg-turbo-2.0.4/usage.txt +635 -0
  485. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jconfig.h.in +34 -0
  486. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg62-memsrcdst.def +108 -0
  487. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg62.def +106 -0
  488. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg7-memsrcdst.def +110 -0
  489. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg7.def +108 -0
  490. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg8.def +111 -0
  491. data/epeg/vendor/libjpeg-turbo-2.0.4/wizard.txt +212 -0
  492. data/epeg/vendor/libjpeg-turbo-2.0.4/wrbmp.c +558 -0
  493. data/epeg/vendor/libjpeg-turbo-2.0.4/wrgif.c +413 -0
  494. data/epeg/vendor/libjpeg-turbo-2.0.4/wrjpgcom.1 +103 -0
  495. data/epeg/vendor/libjpeg-turbo-2.0.4/wrjpgcom.c +591 -0
  496. data/epeg/vendor/libjpeg-turbo-2.0.4/wrppm.c +365 -0
  497. data/epeg/vendor/libjpeg-turbo-2.0.4/wrrle.c +309 -0
  498. data/epeg/vendor/libjpeg-turbo-2.0.4/wrtarga.c +261 -0
  499. data/epeg.c +131 -0
  500. data/epeg.gemspec +18 -0
  501. data/extconf.rb +80 -0
  502. data/test.jpg +0 -0
  503. data/test.rb +42 -0
  504. metadata +546 -0
@@ -0,0 +1,846 @@
1
+ ;
2
+ ; jidctint.asm - accurate integer IDCT (64-bit SSE2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ;
7
+ ; Based on the x86 SIMD extension for IJG JPEG library
8
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
9
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
10
+ ;
11
+ ; This file should be assembled with NASM (Netwide Assembler),
12
+ ; and can *not* be assembled with Microsoft's MASM or any compatible
13
+ ; assembler (including Borland's Turbo Assembler).
14
+ ; NASM is available from http://nasm.sourceforge.net/ or
15
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
16
+ ;
17
+ ; This file contains a slow-but-accurate integer implementation of the
18
+ ; inverse DCT (Discrete Cosine Transform). The following code is based
19
+ ; directly on the IJG's original jidctint.c; see jidctint.c for
20
+ ; more details.
21
+
22
+ %include "jsimdext.inc"
23
+ %include "jdct.inc"
24
+
25
+ ; --------------------------------------------------------------------------
26
+
27
+ %define CONST_BITS 13 ; fractional bits in the F_* fixed-point multipliers below
28
+ %define PASS1_BITS 2 ; extra fractional bits left in the pass-1 results (see DESCALE_P1/P2)
29
+
30
+ %define DESCALE_P1 (CONST_BITS - PASS1_BITS) ; right-shift count applied to pass-1 (column) results
31
+ %define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) ; right-shift count applied to pass-2 (row) results
32
+
33
+ %if CONST_BITS == 13
34
+ F_0_298 equ 2446 ; FIX(0.298631336)
35
+ F_0_390 equ 3196 ; FIX(0.390180644)
36
+ F_0_541 equ 4433 ; FIX(0.541196100)
37
+ F_0_765 equ 6270 ; FIX(0.765366865)
38
+ F_0_899 equ 7373 ; FIX(0.899976223)
39
+ F_1_175 equ 9633 ; FIX(1.175875602)
40
+ F_1_501 equ 12299 ; FIX(1.501321110)
41
+ F_1_847 equ 15137 ; FIX(1.847759065)
42
+ F_1_961 equ 16069 ; FIX(1.961570560)
43
+ F_2_053 equ 16819 ; FIX(2.053119869)
44
+ F_2_562 equ 20995 ; FIX(2.562915447)
45
+ F_3_072 equ 25172 ; FIX(3.072711026)
46
+ %else
47
+ ; NASM cannot do compile-time arithmetic on floating-point constants, so each value below starts from the constant pre-scaled by 2^30 and is reduced to CONST_BITS fractional bits.
48
+ %define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) ; x >> n, rounding half up
49
+ F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336)
50
+ F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644)
51
+ F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100)
52
+ F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865)
53
+ F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223)
54
+ F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602)
55
+ F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110)
56
+ F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065)
57
+ F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560)
58
+ F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869)
59
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
60
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
61
+ %endif
62
+
63
+ ; --------------------------------------------------------------------------
64
+ SECTION SEG_CONST
65
+
66
+ alignz 32
67
+ GLOBAL_DATA(jconst_idct_islow_sse2)
68
+
69
+ EXTN(jconst_idct_islow_sse2):
70
+
71
+ PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 ; pmaddwd on interleaved (z2,z3) words -> tmp3 (even part)
72
+ PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847) ; pmaddwd on interleaved (z2,z3) words -> tmp2 (even part)
73
+ PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 ; pmaddwd on interleaved (z3,z4) words -> z3 (odd part)
74
+ PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390) ; pmaddwd on interleaved (z3,z4) words -> z4 (odd part)
75
+ PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899 ; pmaddwd on interleaved (tmp0,tmp3) words -> tmp0 (odd part)
76
+ PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899) ; pmaddwd on interleaved (tmp0,tmp3) words -> tmp3 (odd part)
77
+ PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562 ; pmaddwd on interleaved (tmp1,tmp2) words -> tmp1 (odd part)
78
+ PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562) ; pmaddwd on interleaved (tmp1,tmp2) words -> tmp2 (odd part)
79
+ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1) ; rounding term added before the pass-1 psrad by DESCALE_P1
80
+ PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1) ; rounding term added before the pass-2 psrad by DESCALE_P2
81
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE ; CENTERJSAMPLE replicated into all 16 bytes
82
+
83
+ alignz 32
84
+
85
+ ; --------------------------------------------------------------------------
86
+ SECTION SEG_TEXT
87
+ BITS 64
88
+ ;
89
+ ; Perform dequantization and inverse DCT on one block of coefficients.
90
+ ;
91
+ ; GLOBAL(void)
92
+ ; jsimd_idct_islow_sse2(void *dct_table, JCOEFPTR coef_block,
93
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
94
+ ;
95
+
96
+ ; r10 = void *dct_table
97
+ ; r11 = JCOEFPTR coef_block
98
+ ; r12 = JSAMPARRAY output_buf
99
+ ; r13d = JDIMENSION output_col
100
+
101
+ %define original_rbp rbp + 0 ; slot at the aligned rbp; the prologue stores there the rsp value captured right after `push rbp`
102
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
103
+ ; xmmword wk[WK_NUM]: XMMWORD-sized scratch slots located directly below the aligned rbp
104
+ %define WK_NUM 12 ; number of scratch slots; the code addresses wk(0) through wk(11)
105
+
106
+ align 32
107
+ GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
108
+
109
+ EXTN(jsimd_idct_islow_sse2):
110
+ push rbp
111
+ mov rax, rsp ; rax = original rbp
112
+ sub rsp, byte 4
113
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
114
+ mov [rsp], rax
115
+ mov rbp, rsp ; rbp = aligned rbp
116
+ lea rsp, [wk(0)]
117
+ collect_args 4
118
+
119
+ ; ---- Pass 1: process columns from input.
120
+
121
+ mov rdx, r10 ; quantptr
122
+ mov rsi, r11 ; inptr
123
+
124
+ %ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
125
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
126
+ or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
127
+ jnz near .columnDCT
128
+
129
+ movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
130
+ movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
131
+ por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
132
+ por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
133
+ por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
134
+ por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
135
+ por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
136
+ por xmm1, xmm0
137
+ packsswb xmm1, xmm1
138
+ packsswb xmm1, xmm1
139
+ movd eax, xmm1
140
+ test rax, rax
141
+ jnz short .columnDCT
142
+
143
+ ; -- AC terms all zero
144
+
145
+ movdqa xmm5, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
146
+ pmullw xmm5, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
147
+
148
+ psllw xmm5, PASS1_BITS
149
+
150
+ movdqa xmm4, xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07)
151
+ punpcklwd xmm5, xmm5 ; xmm5=(00 00 01 01 02 02 03 03)
152
+ punpckhwd xmm4, xmm4 ; xmm4=(04 04 05 05 06 06 07 07)
153
+
154
+ pshufd xmm7, xmm5, 0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00)
155
+ pshufd xmm6, xmm5, 0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01)
156
+ pshufd xmm1, xmm5, 0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02)
157
+ pshufd xmm5, xmm5, 0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03)
158
+ pshufd xmm0, xmm4, 0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04)
159
+ pshufd xmm3, xmm4, 0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05)
160
+ pshufd xmm2, xmm4, 0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06)
161
+ pshufd xmm4, xmm4, 0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07)
162
+
163
+ movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1
164
+ movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3
165
+ movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5
166
+ movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7
167
+ jmp near .column_end
168
+ %endif
169
+ .columnDCT:
170
+
171
+ ; -- Even part
172
+
173
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
174
+ movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
175
+ pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
176
+ pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
177
+ movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
178
+ movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
179
+ pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
180
+ pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
181
+
182
+ ; (Original)
183
+ ; z1 = (z2 + z3) * 0.541196100;
184
+ ; tmp2 = z1 + z3 * -1.847759065;
185
+ ; tmp3 = z1 + z2 * 0.765366865;
186
+ ;
187
+ ; (This implementation)
188
+ ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
189
+ ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
190
+
191
+ movdqa xmm4, xmm1 ; xmm1=in2=z2
192
+ movdqa xmm5, xmm1
193
+ punpcklwd xmm4, xmm3 ; xmm3=in6=z3
194
+ punpckhwd xmm5, xmm3
195
+ movdqa xmm1, xmm4
196
+ movdqa xmm3, xmm5
197
+ pmaddwd xmm4, [rel PW_F130_F054] ; xmm4=tmp3L
198
+ pmaddwd xmm5, [rel PW_F130_F054] ; xmm5=tmp3H
199
+ pmaddwd xmm1, [rel PW_F054_MF130] ; xmm1=tmp2L
200
+ pmaddwd xmm3, [rel PW_F054_MF130] ; xmm3=tmp2H
201
+
202
+ movdqa xmm6, xmm0
203
+ paddw xmm0, xmm2 ; xmm0=in0+in4
204
+ psubw xmm6, xmm2 ; xmm6=in0-in4
205
+
206
+ pxor xmm7, xmm7
207
+ pxor xmm2, xmm2
208
+ punpcklwd xmm7, xmm0 ; xmm7=tmp0L
209
+ punpckhwd xmm2, xmm0 ; xmm2=tmp0H
210
+ psrad xmm7, (16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS
211
+ psrad xmm2, (16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS
212
+
213
+ movdqa xmm0, xmm7
214
+ paddd xmm7, xmm4 ; xmm7=tmp10L
215
+ psubd xmm0, xmm4 ; xmm0=tmp13L
216
+ movdqa xmm4, xmm2
217
+ paddd xmm2, xmm5 ; xmm2=tmp10H
218
+ psubd xmm4, xmm5 ; xmm4=tmp13H
219
+
220
+ movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L
221
+ movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H
222
+ movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L
223
+ movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H
224
+
225
+ pxor xmm5, xmm5
226
+ pxor xmm7, xmm7
227
+ punpcklwd xmm5, xmm6 ; xmm5=tmp1L
228
+ punpckhwd xmm7, xmm6 ; xmm7=tmp1H
229
+ psrad xmm5, (16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS
230
+ psrad xmm7, (16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS
231
+
232
+ movdqa xmm2, xmm5
233
+ paddd xmm5, xmm1 ; xmm5=tmp11L
234
+ psubd xmm2, xmm1 ; xmm2=tmp12L
235
+ movdqa xmm0, xmm7
236
+ paddd xmm7, xmm3 ; xmm7=tmp11H
237
+ psubd xmm0, xmm3 ; xmm0=tmp12H
238
+
239
+ movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L
240
+ movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H
241
+ movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L
242
+ movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H
243
+
244
+ ; -- Odd part
245
+
246
+ movdqa xmm4, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
247
+ movdqa xmm6, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
248
+ pmullw xmm4, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
249
+ pmullw xmm6, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
250
+ movdqa xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
251
+ movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
252
+ pmullw xmm1, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
253
+ pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
254
+
255
+ movdqa xmm5, xmm6
256
+ movdqa xmm7, xmm4
257
+ paddw xmm5, xmm3 ; xmm5=z3
258
+ paddw xmm7, xmm1 ; xmm7=z4
259
+
260
+ ; (Original)
261
+ ; z5 = (z3 + z4) * 1.175875602;
262
+ ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644;
263
+ ; z3 += z5; z4 += z5;
264
+ ;
265
+ ; (This implementation)
266
+ ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
267
+ ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
268
+
269
+ movdqa xmm2, xmm5
270
+ movdqa xmm0, xmm5
271
+ punpcklwd xmm2, xmm7
272
+ punpckhwd xmm0, xmm7
273
+ movdqa xmm5, xmm2
274
+ movdqa xmm7, xmm0
275
+ pmaddwd xmm2, [rel PW_MF078_F117] ; xmm2=z3L
276
+ pmaddwd xmm0, [rel PW_MF078_F117] ; xmm0=z3H
277
+ pmaddwd xmm5, [rel PW_F117_F078] ; xmm5=z4L
278
+ pmaddwd xmm7, [rel PW_F117_F078] ; xmm7=z4H
279
+
280
+ movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L
281
+ movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H
282
+
283
+ ; (Original)
284
+ ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2;
285
+ ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869;
286
+ ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110;
287
+ ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447;
288
+ ; tmp0 += z1 + z3; tmp1 += z2 + z4;
289
+ ; tmp2 += z2 + z3; tmp3 += z1 + z4;
290
+ ;
291
+ ; (This implementation)
292
+ ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
293
+ ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
294
+ ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
295
+ ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
296
+ ; tmp0 += z3; tmp1 += z4;
297
+ ; tmp2 += z3; tmp3 += z4;
298
+
299
+ movdqa xmm2, xmm3
300
+ movdqa xmm0, xmm3
301
+ punpcklwd xmm2, xmm4
302
+ punpckhwd xmm0, xmm4
303
+ movdqa xmm3, xmm2
304
+ movdqa xmm4, xmm0
305
+ pmaddwd xmm2, [rel PW_MF060_MF089] ; xmm2=tmp0L
306
+ pmaddwd xmm0, [rel PW_MF060_MF089] ; xmm0=tmp0H
307
+ pmaddwd xmm3, [rel PW_MF089_F060] ; xmm3=tmp3L
308
+ pmaddwd xmm4, [rel PW_MF089_F060] ; xmm4=tmp3H
309
+
310
+ paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L
311
+ paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H
312
+ paddd xmm3, xmm5 ; xmm3=tmp3L
313
+ paddd xmm4, xmm7 ; xmm4=tmp3H
314
+
315
+ movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L
316
+ movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H
317
+
318
+ movdqa xmm2, xmm1
319
+ movdqa xmm0, xmm1
320
+ punpcklwd xmm2, xmm6
321
+ punpckhwd xmm0, xmm6
322
+ movdqa xmm1, xmm2
323
+ movdqa xmm6, xmm0
324
+ pmaddwd xmm2, [rel PW_MF050_MF256] ; xmm2=tmp1L
325
+ pmaddwd xmm0, [rel PW_MF050_MF256] ; xmm0=tmp1H
326
+ pmaddwd xmm1, [rel PW_MF256_F050] ; xmm1=tmp2L
327
+ pmaddwd xmm6, [rel PW_MF256_F050] ; xmm6=tmp2H
328
+
329
+ paddd xmm2, xmm5 ; xmm2=tmp1L
330
+ paddd xmm0, xmm7 ; xmm0=tmp1H
331
+ paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L
332
+ paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H
333
+
334
+ movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L
335
+ movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H
336
+
337
+ ; -- Final output stage
338
+
339
+ movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L
340
+ movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H
341
+
342
+ movdqa xmm2, xmm5
343
+ movdqa xmm0, xmm7
344
+ paddd xmm5, xmm3 ; xmm5=data0L
345
+ paddd xmm7, xmm4 ; xmm7=data0H
346
+ psubd xmm2, xmm3 ; xmm2=data7L
347
+ psubd xmm0, xmm4 ; xmm0=data7H
348
+
349
+ movdqa xmm3, [rel PD_DESCALE_P1] ; xmm3=[rel PD_DESCALE_P1]
350
+
351
+ paddd xmm5, xmm3
352
+ paddd xmm7, xmm3
353
+ psrad xmm5, DESCALE_P1
354
+ psrad xmm7, DESCALE_P1
355
+ paddd xmm2, xmm3
356
+ paddd xmm0, xmm3
357
+ psrad xmm2, DESCALE_P1
358
+ psrad xmm0, DESCALE_P1
359
+
360
+ packssdw xmm5, xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07)
361
+ packssdw xmm2, xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77)
362
+
363
+ movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L
364
+ movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H
365
+
366
+ movdqa xmm7, xmm4
367
+ movdqa xmm0, xmm3
368
+ paddd xmm4, xmm1 ; xmm4=data1L
369
+ paddd xmm3, xmm6 ; xmm3=data1H
370
+ psubd xmm7, xmm1 ; xmm7=data6L
371
+ psubd xmm0, xmm6 ; xmm0=data6H
372
+
373
+ movdqa xmm1, [rel PD_DESCALE_P1] ; xmm1=[rel PD_DESCALE_P1]
374
+
375
+ paddd xmm4, xmm1
376
+ paddd xmm3, xmm1
377
+ psrad xmm4, DESCALE_P1
378
+ psrad xmm3, DESCALE_P1
379
+ paddd xmm7, xmm1
380
+ paddd xmm0, xmm1
381
+ psrad xmm7, DESCALE_P1
382
+ psrad xmm0, DESCALE_P1
383
+
384
+ packssdw xmm4, xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17)
385
+ packssdw xmm7, xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67)
386
+
387
+ movdqa xmm6, xmm5 ; transpose coefficients(phase 1)
388
+ punpcklwd xmm5, xmm4 ; xmm5=(00 10 01 11 02 12 03 13)
389
+ punpckhwd xmm6, xmm4 ; xmm6=(04 14 05 15 06 16 07 17)
390
+ movdqa xmm1, xmm7 ; transpose coefficients(phase 1)
391
+ punpcklwd xmm7, xmm2 ; xmm7=(60 70 61 71 62 72 63 73)
392
+ punpckhwd xmm1, xmm2 ; xmm1=(64 74 65 75 66 76 67 77)
393
+
394
+ movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L
395
+ movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H
396
+ movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L
397
+ movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H
398
+
399
+ movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13)
400
+ movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17)
401
+ movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73)
402
+ movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77)
403
+
404
+ movdqa xmm5, xmm3
405
+ movdqa xmm6, xmm0
406
+ paddd xmm3, xmm4 ; xmm3=data2L
407
+ paddd xmm0, xmm2 ; xmm0=data2H
408
+ psubd xmm5, xmm4 ; xmm5=data5L
409
+ psubd xmm6, xmm2 ; xmm6=data5H
410
+
411
+ movdqa xmm7, [rel PD_DESCALE_P1] ; xmm7=[rel PD_DESCALE_P1]
412
+
413
+ paddd xmm3, xmm7
414
+ paddd xmm0, xmm7
415
+ psrad xmm3, DESCALE_P1
416
+ psrad xmm0, DESCALE_P1
417
+ paddd xmm5, xmm7
418
+ paddd xmm6, xmm7
419
+ psrad xmm5, DESCALE_P1
420
+ psrad xmm6, DESCALE_P1
421
+
422
+ packssdw xmm3, xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27)
423
+ packssdw xmm5, xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57)
424
+
425
+ movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L
426
+ movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H
427
+ movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L
428
+ movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H
429
+
430
+ movdqa xmm0, xmm1
431
+ movdqa xmm6, xmm4
432
+ paddd xmm1, xmm2 ; xmm1=data3L
433
+ paddd xmm4, xmm7 ; xmm4=data3H
434
+ psubd xmm0, xmm2 ; xmm0=data4L
435
+ psubd xmm6, xmm7 ; xmm6=data4H
436
+
437
+ movdqa xmm2, [rel PD_DESCALE_P1] ; xmm2=[rel PD_DESCALE_P1]
438
+
439
+ paddd xmm1, xmm2
440
+ paddd xmm4, xmm2
441
+ psrad xmm1, DESCALE_P1
442
+ psrad xmm4, DESCALE_P1
443
+ paddd xmm0, xmm2
444
+ paddd xmm6, xmm2
445
+ psrad xmm0, DESCALE_P1
446
+ psrad xmm6, DESCALE_P1
447
+
448
+ packssdw xmm1, xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37)
449
+ packssdw xmm0, xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47)
450
+
451
+ movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13)
452
+ movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17)
453
+
454
+ movdqa xmm4, xmm3 ; transpose coefficients(phase 1)
455
+ punpcklwd xmm3, xmm1 ; xmm3=(20 30 21 31 22 32 23 33)
456
+ punpckhwd xmm4, xmm1 ; xmm4=(24 34 25 35 26 36 27 37)
457
+ movdqa xmm6, xmm0 ; transpose coefficients(phase 1)
458
+ punpcklwd xmm0, xmm5 ; xmm0=(40 50 41 51 42 52 43 53)
459
+ punpckhwd xmm6, xmm5 ; xmm6=(44 54 45 55 46 56 47 57)
460
+
461
+ movdqa xmm1, xmm7 ; transpose coefficients(phase 2)
462
+ punpckldq xmm7, xmm3 ; xmm7=(00 10 20 30 01 11 21 31)
463
+ punpckhdq xmm1, xmm3 ; xmm1=(02 12 22 32 03 13 23 33)
464
+ movdqa xmm5, xmm2 ; transpose coefficients(phase 2)
465
+ punpckldq xmm2, xmm4 ; xmm2=(04 14 24 34 05 15 25 35)
466
+ punpckhdq xmm5, xmm4 ; xmm5=(06 16 26 36 07 17 27 37)
467
+
468
+ movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73)
469
+ movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77)
470
+
471
+ movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35)
472
+ movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37)
473
+
474
+ movdqa xmm2, xmm0 ; transpose coefficients(phase 2)
475
+ punpckldq xmm0, xmm3 ; xmm0=(40 50 60 70 41 51 61 71)
476
+ punpckhdq xmm2, xmm3 ; xmm2=(42 52 62 72 43 53 63 73)
477
+ movdqa xmm5, xmm6 ; transpose coefficients(phase 2)
478
+ punpckldq xmm6, xmm4 ; xmm6=(44 54 64 74 45 55 65 75)
479
+ punpckhdq xmm5, xmm4 ; xmm5=(46 56 66 76 47 57 67 77)
480
+
481
+ movdqa xmm3, xmm7 ; transpose coefficients(phase 3)
482
+ punpcklqdq xmm7, xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70)
483
+ punpckhqdq xmm3, xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71)
484
+ movdqa xmm4, xmm1 ; transpose coefficients(phase 3)
485
+ punpcklqdq xmm1, xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72)
486
+ punpckhqdq xmm4, xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73)
487
+
488
+ movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35)
489
+ movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37)
490
+
491
+ movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1
492
+ movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3
493
+
494
+ movdqa xmm3, xmm0 ; transpose coefficients(phase 3)
495
+ punpcklqdq xmm0, xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74)
496
+ punpckhqdq xmm3, xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75)
497
+ movdqa xmm4, xmm2 ; transpose coefficients(phase 3)
498
+ punpcklqdq xmm2, xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76)
499
+ punpckhqdq xmm4, xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77)
500
+
501
+ movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5
502
+ movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7
503
+ .column_end:
504
+
505
+ ; -- Prefetch the next coefficient block
506
+
507
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
508
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
509
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
510
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
511
+
512
+ ; ---- Pass 2: process rows from work array, store into output array.
513
+
514
+ mov rax, [original_rbp]
515
+ mov rdi, r12 ; (JSAMPROW *)
516
+ mov eax, r13d
517
+
518
+ ; -- Even part
519
+
520
+ ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
521
+
522
+ ; (Original)
523
+ ; z1 = (z2 + z3) * 0.541196100;
524
+ ; tmp2 = z1 + z3 * -1.847759065;
525
+ ; tmp3 = z1 + z2 * 0.765366865;
526
+ ;
527
+ ; (This implementation)
528
+ ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
529
+ ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
530
+
531
+ movdqa xmm6, xmm1 ; xmm1=in2=z2
532
+ movdqa xmm5, xmm1
533
+ punpcklwd xmm6, xmm2 ; xmm2=in6=z3
534
+ punpckhwd xmm5, xmm2
535
+ movdqa xmm1, xmm6
536
+ movdqa xmm2, xmm5
537
+ pmaddwd xmm6, [rel PW_F130_F054] ; xmm6=tmp3L
538
+ pmaddwd xmm5, [rel PW_F130_F054] ; xmm5=tmp3H
539
+ pmaddwd xmm1, [rel PW_F054_MF130] ; xmm1=tmp2L
540
+ pmaddwd xmm2, [rel PW_F054_MF130] ; xmm2=tmp2H
541
+
542
+ movdqa xmm3, xmm7
543
+ paddw xmm7, xmm0 ; xmm7=in0+in4
544
+ psubw xmm3, xmm0 ; xmm3=in0-in4
545
+
546
+ pxor xmm4, xmm4
547
+ pxor xmm0, xmm0
548
+ punpcklwd xmm4, xmm7 ; xmm4=tmp0L
549
+ punpckhwd xmm0, xmm7 ; xmm0=tmp0H
550
+ psrad xmm4, (16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS
551
+ psrad xmm0, (16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS
552
+
553
+ movdqa xmm7, xmm4
554
+ paddd xmm4, xmm6 ; xmm4=tmp10L
555
+ psubd xmm7, xmm6 ; xmm7=tmp13L
556
+ movdqa xmm6, xmm0
557
+ paddd xmm0, xmm5 ; xmm0=tmp10H
558
+ psubd xmm6, xmm5 ; xmm6=tmp13H
559
+
560
+ movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L
561
+ movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H
562
+ movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L
563
+ movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H
564
+
565
+ pxor xmm5, xmm5
566
+ pxor xmm4, xmm4
567
+ punpcklwd xmm5, xmm3 ; xmm5=tmp1L
568
+ punpckhwd xmm4, xmm3 ; xmm4=tmp1H
569
+ psrad xmm5, (16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS
570
+ psrad xmm4, (16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS
571
+
572
+ movdqa xmm0, xmm5
573
+ paddd xmm5, xmm1 ; xmm5=tmp11L
574
+ psubd xmm0, xmm1 ; xmm0=tmp12L
575
+ movdqa xmm7, xmm4
576
+ paddd xmm4, xmm2 ; xmm4=tmp11H
577
+ psubd xmm7, xmm2 ; xmm7=tmp12H
578
+
579
+ movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L
580
+ movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H
581
+ movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L
582
+ movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H
583
+
584
+ ; -- Odd part
585
+
586
+ movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3
587
+ movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1
588
+ movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7
589
+ movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5
590
+
591
+ movdqa xmm5, xmm6
592
+ movdqa xmm4, xmm3
593
+ paddw xmm5, xmm1 ; xmm5=z3
594
+ paddw xmm4, xmm2 ; xmm4=z4
595
+
596
+ ; (Original)
597
+ ; z5 = (z3 + z4) * 1.175875602;
598
+ ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644;
599
+ ; z3 += z5; z4 += z5;
600
+ ;
601
+ ; (This implementation)
602
+ ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
603
+ ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
604
+
605
+ movdqa xmm0, xmm5
606
+ movdqa xmm7, xmm5
607
+ punpcklwd xmm0, xmm4
608
+ punpckhwd xmm7, xmm4
609
+ movdqa xmm5, xmm0
610
+ movdqa xmm4, xmm7
611
+ pmaddwd xmm0, [rel PW_MF078_F117] ; xmm0=z3L
612
+ pmaddwd xmm7, [rel PW_MF078_F117] ; xmm7=z3H
613
+ pmaddwd xmm5, [rel PW_F117_F078] ; xmm5=z4L
614
+ pmaddwd xmm4, [rel PW_F117_F078] ; xmm4=z4H
615
+
616
+ movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L
617
+ movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H
618
+
619
+ ; (Original)
620
+ ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2;
621
+ ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869;
622
+ ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110;
623
+ ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447;
624
+ ; tmp0 += z1 + z3; tmp1 += z2 + z4;
625
+ ; tmp2 += z2 + z3; tmp3 += z1 + z4;
626
+ ;
627
+ ; (This implementation)
628
+ ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
629
+ ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
630
+ ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
631
+ ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
632
+ ; tmp0 += z3; tmp1 += z4;
633
+ ; tmp2 += z3; tmp3 += z4;
634
+
635
+ movdqa xmm0, xmm1
636
+ movdqa xmm7, xmm1
637
+ punpcklwd xmm0, xmm3
638
+ punpckhwd xmm7, xmm3
639
+ movdqa xmm1, xmm0
640
+ movdqa xmm3, xmm7
641
+ pmaddwd xmm0, [rel PW_MF060_MF089] ; xmm0=tmp0L
642
+ pmaddwd xmm7, [rel PW_MF060_MF089] ; xmm7=tmp0H
643
+ pmaddwd xmm1, [rel PW_MF089_F060] ; xmm1=tmp3L
644
+ pmaddwd xmm3, [rel PW_MF089_F060] ; xmm3=tmp3H
645
+
646
+ paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L
647
+ paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H
648
+ paddd xmm1, xmm5 ; xmm1=tmp3L
649
+ paddd xmm3, xmm4 ; xmm3=tmp3H
650
+
651
+ movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L
652
+ movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H
653
+
654
+ movdqa xmm0, xmm2
655
+ movdqa xmm7, xmm2
656
+ punpcklwd xmm0, xmm6
657
+ punpckhwd xmm7, xmm6
658
+ movdqa xmm2, xmm0
659
+ movdqa xmm6, xmm7
660
+ pmaddwd xmm0, [rel PW_MF050_MF256] ; xmm0=tmp1L
661
+ pmaddwd xmm7, [rel PW_MF050_MF256] ; xmm7=tmp1H
662
+ pmaddwd xmm2, [rel PW_MF256_F050] ; xmm2=tmp2L
663
+ pmaddwd xmm6, [rel PW_MF256_F050] ; xmm6=tmp2H
664
+
665
+ paddd xmm0, xmm5 ; xmm0=tmp1L
666
+ paddd xmm7, xmm4 ; xmm7=tmp1H
667
+ paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L
668
+ paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H
669
+
670
+ movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L
671
+ movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H
672
+
673
+ ; -- Final output stage
674
+
675
+ movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L
676
+ movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H
677
+
678
+ movdqa xmm0, xmm5
679
+ movdqa xmm7, xmm4
680
+ paddd xmm5, xmm1 ; xmm5=data0L
681
+ paddd xmm4, xmm3 ; xmm4=data0H
682
+ psubd xmm0, xmm1 ; xmm0=data7L
683
+ psubd xmm7, xmm3 ; xmm7=data7H
684
+
685
+ movdqa xmm1, [rel PD_DESCALE_P2] ; xmm1=[rel PD_DESCALE_P2]
686
+
687
+ paddd xmm5, xmm1
688
+ paddd xmm4, xmm1
689
+ psrad xmm5, DESCALE_P2
690
+ psrad xmm4, DESCALE_P2
691
+ paddd xmm0, xmm1
692
+ paddd xmm7, xmm1
693
+ psrad xmm0, DESCALE_P2
694
+ psrad xmm7, DESCALE_P2
695
+
696
+ packssdw xmm5, xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70)
697
+ packssdw xmm0, xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77)
698
+
699
+ movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L
700
+ movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H
701
+
702
+ movdqa xmm4, xmm3
703
+ movdqa xmm7, xmm1
704
+ paddd xmm3, xmm2 ; xmm3=data1L
705
+ paddd xmm1, xmm6 ; xmm1=data1H
706
+ psubd xmm4, xmm2 ; xmm4=data6L
707
+ psubd xmm7, xmm6 ; xmm7=data6H
708
+
709
+ movdqa xmm2, [rel PD_DESCALE_P2] ; xmm2=[rel PD_DESCALE_P2]
710
+
711
+ paddd xmm3, xmm2
712
+ paddd xmm1, xmm2
713
+ psrad xmm3, DESCALE_P2
714
+ psrad xmm1, DESCALE_P2
715
+ paddd xmm4, xmm2
716
+ paddd xmm7, xmm2
717
+ psrad xmm4, DESCALE_P2
718
+ psrad xmm7, DESCALE_P2
719
+
720
+ packssdw xmm3, xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71)
721
+ packssdw xmm4, xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76)
722
+
723
+ packsswb xmm5, xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
724
+ packsswb xmm3, xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
725
+
726
+ movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L
727
+ movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H
728
+ movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L
729
+ movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H
730
+
731
+ movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
732
+ movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
733
+
734
+ movdqa xmm4, xmm6
735
+ movdqa xmm0, xmm2
736
+ paddd xmm6, xmm1 ; xmm6=data2L
737
+ paddd xmm2, xmm7 ; xmm2=data2H
738
+ psubd xmm4, xmm1 ; xmm4=data5L
739
+ psubd xmm0, xmm7 ; xmm0=data5H
740
+
741
+ movdqa xmm5, [rel PD_DESCALE_P2] ; xmm5=[rel PD_DESCALE_P2]
742
+
743
+ paddd xmm6, xmm5
744
+ paddd xmm2, xmm5
745
+ psrad xmm6, DESCALE_P2
746
+ psrad xmm2, DESCALE_P2
747
+ paddd xmm4, xmm5
748
+ paddd xmm0, xmm5
749
+ psrad xmm4, DESCALE_P2
750
+ psrad xmm0, DESCALE_P2
751
+
752
+ packssdw xmm6, xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72)
753
+ packssdw xmm4, xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75)
754
+
755
+ movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L
756
+ movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H
757
+ movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L
758
+ movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H
759
+
760
+ movdqa xmm2, xmm3
761
+ movdqa xmm0, xmm1
762
+ paddd xmm3, xmm7 ; xmm3=data3L
763
+ paddd xmm1, xmm5 ; xmm1=data3H
764
+ psubd xmm2, xmm7 ; xmm2=data4L
765
+ psubd xmm0, xmm5 ; xmm0=data4H
766
+
767
+ movdqa xmm7, [rel PD_DESCALE_P2] ; xmm7=[rel PD_DESCALE_P2]
768
+
769
+ paddd xmm3, xmm7
770
+ paddd xmm1, xmm7
771
+ psrad xmm3, DESCALE_P2
772
+ psrad xmm1, DESCALE_P2
773
+ paddd xmm2, xmm7
774
+ paddd xmm0, xmm7
775
+ psrad xmm2, DESCALE_P2
776
+ psrad xmm0, DESCALE_P2
777
+
778
+ movdqa xmm5, [rel PB_CENTERJSAMP] ; xmm5=[rel PB_CENTERJSAMP]
779
+
780
+ packssdw xmm3, xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73)
781
+ packssdw xmm2, xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74)
782
+
783
+ movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
784
+ movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
785
+
786
+ packsswb xmm6, xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
787
+ packsswb xmm3, xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
788
+
789
+ paddb xmm7, xmm5
790
+ paddb xmm1, xmm5
791
+ paddb xmm6, xmm5
792
+ paddb xmm3, xmm5
793
+
794
+ movdqa xmm0, xmm7 ; transpose coefficients(phase 1)
795
+ punpcklbw xmm7, xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
796
+ punpckhbw xmm0, xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
797
+ movdqa xmm2, xmm6 ; transpose coefficients(phase 1)
798
+ punpcklbw xmm6, xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
799
+ punpckhbw xmm2, xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
800
+
801
+ movdqa xmm4, xmm7 ; transpose coefficients(phase 2)
802
+ punpcklwd xmm7, xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
803
+ punpckhwd xmm4, xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
804
+ movdqa xmm5, xmm2 ; transpose coefficients(phase 2)
805
+ punpcklwd xmm2, xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
806
+ punpckhwd xmm5, xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
807
+
808
+ movdqa xmm1, xmm7 ; transpose coefficients(phase 3)
809
+ punpckldq xmm7, xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
810
+ punpckhdq xmm1, xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
811
+ movdqa xmm3, xmm4 ; transpose coefficients(phase 3)
812
+ punpckldq xmm4, xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
813
+ punpckhdq xmm3, xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
814
+
815
+ pshufd xmm6, xmm7, 0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
816
+ pshufd xmm0, xmm1, 0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
817
+ pshufd xmm2, xmm4, 0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
818
+ pshufd xmm5, xmm3, 0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
819
+
820
+ mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
821
+ mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
822
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7
823
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
824
+ mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
825
+ mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
826
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
827
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
828
+
829
+ mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
830
+ mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
831
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
832
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
833
+ mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
834
+ mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
835
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
836
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
837
+
838
+ uncollect_args 4
839
+ mov rsp, rbp ; rsp <- aligned rbp
840
+ pop rsp ; rsp <- original rbp
841
+ pop rbp
842
+ ret
843
+
844
+ ; For some reason, the OS X linker does not honor the request to align the
845
+ ; segment unless we do this.
846
+ align 32