image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,491 @@
1
+ ;
2
+ ; jidctfst.asm - fast integer IDCT (64-bit SSE2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; and can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+ ;
18
+ ; This file contains a fast, not so accurate integer implementation of
19
+ ; the inverse DCT (Discrete Cosine Transform). The following code is
20
+ ; based directly on the IJG's original jidctfst.c; see jidctfst.c
21
+ ; for more details.
22
+
23
+ %include "jsimdext.inc"
24
+ %include "jdct.inc"
25
+
26
+ ; --------------------------------------------------------------------------
27
+
28
+ %define CONST_BITS 8 ; fractional bits of the FIX() constants below (FIX(1) == 1 << CONST_BITS); 14 is also OK.
29
+ %define PASS1_BITS 2 ; must equal IFAST_SCALE_BITS (checked by the %if below)
30
+
31
+ %if IFAST_SCALE_BITS != PASS1_BITS
32
+ %error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
33
+ %endif
34
+
35
+ %if CONST_BITS == 8
36
+ F_1_082 equ 277 ; FIX(1.082392200)
37
+ F_1_414 equ 362 ; FIX(1.414213562)
38
+ F_1_847 equ 473 ; FIX(1.847759065)
39
+ F_2_613 equ 669 ; FIX(2.613125930)
40
+ F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1), with FIX(1) == 256 when CONST_BITS == 8
41
+ %else
42
+ ; NASM cannot do compile-time arithmetic on floating-point constants, so each constant below is written as an integer pre-scaled by 2^30 and DESCALE() rounds it to CONST_BITS fractional bits.
43
+ %define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n))
44
+ F_1_082 equ DESCALE(1162209775, 30 - CONST_BITS) ; FIX(1.082392200)
45
+ F_1_414 equ DESCALE(1518500249, 30 - CONST_BITS) ; FIX(1.414213562)
46
+ F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065)
47
+ F_2_613 equ DESCALE(2805822602, 30 - CONST_BITS) ; FIX(2.613125930)
48
+ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
49
+ %endif
50
+
51
+ ; --------------------------------------------------------------------------
52
+ SECTION SEG_CONST
53
+
54
+ ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
55
+ ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw, which keeps only the high 16 bits of each product)
56
+
57
+ %define PRE_MULTIPLY_SCALE_BITS 2
58
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
59
+
60
+ alignz 32
61
+ GLOBAL_DATA(jconst_idct_ifast_sse2)
62
+
63
+ EXTN(jconst_idct_ifast_sse2):
64
+
65
+ PW_F1414 times 8 dw F_1_414 << CONST_SHIFT ; 8 words: FIX(1.414213562), pre-shifted by CONST_SHIFT
66
+ PW_F1847 times 8 dw F_1_847 << CONST_SHIFT ; 8 words: FIX(1.847759065), pre-shifted by CONST_SHIFT
67
+ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT ; 8 words: -(FIX(2.613125930) - FIX(1)), pre-shifted by CONST_SHIFT
68
+ PW_F1082 times 8 dw F_1_082 << CONST_SHIFT ; 8 words: FIX(1.082392200), pre-shifted by CONST_SHIFT
69
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE ; 16 bytes, each CENTERJSAMPLE
70
+
71
+ alignz 32
72
+
73
+ ; --------------------------------------------------------------------------
74
+ SECTION SEG_TEXT
75
+ BITS 64
76
+ ;
77
+ ; Perform dequantization and inverse DCT on one block of coefficients.
78
+ ;
79
+ ; GLOBAL(void)
80
+ ; jsimd_idct_ifast_sse2(void *dct_table, JCOEFPTR coef_block,
81
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
82
+ ;
83
+
84
+ ; r10 = void *dct_table
85
+ ; r11 = JCOEFPTR coef_block
86
+ ; r12 = JSAMPARRAY output_buf
87
+ ; r13d = JDIMENSION output_col
88
+
89
+ %define original_rbp rbp + 0
90
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
91
+ ; xmmword wk[WK_NUM]
92
+ %define WK_NUM 2
93
+
94
+ align 32
95
+ GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
96
+
97
+ EXTN(jsimd_idct_ifast_sse2):
98
+ push rbp
99
+ mov rax, rsp ; rax = original rbp
100
+ sub rsp, byte 4
101
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
102
+ mov [rsp], rax
103
+ mov rbp, rsp ; rbp = aligned rbp
104
+ lea rsp, [wk(0)]
105
+ collect_args 4
106
+
107
+ ; ---- Pass 1: process columns from input.
108
+
109
+ mov rdx, r10 ; quantptr
110
+ mov rsi, r11 ; inptr
111
+
112
+ %ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
113
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
114
+ or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
115
+ jnz near .columnDCT
116
+
117
+ movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
118
+ movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
119
+ por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
120
+ por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
121
+ por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
122
+ por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
123
+ por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
124
+ por xmm1, xmm0
125
+ packsswb xmm1, xmm1
126
+ packsswb xmm1, xmm1
127
+ movd eax, xmm1
128
+ test rax, rax
129
+ jnz short .columnDCT
130
+
131
+ ; -- AC terms all zero
132
+
133
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
134
+ pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
135
+
136
+ movdqa xmm7, xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07)
137
+ punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03)
138
+ punpckhwd xmm7, xmm7 ; xmm7=(04 04 05 05 06 06 07 07)
139
+
140
+ pshufd xmm6, xmm0, 0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00)
141
+ pshufd xmm2, xmm0, 0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01)
142
+ pshufd xmm5, xmm0, 0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02)
143
+ pshufd xmm0, xmm0, 0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03)
144
+ pshufd xmm1, xmm7, 0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04)
145
+ pshufd xmm4, xmm7, 0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05)
146
+ pshufd xmm3, xmm7, 0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06)
147
+ pshufd xmm7, xmm7, 0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07)
148
+
149
+ movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1
150
+ movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3
151
+ jmp near .column_end
152
+ %endif
153
+ .columnDCT:
154
+
155
+ ; -- Even part
156
+
157
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
158
+ movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
159
+ pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
160
+ pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
161
+ movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
162
+ movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
163
+ pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
164
+ pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
165
+
166
+ movdqa xmm4, xmm0
167
+ movdqa xmm5, xmm1
168
+ psubw xmm0, xmm2 ; xmm0=tmp11
169
+ psubw xmm1, xmm3
170
+ paddw xmm4, xmm2 ; xmm4=tmp10
171
+ paddw xmm5, xmm3 ; xmm5=tmp13
172
+
173
+ psllw xmm1, PRE_MULTIPLY_SCALE_BITS
174
+ pmulhw xmm1, [rel PW_F1414]
175
+ psubw xmm1, xmm5 ; xmm1=tmp12
176
+
177
+ movdqa xmm6, xmm4
178
+ movdqa xmm7, xmm0
179
+ psubw xmm4, xmm5 ; xmm4=tmp3
180
+ psubw xmm0, xmm1 ; xmm0=tmp2
181
+ paddw xmm6, xmm5 ; xmm6=tmp0
182
+ paddw xmm7, xmm1 ; xmm7=tmp1
183
+
184
+ movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3
185
+ movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2
186
+
187
+ ; -- Odd part
188
+
189
+ movdqa xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
190
+ movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
191
+ pmullw xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
192
+ pmullw xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
193
+ movdqa xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
194
+ movdqa xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
195
+ pmullw xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
196
+ pmullw xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
197
+
198
+ movdqa xmm4, xmm2
199
+ movdqa xmm0, xmm5
200
+ psubw xmm2, xmm1 ; xmm2=z12
201
+ psubw xmm5, xmm3 ; xmm5=z10
202
+ paddw xmm4, xmm1 ; xmm4=z11
203
+ paddw xmm0, xmm3 ; xmm0=z13
204
+
205
+ movdqa xmm1, xmm5 ; xmm1=z10(unscaled)
206
+ psllw xmm2, PRE_MULTIPLY_SCALE_BITS
207
+ psllw xmm5, PRE_MULTIPLY_SCALE_BITS
208
+
209
+ movdqa xmm3, xmm4
210
+ psubw xmm4, xmm0
211
+ paddw xmm3, xmm0 ; xmm3=tmp7
212
+
213
+ psllw xmm4, PRE_MULTIPLY_SCALE_BITS
214
+ pmulhw xmm4, [rel PW_F1414] ; xmm4=tmp11
215
+
216
+ ; To avoid overflow...
217
+ ;
218
+ ; (Original)
219
+ ; tmp12 = -2.613125930 * z10 + z5;
220
+ ;
221
+ ; (This implementation)
222
+ ; tmp12 = (-1.613125930 - 1) * z10 + z5;
223
+ ; = -1.613125930 * z10 - z10 + z5;
224
+
225
+ movdqa xmm0, xmm5
226
+ paddw xmm5, xmm2
227
+ pmulhw xmm5, [rel PW_F1847] ; xmm5=z5
228
+ pmulhw xmm0, [rel PW_MF1613]
229
+ pmulhw xmm2, [rel PW_F1082]
230
+ psubw xmm0, xmm1
231
+ psubw xmm2, xmm5 ; xmm2=tmp10
232
+ paddw xmm0, xmm5 ; xmm0=tmp12
233
+
234
+ ; -- Final output stage
235
+
236
+ psubw xmm0, xmm3 ; xmm0=tmp6
237
+ movdqa xmm1, xmm6
238
+ movdqa xmm5, xmm7
239
+ paddw xmm6, xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07)
240
+ paddw xmm7, xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17)
241
+ psubw xmm1, xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77)
242
+ psubw xmm5, xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67)
243
+ psubw xmm4, xmm0 ; xmm4=tmp5
244
+
245
+ movdqa xmm3, xmm6 ; transpose coefficients(phase 1)
246
+ punpcklwd xmm6, xmm7 ; xmm6=(00 10 01 11 02 12 03 13)
247
+ punpckhwd xmm3, xmm7 ; xmm3=(04 14 05 15 06 16 07 17)
248
+ movdqa xmm0, xmm5 ; transpose coefficients(phase 1)
249
+ punpcklwd xmm5, xmm1 ; xmm5=(60 70 61 71 62 72 63 73)
250
+ punpckhwd xmm0, xmm1 ; xmm0=(64 74 65 75 66 76 67 77)
251
+
252
+ movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2
253
+ movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3
254
+
255
+ movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73)
256
+ movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77)
257
+
258
+ paddw xmm2, xmm4 ; xmm2=tmp4
259
+ movdqa xmm5, xmm7
260
+ movdqa xmm0, xmm1
261
+ paddw xmm7, xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27)
262
+ paddw xmm1, xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47)
263
+ psubw xmm5, xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57)
264
+ psubw xmm0, xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37)
265
+
266
+ movdqa xmm4, xmm7 ; transpose coefficients(phase 1)
267
+ punpcklwd xmm7, xmm0 ; xmm7=(20 30 21 31 22 32 23 33)
268
+ punpckhwd xmm4, xmm0 ; xmm4=(24 34 25 35 26 36 27 37)
269
+ movdqa xmm2, xmm1 ; transpose coefficients(phase 1)
270
+ punpcklwd xmm1, xmm5 ; xmm1=(40 50 41 51 42 52 43 53)
271
+ punpckhwd xmm2, xmm5 ; xmm2=(44 54 45 55 46 56 47 57)
272
+
273
+ movdqa xmm0, xmm3 ; transpose coefficients(phase 2)
274
+ punpckldq xmm3, xmm4 ; xmm3=(04 14 24 34 05 15 25 35)
275
+ punpckhdq xmm0, xmm4 ; xmm0=(06 16 26 36 07 17 27 37)
276
+ movdqa xmm5, xmm6 ; transpose coefficients(phase 2)
277
+ punpckldq xmm6, xmm7 ; xmm6=(00 10 20 30 01 11 21 31)
278
+ punpckhdq xmm5, xmm7 ; xmm5=(02 12 22 32 03 13 23 33)
279
+
280
+ movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73)
281
+ movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77)
282
+
283
+ movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35)
284
+ movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37)
285
+
286
+ movdqa xmm3, xmm1 ; transpose coefficients(phase 2)
287
+ punpckldq xmm1, xmm4 ; xmm1=(40 50 60 70 41 51 61 71)
288
+ punpckhdq xmm3, xmm4 ; xmm3=(42 52 62 72 43 53 63 73)
289
+ movdqa xmm0, xmm2 ; transpose coefficients(phase 2)
290
+ punpckldq xmm2, xmm7 ; xmm2=(44 54 64 74 45 55 65 75)
291
+ punpckhdq xmm0, xmm7 ; xmm0=(46 56 66 76 47 57 67 77)
292
+
293
+ movdqa xmm4, xmm6 ; transpose coefficients(phase 3)
294
+ punpcklqdq xmm6, xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70)
295
+ punpckhqdq xmm4, xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71)
296
+ movdqa xmm7, xmm5 ; transpose coefficients(phase 3)
297
+ punpcklqdq xmm5, xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72)
298
+ punpckhqdq xmm7, xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73)
299
+
300
+ movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35)
301
+ movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37)
302
+
303
+ movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1
304
+ movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3
305
+
306
+ movdqa xmm4, xmm1 ; transpose coefficients(phase 3)
307
+ punpcklqdq xmm1, xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74)
308
+ punpckhqdq xmm4, xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75)
309
+ movdqa xmm7, xmm3 ; transpose coefficients(phase 3)
310
+ punpcklqdq xmm3, xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76)
311
+ punpckhqdq xmm7, xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77)
312
+ .column_end:
313
+
314
+ ; -- Prefetch the next coefficient block
315
+
316
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
317
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
318
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
319
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
320
+
321
+ ; ---- Pass 2: process rows from work array, store into output array.
322
+
323
+ mov rax, [original_rbp]
324
+ mov rdi, r12 ; (JSAMPROW *)
325
+ mov eax, r13d
326
+
327
+ ; -- Even part
328
+
329
+ ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
330
+
331
+ movdqa xmm2, xmm6
332
+ movdqa xmm0, xmm5
333
+ psubw xmm6, xmm1 ; xmm6=tmp11
334
+ psubw xmm5, xmm3
335
+ paddw xmm2, xmm1 ; xmm2=tmp10
336
+ paddw xmm0, xmm3 ; xmm0=tmp13
337
+
338
+ psllw xmm5, PRE_MULTIPLY_SCALE_BITS
339
+ pmulhw xmm5, [rel PW_F1414]
340
+ psubw xmm5, xmm0 ; xmm5=tmp12
341
+
342
+ movdqa xmm1, xmm2
343
+ movdqa xmm3, xmm6
344
+ psubw xmm2, xmm0 ; xmm2=tmp3
345
+ psubw xmm6, xmm5 ; xmm6=tmp2
346
+ paddw xmm1, xmm0 ; xmm1=tmp0
347
+ paddw xmm3, xmm5 ; xmm3=tmp1
348
+
349
+ movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1
350
+ movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3
351
+
352
+ movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3
353
+ movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2
354
+
355
+ ; -- Odd part
356
+
357
+ ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7
358
+
359
+ movdqa xmm2, xmm0
360
+ movdqa xmm6, xmm4
361
+ psubw xmm0, xmm7 ; xmm0=z12
362
+ psubw xmm4, xmm5 ; xmm4=z10
363
+ paddw xmm2, xmm7 ; xmm2=z11
364
+ paddw xmm6, xmm5 ; xmm6=z13
365
+
366
+ movdqa xmm7, xmm4 ; xmm7=z10(unscaled)
367
+ psllw xmm0, PRE_MULTIPLY_SCALE_BITS
368
+ psllw xmm4, PRE_MULTIPLY_SCALE_BITS
369
+
370
+ movdqa xmm5, xmm2
371
+ psubw xmm2, xmm6
372
+ paddw xmm5, xmm6 ; xmm5=tmp7
373
+
374
+ psllw xmm2, PRE_MULTIPLY_SCALE_BITS
375
+ pmulhw xmm2, [rel PW_F1414] ; xmm2=tmp11
376
+
377
+ ; To avoid overflow...
378
+ ;
379
+ ; (Original)
380
+ ; tmp12 = -2.613125930 * z10 + z5;
381
+ ;
382
+ ; (This implementation)
383
+ ; tmp12 = (-1.613125930 - 1) * z10 + z5;
384
+ ; = -1.613125930 * z10 - z10 + z5;
385
+
386
+ movdqa xmm6, xmm4
387
+ paddw xmm4, xmm0
388
+ pmulhw xmm4, [rel PW_F1847] ; xmm4=z5
389
+ pmulhw xmm6, [rel PW_MF1613]
390
+ pmulhw xmm0, [rel PW_F1082]
391
+ psubw xmm6, xmm7
392
+ psubw xmm0, xmm4 ; xmm0=tmp10
393
+ paddw xmm6, xmm4 ; xmm6=tmp12
394
+
395
+ ; -- Final output stage
396
+
397
+ psubw xmm6, xmm5 ; xmm6=tmp6
398
+ movdqa xmm7, xmm1
399
+ movdqa xmm4, xmm3
400
+ paddw xmm1, xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70)
401
+ paddw xmm3, xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71)
402
+ psraw xmm1, (PASS1_BITS+3) ; descale
403
+ psraw xmm3, (PASS1_BITS+3) ; descale
404
+ psubw xmm7, xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77)
405
+ psubw xmm4, xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76)
406
+ psraw xmm7, (PASS1_BITS+3) ; descale
407
+ psraw xmm4, (PASS1_BITS+3) ; descale
408
+ psubw xmm2, xmm6 ; xmm2=tmp5
409
+
410
+ packsswb xmm1, xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
411
+ packsswb xmm3, xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
412
+
413
+ movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2
414
+ movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3
415
+
416
+ paddw xmm0, xmm2 ; xmm0=tmp4
417
+ movdqa xmm4, xmm5
418
+ movdqa xmm7, xmm6
419
+ paddw xmm5, xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72)
420
+ paddw xmm6, xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74)
421
+ psraw xmm5, (PASS1_BITS+3) ; descale
422
+ psraw xmm6, (PASS1_BITS+3) ; descale
423
+ psubw xmm4, xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75)
424
+ psubw xmm7, xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73)
425
+ psraw xmm4, (PASS1_BITS+3) ; descale
426
+ psraw xmm7, (PASS1_BITS+3) ; descale
427
+
428
+ movdqa xmm2, [rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP]
429
+
430
+ packsswb xmm5, xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
431
+ packsswb xmm7, xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
432
+
433
+ paddb xmm1, xmm2
434
+ paddb xmm3, xmm2
435
+ paddb xmm5, xmm2
436
+ paddb xmm7, xmm2
437
+
438
+ movdqa xmm0, xmm1 ; transpose coefficients(phase 1)
439
+ punpcklbw xmm1, xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
440
+ punpckhbw xmm0, xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
441
+ movdqa xmm6, xmm5 ; transpose coefficients(phase 1)
442
+ punpcklbw xmm5, xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
443
+ punpckhbw xmm6, xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
444
+
445
+ movdqa xmm4, xmm1 ; transpose coefficients(phase 2)
446
+ punpcklwd xmm1, xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
447
+ punpckhwd xmm4, xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
448
+ movdqa xmm2, xmm6 ; transpose coefficients(phase 2)
449
+ punpcklwd xmm6, xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
450
+ punpckhwd xmm2, xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
451
+
452
+ movdqa xmm3, xmm1 ; transpose coefficients(phase 3)
453
+ punpckldq xmm1, xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
454
+ punpckhdq xmm3, xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
455
+ movdqa xmm7, xmm4 ; transpose coefficients(phase 3)
456
+ punpckldq xmm4, xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
457
+ punpckhdq xmm7, xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
458
+
459
+ pshufd xmm5, xmm1, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
460
+ pshufd xmm0, xmm3, 0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
461
+ pshufd xmm6, xmm4, 0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
462
+ pshufd xmm2, xmm7, 0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
463
+
464
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
465
+ mov rsip, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
466
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
467
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
468
+ mov rdxp, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
469
+ mov rsip, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
470
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
471
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
472
+
473
+ mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
474
+ mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
475
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
476
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
477
+ mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
478
+ mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
479
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
480
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
481
+
482
+ uncollect_args 4
483
+ mov rsp, rbp ; rsp <- aligned rbp
484
+ pop rsp ; rsp <- original rbp
485
+ pop rbp
486
+ ret
487
+ ret
488
+
489
+ ; For some reason, the OS X linker does not honor the request to align the
490
+ ; segment unless we do this.
491
+ align 32