image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,482 @@
1
+ ;
2
+ ; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler), and
13
+ ; can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+ ;
18
+ ; This file contains a floating-point implementation of the inverse DCT
19
+ ; (Discrete Cosine Transform). The following code is based directly on
20
+ ; the IJG's original jidctflt.c; see jidctflt.c for more details.
21
+
22
+ %include "jsimdext.inc"
23
+ %include "jdct.inc"
24
+
25
+ ; --------------------------------------------------------------------------
26
+
27
+ %macro unpcklps2 2 ; merge low halves: %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5); %2 is left unchanged
28
+ shufps %1, %2, 0x44 ; imm8 0x44 = 01_00_01_00b: result lanes are dst[0], dst[1], src[0], src[1]
29
+ %endmacro
30
+
31
+ %macro unpckhps2 2 ; merge high halves: %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7); %2 is left unchanged
32
+ shufps %1, %2, 0xEE ; imm8 0xEE = 11_10_11_10b: result lanes are dst[2], dst[3], src[2], src[3]
33
+ %endmacro
34
+
35
+ ; --------------------------------------------------------------------------
36
+ SECTION SEG_CONST
37
+
38
+ alignz 32
39
+ GLOBAL_DATA(jconst_idct_float_sse2)
40
+
41
+ EXTN(jconst_idct_float_sse2):
42
+
43
+ PD_1_414 times 4 dd 1.414213562373095048801689 ; sqrt(2), replicated into 4 single-precision lanes
44
+ PD_1_847 times 4 dd 1.847759065022573512256366 ; 2*cos(pi/8), replicated into 4 lanes
45
+ PD_1_082 times 4 dd 1.082392200292393968799446 ; 2*(cos(pi/8) - cos(3*pi/8)), replicated into 4 lanes
46
+ PD_M2_613 times 4 dd -2.613125929752753055713286 ; -2*(cos(pi/8) + cos(3*pi/8)), replicated into 4 lanes
47
+ PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) = 1.5 * 2^26, replicated into 4 lanes
48
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE ; 16 bytes, each equal to CENTERJSAMPLE
49
+
50
+ alignz 32
51
+
52
+ ; --------------------------------------------------------------------------
53
+ SECTION SEG_TEXT
54
+ BITS 64
55
+ ;
56
+ ; Perform dequantization and inverse DCT on one block of coefficients.
57
+ ;
58
+ ; GLOBAL(void)
59
+ ; jsimd_idct_float_sse2(void *dct_table, JCOEFPTR coef_block,
60
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
61
+ ;
62
+
63
+ ; r10 = void *dct_table
64
+ ; r11 = JCOEFPTR coef_block
65
+ ; r12 = JSAMPARRAY output_buf
66
+ ; r13d = JDIMENSION output_col
67
+
68
+ %define original_rbp rbp + 0
69
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
70
+ ; xmmword wk[WK_NUM]: WK_NUM xmmword-sized scratch slots ending just below the address in rbp
71
+ %define WK_NUM 2
72
+ %define workspace wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT
73
+ ; FAST_FLOAT workspace[DCTSIZE2]: DCTSIZE2 FAST_FLOAT entries placed directly below wk(0)
74
+
75
+ align 32
76
+ GLOBAL_FUNCTION(jsimd_idct_float_sse2)
77
+
78
+ EXTN(jsimd_idct_float_sse2):
79
+ push rbp
80
+ mov rax, rsp ; rax = original rbp
81
+ sub rsp, byte 4
82
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
83
+ mov [rsp], rax
84
+ mov rbp, rsp ; rbp = aligned rbp
85
+ lea rsp, [workspace]
86
+ collect_args 4
87
+ push rbx
88
+
89
+ ; ---- Pass 1: process columns from input, store into work array.
90
+
91
+ mov rdx, r10 ; quantptr
92
+ mov rsi, r11 ; inptr
93
+ lea rdi, [workspace] ; FAST_FLOAT *wsptr
94
+ mov rcx, DCTSIZE/4 ; ctr
95
+ .columnloop:
96
+ %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
97
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
98
+ or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
99
+ jnz near .columnDCT
100
+
101
+ movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
102
+ movq xmm2, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
103
+ movq xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
104
+ movq xmm4, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
105
+ movq xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
106
+ movq xmm6, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
107
+ movq xmm7, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
108
+ por xmm1, xmm2
109
+ por xmm3, xmm4
110
+ por xmm5, xmm6
111
+ por xmm1, xmm3
112
+ por xmm5, xmm7
113
+ por xmm1, xmm5
114
+ packsswb xmm1, xmm1
115
+ movd eax, xmm1
116
+ test rax, rax
117
+ jnz short .columnDCT
118
+
119
+ ; -- AC terms all zero
120
+
121
+ movq xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
122
+
123
+ punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03)
124
+ psrad xmm0, (DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03)
125
+ cvtdq2ps xmm0, xmm0 ; xmm0=in0=(00 01 02 03)
126
+
127
+ mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
128
+
129
+ movaps xmm1, xmm0
130
+ movaps xmm2, xmm0
131
+ movaps xmm3, xmm0
132
+
133
+ shufps xmm0, xmm0, 0x00 ; xmm0=(00 00 00 00)
134
+ shufps xmm1, xmm1, 0x55 ; xmm1=(01 01 01 01)
135
+ shufps xmm2, xmm2, 0xAA ; xmm2=(02 02 02 02)
136
+ shufps xmm3, xmm3, 0xFF ; xmm3=(03 03 03 03)
137
+
138
+ movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
139
+ movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
140
+ movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
141
+ movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
142
+ movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
143
+ movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm2
144
+ movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
145
+ movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
146
+ jmp near .nextcolumn
147
+ %endif
148
+ .columnDCT:
149
+
150
+ ; -- Even part
151
+
152
+ movq xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
153
+ movq xmm1, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
154
+ movq xmm2, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
155
+ movq xmm3, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
156
+
157
+ punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03)
158
+ punpcklwd xmm1, xmm1 ; xmm1=(20 20 21 21 22 22 23 23)
159
+ psrad xmm0, (DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03)
160
+ psrad xmm1, (DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23)
161
+ cvtdq2ps xmm0, xmm0 ; xmm0=in0=(00 01 02 03)
162
+ cvtdq2ps xmm1, xmm1 ; xmm1=in2=(20 21 22 23)
163
+
164
+ punpcklwd xmm2, xmm2 ; xmm2=(40 40 41 41 42 42 43 43)
165
+ punpcklwd xmm3, xmm3 ; xmm3=(60 60 61 61 62 62 63 63)
166
+ psrad xmm2, (DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43)
167
+ psrad xmm3, (DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63)
168
+ cvtdq2ps xmm2, xmm2 ; xmm2=in4=(40 41 42 43)
169
+ cvtdq2ps xmm3, xmm3 ; xmm3=in6=(60 61 62 63)
170
+
171
+ mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
172
+ mulps xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
173
+ mulps xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
174
+ mulps xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
175
+
176
+ movaps xmm4, xmm0
177
+ movaps xmm5, xmm1
178
+ subps xmm0, xmm2 ; xmm0=tmp11
179
+ subps xmm1, xmm3
180
+ addps xmm4, xmm2 ; xmm4=tmp10
181
+ addps xmm5, xmm3 ; xmm5=tmp13
182
+
183
+ mulps xmm1, [rel PD_1_414]
184
+ subps xmm1, xmm5 ; xmm1=tmp12
185
+
186
+ movaps xmm6, xmm4
187
+ movaps xmm7, xmm0
188
+ subps xmm4, xmm5 ; xmm4=tmp3
189
+ subps xmm0, xmm1 ; xmm0=tmp2
190
+ addps xmm6, xmm5 ; xmm6=tmp0
191
+ addps xmm7, xmm1 ; xmm7=tmp1
192
+
193
+ movaps XMMWORD [wk(1)], xmm4 ; tmp3
194
+ movaps XMMWORD [wk(0)], xmm0 ; tmp2
195
+
196
+ ; -- Odd part
197
+
198
+ movq xmm2, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
199
+ movq xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
200
+ movq xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
201
+ movq xmm1, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
202
+
203
+ punpcklwd xmm2, xmm2 ; xmm2=(10 10 11 11 12 12 13 13)
204
+ punpcklwd xmm3, xmm3 ; xmm3=(30 30 31 31 32 32 33 33)
205
+ psrad xmm2, (DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13)
206
+ psrad xmm3, (DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33)
207
+ cvtdq2ps xmm2, xmm2 ; xmm2=in1=(10 11 12 13)
208
+ cvtdq2ps xmm3, xmm3 ; xmm3=in3=(30 31 32 33)
209
+
210
+ punpcklwd xmm5, xmm5 ; xmm5=(50 50 51 51 52 52 53 53)
211
+ punpcklwd xmm1, xmm1 ; xmm1=(70 70 71 71 72 72 73 73)
212
+ psrad xmm5, (DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53)
213
+ psrad xmm1, (DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73)
214
+ cvtdq2ps xmm5, xmm5 ; xmm5=in5=(50 51 52 53)
215
+ cvtdq2ps xmm1, xmm1 ; xmm1=in7=(70 71 72 73)
216
+
217
+ mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
218
+ mulps xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
219
+ mulps xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
220
+ mulps xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
221
+
222
+ movaps xmm4, xmm2
223
+ movaps xmm0, xmm5
224
+ addps xmm2, xmm1 ; xmm2=z11
225
+ addps xmm5, xmm3 ; xmm5=z13
226
+ subps xmm4, xmm1 ; xmm4=z12
227
+ subps xmm0, xmm3 ; xmm0=z10
228
+
229
+ movaps xmm1, xmm2
230
+ subps xmm2, xmm5
231
+ addps xmm1, xmm5 ; xmm1=tmp7
232
+
233
+ mulps xmm2, [rel PD_1_414] ; xmm2=tmp11
234
+
235
+ movaps xmm3, xmm0
236
+ addps xmm0, xmm4
237
+ mulps xmm0, [rel PD_1_847] ; xmm0=z5
238
+ mulps xmm3, [rel PD_M2_613] ; xmm3=(z10 * -2.613125930)
239
+ mulps xmm4, [rel PD_1_082] ; xmm4=(z12 * 1.082392200)
240
+ addps xmm3, xmm0 ; xmm3=tmp12
241
+ subps xmm4, xmm0 ; xmm4=tmp10
242
+
243
+ ; -- Final output stage
244
+
245
+ subps xmm3, xmm1 ; xmm3=tmp6
246
+ movaps xmm5, xmm6
247
+ movaps xmm0, xmm7
248
+ addps xmm6, xmm1 ; xmm6=data0=(00 01 02 03)
249
+ addps xmm7, xmm3 ; xmm7=data1=(10 11 12 13)
250
+ subps xmm5, xmm1 ; xmm5=data7=(70 71 72 73)
251
+ subps xmm0, xmm3 ; xmm0=data6=(60 61 62 63)
252
+ subps xmm2, xmm3 ; xmm2=tmp5
253
+
254
+ movaps xmm1, xmm6 ; transpose coefficients(phase 1)
255
+ unpcklps xmm6, xmm7 ; xmm6=(00 10 01 11)
256
+ unpckhps xmm1, xmm7 ; xmm1=(02 12 03 13)
257
+ movaps xmm3, xmm0 ; transpose coefficients(phase 1)
258
+ unpcklps xmm0, xmm5 ; xmm0=(60 70 61 71)
259
+ unpckhps xmm3, xmm5 ; xmm3=(62 72 63 73)
260
+
261
+ movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2
262
+ movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3
263
+
264
+ movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71)
265
+ movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73)
266
+
267
+ addps xmm4, xmm2 ; xmm4=tmp4
268
+ movaps xmm0, xmm7
269
+ movaps xmm3, xmm5
270
+ addps xmm7, xmm2 ; xmm7=data2=(20 21 22 23)
271
+ addps xmm5, xmm4 ; xmm5=data4=(40 41 42 43)
272
+ subps xmm0, xmm2 ; xmm0=data5=(50 51 52 53)
273
+ subps xmm3, xmm4 ; xmm3=data3=(30 31 32 33)
274
+
275
+ movaps xmm2, xmm7 ; transpose coefficients(phase 1)
276
+ unpcklps xmm7, xmm3 ; xmm7=(20 30 21 31)
277
+ unpckhps xmm2, xmm3 ; xmm2=(22 32 23 33)
278
+ movaps xmm4, xmm5 ; transpose coefficients(phase 1)
279
+ unpcklps xmm5, xmm0 ; xmm5=(40 50 41 51)
280
+ unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
281
+
282
+ movaps xmm3, xmm6 ; transpose coefficients(phase 2)
283
+ unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
284
+ unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
285
+ movaps xmm0, xmm1 ; transpose coefficients(phase 2)
286
+ unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
287
+ unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
288
+
289
+ movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
290
+ movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
291
+
292
+ movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6
293
+ movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
294
+ movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
295
+ movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
296
+
297
+ movaps xmm6, xmm5 ; transpose coefficients(phase 2)
298
+ unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
299
+ unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
300
+ movaps xmm3, xmm4 ; transpose coefficients(phase 2)
301
+ unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
302
+ unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
303
+
304
+ movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
305
+ movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
306
+ movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4
307
+ movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
308
+
309
+ .nextcolumn:
310
+ add rsi, byte 4*SIZEOF_JCOEF ; coef_block
311
+ add rdx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr
312
+ add rdi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr
313
+ dec rcx ; ctr
314
+ jnz near .columnloop
315
+
316
+ ; -- Prefetch the next coefficient block
317
+
318
+ prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
319
+ prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
320
+ prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
321
+ prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
322
+
323
+ ; ---- Pass 2: process rows from work array, store into output array.
324
+
325
+ mov rax, [original_rbp]
326
+ lea rsi, [workspace] ; FAST_FLOAT *wsptr
327
+ mov rdi, r12 ; (JSAMPROW *)
328
+ mov eax, r13d
329
+ mov rcx, DCTSIZE/4 ; ctr
330
+ .rowloop:
331
+
332
+ ; -- Even part
333
+
334
+ movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
335
+ movaps xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_FAST_FLOAT)]
336
+ movaps xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_FAST_FLOAT)]
337
+ movaps xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_FAST_FLOAT)]
338
+
339
+ movaps xmm4, xmm0
340
+ movaps xmm5, xmm1
341
+ subps xmm0, xmm2 ; xmm0=tmp11
342
+ subps xmm1, xmm3
343
+ addps xmm4, xmm2 ; xmm4=tmp10
344
+ addps xmm5, xmm3 ; xmm5=tmp13
345
+
346
+ mulps xmm1, [rel PD_1_414]
347
+ subps xmm1, xmm5 ; xmm1=tmp12
348
+
349
+ movaps xmm6, xmm4
350
+ movaps xmm7, xmm0
351
+ subps xmm4, xmm5 ; xmm4=tmp3
352
+ subps xmm0, xmm1 ; xmm0=tmp2
353
+ addps xmm6, xmm5 ; xmm6=tmp0
354
+ addps xmm7, xmm1 ; xmm7=tmp1
355
+
356
+ movaps XMMWORD [wk(1)], xmm4 ; tmp3
357
+ movaps XMMWORD [wk(0)], xmm0 ; tmp2
358
+
359
+ ; -- Odd part
360
+
361
+ movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
362
+ movaps xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_FAST_FLOAT)]
363
+ movaps xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_FAST_FLOAT)]
364
+ movaps xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_FAST_FLOAT)]
365
+
366
+ movaps xmm4, xmm2
367
+ movaps xmm0, xmm5
368
+ addps xmm2, xmm1 ; xmm2=z11
369
+ addps xmm5, xmm3 ; xmm5=z13
370
+ subps xmm4, xmm1 ; xmm4=z12
371
+ subps xmm0, xmm3 ; xmm0=z10
372
+
373
+ movaps xmm1, xmm2
374
+ subps xmm2, xmm5
375
+ addps xmm1, xmm5 ; xmm1=tmp7
376
+
377
+ mulps xmm2, [rel PD_1_414] ; xmm2=tmp11
378
+
379
+ movaps xmm3, xmm0
380
+ addps xmm0, xmm4
381
+ mulps xmm0, [rel PD_1_847] ; xmm0=z5
382
+ mulps xmm3, [rel PD_M2_613] ; xmm3=(z10 * -2.613125930)
383
+ mulps xmm4, [rel PD_1_082] ; xmm4=(z12 * 1.082392200)
384
+ addps xmm3, xmm0 ; xmm3=tmp12
385
+ subps xmm4, xmm0 ; xmm4=tmp10
386
+
387
+ ; -- Final output stage
388
+
389
+ subps xmm3, xmm1 ; xmm3=tmp6
390
+ movaps xmm5, xmm6
391
+ movaps xmm0, xmm7
392
+ addps xmm6, xmm1 ; xmm6=data0=(00 10 20 30)
393
+ addps xmm7, xmm3 ; xmm7=data1=(01 11 21 31)
394
+ subps xmm5, xmm1 ; xmm5=data7=(07 17 27 37)
395
+ subps xmm0, xmm3 ; xmm0=data6=(06 16 26 36)
396
+ subps xmm2, xmm3 ; xmm2=tmp5
397
+
398
+ movaps xmm1, [rel PD_RNDINT_MAGIC] ; xmm1=[rel PD_RNDINT_MAGIC]
399
+ pcmpeqd xmm3, xmm3
400
+ psrld xmm3, WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
401
+
402
+ addps xmm6, xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
403
+ addps xmm7, xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
404
+ addps xmm0, xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
405
+ addps xmm5, xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
406
+
407
+ pand xmm6, xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --)
408
+ pslld xmm7, WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31)
409
+ pand xmm0, xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --)
410
+ pslld xmm5, WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37)
411
+ por xmm6, xmm7 ; xmm6=(00 01 10 11 20 21 30 31)
412
+ por xmm0, xmm5 ; xmm0=(06 07 16 17 26 27 36 37)
413
+
414
+ movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2
415
+ movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3
416
+
417
+ addps xmm4, xmm2 ; xmm4=tmp4
418
+ movaps xmm7, xmm1
419
+ movaps xmm5, xmm3
420
+ addps xmm1, xmm2 ; xmm1=data2=(02 12 22 32)
421
+ addps xmm3, xmm4 ; xmm3=data4=(04 14 24 34)
422
+ subps xmm7, xmm2 ; xmm7=data5=(05 15 25 35)
423
+ subps xmm5, xmm4 ; xmm5=data3=(03 13 23 33)
424
+
425
+ movaps xmm2, [rel PD_RNDINT_MAGIC] ; xmm2=[rel PD_RNDINT_MAGIC]
426
+ pcmpeqd xmm4, xmm4
427
+ psrld xmm4, WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
428
+
429
+ addps xmm3, xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
430
+ addps xmm7, xmm2 ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
431
+ addps xmm1, xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
432
+ addps xmm5, xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
433
+
434
+ pand xmm3, xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --)
435
+ pslld xmm7, WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35)
436
+ pand xmm1, xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --)
437
+ pslld xmm5, WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33)
438
+ por xmm3, xmm7 ; xmm3=(04 05 14 15 24 25 34 35)
439
+ por xmm1, xmm5 ; xmm1=(02 03 12 13 22 23 32 33)
440
+
441
+ movdqa xmm2, [rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP]
442
+
443
+ packsswb xmm6, xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
444
+ packsswb xmm1, xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
445
+ paddb xmm6, xmm2
446
+ paddb xmm1, xmm2
447
+
448
+ movdqa xmm4, xmm6 ; transpose coefficients(phase 2)
449
+ punpcklwd xmm6, xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
450
+ punpckhwd xmm4, xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
451
+
452
+ movdqa xmm7, xmm6 ; transpose coefficients(phase 3)
453
+ punpckldq xmm6, xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
454
+ punpckhdq xmm7, xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
455
+
456
+ pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
457
+ pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
458
+
459
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
460
+ mov rbxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
461
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
462
+ movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7
463
+ mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
464
+ mov rbxp, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
465
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
466
+ movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3
467
+
468
+ add rsi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
469
+ add rdi, byte 4*SIZEOF_JSAMPROW
470
+ dec rcx ; ctr
471
+ jnz near .rowloop
472
+
473
+ pop rbx
474
+ uncollect_args 4
475
+ mov rsp, rbp ; rsp <- aligned rbp
476
+ pop rsp ; rsp <- original rbp
477
+ pop rbp
478
+ ret
479
+
480
+ ; For some reason, the OS X linker does not honor the request to align the
481
+ ; segment unless we do this.
482
+ align 32