image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,574 @@
1
+ ;
2
+ ; jidctred-sse2.asm - reduced-size IDCT (64-bit SSE2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; and can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+ ;
18
+ ; This file contains inverse-DCT routines that produce reduced-size
19
+ ; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
20
+ ; The following code is based directly on the IJG's original jidctred.c;
21
+ ; see jidctred.c for more details.
22
+
23
+ %include "jsimdext.inc"
24
+ %include "jdct.inc"
25
+
26
+ ; --------------------------------------------------------------------------
27
+
28
+ %define CONST_BITS 13 ; fractional bits of the fixed-point F_* multipliers: FIX(x) = round(x * 2^CONST_BITS)
29
+ %define PASS1_BITS 2 ; extra scale bits kept in the pass 1 output (DC-only path shifts left by this)
30
+
31
+ %define DESCALE_P1_4 (CONST_BITS - PASS1_BITS + 1) ; right-shift count applied after pass 1 of the 4x4 IDCT
32
+ %define DESCALE_P2_4 (CONST_BITS + PASS1_BITS + 3 + 1) ; presumably the pass 2 shift of the 4x4 IDCT -- TODO confirm against pass 2
33
+ %define DESCALE_P1_2 (CONST_BITS - PASS1_BITS + 2) ; presumably the pass 1 shift of the 2x2 IDCT -- TODO confirm
34
+ %define DESCALE_P2_2 (CONST_BITS + PASS1_BITS + 3 + 2) ; presumably the pass 2 shift of the 2x2 IDCT -- TODO confirm
35
+
36
+ %if CONST_BITS == 13 ; default precision: use the precomputed integer values below
37
+ F_0_211 equ 1730 ; FIX(0.211164243)
38
+ F_0_509 equ 4176 ; FIX(0.509795579)
39
+ F_0_601 equ 4926 ; FIX(0.601344887)
40
+ F_0_720 equ 5906 ; FIX(0.720959822)
41
+ F_0_765 equ 6270 ; FIX(0.765366865)
42
+ F_0_850 equ 6967 ; FIX(0.850430095)
43
+ F_0_899 equ 7373 ; FIX(0.899976223)
44
+ F_1_061 equ 8697 ; FIX(1.061594337)
45
+ F_1_272 equ 10426 ; FIX(1.272758580)
46
+ F_1_451 equ 11893 ; FIX(1.451774981)
47
+ F_1_847 equ 15137 ; FIX(1.847759065)
48
+ F_2_172 equ 17799 ; FIX(2.172734803)
49
+ F_2_562 equ 20995 ; FIX(2.562915447)
50
+ F_3_624 equ 29692 ; FIX(3.624509785)
51
+ %else
52
+ ; NASM cannot do compile-time arithmetic on floating-point constants, so each
; multiplier is written as an integer scaled by 2^30 and then reduced to
; CONST_BITS fractional bits with a rounding right shift.
53
+ %define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) ; right shift by n, rounding to nearest
54
+ F_0_211 equ DESCALE( 226735879, 30 - CONST_BITS) ; FIX(0.211164243)
55
+ F_0_509 equ DESCALE( 547388834, 30 - CONST_BITS) ; FIX(0.509795579)
56
+ F_0_601 equ DESCALE( 645689155, 30 - CONST_BITS) ; FIX(0.601344887)
57
+ F_0_720 equ DESCALE( 774124714, 30 - CONST_BITS) ; FIX(0.720959822)
58
+ F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865)
59
+ F_0_850 equ DESCALE( 913142361, 30 - CONST_BITS) ; FIX(0.850430095)
60
+ F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223)
61
+ F_1_061 equ DESCALE(1139878239, 30 - CONST_BITS) ; FIX(1.061594337)
62
+ F_1_272 equ DESCALE(1366614119, 30 - CONST_BITS) ; FIX(1.272758580)
63
+ F_1_451 equ DESCALE(1558831516, 30 - CONST_BITS) ; FIX(1.451774981)
64
+ F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065)
65
+ F_2_172 equ DESCALE(2332956230, 30 - CONST_BITS) ; FIX(2.172734803)
66
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
67
+ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
68
+ %endif
69
+
70
+ ; --------------------------------------------------------------------------
71
+ SECTION SEG_CONST ; constant table shared by the reduced-size IDCT routines in this file
72
+
73
+ alignz 32
74
+ GLOBAL_DATA(jconst_idct_red_sse2)
75
+
76
+ EXTN(jconst_idct_red_sse2):
77
+
78
+ PW_F184_MF076 times 4 dw F_1_847, -F_0_765 ; pmaddwd with interleaved (in2, in6): in2*F_1_847 - in6*F_0_765 (4x4 even part, tmp2)
79
+ PW_F256_F089 times 4 dw F_2_562, F_0_899 ; pmaddwd with interleaved (in1, in3): in1*F_2_562 + in3*F_0_899 (4x4 odd part, summed into tmp2)
80
+ PW_F106_MF217 times 4 dw F_1_061, -F_2_172 ; pmaddwd with interleaved (in1, in3): in1*F_1_061 - in3*F_2_172 (4x4 odd part, summed into tmp0)
81
+ PW_MF060_MF050 times 4 dw -F_0_601, -F_0_509 ; pmaddwd with interleaved (in5, in7): -in5*F_0_601 - in7*F_0_509 (4x4 odd part, summed into tmp2)
82
+ PW_F145_MF021 times 4 dw F_1_451, -F_0_211 ; pmaddwd with interleaved (in5, in7): in5*F_1_451 - in7*F_0_211 (4x4 odd part, summed into tmp0)
83
+ PW_F362_MF127 times 4 dw F_3_624, -F_1_272 ; not referenced by the 4x4 pass 1 code; presumably used by the 2x2 IDCT -- TODO confirm
84
+ PW_F085_MF072 times 4 dw F_0_850, -F_0_720 ; not referenced by the 4x4 pass 1 code; presumably used by the 2x2 IDCT -- TODO confirm
85
+ PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4 - 1) ; rounding bias added to each dword before psrad by DESCALE_P1_4
86
+ PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4 - 1) ; half of 2^DESCALE_P2_4; presumably the matching rounding bias -- TODO confirm
87
+ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1) ; half of 2^DESCALE_P1_2; presumably the matching rounding bias -- TODO confirm
88
+ PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1) ; half of 2^DESCALE_P2_2; presumably the matching rounding bias -- TODO confirm
89
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE ; 16 bytes of CENTERJSAMPLE (symbol is not defined in this file)
90
+
91
+ alignz 32
92
+
93
+ ; --------------------------------------------------------------------------
94
+ SECTION SEG_TEXT
95
+ BITS 64
96
+ ;
97
+ ; Perform dequantization and inverse DCT on one block of coefficients,
98
+ ; producing a reduced-size 4x4 output block.
99
+ ;
100
+ ; GLOBAL(void)
101
+ ; jsimd_idct_4x4_sse2(void *dct_table, JCOEFPTR coef_block,
102
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
103
+ ;
104
+
105
+ ; r10 = void *dct_table
106
+ ; r11 = JCOEFPTR coef_block
107
+ ; r12 = JSAMPARRAY output_buf
108
+ ; r13d = JDIMENSION output_col
109
+
110
+ %define original_rbp rbp + 0
111
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
112
+ ; xmmword wk[WK_NUM]
113
+ %define WK_NUM 2
114
+
115
+ align 32
116
+ GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
117
+
118
+ EXTN(jsimd_idct_4x4_sse2):
119
+ push rbp
120
+ mov rax, rsp ; rax = original rbp
121
+ sub rsp, byte 4
122
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
123
+ mov [rsp], rax
124
+ mov rbp, rsp ; rbp = aligned rbp
125
+ lea rsp, [wk(0)]
126
+ collect_args 4
127
+
128
+ ; ---- Pass 1: process columns from input.
129
+
130
+ mov rdx, r10 ; quantptr
131
+ mov rsi, r11 ; inptr
132
+
133
+ %ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
134
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
135
+ or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
136
+ jnz short .columnDCT
137
+
138
+ movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
139
+ movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
140
+ por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
141
+ por xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
142
+ por xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
143
+ por xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
144
+ por xmm0, xmm1
145
+ packsswb xmm0, xmm0
146
+ packsswb xmm0, xmm0
147
+ movd eax, xmm0
148
+ test rax, rax
149
+ jnz short .columnDCT
150
+
151
+ ; -- AC terms all zero
152
+
153
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
154
+ pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
155
+
156
+ psllw xmm0, PASS1_BITS
157
+
158
+ movdqa xmm3, xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07)
159
+ punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03)
160
+ punpckhwd xmm3, xmm3 ; xmm3=(04 04 05 05 06 06 07 07)
161
+
162
+ pshufd xmm1, xmm0, 0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
163
+ pshufd xmm0, xmm0, 0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
164
+ pshufd xmm6, xmm3, 0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
165
+ pshufd xmm3, xmm3, 0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
166
+
167
+ jmp near .column_end
168
+ %endif
169
+ .columnDCT:
170
+
171
+ ; -- Odd part
172
+
173
+ movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
174
+ movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
175
+ pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
176
+ pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
177
+ movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
178
+ movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
179
+ pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
180
+ pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
181
+
182
+ movdqa xmm4, xmm0
183
+ movdqa xmm5, xmm0
184
+ punpcklwd xmm4, xmm1
185
+ punpckhwd xmm5, xmm1
186
+ movdqa xmm0, xmm4
187
+ movdqa xmm1, xmm5
188
+ pmaddwd xmm4, [rel PW_F256_F089] ; xmm4=(tmp2L)
189
+ pmaddwd xmm5, [rel PW_F256_F089] ; xmm5=(tmp2H)
190
+ pmaddwd xmm0, [rel PW_F106_MF217] ; xmm0=(tmp0L)
191
+ pmaddwd xmm1, [rel PW_F106_MF217] ; xmm1=(tmp0H)
192
+
193
+ movdqa xmm6, xmm2
194
+ movdqa xmm7, xmm2
195
+ punpcklwd xmm6, xmm3
196
+ punpckhwd xmm7, xmm3
197
+ movdqa xmm2, xmm6
198
+ movdqa xmm3, xmm7
199
+ pmaddwd xmm6, [rel PW_MF060_MF050] ; xmm6=(tmp2L)
200
+ pmaddwd xmm7, [rel PW_MF060_MF050] ; xmm7=(tmp2H)
201
+ pmaddwd xmm2, [rel PW_F145_MF021] ; xmm2=(tmp0L)
202
+ pmaddwd xmm3, [rel PW_F145_MF021] ; xmm3=(tmp0H)
203
+
204
+ paddd xmm6, xmm4 ; xmm6=tmp2L
205
+ paddd xmm7, xmm5 ; xmm7=tmp2H
206
+ paddd xmm2, xmm0 ; xmm2=tmp0L
207
+ paddd xmm3, xmm1 ; xmm3=tmp0H
208
+
209
+ movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L
210
+ movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H
211
+
212
+ ; -- Even part
213
+
214
+ movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
215
+ movdqa xmm5, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
216
+ movdqa xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
217
+ pmullw xmm4, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
218
+ pmullw xmm5, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
219
+ pmullw xmm0, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
220
+
221
+ pxor xmm1, xmm1
222
+ pxor xmm2, xmm2
223
+ punpcklwd xmm1, xmm4 ; xmm1=tmp0L
224
+ punpckhwd xmm2, xmm4 ; xmm2=tmp0H
225
+ psrad xmm1, (16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
226
+ psrad xmm2, (16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
227
+
228
+ movdqa xmm3, xmm5 ; xmm5=in2=z2
229
+ punpcklwd xmm5, xmm0 ; xmm0=in6=z3
230
+ punpckhwd xmm3, xmm0
231
+ pmaddwd xmm5, [rel PW_F184_MF076] ; xmm5=tmp2L
232
+ pmaddwd xmm3, [rel PW_F184_MF076] ; xmm3=tmp2H
233
+
234
+ movdqa xmm4, xmm1
235
+ movdqa xmm0, xmm2
236
+ paddd xmm1, xmm5 ; xmm1=tmp10L
237
+ paddd xmm2, xmm3 ; xmm2=tmp10H
238
+ psubd xmm4, xmm5 ; xmm4=tmp12L
239
+ psubd xmm0, xmm3 ; xmm0=tmp12H
240
+
241
+ ; -- Final output stage
242
+
243
+ movdqa xmm5, xmm1
244
+ movdqa xmm3, xmm2
245
+ paddd xmm1, xmm6 ; xmm1=data0L
246
+ paddd xmm2, xmm7 ; xmm2=data0H
247
+ psubd xmm5, xmm6 ; xmm5=data3L
248
+ psubd xmm3, xmm7 ; xmm3=data3H
249
+
250
+ movdqa xmm6, [rel PD_DESCALE_P1_4] ; xmm6=[rel PD_DESCALE_P1_4]
251
+
252
+ paddd xmm1, xmm6
253
+ paddd xmm2, xmm6
254
+ psrad xmm1, DESCALE_P1_4
255
+ psrad xmm2, DESCALE_P1_4
256
+ paddd xmm5, xmm6
257
+ paddd xmm3, xmm6
258
+ psrad xmm5, DESCALE_P1_4
259
+ psrad xmm3, DESCALE_P1_4
260
+
261
+ packssdw xmm1, xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07)
262
+ packssdw xmm5, xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37)
263
+
264
+ movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L
265
+ movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H
266
+
267
+ movdqa xmm2, xmm4
268
+ movdqa xmm3, xmm0
269
+ paddd xmm4, xmm7 ; xmm4=data1L
270
+ paddd xmm0, xmm6 ; xmm0=data1H
271
+ psubd xmm2, xmm7 ; xmm2=data2L
272
+ psubd xmm3, xmm6 ; xmm3=data2H
273
+
274
+ movdqa xmm7, [rel PD_DESCALE_P1_4] ; xmm7=[rel PD_DESCALE_P1_4]
275
+
276
+ paddd xmm4, xmm7
277
+ paddd xmm0, xmm7
278
+ psrad xmm4, DESCALE_P1_4
279
+ psrad xmm0, DESCALE_P1_4
280
+ paddd xmm2, xmm7
281
+ paddd xmm3, xmm7
282
+ psrad xmm2, DESCALE_P1_4
283
+ psrad xmm3, DESCALE_P1_4
284
+
285
+ packssdw xmm4, xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17)
286
+ packssdw xmm2, xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27)
287
+
288
+ movdqa xmm6, xmm1 ; transpose coefficients(phase 1)
289
+ punpcklwd xmm1, xmm4 ; xmm1=(00 10 01 11 02 12 03 13)
290
+ punpckhwd xmm6, xmm4 ; xmm6=(04 14 05 15 06 16 07 17)
291
+ movdqa xmm7, xmm2 ; transpose coefficients(phase 1)
292
+ punpcklwd xmm2, xmm5 ; xmm2=(20 30 21 31 22 32 23 33)
293
+ punpckhwd xmm7, xmm5 ; xmm7=(24 34 25 35 26 36 27 37)
294
+
295
+ movdqa xmm0, xmm1 ; transpose coefficients(phase 2)
296
+ punpckldq xmm1, xmm2 ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
297
+ punpckhdq xmm0, xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
298
+ movdqa xmm3, xmm6 ; transpose coefficients(phase 2)
299
+ punpckldq xmm6, xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
300
+ punpckhdq xmm3, xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
301
+ .column_end:
302
+
303
+ ; -- Prefetch the next coefficient block
304
+
305
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
306
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
307
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
308
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
309
+
310
+ ; ---- Pass 2: process rows, store into output array.
311
+
312
+ mov rax, [original_rbp]
313
+ mov rdi, r12 ; (JSAMPROW *)
314
+ mov eax, r13d
315
+
316
+ ; -- Even part
317
+
318
+ pxor xmm4, xmm4
319
+ punpcklwd xmm4, xmm1 ; xmm4=tmp0
320
+ psrad xmm4, (16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
321
+
322
+ ; -- Odd part
323
+
324
+ punpckhwd xmm1, xmm0
325
+ punpckhwd xmm6, xmm3
326
+ movdqa xmm5, xmm1
327
+ movdqa xmm2, xmm6
328
+ pmaddwd xmm1, [rel PW_F256_F089] ; xmm1=(tmp2)
329
+ pmaddwd xmm6, [rel PW_MF060_MF050] ; xmm6=(tmp2)
330
+ pmaddwd xmm5, [rel PW_F106_MF217] ; xmm5=(tmp0)
331
+ pmaddwd xmm2, [rel PW_F145_MF021] ; xmm2=(tmp0)
332
+
333
+ paddd xmm6, xmm1 ; xmm6=tmp2
334
+ paddd xmm2, xmm5 ; xmm2=tmp0
335
+
336
+ ; -- Even part
337
+
338
+ punpcklwd xmm0, xmm3
339
+ pmaddwd xmm0, [rel PW_F184_MF076] ; xmm0=tmp2
340
+
341
+ movdqa xmm7, xmm4
342
+ paddd xmm4, xmm0 ; xmm4=tmp10
343
+ psubd xmm7, xmm0 ; xmm7=tmp12
344
+
345
+ ; -- Final output stage
346
+
347
+ movdqa xmm1, [rel PD_DESCALE_P2_4] ; xmm1=[rel PD_DESCALE_P2_4]
348
+
349
+ movdqa xmm5, xmm4
350
+ movdqa xmm3, xmm7
351
+ paddd xmm4, xmm6 ; xmm4=data0=(00 10 20 30)
352
+ paddd xmm7, xmm2 ; xmm7=data1=(01 11 21 31)
353
+ psubd xmm5, xmm6 ; xmm5=data3=(03 13 23 33)
354
+ psubd xmm3, xmm2 ; xmm3=data2=(02 12 22 32)
355
+
356
+ paddd xmm4, xmm1
357
+ paddd xmm7, xmm1
358
+ psrad xmm4, DESCALE_P2_4
359
+ psrad xmm7, DESCALE_P2_4
360
+ paddd xmm5, xmm1
361
+ paddd xmm3, xmm1
362
+ psrad xmm5, DESCALE_P2_4
363
+ psrad xmm3, DESCALE_P2_4
364
+
365
+ packssdw xmm4, xmm3 ; xmm4=(00 10 20 30 02 12 22 32)
366
+ packssdw xmm7, xmm5 ; xmm7=(01 11 21 31 03 13 23 33)
367
+
368
+ movdqa xmm0, xmm4 ; transpose coefficients(phase 1)
369
+ punpcklwd xmm4, xmm7 ; xmm4=(00 01 10 11 20 21 30 31)
370
+ punpckhwd xmm0, xmm7 ; xmm0=(02 03 12 13 22 23 32 33)
371
+
372
+ movdqa xmm6, xmm4 ; transpose coefficients(phase 2)
373
+ punpckldq xmm4, xmm0 ; xmm4=(00 01 02 03 10 11 12 13)
374
+ punpckhdq xmm6, xmm0 ; xmm6=(20 21 22 23 30 31 32 33)
375
+
376
+ packsswb xmm4, xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..)
377
+ paddb xmm4, [rel PB_CENTERJSAMP]
378
+
379
+ pshufd xmm2, xmm4, 0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..)
380
+ pshufd xmm1, xmm4, 0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
381
+ pshufd xmm3, xmm4, 0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
382
+
383
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
384
+ mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
385
+ movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
386
+ movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
387
+ mov rdxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
388
+ mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
389
+ movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
390
+ movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
391
+
392
+ uncollect_args 4
393
+ mov rsp, rbp ; rsp <- aligned rbp
394
+ pop rsp ; rsp <- original rbp
395
+ pop rbp
396
+ ret
397
+
398
+ ; --------------------------------------------------------------------------
399
+ ; jsimd_idct_2x2_sse2
400
+ ; Perform dequantization and inverse DCT on one block of coefficients,
401
+ ; producing a reduced-size 2x2 output block.
402
+ ; Only coefficient rows and columns 0, 1, 3, 5 and 7 are used (see diagrams).
403
+ ; GLOBAL(void)
404
+ ; jsimd_idct_2x2_sse2(void *dct_table, JCOEFPTR coef_block,
405
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
406
+ ; Writes one 2-byte word to each of output_buf[0] and output_buf[1].
407
+
408
+ ; r10 = void *dct_table (presumably placed here by collect_args -- confirm)
409
+ ; r11 = JCOEFPTR coef_block
410
+ ; r12 = JSAMPARRAY output_buf
411
+ ; r13d = JDIMENSION output_col
412
+
413
+ align 32
414
+ GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
415
+
416
+ EXTN(jsimd_idct_2x2_sse2):
417
+ push rbp ; save caller's rbp; restored by the final pop rbp
418
+ mov rax, rsp ; rax=rsp after the push (presumably for collect_args; macro not shown)
419
+ mov rbp, rsp ; rbp=frame pointer
420
+ collect_args 4 ; macro defined elsewhere; see register list above
421
+ push rbx ; rbx is clobbered by pextrw in pass 2
422
+
423
+ ; ---- Pass 1: process columns from input.
424
+
425
+ mov rdx, r10 ; quantptr
426
+ mov rsi, r11 ; inptr
427
+
428
+ ; | input: | result: |
429
+ ; | 00 01 ** 03 ** 05 ** 07 | |
430
+ ; | 10 11 ** 13 ** 15 ** 17 | |
431
+ ; | ** ** ** ** ** ** ** ** | |
432
+ ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
433
+ ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
434
+ ; | 50 51 ** 53 ** 55 ** 57 | |
435
+ ; | ** ** ** ** ** ** ** ** | |
436
+ ; | 70 71 ** 73 ** 75 ** 77 | |
437
+
438
+ ; -- Odd part
439
+
440
+ movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] ; xmm0=coef row 1
441
+ movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] ; xmm1=coef row 3
442
+ pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] ; dequantize row 1
443
+ pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] ; dequantize row 3
444
+ movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] ; xmm2=coef row 5
445
+ movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] ; xmm3=coef row 7
446
+ pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] ; dequantize row 5
447
+ pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] ; dequantize row 7
448
+
449
+ ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
450
+ ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
451
+
452
+ pcmpeqd xmm7, xmm7 ; xmm7=all ones
453
+ pslld xmm7, WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
454
+
455
+ movdqa xmm4, xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17)
456
+ movdqa xmm5, xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57)
457
+ punpcklwd xmm4, xmm1 ; xmm4=(10 30 11 31 ** ** 13 33)
458
+ punpcklwd xmm5, xmm3 ; xmm5=(50 70 51 71 ** ** 53 73)
459
+ pmaddwd xmm4, [rel PW_F362_MF127] ; per dword: (row1,row3) word pair times constant pair, summed
460
+ pmaddwd xmm5, [rel PW_F085_MF072] ; per dword: (row5,row7) word pair times constant pair, summed
461
+
462
+ psrld xmm0, WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --)
463
+ pand xmm1, xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37)
464
+ psrld xmm2, WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --)
465
+ pand xmm3, xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77)
466
+ por xmm0, xmm1 ; xmm0=(11 31 13 33 15 35 17 37)
467
+ por xmm2, xmm3 ; xmm2=(51 71 53 73 55 75 57 77)
468
+ pmaddwd xmm0, [rel PW_F362_MF127] ; same constants as above, now for the odd columns
469
+ pmaddwd xmm2, [rel PW_F085_MF072] ; same constants as above, now for the odd columns
470
+
471
+ paddd xmm4, xmm5 ; xmm4=tmp0[col0 col1 **** col3]
472
+ paddd xmm0, xmm2 ; xmm0=tmp0[col1 col3 col5 col7]
473
+
474
+ ; -- Even part
475
+
476
+ movdqa xmm6, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] ; xmm6=coef row 0
477
+ pmullw xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] ; dequantize row 0
478
+
479
+ ; xmm6=(00 01 ** 03 ** 05 ** 07)
480
+
481
+ movdqa xmm1, xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07)
482
+ pslld xmm6, WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **)
483
+ pand xmm1, xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07)
484
+ psrad xmm6, (WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****]
485
+ psrad xmm1, (WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7]
486
+
487
+ ; -- Final output stage
488
+
489
+ movdqa xmm3, xmm6 ; xmm3=copy of tmp10[col0 **** **** ****]
490
+ movdqa xmm5, xmm1 ; xmm5=copy of tmp10[col1 col3 col5 col7]
491
+ paddd xmm6, xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
492
+ paddd xmm1, xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
493
+ psubd xmm3, xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
494
+ psubd xmm5, xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
495
+
496
+ movdqa xmm2, [rel PD_DESCALE_P1_2] ; xmm2=[rel PD_DESCALE_P1_2]
497
+
498
+ punpckldq xmm6, xmm3 ; xmm6=(A0 B0 ** **)
499
+
500
+ movdqa xmm7, xmm1 ; xmm7=(A1 A3 A5 A7); the word mask in xmm7 is no longer needed
501
+ punpcklqdq xmm1, xmm5 ; xmm1=(A1 A3 B1 B3)
502
+ punpckhqdq xmm7, xmm5 ; xmm7=(A5 A7 B5 B7)
503
+
504
+ paddd xmm6, xmm2 ; add PD_DESCALE_P1_2 (presumably the rounding bias -- value not shown here)
505
+ psrad xmm6, DESCALE_P1_2 ; xmm6=descaled (A0 B0 ** **)
506
+
507
+ paddd xmm1, xmm2 ; same bias as for xmm6
508
+ paddd xmm7, xmm2 ; same bias as for xmm6
509
+ psrad xmm1, DESCALE_P1_2 ; xmm1=descaled (A1 A3 B1 B3)
510
+ psrad xmm7, DESCALE_P1_2 ; xmm7=descaled (A5 A7 B5 B7)
511
+
512
+ ; -- Prefetch the next coefficient block
513
+
514
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] ; rsi still holds inptr here
515
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] ; four prefetches, 32 bytes apart
516
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
517
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
518
+
519
+ ; ---- Pass 2: process rows, store into output array.
520
+
521
+ mov rdi, r12 ; rdi=output_buf (JSAMPROW *)
522
+ mov eax, r13d ; rax=output_col (32-bit write zero-extends into rax)
523
+
524
+ ; | input:| result:|
525
+ ; | A0 B0 | |
526
+ ; | A1 B1 | C0 C1 |
527
+ ; | A3 B3 | D0 D1 |
528
+ ; | A5 B5 | |
529
+ ; | A7 B7 | |
530
+
531
+ ; -- Odd part
532
+
533
+ packssdw xmm1, xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
534
+ packssdw xmm7, xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
535
+ pmaddwd xmm1, [rel PW_F362_MF127] ; per dword: (A1,A3) or (B1,B3) pair times constant pair, summed
536
+ pmaddwd xmm7, [rel PW_F085_MF072] ; per dword: (A5,A7) or (B5,B7) pair times constant pair, summed
537
+
538
+ paddd xmm1, xmm7 ; xmm1=tmp0[row0 row1 row0 row1]
539
+
540
+ ; -- Even part
541
+
542
+ pslld xmm6, (CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****]
543
+
544
+ ; -- Final output stage
545
+
546
+ movdqa xmm4, xmm6 ; xmm4=copy of tmp10[row0 row1 **** ****]
547
+ paddd xmm6, xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
548
+ psubd xmm4, xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
549
+
550
+ punpckldq xmm6, xmm4 ; xmm6=(C0 D0 C1 D1)
551
+
552
+ paddd xmm6, [rel PD_DESCALE_P2_2] ; add PD_DESCALE_P2_2 (presumably the rounding bias -- value not shown here)
553
+ psrad xmm6, DESCALE_P2_2 ; xmm6=descaled (C0 D0 C1 D1)
554
+
555
+ packssdw xmm6, xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
556
+ packsswb xmm6, xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
557
+ paddb xmm6, [rel PB_CENTERJSAMP] ; add PB_CENTERJSAMP to every byte (presumably the sample level shift -- confirm)
558
+
559
+ pextrw ebx, xmm6, 0x00 ; ebx=(C0 D0 -- --)
560
+ pextrw ecx, xmm6, 0x01 ; ecx=(C1 D1 -- --)
561
+
562
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; first row pointer of output_buf (addressed via rdx below)
563
+ mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; second row pointer of output_buf; inptr in rsi is no longer needed
564
+ mov word [rdx+rax*SIZEOF_JSAMPLE], bx ; store bytes (C0 D0) into the first row at output_col
565
+ mov word [rsi+rax*SIZEOF_JSAMPLE], cx ; store bytes (C1 D1) into the second row at output_col
566
+
567
+ pop rbx ; restore rbx saved in the prologue
568
+ uncollect_args 4 ; macro defined elsewhere; presumably undoes collect_args 4
569
+ pop rbp ; restore caller's rbp
570
+ ret
571
+
572
+ ; For some reason, the OS X linker does not honor the request to align the
573
+ ; segment unless we do this.
574
+ align 32