image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,596 @@
1
+ ;
2
+ ; jdmrgext-avx2.asm - merged upsampling/color conversion (64-bit AVX2)
3
+ ;
4
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2012, 2016, D. R. Commander.
6
+ ; Copyright (C) 2015, Intel Corporation.
7
+ ; Copyright (C) 2018, Matthias Räncker.
8
+ ;
9
+ ; Based on the x86 SIMD extension for IJG JPEG library
10
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
11
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
12
+ ;
13
+ ; This file should be assembled with NASM (Netwide Assembler),
14
+ ; and can *not* be assembled with Microsoft's MASM or any compatible
15
+ ; assembler (including Borland's Turbo Assembler).
16
+ ; NASM is available from http://nasm.sourceforge.net/ or
17
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
18
+
19
+ %include "jcolsamp.inc"
20
+
21
+ ; --------------------------------------------------------------------------
22
+ ;
23
+ ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
24
+ ;
25
+ ; GLOBAL(void)
26
+ ; jsimd_h2v1_merged_upsample_avx2(JDIMENSION output_width,
27
+ ; JSAMPIMAGE input_buf,
28
+ ; JDIMENSION in_row_group_ctr,
29
+ ; JSAMPARRAY output_buf);
30
+ ;
31
+
32
+ ; r10d = JDIMENSION output_width
33
+ ; r11 = JSAMPIMAGE input_buf
34
+ ; r12d = JDIMENSION in_row_group_ctr
35
+ ; r13 = JSAMPARRAY output_buf
36
+
37
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]: address of scratch slot i, laid out just below the YMMWORD-aligned rbp set up in the prologue (wk(0) is the lowest slot)
38
+ %define WK_NUM 3 ; number of ymmword scratch slots; the h2v1 routine below uses wk(0)=(B-Y)H, wk(1)=(R-Y)H, wk(2)=(G-Y)H
39
+
40
+ align 32
41
+ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
42
+
43
+ EXTN(jsimd_h2v1_merged_upsample_avx2):
44
+ push rbp
45
+ mov rax, rsp ; rax = original rbp
46
+ sub rsp, byte 4
47
+ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
48
+ mov [rsp], rax
49
+ mov rbp, rsp ; rbp = aligned rbp
50
+ lea rsp, [wk(0)]
51
+ collect_args 4
52
+ push rbx
53
+
54
+ mov ecx, r10d ; col
55
+ test rcx, rcx
56
+ jz near .return
57
+
58
+ push rcx
59
+
60
+ mov rdi, r11
61
+ mov ecx, r12d
62
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
63
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
64
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
65
+ mov rdi, r13
66
+ mov rsip, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
67
+ mov rbxp, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
68
+ mov rdxp, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
69
+ mov rdip, JSAMPROW [rdi] ; outptr
70
+
71
+ pop rcx ; col
72
+
73
+ .columnloop:
74
+
75
+ vmovdqu ymm6, YMMWORD [rbx] ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
76
+ vmovdqu ymm7, YMMWORD [rdx] ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
77
+
78
+ vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's)
79
+ vpcmpeqw ymm3, ymm3, ymm3
80
+ vpsllw ymm3, ymm3, 7 ; ymm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
81
+
82
+ vpermq ymm6, ymm6, 0xd8 ; ymm6=Cb(01234567GHIJKLMN89ABCDEFOPQRSTUV)
83
+ vpermq ymm7, ymm7, 0xd8 ; ymm7=Cr(01234567GHIJKLMN89ABCDEFOPQRSTUV)
84
+ vpunpcklbw ymm4, ymm6, ymm1 ; ymm4=Cb(0123456789ABCDEF)=CbL
85
+ vpunpckhbw ymm6, ymm6, ymm1 ; ymm6=Cb(GHIJKLMNOPQRSTUV)=CbH
86
+ vpunpcklbw ymm0, ymm7, ymm1 ; ymm0=Cr(0123456789ABCDEF)=CrL
87
+ vpunpckhbw ymm7, ymm7, ymm1 ; ymm7=Cr(GHIJKLMNOPQRSTUV)=CrH
88
+
89
+ vpaddw ymm5, ymm6, ymm3
90
+ vpaddw ymm2, ymm4, ymm3
91
+ vpaddw ymm1, ymm7, ymm3
92
+ vpaddw ymm3, ymm0, ymm3
93
+
94
+ ; (Original)
95
+ ; R = Y + 1.40200 * Cr
96
+ ; G = Y - 0.34414 * Cb - 0.71414 * Cr
97
+ ; B = Y + 1.77200 * Cb
98
+ ;
99
+ ; (This implementation)
100
+ ; R = Y + 0.40200 * Cr + Cr
101
+ ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
102
+ ; B = Y - 0.22800 * Cb + Cb + Cb
103
+
104
+ vpaddw ymm6, ymm5, ymm5 ; ymm6=2*CbH
105
+ vpaddw ymm4, ymm2, ymm2 ; ymm4=2*CbL
106
+ vpaddw ymm7, ymm1, ymm1 ; ymm7=2*CrH
107
+ vpaddw ymm0, ymm3, ymm3 ; ymm0=2*CrL
108
+
109
+ vpmulhw ymm6, ymm6, [rel PW_MF0228] ; ymm6=(2*CbH * -FIX(0.22800))
110
+ vpmulhw ymm4, ymm4, [rel PW_MF0228] ; ymm4=(2*CbL * -FIX(0.22800))
111
+ vpmulhw ymm7, ymm7, [rel PW_F0402] ; ymm7=(2*CrH * FIX(0.40200))
112
+ vpmulhw ymm0, ymm0, [rel PW_F0402] ; ymm0=(2*CrL * FIX(0.40200))
113
+
114
+ vpaddw ymm6, ymm6, [rel PW_ONE]
115
+ vpaddw ymm4, ymm4, [rel PW_ONE]
116
+ vpsraw ymm6, ymm6, 1 ; ymm6=(CbH * -FIX(0.22800))
117
+ vpsraw ymm4, ymm4, 1 ; ymm4=(CbL * -FIX(0.22800))
118
+ vpaddw ymm7, ymm7, [rel PW_ONE]
119
+ vpaddw ymm0, ymm0, [rel PW_ONE]
120
+ vpsraw ymm7, ymm7, 1 ; ymm7=(CrH * FIX(0.40200))
121
+ vpsraw ymm0, ymm0, 1 ; ymm0=(CrL * FIX(0.40200))
122
+
123
+ vpaddw ymm6, ymm6, ymm5
124
+ vpaddw ymm4, ymm4, ymm2
125
+ vpaddw ymm6, ymm6, ymm5 ; ymm6=(CbH * FIX(1.77200))=(B-Y)H
126
+ vpaddw ymm4, ymm4, ymm2 ; ymm4=(CbL * FIX(1.77200))=(B-Y)L
127
+ vpaddw ymm7, ymm7, ymm1 ; ymm7=(CrH * FIX(1.40200))=(R-Y)H
128
+ vpaddw ymm0, ymm0, ymm3 ; ymm0=(CrL * FIX(1.40200))=(R-Y)L
129
+
130
+ vmovdqa YMMWORD [wk(0)], ymm6 ; wk(0)=(B-Y)H
131
+ vmovdqa YMMWORD [wk(1)], ymm7 ; wk(1)=(R-Y)H
132
+
133
+ vpunpckhwd ymm6, ymm5, ymm1
134
+ vpunpcklwd ymm5, ymm5, ymm1
135
+ vpmaddwd ymm5, ymm5, [rel PW_MF0344_F0285]
136
+ vpmaddwd ymm6, ymm6, [rel PW_MF0344_F0285]
137
+ vpunpckhwd ymm7, ymm2, ymm3
138
+ vpunpcklwd ymm2, ymm2, ymm3
139
+ vpmaddwd ymm2, ymm2, [rel PW_MF0344_F0285]
140
+ vpmaddwd ymm7, ymm7, [rel PW_MF0344_F0285]
141
+
142
+ vpaddd ymm5, ymm5, [rel PD_ONEHALF]
143
+ vpaddd ymm6, ymm6, [rel PD_ONEHALF]
144
+ vpsrad ymm5, ymm5, SCALEBITS
145
+ vpsrad ymm6, ymm6, SCALEBITS
146
+ vpaddd ymm2, ymm2, [rel PD_ONEHALF]
147
+ vpaddd ymm7, ymm7, [rel PD_ONEHALF]
148
+ vpsrad ymm2, ymm2, SCALEBITS
149
+ vpsrad ymm7, ymm7, SCALEBITS
150
+
151
+ vpackssdw ymm5, ymm5, ymm6 ; ymm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
152
+ vpackssdw ymm2, ymm2, ymm7 ; ymm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
153
+ vpsubw ymm5, ymm5, ymm1 ; ymm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
154
+ vpsubw ymm2, ymm2, ymm3 ; ymm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
155
+
156
+ vmovdqa YMMWORD [wk(2)], ymm5 ; wk(2)=(G-Y)H
157
+
158
+ mov al, 2 ; Yctr
159
+ jmp short .Yloop_1st
160
+
161
+ .Yloop_2nd:
162
+ vmovdqa ymm0, YMMWORD [wk(1)] ; ymm0=(R-Y)H
163
+ vmovdqa ymm2, YMMWORD [wk(2)] ; ymm2=(G-Y)H
164
+ vmovdqa ymm4, YMMWORD [wk(0)] ; ymm4=(B-Y)H
165
+
166
+ .Yloop_1st:
167
+ vmovdqu ymm7, YMMWORD [rsi] ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
168
+
169
+ vpcmpeqw ymm6, ymm6, ymm6
170
+ vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..}
171
+ vpand ymm6, ymm6, ymm7 ; ymm6=Y(02468ACEGIKMOQSU)=YE
172
+ vpsrlw ymm7, ymm7, BYTE_BIT ; ymm7=Y(13579BDFHJLNPRTV)=YO
173
+
174
+ vmovdqa ymm1, ymm0 ; ymm1=ymm0=(R-Y)(L/H)
175
+ vmovdqa ymm3, ymm2 ; ymm3=ymm2=(G-Y)(L/H)
176
+ vmovdqa ymm5, ymm4 ; ymm5=ymm4=(B-Y)(L/H)
177
+
178
+ vpaddw ymm0, ymm0, ymm6 ; ymm0=((R-Y)+YE)=RE=R(02468ACEGIKMOQSU)
179
+ vpaddw ymm1, ymm1, ymm7 ; ymm1=((R-Y)+YO)=RO=R(13579BDFHJLNPRTV)
180
+ vpackuswb ymm0, ymm0, ymm0 ; ymm0=R(02468ACE********GIKMOQSU********)
181
+ vpackuswb ymm1, ymm1, ymm1 ; ymm1=R(13579BDF********HJLNPRTV********)
182
+
183
+ vpaddw ymm2, ymm2, ymm6 ; ymm2=((G-Y)+YE)=GE=G(02468ACEGIKMOQSU)
184
+ vpaddw ymm3, ymm3, ymm7 ; ymm3=((G-Y)+YO)=GO=G(13579BDFHJLNPRTV)
185
+ vpackuswb ymm2, ymm2, ymm2 ; ymm2=G(02468ACE********GIKMOQSU********)
186
+ vpackuswb ymm3, ymm3, ymm3 ; ymm3=G(13579BDF********HJLNPRTV********)
187
+
188
+ vpaddw ymm4, ymm4, ymm6 ; ymm4=((B-Y)+YE)=BE=B(02468ACEGIKMOQSU)
189
+ vpaddw ymm5, ymm5, ymm7 ; ymm5=((B-Y)+YO)=BO=B(13579BDFHJLNPRTV)
190
+ vpackuswb ymm4, ymm4, ymm4 ; ymm4=B(02468ACE********GIKMOQSU********)
191
+ vpackuswb ymm5, ymm5, ymm5 ; ymm5=B(13579BDF********HJLNPRTV********)
192
+
193
+ %if RGB_PIXELSIZE == 3 ; ---------------
194
+
195
+ ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **)
196
+ ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **)
197
+ ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **)
198
+ ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **)
199
+ ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **)
200
+ ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **)
201
+ ; ymmG=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **)
202
+ ; ymmH=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **)
203
+
204
+ vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E
205
+ ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U)
206
+ vpunpcklbw ymmE, ymmE, ymmB ; ymmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F
207
+ ; 2G 0H 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V)
208
+ vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F
209
+ ; 1H 2H 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V)
210
+
211
+ vpsrldq ymmH, ymmA, 2 ; ymmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E 0G 1G
212
+ ; 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U -- --)
213
+ vpunpckhwd ymmG, ymmA, ymmE ; ymmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F
214
+ ; 0O 1O 2O 0P 0Q 1Q 2Q 0R 0S 1S 2S 0T 0U 1U 2U 0V)
215
+ vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07
216
+ ; 0G 1G 2G 0H 0I 1I 2I 0J 0K 1K 2K 0L 0M 1M 2M 0N)
217
+
218
+ vpsrldq ymmE, ymmE, 2 ; ymmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F 2G 0H
219
+ ; 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V -- --)
220
+
221
+ vpsrldq ymmB, ymmD, 2 ; ymmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F 1H 2H
222
+ ; 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V -- --)
223
+ vpunpckhwd ymmC, ymmD, ymmH ; ymmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F 0G 1G
224
+ ; 1P 2P 0Q 1Q 1R 2R 0S 1S 1T 2T 0U 1U 1V 2V -- --)
225
+ vpunpcklwd ymmD, ymmD, ymmH ; ymmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18
226
+ ; 1H 2H 0I 1I 1J 2J 0K 1K 1L 2L 0M 1M 1N 2N 0O 1O)
227
+
228
+ vpunpckhwd ymmF, ymmE, ymmB ; ymmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F 2G 0H 1H 2H
229
+ ; 2Q 0R 1R 2R 2S 0T 1T 2T 2U 0V 1V 2V -- -- -- --)
230
+ vpunpcklwd ymmE, ymmE, ymmB ; ymmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29
231
+ ; 2I 0J 1J 2J 2K 0L 1L 2L 2M 0N 1N 2N 2O 0P 1P 2P)
232
+
233
+ vpshufd ymmH, ymmA, 0x4E ; ymmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03
234
+ ; 0K 1K 2K 0L 0M 1M 2M 0N 0G 1G 2G 0H 0I 1I 2I 0J)
235
+ vpunpckldq ymmA, ymmA, ymmD ; ymmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14
236
+ ; 0G 1G 2G 0H 1H 2H 0I 1I 0I 1I 2I 0J 1J 2J 0K 1K)
237
+ vpunpckhdq ymmD, ymmD, ymmE ; ymmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29
238
+ ; 1L 2L 0M 1M 2M 0N 1N 2N 1N 2N 0O 1O 2O 0P 1P 2P)
239
+ vpunpckldq ymmE, ymmE, ymmH ; ymmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07
240
+ ; 2I 0J 1J 2J 0K 1K 2K 0L 2K 0L 1L 2L 0M 1M 2M 0N)
241
+
242
+ vpshufd ymmH, ymmG, 0x4E ; ymmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B
243
+ ; 0S 1S 2S 0T 0U 1U 2U 0V 0O 1O 2O 0P 0Q 1Q 2Q 0R)
244
+ vpunpckldq ymmG, ymmG, ymmC ; ymmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C
245
+ ; 0O 1O 2O 0P 1P 2P 0Q 1Q 0Q 1Q 2Q 0R 1R 2R 0S 1S)
246
+ vpunpckhdq ymmC, ymmC, ymmF ; ymmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F 0G 1G 2G 0H 1H 2H
247
+ ; 1T 2T 0U 1U 2U 0V 1V 2V 1V 2V -- -- -- -- -- --)
248
+ vpunpckldq ymmF, ymmF, ymmH ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F
249
+ ; 2Q 0R 1R 2R 0S 1S 2S 0T 2S 0T 1T 2T 0U 1U 2U 0V)
250
+
251
+ vpunpcklqdq ymmH, ymmA, ymmE ; ymmH=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
252
+ ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L)
253
+ vpunpcklqdq ymmG, ymmD, ymmG ; ymmG=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A
254
+ ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q)
255
+ vpunpcklqdq ymmC, ymmF, ymmC ; ymmC=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F
256
+ ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V)
257
+
258
+ vperm2i128 ymmA, ymmH, ymmG, 0x20 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
259
+ ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
260
+ vperm2i128 ymmD, ymmC, ymmH, 0x30 ; ymmD=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F
261
+ ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L)
262
+ vperm2i128 ymmF, ymmG, ymmC, 0x31 ; ymmF=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q
263
+ ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V)
264
+
265
+ cmp rcx, byte SIZEOF_YMMWORD
266
+ jb short .column_st64
267
+
268
+ test rdi, SIZEOF_YMMWORD-1
269
+ jnz short .out1
270
+ ; --(aligned)-------------------
271
+ vmovntdq YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
272
+ vmovntdq YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
273
+ vmovntdq YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmF
274
+ jmp short .out0
275
+ .out1: ; --(unaligned)-----------------
276
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
277
+ vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
278
+ vmovdqu YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmF
279
+ .out0:
280
+ add rdi, byte RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr
281
+ sub rcx, byte SIZEOF_YMMWORD
282
+ jz near .endcolumn
283
+
284
+ add rsi, byte SIZEOF_YMMWORD ; inptr0
285
+ dec al ; Yctr
286
+ jnz near .Yloop_2nd
287
+
288
+ add rbx, byte SIZEOF_YMMWORD ; inptr1
289
+ add rdx, byte SIZEOF_YMMWORD ; inptr2
290
+ jmp near .columnloop
291
+
292
+ .column_st64:
293
+ lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE
294
+ cmp rcx, byte 2*SIZEOF_YMMWORD
295
+ jb short .column_st32
296
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
297
+ vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
298
+ add rdi, byte 2*SIZEOF_YMMWORD ; outptr
299
+ vmovdqa ymmA, ymmF
300
+ sub rcx, byte 2*SIZEOF_YMMWORD
301
+ jmp short .column_st31
302
+ .column_st32:
303
+ cmp rcx, byte SIZEOF_YMMWORD
304
+ jb short .column_st31
305
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
306
+ add rdi, byte SIZEOF_YMMWORD ; outptr
307
+ vmovdqa ymmA, ymmD
308
+ sub rcx, byte SIZEOF_YMMWORD
309
+ jmp short .column_st31
310
+ .column_st31:
311
+ cmp rcx, byte SIZEOF_XMMWORD
312
+ jb short .column_st15
313
+ vmovdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
314
+ add rdi, byte SIZEOF_XMMWORD ; outptr
315
+ vperm2i128 ymmA, ymmA, ymmA, 1
316
+ sub rcx, byte SIZEOF_XMMWORD
317
+ .column_st15:
318
+ ; Store the lower 8 bytes of xmmA to the output when it has enough
319
+ ; space.
320
+ cmp rcx, byte SIZEOF_MMWORD
321
+ jb short .column_st7
322
+ vmovq XMM_MMWORD [rdi], xmmA
323
+ add rdi, byte SIZEOF_MMWORD
324
+ sub rcx, byte SIZEOF_MMWORD
325
+ vpsrldq xmmA, xmmA, SIZEOF_MMWORD
326
+ .column_st7:
327
+ ; Store the lower 4 bytes of xmmA to the output when it has enough
328
+ ; space.
329
+ cmp rcx, byte SIZEOF_DWORD
330
+ jb short .column_st3
331
+ vmovd XMM_DWORD [rdi], xmmA
332
+ add rdi, byte SIZEOF_DWORD
333
+ sub rcx, byte SIZEOF_DWORD
334
+ vpsrldq xmmA, xmmA, SIZEOF_DWORD
335
+ .column_st3:
336
+ ; Store the lower 2 bytes of rax to the output when it has enough
337
+ ; space.
338
+ vmovd eax, xmmA
339
+ cmp rcx, byte SIZEOF_WORD
340
+ jb short .column_st1
341
+ mov word [rdi], ax
342
+ add rdi, byte SIZEOF_WORD
343
+ sub rcx, byte SIZEOF_WORD
344
+ shr rax, 16
345
+ .column_st1:
346
+ ; Store the lower 1 byte of rax to the output when it has enough
347
+ ; space.
348
+ test rcx, rcx
349
+ jz short .endcolumn
350
+ mov byte [rdi], al
351
+
352
+ %else ; RGB_PIXELSIZE == 4 ; -----------
353
+
354
+ %ifdef RGBX_FILLER_0XFF
355
+ vpcmpeqb ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********)
356
+ vpcmpeqb ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********)
357
+ %else
358
+ vpxor ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********)
359
+ vpxor ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********)
360
+ %endif
361
+ ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **)
362
+ ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **)
363
+ ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **)
364
+ ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **)
365
+ ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **)
366
+ ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **)
367
+ ; ymmG=(30 32 34 36 38 3A 3C 3E ** 3G 3I 3K 3M 3O 3Q 3S 3U **)
368
+ ; ymmH=(31 33 35 37 39 3B 3D 3F ** 3H 3J 3L 3N 3P 3R 3T 3V **)
369
+
370
+ vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E
371
+ ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U)
372
+ vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E
373
+ ; 2G 3G 2I 3I 2K 3K 2M 3M 2O 3O 2Q 3Q 2S 3S 2U 3U)
374
+ vpunpcklbw ymmB, ymmB, ymmD ; ymmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F
375
+ ; 0H 1H 0J 1J 0L 1L 0N 1N 0P 1P 0R 1R 0T 1T 0V 1V)
376
+ vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F
377
+ ; 2H 3H 2J 3J 2L 3L 2N 3N 2P 3P 2R 3R 2T 3T 2V 3V)
378
+
379
+ vpunpckhwd ymmC, ymmA, ymmE ; ymmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E
380
+ ; 0O 1O 2O 3O 0Q 1Q 2Q 3Q 0S 1S 2S 3S 0U 1U 2U 3U)
381
+ vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36
382
+ ; 0G 1G 2G 3G 0I 1I 2I 3I 0K 1K 2K 3K 0M 1M 2M 3M)
383
+ vpunpckhwd ymmG, ymmB, ymmF ; ymmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F
384
+ ; 0P 1P 2P 3P 0R 1R 2R 3R 0T 1T 2T 3T 0V 1V 2V 3V)
385
+ vpunpcklwd ymmB, ymmB, ymmF ; ymmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37
386
+ ; 0H 1H 2H 3H 0J 1J 2J 3J 0L 1L 2L 3L 0N 1N 2N 3N)
387
+
388
+ vpunpckhdq ymmE, ymmA, ymmB ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
389
+ ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N)
390
+ vpunpckldq ymmB, ymmA, ymmB ; ymmB=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
391
+ ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J)
392
+ vpunpckhdq ymmF, ymmC, ymmG ; ymmF=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
393
+ ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V)
394
+ vpunpckldq ymmG, ymmC, ymmG ; ymmG=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
395
+ ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R)
396
+
397
+ vperm2i128 ymmA, ymmB, ymmE, 0x20 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
398
+ ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
399
+ vperm2i128 ymmD, ymmG, ymmF, 0x20 ; ymmD=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
400
+ ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
401
+ vperm2i128 ymmC, ymmB, ymmE, 0x31 ; ymmC=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J
402
+ ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N)
403
+ vperm2i128 ymmH, ymmG, ymmF, 0x31 ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R
404
+ ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V)
405
+
406
+ cmp rcx, byte SIZEOF_YMMWORD
407
+ jb short .column_st64
408
+
409
+ test rdi, SIZEOF_YMMWORD-1
410
+ jnz short .out1
411
+ ; --(aligned)-------------------
412
+ vmovntdq YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
413
+ vmovntdq YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
414
+ vmovntdq YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmC
415
+ vmovntdq YMMWORD [rdi+3*SIZEOF_YMMWORD], ymmH
416
+ jmp short .out0
417
+ .out1: ; --(unaligned)-----------------
418
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
419
+ vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
420
+ vmovdqu YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmC
421
+ vmovdqu YMMWORD [rdi+3*SIZEOF_YMMWORD], ymmH
422
+ .out0:
423
+ add rdi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr
424
+ sub rcx, byte SIZEOF_YMMWORD
425
+ jz near .endcolumn
426
+
427
+ add rsi, byte SIZEOF_YMMWORD ; inptr0
428
+ dec al
429
+ jnz near .Yloop_2nd
430
+
431
+ add rbx, byte SIZEOF_YMMWORD ; inptr1
432
+ add rdx, byte SIZEOF_YMMWORD ; inptr2
433
+ jmp near .columnloop
434
+
435
+ .column_st64:
436
+ cmp rcx, byte SIZEOF_YMMWORD/2
437
+ jb short .column_st32
438
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
439
+ vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
440
+ add rdi, byte 2*SIZEOF_YMMWORD ; outptr
441
+ vmovdqa ymmA, ymmC
442
+ vmovdqa ymmD, ymmH
443
+ sub rcx, byte SIZEOF_YMMWORD/2
444
+ .column_st32:
445
+ cmp rcx, byte SIZEOF_YMMWORD/4
446
+ jb short .column_st16
447
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
448
+ add rdi, byte SIZEOF_YMMWORD ; outptr
449
+ vmovdqa ymmA, ymmD
450
+ sub rcx, byte SIZEOF_YMMWORD/4
451
+ .column_st16:
452
+ cmp rcx, byte SIZEOF_YMMWORD/8
453
+ jb short .column_st15
454
+ vmovdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
455
+ add rdi, byte SIZEOF_XMMWORD ; outptr
456
+ vperm2i128 ymmA, ymmA, ymmA, 1
457
+ sub rcx, byte SIZEOF_YMMWORD/8
458
+ .column_st15:
459
+ ; Store two pixels (8 bytes) of ymmA to the output when it has enough
460
+ ; space.
461
+ cmp rcx, byte SIZEOF_YMMWORD/16
462
+ jb short .column_st7
463
+ vmovq MMWORD [rdi], xmmA
464
+ add rdi, byte SIZEOF_YMMWORD/16*4
465
+ sub rcx, byte SIZEOF_YMMWORD/16
466
+ vpsrldq xmmA, SIZEOF_YMMWORD/16*4
467
+ .column_st7:
468
+ ; Store one pixel (4 bytes) of ymmA to the output when it has enough
469
+ ; space.
470
+ test rcx, rcx
471
+ jz short .endcolumn
472
+ vmovd XMM_DWORD [rdi], xmmA
473
+
474
+ %endif ; RGB_PIXELSIZE ; ---------------
475
+
476
+ .endcolumn:
477
+ sfence ; flush the write buffer
478
+
479
+ .return:
480
+ pop rbx
481
+ vzeroupper
482
+ uncollect_args 4
483
+ mov rsp, rbp ; rsp <- aligned rbp
484
+ pop rsp ; rsp <- original rbp
485
+ pop rbp
486
+ ret
487
+
488
+ ; --------------------------------------------------------------------------
489
+ ; jsimd_h2v2_merged_upsample_avx2
490
+ ; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
491
+ ; Implemented as two calls to jsimd_h2v1_merged_upsample_avx2, one per output row. Each call is handed a temporary three-pointer array built on the stack (slot 0 = current input_buf[0] row-pointer position, slots 1 and 2 = input_buf[1] and input_buf[2] unchanged).
492
+ ; GLOBAL(void)
493
+ ; jsimd_h2v2_merged_upsample_avx2(JDIMENSION output_width,
494
+ ; JSAMPIMAGE input_buf,
495
+ ; JDIMENSION in_row_group_ctr,
496
+ ; JSAMPARRAY output_buf);
497
+ ; Between the two calls only slot 0 and the output row pointer advance, each by SIZEOF_JSAMPROW; output_width and in_row_group_ctr are passed unchanged both times.
498
+
499
+ ; r10d = JDIMENSION output_width (register assignment presumably established by collect_args 4; macro defined elsewhere)
500
+ ; r11 = JSAMPIMAGE input_buf
501
+ ; r12d = JDIMENSION in_row_group_ctr
502
+ ; r13 = JSAMPARRAY output_buf
503
+
504
+ align 32
505
+ GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
506
+
507
+ EXTN(jsimd_h2v2_merged_upsample_avx2):
508
+ push rbp
509
+ mov rax, rsp ; NOTE(review): rax is overwritten by "mov eax, r10d" below; presumably only collect_args may read it -- confirm
510
+ mov rbp, rsp
511
+ collect_args 4 ; macro defined elsewhere; paired with uncollect_args 4 in the epilogue
512
+ push rbx ; rbx is used as scratch below and restored before returning
513
+
514
+ mov eax, r10d ; eax = output_width
515
+
516
+ mov rdi, r11 ; rdi = input_buf
517
+ mov ecx, r12d ; ecx = in_row_group_ctr
518
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] ; rsip = input_buf[0]
519
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] ; rbxp = input_buf[1]
520
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] ; rdxp = input_buf[2]
521
+ mov rdi, r13 ; rdi = output_buf
522
+ lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] ; rsi = input_buf[0] advanced by in_row_group_ctr row-pointer slots
523
+
524
+ sub rsp, SIZEOF_JSAMPARRAY*4 ; reserve four pointer-sized stack slots; only slots 0-2 are written below
525
+ mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; slot 0 = inptr00
526
+ mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; slot 1 = inptr1
527
+ mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; slot 2 = inptr2
528
+ mov rbx, rsp ; rbx = address of the temporary pointer array (taken before the pushes below move rsp)
529
+
530
+ push rdi ; save output_buf across the call
531
+ push rcx ; save in_row_group_ctr across the call
532
+ push rax ; save output_width across the call
533
+
534
+ %ifdef WIN64
535
+ mov r8, rcx ; arg 3 = in_row_group_ctr (copied before rcx is overwritten)
536
+ mov r9, rdi ; arg 4 = output_buf
537
+ mov rcx, rax ; arg 1 = output_width
538
+ mov rdx, rbx ; arg 2 = temporary pointer array (presumably the callee's input_buf)
539
+ %else
540
+ mov rdx, rcx ; arg 3 = in_row_group_ctr (copied before rcx is overwritten)
541
+ mov rcx, rdi ; arg 4 = output_buf (copied before rdi is overwritten)
542
+ mov rdi, rax ; arg 1 = output_width
543
+ mov rsi, rbx ; arg 2 = temporary pointer array (presumably the callee's input_buf)
544
+ %endif
545
+
546
+ call EXTN(jsimd_h2v1_merged_upsample_avx2) ; first of the two calls
547
+
548
+ pop rax ; restore output_width
549
+ pop rcx ; restore in_row_group_ctr
550
+ pop rdi ; restore output_buf; rsp now points at the temporary array again
551
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY] ; reload inptr00 from slot 0
552
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY] ; reload inptr1 from slot 1
553
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY] ; reload inptr2 from slot 2
554
+
555
+ add rdi, byte SIZEOF_JSAMPROW ; outptr1: advance output_buf by one row pointer
556
+ add rsi, byte SIZEOF_JSAMPROW ; inptr01: advance the input_buf[0] position by one row pointer
557
+
558
+ mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; slot 0 = inptr01
559
+ mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; slot 1 = inptr1 (unchanged)
560
+ mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; slot 2 = inptr2 (unchanged)
561
+ mov rbx, rsp ; rbx = address of the temporary pointer array
562
+
563
+ push rdi ; save the same three values again; they are popped after the second call
564
+ push rcx
565
+ push rax
566
+
567
+ %ifdef WIN64
568
+ mov r8, rcx ; arg 3 = in_row_group_ctr (copied before rcx is overwritten)
569
+ mov r9, rdi ; arg 4 = advanced output row pointer
570
+ mov rcx, rax ; arg 1 = output_width
571
+ mov rdx, rbx ; arg 2 = temporary pointer array
572
+ %else
573
+ mov rdx, rcx ; arg 3 = in_row_group_ctr (copied before rcx is overwritten)
574
+ mov rcx, rdi ; arg 4 = advanced output row pointer (copied before rdi is overwritten)
575
+ mov rdi, rax ; arg 1 = output_width
576
+ mov rsi, rbx ; arg 2 = temporary pointer array
577
+ %endif
578
+
579
+ call EXTN(jsimd_h2v1_merged_upsample_avx2) ; second of the two calls
580
+
581
+ pop rax ; rebalance the stack (reverse order of the pushes)
582
+ pop rcx
583
+ pop rdi
584
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY] ; NOTE(review): nothing visible below reads these three reloaded registers -- possibly redundant; confirm before removing
585
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
586
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
587
+ add rsp, SIZEOF_JSAMPARRAY*4 ; release the four temporary stack slots
588
+
589
+ pop rbx ; restore the rbx saved in the prologue
590
+ uncollect_args 4 ; macro defined elsewhere; counterpart of collect_args 4
591
+ pop rbp
592
+ ret
593
+
594
+ ; For some reason, the OS X linker does not honor the request to align the
595
+ ; segment unless we do this.
596
+ align 32