image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,538 @@
1
+ ;
2
+ ; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2)
3
+ ;
4
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2012, 2016, D. R. Commander.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; and can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+
18
+ %include "jcolsamp.inc"
19
+
20
+ ; --------------------------------------------------------------------------
21
+ ;
22
+ ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
23
+ ;
24
+ ; GLOBAL(void)
25
+ ; jsimd_h2v1_merged_upsample_sse2(JDIMENSION output_width,
26
+ ; JSAMPIMAGE input_buf,
27
+ ; JDIMENSION in_row_group_ctr,
28
+ ; JSAMPARRAY output_buf);
29
+ ;
30
+
31
+ ; r10d = JDIMENSION output_width
32
+ ; r11 = JSAMPIMAGE input_buf
33
+ ; r12d = JDIMENSION in_row_group_ctr
34
+ ; r13 = JSAMPARRAY output_buf
35
+
36
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]: slot i of the WK_NUM-entry scratch array that ends at rbp
37
+ %define WK_NUM 3 ; scratch slots used below: wk(0)=(B-Y)H, wk(1)=(R-Y)H, wk(2)=(G-Y)H
38
+
39
+ align 32
40
+ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
41
+
42
+ EXTN(jsimd_h2v1_merged_upsample_sse2):
43
+ push rbp
44
+ mov rax, rsp ; rax = original rbp
45
+ sub rsp, byte 4
46
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
47
+ mov [rsp], rax
48
+ mov rbp, rsp ; rbp = aligned rbp
49
+ lea rsp, [wk(0)]
50
+ collect_args 4
51
+ push rbx
52
+
53
+ mov ecx, r10d ; col
54
+ test rcx, rcx
55
+ jz near .return
56
+
57
+ push rcx
58
+
59
+ mov rdi, r11
60
+ mov ecx, r12d
61
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
62
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
63
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
64
+ mov rdi, r13
65
+ mov rsip, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
66
+ mov rbxp, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
67
+ mov rdxp, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
68
+ mov rdip, JSAMPROW [rdi] ; outptr
69
+
70
+ pop rcx ; col
71
+
72
+ .columnloop:
73
+
74
+ movdqa xmm6, XMMWORD [rbx] ; xmm6=Cb(0123456789ABCDEF)
75
+ movdqa xmm7, XMMWORD [rdx] ; xmm7=Cr(0123456789ABCDEF)
76
+
77
+ pxor xmm1, xmm1 ; xmm1=(all 0's)
78
+ pcmpeqw xmm3, xmm3
79
+ psllw xmm3, 7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
80
+
81
+ movdqa xmm4, xmm6
82
+ punpckhbw xmm6, xmm1 ; xmm6=Cb(89ABCDEF)=CbH
83
+ punpcklbw xmm4, xmm1 ; xmm4=Cb(01234567)=CbL
84
+ movdqa xmm0, xmm7
85
+ punpckhbw xmm7, xmm1 ; xmm7=Cr(89ABCDEF)=CrH
86
+ punpcklbw xmm0, xmm1 ; xmm0=Cr(01234567)=CrL
87
+
88
+ paddw xmm6, xmm3
89
+ paddw xmm4, xmm3
90
+ paddw xmm7, xmm3
91
+ paddw xmm0, xmm3
92
+
93
+ ; (Original)
94
+ ; R = Y + 1.40200 * Cr
95
+ ; G = Y - 0.34414 * Cb - 0.71414 * Cr
96
+ ; B = Y + 1.77200 * Cb
97
+ ;
98
+ ; (This implementation)
99
+ ; R = Y + 0.40200 * Cr + Cr
100
+ ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
101
+ ; B = Y - 0.22800 * Cb + Cb + Cb
102
+
103
+ movdqa xmm5, xmm6 ; xmm5=CbH
104
+ movdqa xmm2, xmm4 ; xmm2=CbL
105
+ paddw xmm6, xmm6 ; xmm6=2*CbH
106
+ paddw xmm4, xmm4 ; xmm4=2*CbL
107
+ movdqa xmm1, xmm7 ; xmm1=CrH
108
+ movdqa xmm3, xmm0 ; xmm3=CrL
109
+ paddw xmm7, xmm7 ; xmm7=2*CrH
110
+ paddw xmm0, xmm0 ; xmm0=2*CrL
111
+
112
+ pmulhw xmm6, [rel PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800))
113
+ pmulhw xmm4, [rel PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800))
114
+ pmulhw xmm7, [rel PW_F0402] ; xmm7=(2*CrH * FIX(0.40200))
115
+ pmulhw xmm0, [rel PW_F0402] ; xmm0=(2*CrL * FIX(0.40200))
116
+
117
+ paddw xmm6, [rel PW_ONE]
118
+ paddw xmm4, [rel PW_ONE]
119
+ psraw xmm6, 1 ; xmm6=(CbH * -FIX(0.22800))
120
+ psraw xmm4, 1 ; xmm4=(CbL * -FIX(0.22800))
121
+ paddw xmm7, [rel PW_ONE]
122
+ paddw xmm0, [rel PW_ONE]
123
+ psraw xmm7, 1 ; xmm7=(CrH * FIX(0.40200))
124
+ psraw xmm0, 1 ; xmm0=(CrL * FIX(0.40200))
125
+
126
+ paddw xmm6, xmm5
127
+ paddw xmm4, xmm2
128
+ paddw xmm6, xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H
129
+ paddw xmm4, xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L
130
+ paddw xmm7, xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H
131
+ paddw xmm0, xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L
132
+
133
+ movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H
134
+ movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H
135
+
136
+ movdqa xmm6, xmm5
137
+ movdqa xmm7, xmm2
138
+ punpcklwd xmm5, xmm1
139
+ punpckhwd xmm6, xmm1
140
+ pmaddwd xmm5, [rel PW_MF0344_F0285]
141
+ pmaddwd xmm6, [rel PW_MF0344_F0285]
142
+ punpcklwd xmm2, xmm3
143
+ punpckhwd xmm7, xmm3
144
+ pmaddwd xmm2, [rel PW_MF0344_F0285]
145
+ pmaddwd xmm7, [rel PW_MF0344_F0285]
146
+
147
+ paddd xmm5, [rel PD_ONEHALF]
148
+ paddd xmm6, [rel PD_ONEHALF]
149
+ psrad xmm5, SCALEBITS
150
+ psrad xmm6, SCALEBITS
151
+ paddd xmm2, [rel PD_ONEHALF]
152
+ paddd xmm7, [rel PD_ONEHALF]
153
+ psrad xmm2, SCALEBITS
154
+ psrad xmm7, SCALEBITS
155
+
156
+ packssdw xmm5, xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
157
+ packssdw xmm2, xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
158
+ psubw xmm5, xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
159
+ psubw xmm2, xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
160
+
161
+ movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H
162
+
163
+ mov al, 2 ; Yctr
164
+ jmp short .Yloop_1st
165
+
166
+ .Yloop_2nd:
167
+ movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H
168
+ movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H
169
+ movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H
170
+
171
+ .Yloop_1st:
172
+ movdqa xmm7, XMMWORD [rsi] ; xmm7=Y(0123456789ABCDEF)
173
+
174
+ pcmpeqw xmm6, xmm6
175
+ psrlw xmm6, BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
176
+ pand xmm6, xmm7 ; xmm6=Y(02468ACE)=YE
177
+ psrlw xmm7, BYTE_BIT ; xmm7=Y(13579BDF)=YO
178
+
179
+ movdqa xmm1, xmm0 ; xmm1=xmm0=(R-Y)(L/H)
180
+ movdqa xmm3, xmm2 ; xmm3=xmm2=(G-Y)(L/H)
181
+ movdqa xmm5, xmm4 ; xmm5=xmm4=(B-Y)(L/H)
182
+
183
+ paddw xmm0, xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE)
184
+ paddw xmm1, xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF)
185
+ packuswb xmm0, xmm0 ; xmm0=R(02468ACE********)
186
+ packuswb xmm1, xmm1 ; xmm1=R(13579BDF********)
187
+
188
+ paddw xmm2, xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE)
189
+ paddw xmm3, xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF)
190
+ packuswb xmm2, xmm2 ; xmm2=G(02468ACE********)
191
+ packuswb xmm3, xmm3 ; xmm3=G(13579BDF********)
192
+
193
+ paddw xmm4, xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE)
194
+ paddw xmm5, xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF)
195
+ packuswb xmm4, xmm4 ; xmm4=B(02468ACE********)
196
+ packuswb xmm5, xmm5 ; xmm5=B(13579BDF********)
197
+
198
+ %if RGB_PIXELSIZE == 3 ; ---------------
199
+
200
+ ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
201
+ ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
202
+ ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
203
+ ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
204
+
205
+ punpcklbw xmmA, xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
206
+ punpcklbw xmmE, xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
207
+ punpcklbw xmmD, xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
208
+
209
+ movdqa xmmG, xmmA
210
+ movdqa xmmH, xmmA
211
+ punpcklwd xmmA, xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
212
+ punpckhwd xmmG, xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
213
+
214
+ psrldq xmmH, 2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
215
+ psrldq xmmE, 2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
216
+
217
+ movdqa xmmC, xmmD
218
+ movdqa xmmB, xmmD
219
+ punpcklwd xmmD, xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
220
+ punpckhwd xmmC, xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
221
+
222
+ psrldq xmmB, 2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
223
+
224
+ movdqa xmmF, xmmE
225
+ punpcklwd xmmE, xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
226
+ punpckhwd xmmF, xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
227
+
228
+ pshufd xmmH, xmmA, 0x4E ; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
229
+ movdqa xmmB, xmmE
230
+ punpckldq xmmA, xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
231
+ punpckldq xmmE, xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
232
+ punpckhdq xmmD, xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
233
+
234
+ pshufd xmmH, xmmG, 0x4E ; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
235
+ movdqa xmmB, xmmF
236
+ punpckldq xmmG, xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
237
+ punpckldq xmmF, xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
238
+ punpckhdq xmmC, xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
239
+
240
+ punpcklqdq xmmA, xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
241
+ punpcklqdq xmmD, xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
242
+ punpcklqdq xmmF, xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
243
+
244
+ cmp rcx, byte SIZEOF_XMMWORD
245
+ jb short .column_st32
246
+
247
+ test rdi, SIZEOF_XMMWORD-1
248
+ jnz short .out1
249
+ ; --(aligned)-------------------
250
+ movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
251
+ movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
252
+ movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
253
+ jmp short .out0
254
+ .out1: ; --(unaligned)-----------------
255
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
256
+ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
257
+ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
258
+ .out0:
259
+ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
260
+ sub rcx, byte SIZEOF_XMMWORD
261
+ jz near .endcolumn
262
+
263
+ add rsi, byte SIZEOF_XMMWORD ; inptr0
264
+ dec al ; Yctr
265
+ jnz near .Yloop_2nd
266
+
267
+ add rbx, byte SIZEOF_XMMWORD ; inptr1
268
+ add rdx, byte SIZEOF_XMMWORD ; inptr2
269
+ jmp near .columnloop
270
+
271
+ .column_st32:
272
+ lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE
273
+ cmp rcx, byte 2*SIZEOF_XMMWORD
274
+ jb short .column_st16
275
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
276
+ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
277
+ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
278
+ movdqa xmmA, xmmF
279
+ sub rcx, byte 2*SIZEOF_XMMWORD
280
+ jmp short .column_st15
281
+ .column_st16:
282
+ cmp rcx, byte SIZEOF_XMMWORD
283
+ jb short .column_st15
284
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
285
+ add rdi, byte SIZEOF_XMMWORD ; outptr
286
+ movdqa xmmA, xmmD
287
+ sub rcx, byte SIZEOF_XMMWORD
288
+ .column_st15:
289
+ ; Store the lower 8 bytes of xmmA to the output when it has enough
290
+ ; space.
291
+ cmp rcx, byte SIZEOF_MMWORD
292
+ jb short .column_st7
293
+ movq XMM_MMWORD [rdi], xmmA
294
+ add rdi, byte SIZEOF_MMWORD
295
+ sub rcx, byte SIZEOF_MMWORD
296
+ psrldq xmmA, SIZEOF_MMWORD
297
+ .column_st7:
298
+ ; Store the lower 4 bytes of xmmA to the output when it has enough
299
+ ; space.
300
+ cmp rcx, byte SIZEOF_DWORD
301
+ jb short .column_st3
302
+ movd XMM_DWORD [rdi], xmmA
303
+ add rdi, byte SIZEOF_DWORD
304
+ sub rcx, byte SIZEOF_DWORD
305
+ psrldq xmmA, SIZEOF_DWORD
306
+ .column_st3:
307
+ ; Store the lower 2 bytes of rax to the output when it has enough
308
+ ; space.
309
+ movd eax, xmmA
310
+ cmp rcx, byte SIZEOF_WORD
311
+ jb short .column_st1
312
+ mov word [rdi], ax
313
+ add rdi, byte SIZEOF_WORD
314
+ sub rcx, byte SIZEOF_WORD
315
+ shr rax, 16
316
+ .column_st1:
317
+ ; Store the lower 1 byte of rax to the output when it has enough
318
+ ; space.
319
+ test rcx, rcx
320
+ jz short .endcolumn
321
+ mov byte [rdi], al
322
+
323
+ %else ; RGB_PIXELSIZE == 4 ; -----------
324
+
325
+ %ifdef RGBX_FILLER_0XFF
326
+ pcmpeqb xmm6, xmm6 ; xmm6=XE=X(02468ACE********)
327
+ pcmpeqb xmm7, xmm7 ; xmm7=XO=X(13579BDF********)
328
+ %else
329
+ pxor xmm6, xmm6 ; xmm6=XE=X(02468ACE********)
330
+ pxor xmm7, xmm7 ; xmm7=XO=X(13579BDF********)
331
+ %endif
332
+ ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
333
+ ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
334
+ ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
335
+ ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
336
+
337
+ punpcklbw xmmA, xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
338
+ punpcklbw xmmE, xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
339
+ punpcklbw xmmB, xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
340
+ punpcklbw xmmF, xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
341
+
342
+ movdqa xmmC, xmmA
343
+ punpcklwd xmmA, xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
344
+ punpckhwd xmmC, xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
345
+ movdqa xmmG, xmmB
346
+ punpcklwd xmmB, xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
347
+ punpckhwd xmmG, xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
348
+
349
+ movdqa xmmD, xmmA
350
+ punpckldq xmmA, xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
351
+ punpckhdq xmmD, xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
352
+ movdqa xmmH, xmmC
353
+ punpckldq xmmC, xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
354
+ punpckhdq xmmH, xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
355
+
356
+ cmp rcx, byte SIZEOF_XMMWORD
357
+ jb short .column_st32
358
+
359
+ test rdi, SIZEOF_XMMWORD-1
360
+ jnz short .out1
361
+ ; --(aligned)-------------------
362
+ movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
363
+ movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
364
+ movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
365
+ movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
366
+ jmp short .out0
367
+ .out1: ; --(unaligned)-----------------
368
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
369
+ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
370
+ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
371
+ movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
372
+ .out0:
373
+ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
374
+ sub rcx, byte SIZEOF_XMMWORD
375
+ jz near .endcolumn
376
+
377
+ add rsi, byte SIZEOF_XMMWORD ; inptr0
378
+ dec al ; Yctr
379
+ jnz near .Yloop_2nd
380
+
381
+ add rbx, byte SIZEOF_XMMWORD ; inptr1
382
+ add rdx, byte SIZEOF_XMMWORD ; inptr2
383
+ jmp near .columnloop
384
+
385
+ .column_st32:
386
+ cmp rcx, byte SIZEOF_XMMWORD/2
387
+ jb short .column_st16
388
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
389
+ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
390
+ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
391
+ movdqa xmmA, xmmC
392
+ movdqa xmmD, xmmH
393
+ sub rcx, byte SIZEOF_XMMWORD/2
394
+ .column_st16:
395
+ cmp rcx, byte SIZEOF_XMMWORD/4
396
+ jb short .column_st15
397
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
398
+ add rdi, byte SIZEOF_XMMWORD ; outptr
399
+ movdqa xmmA, xmmD
400
+ sub rcx, byte SIZEOF_XMMWORD/4
401
+ .column_st15:
402
+ ; Store two pixels (8 bytes) of xmmA to the output when it has enough
403
+ ; space.
404
+ cmp rcx, byte SIZEOF_XMMWORD/8
405
+ jb short .column_st7
406
+ movq XMM_MMWORD [rdi], xmmA
407
+ add rdi, byte SIZEOF_XMMWORD/8*4
408
+ sub rcx, byte SIZEOF_XMMWORD/8
409
+ psrldq xmmA, SIZEOF_XMMWORD/8*4
410
+ .column_st7:
411
+ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
412
+ ; space.
413
+ test rcx, rcx
414
+ jz short .endcolumn
415
+ movd XMM_DWORD [rdi], xmmA
416
+
417
+ %endif ; RGB_PIXELSIZE ; ---------------
418
+
419
+ .endcolumn:
420
+ sfence ; flush the write buffer
421
+
422
+ .return:
423
+ pop rbx
424
+ uncollect_args 4
425
+ mov rsp, rbp ; rsp <- aligned rbp
426
+ pop rsp ; rsp <- original rbp
427
+ pop rbp
428
+ ret
429
+
430
+ ; --------------------------------------------------------------------------
431
+ ;
432
+ ; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
433
+ ; Done as two calls to jsimd_h2v1_merged_upsample_sse2; the second call gets the first input entry and output_buf each advanced by SIZEOF_JSAMPROW.
434
+ ; GLOBAL(void)
435
+ ; jsimd_h2v2_merged_upsample_sse2(JDIMENSION output_width,
436
+ ; JSAMPIMAGE input_buf,
437
+ ; JDIMENSION in_row_group_ctr,
438
+ ; JSAMPARRAY output_buf);
439
+ ;
440
+
441
+ ; r10d = JDIMENSION output_width
442
+ ; r11 = JSAMPIMAGE input_buf
443
+ ; r12d = JDIMENSION in_row_group_ctr
444
+ ; r13 = JSAMPARRAY output_buf
445
+
446
+ align 32
447
+ GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
448
+
449
+ EXTN(jsimd_h2v2_merged_upsample_sse2):
450
+ push rbp ; save the caller's rbp
451
+ mov rax, rsp ; NOTE(review): eax is reloaded from r10d below; presumably only here for collect_args - confirm against the macro
452
+ mov rbp, rsp
453
+ collect_args 4 ; macro defined elsewhere; presumably establishes the r10-r13 mapping listed above - confirm
454
+ push rbx ; rbx is used as scratch below
455
+
456
+ mov eax, r10d ; eax = output_width
457
+
458
+ mov rdi, r11 ; rdi = input_buf
459
+ mov ecx, r12d ; ecx = in_row_group_ctr
460
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] ; rsi = input_buf[0]
461
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] ; rbx = input_buf[1]
462
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] ; rdx = input_buf[2]
463
+ mov rdi, r13 ; rdi = output_buf
464
+ lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] ; rsi = &input_buf[0][in_row_group_ctr]
465
+
466
+ sub rsp, SIZEOF_JSAMPARRAY*4 ; reserve 4 pointer-sized stack slots (3 are used)
467
+ mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
468
+ mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
469
+ mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
470
+ mov rbx, rsp ; rbx = address of the 3-entry array just built (passed as input_buf)
471
+
472
+ push rdi ; keep output_buf across the call
473
+ push rcx ; keep in_row_group_ctr across the call
474
+ push rax ; keep output_width across the call
475
+
476
+ %ifdef WIN64
477
+ mov r8, rcx ; r8 = in_row_group_ctr (read rcx before it is overwritten)
478
+ mov r9, rdi ; r9 = output_buf
479
+ mov rcx, rax ; rcx = output_width
480
+ mov rdx, rbx ; rdx = stack array
481
+ %else
482
+ mov rdx, rcx ; rdx = in_row_group_ctr (read rcx before it is overwritten)
483
+ mov rcx, rdi ; rcx = output_buf (read rdi before it is overwritten)
484
+ mov rdi, rax ; rdi = output_width
485
+ mov rsi, rbx ; rsi = stack array
486
+ %endif
487
+
488
+ call EXTN(jsimd_h2v1_merged_upsample_sse2) ; first call
489
+
490
+ pop rax ; restore in reverse push order: output_width
491
+ pop rcx ; in_row_group_ctr
492
+ pop rdi ; output_buf
493
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY] ; reload the three entries from the stack slots
494
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
495
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
496
+
497
+ add rdi, byte SIZEOF_JSAMPROW ; outptr1
498
+ add rsi, byte SIZEOF_JSAMPROW ; inptr01
499
+
500
+ mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr01
501
+ mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
502
+ mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
503
+ mov rbx, rsp ; rbx = address of the updated 3-entry array
504
+
505
+ push rdi ; same save / argument shuffle as for the first call
506
+ push rcx
507
+ push rax
508
+
509
+ %ifdef WIN64
510
+ mov r8, rcx ; r8 = in_row_group_ctr (read rcx before it is overwritten)
511
+ mov r9, rdi ; r9 = advanced output_buf
512
+ mov rcx, rax ; rcx = output_width
513
+ mov rdx, rbx ; rdx = stack array
514
+ %else
515
+ mov rdx, rcx ; rdx = in_row_group_ctr (read rcx before it is overwritten)
516
+ mov rcx, rdi ; rcx = advanced output_buf (read rdi before it is overwritten)
517
+ mov rdi, rax ; rdi = output_width
518
+ mov rsi, rbx ; rsi = stack array
519
+ %endif
520
+
521
+ call EXTN(jsimd_h2v1_merged_upsample_sse2) ; second call
522
+
523
+ pop rax ; rebalance the stack (three pushes before the call)
524
+ pop rcx
525
+ pop rdi
526
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
527
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
528
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
529
+ add rsp, SIZEOF_JSAMPARRAY*4 ; release the 4 stack slots reserved above
530
+
531
+ pop rbx ; matches the push rbx after collect_args
532
+ uncollect_args 4 ; macro defined elsewhere; counterpart of collect_args 4
533
+ pop rbp ; matches the push rbp at entry
534
+ ret
535
+
536
+ ; For some reason, the OS X linker does not honor the request to align the
537
+ ; segment unless we do this.
538
+ align 32