image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,496 @@
1
+ ;
2
+ ; jdcolext.asm - colorspace conversion (64-bit AVX2)
3
+ ;
4
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2012, 2016, D. R. Commander.
6
+ ; Copyright (C) 2015, Intel Corporation.
7
+ ; Copyright (C) 2018, Matthias Räncker.
8
+ ;
9
+ ; Based on the x86 SIMD extension for IJG JPEG library
10
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
11
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
12
+ ;
13
+ ; This file should be assembled with NASM (Netwide Assembler),
14
+ ; can *not* be assembled with Microsoft's MASM or any compatible
15
+ ; assembler (including Borland's Turbo Assembler).
16
+ ; NASM is available from http://nasm.sourceforge.net/ or
17
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
18
+
19
+ %include "jcolsamp.inc"
20
+
21
+ ; --------------------------------------------------------------------------
22
+ ;
23
+ ; Convert some rows of samples to the output colorspace.
24
+ ;
25
+ ; GLOBAL(void)
26
+ ; jsimd_ycc_rgb_convert_avx2(JDIMENSION out_width, JSAMPIMAGE input_buf,
27
+ ; JDIMENSION input_row, JSAMPARRAY output_buf,
28
+ ; int num_rows)
29
+ ;
30
+
31
+ ; r10d = JDIMENSION out_width
32
+ ; r11 = JSAMPIMAGE input_buf
33
+ ; r12d = JDIMENSION input_row
34
+ ; r13 = JSAMPARRAY output_buf
35
+ ; r14d = int num_rows
36
+
37
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
38
+ %define WK_NUM 2
39
+
40
+ align 32
41
+ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
42
+
43
+ EXTN(jsimd_ycc_rgb_convert_avx2):
44
+ push rbp
45
+ mov rax, rsp ; rax = original rbp
46
+ sub rsp, byte 4
47
+ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
48
+ mov [rsp], rax
49
+ mov rbp, rsp ; rbp = aligned rbp
50
+ lea rsp, [wk(0)]
51
+ collect_args 5
52
+ push rbx
53
+
54
+ mov ecx, r10d ; num_cols
55
+ test rcx, rcx
56
+ jz near .return
57
+
58
+ push rcx
59
+
60
+ mov rdi, r11
61
+ mov ecx, r12d
62
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
63
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
64
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
65
+ lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
66
+ lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
67
+ lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
68
+
69
+ pop rcx
70
+
71
+ mov rdi, r13
72
+ mov eax, r14d
73
+ test rax, rax
74
+ jle near .return
75
+ .rowloop:
76
+ push rax
77
+ push rdi
78
+ push rdx
79
+ push rbx
80
+ push rsi
81
+ push rcx ; col
82
+
83
+ mov rsip, JSAMPROW [rsi] ; inptr0
84
+ mov rbxp, JSAMPROW [rbx] ; inptr1
85
+ mov rdxp, JSAMPROW [rdx] ; inptr2
86
+ mov rdip, JSAMPROW [rdi] ; outptr
87
+ .columnloop:
88
+
89
+ vmovdqu ymm5, YMMWORD [rbx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
90
+ vmovdqu ymm1, YMMWORD [rdx] ; ymm1=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
91
+
92
+ vpcmpeqw ymm0, ymm0, ymm0
93
+ vpcmpeqw ymm7, ymm7, ymm7
94
+ vpsrlw ymm0, ymm0, BYTE_BIT ; ymm0={0xFF 0x00 0xFF 0x00 ..}
95
+ vpsllw ymm7, ymm7, 7 ; ymm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
96
+
97
+ vpand ymm4, ymm0, ymm5 ; ymm4=Cb(02468ACEGIKMOQSU)=CbE
98
+ vpsrlw ymm5, ymm5, BYTE_BIT ; ymm5=Cb(13579BDFHJLNPRTV)=CbO
99
+ vpand ymm0, ymm0, ymm1 ; ymm0=Cr(02468ACEGIKMOQSU)=CrE
100
+ vpsrlw ymm1, ymm1, BYTE_BIT ; ymm1=Cr(13579BDFHJLNPRTV)=CrO
101
+
102
+ vpaddw ymm2, ymm4, ymm7
103
+ vpaddw ymm3, ymm5, ymm7
104
+ vpaddw ymm6, ymm0, ymm7
105
+ vpaddw ymm7, ymm1, ymm7
106
+
107
+ ; (Original)
108
+ ; R = Y + 1.40200 * Cr
109
+ ; G = Y - 0.34414 * Cb - 0.71414 * Cr
110
+ ; B = Y + 1.77200 * Cb
111
+ ;
112
+ ; (This implementation)
113
+ ; R = Y + 0.40200 * Cr + Cr
114
+ ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
115
+ ; B = Y - 0.22800 * Cb + Cb + Cb
116
+
117
+ vpaddw ymm4, ymm2, ymm2 ; ymm4=2*CbE
118
+ vpaddw ymm5, ymm3, ymm3 ; ymm5=2*CbO
119
+ vpaddw ymm0, ymm6, ymm6 ; ymm0=2*CrE
120
+ vpaddw ymm1, ymm7, ymm7 ; ymm1=2*CrO
121
+
122
+ vpmulhw ymm4, ymm4, [rel PW_MF0228] ; ymm4=(2*CbE * -FIX(0.22800))
123
+ vpmulhw ymm5, ymm5, [rel PW_MF0228] ; ymm5=(2*CbO * -FIX(0.22800))
124
+ vpmulhw ymm0, ymm0, [rel PW_F0402] ; ymm0=(2*CrE * FIX(0.40200))
125
+ vpmulhw ymm1, ymm1, [rel PW_F0402] ; ymm1=(2*CrO * FIX(0.40200))
126
+
127
+ vpaddw ymm4, ymm4, [rel PW_ONE]
128
+ vpaddw ymm5, ymm5, [rel PW_ONE]
129
+ vpsraw ymm4, ymm4, 1 ; ymm4=(CbE * -FIX(0.22800))
130
+ vpsraw ymm5, ymm5, 1 ; ymm5=(CbO * -FIX(0.22800))
131
+ vpaddw ymm0, ymm0, [rel PW_ONE]
132
+ vpaddw ymm1, ymm1, [rel PW_ONE]
133
+ vpsraw ymm0, ymm0, 1 ; ymm0=(CrE * FIX(0.40200))
134
+ vpsraw ymm1, ymm1, 1 ; ymm1=(CrO * FIX(0.40200))
135
+
136
+ vpaddw ymm4, ymm4, ymm2
137
+ vpaddw ymm5, ymm5, ymm3
138
+ vpaddw ymm4, ymm4, ymm2 ; ymm4=(CbE * FIX(1.77200))=(B-Y)E
139
+ vpaddw ymm5, ymm5, ymm3 ; ymm5=(CbO * FIX(1.77200))=(B-Y)O
140
+ vpaddw ymm0, ymm0, ymm6 ; ymm0=(CrE * FIX(1.40200))=(R-Y)E
141
+ vpaddw ymm1, ymm1, ymm7 ; ymm1=(CrO * FIX(1.40200))=(R-Y)O
142
+
143
+ vmovdqa YMMWORD [wk(0)], ymm4 ; wk(0)=(B-Y)E
144
+ vmovdqa YMMWORD [wk(1)], ymm5 ; wk(1)=(B-Y)O
145
+
146
+ vpunpckhwd ymm4, ymm2, ymm6
147
+ vpunpcklwd ymm2, ymm2, ymm6
148
+ vpmaddwd ymm2, ymm2, [rel PW_MF0344_F0285]
149
+ vpmaddwd ymm4, ymm4, [rel PW_MF0344_F0285]
150
+ vpunpckhwd ymm5, ymm3, ymm7
151
+ vpunpcklwd ymm3, ymm3, ymm7
152
+ vpmaddwd ymm3, ymm3, [rel PW_MF0344_F0285]
153
+ vpmaddwd ymm5, ymm5, [rel PW_MF0344_F0285]
154
+
155
+ vpaddd ymm2, ymm2, [rel PD_ONEHALF]
156
+ vpaddd ymm4, ymm4, [rel PD_ONEHALF]
157
+ vpsrad ymm2, ymm2, SCALEBITS
158
+ vpsrad ymm4, ymm4, SCALEBITS
159
+ vpaddd ymm3, ymm3, [rel PD_ONEHALF]
160
+ vpaddd ymm5, ymm5, [rel PD_ONEHALF]
161
+ vpsrad ymm3, ymm3, SCALEBITS
162
+ vpsrad ymm5, ymm5, SCALEBITS
163
+
164
+ vpackssdw ymm2, ymm2, ymm4 ; ymm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
165
+ vpackssdw ymm3, ymm3, ymm5 ; ymm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
166
+ vpsubw ymm2, ymm2, ymm6 ; ymm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
167
+ vpsubw ymm3, ymm3, ymm7 ; ymm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
168
+
169
+ vmovdqu ymm5, YMMWORD [rsi] ; ymm5=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
170
+
171
+ vpcmpeqw ymm4, ymm4, ymm4
172
+ vpsrlw ymm4, ymm4, BYTE_BIT ; ymm4={0xFF 0x00 0xFF 0x00 ..}
173
+ vpand ymm4, ymm4, ymm5 ; ymm4=Y(02468ACEGIKMOQSU)=YE
174
+ vpsrlw ymm5, ymm5, BYTE_BIT ; ymm5=Y(13579BDFHJLNPRTV)=YO
175
+
176
+ vpaddw ymm0, ymm0, ymm4 ; ymm0=((R-Y)E+YE)=RE=R(02468ACEGIKMOQSU)
177
+ vpaddw ymm1, ymm1, ymm5 ; ymm1=((R-Y)O+YO)=RO=R(13579BDFHJLNPRTV)
178
+ vpackuswb ymm0, ymm0, ymm0 ; ymm0=R(02468ACE********GIKMOQSU********)
179
+ vpackuswb ymm1, ymm1, ymm1 ; ymm1=R(13579BDF********HJLNPRTV********)
180
+
181
+ vpaddw ymm2, ymm2, ymm4 ; ymm2=((G-Y)E+YE)=GE=G(02468ACEGIKMOQSU)
182
+ vpaddw ymm3, ymm3, ymm5 ; ymm3=((G-Y)O+YO)=GO=G(13579BDFHJLNPRTV)
183
+ vpackuswb ymm2, ymm2, ymm2 ; ymm2=G(02468ACE********GIKMOQSU********)
184
+ vpackuswb ymm3, ymm3, ymm3 ; ymm3=G(13579BDF********HJLNPRTV********)
185
+
186
+ vpaddw ymm4, ymm4, YMMWORD [wk(0)] ; ymm4=(YE+(B-Y)E)=BE=B(02468ACEGIKMOQSU)
187
+ vpaddw ymm5, ymm5, YMMWORD [wk(1)] ; ymm5=(YO+(B-Y)O)=BO=B(13579BDFHJLNPRTV)
188
+ vpackuswb ymm4, ymm4, ymm4 ; ymm4=B(02468ACE********GIKMOQSU********)
189
+ vpackuswb ymm5, ymm5, ymm5 ; ymm5=B(13579BDF********HJLNPRTV********)
190
+
191
+ %if RGB_PIXELSIZE == 3 ; ---------------
192
+
193
+ ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **)
194
+ ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **)
195
+ ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **)
196
+ ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **)
197
+ ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **)
198
+ ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **)
199
+ ; ymmG=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **)
200
+ ; ymmH=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **)
201
+
202
+ vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E
203
+ ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U)
204
+ vpunpcklbw ymmE, ymmE, ymmB ; ymmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F
205
+ ; 2G 0H 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V)
206
+ vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F
207
+ ; 1H 2H 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V)
208
+
209
+ vpsrldq ymmH, ymmA, 2 ; ymmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E 0G 1G
210
+ ; 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U -- --)
211
+ vpunpckhwd ymmG, ymmA, ymmE ; ymmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F
212
+ ; 0O 1O 2O 0P 0Q 1Q 2Q 0R 0S 1S 2S 0T 0U 1U 2U 0V)
213
+ vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07
214
+ ; 0G 1G 2G 0H 0I 1I 2I 0J 0K 1K 2K 0L 0M 1M 2M 0N)
215
+
216
+ vpsrldq ymmE, ymmE, 2 ; ymmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F 2G 0H
217
+ ; 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V -- --)
218
+
219
+ vpsrldq ymmB, ymmD, 2 ; ymmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F 1H 2H
220
+ ; 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V -- --)
221
+ vpunpckhwd ymmC, ymmD, ymmH ; ymmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F 0G 1G
222
+ ; 1P 2P 0Q 1Q 1R 2R 0S 1S 1T 2T 0U 1U 1V 2V -- --)
223
+ vpunpcklwd ymmD, ymmD, ymmH ; ymmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18
224
+ ; 1H 2H 0I 1I 1J 2J 0K 1K 1L 2L 0M 1M 1N 2N 0O 1O)
225
+
226
+ vpunpckhwd ymmF, ymmE, ymmB ; ymmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F 2G 0H 1H 2H
227
+ ; 2Q 0R 1R 2R 2S 0T 1T 2T 2U 0V 1V 2V -- -- -- --)
228
+ vpunpcklwd ymmE, ymmE, ymmB ; ymmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29
229
+ ; 2I 0J 1J 2J 2K 0L 1L 2L 2M 0N 1N 2N 2O 0P 1P 2P)
230
+
231
+ vpshufd ymmH, ymmA, 0x4E ; ymmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03
232
+ ; 0K 1K 2K 0L 0M 1M 2M 0N 0G 1G 2G 0H 0I 1I 2I 0J)
233
+ vpunpckldq ymmA, ymmA, ymmD ; ymmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14
234
+ ; 0G 1G 2G 0H 1H 2H 0I 1I 0I 1I 2I 0J 1J 2J 0K 1K)
235
+ vpunpckhdq ymmD, ymmD, ymmE ; ymmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29
236
+ ; 1L 2L 0M 1M 2M 0N 1N 2N 1N 2N 0O 1O 2O 0P 1P 2P)
237
+ vpunpckldq ymmE, ymmE, ymmH ; ymmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07
238
+ ; 2I 0J 1J 2J 0K 1K 2K 0L 2K 0L 1L 2L 0M 1M 2M 0N)
239
+
240
+ vpshufd ymmH, ymmG, 0x4E ; ymmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B
241
+ ; 0S 1S 2S 0T 0U 1U 2U 0V 0O 1O 2O 0P 0Q 1Q 2Q 0R)
242
+ vpunpckldq ymmG, ymmG, ymmC ; ymmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C
243
+ ; 0O 1O 2O 0P 1P 2P 0Q 1Q 0Q 1Q 2Q 0R 1R 2R 0S 1S)
244
+ vpunpckhdq ymmC, ymmC, ymmF ; ymmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F 0G 1G 2G 0H 1H 2H
245
+ ; 1T 2T 0U 1U 2U 0V 1V 2V 1V 2V -- -- -- -- -- --)
246
+ vpunpckldq ymmF, ymmF, ymmH ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F
247
+ ; 2Q 0R 1R 2R 0S 1S 2S 0T 2S 0T 1T 2T 0U 1U 2U 0V)
248
+
249
+ vpunpcklqdq ymmH, ymmA, ymmE ; ymmH=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
250
+ ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L)
251
+ vpunpcklqdq ymmG, ymmD, ymmG ; ymmG=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A
252
+ ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q)
253
+ vpunpcklqdq ymmC, ymmF, ymmC ; ymmC=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F
254
+ ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V)
255
+
256
+ vperm2i128 ymmA, ymmH, ymmG, 0x20 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
257
+ ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
258
+ vperm2i128 ymmD, ymmC, ymmH, 0x30 ; ymmD=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F
259
+ ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L)
260
+ vperm2i128 ymmF, ymmG, ymmC, 0x31 ; ymmF=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q
261
+ ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V)
262
+
263
+ cmp rcx, byte SIZEOF_YMMWORD
264
+ jb short .column_st64
265
+
266
+ test rdi, SIZEOF_YMMWORD-1
267
+ jnz short .out1
268
+ ; --(aligned)-------------------
269
+ vmovntdq YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
270
+ vmovntdq YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
271
+ vmovntdq YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmF
272
+ jmp short .out0
273
+ .out1: ; --(unaligned)-----------------
274
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
275
+ vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
276
+ vmovdqu YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmF
277
+ .out0:
278
+ add rdi, byte RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr
279
+ sub rcx, byte SIZEOF_YMMWORD
280
+ jz near .nextrow
281
+
282
+ add rsi, byte SIZEOF_YMMWORD ; inptr0
283
+ add rbx, byte SIZEOF_YMMWORD ; inptr1
284
+ add rdx, byte SIZEOF_YMMWORD ; inptr2
285
+ jmp near .columnloop
286
+
287
+ .column_st64:
288
+ lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE
289
+ cmp rcx, byte 2*SIZEOF_YMMWORD
290
+ jb short .column_st32
291
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
292
+ vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
293
+ add rdi, byte 2*SIZEOF_YMMWORD ; outptr
294
+ vmovdqa ymmA, ymmF
295
+ sub rcx, byte 2*SIZEOF_YMMWORD
296
+ jmp short .column_st31
297
+ .column_st32:
298
+ cmp rcx, byte SIZEOF_YMMWORD
299
+ jb short .column_st31
300
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
301
+ add rdi, byte SIZEOF_YMMWORD ; outptr
302
+ vmovdqa ymmA, ymmD
303
+ sub rcx, byte SIZEOF_YMMWORD
304
+ jmp short .column_st31
305
+ .column_st31:
306
+ cmp rcx, byte SIZEOF_XMMWORD
307
+ jb short .column_st15
308
+ vmovdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
309
+ add rdi, byte SIZEOF_XMMWORD ; outptr
310
+ vperm2i128 ymmA, ymmA, ymmA, 1
311
+ sub rcx, byte SIZEOF_XMMWORD
312
+ .column_st15:
313
+ ; Store the lower 8 bytes of xmmA to the output when it has enough
314
+ ; space.
315
+ cmp rcx, byte SIZEOF_MMWORD
316
+ jb short .column_st7
317
+ vmovq XMM_MMWORD [rdi], xmmA
318
+ add rdi, byte SIZEOF_MMWORD
319
+ sub rcx, byte SIZEOF_MMWORD
320
+ vpsrldq xmmA, xmmA, SIZEOF_MMWORD
321
+ .column_st7:
322
+ ; Store the lower 4 bytes of xmmA to the output when it has enough
323
+ ; space.
324
+ cmp rcx, byte SIZEOF_DWORD
325
+ jb short .column_st3
326
+ vmovd XMM_DWORD [rdi], xmmA
327
+ add rdi, byte SIZEOF_DWORD
328
+ sub rcx, byte SIZEOF_DWORD
329
+ vpsrldq xmmA, xmmA, SIZEOF_DWORD
330
+ .column_st3:
331
+ ; Store the lower 2 bytes of rax to the output when it has enough
332
+ ; space.
333
+ vmovd eax, xmmA
334
+ cmp rcx, byte SIZEOF_WORD
335
+ jb short .column_st1
336
+ mov word [rdi], ax
337
+ add rdi, byte SIZEOF_WORD
338
+ sub rcx, byte SIZEOF_WORD
339
+ shr rax, 16
340
+ .column_st1:
341
+ ; Store the lower 1 byte of rax to the output when it has enough
342
+ ; space.
343
+ test rcx, rcx
344
+ jz short .nextrow
345
+ mov byte [rdi], al
346
+
347
+ %else ; RGB_PIXELSIZE == 4 ; -----------
348
+
349
+ %ifdef RGBX_FILLER_0XFF
350
+ vpcmpeqb ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********)
351
+ vpcmpeqb ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********)
352
+ %else
353
+ vpxor ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********)
354
+ vpxor ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********)
355
+ %endif
356
+ ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **)
357
+ ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **)
358
+ ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **)
359
+ ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **)
360
+ ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **)
361
+ ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **)
362
+ ; ymmG=(30 32 34 36 38 3A 3C 3E ** 3G 3I 3K 3M 3O 3Q 3S 3U **)
363
+ ; ymmH=(31 33 35 37 39 3B 3D 3F ** 3H 3J 3L 3N 3P 3R 3T 3V **)
364
+
365
+ vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E
366
+ ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U)
367
+ vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E
368
+ ; 2G 3G 2I 3I 2K 3K 2M 3M 2O 3O 2Q 3Q 2S 3S 2U 3U)
369
+ vpunpcklbw ymmB, ymmB, ymmD ; ymmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F
370
+ ; 0H 1H 0J 1J 0L 1L 0N 1N 0P 1P 0R 1R 0T 1T 0V 1V)
371
+ vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F
372
+ ; 2H 3H 2J 3J 2L 3L 2N 3N 2P 3P 2R 3R 2T 3T 2V 3V)
373
+
374
+ vpunpckhwd ymmC, ymmA, ymmE ; ymmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E
375
+ ; 0O 1O 2O 3O 0Q 1Q 2Q 3Q 0S 1S 2S 3S 0U 1U 2U 3U)
376
+ vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36
377
+ ; 0G 1G 2G 3G 0I 1I 2I 3I 0K 1K 2K 3K 0M 1M 2M 3M)
378
+ vpunpckhwd ymmG, ymmB, ymmF ; ymmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F
379
+ ; 0P 1P 2P 3P 0R 1R 2R 3R 0T 1T 2T 3T 0V 1V 2V 3V)
380
+ vpunpcklwd ymmB, ymmB, ymmF ; ymmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37
381
+ ; 0H 1H 2H 3H 0J 1J 2J 3J 0L 1L 2L 3L 0N 1N 2N 3N)
382
+
383
+ vpunpckhdq ymmE, ymmA, ymmB ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
384
+ ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N)
385
+ vpunpckldq ymmB, ymmA, ymmB ; ymmB=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
386
+ ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J)
387
+ vpunpckhdq ymmF, ymmC, ymmG ; ymmF=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
388
+ ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V)
389
+ vpunpckldq ymmG, ymmC, ymmG ; ymmG=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
390
+ ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R)
391
+
392
+ vperm2i128 ymmA, ymmB, ymmE, 0x20 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
393
+ ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
394
+ vperm2i128 ymmD, ymmG, ymmF, 0x20 ; ymmD=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
395
+ ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
396
+ vperm2i128 ymmC, ymmB, ymmE, 0x31 ; ymmC=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J
397
+ ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N)
398
+ vperm2i128 ymmH, ymmG, ymmF, 0x31 ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R
399
+ ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V)
400
+
401
+ cmp rcx, byte SIZEOF_YMMWORD
402
+ jb short .column_st64
403
+
404
+ test rdi, SIZEOF_YMMWORD-1
405
+ jnz short .out1
406
+ ; --(aligned)-------------------
407
+ vmovntdq YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
408
+ vmovntdq YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
409
+ vmovntdq YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmC
410
+ vmovntdq YMMWORD [rdi+3*SIZEOF_YMMWORD], ymmH
411
+ jmp short .out0
412
+ .out1: ; --(unaligned)-----------------
413
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
414
+ vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
415
+ vmovdqu YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmC
416
+ vmovdqu YMMWORD [rdi+3*SIZEOF_YMMWORD], ymmH
417
+ .out0:
418
+ add rdi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr
419
+ sub rcx, byte SIZEOF_YMMWORD
420
+ jz near .nextrow
421
+
422
+ add rsi, byte SIZEOF_YMMWORD ; inptr0
423
+ add rbx, byte SIZEOF_YMMWORD ; inptr1
424
+ add rdx, byte SIZEOF_YMMWORD ; inptr2
425
+ jmp near .columnloop
426
+
427
+ .column_st64:
428
+ cmp rcx, byte SIZEOF_YMMWORD/2
429
+ jb short .column_st32
430
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
431
+ vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
432
+ add rdi, byte 2*SIZEOF_YMMWORD ; outptr
433
+ vmovdqa ymmA, ymmC
434
+ vmovdqa ymmD, ymmH
435
+ sub rcx, byte SIZEOF_YMMWORD/2
436
+ .column_st32:
437
+ cmp rcx, byte SIZEOF_YMMWORD/4
438
+ jb short .column_st16
439
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
440
+ add rdi, byte SIZEOF_YMMWORD ; outptr
441
+ vmovdqa ymmA, ymmD
442
+ sub rcx, byte SIZEOF_YMMWORD/4
443
+ .column_st16:
444
+ cmp rcx, byte SIZEOF_YMMWORD/8
445
+ jb short .column_st15
446
+ vmovdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
447
+ vperm2i128 ymmA, ymmA, ymmA, 1
448
+ add rdi, byte SIZEOF_XMMWORD ; outptr
449
+ sub rcx, byte SIZEOF_YMMWORD/8
450
+ .column_st15:
451
+ ; Store two pixels (8 bytes) of ymmA to the output when it has enough
452
+ ; space.
453
+ cmp rcx, byte SIZEOF_YMMWORD/16
454
+ jb short .column_st7
455
+ vmovq MMWORD [rdi], xmmA
456
+ add rdi, byte SIZEOF_YMMWORD/16*4
457
+ sub rcx, byte SIZEOF_YMMWORD/16
458
+ vpsrldq xmmA, SIZEOF_YMMWORD/16*4
459
+ .column_st7:
460
+ ; Store one pixel (4 bytes) of ymmA to the output when it has enough
461
+ ; space.
462
+ test rcx, rcx
463
+ jz short .nextrow
464
+ vmovd XMM_DWORD [rdi], xmmA
465
+
466
+ %endif ; RGB_PIXELSIZE ; ---------------
467
+
468
+ .nextrow:
469
+ pop rcx
470
+ pop rsi
471
+ pop rbx
472
+ pop rdx
473
+ pop rdi
474
+ pop rax
475
+
476
+ add rsi, byte SIZEOF_JSAMPROW
477
+ add rbx, byte SIZEOF_JSAMPROW
478
+ add rdx, byte SIZEOF_JSAMPROW
479
+ add rdi, byte SIZEOF_JSAMPROW ; output_buf
480
+ dec rax ; num_rows
481
+ jg near .rowloop
482
+
483
+ sfence ; flush the write buffer
484
+
485
+ .return:
486
+ pop rbx
487
+ vzeroupper
488
+ uncollect_args 5
489
+ mov rsp, rbp ; rsp <- aligned rbp
490
+ pop rsp ; rsp <- original rbp
491
+ pop rbp
492
+ ret
493
+
494
+ ; For some reason, the OS X linker does not honor the request to align the
495
+ ; segment unless we do this.
496
+ align 32