image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,438 @@
1
+ ;
2
+ ; jcgryext.asm - grayscale colorspace conversion (64-bit AVX2)
3
+ ;
4
+ ; Copyright (C) 2011, 2016, D. R. Commander.
5
+ ; Copyright (C) 2015, Intel Corporation.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+
18
+ %include "jcolsamp.inc"
19
+
20
+ ; --------------------------------------------------------------------------
21
+ ;
22
+ ; Convert some rows of samples to the output colorspace.
23
+ ;
24
+ ; GLOBAL(void)
25
+ ; jsimd_rgb_gray_convert_avx2(JDIMENSION img_width, JSAMPARRAY input_buf,
26
+ ; JSAMPIMAGE output_buf, JDIMENSION output_row,
27
+ ; int num_rows);
28
+ ;
29
+
30
+ ; r10d = JDIMENSION img_width
31
+ ; r11 = JSAMPARRAY input_buf
32
+ ; r12 = JSAMPIMAGE output_buf
33
+ ; r13d = JDIMENSION output_row
34
+ ; r14d = int num_rows
35
+
36
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
37
+ %define WK_NUM 2
38
+
39
+ align 32
40
+ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
41
+
42
+ EXTN(jsimd_rgb_gray_convert_avx2):
43
+ push rbp
44
+ mov rax, rsp ; rax = original rbp
45
+ sub rsp, byte 4
46
+ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
47
+ mov [rsp], rax
48
+ mov rbp, rsp ; rbp = aligned rbp
49
+ lea rsp, [wk(0)]
50
+ collect_args 5
51
+ push rbx
52
+
53
+ mov ecx, r10d
54
+ test rcx, rcx
55
+ jz near .return
56
+
57
+ push rcx
58
+
59
+ mov rsi, r12
60
+ mov ecx, r13d
61
+ mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
62
+ lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
63
+
64
+ pop rcx
65
+
66
+ mov rsi, r11
67
+ mov eax, r14d
68
+ test rax, rax
69
+ jle near .return
70
+ .rowloop:
71
+ push rdi
72
+ push rsi
73
+ push rcx ; col
74
+
75
+ mov rsip, JSAMPROW [rsi] ; inptr
76
+ mov rdip, JSAMPROW [rdi] ; outptr0
77
+
78
+ cmp rcx, byte SIZEOF_YMMWORD
79
+ jae near .columnloop
80
+
81
+ %if RGB_PIXELSIZE == 3 ; ---------------
82
+
83
+ .column_ld1:
84
+ push rax
85
+ push rdx
86
+ lea rcx, [rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE
87
+ test cl, SIZEOF_BYTE
88
+ jz short .column_ld2
89
+ sub rcx, byte SIZEOF_BYTE
90
+ movzx rax, byte [rsi+rcx]
91
+ .column_ld2:
92
+ test cl, SIZEOF_WORD
93
+ jz short .column_ld4
94
+ sub rcx, byte SIZEOF_WORD
95
+ movzx rdx, word [rsi+rcx]
96
+ shl rax, WORD_BIT
97
+ or rax, rdx
98
+ .column_ld4:
99
+ vmovd xmmA, eax
100
+ pop rdx
101
+ pop rax
102
+ test cl, SIZEOF_DWORD
103
+ jz short .column_ld8
104
+ sub rcx, byte SIZEOF_DWORD
105
+ vmovd xmmF, XMM_DWORD [rsi+rcx]
106
+ vpslldq xmmA, xmmA, SIZEOF_DWORD
107
+ vpor xmmA, xmmA, xmmF
108
+ .column_ld8:
109
+ test cl, SIZEOF_MMWORD
110
+ jz short .column_ld16
111
+ sub rcx, byte SIZEOF_MMWORD
112
+ vmovq xmmB, XMM_MMWORD [rsi+rcx]
113
+ vpslldq xmmA, xmmA, SIZEOF_MMWORD
114
+ vpor xmmA, xmmA, xmmB
115
+ .column_ld16:
116
+ test cl, SIZEOF_XMMWORD
117
+ jz short .column_ld32
118
+ sub rcx, byte SIZEOF_XMMWORD
119
+ vmovdqu xmmB, XMM_MMWORD [rsi+rcx]
120
+ vperm2i128 ymmA, ymmA, ymmA, 1
121
+ vpor ymmA, ymmB
122
+ .column_ld32:
123
+ test cl, SIZEOF_YMMWORD
124
+ jz short .column_ld64
125
+ sub rcx, byte SIZEOF_YMMWORD
126
+ vmovdqa ymmF, ymmA
127
+ vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD]
128
+ .column_ld64:
129
+ test cl, 2*SIZEOF_YMMWORD
130
+ mov rcx, SIZEOF_YMMWORD
131
+ jz short .rgb_gray_cnv
132
+ vmovdqa ymmB, ymmA
133
+ vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD]
134
+ vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD]
135
+ jmp short .rgb_gray_cnv
136
+
137
+ .columnloop:
138
+ vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD]
139
+ vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD]
140
+ vmovdqu ymmB, YMMWORD [rsi+2*SIZEOF_YMMWORD]
141
+
142
+ .rgb_gray_cnv:
143
+ ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
144
+ ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
145
+ ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F
146
+ ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L)
147
+ ; ymmB=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q
148
+ ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V)
149
+
150
+ vmovdqu ymmC, ymmA
151
+ vinserti128 ymmA, ymmF, xmmA, 0 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
152
+ ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L)
153
+ vinserti128 ymmC, ymmC, xmmB, 0 ; ymmC=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q
154
+ ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
155
+ vinserti128 ymmB, ymmB, xmmF, 0 ; ymmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F
156
+ ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V)
157
+ vperm2i128 ymmF, ymmC, ymmC, 1 ; ymmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A
158
+ ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q)
159
+
160
+ vmovdqa ymmG, ymmA
161
+ vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12
162
+ ; 22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I)
163
+ vpsrldq ymmG, ymmG, 8 ; ymmG=(22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I
164
+ ; 2I 0J 1J 2J 0K 1K 2K 0L -- -- -- -- -- -- -- --)
165
+
166
+ vpunpckhbw ymmA, ymmA, ymmF ; ymmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A
167
+ ; 0G 0O 1G 1O 2G 2O 0H 0P 1H 1P 2H 2P 0I 0Q 1I 1Q)
168
+ vpslldq ymmF, ymmF, 8 ; ymmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27
169
+ ; 08 18 28 09 19 29 0A 1A 1L 2L 0M 1M 2M 0N 1N 2N)
170
+
171
+ vpunpcklbw ymmG, ymmG, ymmB ; ymmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D
172
+ ; 2I 2Q 0J 0R 1J 1R 2J 2R 0K 0S 1K 1S 2K 2S 0L 0T)
173
+ vpunpckhbw ymmF, ymmF, ymmB ; ymmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F
174
+ ; 1L 1T 2L 2T 0M 0U 1M 1U 2M 2U 0N 0V 1N 1V 2N 2V)
175
+
176
+ vmovdqa ymmD, ymmA
177
+ vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09
178
+ ; 11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P)
179
+ vpsrldq ymmD, ymmD, 8 ; ymmD=(11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P
180
+ ; 1H 1P 2H 2P 0I 0Q 1I 1Q -- -- -- -- -- -- -- --)
181
+
182
+ vpunpckhbw ymmA, ymmA, ymmG ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D
183
+ ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 0H 0L 0P 0T)
184
+ vpslldq ymmG, ymmG, 8 ; ymmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B
185
+ ; 04 0C 14 1C 24 2C 05 0D 2I 2Q 0J 0R 1J 1R 2J 2R)
186
+
187
+ vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E
188
+ ; 1H 1L 1P 1T 2H 2L 2P 2T 0I 0M 0Q 0U 1I 1M 1Q 1U)
189
+ vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F
190
+ ; 2I 2M 2Q 2U 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V)
191
+
192
+ vmovdqa ymmE, ymmA
193
+ vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C
194
+ ; 20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S)
195
+ vpsrldq ymmE, ymmE, 8 ; ymmE=(20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S
196
+ ; 2G 2K 2O 2S 0H 0L 0P 0T -- -- -- -- -- -- -- --)
197
+
198
+ vpunpckhbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E
199
+ ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U)
200
+ vpslldq ymmD, ymmD, 8 ; ymmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D
201
+ ; 02 06 0A 0E 12 16 1A 1E 1H 1L 1P 1T 2H 2L 2P 2T)
202
+
203
+ vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F
204
+ ; 2G 2I 2K 2M 2O 2Q 2S 2U 0H 0J 0L 0N 0P 0R 0T 0V)
205
+ vpunpckhbw ymmD, ymmD, ymmG ; ymmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F
206
+ ; 1H 1J 1L 1N 1P 1R 1T 1V 2H 2J 2L 2N 2P 2R 2T 2V)
207
+
208
+ vpxor ymmH, ymmH, ymmH
209
+
210
+ vmovdqa ymmC, ymmA
211
+ vpunpcklbw ymmA, ymmA, ymmH ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U)
212
+ vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U)
213
+
214
+ vmovdqa ymmB, ymmE
215
+ vpunpcklbw ymmE, ymmE, ymmH ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U)
216
+ vpunpckhbw ymmB, ymmB, ymmH ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V)
217
+
218
+ vmovdqa ymmF, ymmD
219
+ vpunpcklbw ymmD, ymmD, ymmH ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V)
220
+ vpunpckhbw ymmF, ymmF, ymmH ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V)
221
+
222
+ %else ; RGB_PIXELSIZE == 4 ; -----------
223
+
224
+ .column_ld1:
225
+ test cl, SIZEOF_XMMWORD/16
226
+ jz short .column_ld2
227
+ sub rcx, byte SIZEOF_XMMWORD/16
228
+ vmovd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
229
+ .column_ld2:
230
+ test cl, SIZEOF_XMMWORD/8
231
+ jz short .column_ld4
232
+ sub rcx, byte SIZEOF_XMMWORD/8
233
+ vmovq xmmF, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
234
+ vpslldq xmmA, xmmA, SIZEOF_MMWORD
235
+ vpor xmmA, xmmA, xmmF
236
+ .column_ld4:
237
+ test cl, SIZEOF_XMMWORD/4
238
+ jz short .column_ld8
239
+ sub rcx, byte SIZEOF_XMMWORD/4
240
+ vmovdqa xmmF, xmmA
241
+ vperm2i128 ymmF, ymmF, ymmF, 1
242
+ vmovdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
243
+ vpor ymmA, ymmA, ymmF
244
+ .column_ld8:
245
+ test cl, SIZEOF_XMMWORD/2
246
+ jz short .column_ld16
247
+ sub rcx, byte SIZEOF_XMMWORD/2
248
+ vmovdqa ymmF, ymmA
249
+ vmovdqu ymmA, YMMWORD [rsi+rcx*RGB_PIXELSIZE]
250
+ .column_ld16:
251
+ test cl, SIZEOF_XMMWORD
252
+ mov rcx, SIZEOF_YMMWORD
253
+ jz short .rgb_gray_cnv
254
+ vmovdqa ymmE, ymmA
255
+ vmovdqa ymmH, ymmF
256
+ vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD]
257
+ vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD]
258
+ jmp short .rgb_gray_cnv
259
+
260
+ .columnloop:
261
+ vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD]
262
+ vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD]
263
+ vmovdqu ymmE, YMMWORD [rsi+2*SIZEOF_YMMWORD]
264
+ vmovdqu ymmH, YMMWORD [rsi+3*SIZEOF_YMMWORD]
265
+
266
+ .rgb_gray_cnv:
267
+ ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
268
+ ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
269
+ ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
270
+ ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
271
+ ; ymmE=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J
272
+ ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N)
273
+ ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R
274
+ ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V)
275
+
276
+ vmovdqa ymmB, ymmA
277
+ vinserti128 ymmA, ymmA, xmmE, 1 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
278
+ ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J)
279
+ vperm2i128 ymmE, ymmB, ymmE, 0x31 ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
280
+ ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N)
281
+
282
+ vmovdqa ymmB, ymmF
283
+ vinserti128 ymmF, ymmF, xmmH, 1 ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
284
+ ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R)
285
+ vperm2i128 ymmH, ymmB, ymmH, 0x31 ; ymmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
286
+ ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V)
287
+
288
+ vmovdqa ymmD, ymmA
289
+ vpunpcklbw ymmA, ymmA, ymmE ; ymmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35
290
+ ; 0G 0K 1G 1K 2G 2K 3G 3K 0H 0L 1H 1L 2H 2L 3H 3L)
291
+ vpunpckhbw ymmD, ymmD, ymmE ; ymmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37
292
+ ; 0I 0M 1I 1M 2I 2M 3I 3M 0J 0N 1J 1N 2J 2N 3J 3N)
293
+
294
+ vmovdqa ymmC, ymmF
295
+ vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D
296
+ ; 0O 0S 1O 1S 2O 2S 3O 3S 0P 0T 1P 1T 2P 2T 3P 3T)
297
+ vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F
298
+ ; 0Q 0U 1Q 1U 2Q 2U 3Q 3U 0R 0V 1R 1V 2R 2V 3R 3V)
299
+
300
+ vmovdqa ymmB, ymmA
301
+ vpunpcklwd ymmA, ymmA, ymmF ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C
302
+ ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 3G 3K 3O 3S)
303
+ vpunpckhwd ymmB, ymmB, ymmF ; ymmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D
304
+ ; 0H 0L 0P 0T 1H 1L 1P 1T 2H 2L 2P 2T 3H 3L 3P 3T)
305
+
306
+ vmovdqa ymmG, ymmD
307
+ vpunpcklwd ymmD, ymmD, ymmC ; ymmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E
308
+ ; 0I 0M 0Q 0U 1I 1M 1Q 1U 2I 2M 2Q 2U 3I 3M 3Q 3U)
309
+ vpunpckhwd ymmG, ymmG, ymmC ; ymmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F
310
+ ; 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V 3J 3N 3R 3V)
311
+
312
+ vmovdqa ymmE, ymmA
313
+ vpunpcklbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E
314
+ ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U)
315
+ vpunpckhbw ymmE, ymmE, ymmD ; ymmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E
316
+ ; 2G 2I 2K 2M 2O 2Q 2S 2U 3G 3I 3K 3M 3O 3Q 3S 3U)
317
+
318
+ vmovdqa ymmH, ymmB
319
+ vpunpcklbw ymmB, ymmB, ymmG ; ymmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F
320
+ ; 0H 0J 0L 0N 0P 0R 0T 0V 1H 1J 1L 1N 1P 1R 1T 1V)
321
+ vpunpckhbw ymmH, ymmH, ymmG ; ymmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F
322
+ ; 2H 2J 2L 2N 2P 2R 2T 2V 3H 3J 3L 3N 3P 3R 3T 3V)
323
+
324
+ vpxor ymmF, ymmF, ymmF
325
+
326
+ vmovdqa ymmC, ymmA
327
+ vpunpcklbw ymmA, ymmA, ymmF ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U)
328
+ vpunpckhbw ymmC, ymmC, ymmF ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U)
329
+
330
+ vmovdqa ymmD, ymmB
331
+ vpunpcklbw ymmB, ymmB, ymmF ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V)
332
+ vpunpckhbw ymmD, ymmD, ymmF ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V)
333
+
334
+ vmovdqa ymmG, ymmE
335
+ vpunpcklbw ymmE, ymmE, ymmF ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U)
336
+ vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(30 32 34 36 38 3A 3C 3E 3G 3I 3K 3M 3O 3Q 3S 3U)
337
+
338
+ vpunpcklbw ymmF, ymmF, ymmH
339
+ vpunpckhbw ymmH, ymmH, ymmH
340
+ vpsrlw ymmF, ymmF, BYTE_BIT ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V)
341
+ vpsrlw ymmH, ymmH, BYTE_BIT ; ymmH=(31 33 35 37 39 3B 3D 3F 3H 3J 3L 3N 3P 3R 3T 3V)
342
+
343
+ %endif ; RGB_PIXELSIZE ; ---------------
344
+
345
+ ; ymm0=R(02468ACEGIKMOQSU)=RE, ymm2=G(02468ACEGIKMOQSU)=GE, ymm4=B(02468ACEGIKMOQSU)=BE
346
+ ; ymm1=R(13579BDFHJLNPRTV)=RO, ymm3=G(13579BDFHJLNPRTV)=GO, ymm5=B(13579BDFHJLNPRTV)=BO
347
+
348
+ ; (Original)
349
+ ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
350
+ ;
351
+ ; (This implementation)
352
+ ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
353
+
354
+ vmovdqa ymm6, ymm1
355
+ vpunpcklwd ymm1, ymm1, ymm3
356
+ vpunpckhwd ymm6, ymm6, ymm3
357
+ vpmaddwd ymm1, ymm1, [rel PW_F0299_F0337] ; ymm1=ROL*FIX(0.299)+GOL*FIX(0.337)
358
+ vpmaddwd ymm6, ymm6, [rel PW_F0299_F0337] ; ymm6=ROH*FIX(0.299)+GOH*FIX(0.337)
359
+
360
+ vmovdqa ymm7, ymm6 ; ymm7=ROH*FIX(0.299)+GOH*FIX(0.337)
361
+
362
+ vmovdqa ymm6, ymm0
363
+ vpunpcklwd ymm0, ymm0, ymm2
364
+ vpunpckhwd ymm6, ymm6, ymm2
365
+ vpmaddwd ymm0, ymm0, [rel PW_F0299_F0337] ; ymm0=REL*FIX(0.299)+GEL*FIX(0.337)
366
+ vpmaddwd ymm6, ymm6, [rel PW_F0299_F0337] ; ymm6=REH*FIX(0.299)+GEH*FIX(0.337)
367
+
368
+ vmovdqa YMMWORD [wk(0)], ymm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
369
+ vmovdqa YMMWORD [wk(1)], ymm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
370
+
371
+ vmovdqa ymm0, ymm5 ; ymm0=BO
372
+ vmovdqa ymm6, ymm4 ; ymm6=BE
373
+
374
+ vmovdqa ymm4, ymm0
375
+ vpunpcklwd ymm0, ymm0, ymm3
376
+ vpunpckhwd ymm4, ymm4, ymm3
377
+ vpmaddwd ymm0, ymm0, [rel PW_F0114_F0250] ; ymm0=BOL*FIX(0.114)+GOL*FIX(0.250)
378
+ vpmaddwd ymm4, ymm4, [rel PW_F0114_F0250] ; ymm4=BOH*FIX(0.114)+GOH*FIX(0.250)
379
+
380
+ vmovdqa ymm3, [rel PD_ONEHALF] ; ymm3=[PD_ONEHALF]
381
+
382
+ vpaddd ymm0, ymm0, ymm1
383
+ vpaddd ymm4, ymm4, ymm7
384
+ vpaddd ymm0, ymm0, ymm3
385
+ vpaddd ymm4, ymm4, ymm3
386
+ vpsrld ymm0, ymm0, SCALEBITS ; ymm0=YOL
387
+ vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YOH
388
+ vpackssdw ymm0, ymm0, ymm4 ; ymm0=YO
389
+
390
+ vmovdqa ymm4, ymm6
391
+ vpunpcklwd ymm6, ymm6, ymm2
392
+ vpunpckhwd ymm4, ymm4, ymm2
393
+ vpmaddwd ymm6, ymm6, [rel PW_F0114_F0250] ; ymm6=BEL*FIX(0.114)+GEL*FIX(0.250)
394
+ vpmaddwd ymm4, ymm4, [rel PW_F0114_F0250] ; ymm4=BEH*FIX(0.114)+GEH*FIX(0.250)
395
+
396
+ vmovdqa ymm2, [rel PD_ONEHALF] ; ymm2=[PD_ONEHALF]
397
+
398
+ vpaddd ymm6, ymm6, YMMWORD [wk(0)]
399
+ vpaddd ymm4, ymm4, YMMWORD [wk(1)]
400
+ vpaddd ymm6, ymm6, ymm2
401
+ vpaddd ymm4, ymm4, ymm2
402
+ vpsrld ymm6, ymm6, SCALEBITS ; ymm6=YEL
403
+ vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YEH
404
+ vpackssdw ymm6, ymm6, ymm4 ; ymm6=YE
405
+
406
+ vpsllw ymm0, ymm0, BYTE_BIT
407
+ vpor ymm6, ymm6, ymm0 ; ymm6=Y
408
+ vmovdqu YMMWORD [rdi], ymm6 ; Save Y
409
+
410
+ sub rcx, byte SIZEOF_YMMWORD
411
+ add rsi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; inptr
412
+ add rdi, byte SIZEOF_YMMWORD ; outptr0
413
+ cmp rcx, byte SIZEOF_YMMWORD
414
+ jae near .columnloop
415
+ test rcx, rcx
416
+ jnz near .column_ld1
417
+
418
+ pop rcx ; col
419
+ pop rsi
420
+ pop rdi
421
+
422
+ add rsi, byte SIZEOF_JSAMPROW ; input_buf
423
+ add rdi, byte SIZEOF_JSAMPROW
424
+ dec rax ; num_rows
425
+ jg near .rowloop
426
+
427
+ .return:
428
+ pop rbx
429
+ vzeroupper
430
+ uncollect_args 5
431
+ mov rsp, rbp ; rsp <- aligned rbp
432
+ pop rsp ; rsp <- original rbp
433
+ pop rbp
434
+ ret
435
+
436
+ ; For some reason, the OS X linker does not honor the request to align the
437
+ ; segment unless we do this.
438
+ align 32