image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,367 @@
1
+ ;
2
+ ; jcsample-avx2.asm - downsampling (64-bit AVX2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ; Copyright (C) 2015, Intel Corporation.
7
+ ; Copyright (C) 2018, Matthias Räncker.
8
+ ;
9
+ ; Based on the x86 SIMD extension for IJG JPEG library
10
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
11
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
12
+ ;
13
+ ; This file should be assembled with NASM (Netwide Assembler),
14
+ ; can *not* be assembled with Microsoft's MASM or any compatible
15
+ ; assembler (including Borland's Turbo Assembler).
16
+ ; NASM is available from http://nasm.sourceforge.net/ or
17
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
18
+
19
+ %include "jsimdext.inc"
20
+
21
+ ; --------------------------------------------------------------------------
22
+ SECTION SEG_TEXT
23
+ BITS 64
24
+ ;
25
+ ; Downsample pixel values of a single component.
26
+ ; This version handles the common case of 2:1 horizontal and 1:1 vertical,
27
+ ; without smoothing.
28
+ ;
29
+ ; GLOBAL(void)
30
+ ; jsimd_h2v1_downsample_avx2(JDIMENSION image_width, int max_v_samp_factor,
31
+ ; JDIMENSION v_samp_factor,
32
+ ; JDIMENSION width_in_blocks, JSAMPARRAY input_data,
33
+ ; JSAMPARRAY output_data);
34
+ ;
35
+
36
+ ; r10d = JDIMENSION image_width
37
+ ; r11 = int max_v_samp_factor
38
+ ; r12d = JDIMENSION v_samp_factor
39
+ ; r13d = JDIMENSION width_in_blocks
40
+ ; r14 = JSAMPARRAY input_data
41
+ ; r15 = JSAMPARRAY output_data
42
+
43
+ align 32 ; 32-byte-align the function entry
44
+ GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2) ; macro not defined in this file (presumably from jsimdext.inc; exports the symbol)
45
+
46
+ EXTN(jsimd_h2v1_downsample_avx2): ; h2v1: pad each input row on the right, then average every horizontal pair of samples
47
+ push rbp
48
+ mov rax, rsp ; rax = copy of rsp (presumably consumed by collect_args; macro body not visible here)
49
+ mov rbp, rsp
50
+ collect_args 6 ; macro not defined in this file; per the register map above, the 6 args end up in r10..r15
51
+
52
+ mov ecx, r13d ; rcx = width_in_blocks (32-bit move zero-extends)
53
+ shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
54
+ jz near .return ; output_cols == 0: nothing to do
55
+
56
+ mov edx, r10d ; rdx = image_width (zero-extended)
57
+
58
+ ; -- expand_right_edge: replicate each row's last sample out to column output_cols*2
59
+
60
+ push rcx ; save output_cols; rcx becomes the pad count below
61
+ shl rcx, 1 ; output_cols * 2
62
+ sub rcx, rdx ; rcx = output_cols*2 - image_width = bytes of padding per row
63
+ jle short .expand_end ; row already wide enough: skip padding
64
+
65
+ mov rax, r11 ; rax = max_v_samp_factor = number of input rows to pad
66
+ test rax, rax
67
+ jle short .expand_end ; no rows to pad
68
+
69
+ cld ; clear DF so rep stosb walks upward in memory
70
+ mov rsi, r14 ; input_data
71
+ .expandloop: ; one iteration per input row; rax = rows left, rcx = pad count
72
+ push rax ; al is overwritten with the fill sample below
73
+ push rcx ; rep stosb counts rcx down to zero
74
+
75
+ mov rdip, JSAMPROW [rsi] ; load this row's sample pointer (rdip: presumably a pointer-width alias of rdi -- confirm in jsimdext.inc)
76
+ add rdi, rdx ; rdi = first byte past the valid samples
77
+ mov al, JSAMPLE [rdi-1] ; al = last valid sample of the row
78
+
79
+ rep stosb ; write al to [rdi], rcx times
80
+
81
+ pop rcx ; restore pad count
82
+ pop rax ; restore row counter
83
+
84
+ add rsi, byte SIZEOF_JSAMPROW ; advance to the next row-pointer slot
85
+ dec rax
86
+ jg short .expandloop ; loop while rows remain
87
+
88
+ .expand_end: ; all exits from the padding section land here with output_cols still on the stack
89
+ pop rcx ; output_cols
90
+
91
+ ; -- h2v1_downsample: out[i] = (in[2i] + in[2i+1] + bias) >> 1
92
+
93
+ mov eax, r12d ; rowctr
94
+ test eax, eax ; NOTE(review): 32-bit test here, whereas the h2v2 routine tests rax -- harmless difference, but confirm it is intended
95
+ jle near .return ; no output rows requested
96
+
97
+ mov rdx, 0x00010000 ; bias pattern
98
+ vmovd xmm7, edx ; low dword of xmm7 = words {0, 1}
99
+ vpshufd xmm7, xmm7, 0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
100
+ vperm2i128 ymm7, ymm7, ymm7, 0 ; ymm7={xmm7, xmm7}
101
+ vpcmpeqw ymm6, ymm6, ymm6 ; ymm6 = all ones
102
+ vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..}
103
+
104
+ mov rsi, r14 ; input_data
105
+ mov rdi, r15 ; output_data
106
+ .rowloop: ; one iteration per output row; rax = rowctr, rcx = output_cols
107
+ push rcx ; column loop consumes rcx; restored for the next row
108
+ push rdi ; rdi/rsi are reused as sample pointers inside the row
109
+ push rsi
110
+
111
+ mov rsip, JSAMPROW [rsi] ; inptr
112
+ mov rdip, JSAMPROW [rdi] ; outptr
113
+
114
+ cmp rcx, byte SIZEOF_YMMWORD
115
+ jae short .columnloop ; at least one full YMMWORD of output remains: take the main path
116
+
117
+ .columnloop_r24: ; tail handling for a partial final chunk
118
+ ; rcx can possibly be 8, 16, 24
119
+ cmp rcx, 24
120
+ jne .columnloop_r16
121
+ vmovdqu ymm0, YMMWORD [rsi+0*SIZEOF_YMMWORD] ; first YMMWORD of input
122
+ vmovdqu xmm1, XMMWORD [rsi+1*SIZEOF_YMMWORD] ; remaining XMMWORD of input (VEX 128-bit load zeroes the upper half of ymm1)
123
+ mov rcx, SIZEOF_YMMWORD ; so the 'sub rcx' after the store reaches 0 and the row ends
124
+ jmp short .downsample
125
+
126
+ .columnloop_r16: ; tail of 16 output columns = one YMMWORD of input
127
+ cmp rcx, 16
128
+ jne .columnloop_r8
129
+ vmovdqu ymm0, YMMWORD [rsi+0*SIZEOF_YMMWORD]
130
+ vpxor ymm1, ymm1, ymm1 ; second input chunk is treated as zeros
131
+ mov rcx, SIZEOF_YMMWORD ; force loop exit after this chunk
132
+ jmp short .downsample
133
+
134
+ .columnloop_r8: ; tail of 8 output columns = one XMMWORD of input
135
+ vmovdqu xmm0, XMMWORD[rsi+0*SIZEOF_YMMWORD] ; displacement is 0 (0*SIZEOF_YMMWORD)
136
+ vpxor ymm1, ymm1, ymm1 ; second input chunk is treated as zeros
137
+ mov rcx, SIZEOF_YMMWORD ; force loop exit after this chunk
138
+ jmp short .downsample
139
+
140
+ .columnloop: ; main path: two YMMWORDs of input produce one YMMWORD of output
141
+ vmovdqu ymm0, YMMWORD [rsi+0*SIZEOF_YMMWORD]
142
+ vmovdqu ymm1, YMMWORD [rsi+1*SIZEOF_YMMWORD]
143
+
144
+ .downsample: ; ymm0/ymm1 = input samples, viewed as 16-bit words holding one byte pair each
145
+ vpsrlw ymm2, ymm0, BYTE_BIT ; ymm2 = high byte of each word (odd-indexed samples)
146
+ vpand ymm0, ymm0, ymm6 ; ymm0 = low byte of each word (even-indexed samples)
147
+ vpsrlw ymm3, ymm1, BYTE_BIT ; same split for the second chunk
148
+ vpand ymm1, ymm1, ymm6
149
+
150
+ vpaddw ymm0, ymm0, ymm2 ; even + odd = sum of each horizontal pair
151
+ vpaddw ymm1, ymm1, ymm3
152
+ vpaddw ymm0, ymm0, ymm7 ; add the alternating 0/1 bias
153
+ vpaddw ymm1, ymm1, ymm7
154
+ vpsrlw ymm0, ymm0, 1 ; divide by 2
155
+ vpsrlw ymm1, ymm1, 1
156
+
157
+ vpackuswb ymm0, ymm0, ymm1 ; narrow words to bytes (packs within each 128-bit lane)
158
+ vpermq ymm0, ymm0, 0xd8 ; reorder qwords to 0,2,1,3 so the bytes are in linear column order
159
+
160
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm0 ; NOTE(review): a full YMMWORD is stored on the tail paths too -- presumably output rows are padded; confirm
161
+
162
+ sub rcx, byte SIZEOF_YMMWORD ; outcol
163
+ add rsi, byte 2*SIZEOF_YMMWORD ; inptr
164
+ add rdi, byte 1*SIZEOF_YMMWORD ; outptr
165
+ cmp rcx, byte SIZEOF_YMMWORD
166
+ jae short .columnloop ; another full chunk remains
167
+ test rcx, rcx
168
+ jnz near .columnloop_r24 ; partial chunk left: handle via the tail paths
169
+
170
+ pop rsi ; restore row-pointer cursors and output_cols
171
+ pop rdi
172
+ pop rcx
173
+
174
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
175
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
176
+ dec rax ; rowctr
177
+ jg near .rowloop ; loop while output rows remain
178
+
179
+ .return: ; common exit, also taken by the early-outs above
180
+ vzeroupper ; clear upper YMM halves before returning to non-AVX code
181
+ uncollect_args 6 ; counterpart of collect_args (macro not visible here)
182
+ pop rbp
183
+ ret
184
+
185
+ ; --------------------------------------------------------------------------
186
+ ;
187
+ ; Downsample pixel values of a single component.
188
+ ; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
189
+ ; without smoothing.
190
+ ;
191
+ ; GLOBAL(void)
192
+ ; jsimd_h2v2_downsample_avx2(JDIMENSION image_width, int max_v_samp_factor,
193
+ ; JDIMENSION v_samp_factor,
194
+ ; JDIMENSION width_in_blocks, JSAMPARRAY input_data,
195
+ ; JSAMPARRAY output_data);
196
+ ;
197
+
198
+ ; r10d = JDIMENSION image_width
199
+ ; r11 = int max_v_samp_factor
200
+ ; r12d = JDIMENSION v_samp_factor
201
+ ; r13d = JDIMENSION width_in_blocks
202
+ ; r14 = JSAMPARRAY input_data
203
+ ; r15 = JSAMPARRAY output_data
204
+
205
+ align 32 ; 32-byte-align the function entry
206
+ GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2) ; macro not defined in this file (presumably from jsimdext.inc; exports the symbol)
207
+
208
+ EXTN(jsimd_h2v2_downsample_avx2): ; h2v2: pad each input row on the right, then average every 2x2 block of samples
209
+ push rbp
210
+ mov rax, rsp ; rax = copy of rsp (presumably consumed by collect_args; macro body not visible here)
211
+ mov rbp, rsp
212
+ collect_args 6 ; macro not defined in this file; per the register map above, the 6 args end up in r10..r15
213
+
214
+ mov ecx, r13d ; rcx = width_in_blocks (32-bit move zero-extends)
215
+ shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
216
+ jz near .return ; output_cols == 0: nothing to do
217
+
218
+ mov edx, r10d ; rdx = image_width (zero-extended)
219
+
220
+ ; -- expand_right_edge: replicate each row's last sample out to column output_cols*2
221
+
222
+ push rcx ; save output_cols; rcx becomes the pad count below
223
+ shl rcx, 1 ; output_cols * 2
224
+ sub rcx, rdx ; rcx = output_cols*2 - image_width = bytes of padding per row
225
+ jle short .expand_end ; row already wide enough: skip padding
226
+
227
+ mov rax, r11 ; rax = max_v_samp_factor = number of input rows to pad
228
+ test rax, rax
229
+ jle short .expand_end ; no rows to pad
230
+
231
+ cld ; clear DF so rep stosb walks upward in memory
232
+ mov rsi, r14 ; input_data
233
+ .expandloop: ; one iteration per input row; rax = rows left, rcx = pad count
234
+ push rax ; al is overwritten with the fill sample below
235
+ push rcx ; rep stosb counts rcx down to zero
236
+
237
+ mov rdip, JSAMPROW [rsi] ; load this row's sample pointer (rdip: presumably a pointer-width alias of rdi -- confirm in jsimdext.inc)
238
+ add rdi, rdx ; rdi = first byte past the valid samples
239
+ mov al, JSAMPLE [rdi-1] ; al = last valid sample of the row
240
+
241
+ rep stosb ; write al to [rdi], rcx times
242
+
243
+ pop rcx ; restore pad count
244
+ pop rax ; restore row counter
245
+
246
+ add rsi, byte SIZEOF_JSAMPROW ; advance to the next row-pointer slot
247
+ dec rax
248
+ jg short .expandloop ; loop while rows remain
249
+
250
+ .expand_end: ; all exits from the padding section land here with output_cols still on the stack
251
+ pop rcx ; output_cols
252
+
253
+ ; -- h2v2_downsample: out[i] = (r0[2i] + r0[2i+1] + r1[2i] + r1[2i+1] + bias) >> 2
254
+
255
+ mov eax, r12d ; rowctr
256
+ test rax, rax ; upper half of rax is zero after the 32-bit move, so only a zero count exits here
257
+ jle near .return ; no output rows requested
258
+
259
+ mov rdx, 0x00020001 ; bias pattern
260
+ vmovd xmm7, edx ; low dword of xmm7 = words {1, 2}
261
+ vpcmpeqw ymm6, ymm6, ymm6 ; ymm6 = all ones
262
+ vpshufd xmm7, xmm7, 0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
263
+ vperm2i128 ymm7, ymm7, ymm7, 0 ; ymm7={xmm7, xmm7}
264
+ vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..}
265
+
266
+ mov rsi, r14 ; input_data
267
+ mov rdi, r15 ; output_data
268
+ .rowloop: ; one iteration per output row (two input rows); rax = rowctr, rcx = output_cols
269
+ push rcx ; column loop consumes rcx; restored for the next row
270
+ push rdi ; rdi/rsi are reused as sample pointers inside the row
271
+ push rsi
272
+
273
+ mov rdxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
274
+ mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
275
+ mov rdip, JSAMPROW [rdi] ; outptr
276
+
277
+ cmp rcx, byte SIZEOF_YMMWORD
278
+ jae short .columnloop ; at least one full YMMWORD of output remains: take the main path
279
+
280
+ .columnloop_r24: ; tail handling for a partial final chunk (the h2v1 routine notes rcx can be 8, 16 or 24 here)
281
+ cmp rcx, 24
282
+ jne .columnloop_r16
283
+ vmovdqu ymm0, YMMWORD [rdx+0*SIZEOF_YMMWORD] ; row 0, first YMMWORD
284
+ vmovdqu ymm1, YMMWORD [rsi+0*SIZEOF_YMMWORD] ; row 1, first YMMWORD
285
+ vmovdqu xmm2, XMMWORD [rdx+1*SIZEOF_YMMWORD] ; row 0, remaining XMMWORD (VEX 128-bit load zeroes the upper half of ymm2)
286
+ vmovdqu xmm3, XMMWORD [rsi+1*SIZEOF_YMMWORD] ; row 1, remaining XMMWORD
287
+ mov rcx, SIZEOF_YMMWORD ; so the 'sub rcx' after the store reaches 0 and the row ends
288
+ jmp short .downsample
289
+
290
+ .columnloop_r16: ; tail of 16 output columns = one YMMWORD per input row
291
+ cmp rcx, 16
292
+ jne .columnloop_r8
293
+ vmovdqu ymm0, YMMWORD [rdx+0*SIZEOF_YMMWORD]
294
+ vmovdqu ymm1, YMMWORD [rsi+0*SIZEOF_YMMWORD]
295
+ vpxor ymm2, ymm2, ymm2 ; second input chunk of both rows is treated as zeros
296
+ vpxor ymm3, ymm3, ymm3
297
+ mov rcx, SIZEOF_YMMWORD ; force loop exit after this chunk
298
+ jmp short .downsample
299
+
300
+ .columnloop_r8: ; tail of 8 output columns = one XMMWORD per input row
301
+ vmovdqu xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
302
+ vmovdqu xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
303
+ vpxor ymm2, ymm2, ymm2 ; second input chunk of both rows is treated as zeros
304
+ vpxor ymm3, ymm3, ymm3
305
+ mov rcx, SIZEOF_YMMWORD ; force loop exit after this chunk
306
+ jmp short .downsample
307
+
308
+ .columnloop: ; main path: two YMMWORDs from each of two rows produce one YMMWORD of output
309
+ vmovdqu ymm0, YMMWORD [rdx+0*SIZEOF_YMMWORD]
310
+ vmovdqu ymm1, YMMWORD [rsi+0*SIZEOF_YMMWORD]
311
+ vmovdqu ymm2, YMMWORD [rdx+1*SIZEOF_YMMWORD]
312
+ vmovdqu ymm3, YMMWORD [rsi+1*SIZEOF_YMMWORD]
313
+
314
+ .downsample: ; ymm0/ymm2 = row 0 chunks, ymm1/ymm3 = row 1 chunks
315
+ vpand ymm4, ymm0, ymm6 ; row 0, first chunk: low byte of each word (even-indexed samples)
316
+ vpsrlw ymm0, ymm0, BYTE_BIT ; row 0, first chunk: high byte of each word (odd-indexed samples)
317
+ vpand ymm5, ymm1, ymm6 ; same split for row 1
318
+ vpsrlw ymm1, ymm1, BYTE_BIT
319
+ vpaddw ymm0, ymm0, ymm4 ; row 0 horizontal pair sums
320
+ vpaddw ymm1, ymm1, ymm5 ; row 1 horizontal pair sums
321
+
322
+ vpand ymm4, ymm2, ymm6 ; repeat the split/sum for the second chunk of each row
323
+ vpsrlw ymm2, ymm2, BYTE_BIT
324
+ vpand ymm5, ymm3, ymm6
325
+ vpsrlw ymm3, ymm3, BYTE_BIT
326
+ vpaddw ymm2, ymm2, ymm4
327
+ vpaddw ymm3, ymm3, ymm5
328
+
329
+ vpaddw ymm0, ymm0, ymm1 ; row 0 + row 1 = sum of each 2x2 block (first chunk)
330
+ vpaddw ymm2, ymm2, ymm3 ; same for the second chunk
331
+ vpaddw ymm0, ymm0, ymm7 ; add the alternating 1/2 bias
332
+ vpaddw ymm2, ymm2, ymm7
333
+ vpsrlw ymm0, ymm0, 2 ; divide by 4
334
+ vpsrlw ymm2, ymm2, 2
335
+
336
+ vpackuswb ymm0, ymm0, ymm2 ; narrow words to bytes (packs within each 128-bit lane)
337
+ vpermq ymm0, ymm0, 0xd8 ; reorder qwords to 0,2,1,3 so the bytes are in linear column order
338
+
339
+ vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm0 ; NOTE(review): a full YMMWORD is stored on the tail paths too -- presumably output rows are padded; confirm
340
+
341
+ sub rcx, byte SIZEOF_YMMWORD ; outcol
342
+ add rdx, byte 2*SIZEOF_YMMWORD ; inptr0
343
+ add rsi, byte 2*SIZEOF_YMMWORD ; inptr1
344
+ add rdi, byte 1*SIZEOF_YMMWORD ; outptr
345
+ cmp rcx, byte SIZEOF_YMMWORD
346
+ jae near .columnloop ; another full chunk remains
347
+ test rcx, rcx
348
+ jnz near .columnloop_r24 ; partial chunk left: handle via the tail paths
349
+
350
+ pop rsi ; restore row-pointer cursors and output_cols
351
+ pop rdi
352
+ pop rcx
353
+
354
+ add rsi, byte 2*SIZEOF_JSAMPROW ; input_data
355
+ add rdi, byte 1*SIZEOF_JSAMPROW ; output_data
356
+ dec rax ; rowctr
357
+ jg near .rowloop ; loop while output rows remain
358
+
359
+ .return: ; common exit, also taken by the early-outs above
360
+ vzeroupper ; clear upper YMM halves before returning to non-AVX code
361
+ uncollect_args 6 ; counterpart of collect_args (macro not visible here)
362
+ pop rbp
363
+ ret
364
+
365
+ ; For some reason, the OS X linker does not honor the request to align the
366
+ ; segment unless we do this.
367
+ align 32
@@ -0,0 +1,330 @@
1
+ ;
2
+ ; jcsample.asm - downsampling (64-bit SSE2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+
18
+ %include "jsimdext.inc"
19
+
20
+ ; --------------------------------------------------------------------------
21
+ SECTION SEG_TEXT
22
+ BITS 64
23
+ ;
24
+ ; Downsample pixel values of a single component.
25
+ ; This version handles the common case of 2:1 horizontal and 1:1 vertical,
26
+ ; without smoothing.
27
+ ;
28
+ ; GLOBAL(void)
29
+ ; jsimd_h2v1_downsample_sse2(JDIMENSION image_width, int max_v_samp_factor,
30
+ ; JDIMENSION v_samp_factor,
31
+ ; JDIMENSION width_in_blocks, JSAMPARRAY input_data,
32
+ ; JSAMPARRAY output_data);
33
+ ;
34
+
35
+ ; r10d = JDIMENSION image_width
36
+ ; r11 = int max_v_samp_factor
37
+ ; r12d = JDIMENSION v_samp_factor
38
+ ; r13d = JDIMENSION width_in_blocks
39
+ ; r14 = JSAMPARRAY input_data
40
+ ; r15 = JSAMPARRAY output_data
41
+
42
+ align 32
43
+ GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
44
+ ; Pads each input row on the right, then averages every horizontal sample pair into one output sample.
45
+ EXTN(jsimd_h2v1_downsample_sse2):
46
+ push rbp
47
+ mov rax, rsp ; rax = rsp after the push; presumably consumed by collect_args -- confirm in jsimdext.inc
48
+ mov rbp, rsp
49
+ collect_args 6
50
+ ; collect_args/uncollect_args are macros from jsimdext.inc; the code below reads the six arguments from r10-r15.
51
+ mov ecx, r13d ; rcx = width_in_blocks (32-bit move zero-extends)
52
+ shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
53
+ jz near .return ; zero output columns: nothing to do
54
+
55
+ mov edx, r10d ; rdx = image_width
56
+
57
+ ; -- expand_right_edge: replicate the last real sample of each row up to column output_cols*2
58
+
59
+ push rcx ; save output_cols across the padding phase
60
+ shl rcx, 1 ; output_cols * 2
61
+ sub rcx, rdx ; rcx = number of padding columns (output_cols*2 - image_width)
62
+ jle short .expand_end ; row is already wide enough
63
+
64
+ mov rax, r11 ; rax = max_v_samp_factor, used as the count of rows to pad
65
+ test rax, rax
66
+ jle short .expand_end ; no rows to pad
67
+
68
+ cld ; direction flag clear so rep stosb walks upward
69
+ mov rsi, r14 ; input_data
70
+ .expandloop:
71
+ push rax ; save row counter (al is overwritten below)
72
+ push rcx ; save pad count (rep stosb counts rcx down to zero)
73
+
74
+ mov rdip, JSAMPROW [rsi] ; rdi = current row pointer (rdip presumably a pointer-width alias from jsimdext.inc)
75
+ add rdi, rdx ; rdi = one past the last real sample
76
+ mov al, JSAMPLE [rdi-1] ; al = last real sample of the row
77
+
78
+ rep stosb ; write al into the rcx padding columns
79
+
80
+ pop rcx
81
+ pop rax
82
+
83
+ add rsi, byte SIZEOF_JSAMPROW ; advance to the next row pointer
84
+ dec rax
85
+ jg short .expandloop
86
+
87
+ .expand_end:
88
+ pop rcx ; output_cols
89
+
90
+ ; -- h2v1_downsample
91
+
92
+ mov eax, r12d ; rowctr
93
+ test eax, eax
94
+ jle near .return ; no output rows requested
95
+
96
+ mov rdx, 0x00010000 ; bias pattern
97
+ movd xmm7, edx ; low dword of xmm7 = words {0, 1}
98
+ pcmpeqw xmm6, xmm6 ; xmm6 = all ones
99
+ pshufd xmm7, xmm7, 0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
100
+ psrlw xmm6, BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
101
+
102
+ mov rsi, r14 ; input_data
103
+ mov rdi, r15 ; output_data
104
+ .rowloop:
105
+ push rcx ; save output_cols; the column loop consumes rcx
106
+ push rdi ; save output row-pointer cursor
107
+ push rsi ; save input row-pointer cursor
108
+
109
+ mov rsip, JSAMPROW [rsi] ; inptr
110
+ mov rdip, JSAMPROW [rdi] ; outptr
111
+
112
+ cmp rcx, byte SIZEOF_XMMWORD
113
+ jae short .columnloop ; at least one full output vector remains
114
+ ; Tail case: fewer than SIZEOF_XMMWORD output columns remain.
115
+ .columnloop_r8:
116
+ movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; load only the first input vector
117
+ pxor xmm1, xmm1 ; second input vector treated as zero
118
+ mov rcx, SIZEOF_XMMWORD ; force the count to reach zero after this pass
119
+ jmp short .downsample
120
+ ; Main case: consume two input vectors, produce one output vector.
121
+ .columnloop:
122
+ movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
123
+ movdqa xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD]
124
+
125
+ .downsample:
126
+ movdqa xmm2, xmm0 ; working copies for the odd-byte extraction
127
+ movdqa xmm3, xmm1
128
+
129
+ pand xmm0, xmm6 ; keep the low byte of each word (even-indexed samples)
130
+ psrlw xmm2, BYTE_BIT ; high byte of each word (odd-indexed samples)
131
+ pand xmm1, xmm6
132
+ psrlw xmm3, BYTE_BIT
133
+
134
+ paddw xmm0, xmm2 ; sum of each horizontal pair, as 16-bit words
135
+ paddw xmm1, xmm3
136
+ paddw xmm0, xmm7 ; add the alternating 0/1 bias
137
+ paddw xmm1, xmm7
138
+ psrlw xmm0, 1 ; divide by 2
139
+ psrlw xmm1, 1
140
+
141
+ packuswb xmm0, xmm1 ; pack the 16 word results into 16 bytes
142
+
143
+ movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 ; full-vector aligned store, also on the tail path
144
+
145
+ sub rcx, byte SIZEOF_XMMWORD ; outcol
146
+ add rsi, byte 2*SIZEOF_XMMWORD ; inptr
147
+ add rdi, byte 1*SIZEOF_XMMWORD ; outptr
148
+ cmp rcx, byte SIZEOF_XMMWORD
149
+ jae short .columnloop ; another full vector remains
150
+ test rcx, rcx
151
+ jnz short .columnloop_r8 ; nonzero remainder: run the tail case once
152
+
153
+ pop rsi ; restore the row-pointer cursors and output_cols
154
+ pop rdi
155
+ pop rcx
156
+
157
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
158
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
159
+ dec rax ; rowctr
160
+ jg near .rowloop
161
+
162
+ .return:
163
+ uncollect_args 6
164
+ pop rbp
165
+ ret
166
+
167
+ ; --------------------------------------------------------------------------
168
+ ;
169
+ ; Downsample pixel values of a single component.
170
+ ; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
171
+ ; without smoothing.
172
+ ;
173
+ ; GLOBAL(void)
174
+ ; jsimd_h2v2_downsample_sse2(JDIMENSION image_width, int max_v_samp_factor,
175
+ ; JDIMENSION v_samp_factor,
176
+ ; JDIMENSION width_in_blocks, JSAMPARRAY input_data,
177
+ ; JSAMPARRAY output_data);
178
+ ;
179
+
180
+ ; r10d = JDIMENSION image_width
181
+ ; r11 = int max_v_samp_factor
182
+ ; r12d = JDIMENSION v_samp_factor
183
+ ; r13d = JDIMENSION width_in_blocks
184
+ ; r14 = JSAMPARRAY input_data
185
+ ; r15 = JSAMPARRAY output_data
186
+
187
+ align 32
188
+ GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
189
+ ; Pads each input row on the right, then averages every 2x2 sample group (two rows, two columns) into one output sample.
190
+ EXTN(jsimd_h2v2_downsample_sse2):
191
+ push rbp
192
+ mov rax, rsp ; rax = rsp after the push; presumably consumed by collect_args -- confirm in jsimdext.inc
193
+ mov rbp, rsp
194
+ collect_args 6
195
+ ; collect_args/uncollect_args are macros from jsimdext.inc; the code below reads the six arguments from r10-r15.
196
+ mov ecx, r13d ; rcx = width_in_blocks (32-bit move zero-extends)
197
+ shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
198
+ jz near .return ; zero output columns: nothing to do
199
+
200
+ mov edx, r10d ; rdx = image_width
201
+
202
+ ; -- expand_right_edge: replicate the last real sample of each row up to column output_cols*2
203
+
204
+ push rcx ; save output_cols across the padding phase
205
+ shl rcx, 1 ; output_cols * 2
206
+ sub rcx, rdx ; rcx = number of padding columns (output_cols*2 - image_width)
207
+ jle short .expand_end ; row is already wide enough
208
+
209
+ mov rax, r11 ; rax = max_v_samp_factor, used as the count of rows to pad
210
+ test rax, rax
211
+ jle short .expand_end ; no rows to pad
212
+
213
+ cld ; direction flag clear so rep stosb walks upward
214
+ mov rsi, r14 ; input_data
215
+ .expandloop:
216
+ push rax ; save row counter (al is overwritten below)
217
+ push rcx ; save pad count (rep stosb counts rcx down to zero)
218
+
219
+ mov rdip, JSAMPROW [rsi] ; rdi = current row pointer (rdip presumably a pointer-width alias from jsimdext.inc)
220
+ add rdi, rdx ; rdi = one past the last real sample
221
+ mov al, JSAMPLE [rdi-1] ; al = last real sample of the row
222
+
223
+ rep stosb ; write al into the rcx padding columns
224
+
225
+ pop rcx
226
+ pop rax
227
+
228
+ add rsi, byte SIZEOF_JSAMPROW ; advance to the next row pointer
229
+ dec rax
230
+ jg short .expandloop
231
+
232
+ .expand_end:
233
+ pop rcx ; output_cols
234
+
235
+ ; -- h2v2_downsample
236
+
237
+ mov eax, r12d ; rowctr
238
+ test rax, rax
239
+ jle near .return ; no output rows requested
240
+
241
+ mov rdx, 0x00020001 ; bias pattern
242
+ movd xmm7, edx ; low dword of xmm7 = words {1, 2}
243
+ pcmpeqw xmm6, xmm6 ; xmm6 = all ones
244
+ pshufd xmm7, xmm7, 0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
245
+ psrlw xmm6, BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
246
+
247
+ mov rsi, r14 ; input_data
248
+ mov rdi, r15 ; output_data
249
+ .rowloop:
250
+ push rcx ; save output_cols; the column loop consumes rcx
251
+ push rdi ; save output row-pointer cursor
252
+ push rsi ; save input row-pointer cursor
253
+
254
+ mov rdxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
255
+ mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
256
+ mov rdip, JSAMPROW [rdi] ; outptr
257
+
258
+ cmp rcx, byte SIZEOF_XMMWORD
259
+ jae short .columnloop ; at least one full output vector remains
260
+ ; Tail case: fewer than SIZEOF_XMMWORD output columns remain.
261
+ .columnloop_r8:
262
+ movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD] ; first vector of the upper row
263
+ movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; first vector of the lower row
264
+ pxor xmm2, xmm2 ; second vectors treated as zero
265
+ pxor xmm3, xmm3
266
+ mov rcx, SIZEOF_XMMWORD ; force the count to reach zero after this pass
267
+ jmp short .downsample
268
+ ; Main case: consume two vectors from each input row, produce one output vector.
269
+ .columnloop:
270
+ movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
271
+ movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
272
+ movdqa xmm2, XMMWORD [rdx+1*SIZEOF_XMMWORD]
273
+ movdqa xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD]
274
+
275
+ .downsample:
276
+ movdqa xmm4, xmm0 ; working copies for the odd-byte extraction
277
+ movdqa xmm5, xmm1
278
+ pand xmm0, xmm6 ; keep the low byte of each word (even-indexed samples)
279
+ psrlw xmm4, BYTE_BIT ; high byte of each word (odd-indexed samples)
280
+ pand xmm1, xmm6
281
+ psrlw xmm5, BYTE_BIT
282
+ paddw xmm0, xmm4 ; horizontal pair sums, upper row, first vector
283
+ paddw xmm1, xmm5 ; horizontal pair sums, lower row, first vector
284
+ ; Same pair-sum step for the second vector of each row.
285
+ movdqa xmm4, xmm2
286
+ movdqa xmm5, xmm3
287
+ pand xmm2, xmm6
288
+ psrlw xmm4, BYTE_BIT
289
+ pand xmm3, xmm6
290
+ psrlw xmm5, BYTE_BIT
291
+ paddw xmm2, xmm4
292
+ paddw xmm3, xmm5
293
+
294
+ paddw xmm0, xmm1 ; add upper and lower rows: sum of each 2x2 group
295
+ paddw xmm2, xmm3
296
+ paddw xmm0, xmm7 ; add the alternating 1/2 bias
297
+ paddw xmm2, xmm7
298
+ psrlw xmm0, 2 ; divide by 4
299
+ psrlw xmm2, 2
300
+
301
+ packuswb xmm0, xmm2 ; pack the 16 word results into 16 bytes
302
+
303
+ movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 ; full-vector aligned store, also on the tail path
304
+
305
+ sub rcx, byte SIZEOF_XMMWORD ; outcol
306
+ add rdx, byte 2*SIZEOF_XMMWORD ; inptr0
307
+ add rsi, byte 2*SIZEOF_XMMWORD ; inptr1
308
+ add rdi, byte 1*SIZEOF_XMMWORD ; outptr
309
+ cmp rcx, byte SIZEOF_XMMWORD
310
+ jae near .columnloop ; another full vector remains
311
+ test rcx, rcx
312
+ jnz near .columnloop_r8 ; nonzero remainder: run the tail case once
313
+
314
+ pop rsi ; restore the row-pointer cursors and output_cols
315
+ pop rdi
316
+ pop rcx
317
+
318
+ add rsi, byte 2*SIZEOF_JSAMPROW ; input_data
319
+ add rdi, byte 1*SIZEOF_JSAMPROW ; output_data
320
+ dec rax ; rowctr
321
+ jg near .rowloop
322
+
323
+ .return:
324
+ uncollect_args 6
325
+ pop rbp
326
+ ret
327
+
328
+ ; For some reason, the OS X linker does not honor the request to align the
329
+ ; segment unless we do this.
330
+ align 32