image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,665 @@
1
+ ;
2
+ ; jdsample.asm - upsampling (64-bit SSE2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; and can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+
18
+ %include "jsimdext.inc"
19
+
20
+ ; --------------------------------------------------------------------------
21
+ SECTION SEG_CONST ; constant-data section (SEG_CONST is defined outside this file, presumably in jsimdext.inc)
22
+
23
+ alignz 32 ; alignz is not defined here; presumably pads to a 32-byte boundary - confirm in jsimdext.inc
24
+ GLOBAL_DATA(jconst_fancy_upsample_sse2) ; export the table's label (macro defined outside this file)
25
+
26
+ EXTN(jconst_fancy_upsample_sse2): ; start of the packed-word constants used by the fancy upsamplers
27
+
28
+ PW_ONE times 8 dw 1 ; eight 16-bit words of 1; rounding bias for the even h2v1 outputs
29
+ PW_TWO times 8 dw 2 ; eight 16-bit words of 2; rounding bias for the odd h2v1 outputs
30
+ PW_THREE times 8 dw 3 ; eight 16-bit words of 3; weight applied to the nearer (centre) sample
31
+ PW_SEVEN times 8 dw 7 ; eight 16-bit words of 7
32
+ PW_EIGHT times 8 dw 8 ; eight 16-bit words of 8
33
+
34
+ alignz 32 ; trailing alignment directive, same macro as above
35
+
36
+ ; --------------------------------------------------------------------------
37
+ SECTION SEG_TEXT
38
+ BITS 64
39
+ ;
40
+ ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
41
+ ;
42
+ ; The upsampling algorithm is linear interpolation between pixel centers,
43
+ ; also known as a "triangle filter". This is a good compromise between
44
+ ; speed and visual quality. The centers of the output pixels are 1/4 and 3/4
45
+ ; of the way between input pixel centers.
46
+ ;
47
+ ; GLOBAL(void)
48
+ ; jsimd_h2v1_fancy_upsample_sse2(int max_v_samp_factor,
49
+ ; JDIMENSION downsampled_width,
50
+ ; JSAMPARRAY input_data,
51
+ ; JSAMPARRAY *output_data_ptr);
52
+ ; Each input sample s[i] yields two outputs: (3*s[i] + s[i-1] + 1) >> 2 and (3*s[i] + s[i+1] + 2) >> 2.
53
+
54
+ ; r10 = int max_v_samp_factor (used as the row count; the function returns at once when it is 0)
55
+ ; r11d = JDIMENSION downsampled_width (input samples per row; the function returns at once when it is 0)
56
+ ; r12 = JSAMPARRAY input_data (array of input row pointers)
57
+ ; r13 = JSAMPARRAY *output_data_ptr (dereferenced once to get the array of output row pointers)
58
+
59
+ align 32
60
+ GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
61
+
62
+ EXTN(jsimd_h2v1_fancy_upsample_sse2):
63
+ push rbp
64
+ mov rax, rsp ; NOTE(review): rax is overwritten below; presumably collect_args reads it - confirm in jsimdext.inc
65
+ mov rbp, rsp
66
+ collect_args 4 ; macro defined outside this file; presumably loads the 4 arguments into r10-r13 as listed above
67
+
68
+ mov eax, r11d ; colctr = downsampled_width (32-bit move zero-extends into rax)
69
+ test rax, rax
70
+ jz near .return ; zero-width rows: nothing to do
71
+
72
+ mov rcx, r10 ; rowctr
73
+ test rcx, rcx
74
+ jz near .return ; zero rows: nothing to do
75
+
76
+ mov rsi, r12 ; input_data
77
+ mov rdi, r13
78
+ mov rdip, JSAMPARRAY [rdi] ; output_data = *output_data_ptr
79
+ .rowloop:
80
+ push rax ; colctr (saved because the column loop counts rax down)
81
+ push rdi ; save the output row-pointer cursor; rdi is reused as outptr
82
+ push rsi ; save the input row-pointer cursor; rsi is reused as inptr
83
+
84
+ mov rsip, JSAMPROW [rsi] ; inptr
85
+ mov rdip, JSAMPROW [rdi] ; outptr
86
+
87
+ test rax, SIZEOF_XMMWORD-1
88
+ jz short .skip ; width is already a whole number of XMMWORDs: no padding sample needed
89
+ mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] ; dl = last real sample of the row
90
+ mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample: copy the last sample one slot past the end (writes to the input row)
91
+ .skip:
92
+ pxor xmm0, xmm0 ; xmm0=(all 0's)
93
+ pcmpeqb xmm7, xmm7 ; xmm7 = all 1's
94
+ psrldq xmm7, (SIZEOF_XMMWORD-1) ; keep only the lowest byte set: mask for sample 0
95
+ pand xmm7, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm7 = sample 0 in the lowest byte; it serves as the "-1" neighbour of the first block
96
+
97
+ add rax, byte SIZEOF_XMMWORD-1
98
+ and rax, byte -SIZEOF_XMMWORD ; round colctr up to a multiple of SIZEOF_XMMWORD
99
+ cmp rax, byte SIZEOF_XMMWORD
100
+ ja short .columnloop ; more than one block left: the right neighbour comes from the next block
101
+
102
+ .columnloop_last:
103
+ pcmpeqb xmm6, xmm6 ; xmm6 = all 1's
104
+ pslldq xmm6, (SIZEOF_XMMWORD-1) ; keep only the highest byte set: mask for the block's last sample
105
+ pand xmm6, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm6 = last sample of this block in the top byte; it serves as its own right neighbour
106
+ jmp short .upsample
107
+
108
+ .columnloop:
109
+ movdqa xmm6, XMMWORD [rsi+1*SIZEOF_XMMWORD] ; aligned load of the next input block
110
+ pslldq xmm6, (SIZEOF_XMMWORD-1) ; xmm6 = first sample of the next block moved to the top byte
111
+
112
+ .upsample:
113
+ movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; aligned load of the current input block
114
+ movdqa xmm2, xmm1
115
+ movdqa xmm3, xmm1 ; xmm1=( 0 1 2 ... 13 14 15)
116
+ pslldq xmm2, 1 ; xmm2=(-- 0 1 ... 12 13 14)
117
+ psrldq xmm3, 1 ; xmm3=( 1 2 3 ... 14 15 --)
118
+
119
+ por xmm2, xmm7 ; xmm2=(-1 0 1 ... 12 13 14)
120
+ por xmm3, xmm6 ; xmm3=( 1 2 3 ... 14 15 16)
121
+
122
+ movdqa xmm7, xmm1
123
+ psrldq xmm7, (SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --)
124
+
125
+ movdqa xmm4, xmm1
126
+ punpcklbw xmm1, xmm0 ; xmm1=( 0 1 2 3 4 5 6 7)
127
+ punpckhbw xmm4, xmm0 ; xmm4=( 8 9 10 11 12 13 14 15)
128
+ movdqa xmm5, xmm2
129
+ punpcklbw xmm2, xmm0 ; xmm2=(-1 0 1 2 3 4 5 6)
130
+ punpckhbw xmm5, xmm0 ; xmm5=( 7 8 9 10 11 12 13 14)
131
+ movdqa xmm6, xmm3
132
+ punpcklbw xmm3, xmm0 ; xmm3=( 1 2 3 4 5 6 7 8)
133
+ punpckhbw xmm6, xmm0 ; xmm6=( 9 10 11 12 13 14 15 16)
134
+
135
+ pmullw xmm1, [rel PW_THREE] ; centre samples * 3 (low half)
136
+ pmullw xmm4, [rel PW_THREE] ; centre samples * 3 (high half)
137
+ paddw xmm2, [rel PW_ONE] ; left neighbours + rounding bias 1
138
+ paddw xmm5, [rel PW_ONE]
139
+ paddw xmm3, [rel PW_TWO] ; right neighbours + rounding bias 2
140
+ paddw xmm6, [rel PW_TWO]
141
+
142
+ paddw xmm2, xmm1 ; 3*centre + left + 1
143
+ paddw xmm5, xmm4
144
+ psrlw xmm2, 2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14)
145
+ psrlw xmm5, 2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30)
146
+ paddw xmm3, xmm1 ; 3*centre + right + 2
147
+ paddw xmm6, xmm4
148
+ psrlw xmm3, 2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15)
149
+ psrlw xmm6, 2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31)
150
+
151
+ psllw xmm3, BYTE_BIT ; move each odd output into the high byte of its word
152
+ psllw xmm6, BYTE_BIT
153
+ por xmm2, xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15)
154
+ por xmm5, xmm6 ; xmm5=OutH=(16 17 18 ... 29 30 31)
155
+
156
+ movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 ; aligned store: two output XMMWORDs per input XMMWORD
157
+ movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm5
158
+
159
+ sub rax, byte SIZEOF_XMMWORD ; one input block consumed
160
+ add rsi, byte 1*SIZEOF_XMMWORD ; inptr
161
+ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
162
+ cmp rax, byte SIZEOF_XMMWORD
163
+ ja near .columnloop ; more than one block still to do
164
+ test eax, eax
165
+ jnz near .columnloop_last ; exactly one block left: take the edge-replicating path
166
+
167
+ pop rsi ; restore the row-pointer cursors and colctr saved at .rowloop
168
+ pop rdi
169
+ pop rax
170
+
171
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
172
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
173
+ dec rcx ; rowctr
174
+ jg near .rowloop ; loop while rowctr is still > 0 (signed compare)
175
+
176
+ .return:
177
+ uncollect_args 4 ; macro defined outside this file; presumably undoes collect_args
178
+ pop rbp
179
+ ret
180
+
181
+ ; --------------------------------------------------------------------------
182
+ ;
183
+ ; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
184
+ ; Again a triangle filter; see comments for h2v1 case, above.
185
+ ;
186
+ ; GLOBAL(void)
187
+ ; jsimd_h2v2_fancy_upsample_sse2(int max_v_samp_factor,
188
+ ; JDIMENSION downsampled_width,
189
+ ; JSAMPARRAY input_data,
190
+ ; JSAMPARRAY *output_data_ptr);
191
+ ;
192
+
193
+ ; r10 = int max_v_samp_factor
194
+ ; r11d = JDIMENSION downsampled_width
195
+ ; r12 = JSAMPARRAY input_data
196
+ ; r13 = JSAMPARRAY *output_data_ptr
197
+
198
+ %define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
199
+ %define WK_NUM 4
200
+
201
+ align 32
202
+ GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
203
+
204
+ EXTN(jsimd_h2v2_fancy_upsample_sse2):
205
+ push rbp
206
+ mov rax, rsp ; rax = original rbp
207
+ sub rsp, byte 4
208
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
209
+ mov [rsp], rax
210
+ mov rbp, rsp ; rbp = aligned rbp
211
+ lea rsp, [wk(0)]
212
+ collect_args 4
213
+ push rbx
214
+
215
+ mov eax, r11d ; colctr
216
+ test rax, rax
217
+ jz near .return
218
+
219
+ mov rcx, r10 ; rowctr
220
+ test rcx, rcx
221
+ jz near .return
222
+
223
+ mov rsi, r12 ; input_data
224
+ mov rdi, r13
225
+ mov rdip, JSAMPARRAY [rdi] ; output_data
226
+ .rowloop:
227
+ push rax ; colctr
228
+ push rcx
229
+ push rdi
230
+ push rsi
231
+
232
+ mov rcxp, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
233
+ mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
234
+ mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
235
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
236
+ mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
237
+
238
+ test rax, SIZEOF_XMMWORD-1
239
+ jz short .skip
240
+ push rdx
241
+ mov dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE]
242
+ mov JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl
243
+ mov dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE]
244
+ mov JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl
245
+ mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE]
246
+ mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample
247
+ pop rdx
248
+ .skip:
249
+ ; -- process the first column block
250
+
251
+ movdqa xmm0, XMMWORD [rbx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0]
252
+ movdqa xmm1, XMMWORD [rcx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0]
253
+ movdqa xmm2, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0]
254
+
255
+ pxor xmm3, xmm3 ; xmm3=(all 0's)
256
+ movdqa xmm4, xmm0
257
+ punpcklbw xmm0, xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7)
258
+ punpckhbw xmm4, xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15)
259
+ movdqa xmm5, xmm1
260
+ punpcklbw xmm1, xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7)
261
+ punpckhbw xmm5, xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15)
262
+ movdqa xmm6, xmm2
263
+ punpcklbw xmm2, xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7)
264
+ punpckhbw xmm6, xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15)
265
+
266
+ pmullw xmm0, [rel PW_THREE]
267
+ pmullw xmm4, [rel PW_THREE]
268
+
269
+ pcmpeqb xmm7, xmm7
270
+ psrldq xmm7, (SIZEOF_XMMWORD-2)
271
+
272
+ paddw xmm1, xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7)
273
+ paddw xmm5, xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15)
274
+ paddw xmm2, xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7)
275
+ paddw xmm6, xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15)
276
+
277
+ movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save
278
+ movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data
279
+ movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2
280
+ movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm6
281
+
282
+ pand xmm1, xmm7 ; xmm1=( 0 -- -- -- -- -- -- --)
283
+ pand xmm2, xmm7 ; xmm2=( 0 -- -- -- -- -- -- --)
284
+
285
+ movdqa XMMWORD [wk(0)], xmm1
286
+ movdqa XMMWORD [wk(1)], xmm2
287
+
288
+ add rax, byte SIZEOF_XMMWORD-1
289
+ and rax, byte -SIZEOF_XMMWORD
290
+ cmp rax, byte SIZEOF_XMMWORD
291
+ ja short .columnloop
292
+
293
+ .columnloop_last:
294
+ ; -- process the last column block
295
+
296
+ pcmpeqb xmm1, xmm1
297
+ pslldq xmm1, (SIZEOF_XMMWORD-2)
298
+ movdqa xmm2, xmm1
299
+
300
+ pand xmm1, XMMWORD [rdx+1*SIZEOF_XMMWORD]
301
+ pand xmm2, XMMWORD [rdi+1*SIZEOF_XMMWORD]
302
+
303
+ movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15)
304
+ movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15)
305
+
306
+ jmp near .upsample
307
+
308
+ .columnloop:
309
+ ; -- process the next column block
310
+
311
+ movdqa xmm0, XMMWORD [rbx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1]
312
+ movdqa xmm1, XMMWORD [rcx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1]
313
+ movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1]
314
+
315
+ pxor xmm3, xmm3 ; xmm3=(all 0's)
316
+ movdqa xmm4, xmm0
317
+ punpcklbw xmm0, xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7)
318
+ punpckhbw xmm4, xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15)
319
+ movdqa xmm5, xmm1
320
+ punpcklbw xmm1, xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7)
321
+ punpckhbw xmm5, xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15)
322
+ movdqa xmm6, xmm2
323
+ punpcklbw xmm2, xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7)
324
+ punpckhbw xmm6, xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15)
325
+
326
+ pmullw xmm0, [rel PW_THREE]
327
+ pmullw xmm4, [rel PW_THREE]
328
+
329
+ paddw xmm1, xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7)
330
+ paddw xmm5, xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15)
331
+ paddw xmm2, xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7)
332
+ paddw xmm6, xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15)
333
+
334
+ movdqa XMMWORD [rdx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save
335
+ movdqa XMMWORD [rdx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data
336
+ movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
337
+ movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm6
338
+
339
+ pslldq xmm1, (SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0)
340
+ pslldq xmm2, (SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0)
341
+
342
+ movdqa XMMWORD [wk(2)], xmm1
343
+ movdqa XMMWORD [wk(3)], xmm2
344
+
345
+ .upsample:
346
+ ; -- process the upper row
347
+
348
+ movdqa xmm7, XMMWORD [rdx+0*SIZEOF_XMMWORD]
349
+ movdqa xmm3, XMMWORD [rdx+1*SIZEOF_XMMWORD]
350
+
351
+ movdqa xmm0, xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7)
352
+ movdqa xmm4, xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15)
353
+ psrldq xmm0, 2 ; xmm0=( 1 2 3 4 5 6 7 --)
354
+ pslldq xmm4, (SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8)
355
+ movdqa xmm5, xmm7
356
+ movdqa xmm6, xmm3
357
+ psrldq xmm5, (SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --)
358
+ pslldq xmm6, 2 ; xmm6=(-- 8 9 10 11 12 13 14)
359
+
360
+ por xmm0, xmm4 ; xmm0=( 1 2 3 4 5 6 7 8)
361
+ por xmm5, xmm6 ; xmm5=( 7 8 9 10 11 12 13 14)
362
+
363
+ movdqa xmm1, xmm7
364
+ movdqa xmm2, xmm3
365
+ pslldq xmm1, 2 ; xmm1=(-- 0 1 2 3 4 5 6)
366
+ psrldq xmm2, 2 ; xmm2=( 9 10 11 12 13 14 15 --)
367
+ movdqa xmm4, xmm3
368
+ psrldq xmm4, (SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --)
369
+
370
+ por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6)
371
+ por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16)
372
+
373
+ movdqa XMMWORD [wk(0)], xmm4
374
+
375
+ pmullw xmm7, [rel PW_THREE]
376
+ pmullw xmm3, [rel PW_THREE]
377
+ paddw xmm1, [rel PW_EIGHT]
378
+ paddw xmm5, [rel PW_EIGHT]
379
+ paddw xmm0, [rel PW_SEVEN]
380
+ paddw xmm2, [rel PW_SEVEN]
381
+
382
+ paddw xmm1, xmm7
383
+ paddw xmm5, xmm3
384
+ psrlw xmm1, 4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14)
385
+ psrlw xmm5, 4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30)
386
+ paddw xmm0, xmm7
387
+ paddw xmm2, xmm3
388
+ psrlw xmm0, 4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15)
389
+ psrlw xmm2, 4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31)
390
+
391
+ psllw xmm0, BYTE_BIT
392
+ psllw xmm2, BYTE_BIT
393
+ por xmm1, xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15)
394
+ por xmm5, xmm2 ; xmm5=Out0H=(16 17 18 ... 29 30 31)
395
+
396
+ movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1
397
+ movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5
398
+
399
+ ; -- process the lower row
400
+
401
+ movdqa xmm6, XMMWORD [rdi+0*SIZEOF_XMMWORD]
402
+ movdqa xmm4, XMMWORD [rdi+1*SIZEOF_XMMWORD]
403
+
404
+ movdqa xmm7, xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7)
405
+ movdqa xmm3, xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15)
406
+ psrldq xmm7, 2 ; xmm7=( 1 2 3 4 5 6 7 --)
407
+ pslldq xmm3, (SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8)
408
+ movdqa xmm0, xmm6
409
+ movdqa xmm2, xmm4
410
+ psrldq xmm0, (SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --)
411
+ pslldq xmm2, 2 ; xmm2=(-- 8 9 10 11 12 13 14)
412
+
413
+ por xmm7, xmm3 ; xmm7=( 1 2 3 4 5 6 7 8)
414
+ por xmm0, xmm2 ; xmm0=( 7 8 9 10 11 12 13 14)
415
+
416
+ movdqa xmm1, xmm6
417
+ movdqa xmm5, xmm4
418
+ pslldq xmm1, 2 ; xmm1=(-- 0 1 2 3 4 5 6)
419
+ psrldq xmm5, 2 ; xmm5=( 9 10 11 12 13 14 15 --)
420
+ movdqa xmm3, xmm4
421
+ psrldq xmm3, (SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --)
422
+
423
+ por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6)
424
+ por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16)
425
+
426
+ movdqa XMMWORD [wk(1)], xmm3
427
+
428
+ pmullw xmm6, [rel PW_THREE]
429
+ pmullw xmm4, [rel PW_THREE]
430
+ paddw xmm1, [rel PW_EIGHT]
431
+ paddw xmm0, [rel PW_EIGHT]
432
+ paddw xmm7, [rel PW_SEVEN]
433
+ paddw xmm5, [rel PW_SEVEN]
434
+
435
+ paddw xmm1, xmm6
436
+ paddw xmm0, xmm4
437
+ psrlw xmm1, 4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14)
438
+ psrlw xmm0, 4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30)
439
+ paddw xmm7, xmm6
440
+ paddw xmm5, xmm4
441
+ psrlw xmm7, 4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15)
442
+ psrlw xmm5, 4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31)
443
+
444
+ psllw xmm7, BYTE_BIT
445
+ psllw xmm5, BYTE_BIT
446
+ por xmm1, xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15)
447
+ por xmm0, xmm5 ; xmm0=Out1H=(16 17 18 ... 29 30 31)
448
+
449
+ movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm1
450
+ movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm0
451
+
452
+ sub rax, byte SIZEOF_XMMWORD
453
+ add rcx, byte 1*SIZEOF_XMMWORD ; inptr1(above)
454
+ add rbx, byte 1*SIZEOF_XMMWORD ; inptr0
455
+ add rsi, byte 1*SIZEOF_XMMWORD ; inptr1(below)
456
+ add rdx, byte 2*SIZEOF_XMMWORD ; outptr0
457
+ add rdi, byte 2*SIZEOF_XMMWORD ; outptr1
458
+ cmp rax, byte SIZEOF_XMMWORD
459
+ ja near .columnloop
460
+ test rax, rax
461
+ jnz near .columnloop_last
462
+
463
+ pop rsi
464
+ pop rdi
465
+ pop rcx
466
+ pop rax
467
+
468
+ add rsi, byte 1*SIZEOF_JSAMPROW ; input_data
469
+ add rdi, byte 2*SIZEOF_JSAMPROW ; output_data
470
+ sub rcx, byte 2 ; rowctr
471
+ jg near .rowloop
472
+
473
+ .return:
474
+ pop rbx
475
+ uncollect_args 4
476
+ mov rsp, rbp ; rsp <- aligned rbp
477
+ pop rsp ; rsp <- original rbp
478
+ pop rbp
479
+ ret
480
+
481
+ ; --------------------------------------------------------------------------
482
+ ;
483
+ ; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
484
+ ; It's still a box filter: every input byte is written twice in a row.
485
+ ;
486
+ ; GLOBAL(void)
487
+ ; jsimd_h2v1_upsample_sse2(int max_v_samp_factor, JDIMENSION output_width,
488
+ ; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr);
489
+ ;
490
+
491
+ ; r10 = int max_v_samp_factor (used as the row counter)
492
+ ; r11d = JDIMENSION output_width (rounded up to 2*SIZEOF_XMMWORD below)
493
+ ; r12 = JSAMPARRAY input_data (array of input row pointers)
494
+ ; r13 = JSAMPARRAY *output_data_ptr (dereferenced once to get the output row array)
495
+
496
+ align 32
497
+ GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
498
+
499
+ EXTN(jsimd_h2v1_upsample_sse2):
500
+ push rbp
501
+ mov rax, rsp ; rax = entry rsp; not read in the visible body (collect_args, defined elsewhere, may use it)
502
+ mov rbp, rsp
503
+ collect_args 4 ; macro defined elsewhere; per the notes above the 4 args are then in r10-r13
504
+
505
+ mov edx, r11d ; rdx = output_width (32-bit write zero-extends)
506
+ add rdx, byte (2*SIZEOF_XMMWORD)-1
507
+ and rdx, byte -(2*SIZEOF_XMMWORD) ; rdx = width rounded up to a multiple of 2*SIZEOF_XMMWORD
508
+ jz near .return ; zero width: nothing to do
509
+
510
+ mov rcx, r10 ; rowctr = max_v_samp_factor; NOTE(review): reads all 64 bits of an int arg - confirm upper half is defined
511
+ test rcx, rcx
512
+ jz short .return ; zero rows: nothing to do
513
+
514
+ mov rsi, r12 ; input_data
515
+ mov rdi, r13
516
+ mov rdip, JSAMPARRAY [rdi] ; output_data = *output_data_ptr
517
+ .rowloop: ; one input row -> one output row per iteration
518
+ push rdi ; save the row-array cursors; rsi/rdi are reused
519
+ push rsi ; as sample pointers inside the column loop
520
+
521
+ mov rsip, JSAMPROW [rsi] ; inptr = *input_data
522
+ mov rdip, JSAMPROW [rdi] ; outptr = *output_data
523
+ mov rax, rdx ; colctr = output bytes to write for this row (the rounded-up width)
524
+ .columnloop: ; up to 2 input XMMWORDs -> 4 output XMMWORDs per iteration
525
+
526
+ movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; aligned load of one input XMMWORD
527
+
528
+ movdqa xmm1, xmm0
529
+ punpcklbw xmm0, xmm0 ; xmm0=low half, each byte doubled
530
+ punpckhbw xmm1, xmm1 ; xmm1=high half, each byte doubled
531
+
532
+ movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 ; aligned stores; note the row is written out to
533
+ movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 ; the rounded-up width, not just output_width
534
+
535
+ sub rax, byte 2*SIZEOF_XMMWORD ; two output XMMWORDs done
536
+ jz short .nextrow ; colctr is a multiple of this step, so it hits 0 exactly
537
+
538
+ movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] ; second input XMMWORD of this iteration
539
+
540
+ movdqa xmm3, xmm2
541
+ punpcklbw xmm2, xmm2 ; xmm2=low half, each byte doubled
542
+ punpckhbw xmm3, xmm3 ; xmm3=high half, each byte doubled
543
+
544
+ movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
545
+ movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3
546
+
547
+ sub rax, byte 2*SIZEOF_XMMWORD ; two more output XMMWORDs done
548
+ jz short .nextrow
549
+
550
+ add rsi, byte 2*SIZEOF_XMMWORD ; inptr
551
+ add rdi, byte 4*SIZEOF_XMMWORD ; outptr
552
+ jmp short .columnloop
553
+
554
+ .nextrow:
555
+ pop rsi ; restore the row-array cursors saved at .rowloop
556
+ pop rdi
557
+
558
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
559
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
560
+ dec rcx ; rowctr
561
+ jg short .rowloop ; loop while rowctr > 0 (signed)
562
+
563
+ .return:
564
+ uncollect_args 4 ; counterpart of collect_args above (macro defined elsewhere)
565
+ pop rbp
566
+ ret
567
+
568
+ ; --------------------------------------------------------------------------
569
+ ;
570
+ ; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
571
+ ; It's still a box filter: every input byte is written twice in a row, to two output rows.
572
+ ;
573
+ ; GLOBAL(void)
574
+ ; jsimd_h2v2_upsample_sse2(int max_v_samp_factor, JDIMENSION output_width,
575
+ ; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr);
576
+ ;
577
+
578
+ ; r10 = int max_v_samp_factor (used as the row counter, consumed 2 at a time)
579
+ ; r11d = JDIMENSION output_width (rounded up to 2*SIZEOF_XMMWORD below)
580
+ ; r12 = JSAMPARRAY input_data (array of input row pointers)
581
+ ; r13 = JSAMPARRAY *output_data_ptr (dereferenced once to get the output row array)
582
+
583
+ align 32
584
+ GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
585
+
586
+ EXTN(jsimd_h2v2_upsample_sse2):
587
+ push rbp
588
+ mov rax, rsp ; rax = entry rsp; not read in the visible body (collect_args, defined elsewhere, may use it)
589
+ mov rbp, rsp
590
+ collect_args 4 ; macro defined elsewhere; per the notes above the 4 args are then in r10-r13
591
+ push rbx ; rbx is callee-saved and is used as outptr0 below
592
+
593
+ mov edx, r11d ; rdx = output_width (32-bit write zero-extends)
594
+ add rdx, byte (2*SIZEOF_XMMWORD)-1
595
+ and rdx, byte -(2*SIZEOF_XMMWORD) ; rdx = width rounded up to a multiple of 2*SIZEOF_XMMWORD
596
+ jz near .return ; zero width: nothing to do
597
+
598
+ mov rcx, r10 ; rowctr = max_v_samp_factor; NOTE(review): reads all 64 bits of an int arg - confirm upper half is defined
599
+ test rcx, rcx
600
+ jz near .return ; zero rows: nothing to do
601
+
602
+ mov rsi, r12 ; input_data
603
+ mov rdi, r13
604
+ mov rdip, JSAMPARRAY [rdi] ; output_data = *output_data_ptr
605
+ .rowloop: ; one input row -> two output rows per iteration
606
+ push rdi ; save the row-array cursors; rsi/rdi are reused
607
+ push rsi ; as sample pointers inside the column loop
608
+
609
+ mov rsip, JSAMPROW [rsi] ; inptr
610
+ mov rbxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
611
+ mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
612
+ mov rax, rdx ; colctr = output bytes to write per output row (the rounded-up width)
613
+ .columnloop: ; up to 2 input XMMWORDs -> 4 output XMMWORDs in each of the 2 rows
614
+
615
+ movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; aligned load of one input XMMWORD
616
+
617
+ movdqa xmm1, xmm0
618
+ punpcklbw xmm0, xmm0 ; xmm0=low half, each byte doubled
619
+ punpckhbw xmm1, xmm1 ; xmm1=high half, each byte doubled
620
+
621
+ movdqa XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0 ; same data goes to both output rows;
622
+ movdqa XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1 ; rows are written out to the rounded-up
623
+ movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 ; width, not just output_width
624
+ movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1
625
+
626
+ sub rax, byte 2*SIZEOF_XMMWORD ; two output XMMWORDs per row done
627
+ jz short .nextrow ; colctr is a multiple of this step, so it hits 0 exactly
628
+
629
+ movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] ; second input XMMWORD of this iteration
630
+
631
+ movdqa xmm3, xmm2
632
+ punpcklbw xmm2, xmm2 ; xmm2=low half, each byte doubled
633
+ punpckhbw xmm3, xmm3 ; xmm3=high half, each byte doubled
634
+
635
+ movdqa XMMWORD [rbx+2*SIZEOF_XMMWORD], xmm2
636
+ movdqa XMMWORD [rbx+3*SIZEOF_XMMWORD], xmm3
637
+ movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
638
+ movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3
639
+
640
+ sub rax, byte 2*SIZEOF_XMMWORD ; two more output XMMWORDs per row done
641
+ jz short .nextrow
642
+
643
+ add rsi, byte 2*SIZEOF_XMMWORD ; inptr
644
+ add rbx, byte 4*SIZEOF_XMMWORD ; outptr0
645
+ add rdi, byte 4*SIZEOF_XMMWORD ; outptr1
646
+ jmp short .columnloop
647
+
648
+ .nextrow:
649
+ pop rsi ; restore the row-array cursors saved at .rowloop
650
+ pop rdi
651
+
652
+ add rsi, byte 1*SIZEOF_JSAMPROW ; input_data
653
+ add rdi, byte 2*SIZEOF_JSAMPROW ; output_data
654
+ sub rcx, byte 2 ; rowctr (two output rows were produced)
655
+ jg near .rowloop ; loop while rowctr > 0 (signed)
656
+
657
+ .return:
658
+ pop rbx ; matches the push rbx in the prologue (both early exits come after it)
659
+ uncollect_args 4 ; counterpart of collect_args above (macro defined elsewhere)
660
+ pop rbp
661
+ ret
662
+
663
+ ; For some reason, the OS X linker does not honor the request to align the
664
+ ; segment unless we do this.
665
+ align 32