image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,155 @@
1
+ ;
2
+ ; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+
18
+ %include "jsimdext.inc"
19
+ %include "jdct.inc"
20
+
21
+ ; --------------------------------------------------------------------------
22
+ SECTION SEG_TEXT
23
+ BITS 64
24
+ ;
25
+ ; Load data into workspace, applying unsigned->signed conversion
26
+ ;
27
+ ; GLOBAL(void)
28
+ ; jsimd_convsamp_float_sse2(JSAMPARRAY sample_data, JDIMENSION start_col,
29
+ ; FAST_FLOAT *workspace);
30
+ ;
31
+
32
+ ; r10 = JSAMPARRAY sample_data
33
+ ; r11d = JDIMENSION start_col
34
+ ; r12 = FAST_FLOAT *workspace
35
+
36
+ align 32
37
+ GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
38
+ ; Loads DCTSIZE sample rows (8 bytes each, two rows per loop pass), recentres them and stores them as floats in workspace.
39
+ EXTN(jsimd_convsamp_float_sse2):
40
+ push rbp ; save the caller's frame pointer
41
+ mov rax, rsp ; NOTE(review): rax presumably feeds collect_args (macro from jsimdext.inc) - confirm
42
+ mov rbp, rsp ; establish this function's frame pointer
43
+ collect_args 3
44
+ push rbx ; rbx serves as a row pointer below and is restored before returning
45
+
46
+ pcmpeqw xmm7, xmm7 ; every word = 0xFFFF
47
+ psllw xmm7, 7 ; every word = 0xFF80
48
+ packsswb xmm7, xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
49
+
50
+ mov rsi, r10 ; rsi = sample_data (array of row pointers)
51
+ mov eax, r11d ; rax = start_col (the 32-bit write zero-extends)
52
+ mov rdi, r12 ; rdi = workspace write cursor
53
+ mov rcx, DCTSIZE/2 ; loop counter: two rows are handled per pass
54
+ .convloop:
55
+ mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
56
+ mov rdxp, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
57
+
58
+ movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; 8 bytes of the first row, starting at start_col
59
+ movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; 8 bytes of the second row, starting at start_col
60
+ ; Subtracting 0x80 from every byte turns the unsigned samples into signed bytes.
61
+ psubb xmm0, xmm7 ; xmm0=(01234567)
62
+ psubb xmm1, xmm7 ; xmm1=(89ABCDEF)
63
+ ; Duplicate each byte into a word; the '*' positions in the lane comments are don't-care bytes.
64
+ punpcklbw xmm0, xmm0 ; xmm0=(*0*1*2*3*4*5*6*7)
65
+ punpcklbw xmm1, xmm1 ; xmm1=(*8*9*A*B*C*D*E*F)
66
+ ; Interleave so each sample sits in the top byte of a dword; the prior contents of xmm2/xmm3 only fill low bytes that the psrad below shifts out.
67
+ punpcklwd xmm2, xmm0 ; xmm2=(***0***1***2***3)
68
+ punpckhwd xmm0, xmm0 ; xmm0=(***4***5***6***7)
69
+ punpcklwd xmm3, xmm1 ; xmm3=(***8***9***A***B)
70
+ punpckhwd xmm1, xmm1 ; xmm1=(***C***D***E***F)
71
+ ; The arithmetic right shift sign-extends each sample to a full dword; cvtdq2ps then converts the dwords to floats.
72
+ psrad xmm2, (DWORD_BIT-BYTE_BIT) ; xmm2=(0123)
73
+ psrad xmm0, (DWORD_BIT-BYTE_BIT) ; xmm0=(4567)
74
+ cvtdq2ps xmm2, xmm2 ; xmm2=(0123)
75
+ cvtdq2ps xmm0, xmm0 ; xmm0=(4567)
76
+ psrad xmm3, (DWORD_BIT-BYTE_BIT) ; xmm3=(89AB)
77
+ psrad xmm1, (DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF)
78
+ cvtdq2ps xmm3, xmm3 ; xmm3=(89AB)
79
+ cvtdq2ps xmm1, xmm1 ; xmm1=(CDEF)
80
+ ; movaps needs 16-byte-aligned addresses, so workspace must be aligned accordingly.
81
+ movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
82
+ movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
83
+ movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
84
+ movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
85
+
86
+ add rsi, byte 2*SIZEOF_JSAMPROW ; step to the next pair of row pointers
87
+ add rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT ; step past the two rows of floats just written
88
+ dec rcx
89
+ jnz short .convloop ; repeat until all DCTSIZE rows are converted
90
+
91
+ pop rbx ; restore rbx saved in the prologue
92
+ uncollect_args 3
93
+ pop rbp
94
+ ret
95
+
96
+ ; --------------------------------------------------------------------------
97
+ ;
98
+ ; Quantize/descale the coefficients, and store into coef_block
99
+ ;
100
+ ; GLOBAL(void)
101
+ ; jsimd_quantize_float_sse2(JCOEFPTR coef_block, FAST_FLOAT *divisors,
102
+ ; FAST_FLOAT *workspace);
103
+ ;
104
+
105
+ ; r10 = JCOEFPTR coef_block
106
+ ; r11 = FAST_FLOAT *divisors
107
+ ; r12 = FAST_FLOAT *workspace
108
+
109
+ align 32
110
+ GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
111
+ ; Multiplies each workspace float by the matching divisors entry, converts to integers and stores saturated 16-bit values in coef_block.
112
+ EXTN(jsimd_quantize_float_sse2):
113
+ push rbp ; save the caller's frame pointer
114
+ mov rax, rsp ; NOTE(review): rax presumably feeds collect_args (macro from jsimdext.inc) - confirm
115
+ mov rbp, rsp ; establish this function's frame pointer
116
+ collect_args 3
117
+
118
+ mov rsi, r12 ; rsi = workspace read cursor
119
+ mov rdx, r11 ; rdx = divisors read cursor
120
+ mov rdi, r10 ; rdi = coef_block write cursor
121
+ mov rax, DCTSIZE2/16 ; loop counter: 16 coefficients are handled per pass
122
+ .quantloop:
123
+ movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)] ; aligned loads: workspace must be 16-byte aligned
124
+ movaps xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)]
125
+ mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] ; the table is multiplied in, not divided by
126
+ mulps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)]
127
+ movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
128
+ movaps xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)]
129
+ mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
130
+ mulps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)]
131
+ ; cvtps2dq converts packed floats to 32-bit integers using the rounding mode currently set in MXCSR.
132
+ cvtps2dq xmm0, xmm0
133
+ cvtps2dq xmm1, xmm1
134
+ cvtps2dq xmm2, xmm2
135
+ cvtps2dq xmm3, xmm3
136
+ ; packssdw narrows each pair of dword vectors to one vector of eight words with signed saturation.
137
+ packssdw xmm0, xmm1
138
+ packssdw xmm2, xmm3
139
+ ; movdqa needs 16-byte-aligned addresses, so coef_block must be aligned accordingly.
140
+ movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0
141
+ movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2
142
+
143
+ add rsi, byte 16*SIZEOF_FAST_FLOAT ; advance all three cursors by the 16 elements just processed
144
+ add rdx, byte 16*SIZEOF_FAST_FLOAT
145
+ add rdi, byte 16*SIZEOF_JCOEF
146
+ dec rax
147
+ jnz short .quantloop ; repeat until all DCTSIZE2 coefficients are done
148
+
149
+ uncollect_args 3
150
+ pop rbp
151
+ ret
152
+
153
+ ; For some reason, the OS X linker does not honor the request to align the
154
+ ; segment unless we do this.
155
+ align 32
@@ -0,0 +1,163 @@
1
+ ;
2
+ ; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, 2018, D. R. Commander.
6
+ ; Copyright (C) 2016, Matthieu Darbois.
7
+ ; Copyright (C) 2018, Matthias Räncker.
8
+ ;
9
+ ; Based on the x86 SIMD extension for IJG JPEG library
10
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
11
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
12
+ ;
13
+ ; This file should be assembled with NASM (Netwide Assembler),
14
+ ; can *not* be assembled with Microsoft's MASM or any compatible
15
+ ; assembler (including Borland's Turbo Assembler).
16
+ ; NASM is available from http://nasm.sourceforge.net/ or
17
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
18
+
19
+ %include "jsimdext.inc"
20
+ %include "jdct.inc"
21
+
22
+ ; --------------------------------------------------------------------------
23
+ SECTION SEG_TEXT
24
+ BITS 64
25
+ ;
26
+ ; Load data into workspace, applying unsigned->signed conversion
27
+ ;
28
+ ; GLOBAL(void)
29
+ ; jsimd_convsamp_avx2(JSAMPARRAY sample_data, JDIMENSION start_col,
30
+ ; DCTELEM *workspace);
31
+ ;
32
+
33
+ ; r10 = JSAMPARRAY sample_data
34
+ ; r11d = JDIMENSION start_col
35
+ ; r12 = DCTELEM *workspace
36
+
37
+ align 32
38
+ GLOBAL_FUNCTION(jsimd_convsamp_avx2)
39
+ ; Loads 8 bytes from each of sample rows 0-7, widens them to words, adds 0xFF80 (-128) and stores the result in workspace.
40
+ EXTN(jsimd_convsamp_avx2):
41
+ push rbp ; save the caller's frame pointer
42
+ mov rax, rsp ; NOTE(review): rax presumably feeds collect_args (macro from jsimdext.inc) - confirm
43
+ mov rbp, rsp ; establish this function's frame pointer
44
+ collect_args 3
45
+
46
+ mov eax, r11d ; rax = start_col (the 32-bit write zero-extends)
47
+ ; Each group below packs two rows into one xmm register: the even row in the low qword, the odd row in the high qword.
48
+ mov rsip, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
49
+ mov rdip, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
50
+ movq xmm0, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE] ; row 0 -> low qword (upper qword zeroed)
51
+ pinsrq xmm0, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1 ; row 1 -> high qword
52
+
53
+ mov rsip, JSAMPROW [r10+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
54
+ mov rdip, JSAMPROW [r10+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
55
+ movq xmm1, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE] ; row 2 -> low qword
56
+ pinsrq xmm1, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1 ; row 3 -> high qword
57
+
58
+ mov rsip, JSAMPROW [r10+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
59
+ mov rdip, JSAMPROW [r10+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
60
+ movq xmm2, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE] ; row 4 -> low qword
61
+ pinsrq xmm2, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1 ; row 5 -> high qword
62
+
63
+ mov rsip, JSAMPROW [r10+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
64
+ mov rdip, JSAMPROW [r10+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
65
+ movq xmm3, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE] ; row 6 -> low qword
66
+ pinsrq xmm3, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1 ; row 7 -> high qword
67
+ ; Zero-extend the 16 bytes of each xmm register to 16 words in the matching ymm register.
68
+ vpmovzxbw ymm0, xmm0 ; ymm0=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
69
+ vpmovzxbw ymm1, xmm1 ; ymm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
70
+ vpmovzxbw ymm2, xmm2 ; ymm2=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
71
+ vpmovzxbw ymm3, xmm3 ; ymm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
72
+
73
+ vpcmpeqw ymm7, ymm7, ymm7 ; every word = 0xFFFF
74
+ vpsllw ymm7, ymm7, 7 ; ymm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
75
+ ; Adding 0xFF80 (-128 as a signed word) recentres the zero-extended samples around zero.
76
+ vpaddw ymm0, ymm0, ymm7
77
+ vpaddw ymm1, ymm1, ymm7
78
+ vpaddw ymm2, ymm2, ymm7
79
+ vpaddw ymm3, ymm3, ymm7
80
+ ; vmovdqu carries no alignment requirement; each store writes the two rows held in one ymm register.
81
+ vmovdqu YMMWORD [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)], ymm0
82
+ vmovdqu YMMWORD [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)], ymm1
83
+ vmovdqu YMMWORD [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)], ymm2
84
+ vmovdqu YMMWORD [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)], ymm3
85
+
86
+ vzeroupper ; clear the upper YMM halves before returning to non-AVX code
87
+ uncollect_args 3
88
+ pop rbp
89
+ ret
90
+
91
+ ; --------------------------------------------------------------------------
92
+ ;
93
+ ; Quantize/descale the coefficients, and store into coef_block
94
+ ;
95
+ ; This implementation is based on an algorithm described in
96
+ ; "How to optimize for the Pentium family of microprocessors"
97
+ ; (http://www.agner.org/assem/).
98
+ ;
99
+ ; GLOBAL(void)
100
+ ; jsimd_quantize_avx2(JCOEFPTR coef_block, DCTELEM *divisors,
101
+ ; DCTELEM *workspace);
102
+ ;
103
+
104
+ %define RECIPROCAL(m, n, b) \
105
+ YMMBLOCK(DCTSIZE * 0 + (m), (n), (b), SIZEOF_DCTELEM)
106
+ %define CORRECTION(m, n, b) \
107
+ YMMBLOCK(DCTSIZE * 1 + (m), (n), (b), SIZEOF_DCTELEM)
108
+ %define SCALE(m, n, b) \
109
+ YMMBLOCK(DCTSIZE * 2 + (m), (n), (b), SIZEOF_DCTELEM)
110
+ ; The three macros above address tables at row offsets DCTSIZE*0, DCTSIZE*1 and DCTSIZE*2 from base b (YMMBLOCK comes from an include - confirm its exact layout).
111
+ ; r10 = JCOEFPTR coef_block
112
+ ; r11 = DCTELEM *divisors
113
+ ; r12 = DCTELEM *workspace
114
+
115
+ align 32
116
+ GLOBAL_FUNCTION(jsimd_quantize_avx2)
117
+ ; Quantizes the workspace words: |x| + correction, high half of a multiply by reciprocal, high half of a multiply by scale, sign restored, stored to coef_block.
118
+ EXTN(jsimd_quantize_avx2):
119
+ push rbp ; save the caller's frame pointer
120
+ mov rax, rsp ; NOTE(review): rax presumably feeds collect_args (macro from jsimdext.inc) - confirm
121
+ mov rbp, rsp ; establish this function's frame pointer
122
+ collect_args 3
123
+ ; Load the workspace coefficients; ymm4-ymm7 keep the signed originals for the vpsignw step further down.
124
+ vmovdqu ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
125
+ vmovdqu ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
126
+ vmovdqu ymm6, [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)]
127
+ vmovdqu ymm7, [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)]
128
+ vpabsw ymm0, ymm4 ; ymm0-ymm3 = absolute values of the coefficients
129
+ vpabsw ymm1, ymm5
130
+ vpabsw ymm2, ymm6
131
+ vpabsw ymm3, ymm7
132
+ ; The arithmetic below runs on the absolute values; vpmulhuw keeps the high 16 bits of each unsigned 16x16 product.
133
+ vpaddw ymm0, YMMWORD [CORRECTION(0,0,r11)] ; correction + roundfactor
134
+ vpaddw ymm1, YMMWORD [CORRECTION(2,0,r11)]
135
+ vpaddw ymm2, YMMWORD [CORRECTION(4,0,r11)]
136
+ vpaddw ymm3, YMMWORD [CORRECTION(6,0,r11)]
137
+ vpmulhuw ymm0, YMMWORD [RECIPROCAL(0,0,r11)] ; reciprocal
138
+ vpmulhuw ymm1, YMMWORD [RECIPROCAL(2,0,r11)]
139
+ vpmulhuw ymm2, YMMWORD [RECIPROCAL(4,0,r11)]
140
+ vpmulhuw ymm3, YMMWORD [RECIPROCAL(6,0,r11)]
141
+ vpmulhuw ymm0, YMMWORD [SCALE(0,0,r11)] ; scale
142
+ vpmulhuw ymm1, YMMWORD [SCALE(2,0,r11)]
143
+ vpmulhuw ymm2, YMMWORD [SCALE(4,0,r11)]
144
+ vpmulhuw ymm3, YMMWORD [SCALE(6,0,r11)]
145
+ ; vpsignw re-applies the sign of each original coefficient (and yields 0 where the original was 0).
146
+ vpsignw ymm0, ymm0, ymm4
147
+ vpsignw ymm1, ymm1, ymm5
148
+ vpsignw ymm2, ymm2, ymm6
149
+ vpsignw ymm3, ymm3, ymm7
150
+ ; Store the quantized coefficients to coef_block (r10); vmovdqu carries no alignment requirement.
151
+ vmovdqu [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0
152
+ vmovdqu [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1
153
+ vmovdqu [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2
154
+ vmovdqu [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
155
+
156
+ vzeroupper ; clear the upper YMM halves before returning to non-AVX code
157
+ uncollect_args 3
158
+ pop rbp
159
+ ret
160
+
161
+ ; For some reason, the OS X linker does not honor the request to align the
162
+ ; segment unless we do this.
163
+ align 32
@@ -0,0 +1,188 @@
1
+ ;
2
+ ; jquanti.asm - sample data conversion and quantization (64-bit SSE2)
3
+ ;
4
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
+ ; Copyright (C) 2009, 2016, D. R. Commander.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+
18
+ %include "jsimdext.inc"
19
+ %include "jdct.inc"
20
+
21
+ ; --------------------------------------------------------------------------
22
+ SECTION SEG_TEXT
23
+ BITS 64
24
+ ;
25
+ ; Load data into workspace, applying unsigned->signed conversion
26
+ ; (each sample byte is zero-extended to a 16-bit word, then 128 is subtracted).
27
+ ; GLOBAL(void)
28
+ ; jsimd_convsamp_sse2(JSAMPARRAY sample_data, JDIMENSION start_col,
29
+ ; DCTELEM *workspace);
30
+ ;
31
+ ; Register roles on entry (after collect_args) and inside the loop:
32
+ ; r10 = JSAMPARRAY sample_data (array of row pointers; copied to rsi)
33
+ ; r11d = JDIMENSION start_col (column offset applied to every row; copied to eax)
34
+ ; r12 = DCTELEM *workspace (destination; copied to rdi, written with movdqa)
35
+ ; NOTE(review): collect_args / uncollect_args and rbxp / rdxp are macros defined outside this chunk -- confirm their expansion there.
36
+ align 32
37
+ GLOBAL_FUNCTION(jsimd_convsamp_sse2)
38
+
39
+ EXTN(jsimd_convsamp_sse2):
40
+ push rbp ; save the caller's frame pointer
41
+ mov rax, rsp ; rax = stack pointer after the push; NOTE(review): presumably consumed by collect_args -- confirm
42
+ mov rbp, rsp ; establish the frame pointer
43
+ collect_args 3
44
+ push rbx ; rbx is callee-saved and is used as a row pointer below
45
+ ; Constants kept in registers for the whole loop.
46
+ pxor xmm6, xmm6 ; xmm6=(all 0's)
47
+ pcmpeqw xmm7, xmm7 ; xmm7=(all 1's)
48
+ psllw xmm7, 7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} (0xFF80 = -128 as a signed word)
49
+
50
+ mov rsi, r10 ; rsi = current position in the row-pointer array
51
+ mov eax, r11d ; rax = start_col (32-bit write zero-extends into rax)
52
+ mov rdi, r12 ; rdi = current output position in workspace
53
+ mov rcx, DCTSIZE/4 ; loop count: each iteration converts 4 rows
54
+ .convloop:
55
+ mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) pointer to row 0 of this group
56
+ mov rdxp, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) pointer to row 1 of this group
57
+
58
+ movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567)
59
+ movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF)
60
+
61
+ mov rbxp, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) pointer to row 2 of this group
62
+ mov rdxp, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) pointer to row 3 of this group
63
+
64
+ movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN)
65
+ movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV)
66
+ ; Interleaving with the zero register widens each unsigned byte to a word; adding 0xFF80 then subtracts 128.
67
+ punpcklbw xmm0, xmm6 ; xmm0=(01234567)
68
+ punpcklbw xmm1, xmm6 ; xmm1=(89ABCDEF)
69
+ paddw xmm0, xmm7 ; xmm0 -= 128 per word
70
+ paddw xmm1, xmm7 ; xmm1 -= 128 per word
71
+ punpcklbw xmm2, xmm6 ; xmm2=(GHIJKLMN)
72
+ punpcklbw xmm3, xmm6 ; xmm3=(OPQRSTUV)
73
+ paddw xmm2, xmm7 ; xmm2 -= 128 per word
74
+ paddw xmm3, xmm7 ; xmm3 -= 128 per word
75
+ ; Store the 4 converted rows; movdqa requires 16-byte-aligned addresses.
76
+ movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0
77
+ movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
78
+ movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
79
+ movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
80
+
81
+ add rsi, byte 4*SIZEOF_JSAMPROW ; advance past the 4 row pointers just used
82
+ add rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM ; advance the output by 4 rows of DCTSIZE elements
83
+ dec rcx
84
+ jnz short .convloop ; repeat until all DCTSIZE/4 row groups are done
85
+
86
+ pop rbx ; restore callee-saved rbx
87
+ uncollect_args 3
88
+ pop rbp ; restore the caller's frame pointer
89
+ ret
90
+
91
+ ; --------------------------------------------------------------------------
92
+ ;
93
+ ; Quantize/descale the coefficients, and store into coef_block
94
+ ; (per word: |x| plus correction, two unsigned high-half multiplies, then the sign of x is reapplied).
95
+ ; This implementation is based on an algorithm described in
96
+ ; "How to optimize for the Pentium family of microprocessors"
97
+ ; (http://www.agner.org/assem/).
98
+ ;
99
+ ; GLOBAL(void)
100
+ ; jsimd_quantize_sse2(JCOEFPTR coef_block, DCTELEM *divisors,
101
+ ; DCTELEM *workspace);
102
+ ;
103
+ ; The three macros below address sub-tables of the divisors buffer, DCTSIZE block rows apart.
104
+ %define RECIPROCAL(m, n, b) \
105
+ XMMBLOCK(DCTSIZE * 0 + (m), (n), (b), SIZEOF_DCTELEM)
106
+ %define CORRECTION(m, n, b) \
107
+ XMMBLOCK(DCTSIZE * 1 + (m), (n), (b), SIZEOF_DCTELEM)
108
+ %define SCALE(m, n, b) \
109
+ XMMBLOCK(DCTSIZE * 2 + (m), (n), (b), SIZEOF_DCTELEM)
110
+ ; Register roles on entry (after collect_args):
111
+ ; r10 = JCOEFPTR coef_block (destination; copied to rdi)
112
+ ; r11 = DCTELEM *divisors (copied to rdx; read through the macros above)
113
+ ; r12 = DCTELEM *workspace (source coefficients; copied to rsi)
114
+ ; NOTE(review): collect_args / uncollect_args are macros defined outside this chunk -- confirm their expansion there.
115
+ align 32
116
+ GLOBAL_FUNCTION(jsimd_quantize_sse2)
117
+
118
+ EXTN(jsimd_quantize_sse2):
119
+ push rbp ; save the caller's frame pointer
120
+ mov rax, rsp ; rax = stack pointer after the push; NOTE(review): presumably consumed by collect_args -- confirm
121
+ mov rbp, rsp ; establish the frame pointer
122
+ collect_args 3
123
+
124
+ mov rsi, r12 ; rsi = input position in workspace
125
+ mov rdx, r11 ; rdx = position in the divisors tables
126
+ mov rdi, r10 ; rdi = output position in coef_block
127
+ mov rax, DCTSIZE2/32 ; loop count: each iteration handles 32 elements (see the pointer strides below)
128
+ .quantloop:
129
+ movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)] ; aligned loads of the source coefficients
130
+ movdqa xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)]
131
+ movdqa xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)]
132
+ movdqa xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)]
133
+ movdqa xmm0, xmm4 ; working copies; xmm4-xmm7 become the sign masks
134
+ movdqa xmm1, xmm5
135
+ movdqa xmm2, xmm6
136
+ movdqa xmm3, xmm7
137
+ psraw xmm4, (WORD_BIT-1) ; arithmetic shift -> sign mask per word (assumes WORD_BIT is the word width, defined elsewhere -- confirm)
138
+ psraw xmm5, (WORD_BIT-1)
139
+ psraw xmm6, (WORD_BIT-1)
140
+ psraw xmm7, (WORD_BIT-1)
141
+ pxor xmm0, xmm4 ; (x ^ mask) - mask yields |x|; first half of that identity
142
+ pxor xmm1, xmm5
143
+ pxor xmm2, xmm6
144
+ pxor xmm3, xmm7
145
+ psubw xmm0, xmm4 ; if (xmm0 < 0) xmm0 = -xmm0;
146
+ psubw xmm1, xmm5 ; if (xmm1 < 0) xmm1 = -xmm1;
147
+ psubw xmm2, xmm6 ; if (xmm2 < 0) xmm2 = -xmm2;
148
+ psubw xmm3, xmm7 ; if (xmm3 < 0) xmm3 = -xmm3;
149
+ ; Quantize the magnitudes: add the correction term, then keep the high 16 bits of two unsigned multiplies.
150
+ paddw xmm0, XMMWORD [CORRECTION(0,0,rdx)] ; correction + roundfactor
151
+ paddw xmm1, XMMWORD [CORRECTION(1,0,rdx)]
152
+ paddw xmm2, XMMWORD [CORRECTION(2,0,rdx)]
153
+ paddw xmm3, XMMWORD [CORRECTION(3,0,rdx)]
154
+ pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,rdx)] ; reciprocal
155
+ pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,rdx)]
156
+ pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,rdx)]
157
+ pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,rdx)]
158
+ pmulhuw xmm0, XMMWORD [SCALE(0,0,rdx)] ; scale
159
+ pmulhuw xmm1, XMMWORD [SCALE(1,0,rdx)]
160
+ pmulhuw xmm2, XMMWORD [SCALE(2,0,rdx)]
161
+ pmulhuw xmm3, XMMWORD [SCALE(3,0,rdx)]
162
+ ; Reapply the saved sign with the same (q ^ mask) - mask identity used for the absolute value.
163
+ pxor xmm0, xmm4
164
+ pxor xmm1, xmm5
165
+ pxor xmm2, xmm6
166
+ pxor xmm3, xmm7
167
+ psubw xmm0, xmm4
168
+ psubw xmm1, xmm5
169
+ psubw xmm2, xmm6
170
+ psubw xmm3, xmm7
171
+ movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 ; aligned stores of the quantized values
172
+ movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
173
+ movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
174
+ movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
175
+
176
+ add rsi, byte 32*SIZEOF_DCTELEM ; advance the input by the 32 elements just processed
177
+ add rdx, byte 32*SIZEOF_DCTELEM ; advance the divisors base so all three sub-tables stay in step
178
+ add rdi, byte 32*SIZEOF_JCOEF ; advance the output by 32 coefficients
179
+ dec rax
180
+ jnz near .quantloop ; repeat until all DCTSIZE2/32 groups are done
181
+
182
+ uncollect_args 3
183
+ pop rbp ; restore the caller's frame pointer
184
+ ret
185
+
186
+ ; For some reason, the OS X linker does not honor the request to align the
187
+ ; segment unless we do this.
188
+ align 32