image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,583 @@
1
+ ;
2
+ ; jchuff-sse2.asm - Huffman entropy encoding (64-bit SSE2)
3
+ ;
4
+ ; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, D. R. Commander.
5
+ ; Copyright (C) 2015, Matthieu Darbois.
6
+ ; Copyright (C) 2018, Matthias Räncker.
7
+ ;
8
+ ; Based on the x86 SIMD extension for IJG JPEG library
9
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
11
+ ;
12
+ ; This file should be assembled with NASM (Netwide Assembler),
13
+ ; and can *not* be assembled with Microsoft's MASM or any compatible
14
+ ; assembler (including Borland's Turbo Assembler).
15
+ ; NASM is available from http://nasm.sourceforge.net/ or
16
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
17
+ ;
18
+ ; This file contains an SSE2 implementation for Huffman coding of one block.
19
+ ; The following code is based on jchuff.c; see jchuff.c for more details.
20
+
21
+ %include "jsimdext.inc"
22
+
23
+ struc working_state ; field offsets of the working_state received as the "state" argument (presumably must match the C struct in jchuff.c -- confirm)
24
+ .next_output_byte: resp 1 ; => next byte to write in buffer (one resp unit; resp is not a NASM built-in, presumably from jsimdext.inc)
25
+ .free_in_buffer: resp 1 ; # of byte spaces remaining in buffer (one resp unit)
26
+ .cur.put_buffer.simd resq 1 ; current bit accumulation buffer (one qword = 64 bits)
27
+ .cur.free_bits resd 1 ; # of bits still available in the bit accumulation buffer (one dword)
28
+ .cur.last_dc_val resd 4 ; last DC coef for each component (four dwords)
29
+ .cinfo: resp 1 ; dump_buffer needs access to this (one resp unit)
30
+ endstruc
31
+
32
+ struc c_derived_tbl ; field offsets of the derived Huffman tables received as the dctbl and actbl arguments
33
+ .ehufco: resd 256 ; code for each symbol (256 dwords, offsets 0..1023)
34
+ .ehufsi: resb 256 ; length of code for each symbol (256 bytes, starting at offset 1024)
35
+ ; If no code has been allocated for a symbol S, ehufsi[S] contains 0
36
+ endstruc
37
+
38
+ ; --------------------------------------------------------------------------
39
+ SECTION SEG_CONST
40
+
41
+ alignz 32
42
+ GLOBAL_DATA(jconst_huff_encode_one_block)
43
+
44
+ EXTN(jconst_huff_encode_one_block):
45
+
46
+ jpeg_mask_bits dd 0x0000, 0x0001, 0x0003, 0x0007 ; jpeg_mask_bits[n] = (1 << n) - 1, n = 0..3
47
+ dd 0x000f, 0x001f, 0x003f, 0x007f ; n = 4..7
48
+ dd 0x00ff, 0x01ff, 0x03ff, 0x07ff ; n = 8..11
49
+ dd 0x0fff, 0x1fff, 0x3fff, 0x7fff ; n = 12..15
50
+
51
+ alignz 32
52
+
53
+ times 1 << 14 db 15 ; jpeg_nbits_table[-32768..-16385] = 15
54
+ times 1 << 13 db 14 ; jpeg_nbits_table[-16384..-8193] = 14
55
+ times 1 << 12 db 13 ; jpeg_nbits_table[-8192..-4097] = 13
56
+ times 1 << 11 db 12 ; jpeg_nbits_table[-4096..-2049] = 12
57
+ times 1 << 10 db 11 ; jpeg_nbits_table[-2048..-1025] = 11
58
+ times 1 << 9 db 10 ; jpeg_nbits_table[-1024..-513] = 10
59
+ times 1 << 8 db 9 ; jpeg_nbits_table[-512..-257] = 9
60
+ times 1 << 7 db 8 ; jpeg_nbits_table[-256..-129] = 8
61
+ times 1 << 6 db 7 ; jpeg_nbits_table[-128..-65] = 7
62
+ times 1 << 5 db 6 ; jpeg_nbits_table[-64..-33] = 6
63
+ times 1 << 4 db 5 ; jpeg_nbits_table[-32..-17] = 5
64
+ times 1 << 3 db 4 ; jpeg_nbits_table[-16..-9] = 4
65
+ times 1 << 2 db 3 ; jpeg_nbits_table[-8..-5] = 3
66
+ times 1 << 1 db 2 ; jpeg_nbits_table[-4..-3] = 2
67
+ times 1 << 0 db 1 ; jpeg_nbits_table[-2] = 1
68
+ times 1 db 0 ; jpeg_nbits_table[-1] = 0
69
+ jpeg_nbits_table: ; table[x] = number of significant bits in x (x >= 0) or in ~x (x < 0); 32768 bytes precede this label, 65536 follow
70
+ times 1 db 0 ; jpeg_nbits_table[0] = 0
71
+ times 1 << 0 db 1 ; jpeg_nbits_table[1] = 1
72
+ times 1 << 1 db 2 ; jpeg_nbits_table[2..3] = 2
73
+ times 1 << 2 db 3 ; jpeg_nbits_table[4..7] = 3
74
+ times 1 << 3 db 4 ; jpeg_nbits_table[8..15] = 4
75
+ times 1 << 4 db 5 ; jpeg_nbits_table[16..31] = 5
76
+ times 1 << 5 db 6 ; jpeg_nbits_table[32..63] = 6
77
+ times 1 << 6 db 7 ; jpeg_nbits_table[64..127] = 7
78
+ times 1 << 7 db 8 ; jpeg_nbits_table[128..255] = 8
79
+ times 1 << 8 db 9 ; jpeg_nbits_table[256..511] = 9
80
+ times 1 << 9 db 10 ; jpeg_nbits_table[512..1023] = 10
81
+ times 1 << 10 db 11 ; jpeg_nbits_table[1024..2047] = 11
82
+ times 1 << 11 db 12 ; jpeg_nbits_table[2048..4095] = 12
83
+ times 1 << 12 db 13 ; jpeg_nbits_table[4096..8191] = 13
84
+ times 1 << 13 db 14 ; jpeg_nbits_table[8192..16383] = 14
85
+ times 1 << 14 db 15 ; jpeg_nbits_table[16384..32767] = 15
86
+ times 1 << 15 db 16 ; jpeg_nbits_table[32768..65535] = 16
87
+
88
+ alignz 32
89
+ ; NBITS(x) = nbits_base + x, where nbits_base is loaded with the address of jpeg_nbits_table; MASK_BITS(x) rebases that onto jpeg_mask_bits with a dword (x * 4) stride.
90
+ %define NBITS(x) nbits_base + x
91
+ %define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - jpeg_nbits_table)
92
+
93
+ ; --------------------------------------------------------------------------
94
+ SECTION SEG_TEXT
95
+ BITS 64
96
+
97
+ ; Shorthand used to describe SIMD operations:
98
+ ; wN: xmmN treated as eight signed 16-bit values
99
+ ; wN[i]: perform the same operation on all eight signed 16-bit values, i=0..7
100
+ ; bN: xmmN treated as 16 unsigned 8-bit values
101
+ ; bN[i]: perform the same operation on all 16 unsigned 8-bit values, i=0..15
102
+ ; Contents of SIMD registers are shown in memory order.
103
+
104
+ ; Fill the bit buffer to capacity with the leading bits from code, then output
105
+ ; the bit buffer and put the remaining bits from code into the bit buffer.
106
+ ;
107
+ ; Usage:
108
+ ; code - contains the bits to shift into the bit buffer (LSB-aligned)
109
+ ; %1 - the label to which to jump when the macro completes
110
+ ; %2 (optional) - extra instructions to execute after nbits has been set
111
+ ;
112
+ ; Upon completion, free_bits will be set to the number of remaining bits from
113
+ ; code, and put_buffer will contain those remaining bits. temp and code will
114
+ ; be clobbered.
115
+ ;
116
+ ; This macro encodes any 0xFF bytes as 0xFF 0x00, as does the EMIT_BYTE()
117
+ ; macro in jchuff.c.
118
+
119
+ %macro EMIT_QWORD 1-2
120
+ add nbitsb, free_bitsb ; nbits += free_bits;
121
+ neg free_bitsb ; free_bits = -free_bits;
122
+ mov tempd, code ; temp = code;
123
+ shl put_buffer, nbitsb ; put_buffer <<= nbits;
124
+ mov nbitsb, free_bitsb ; nbits = free_bits;
125
+ neg free_bitsb ; free_bits = -free_bits;
126
+ shr tempd, nbitsb ; temp >>= nbits;
127
+ or tempq, put_buffer ; temp |= put_buffer;
128
+ movq xmm0, tempq ; xmm0.u64 = { temp, 0 }; (copy used for the 0xFF scan below)
129
+ bswap tempq ; temp = bswap64(temp); (64-bit byte swap, so the qword store below writes the most significant byte first)
130
+ mov put_buffer, codeq ; put_buffer = code;
131
+ pcmpeqb xmm0, xmm1 ; b0[i] = (b0[i] == 0xFF ? 0xFF : 0); (assumes xmm1 holds 0xFF in every byte -- set outside this macro; confirm)
132
+ %2
133
+ pmovmskb code, xmm0 ; code = 0; code |= ((b0[i] >> 7) << i); (one flag bit per byte of xmm0)
134
+ mov qword [buffer], tempq ; memcpy(buffer, &temp, 8);
135
+ ; (speculative; will be overwritten if
136
+ ; code contains any 0xFF bytes)
137
+ add free_bitsb, 64 ; free_bits += 64;
138
+ add bufferp, 8 ; buffer += 8;
139
+ test code, code ; if (code == 0) /* No 0xFF bytes */
140
+ jz %1 ; return;
141
+ ; Execute the equivalent of the EMIT_BYTE() macro in jchuff.c for all 8
142
+ ; bytes in the qword.
143
+ cmp tempb, 0xFF ; Set CF if temp[0] < 0xFF
144
+ mov byte [buffer-7], 0 ; buffer[-7] = 0; (stuffed 0x00 after the first byte; kept only if that byte is 0xFF)
145
+ sbb bufferp, 6 ; buffer -= (6 + (temp[0] < 0xFF ? 1 : 0)); (the mov above leaves CF from the cmp intact)
146
+ mov byte [buffer], temph ; buffer[0] = temp[1];
147
+ cmp temph, 0xFF ; Set CF if temp[1] < 0xFF
148
+ mov byte [buffer+1], 0 ; buffer[1] = 0;
149
+ sbb bufferp, -2 ; buffer -= (-2 + (temp[1] < 0xFF ? 1 : 0));
150
+ shr tempq, 16 ; temp >>= 16;
151
+ mov byte [buffer], tempb ; buffer[0] = temp[0];
152
+ cmp tempb, 0xFF ; Set CF if temp[0] < 0xFF
153
+ mov byte [buffer+1], 0 ; buffer[1] = 0;
154
+ sbb bufferp, -2 ; buffer -= (-2 + (temp[0] < 0xFF ? 1 : 0));
155
+ mov byte [buffer], temph ; buffer[0] = temp[1];
156
+ cmp temph, 0xFF ; Set CF if temp[1] < 0xFF
157
+ mov byte [buffer+1], 0 ; buffer[1] = 0;
158
+ sbb bufferp, -2 ; buffer -= (-2 + (temp[1] < 0xFF ? 1 : 0));
159
+ shr tempq, 16 ; temp >>= 16;
160
+ mov byte [buffer], tempb ; buffer[0] = temp[0];
161
+ cmp tempb, 0xFF ; Set CF if temp[0] < 0xFF
162
+ mov byte [buffer+1], 0 ; buffer[1] = 0;
163
+ sbb bufferp, -2 ; buffer -= (-2 + (temp[0] < 0xFF ? 1 : 0));
164
+ mov byte [buffer], temph ; buffer[0] = temp[1];
165
+ cmp temph, 0xFF ; Set CF if temp[1] < 0xFF
166
+ mov byte [buffer+1], 0 ; buffer[1] = 0;
167
+ sbb bufferp, -2 ; buffer -= (-2 + (temp[1] < 0xFF ? 1 : 0));
168
+ shr tempd, 16 ; temp >>= 16; (32-bit shift: after two 16-bit shifts only the low dword still holds data)
169
+ mov byte [buffer], tempb ; buffer[0] = temp[0];
170
+ cmp tempb, 0xFF ; Set CF if temp[0] < 0xFF
171
+ mov byte [buffer+1], 0 ; buffer[1] = 0;
172
+ sbb bufferp, -2 ; buffer -= (-2 + (temp[0] < 0xFF ? 1 : 0));
173
+ mov byte [buffer], temph ; buffer[0] = temp[1];
174
+ cmp temph, 0xFF ; Set CF if temp[1] < 0xFF
175
+ mov byte [buffer+1], 0 ; buffer[1] = 0;
176
+ sbb bufferp, -2 ; buffer -= (-2 + (temp[1] < 0xFF ? 1 : 0));
177
+ jmp %1 ; return;
178
+ %endmacro
179
+
180
+ ;
181
+ ; Encode a single block's worth of coefficients.
182
+ ;
183
+ ; GLOBAL(JOCTET *)
184
+ ; jsimd_huff_encode_one_block_sse2(working_state *state, JOCTET *buffer,
185
+ ; JCOEFPTR block, int last_dc_val,
186
+ ; c_derived_tbl *dctbl, c_derived_tbl *actbl)
187
+ ;
188
+ ; NOTES:
189
+ ; When shuffling data, we try to avoid pinsrw as much as possible, since it is
190
+ ; slow on many CPUs. Its reciprocal throughput (issue latency) is 1 even on
191
+ ; modern CPUs, so chains of pinsrw instructions (even with different outputs)
192
+ ; can limit performance. pinsrw is a VectorPath instruction on AMD K8 and
193
+ ; requires 2 µops (with memory operand) on Intel. In either case, only one
194
+ ; pinsrw instruction can be decoded per cycle (and nothing else if they are
195
+ ; back-to-back), so out-of-order execution cannot be used to work around long
196
+ ; pinsrw chains (though for Sandy Bridge and later, this may be less of a
197
+ ; problem if the code runs from the µop cache.)
198
+ ;
199
+ ; We use tzcnt instead of bsf without checking for support. The instruction is
200
+ ; executed as bsf on CPUs that don't support tzcnt (encoding is equivalent to
201
+ ; rep bsf.) The destination (first) operand of bsf (and tzcnt on some CPUs) is
202
+ ; an input dependency (although the behavior is not formally defined, Intel
203
+ ; CPUs usually leave the destination unmodified if the source is zero.) This
204
+ ; can prevent out-of-order execution, so we clear the destination before
205
+ ; invoking tzcnt.
206
+ ;
207
+ ; Initial register allocation
208
+ ; rax - buffer
209
+ ; rbx - temp
210
+ ; rcx - nbits
211
+ ; rdx - block --> free_bits
212
+ ; rsi - nbits_base
213
+ ; rdi - t
214
+ ; rbp - code
215
+ ; r8 - dctbl --> code_temp
216
+ ; r9 - actbl
217
+ ; r10 - state
218
+ ; r11 - index
219
+ ; r12 - put_buffer
220
+
221
+ %define buffer rax ; output pointer, as used in address expressions ([buffer], [buffer+1])
222
+ %ifdef WIN64
223
+ %define bufferp rax ; Win64: pointer arithmetic on buffer uses rax directly
224
+ %else
225
+ %define bufferp raxp ; other ABIs: raxp is not defined in the visible part of this file (presumably a pointer-width alias from jsimdext.inc -- confirm)
226
+ %endif
227
+ %define tempq rbx ; temp, 64-bit view
228
+ %define tempd ebx ; temp, low 32 bits
229
+ %define tempb bl ; temp, bits 0..7
230
+ %define temph bh ; temp, bits 8..15 (bh is only encodable in instructions that carry no REX prefix)
231
+ %define nbitsq rcx ; nbits, 64-bit view
232
+ %define nbits ecx ; nbits, low 32 bits
233
+ %define nbitsb cl ; nbits, low byte; cl is also the variable shift count used by shl/shr in EMIT_QWORD
234
+ %define block rdx ; coefficient block pointer (register later reused for free_bits, per the allocation list above)
235
+ %define nbits_base rsi ; base address for NBITS()/MASK_BITS(); loaded with jpeg_nbits_table in the prologue
236
+ %define t rdi ; t, 64-bit view
237
+ %define td edi ; t, low 32 bits
238
+ %define codeq rbp ; code, 64-bit view (rbp holds data here, not a frame pointer)
239
+ %define code ebp ; code, low 32 bits
240
+ %define dctbl r8 ; DC derived-table pointer (register later reused for code_temp, per the allocation list above)
241
+ %define actbl r9 ; AC derived-table pointer
242
+ %define state r10 ; working_state pointer
243
+ %define index r11 ; index, 64-bit view
244
+ %define indexd r11d ; index, low 32 bits
245
+ %define put_buffer r12 ; bit accumulation buffer, 64-bit view (r12 is pushed in the prologue)
246
+ %define put_bufferd r12d ; bit accumulation buffer, low 32 bits
247
+
248
+ ; Step 1: Re-arrange input data according to jpeg_natural_order (xx = coefficient 0, the DC term, which is encoded separately)
249
+ ; xx 01 02 03 04 05 06 07      xx 01 08 16 09 02 03 10
250
+ ; 08 09 10 11 12 13 14 15      17 24 32 25 18 11 04 05
251
+ ; 16 17 18 19 20 21 22 23      12 19 26 33 40 48 41 34
252
+ ; 24 25 26 27 28 29 30 31 ==>  27 20 13 06 07 14 21 28
253
+ ; 32 33 34 35 36 37 38 39      35 42 49 56 57 50 43 36
254
+ ; 40 41 42 43 44 45 46 47      29 22 15 23 30 37 44 51
255
+ ; 48 49 50 51 52 53 54 55      58 59 52 45 38 31 39 46
256
+ ; 56 57 58 59 60 61 62 63      53 60 61 54 47 55 62 63
257
+
258
+ align 32
259
+ GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
259
+ ; Huffman-encodes one coefficient block: the DC difference via dctbl first, then the AC coefficients via actbl.
260
+ EXTN(jsimd_huff_encode_one_block_sse2):
261
+
262
+ %ifdef WIN64
263
+
264
+ ; rcx = working_state *state
265
+ ; rdx = JOCTET *buffer
266
+ ; r8 = JCOEFPTR block
267
+ ; r9 = int last_dc_val
268
+ ; [rsp+40] = c_derived_tbl *dctbl (at entry: above the return address and 32-byte shadow space)
269
+ ; [rsp+48] = c_derived_tbl *actbl (at entry)
270
+
271
+ ;X: X = code stream (A-H = SIMD streams building rows 0-7 of t, Z = scalar DC/bitmap stream)
272
+ mov buffer, rdx ; rax = buffer; must precede the next mov, which overwrites rdx
273
+ mov block, r8 ; rdx = block; frees r8 for dctbl
274
+ movups xmm3, XMMWORD [block + 0 * SIZEOF_WORD] ;D: w3 = xx 01 02 03 04 05 06 07
275
+ push rbx ; save callee-saved registers (Win64: rbx, rbp, rsi, rdi, r12)
276
+ push rbp
277
+ movdqa xmm0, xmm3 ;A: w0 = xx 01 02 03 04 05 06 07
278
+ push rsi
279
+ push rdi
280
+ push r12
281
+ movups xmm1, XMMWORD [block + 8 * SIZEOF_WORD] ;B: w1 = 08 09 10 11 12 13 14 15
282
+ mov state, rcx ; r10 = state; frees rcx for nbits
283
+ movsx code, word [block] ;Z: code = block[0];
284
+ pxor xmm4, xmm4 ;A: w4[i] = 0;
285
+ sub code, r9d ;Z: code -= last_dc_val;
286
+ mov dctbl, POINTER [rsp+6*8+4*8] ; stack arg 5: 6*8 = five pushes + return address, 4*8 = shadow space
287
+ mov actbl, POINTER [rsp+6*8+5*8] ; stack arg 6; overwrites r9, so last_dc_val must already be consumed
288
+ punpckldq xmm0, xmm1 ;A: w0 = xx 01 08 09 02 03 10 11
289
+ lea nbits_base, [rel jpeg_nbits_table] ; RIP-relative address of the nbits lookup table
290
+ add rsp, -DCTSIZE2 * SIZEOF_WORD ; allocate t_ (DCTSIZE2 words) on the stack; Win64 has no red zone
291
+ mov t, rsp ; t = &t_[0]
292
+
293
+ %else
294
+
295
+ ; rdi = working_state *state
296
+ ; rsi = JOCTET *buffer
297
+ ; rdx = JCOEFPTR block
298
+ ; rcx = int last_dc_val
299
+ ; r8 = c_derived_tbl *dctbl
300
+ ; r9 = c_derived_tbl *actbl
301
+
302
+ ;X: X = code stream (A-H = SIMD streams building rows 0-7 of t, Z = scalar DC/bitmap stream)
303
+ movups xmm3, XMMWORD [block + 0 * SIZEOF_WORD] ;D: w3 = xx 01 02 03 04 05 06 07
304
+ push rbx ; save the callee-saved registers this function uses (rbx, rbp, r12)
305
+ push rbp
306
+ movdqa xmm0, xmm3 ;A: w0 = xx 01 02 03 04 05 06 07
307
+ push r12
308
+ mov state, rdi ; r10 = state; frees rdi for t
309
+ mov buffer, rsi ; rax = buffer; frees rsi for nbits_base
310
+ movups xmm1, XMMWORD [block + 8 * SIZEOF_WORD] ;B: w1 = 08 09 10 11 12 13 14 15
311
+ movsx codeq, word [block] ;Z: code = block[0];
312
+ lea nbits_base, [rel jpeg_nbits_table] ; RIP-relative address of the nbits lookup table
313
+ pxor xmm4, xmm4 ;A: w4[i] = 0;
314
+ sub codeq, rcx ;Z: code -= last_dc_val;
315
+ lea t, [rsp - DCTSIZE2 * SIZEOF_WORD] ; use red zone for t_
316
+
317
+ %endif
318
+
319
+ pshuflw xmm0, xmm0, 11001001b ;A: w0 = 01 08 xx 09 02 03 10 11
320
+ pinsrw xmm0, word [block + 16 * SIZEOF_WORD], 2 ;A: w0 = 01 08 16 09 02 03 10 11
321
+ punpckhdq xmm3, xmm1 ;D: w3 = 04 05 12 13 06 07 14 15
322
+ punpcklqdq xmm1, xmm3 ;B: w1 = 08 09 10 11 04 05 12 13
323
+ pinsrw xmm0, word [block + 17 * SIZEOF_WORD], 7 ;A: w0 = 01 08 16 09 02 03 10 17
324
+ ;A: (Row 0, offset 1)
325
+ pcmpgtw xmm4, xmm0 ;A: w4[i] = (w0[i] < 0 ? -1 : 0);
326
+ paddw xmm0, xmm4 ;A: w0[i] += w4[i];
327
+ movaps XMMWORD [t + 0 * SIZEOF_WORD], xmm0 ;A: t[i] = w0[i];
328
+
329
+ movq xmm2, qword [block + 24 * SIZEOF_WORD] ;B: w2 = 24 25 26 27 -- -- -- --
330
+ pshuflw xmm2, xmm2, 11011000b ;B: w2 = 24 26 25 27 -- -- -- --
331
+ pslldq xmm1, 1 * SIZEOF_WORD ;B: w1 = -- 08 09 10 11 04 05 12
332
+ movups xmm5, XMMWORD [block + 48 * SIZEOF_WORD] ;H: w5 = 48 49 50 51 52 53 54 55
333
+ movsd xmm1, xmm2 ;B: w1 = 24 26 25 27 11 04 05 12
334
+ punpcklqdq xmm2, xmm5 ;C: w2 = 24 26 25 27 48 49 50 51
335
+ pinsrw xmm1, word [block + 32 * SIZEOF_WORD], 1 ;B: w1 = 24 32 25 27 11 04 05 12
336
+ pxor xmm4, xmm4 ;A: w4[i] = 0;
337
+ psrldq xmm3, 2 * SIZEOF_WORD ;D: w3 = 12 13 06 07 14 15 -- --
338
+ pcmpeqw xmm0, xmm4 ;A: w0[i] = (w0[i] == 0 ? -1 : 0);
339
+ pinsrw xmm1, word [block + 18 * SIZEOF_WORD], 3 ;B: w1 = 24 32 25 18 11 04 05 12
340
+ ; (Row 1, offset 1)
341
+ pcmpgtw xmm4, xmm1 ;B: w4[i] = (w1[i] < 0 ? -1 : 0);
342
+ paddw xmm1, xmm4 ;B: w1[i] += w4[i];
343
+ movaps XMMWORD [t + 8 * SIZEOF_WORD], xmm1 ;B: t[i+8] = w1[i];
344
+ pxor xmm4, xmm4 ;B: w4[i] = 0;
345
+ pcmpeqw xmm1, xmm4 ;B: w1[i] = (w1[i] == 0 ? -1 : 0);
346
+
347
+ packsswb xmm0, xmm1 ;AB: b0[i] = w0[i], b0[i+8] = w1[i]
348
+ ; w/ signed saturation
349
+
350
+ pinsrw xmm3, word [block + 20 * SIZEOF_WORD], 0 ;D: w3 = 20 13 06 07 14 15 -- --
351
+ pinsrw xmm3, word [block + 21 * SIZEOF_WORD], 5 ;D: w3 = 20 13 06 07 14 21 -- --
352
+ pinsrw xmm3, word [block + 28 * SIZEOF_WORD], 6 ;D: w3 = 20 13 06 07 14 21 28 --
353
+ pinsrw xmm3, word [block + 35 * SIZEOF_WORD], 7 ;D: w3 = 20 13 06 07 14 21 28 35
354
+ ; (Row 3, offset 1)
355
+ pcmpgtw xmm4, xmm3 ;D: w4[i] = (w3[i] < 0 ? -1 : 0);
356
+ paddw xmm3, xmm4 ;D: w3[i] += w4[i];
357
+ movaps XMMWORD [t + 24 * SIZEOF_WORD], xmm3 ;D: t[i+24] = w3[i];
358
+ pxor xmm4, xmm4 ;D: w4[i] = 0;
359
+ pcmpeqw xmm3, xmm4 ;D: w3[i] = (w3[i] == 0 ? -1 : 0);
360
+
361
+ pinsrw xmm2, word [block + 19 * SIZEOF_WORD], 0 ;C: w2 = 19 26 25 27 48 49 50 51
362
+ cmp code, 1 << 31 ;Z: Set CF if (unsigned)code < 0x80000000,
363
+ ;Z: i.e. if code is non-negative
364
+ pinsrw xmm2, word [block + 33 * SIZEOF_WORD], 2 ;C: w2 = 19 26 33 27 48 49 50 51
365
+ pinsrw xmm2, word [block + 40 * SIZEOF_WORD], 3 ;C: w2 = 19 26 33 40 48 49 50 51
366
+ adc code, -1 ;Z: code += -1 + (code >= 0 ? 1 : 0);
367
+ pinsrw xmm2, word [block + 41 * SIZEOF_WORD], 5 ;C: w2 = 19 26 33 40 48 41 50 51
368
+ pinsrw xmm2, word [block + 34 * SIZEOF_WORD], 6 ;C: w2 = 19 26 33 40 48 41 34 51
369
+ movsxd codeq, code ;Z: sign extend code
370
+ pinsrw xmm2, word [block + 27 * SIZEOF_WORD], 7 ;C: w2 = 19 26 33 40 48 41 34 27
371
+ ; (Row 2, offset 1)
372
+ pcmpgtw xmm4, xmm2 ;C: w4[i] = (w2[i] < 0 ? -1 : 0);
373
+ paddw xmm2, xmm4 ;C: w2[i] += w4[i];
374
+ movaps XMMWORD [t + 16 * SIZEOF_WORD], xmm2 ;C: t[i+16] = w2[i];
375
+ pxor xmm4, xmm4 ;C: w4[i] = 0;
376
+ pcmpeqw xmm2, xmm4 ;C: w2[i] = (w2[i] == 0 ? -1 : 0);
377
+
378
+ packsswb xmm2, xmm3 ;CD: b2[i] = w2[i], b2[i+8] = w3[i]
379
+ ; w/ signed saturation
380
+
381
+ movzx nbitsq, byte [NBITS(codeq)] ;Z: nbits = JPEG_NBITS(code);
382
+ movdqa xmm3, xmm5 ;H: w3 = 48 49 50 51 52 53 54 55
383
+ pmovmskb tempd, xmm2 ;Z: temp = 0; temp |= ((b2[i] >> 7) << i);
384
+ pmovmskb put_bufferd, xmm0 ;Z: put_buffer = 0; put_buffer |= ((b0[i] >> 7) << i);
385
+ movups xmm0, XMMWORD [block + 56 * SIZEOF_WORD] ;H: w0 = 56 57 58 59 60 61 62 63
386
+ punpckhdq xmm3, xmm0 ;H: w3 = 52 53 60 61 54 55 62 63
387
+ shl tempd, 16 ;Z: temp <<= 16;
388
+ psrldq xmm3, 1 * SIZEOF_WORD ;H: w3 = 53 60 61 54 55 62 63 --
389
+ pxor xmm2, xmm2 ;H: w2[i] = 0;
390
+ or put_bufferd, tempd ;Z: put_buffer |= temp;
391
+ pshuflw xmm3, xmm3, 00111001b ;H: w3 = 60 61 54 53 55 62 63 --
392
+ movq xmm1, qword [block + 44 * SIZEOF_WORD] ;G: w1 = 44 45 46 47 -- -- -- --
393
+ unpcklps xmm5, xmm0 ;E: w5 = 48 49 56 57 50 51 58 59
394
+ pxor xmm0, xmm0 ;H: w0[i] = 0;
395
+ pinsrw xmm3, word [block + 47 * SIZEOF_WORD], 3 ;H: w3 = 60 61 54 47 55 62 63 --
396
+ ; (Row 7, offset 1)
397
+ pcmpgtw xmm2, xmm3 ;H: w2[i] = (w3[i] < 0 ? -1 : 0);
398
+ paddw xmm3, xmm2 ;H: w3[i] += w2[i];
399
+ movaps XMMWORD [t + 56 * SIZEOF_WORD], xmm3 ;H: t[i+56] = w3[i];
400
+ movq xmm4, qword [block + 36 * SIZEOF_WORD] ;G: w4 = 36 37 38 39 -- -- -- --
401
+ pcmpeqw xmm3, xmm0 ;H: w3[i] = (w3[i] == 0 ? -1 : 0);
402
+ punpckldq xmm4, xmm1 ;G: w4 = 36 37 44 45 38 39 46 47
403
+ mov tempd, [dctbl + c_derived_tbl.ehufco + nbitsq * 4]
404
+ ;Z: temp = dctbl->ehufco[nbits];
405
+ movdqa xmm1, xmm4 ;F: w1 = 36 37 44 45 38 39 46 47
406
+ psrldq xmm4, 1 * SIZEOF_WORD ;G: w4 = 37 44 45 38 39 46 47 --
407
+ shufpd xmm1, xmm5, 10b ;F: w1 = 36 37 44 45 50 51 58 59
408
+ and code, dword [MASK_BITS(nbitsq)] ;Z: code &= (1 << nbits) - 1;
409
+ pshufhw xmm4, xmm4, 11010011b ;G: w4 = 37 44 45 38 -- 39 46 --
410
+ pslldq xmm1, 1 * SIZEOF_WORD ;F: w1 = -- 36 37 44 45 50 51 58
411
+ shl tempq, nbitsb ;Z: temp <<= nbits;
412
+ pinsrw xmm4, word [block + 59 * SIZEOF_WORD], 0 ;G: w4 = 59 44 45 38 -- 39 46 --
413
+ pshufd xmm1, xmm1, 11011000b ;F: w1 = -- 36 45 50 37 44 51 58
414
+ pinsrw xmm4, word [block + 52 * SIZEOF_WORD], 1 ;G: w4 = 59 52 45 38 -- 39 46 --
415
+ or code, tempd ;Z: code |= temp;
416
+ movlps xmm1, qword [block + 20 * SIZEOF_WORD] ;F: w1 = 20 21 22 23 37 44 51 58
417
+ pinsrw xmm4, word [block + 31 * SIZEOF_WORD], 4 ;G: w4 = 59 52 45 38 31 39 46 --
418
+ pshuflw xmm1, xmm1, 01110010b ;F: w1 = 22 20 23 21 37 44 51 58
419
+ pinsrw xmm4, word [block + 53 * SIZEOF_WORD], 7 ;G: w4 = 59 52 45 38 31 39 46 53
420
+ ; (Row 6, offset 1)
421
+ pxor xmm2, xmm2 ;G: w2[i] = 0;
422
+ pcmpgtw xmm0, xmm4 ;G: w0[i] = (w4[i] < 0 ? -1 : 0);
423
+ pinsrw xmm1, word [block + 15 * SIZEOF_WORD], 1 ;F: w1 = 22 15 23 21 37 44 51 58
424
+ paddw xmm4, xmm0 ;G: w4[i] += w0[i];
425
+ movaps XMMWORD [t + 48 * SIZEOF_WORD], xmm4 ;G: t[48+i] = w4[i];
426
+ pinsrw xmm1, word [block + 30 * SIZEOF_WORD], 3 ;F: w1 = 22 15 23 30 37 44 51 58
427
+ ; (Row 5, offset 1)
428
+ pcmpeqw xmm4, xmm2 ;G: w4[i] = (w4[i] == 0 ? -1 : 0);
429
+ pinsrw xmm5, word [block + 42 * SIZEOF_WORD], 0 ;E: w5 = 42 49 56 57 50 51 58 59
430
+
431
+ packsswb xmm4, xmm3 ;GH: b4[i] = w4[i], b4[i+8] = w3[i]
432
+ ; w/ signed saturation
433
+
434
+ pxor xmm0, xmm0 ;F: w0[i] = 0;
435
+ pinsrw xmm5, word [block + 43 * SIZEOF_WORD], 5 ;E: w5 = 42 49 56 57 50 43 58 59
436
+ pcmpgtw xmm2, xmm1 ;F: w2[i] = (w1[i] < 0 ? -1 : 0);
437
+ pmovmskb tempd, xmm4 ;Z: temp = 0; temp |= ((b4[i] >> 7) << i);
438
+ pinsrw xmm5, word [block + 36 * SIZEOF_WORD], 6 ;E: w5 = 42 49 56 57 50 43 36 59
439
+ paddw xmm1, xmm2 ;F: w1[i] += w2[i];
440
+ movaps XMMWORD [t + 40 * SIZEOF_WORD], xmm1 ;F: t[40+i] = w1[i];
441
+ pinsrw xmm5, word [block + 29 * SIZEOF_WORD], 7 ;E: w5 = 42 49 56 57 50 43 36 29
442
+ ; (Row 4, offset 1)
443
+ %undef block
444
+ %define free_bitsq rdx
445
+ %define free_bitsd edx
446
+ %define free_bitsb dl
447
+ pcmpeqw xmm1, xmm0 ;F: w1[i] = (w1[i] == 0 ? -1 : 0);
448
+ shl tempq, 48 ;Z: temp <<= 48;
449
+ pxor xmm2, xmm2 ;E: w2[i] = 0;
450
+ pcmpgtw xmm0, xmm5 ;E: w0[i] = (w5[i] < 0 ? -1 : 0);
451
+ paddw xmm5, xmm0 ;E: w5[i] += w0[i];
452
+ or tempq, put_buffer ;Z: temp |= put_buffer;
453
+ movaps XMMWORD [t + 32 * SIZEOF_WORD], xmm5 ;E: t[32+i] = w5[i];
454
+ lea t, [dword t - 2] ;Z: t = &t[-1];
455
+ pcmpeqw xmm5, xmm2 ;E: w5[i] = (w5[i] == 0 ? -1 : 0);
456
+
457
+ packsswb xmm5, xmm1 ;EF: b5[i] = w5[i], b5[i+8] = w1[i]
458
+ ; w/ signed saturation
459
+
460
+ add nbitsb, byte [dctbl + c_derived_tbl.ehufsi + nbitsq]
461
+ ;Z: nbits += dctbl->ehufsi[nbits];
462
+ %undef dctbl
463
+ %define code_temp r8d
464
+ pmovmskb indexd, xmm5 ;Z: index = 0; index |= ((b5[i] >> 7) << i);
465
+ mov free_bitsd, [state+working_state.cur.free_bits]
466
+ ;Z: free_bits = state->cur.free_bits;
467
+ pcmpeqw xmm1, xmm1 ;Z: b1[i] = 0xFF;
468
+ shl index, 32 ;Z: index <<= 32;
469
+ mov put_buffer, [state+working_state.cur.put_buffer.simd]
470
+ ;Z: put_buffer = state->cur.put_buffer.simd;
471
+ or index, tempq ;Z: index |= temp;
472
+ not index ;Z: index = ~index;
473
+ sub free_bitsb, nbitsb ;Z: if ((free_bits -= nbits) >= 0)
474
+ jnl .ENTRY_SKIP_EMIT_CODE ;Z: goto .ENTRY_SKIP_EMIT_CODE;
475
+ align 16
476
+ .EMIT_CODE: ;Z: .EMIT_CODE:
477
+ EMIT_QWORD .BLOOP_COND ;Z: insert code, flush buffer, goto .BLOOP_COND
478
+
479
+ ; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
480
+
481
+ align 16
482
+ .BRLOOP: ; do {
483
+ lea code_temp, [nbitsq - 16] ; code_temp = nbits - 16;
484
+ movzx nbits, byte [actbl + c_derived_tbl.ehufsi + 0xf0]
485
+ ; nbits = actbl->ehufsi[0xf0];
486
+ mov code, [actbl + c_derived_tbl.ehufco + 0xf0 * 4]
487
+ ; code = actbl->ehufco[0xf0];
488
+ sub free_bitsb, nbitsb ; if ((free_bits -= nbits) <= 0)
489
+ jle .EMIT_BRLOOP_CODE ; goto .EMIT_BRLOOP_CODE;
490
+ shl put_buffer, nbitsb ; put_buffer <<= nbits;
491
+ mov nbits, code_temp ; nbits = code_temp;
492
+ or put_buffer, codeq ; put_buffer |= code;
493
+ cmp nbits, 16 ; if (nbits <= 16)
494
+ jle .ERLOOP ; break;
495
+ jmp .BRLOOP ; } while (1);
496
+
497
+ ; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
498
+
499
+ align 16
500
+ times 5 nop ; NOTE(review): padding after the align - presumably tunes placement of the labels below; confirm
501
+ .ENTRY_SKIP_EMIT_CODE: ; .ENTRY_SKIP_EMIT_CODE:
502
+ shl put_buffer, nbitsb ; put_buffer <<= nbits;
503
+ or put_buffer, codeq ; put_buffer |= code;
504
+ .BLOOP_COND: ; .BLOOP_COND:
505
+ test index, index ; if (index != 0)
506
+ jz .ELOOP ; {
507
+ .BLOOP: ; do {
508
+ xor nbits, nbits ; nbits = 0; /* kill tzcnt input dependency */
509
+ tzcnt nbitsq, index ; nbits = # of trailing 0 bits in index
510
+ inc nbits ; ++nbits;
511
+ lea t, [t + nbitsq * 2] ; t = &t[nbits];
512
+ shr index, nbitsb ; index >>= nbits;
513
+ .EMIT_BRLOOP_CODE_END: ; .EMIT_BRLOOP_CODE_END:
514
+ cmp nbits, 16 ; if (nbits > 16)
515
+ jg .BRLOOP ; goto .BRLOOP;
516
+ .ERLOOP: ; .ERLOOP:
517
+ movsx codeq, word [t] ; code = *t;
518
+ lea tempd, [nbitsq * 2] ; temp = nbits * 2;
519
+ movzx nbits, byte [NBITS(codeq)] ; nbits = JPEG_NBITS(code);
520
+ lea tempd, [nbitsq + tempq * 8] ; temp = temp * 8 + nbits;
521
+ mov code_temp, [actbl + c_derived_tbl.ehufco + (tempq - 16) * 4]
522
+ ; code_temp = actbl->ehufco[temp-16];
523
+ shl code_temp, nbitsb ; code_temp <<= nbits;
524
+ and code, dword [MASK_BITS(nbitsq)] ; code &= (1 << nbits) - 1;
525
+ add nbitsb, [actbl + c_derived_tbl.ehufsi + (tempq - 16)]
526
+ ; nbits += actbl->ehufsi[temp-16];
527
+ or code, code_temp ; code |= code_temp;
528
+ sub free_bitsb, nbitsb ; if ((free_bits -= nbits) <= 0)
529
+ jle .EMIT_CODE ; goto .EMIT_CODE;
530
+ shl put_buffer, nbitsb ; put_buffer <<= nbits;
531
+ or put_buffer, codeq ; put_buffer |= code;
532
+ test index, index ; any nonzero coefficients left in the bitmap?
533
+ jnz .BLOOP ; } while (index != 0);
534
+ .ELOOP: ; } /* index != 0 */
535
+ sub td, esp ; t -= (WIN64: &t_[0], UNIX: &t_[64]);
536
+ %ifdef WIN64
537
+ cmp td, (DCTSIZE2 - 2) * SIZEOF_WORD ; if (t != 62)
538
+ %else
539
+ cmp td, -2 * SIZEOF_WORD ; if (t != -2)
540
+ %endif
541
+ je .EFN ; {
542
+ movzx nbits, byte [actbl + c_derived_tbl.ehufsi + 0]
543
+ ; nbits = actbl->ehufsi[0];
544
+ mov code, [actbl + c_derived_tbl.ehufco + 0] ; code = actbl->ehufco[0];
545
+ sub free_bitsb, nbitsb ; if ((free_bits -= nbits) <= 0)
546
+ jg .EFN_SKIP_EMIT_CODE ; {
547
+ EMIT_QWORD .EFN ; insert code, flush buffer
548
+ align 16
549
+ .EFN_SKIP_EMIT_CODE: ; } else {
550
+ shl put_buffer, nbitsb ; put_buffer <<= nbits;
551
+ or put_buffer, codeq ; put_buffer |= code;
552
+ .EFN: ; } }
553
+ mov [state + working_state.cur.put_buffer.simd], put_buffer
554
+ ; state->cur.put_buffer.simd = put_buffer;
555
+ mov byte [state + working_state.cur.free_bits], free_bitsb
556
+ ; state->cur.free_bits = free_bits;
557
+ %ifdef WIN64
558
+ sub rsp, -DCTSIZE2 * SIZEOF_WORD ; release the t_ scratch array allocated in the prologue
559
+ pop r12 ; restore callee-saved registers in reverse push order
560
+ pop rdi
561
+ pop rsi
562
+ pop rbp
563
+ pop rbx
564
+ %else
565
+ pop r12 ; restore callee-saved registers in reverse push order
566
+ pop rbp
567
+ pop rbx
568
+ %endif
569
+ ret ; rax (buffer) is the return register; EMIT_QWORD (defined outside this view) presumably advances it - confirm
570
+
571
+ ; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
572
+
573
+ align 16
574
+ .EMIT_BRLOOP_CODE:
575
+ EMIT_QWORD .EMIT_BRLOOP_CODE_END, { mov nbits, code_temp }
576
+ ; insert code, flush buffer,
577
+ ; nbits = code_temp, goto .EMIT_BRLOOP_CODE_END
580
+
581
+ ; For some reason, the OS X linker does not honor the request to align the
582
+ ; segment unless we do this.
583
+ align 32