image_pack 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +18 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +140 -0
  5. data/THIRD_PARTY_NOTICES.md +8 -0
  6. data/ext/image_pack/extconf.rb +515 -0
  7. data/ext/image_pack/image_pack.c +1618 -0
  8. data/ext/image_pack/vendor/.vendored +1 -0
  9. data/ext/image_pack/vendor/mozjpeg/BUILDING.txt +902 -0
  10. data/ext/image_pack/vendor/mozjpeg/CMakeLists.txt +1593 -0
  11. data/ext/image_pack/vendor/mozjpeg/LICENSE.md +132 -0
  12. data/ext/image_pack/vendor/mozjpeg/README-mozilla.txt +194 -0
  13. data/ext/image_pack/vendor/mozjpeg/README-turbo.txt +346 -0
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +258 -0
  15. data/ext/image_pack/vendor/mozjpeg/README.md +29 -0
  16. data/ext/image_pack/vendor/mozjpeg/cderror.h +128 -0
  17. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +156 -0
  18. data/ext/image_pack/vendor/mozjpeg/cdjpeg.h +171 -0
  19. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +961 -0
  20. data/ext/image_pack/vendor/mozjpeg/cmyk.h +60 -0
  21. data/ext/image_pack/vendor/mozjpeg/coderules.txt +78 -0
  22. data/ext/image_pack/vendor/mozjpeg/croptest.in +95 -0
  23. data/ext/image_pack/vendor/mozjpeg/djpeg.c +855 -0
  24. data/ext/image_pack/vendor/mozjpeg/example.txt +464 -0
  25. data/ext/image_pack/vendor/mozjpeg/jaricom.c +157 -0
  26. data/ext/image_pack/vendor/mozjpeg/jcapimin.c +307 -0
  27. data/ext/image_pack/vendor/mozjpeg/jcapistd.c +168 -0
  28. data/ext/image_pack/vendor/mozjpeg/jcarith.c +972 -0
  29. data/ext/image_pack/vendor/mozjpeg/jccoefct.c +609 -0
  30. data/ext/image_pack/vendor/mozjpeg/jccolext.c +144 -0
  31. data/ext/image_pack/vendor/mozjpeg/jccolor.c +721 -0
  32. data/ext/image_pack/vendor/mozjpeg/jcdctmgr.c +1776 -0
  33. data/ext/image_pack/vendor/mozjpeg/jcext.c +219 -0
  34. data/ext/image_pack/vendor/mozjpeg/jchuff.c +1146 -0
  35. data/ext/image_pack/vendor/mozjpeg/jchuff.h +57 -0
  36. data/ext/image_pack/vendor/mozjpeg/jcicc.c +105 -0
  37. data/ext/image_pack/vendor/mozjpeg/jcinit.c +82 -0
  38. data/ext/image_pack/vendor/mozjpeg/jcmainct.c +162 -0
  39. data/ext/image_pack/vendor/mozjpeg/jcmarker.c +844 -0
  40. data/ext/image_pack/vendor/mozjpeg/jcmaster.c +958 -0
  41. data/ext/image_pack/vendor/mozjpeg/jcmaster.h +56 -0
  42. data/ext/image_pack/vendor/mozjpeg/jcomapi.c +109 -0
  43. data/ext/image_pack/vendor/mozjpeg/jconfig.h.in +37 -0
  44. data/ext/image_pack/vendor/mozjpeg/jconfig.txt +93 -0
  45. data/ext/image_pack/vendor/mozjpeg/jconfigint.h.in +44 -0
  46. data/ext/image_pack/vendor/mozjpeg/jcparam.c +991 -0
  47. data/ext/image_pack/vendor/mozjpeg/jcphuff.c +1123 -0
  48. data/ext/image_pack/vendor/mozjpeg/jcprepct.c +351 -0
  49. data/ext/image_pack/vendor/mozjpeg/jcsample.c +522 -0
  50. data/ext/image_pack/vendor/mozjpeg/jcstest.c +126 -0
  51. data/ext/image_pack/vendor/mozjpeg/jctrans.c +408 -0
  52. data/ext/image_pack/vendor/mozjpeg/jdapimin.c +407 -0
  53. data/ext/image_pack/vendor/mozjpeg/jdapistd.c +691 -0
  54. data/ext/image_pack/vendor/mozjpeg/jdarith.c +782 -0
  55. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +198 -0
  56. data/ext/image_pack/vendor/mozjpeg/jdatadst.c +299 -0
  57. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +194 -0
  58. data/ext/image_pack/vendor/mozjpeg/jdatasrc.c +295 -0
  59. data/ext/image_pack/vendor/mozjpeg/jdcoefct.c +881 -0
  60. data/ext/image_pack/vendor/mozjpeg/jdcoefct.h +83 -0
  61. data/ext/image_pack/vendor/mozjpeg/jdcol565.c +384 -0
  62. data/ext/image_pack/vendor/mozjpeg/jdcolext.c +141 -0
  63. data/ext/image_pack/vendor/mozjpeg/jdcolor.c +881 -0
  64. data/ext/image_pack/vendor/mozjpeg/jdct.h +208 -0
  65. data/ext/image_pack/vendor/mozjpeg/jddctmgr.c +367 -0
  66. data/ext/image_pack/vendor/mozjpeg/jdhuff.c +834 -0
  67. data/ext/image_pack/vendor/mozjpeg/jdhuff.h +247 -0
  68. data/ext/image_pack/vendor/mozjpeg/jdicc.c +167 -0
  69. data/ext/image_pack/vendor/mozjpeg/jdinput.c +408 -0
  70. data/ext/image_pack/vendor/mozjpeg/jdmainct.c +460 -0
  71. data/ext/image_pack/vendor/mozjpeg/jdmainct.h +71 -0
  72. data/ext/image_pack/vendor/mozjpeg/jdmarker.c +1374 -0
  73. data/ext/image_pack/vendor/mozjpeg/jdmaster.c +727 -0
  74. data/ext/image_pack/vendor/mozjpeg/jdmaster.h +33 -0
  75. data/ext/image_pack/vendor/mozjpeg/jdmerge.c +587 -0
  76. data/ext/image_pack/vendor/mozjpeg/jdmerge.h +47 -0
  77. data/ext/image_pack/vendor/mozjpeg/jdmrg565.c +354 -0
  78. data/ext/image_pack/vendor/mozjpeg/jdmrgext.c +184 -0
  79. data/ext/image_pack/vendor/mozjpeg/jdphuff.c +679 -0
  80. data/ext/image_pack/vendor/mozjpeg/jdpostct.c +294 -0
  81. data/ext/image_pack/vendor/mozjpeg/jdsample.c +524 -0
  82. data/ext/image_pack/vendor/mozjpeg/jdsample.h +50 -0
  83. data/ext/image_pack/vendor/mozjpeg/jdtrans.c +156 -0
  84. data/ext/image_pack/vendor/mozjpeg/jerror.c +251 -0
  85. data/ext/image_pack/vendor/mozjpeg/jerror.h +335 -0
  86. data/ext/image_pack/vendor/mozjpeg/jfdctflt.c +169 -0
  87. data/ext/image_pack/vendor/mozjpeg/jfdctfst.c +227 -0
  88. data/ext/image_pack/vendor/mozjpeg/jfdctint.c +288 -0
  89. data/ext/image_pack/vendor/mozjpeg/jidctflt.c +240 -0
  90. data/ext/image_pack/vendor/mozjpeg/jidctfst.c +371 -0
  91. data/ext/image_pack/vendor/mozjpeg/jidctint.c +2627 -0
  92. data/ext/image_pack/vendor/mozjpeg/jidctred.c +409 -0
  93. data/ext/image_pack/vendor/mozjpeg/jinclude.h +147 -0
  94. data/ext/image_pack/vendor/mozjpeg/jmemmgr.c +1180 -0
  95. data/ext/image_pack/vendor/mozjpeg/jmemnobs.c +110 -0
  96. data/ext/image_pack/vendor/mozjpeg/jmemsys.h +178 -0
  97. data/ext/image_pack/vendor/mozjpeg/jmorecfg.h +382 -0
  98. data/ext/image_pack/vendor/mozjpeg/jpeg_nbits_table.h +4098 -0
  99. data/ext/image_pack/vendor/mozjpeg/jpegcomp.h +32 -0
  100. data/ext/image_pack/vendor/mozjpeg/jpegint.h +453 -0
  101. data/ext/image_pack/vendor/mozjpeg/jpeglib.h +1211 -0
  102. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +827 -0
  103. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +172 -0
  104. data/ext/image_pack/vendor/mozjpeg/jquant1.c +856 -0
  105. data/ext/image_pack/vendor/mozjpeg/jquant2.c +1286 -0
  106. data/ext/image_pack/vendor/mozjpeg/jsimd.h +123 -0
  107. data/ext/image_pack/vendor/mozjpeg/jsimd_none.c +431 -0
  108. data/ext/image_pack/vendor/mozjpeg/jsimddct.h +70 -0
  109. data/ext/image_pack/vendor/mozjpeg/jstdhuff.c +144 -0
  110. data/ext/image_pack/vendor/mozjpeg/jutils.c +133 -0
  111. data/ext/image_pack/vendor/mozjpeg/jversion.h.in +56 -0
  112. data/ext/image_pack/vendor/mozjpeg/libjpeg.map.in +11 -0
  113. data/ext/image_pack/vendor/mozjpeg/libjpeg.txt +3150 -0
  114. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +690 -0
  115. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +253 -0
  116. data/ext/image_pack/vendor/mozjpeg/rdgif.c +720 -0
  117. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +160 -0
  118. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +494 -0
  119. data/ext/image_pack/vendor/mozjpeg/rdpng.c +194 -0
  120. data/ext/image_pack/vendor/mozjpeg/rdppm.c +781 -0
  121. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +642 -0
  122. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +508 -0
  123. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +148 -0
  124. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +334 -0
  125. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +976 -0
  126. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd_neon.S +1200 -0
  127. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jccolext-neon.c +316 -0
  128. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jchuff-neon.c +411 -0
  129. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd.c +1053 -0
  130. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch64/jsimd_neon.S +2254 -0
  131. data/ext/image_pack/vendor/mozjpeg/simd/arm/align.h +28 -0
  132. data/ext/image_pack/vendor/mozjpeg/simd/arm/jccolor-neon.c +160 -0
  133. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgray-neon.c +120 -0
  134. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcgryext-neon.c +106 -0
  135. data/ext/image_pack/vendor/mozjpeg/simd/arm/jchuff.h +131 -0
  136. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcphuff-neon.c +623 -0
  137. data/ext/image_pack/vendor/mozjpeg/simd/arm/jcsample-neon.c +192 -0
  138. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolext-neon.c +374 -0
  139. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdcolor-neon.c +141 -0
  140. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmerge-neon.c +144 -0
  141. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdmrgext-neon.c +723 -0
  142. data/ext/image_pack/vendor/mozjpeg/simd/arm/jdsample-neon.c +569 -0
  143. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctfst-neon.c +214 -0
  144. data/ext/image_pack/vendor/mozjpeg/simd/arm/jfdctint-neon.c +376 -0
  145. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctfst-neon.c +472 -0
  146. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctint-neon.c +801 -0
  147. data/ext/image_pack/vendor/mozjpeg/simd/arm/jidctred-neon.c +486 -0
  148. data/ext/image_pack/vendor/mozjpeg/simd/arm/jquanti-neon.c +193 -0
  149. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h +26 -0
  150. data/ext/image_pack/vendor/mozjpeg/simd/arm/neon-compat.h.in +37 -0
  151. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-avx2.asm +578 -0
  152. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-mmx.asm +476 -0
  153. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolext-sse2.asm +503 -0
  154. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-avx2.asm +121 -0
  155. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-mmx.asm +121 -0
  156. data/ext/image_pack/vendor/mozjpeg/simd/i386/jccolor-sse2.asm +120 -0
  157. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-avx2.asm +113 -0
  158. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-mmx.asm +113 -0
  159. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgray-sse2.asm +112 -0
  160. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-avx2.asm +457 -0
  161. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-mmx.asm +355 -0
  162. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcgryext-sse2.asm +382 -0
  163. data/ext/image_pack/vendor/mozjpeg/simd/i386/jchuff-sse2.asm +761 -0
  164. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcphuff-sse2.asm +662 -0
  165. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-avx2.asm +388 -0
  166. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-mmx.asm +324 -0
  167. data/ext/image_pack/vendor/mozjpeg/simd/i386/jcsample-sse2.asm +351 -0
  168. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-avx2.asm +515 -0
  169. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-mmx.asm +404 -0
  170. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolext-sse2.asm +458 -0
  171. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-avx2.asm +118 -0
  172. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-mmx.asm +117 -0
  173. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdcolor-sse2.asm +117 -0
  174. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-avx2.asm +136 -0
  175. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-mmx.asm +123 -0
  176. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmerge-sse2.asm +135 -0
  177. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-avx2.asm +575 -0
  178. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-mmx.asm +460 -0
  179. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdmrgext-sse2.asm +517 -0
  180. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-avx2.asm +760 -0
  181. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-mmx.asm +731 -0
  182. data/ext/image_pack/vendor/mozjpeg/simd/i386/jdsample-sse2.asm +724 -0
  183. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-3dn.asm +318 -0
  184. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctflt-sse.asm +369 -0
  185. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-mmx.asm +395 -0
  186. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctfst-sse2.asm +403 -0
  187. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-avx2.asm +331 -0
  188. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-mmx.asm +620 -0
  189. data/ext/image_pack/vendor/mozjpeg/simd/i386/jfdctint-sse2.asm +633 -0
  190. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-3dn.asm +451 -0
  191. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse.asm +571 -0
  192. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctflt-sse2.asm +497 -0
  193. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-mmx.asm +499 -0
  194. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctfst-sse2.asm +501 -0
  195. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-avx2.asm +453 -0
  196. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-mmx.asm +851 -0
  197. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctint-sse2.asm +858 -0
  198. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-mmx.asm +704 -0
  199. data/ext/image_pack/vendor/mozjpeg/simd/i386/jidctred-sse2.asm +592 -0
  200. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-3dn.asm +230 -0
  201. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-mmx.asm +276 -0
  202. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquant-sse.asm +208 -0
  203. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquantf-sse2.asm +168 -0
  204. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-avx2.asm +188 -0
  205. data/ext/image_pack/vendor/mozjpeg/simd/i386/jquanti-sse2.asm +201 -0
  206. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +1312 -0
  207. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimdcpu.asm +135 -0
  208. data/ext/image_pack/vendor/mozjpeg/simd/jsimd.h +1258 -0
  209. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +1143 -0
  210. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2.S +4543 -0
  211. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd_dspr2_asm.h +292 -0
  212. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +455 -0
  213. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +148 -0
  214. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +132 -0
  215. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +374 -0
  216. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +98 -0
  217. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample.h +28 -0
  218. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +415 -0
  219. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +139 -0
  220. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +149 -0
  221. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +615 -0
  222. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +304 -0
  223. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +255 -0
  224. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +398 -0
  225. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +395 -0
  226. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +571 -0
  227. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +124 -0
  228. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +866 -0
  229. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd_mmi.h +69 -0
  230. data/ext/image_pack/vendor/mozjpeg/simd/mips64/loongson-mmintrin.h +1334 -0
  231. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jcolsamp.inc +135 -0
  232. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jdct.inc +31 -0
  233. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc +93 -0
  234. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdcfg.inc.h +133 -0
  235. data/ext/image_pack/vendor/mozjpeg/simd/nasm/jsimdext.inc +520 -0
  236. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +269 -0
  237. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +116 -0
  238. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +111 -0
  239. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +228 -0
  240. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +159 -0
  241. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample.h +28 -0
  242. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +276 -0
  243. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +106 -0
  244. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +130 -0
  245. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +329 -0
  246. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +400 -0
  247. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +154 -0
  248. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +258 -0
  249. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +255 -0
  250. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +357 -0
  251. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +250 -0
  252. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +884 -0
  253. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd_altivec.h +98 -0
  254. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-avx2.asm +559 -0
  255. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolext-sse2.asm +484 -0
  256. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-avx2.asm +121 -0
  257. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jccolor-sse2.asm +120 -0
  258. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-avx2.asm +113 -0
  259. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgray-sse2.asm +112 -0
  260. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-avx2.asm +438 -0
  261. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcgryext-sse2.asm +363 -0
  262. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jchuff-sse2.asm +583 -0
  263. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcphuff-sse2.asm +639 -0
  264. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-avx2.asm +367 -0
  265. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jcsample-sse2.asm +330 -0
  266. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-avx2.asm +496 -0
  267. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolext-sse2.asm +439 -0
  268. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-avx2.asm +118 -0
  269. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdcolor-sse2.asm +117 -0
  270. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-avx2.asm +136 -0
  271. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmerge-sse2.asm +135 -0
  272. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-avx2.asm +596 -0
  273. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdmrgext-sse2.asm +538 -0
  274. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-avx2.asm +696 -0
  275. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jdsample-sse2.asm +665 -0
  276. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctflt-sse.asm +355 -0
  277. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctfst-sse2.asm +389 -0
  278. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-avx2.asm +320 -0
  279. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jfdctint-sse2.asm +619 -0
  280. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctflt-sse2.asm +482 -0
  281. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctfst-sse2.asm +491 -0
  282. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-avx2.asm +418 -0
  283. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctint-sse2.asm +847 -0
  284. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jidctred-sse2.asm +574 -0
  285. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquantf-sse2.asm +155 -0
  286. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-avx2.asm +163 -0
  287. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jquanti-sse2.asm +188 -0
  288. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimd.c +1110 -0
  289. data/ext/image_pack/vendor/mozjpeg/simd/x86_64/jsimdcpu.asm +86 -0
  290. data/ext/image_pack/vendor/mozjpeg/strtest.c +170 -0
  291. data/ext/image_pack/vendor/mozjpeg/structure.txt +900 -0
  292. data/ext/image_pack/vendor/mozjpeg/tjbench.c +1044 -0
  293. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.in +256 -0
  294. data/ext/image_pack/vendor/mozjpeg/tjbenchtest.java.in +215 -0
  295. data/ext/image_pack/vendor/mozjpeg/tjexample.c +406 -0
  296. data/ext/image_pack/vendor/mozjpeg/tjexampletest.in +149 -0
  297. data/ext/image_pack/vendor/mozjpeg/tjexampletest.java.in +151 -0
  298. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +961 -0
  299. data/ext/image_pack/vendor/mozjpeg/tjutil.c +70 -0
  300. data/ext/image_pack/vendor/mozjpeg/tjutil.h +53 -0
  301. data/ext/image_pack/vendor/mozjpeg/transupp.c +2373 -0
  302. data/ext/image_pack/vendor/mozjpeg/transupp.h +243 -0
  303. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +1259 -0
  304. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +2320 -0
  305. data/ext/image_pack/vendor/mozjpeg/turbojpeg.h +1784 -0
  306. data/ext/image_pack/vendor/mozjpeg/usage.txt +679 -0
  307. data/ext/image_pack/vendor/mozjpeg/wizard.txt +220 -0
  308. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +552 -0
  309. data/ext/image_pack/vendor/mozjpeg/wrgif.c +580 -0
  310. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +577 -0
  311. data/ext/image_pack/vendor/mozjpeg/wrppm.c +366 -0
  312. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +258 -0
  313. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +268 -0
  314. data/lib/image_pack/backend.rb +8 -0
  315. data/lib/image_pack/configuration.rb +23 -0
  316. data/lib/image_pack/errors.rb +13 -0
  317. data/lib/image_pack/version.rb +5 -0
  318. data/lib/image_pack.rb +208 -0
  319. metadata +433 -0
@@ -0,0 +1,662 @@
1
+ ;
2
+ ; jcphuff-sse2.asm - prepare data for progressive Huffman encoding (SSE2)
3
+ ;
4
+ ; Copyright (C) 2016, 2018, Matthieu Darbois
5
+ ;
6
+ ; Based on the x86 SIMD extension for IJG JPEG library
7
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
8
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
9
+ ;
10
+ ; This file should be assembled with NASM (Netwide Assembler); it
11
+ ; can *not* be assembled with Microsoft's MASM or any compatible
12
+ ; assembler (including Borland's Turbo Assembler).
13
+ ; NASM is available from http://nasm.sourceforge.net/ or
14
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
15
+ ;
16
+ ; This file contains an SSE2 implementation of data preparation for progressive
17
+ ; Huffman encoding. See jcphuff.c for more details.
18
+
19
+ %include "jsimdext.inc"
20
+
21
+ ; --------------------------------------------------------------------------
22
+ SECTION SEG_TEXT
23
+ BITS 32
24
+
25
+ ; --------------------------------------------------------------------------
26
+ ; Macros to load data for jsimd_encode_mcu_AC_first_prepare_sse2() and
27
+ ; jsimd_encode_mcu_AC_refine_prepare_sse2()
28
+
29
+ %macro LOAD16 0 ; gather 16 words: X0 = BLOCK[LUT[0..7]], X1 = BLOCK[LUT[8..15]]; clobbers T0, T1
30
+ pxor N0, N0 ; N0 = 0; callers compare it against X0 to derive a sign mask
31
+ pxor N1, N1 ; N1 = 0; same role for X1
32
+ ; Each group below reads two INT-sized indices from LUT and inserts the 16-bit words at BLOCK + index*2 into the same lane of X0 and X1. All eight lanes of both registers are written, so X0/X1 need no clearing first.
33
+ mov T0, INT [LUT + 0*SIZEOF_INT]
34
+ mov T1, INT [LUT + 8*SIZEOF_INT]
35
+ pinsrw X0, word [BLOCK + T0 * 2], 0
36
+ pinsrw X1, word [BLOCK + T1 * 2], 0
37
+
38
+ mov T0, INT [LUT + 1*SIZEOF_INT]
39
+ mov T1, INT [LUT + 9*SIZEOF_INT]
40
+ pinsrw X0, word [BLOCK + T0 * 2], 1
41
+ pinsrw X1, word [BLOCK + T1 * 2], 1
42
+
43
+ mov T0, INT [LUT + 2*SIZEOF_INT]
44
+ mov T1, INT [LUT + 10*SIZEOF_INT]
45
+ pinsrw X0, word [BLOCK + T0 * 2], 2
46
+ pinsrw X1, word [BLOCK + T1 * 2], 2
47
+
48
+ mov T0, INT [LUT + 3*SIZEOF_INT]
49
+ mov T1, INT [LUT + 11*SIZEOF_INT]
50
+ pinsrw X0, word [BLOCK + T0 * 2], 3
51
+ pinsrw X1, word [BLOCK + T1 * 2], 3
52
+
53
+ mov T0, INT [LUT + 4*SIZEOF_INT]
54
+ mov T1, INT [LUT + 12*SIZEOF_INT]
55
+ pinsrw X0, word [BLOCK + T0 * 2], 4
56
+ pinsrw X1, word [BLOCK + T1 * 2], 4
57
+
58
+ mov T0, INT [LUT + 5*SIZEOF_INT]
59
+ mov T1, INT [LUT + 13*SIZEOF_INT]
60
+ pinsrw X0, word [BLOCK + T0 * 2], 5
61
+ pinsrw X1, word [BLOCK + T1 * 2], 5
62
+
63
+ mov T0, INT [LUT + 6*SIZEOF_INT]
64
+ mov T1, INT [LUT + 14*SIZEOF_INT]
65
+ pinsrw X0, word [BLOCK + T0 * 2], 6
66
+ pinsrw X1, word [BLOCK + T1 * 2], 6
67
+
68
+ mov T0, INT [LUT + 7*SIZEOF_INT]
69
+ mov T1, INT [LUT + 15*SIZEOF_INT]
70
+ pinsrw X0, word [BLOCK + T0 * 2], 7
71
+ pinsrw X1, word [BLOCK + T1 * 2], 7
72
+ %endmacro
73
+
74
+ %macro LOAD15 0 ; gather 9..15 words: X0 = BLOCK[LUT[0..7]], X1 = BLOCK[LUT[8..]] for as many lanes as LENEND allows; clobbers T0, T1
75
+ pxor N0, N0 ; N0 = 0; callers compare it against X0 to derive a sign mask
76
+ pxor N1, N1 ; N1 = 0; same role for X1
77
+ pxor X1, X1 ; lanes of X1 that are not loaded below must read as zero
78
+ ; Lane 0 of both registers is always loaded.
79
+ mov T0, INT [LUT + 0*SIZEOF_INT]
80
+ mov T1, INT [LUT + 8*SIZEOF_INT]
81
+ pinsrw X0, word [BLOCK + T0 * 2], 0
82
+ pinsrw X1, word [BLOCK + T1 * 2], 0
83
+ ; Lanes 1..7 of X0 are always loaded (X0 is fully overwritten, so it is not cleared).
84
+ mov T0, INT [LUT + 1*SIZEOF_INT]
85
+ pinsrw X0, word [BLOCK + T0 * 2], 1
86
+
87
+ mov T0, INT [LUT + 2*SIZEOF_INT]
88
+ pinsrw X0, word [BLOCK + T0 * 2], 2
89
+
90
+ mov T0, INT [LUT + 3*SIZEOF_INT]
91
+ pinsrw X0, word [BLOCK + T0 * 2], 3
92
+
93
+ mov T0, INT [LUT + 4*SIZEOF_INT]
94
+ pinsrw X0, word [BLOCK + T0 * 2], 4
95
+
96
+ mov T0, INT [LUT + 5*SIZEOF_INT]
97
+ pinsrw X0, word [BLOCK + T0 * 2], 5
98
+
99
+ mov T0, INT [LUT + 6*SIZEOF_INT]
100
+ pinsrw X0, word [BLOCK + T0 * 2], 6
101
+
102
+ mov T0, INT [LUT + 7*SIZEOF_INT]
103
+ pinsrw X0, word [BLOCK + T0 * 2], 7
104
+ ; Lanes 1..6 of X1 are loaded only while LENEND (signed compare) is at least lane+1; the first failing test skips every remaining lane.
105
+ cmp LENEND, 2
106
+ jl %%.ELOAD15
107
+ mov T1, INT [LUT + 9*SIZEOF_INT]
108
+ pinsrw X1, word [BLOCK + T1 * 2], 1
109
+
110
+ cmp LENEND, 3
111
+ jl %%.ELOAD15
112
+ mov T1, INT [LUT + 10*SIZEOF_INT]
113
+ pinsrw X1, word [BLOCK + T1 * 2], 2
114
+
115
+ cmp LENEND, 4
116
+ jl %%.ELOAD15
117
+ mov T1, INT [LUT + 11*SIZEOF_INT]
118
+ pinsrw X1, word [BLOCK + T1 * 2], 3
119
+
120
+ cmp LENEND, 5
121
+ jl %%.ELOAD15
122
+ mov T1, INT [LUT + 12*SIZEOF_INT]
123
+ pinsrw X1, word [BLOCK + T1 * 2], 4
124
+
125
+ cmp LENEND, 6
126
+ jl %%.ELOAD15
127
+ mov T1, INT [LUT + 13*SIZEOF_INT]
128
+ pinsrw X1, word [BLOCK + T1 * 2], 5
129
+
130
+ cmp LENEND, 7
131
+ jl %%.ELOAD15
132
+ mov T1, INT [LUT + 14*SIZEOF_INT]
133
+ pinsrw X1, word [BLOCK + T1 * 2], 6
134
+ %%.ELOAD15: ; macro-local exit label; lane 7 of X1 is never loaded and stays zero
135
+ %endmacro
136
+
137
+ %macro LOAD8 0 ; gather exactly 8 words: X0 = BLOCK[LUT[0..7]]; clobbers T0
138
+ pxor N0, N0 ; N0 = 0; callers compare it against X0 to derive a sign mask
139
+ ; Each pair below reads one INT-sized index from LUT and inserts the 16-bit word at BLOCK + index*2 into the matching lane of X0.
140
+ mov T0, INT [LUT + 0*SIZEOF_INT]
141
+ pinsrw X0, word [BLOCK + T0 * 2], 0
142
+
143
+ mov T0, INT [LUT + 1*SIZEOF_INT]
144
+ pinsrw X0, word [BLOCK + T0 * 2], 1
145
+
146
+ mov T0, INT [LUT + 2*SIZEOF_INT]
147
+ pinsrw X0, word [BLOCK + T0 * 2], 2
148
+
149
+ mov T0, INT [LUT + 3*SIZEOF_INT]
150
+ pinsrw X0, word [BLOCK + T0 * 2], 3
151
+
152
+ mov T0, INT [LUT + 4*SIZEOF_INT]
153
+ pinsrw X0, word [BLOCK + T0 * 2], 4
154
+
155
+ mov T0, INT [LUT + 5*SIZEOF_INT]
156
+ pinsrw X0, word [BLOCK + T0 * 2], 5
157
+
158
+ mov T0, INT [LUT + 6*SIZEOF_INT]
159
+ pinsrw X0, word [BLOCK + T0 * 2], 6
160
+
161
+ mov T0, INT [LUT + 7*SIZEOF_INT]
162
+ pinsrw X0, word [BLOCK + T0 * 2], 7
163
+ %endmacro
164
+
165
+ %macro LOAD7 0 ; gather up to 7 words into X0 from BLOCK[LUT[0..]], as many lanes as LENEND allows; clobbers T1
166
+ pxor N0, N0 ; N0 = 0; callers compare it against X0 to derive a sign mask
167
+ pxor X0, X0 ; lanes of X0 that are not loaded below must read as zero
168
+ ; Lane 0 is loaded unconditionally (LENEND is not tested for it).
169
+ mov T1, INT [LUT + 0*SIZEOF_INT]
170
+ pinsrw X0, word [BLOCK + T1 * 2], 0
171
+ ; Lanes 1..6 are loaded only while LENEND (signed compare) is at least lane+1; the first failing test skips every remaining lane.
172
+ cmp LENEND, 2
173
+ jl %%.ELOAD7
174
+ mov T1, INT [LUT + 1*SIZEOF_INT]
175
+ pinsrw X0, word [BLOCK + T1 * 2], 1
176
+
177
+ cmp LENEND, 3
178
+ jl %%.ELOAD7
179
+ mov T1, INT [LUT + 2*SIZEOF_INT]
180
+ pinsrw X0, word [BLOCK + T1 * 2], 2
181
+
182
+ cmp LENEND, 4
183
+ jl %%.ELOAD7
184
+ mov T1, INT [LUT + 3*SIZEOF_INT]
185
+ pinsrw X0, word [BLOCK + T1 * 2], 3
186
+
187
+ cmp LENEND, 5
188
+ jl %%.ELOAD7
189
+ mov T1, INT [LUT + 4*SIZEOF_INT]
190
+ pinsrw X0, word [BLOCK + T1 * 2], 4
191
+
192
+ cmp LENEND, 6
193
+ jl %%.ELOAD7
194
+ mov T1, INT [LUT + 5*SIZEOF_INT]
195
+ pinsrw X0, word [BLOCK + T1 * 2], 5
196
+
197
+ cmp LENEND, 7
198
+ jl %%.ELOAD7
199
+ mov T1, INT [LUT + 6*SIZEOF_INT]
200
+ pinsrw X0, word [BLOCK + T1 * 2], 6
201
+ %%.ELOAD7: ; macro-local exit label; lane 7 of X0 is never loaded and stays zero
202
+ %endmacro
203
+
204
+ %macro REDUCE0 0 ; build a 64-bit "is nonzero" bitmap of the 64 words at VALUES and store it as two INTs through the pointer held in ZEROBITS; clobbers eax, ecx, edx, esi, edi, xmm0-xmm7
205
+ movdqa xmm0, XMMWORD [VALUES + ( 0*2)] ; aligned loads: VALUES must be 16-byte aligned
206
+ movdqa xmm1, XMMWORD [VALUES + ( 8*2)]
207
+ movdqa xmm2, XMMWORD [VALUES + (16*2)]
208
+ movdqa xmm3, XMMWORD [VALUES + (24*2)]
209
+ movdqa xmm4, XMMWORD [VALUES + (32*2)]
210
+ movdqa xmm5, XMMWORD [VALUES + (40*2)]
211
+ movdqa xmm6, XMMWORD [VALUES + (48*2)]
212
+ ; Compare every word with zero: a lane becomes 0xFFFF where the value is 0, else 0x0000.
213
+ pcmpeqw xmm0, ZERO
214
+ pcmpeqw xmm1, ZERO
215
+ pcmpeqw xmm2, ZERO
216
+ pcmpeqw xmm3, ZERO
217
+ pcmpeqw xmm4, ZERO
218
+ pcmpeqw xmm5, ZERO
219
+ pcmpeqw xmm6, ZERO
220
+ pcmpeqw xmm7, XMMWORD [VALUES + (56*2)] ; words 56..63 are compared straight from memory against xmm7, which is the register ZERO is %defined to; this overwrites it
221
+ ; Narrow each pair of word masks to one register of 16 byte masks.
222
+ packsswb xmm0, xmm1
223
+ packsswb xmm2, xmm3
224
+ packsswb xmm4, xmm5
225
+ packsswb xmm6, xmm7
226
+ ; Collect one bit per byte mask: 16 bits per register, in word order.
227
+ pmovmskb eax, xmm0 ; bits for words 0..15
228
+ pmovmskb ecx, xmm2 ; bits for words 16..31
229
+ pmovmskb edx, xmm4 ; bits for words 32..47
230
+ pmovmskb esi, xmm6 ; bits for words 48..63
231
+
232
+ shl ecx, 16
233
+ shl esi, 16
234
+
235
+ or eax, ecx ; eax = "is zero" bits for words 0..31
236
+ or edx, esi ; edx = "is zero" bits for words 32..63
237
+ ; Invert so that a set bit marks a nonzero word.
238
+ not eax
239
+ not edx
240
+
241
+ mov edi, ZEROBITS ; edi = destination pointer read from the ZEROBITS stack slot; edi is also VALUES, which is dead from here on
242
+
243
+ mov INT [edi], eax ; low half of the bitmap (words 0..31)
244
+ mov INT [edi+SIZEOF_INT], edx ; high half of the bitmap (words 32..63)
245
+ %endmacro
246
+
247
+ ;
248
+ ; Prepare data for jsimd_encode_mcu_AC_first().
249
+ ; For each of the Sl coefficients block[jpeg_natural_order_start[i]], stores |coef| >> Al at values[i] and that magnitude XORed with the coefficient's sign mask at values[DCTSIZE2 + i]; pads the rest of the first DCTSIZE2 words with zeros; then writes a bitmap of the nonzero magnitudes to zerobits[0..1] (see REDUCE0).
250
+ ; GLOBAL(void)
251
+ ; jsimd_encode_mcu_AC_first_prepare_sse2(const JCOEF *block,
252
+ ; const int *jpeg_natural_order_start,
253
+ ; int Sl, int Al, JCOEF *values,
254
+ ; size_t *zerobits)
255
+ ; Stack arguments, addressed through eax (= esp right after "push ebp"); values is written with movdqa, so it must be 16-byte aligned:
256
+ ; eax + 8 = const JCOEF *block
257
+ ; eax + 12 = const int *jpeg_natural_order_start
258
+ ; eax + 16 = int Sl
259
+ ; eax + 20 = int Al
260
+ ; eax + 24 = JCOEF *values
261
+ ; eax + 28 = size_t *zerobits
262
+
263
+ %define ZERO xmm7 ; all-zero constant (cleared with pxor after the arguments are loaded)
264
+ %define X0 xmm0 ; coefficients 0..7 of the current group, then their shifted magnitudes
265
+ %define X1 xmm1 ; same for coefficients 8..15 of the group
266
+ %define N0 xmm2 ; sign mask of X0, then the sign-adjusted value stored in the second half of values
267
+ %define N1 xmm3 ; same for X1
268
+ %define AL xmm4 ; shift count Al, used as the psrlw count
269
+ %define K eax ; loop counter (eax is free once the arguments have been read)
270
+ %define LENEND eax ; LEN & 7 for the tail loaders; shares eax with K
271
+ %define LUT ebx ; cursor into jpeg_natural_order_start
272
+ %define T0 ecx ; scratch index
273
+ %define T1 edx ; scratch index
274
+ %define BLOCK esi ; block
275
+ %define VALUES edi ; write cursor into values
276
+ %define LEN ebp ; Sl (ebp is reused; the aligned frame pointer is pushed first and popped before the epilogue)
277
+
278
+ %define ZEROBITS INT [esp + 5 * 4] ; the dword reserved by "sub esp, 4", just above the five registers pushed in the prologue
279
+
280
+ align 32
281
+ GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
282
+
283
+ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
284
+ push ebp
285
+ mov eax, esp ; eax = esp after the push: [eax] = caller's ebp, arguments start at eax + 8
286
+ sub esp, byte 4
287
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
288
+ mov [esp], eax ; remember the unaligned esp so the epilogue can restore it
289
+ mov ebp, esp ; ebp = aligned frame base (points at the saved esp)
290
+ sub esp, 4 ; reserve the ZEROBITS slot
291
+ push ebx
292
+ push ecx
293
+ ; push edx ; need not be preserved
294
+ push esi
295
+ push edi
296
+ push ebp ; save the aligned frame base, because ebp is about to become LEN
297
+ ; Load the arguments; this must finish before eax is reused as K.
298
+ mov BLOCK, INT [eax + 8]
299
+ mov LUT, INT [eax + 12]
300
+ mov VALUES, INT [eax + 24]
301
+ movd AL, INT [eax + 20]
302
+ mov T0, INT [eax + 28]
303
+ mov ZEROBITS, T0 ; keep the zerobits pointer on the stack until REDUCE0 needs it
304
+ mov LEN, INT [eax + 16]
305
+ pxor ZERO, ZERO
306
+ mov K, LEN
307
+ and K, -16
308
+ shr K, 4 ; K = number of full 16-coefficient groups
309
+ jz .ELOOP16 ; fewer than 16 coefficients: go straight to the tail
310
+ .BLOOP16: ; main loop: one full group of 16 coefficients per iteration
311
+ LOAD16
312
+ pcmpgtw N0, X0 ; N = 0xFFFF where X < 0, else 0 (N was zeroed by the loader)
313
+ pcmpgtw N1, X1
314
+ paddw X0, N0 ; (X + N) ^ N = |X|
315
+ paddw X1, N1
316
+ pxor X0, N0
317
+ pxor X1, N1
318
+ psrlw X0, AL ; X = |X| >> Al
319
+ psrlw X1, AL
320
+ pxor N0, X0 ; N = X for non-negative inputs, ~X for negative inputs
321
+ pxor N1, X1
322
+ movdqa XMMWORD [VALUES + (0) * 2], X0
323
+ movdqa XMMWORD [VALUES + (8) * 2], X1
324
+ movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 ; sign-adjusted values go DCTSIZE2 words further on
325
+ movdqa XMMWORD [VALUES + (8 + DCTSIZE2) * 2], N1
326
+ add VALUES, 16*2
327
+ add LUT, 16*SIZEOF_INT
328
+ dec K
329
+ jnz .BLOOP16
330
+ test LEN, 15
331
+ je .PADDING ; LEN is a multiple of 16: no tail to process
332
+ .ELOOP16: ; tail: 1..15 remaining coefficients, picked by bits 3 and 0..2 of LEN
333
+ mov LENEND, LEN
334
+ and LENEND, 7
335
+
336
+ test LEN, 8
337
+ jz .TRY7 ; fewer than 8 remain
338
+ test LEN, 7
339
+ jz .TRY8 ; exactly 8 remain
340
+ ; 9..15 remain: same arithmetic as the main loop, with a partially loaded X1.
341
+ LOAD15
342
+ pcmpgtw N0, X0
343
+ pcmpgtw N1, X1
344
+ paddw X0, N0
345
+ paddw X1, N1
346
+ pxor X0, N0
347
+ pxor X1, N1
348
+ psrlw X0, AL
349
+ psrlw X1, AL
350
+ pxor N0, X0
351
+ pxor N1, X1
352
+ movdqa XMMWORD [VALUES + (0) * 2], X0
353
+ movdqa XMMWORD [VALUES + (8) * 2], X1
354
+ movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0
355
+ movdqa XMMWORD [VALUES + (8 + DCTSIZE2) * 2], N1
356
+ add VALUES, 16*2
357
+ jmp .PADDING
358
+ .TRY8: ; exactly 8 remain: one full vector
359
+ LOAD8
360
+ pcmpgtw N0, X0
361
+ paddw X0, N0
362
+ pxor X0, N0
363
+ psrlw X0, AL
364
+ pxor N0, X0
365
+ movdqa XMMWORD [VALUES + (0) * 2], X0
366
+ movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0
367
+ add VALUES, 8*2
368
+ jmp .PADDING
369
+ .TRY7: ; fewer than 8 remain: one partially loaded vector, unused lanes are zero
370
+ LOAD7
371
+ pcmpgtw N0, X0
372
+ paddw X0, N0
373
+ pxor X0, N0
374
+ psrlw X0, AL
375
+ pxor N0, X0
376
+ movdqa XMMWORD [VALUES + (0) * 2], X0
377
+ movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0
378
+ add VALUES, 8*2
379
+ .PADDING: ; zero-fill the rest of the magnitude half in 8-word steps
380
+ mov K, LEN
381
+ add K, 7
382
+ and K, -8
383
+ shr K, 3 ; K = ceil(LEN / 8) = 8-word groups already written
384
+ sub K, DCTSIZE2/8 ; K = minus the number of groups still to clear
385
+ jz .EPADDING
386
+ align 16
387
+ .ZEROLOOP: ; K counts up to 0; only the first half is padded, the half at +DCTSIZE2 is left untouched
388
+ movdqa XMMWORD [VALUES + 0], ZERO
389
+ add VALUES, 8*2
390
+ inc K
391
+ jnz .ZEROLOOP
392
+ .EPADDING:
393
+ sub VALUES, DCTSIZE2*2 ; rewind VALUES by DCTSIZE2 words, i.e. back to the start of values once DCTSIZE2 words have been written (DCTSIZE2 is defined outside this file; presumably 64)
394
+
395
+ REDUCE0 ; store the nonzero bitmap through the saved zerobits pointer
396
+
397
+ pop ebp ; ebp = aligned frame base again (LEN is no longer needed)
398
+ pop edi
399
+ pop esi
400
+ ; pop edx ; need not be preserved
401
+ pop ecx
402
+ pop ebx
403
+ mov esp, ebp ; esp <- aligned frame base
404
+ pop esp ; esp <- the unaligned esp saved in the prologue (points at the caller's ebp)
405
+ pop ebp ; restore the caller's ebp
406
+ ret
407
+
408
+ %undef ZERO
409
+ %undef X0
410
+ %undef X1
411
+ %undef N0
412
+ %undef N1
413
+ %undef AL
414
+ %undef K
415
+ %undef LUT
416
+ %undef T0
417
+ %undef T1
418
+ %undef BLOCK
419
+ %undef VALUES
420
+ %undef LEN
421
+
422
+ ;
423
+ ; Prepare data for jsimd_encode_mcu_AC_refine().
424
+ ;
425
+ ; GLOBAL(int)
426
+ ; jsimd_encode_mcu_AC_refine_prepare_sse2(const JCOEF *block,
427
+ ; const int *jpeg_natural_order_start,
428
+ ; int Sl, int Al, JCOEF *absvalues,
429
+ ; size_t *bits)
430
+ ;
431
+ ; eax + 8 = const JCOEF *block
432
+ ; eax + 12 = const int *jpeg_natural_order_start
433
+ ; eax + 16 = int Sl
434
+ ; eax + 20 = int Al
435
+ ; eax + 24 = JCOEF *absvalues
436
+ ; eax + 28 = size_t *bits
437
+
438
+ %define ZERO xmm7 ; cleared with pxor in the routine; source of zero padding
439
+ %define ONE xmm5 ; every word set to 1 (pcmpeqw + psrlw 15 in the routine)
440
+ %define X0 xmm0 ; first group of 8 coefficient words being processed
441
+ %define X1 xmm1 ; second group of 8 coefficient words being processed
442
+ %define N0 xmm2 ; comparison mask for X0
443
+ %define N1 xmm3 ; comparison mask for X1
444
+ %define AL xmm4 ; shift count, loaded from the Al argument
445
+ %define K eax ; loop counter
446
+ %define LENEND eax ; shares eax with K
447
+ %define LUT ebx ; jpeg_natural_order_start pointer
448
+ %define T0 ecx ; scratch
449
+ %define T0w cx ; low 16 bits of T0
450
+ %define T1 edx ; scratch
451
+ %define BLOCK esi ; block pointer
452
+ %define VALUES edi ; output pointer
453
+ %define KK ebp ; byte offset of the current 16-bit sign word; KK*8 = index of the current group of 16
454
+
455
+ %define ZEROBITS INT [esp + 5 * 4] ; stack slot just above the five pushed registers; holds the bits pointer
456
+ %define EOB INT [esp + 5 * 4 + 4] ; stack slot for the value returned in eax
457
+ %define LEN INT [esp + 5 * 4 + 8] ; stack slot holding the Sl argument
458
+
459
+ align 32
460
+ GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
461
+
462
+ EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2): ; returns EOB in eax
463
+ push ebp ; save caller's ebp
464
+ mov eax, esp ; eax = esp right after the push; arguments are read as [eax + 8] and up
465
+ sub esp, byte 4 ; room for the saved stack pointer
466
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
467
+ mov [esp], eax ; keep the unaligned stack pointer so the epilogue can restore it
468
+ mov ebp, esp ; ebp = aligned ebp
469
+ sub esp, 16 ; local storage: ZEROBITS, EOB and LEN live here
470
+ push ebx
471
+ push ecx
472
+ ; push edx ; need not be preserved
473
+ push esi
474
+ push edi
475
+ push ebp ; ebp is reused as KK below; popped again before "mov esp, ebp"
476
+
477
+ pcmpeqw ONE, ONE ; all bits set ...
478
+ psrlw ONE, 15 ; ... shifted down so that every word of ONE equals 1
479
+ mov BLOCK, INT [eax + 8] ; block
480
+ mov LUT, INT [eax + 12] ; jpeg_natural_order_start
481
+ mov VALUES, INT [eax + 24] ; output buffer
482
+ movd AL, INT [eax + 20] ; Al -> shift count used by psrlw
483
+ mov T0, INT [eax + 28] ; bits
484
+ mov K, INT [eax + 16] ; Sl
485
+ mov INT [T0 + 2 * SIZEOF_INT], -1 ; preset the two ints at bits + 2*SIZEOF_INT to all ones;
486
+ mov INT [T0 + 3 * SIZEOF_INT], -1 ; the per-group 16-bit stores below overwrite parts of them
487
+ mov ZEROBITS, T0 ; keep the bits pointer on the stack
488
+ mov LEN, K ; keep Sl on the stack
489
+ pxor ZERO, ZERO ; ZERO = all zero bits
490
+ and K, -16
491
+ mov EOB, 0 ; default return value
492
+ xor KK, KK ; KK = 0
493
+ shr K, 4 ; K = Sl / 16 = number of full 16-element iterations
494
+ jz .ELOOPR16 ; fewer than 16 elements: go straight to the tail handling
495
+ .BLOOPR16:
496
+ LOAD16 ; macro defined outside this chunk; presumably fills X0/X1 with 16 coefficients and clears N0/N1 - TODO confirm
497
+ pcmpgtw N0, X0 ; N0 = per-word (N0 > X0) mask; a sign mask if N0 was zero
498
+ pcmpgtw N1, X1
499
+ paddw X0, N0 ; add mask, then xor with it: negates the words where the mask is all ones
500
+ paddw X1, N1
501
+ pxor X0, N0
502
+ pxor X1, N1
503
+ psrlw X0, AL ; logical right shift of each word by AL
504
+ psrlw X1, AL
505
+ movdqa XMMWORD [VALUES + (0) * 2], X0 ; store shifted values, words 0..7
506
+ movdqa XMMWORD [VALUES + (8) * 2], X1 ; store shifted values, words 8..15
507
+ pcmpeqw X0, ONE ; X0/X1 = mask of words equal to 1
508
+ pcmpeqw X1, ONE
509
+ packsswb N0, N1 ; narrow the 16 word masks to 16 byte masks
510
+ packsswb X0, X1
511
+ pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
512
+ mov T1, ZEROBITS ; T1 = bits pointer
513
+ not T0 ; the stored word is the complement of the mask
514
+ mov word [T1 + 2 * SIZEOF_INT + KK], T0w ; 16 bits for this group, KK bytes past bits + 2*SIZEOF_INT
515
+ pmovmskb T1, X0 ; idx = _mm_movemask_epi8(x1);
516
+ bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1);
517
+ jz .CONTINUER16 ; if (idx) {
518
+ lea T1, [T1+KK*8] ; KK*8 = index of the first element of this group
519
+ mov EOB, T1 ; EOB = k + idx;
520
+ .CONTINUER16:
521
+ add VALUES, 16*2 ; advance output by 16 words
522
+ add LUT, 16*SIZEOF_INT ; advance LUT by 16 ints
523
+ add KK, 2 ; next 16-bit sign word
524
+ dec K
525
+ jnz .BLOOPR16
526
+ test LEN, 15
527
+ je .PADDINGR ; Sl is a multiple of 16: no tail to process
528
+ .ELOOPR16:
529
+ mov LENEND, LEN
530
+
531
+ test LENEND, 8
532
+ jz .TRYR7 ; bit 3 clear: at most 7 elements remain
533
+ test LENEND, 7
534
+ jz .TRYR8 ; exactly 8 elements remain
535
+
536
+ and LENEND, 7 ; LENEND = Sl mod 8; presumably consumed by LOAD15 - TODO confirm
537
+ LOAD15 ; 9..15 elements remain; macro defined outside this chunk
538
+ pcmpgtw N0, X0 ; same sequence as in .BLOOPR16
539
+ pcmpgtw N1, X1
540
+ paddw X0, N0
541
+ paddw X1, N1
542
+ pxor X0, N0
543
+ pxor X1, N1
544
+ psrlw X0, AL
545
+ psrlw X1, AL
546
+ movdqa XMMWORD [VALUES + (0) * 2], X0
547
+ movdqa XMMWORD [VALUES + (8) * 2], X1
548
+ pcmpeqw X0, ONE
549
+ pcmpeqw X1, ONE
550
+ packsswb N0, N1
551
+ packsswb X0, X1
552
+ pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
553
+ mov T1, ZEROBITS ; T1 = bits pointer
554
+ not T0
555
+ mov word [T1 + 2 * SIZEOF_INT + KK], T0w
556
+ pmovmskb T1, X0 ; idx = _mm_movemask_epi8(x1);
557
+ bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1);
558
+ jz .CONTINUER15 ; if (idx) {
559
+ lea T1, [T1+KK*8]
560
+ mov EOB, T1 ; EOB = k + idx;
561
+ .CONTINUER15:
562
+ add VALUES, 16*2 ; two XMMWORDs were written
563
+ jmp .PADDINGR
564
+ .TRYR8:
565
+ LOAD8 ; macro defined outside this chunk
566
+
567
+ pcmpgtw N0, X0 ; single-register version of the sequence above
568
+ paddw X0, N0
569
+ pxor X0, N0
570
+ psrlw X0, AL
571
+ movdqa XMMWORD [VALUES + (0) * 2], X0
572
+ pcmpeqw X0, ONE
573
+ packsswb N0, ZERO ; upper 8 bytes come from ZERO
574
+ packsswb X0, ZERO
575
+ pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
576
+ mov T1, ZEROBITS ; T1 = bits pointer
577
+ not T0
578
+ mov word [T1 + 2 * SIZEOF_INT + KK], T0w
579
+ pmovmskb T1, X0 ; idx = _mm_movemask_epi8(x1);
580
+ bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1);
581
+ jz .CONTINUER8 ; if (idx) {
582
+ lea T1, [T1+KK*8]
583
+ mov EOB, T1 ; EOB = k + idx;
584
+ .CONTINUER8:
585
+ add VALUES, 8*2 ; one XMMWORD was written
586
+ jmp .PADDINGR
587
+ .TRYR7:
588
+ and LENEND, 7 ; LENEND = Sl mod 8; presumably consumed by LOAD7 - TODO confirm
589
+ LOAD7 ; macro defined outside this chunk
590
+
591
+ pcmpgtw N0, X0 ; single-register version of the sequence above
592
+ paddw X0, N0
593
+ pxor X0, N0
594
+ psrlw X0, AL
595
+ movdqa XMMWORD [VALUES + (0) * 2], X0
596
+ pcmpeqw X0, ONE
597
+ packsswb N0, ZERO ; upper 8 bytes come from ZERO
598
+ packsswb X0, ZERO
599
+ pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
600
+ mov T1, ZEROBITS ; T1 = bits pointer
601
+ not T0
602
+ mov word [T1 + 2 * SIZEOF_INT + KK], T0w
603
+ pmovmskb T1, X0 ; idx = _mm_movemask_epi8(x1);
604
+ bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1);
605
+ jz .CONTINUER7 ; if (idx) {
606
+ lea T1, [T1+KK*8]
607
+ mov EOB, T1 ; EOB = k + idx;
608
+ .CONTINUER7:
609
+ add VALUES, 8*2 ; one XMMWORD was written
610
+ .PADDINGR:
611
+ mov K, LEN
612
+ add K, 7
613
+ and K, -8
614
+ shr K, 3 ; K = Sl rounded up to a multiple of 8, divided by 8
615
+ sub K, DCTSIZE2/8 ; K = -(number of 8-word groups still to clear)
616
+ jz .EPADDINGR ; nothing left to clear
617
+ align 16
618
+ .ZEROLOOPR:
619
+ movdqa XMMWORD [VALUES + 0], ZERO ; clear the next 8 words
620
+ add VALUES, 8*2
621
+ inc K ; count up towards zero
622
+ jnz .ZEROLOOPR
623
+ .EPADDINGR:
624
+ sub VALUES, DCTSIZE2*2 ; step VALUES back by DCTSIZE2 words
625
+
626
+ REDUCE0 ; macro defined outside this chunk
627
+
628
+ mov eax, EOB ; return value
629
+
630
+ pop ebp ; restore the aligned frame pointer (ebp was used as KK)
631
+ pop edi
632
+ pop esi
633
+ ; pop edx ; need not be preserved
634
+ pop ecx
635
+ pop ebx
636
+ mov esp, ebp ; esp <- aligned ebp
637
+ pop esp ; esp <- unaligned stack pointer stored at [aligned esp] in the prologue
638
+ pop ebp ; restore caller's ebp
639
+ ret
640
+
641
+ %undef ZERO
642
+ %undef ONE
643
+ %undef X0
644
+ %undef X1
645
+ %undef N0
646
+ %undef N1
647
+ %undef AL
648
+ %undef K
649
+ %undef KK
650
+ %undef EOB
651
+ %undef SIGN ; NOTE(review): no %define of SIGN is visible for this routine
652
+ %undef LUT
653
+ %undef T0
654
+ %undef T1
655
+ %undef BLOCK
656
+ %undef VALUES
657
+ %undef LEN
658
+ %undef LENEND ; NOTE(review): T0w and ZEROBITS are %define'd for this routine but have no %undef here
659
+
660
+ ; For some reason, the OS X linker does not honor the request to align the
661
+ ; segment unless we do this.
662
+ align 32