rfreeimage 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (860) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +1 -0
  4. data/Rakefile +34 -0
  5. data/ext/rfreeimage/extconf.rb +35 -0
  6. data/ext/rfreeimage/rfi_main.c +389 -0
  7. data/lib/rfreeimage/image.rb +26 -0
  8. data/lib/rfreeimage/version.rb +3 -0
  9. data/lib/rfreeimage.rb +3 -0
  10. data/rfreeimage.gemspec +32 -0
  11. data/vendor/FreeImage/Makefile +34 -0
  12. data/vendor/FreeImage/Makefile.cygwin +74 -0
  13. data/vendor/FreeImage/Makefile.fip +84 -0
  14. data/vendor/FreeImage/Makefile.gnu +83 -0
  15. data/vendor/FreeImage/Makefile.iphone +96 -0
  16. data/vendor/FreeImage/Makefile.mingw +136 -0
  17. data/vendor/FreeImage/Makefile.osx +115 -0
  18. data/vendor/FreeImage/Makefile.solaris +66 -0
  19. data/vendor/FreeImage/Makefile.srcs +6 -0
  20. data/vendor/FreeImage/README.iphone +19 -0
  21. data/vendor/FreeImage/README.linux +50 -0
  22. data/vendor/FreeImage/README.minGW +236 -0
  23. data/vendor/FreeImage/README.osx +44 -0
  24. data/vendor/FreeImage/README.solaris +67 -0
  25. data/vendor/FreeImage/Source/CacheFile.h +92 -0
  26. data/vendor/FreeImage/Source/DeprecationManager/Deprecated.cpp +36 -0
  27. data/vendor/FreeImage/Source/DeprecationManager/DeprecationMgr.cpp +103 -0
  28. data/vendor/FreeImage/Source/DeprecationManager/DeprecationMgr.h +83 -0
  29. data/vendor/FreeImage/Source/FreeImage/BitmapAccess.cpp +1573 -0
  30. data/vendor/FreeImage/Source/FreeImage/CacheFile.cpp +271 -0
  31. data/vendor/FreeImage/Source/FreeImage/ColorLookup.cpp +785 -0
  32. data/vendor/FreeImage/Source/FreeImage/Conversion.cpp +551 -0
  33. data/vendor/FreeImage/Source/FreeImage/Conversion16_555.cpp +209 -0
  34. data/vendor/FreeImage/Source/FreeImage/Conversion16_565.cpp +204 -0
  35. data/vendor/FreeImage/Source/FreeImage/Conversion24.cpp +252 -0
  36. data/vendor/FreeImage/Source/FreeImage/Conversion32.cpp +345 -0
  37. data/vendor/FreeImage/Source/FreeImage/Conversion4.cpp +246 -0
  38. data/vendor/FreeImage/Source/FreeImage/Conversion8.cpp +305 -0
  39. data/vendor/FreeImage/Source/FreeImage/ConversionFloat.cpp +194 -0
  40. data/vendor/FreeImage/Source/FreeImage/ConversionRGB16.cpp +144 -0
  41. data/vendor/FreeImage/Source/FreeImage/ConversionRGBA16.cpp +147 -0
  42. data/vendor/FreeImage/Source/FreeImage/ConversionRGBAF.cpp +250 -0
  43. data/vendor/FreeImage/Source/FreeImage/ConversionRGBF.cpp +243 -0
  44. data/vendor/FreeImage/Source/FreeImage/ConversionType.cpp +699 -0
  45. data/vendor/FreeImage/Source/FreeImage/ConversionUINT16.cpp +134 -0
  46. data/vendor/FreeImage/Source/FreeImage/FreeImage.cpp +226 -0
  47. data/vendor/FreeImage/Source/FreeImage/FreeImageC.c +22 -0
  48. data/vendor/FreeImage/Source/FreeImage/FreeImageIO.cpp +175 -0
  49. data/vendor/FreeImage/Source/FreeImage/GetType.cpp +92 -0
  50. data/vendor/FreeImage/Source/FreeImage/Halftoning.cpp +474 -0
  51. data/vendor/FreeImage/Source/FreeImage/J2KHelper.cpp +591 -0
  52. data/vendor/FreeImage/Source/FreeImage/J2KHelper.h +36 -0
  53. data/vendor/FreeImage/Source/FreeImage/LFPQuantizer.cpp +208 -0
  54. data/vendor/FreeImage/Source/FreeImage/MNGHelper.cpp +1320 -0
  55. data/vendor/FreeImage/Source/FreeImage/MemoryIO.cpp +237 -0
  56. data/vendor/FreeImage/Source/FreeImage/MultiPage.cpp +974 -0
  57. data/vendor/FreeImage/Source/FreeImage/NNQuantizer.cpp +507 -0
  58. data/vendor/FreeImage/Source/FreeImage/PSDParser.cpp +1057 -0
  59. data/vendor/FreeImage/Source/FreeImage/PSDParser.h +271 -0
  60. data/vendor/FreeImage/Source/FreeImage/PixelAccess.cpp +197 -0
  61. data/vendor/FreeImage/Source/FreeImage/Plugin.cpp +822 -0
  62. data/vendor/FreeImage/Source/FreeImage/PluginBMP.cpp +1494 -0
  63. data/vendor/FreeImage/Source/FreeImage/PluginCUT.cpp +240 -0
  64. data/vendor/FreeImage/Source/FreeImage/PluginDDS.cpp +655 -0
  65. data/vendor/FreeImage/Source/FreeImage/PluginEXR.cpp +773 -0
  66. data/vendor/FreeImage/Source/FreeImage/PluginG3.cpp +433 -0
  67. data/vendor/FreeImage/Source/FreeImage/PluginGIF.cpp +1407 -0
  68. data/vendor/FreeImage/Source/FreeImage/PluginHDR.cpp +722 -0
  69. data/vendor/FreeImage/Source/FreeImage/PluginICO.cpp +824 -0
  70. data/vendor/FreeImage/Source/FreeImage/PluginIFF.cpp +459 -0
  71. data/vendor/FreeImage/Source/FreeImage/PluginJ2K.cpp +328 -0
  72. data/vendor/FreeImage/Source/FreeImage/PluginJNG.cpp +162 -0
  73. data/vendor/FreeImage/Source/FreeImage/PluginJP2.cpp +328 -0
  74. data/vendor/FreeImage/Source/FreeImage/PluginJPEG.cpp +1706 -0
  75. data/vendor/FreeImage/Source/FreeImage/PluginJXR.cpp +1475 -0
  76. data/vendor/FreeImage/Source/FreeImage/PluginKOALA.cpp +243 -0
  77. data/vendor/FreeImage/Source/FreeImage/PluginMNG.cpp +153 -0
  78. data/vendor/FreeImage/Source/FreeImage/PluginPCD.cpp +251 -0
  79. data/vendor/FreeImage/Source/FreeImage/PluginPCX.cpp +659 -0
  80. data/vendor/FreeImage/Source/FreeImage/PluginPFM.cpp +409 -0
  81. data/vendor/FreeImage/Source/FreeImage/PluginPICT.cpp +1343 -0
  82. data/vendor/FreeImage/Source/FreeImage/PluginPNG.cpp +1115 -0
  83. data/vendor/FreeImage/Source/FreeImage/PluginPNM.cpp +838 -0
  84. data/vendor/FreeImage/Source/FreeImage/PluginPSD.cpp +131 -0
  85. data/vendor/FreeImage/Source/FreeImage/PluginRAS.cpp +512 -0
  86. data/vendor/FreeImage/Source/FreeImage/PluginRAW.cpp +793 -0
  87. data/vendor/FreeImage/Source/FreeImage/PluginSGI.cpp +425 -0
  88. data/vendor/FreeImage/Source/FreeImage/PluginTARGA.cpp +1591 -0
  89. data/vendor/FreeImage/Source/FreeImage/PluginTIFF.cpp +2631 -0
  90. data/vendor/FreeImage/Source/FreeImage/PluginWBMP.cpp +372 -0
  91. data/vendor/FreeImage/Source/FreeImage/PluginWebP.cpp +698 -0
  92. data/vendor/FreeImage/Source/FreeImage/PluginXBM.cpp +399 -0
  93. data/vendor/FreeImage/Source/FreeImage/PluginXPM.cpp +487 -0
  94. data/vendor/FreeImage/Source/FreeImage/TIFFLogLuv.cpp +65 -0
  95. data/vendor/FreeImage/Source/FreeImage/ToneMapping.cpp +75 -0
  96. data/vendor/FreeImage/Source/FreeImage/WuQuantizer.cpp +559 -0
  97. data/vendor/FreeImage/Source/FreeImage/ZLibInterface.cpp +223 -0
  98. data/vendor/FreeImage/Source/FreeImage/tmoColorConvert.cpp +479 -0
  99. data/vendor/FreeImage/Source/FreeImage/tmoDrago03.cpp +295 -0
  100. data/vendor/FreeImage/Source/FreeImage/tmoFattal02.cpp +689 -0
  101. data/vendor/FreeImage/Source/FreeImage/tmoReinhard05.cpp +260 -0
  102. data/vendor/FreeImage/Source/FreeImage.h +1153 -0
  103. data/vendor/FreeImage/Source/FreeImageIO.h +63 -0
  104. data/vendor/FreeImage/Source/FreeImageToolkit/BSplineRotate.cpp +730 -0
  105. data/vendor/FreeImage/Source/FreeImageToolkit/Background.cpp +895 -0
  106. data/vendor/FreeImage/Source/FreeImageToolkit/Channels.cpp +488 -0
  107. data/vendor/FreeImage/Source/FreeImageToolkit/ClassicRotate.cpp +917 -0
  108. data/vendor/FreeImage/Source/FreeImageToolkit/Colors.cpp +967 -0
  109. data/vendor/FreeImage/Source/FreeImageToolkit/CopyPaste.cpp +861 -0
  110. data/vendor/FreeImage/Source/FreeImageToolkit/Display.cpp +230 -0
  111. data/vendor/FreeImage/Source/FreeImageToolkit/Filters.h +287 -0
  112. data/vendor/FreeImage/Source/FreeImageToolkit/Flip.cpp +166 -0
  113. data/vendor/FreeImage/Source/FreeImageToolkit/JPEGTransform.cpp +623 -0
  114. data/vendor/FreeImage/Source/FreeImageToolkit/MultigridPoissonSolver.cpp +505 -0
  115. data/vendor/FreeImage/Source/FreeImageToolkit/Rescale.cpp +192 -0
  116. data/vendor/FreeImage/Source/FreeImageToolkit/Resize.cpp +2116 -0
  117. data/vendor/FreeImage/Source/FreeImageToolkit/Resize.h +196 -0
  118. data/vendor/FreeImage/Source/LibJPEG/ansi2knr.c +739 -0
  119. data/vendor/FreeImage/Source/LibJPEG/cderror.h +134 -0
  120. data/vendor/FreeImage/Source/LibJPEG/cdjpeg.c +181 -0
  121. data/vendor/FreeImage/Source/LibJPEG/cdjpeg.h +187 -0
  122. data/vendor/FreeImage/Source/LibJPEG/cjpeg.c +664 -0
  123. data/vendor/FreeImage/Source/LibJPEG/ckconfig.c +402 -0
  124. data/vendor/FreeImage/Source/LibJPEG/djpeg.c +617 -0
  125. data/vendor/FreeImage/Source/LibJPEG/example.c +433 -0
  126. data/vendor/FreeImage/Source/LibJPEG/jaricom.c +153 -0
  127. data/vendor/FreeImage/Source/LibJPEG/jcapimin.c +288 -0
  128. data/vendor/FreeImage/Source/LibJPEG/jcapistd.c +162 -0
  129. data/vendor/FreeImage/Source/LibJPEG/jcarith.c +944 -0
  130. data/vendor/FreeImage/Source/LibJPEG/jccoefct.c +454 -0
  131. data/vendor/FreeImage/Source/LibJPEG/jccolor.c +604 -0
  132. data/vendor/FreeImage/Source/LibJPEG/jcdctmgr.c +477 -0
  133. data/vendor/FreeImage/Source/LibJPEG/jchuff.c +1573 -0
  134. data/vendor/FreeImage/Source/LibJPEG/jcinit.c +84 -0
  135. data/vendor/FreeImage/Source/LibJPEG/jcmainct.c +297 -0
  136. data/vendor/FreeImage/Source/LibJPEG/jcmarker.c +719 -0
  137. data/vendor/FreeImage/Source/LibJPEG/jcmaster.c +856 -0
  138. data/vendor/FreeImage/Source/LibJPEG/jcomapi.c +106 -0
  139. data/vendor/FreeImage/Source/LibJPEG/jconfig.h +161 -0
  140. data/vendor/FreeImage/Source/LibJPEG/jcparam.c +675 -0
  141. data/vendor/FreeImage/Source/LibJPEG/jcprepct.c +358 -0
  142. data/vendor/FreeImage/Source/LibJPEG/jcsample.c +545 -0
  143. data/vendor/FreeImage/Source/LibJPEG/jctrans.c +385 -0
  144. data/vendor/FreeImage/Source/LibJPEG/jdapimin.c +399 -0
  145. data/vendor/FreeImage/Source/LibJPEG/jdapistd.c +276 -0
  146. data/vendor/FreeImage/Source/LibJPEG/jdarith.c +796 -0
  147. data/vendor/FreeImage/Source/LibJPEG/jdatadst.c +270 -0
  148. data/vendor/FreeImage/Source/LibJPEG/jdatasrc.c +275 -0
  149. data/vendor/FreeImage/Source/LibJPEG/jdcoefct.c +741 -0
  150. data/vendor/FreeImage/Source/LibJPEG/jdcolor.c +748 -0
  151. data/vendor/FreeImage/Source/LibJPEG/jdct.h +393 -0
  152. data/vendor/FreeImage/Source/LibJPEG/jddctmgr.c +384 -0
  153. data/vendor/FreeImage/Source/LibJPEG/jdhuff.c +1554 -0
  154. data/vendor/FreeImage/Source/LibJPEG/jdinput.c +662 -0
  155. data/vendor/FreeImage/Source/LibJPEG/jdmainct.c +513 -0
  156. data/vendor/FreeImage/Source/LibJPEG/jdmarker.c +1511 -0
  157. data/vendor/FreeImage/Source/LibJPEG/jdmaster.c +543 -0
  158. data/vendor/FreeImage/Source/LibJPEG/jdmerge.c +401 -0
  159. data/vendor/FreeImage/Source/LibJPEG/jdpostct.c +290 -0
  160. data/vendor/FreeImage/Source/LibJPEG/jdsample.c +361 -0
  161. data/vendor/FreeImage/Source/LibJPEG/jdtrans.c +140 -0
  162. data/vendor/FreeImage/Source/LibJPEG/jerror.c +253 -0
  163. data/vendor/FreeImage/Source/LibJPEG/jerror.h +304 -0
  164. data/vendor/FreeImage/Source/LibJPEG/jfdctflt.c +174 -0
  165. data/vendor/FreeImage/Source/LibJPEG/jfdctfst.c +230 -0
  166. data/vendor/FreeImage/Source/LibJPEG/jfdctint.c +4406 -0
  167. data/vendor/FreeImage/Source/LibJPEG/jidctflt.c +235 -0
  168. data/vendor/FreeImage/Source/LibJPEG/jidctfst.c +368 -0
  169. data/vendor/FreeImage/Source/LibJPEG/jidctint.c +5179 -0
  170. data/vendor/FreeImage/Source/LibJPEG/jinclude.h +91 -0
  171. data/vendor/FreeImage/Source/LibJPEG/jmemansi.c +167 -0
  172. data/vendor/FreeImage/Source/LibJPEG/jmemdos.c +638 -0
  173. data/vendor/FreeImage/Source/LibJPEG/jmemmac.c +289 -0
  174. data/vendor/FreeImage/Source/LibJPEG/jmemmgr.c +1119 -0
  175. data/vendor/FreeImage/Source/LibJPEG/jmemname.c +276 -0
  176. data/vendor/FreeImage/Source/LibJPEG/jmemnobs.c +109 -0
  177. data/vendor/FreeImage/Source/LibJPEG/jmemsys.h +198 -0
  178. data/vendor/FreeImage/Source/LibJPEG/jmorecfg.h +442 -0
  179. data/vendor/FreeImage/Source/LibJPEG/jpegint.h +426 -0
  180. data/vendor/FreeImage/Source/LibJPEG/jpeglib.h +1180 -0
  181. data/vendor/FreeImage/Source/LibJPEG/jpegtran.c +577 -0
  182. data/vendor/FreeImage/Source/LibJPEG/jquant1.c +857 -0
  183. data/vendor/FreeImage/Source/LibJPEG/jquant2.c +1311 -0
  184. data/vendor/FreeImage/Source/LibJPEG/jutils.c +227 -0
  185. data/vendor/FreeImage/Source/LibJPEG/jversion.h +14 -0
  186. data/vendor/FreeImage/Source/LibJPEG/rdbmp.c +480 -0
  187. data/vendor/FreeImage/Source/LibJPEG/rdcolmap.c +253 -0
  188. data/vendor/FreeImage/Source/LibJPEG/rdgif.c +38 -0
  189. data/vendor/FreeImage/Source/LibJPEG/rdjpgcom.c +515 -0
  190. data/vendor/FreeImage/Source/LibJPEG/rdppm.c +459 -0
  191. data/vendor/FreeImage/Source/LibJPEG/rdrle.c +387 -0
  192. data/vendor/FreeImage/Source/LibJPEG/rdswitch.c +365 -0
  193. data/vendor/FreeImage/Source/LibJPEG/rdtarga.c +500 -0
  194. data/vendor/FreeImage/Source/LibJPEG/transupp.c +1763 -0
  195. data/vendor/FreeImage/Source/LibJPEG/transupp.h +219 -0
  196. data/vendor/FreeImage/Source/LibJPEG/wrbmp.c +442 -0
  197. data/vendor/FreeImage/Source/LibJPEG/wrgif.c +399 -0
  198. data/vendor/FreeImage/Source/LibJPEG/wrjpgcom.c +583 -0
  199. data/vendor/FreeImage/Source/LibJPEG/wrppm.c +269 -0
  200. data/vendor/FreeImage/Source/LibJPEG/wrrle.c +305 -0
  201. data/vendor/FreeImage/Source/LibJPEG/wrtarga.c +253 -0
  202. data/vendor/FreeImage/Source/LibJXR/common/include/guiddef.h +230 -0
  203. data/vendor/FreeImage/Source/LibJXR/common/include/wmsal.h +757 -0
  204. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstring.h +342 -0
  205. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_adt.h +71 -0
  206. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_strict.h +1096 -0
  207. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_undef.h +406 -0
  208. data/vendor/FreeImage/Source/LibJXR/image/decode/JXRTranscode.c +987 -0
  209. data/vendor/FreeImage/Source/LibJXR/image/decode/decode.c +200 -0
  210. data/vendor/FreeImage/Source/LibJXR/image/decode/decode.h +143 -0
  211. data/vendor/FreeImage/Source/LibJXR/image/decode/postprocess.c +288 -0
  212. data/vendor/FreeImage/Source/LibJXR/image/decode/segdec.c +1205 -0
  213. data/vendor/FreeImage/Source/LibJXR/image/decode/strInvTransform.c +1888 -0
  214. data/vendor/FreeImage/Source/LibJXR/image/decode/strPredQuantDec.c +539 -0
  215. data/vendor/FreeImage/Source/LibJXR/image/decode/strdec.c +3628 -0
  216. data/vendor/FreeImage/Source/LibJXR/image/decode/strdec_x86.c +1640 -0
  217. data/vendor/FreeImage/Source/LibJXR/image/encode/encode.c +144 -0
  218. data/vendor/FreeImage/Source/LibJXR/image/encode/encode.h +113 -0
  219. data/vendor/FreeImage/Source/LibJXR/image/encode/segenc.c +1186 -0
  220. data/vendor/FreeImage/Source/LibJXR/image/encode/strFwdTransform.c +1111 -0
  221. data/vendor/FreeImage/Source/LibJXR/image/encode/strPredQuantEnc.c +511 -0
  222. data/vendor/FreeImage/Source/LibJXR/image/encode/strenc.c +2370 -0
  223. data/vendor/FreeImage/Source/LibJXR/image/encode/strenc_x86.c +409 -0
  224. data/vendor/FreeImage/Source/LibJXR/image/sys/adapthuff.c +511 -0
  225. data/vendor/FreeImage/Source/LibJXR/image/sys/ansi.h +61 -0
  226. data/vendor/FreeImage/Source/LibJXR/image/sys/common.h +131 -0
  227. data/vendor/FreeImage/Source/LibJXR/image/sys/image.c +183 -0
  228. data/vendor/FreeImage/Source/LibJXR/image/sys/perfTimer.h +115 -0
  229. data/vendor/FreeImage/Source/LibJXR/image/sys/perfTimerANSI.c +274 -0
  230. data/vendor/FreeImage/Source/LibJXR/image/sys/strPredQuant.c +306 -0
  231. data/vendor/FreeImage/Source/LibJXR/image/sys/strTransform.c +85 -0
  232. data/vendor/FreeImage/Source/LibJXR/image/sys/strTransform.h +50 -0
  233. data/vendor/FreeImage/Source/LibJXR/image/sys/strcodec.c +1251 -0
  234. data/vendor/FreeImage/Source/LibJXR/image/sys/strcodec.h +681 -0
  235. data/vendor/FreeImage/Source/LibJXR/image/sys/windowsmediaphoto.h +515 -0
  236. data/vendor/FreeImage/Source/LibJXR/image/sys/xplatform_image.h +84 -0
  237. data/vendor/FreeImage/Source/LibJXR/image/x86/x86.h +58 -0
  238. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlue.c +930 -0
  239. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlue.h +636 -0
  240. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlueJxr.c +2246 -0
  241. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGluePFC.c +2338 -0
  242. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRMeta.c +905 -0
  243. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRMeta.h +258 -0
  244. data/vendor/FreeImage/Source/LibOpenJPEG/bio.c +188 -0
  245. data/vendor/FreeImage/Source/LibOpenJPEG/bio.h +128 -0
  246. data/vendor/FreeImage/Source/LibOpenJPEG/cidx_manager.c +239 -0
  247. data/vendor/FreeImage/Source/LibOpenJPEG/cidx_manager.h +68 -0
  248. data/vendor/FreeImage/Source/LibOpenJPEG/cio.c +644 -0
  249. data/vendor/FreeImage/Source/LibOpenJPEG/cio.h +393 -0
  250. data/vendor/FreeImage/Source/LibOpenJPEG/dwt.c +919 -0
  251. data/vendor/FreeImage/Source/LibOpenJPEG/dwt.h +116 -0
  252. data/vendor/FreeImage/Source/LibOpenJPEG/event.c +141 -0
  253. data/vendor/FreeImage/Source/LibOpenJPEG/event.h +97 -0
  254. data/vendor/FreeImage/Source/LibOpenJPEG/function_list.c +114 -0
  255. data/vendor/FreeImage/Source/LibOpenJPEG/function_list.h +126 -0
  256. data/vendor/FreeImage/Source/LibOpenJPEG/image.c +235 -0
  257. data/vendor/FreeImage/Source/LibOpenJPEG/image.h +63 -0
  258. data/vendor/FreeImage/Source/LibOpenJPEG/indexbox_manager.h +148 -0
  259. data/vendor/FreeImage/Source/LibOpenJPEG/invert.c +289 -0
  260. data/vendor/FreeImage/Source/LibOpenJPEG/invert.h +59 -0
  261. data/vendor/FreeImage/Source/LibOpenJPEG/j2k.c +10238 -0
  262. data/vendor/FreeImage/Source/LibOpenJPEG/j2k.h +838 -0
  263. data/vendor/FreeImage/Source/LibOpenJPEG/jp2.c +2776 -0
  264. data/vendor/FreeImage/Source/LibOpenJPEG/jp2.h +490 -0
  265. data/vendor/FreeImage/Source/LibOpenJPEG/mct.c +319 -0
  266. data/vendor/FreeImage/Source/LibOpenJPEG/mct.h +149 -0
  267. data/vendor/FreeImage/Source/LibOpenJPEG/mqc.c +604 -0
  268. data/vendor/FreeImage/Source/LibOpenJPEG/mqc.h +201 -0
  269. data/vendor/FreeImage/Source/LibOpenJPEG/openjpeg.c +955 -0
  270. data/vendor/FreeImage/Source/LibOpenJPEG/openjpeg.h +1475 -0
  271. data/vendor/FreeImage/Source/LibOpenJPEG/opj_clock.c +59 -0
  272. data/vendor/FreeImage/Source/LibOpenJPEG/opj_clock.h +54 -0
  273. data/vendor/FreeImage/Source/LibOpenJPEG/opj_codec.h +160 -0
  274. data/vendor/FreeImage/Source/LibOpenJPEG/opj_config.h +9 -0
  275. data/vendor/FreeImage/Source/LibOpenJPEG/opj_config_private.h +16 -0
  276. data/vendor/FreeImage/Source/LibOpenJPEG/opj_includes.h +175 -0
  277. data/vendor/FreeImage/Source/LibOpenJPEG/opj_intmath.h +172 -0
  278. data/vendor/FreeImage/Source/LibOpenJPEG/opj_inttypes.h +43 -0
  279. data/vendor/FreeImage/Source/LibOpenJPEG/opj_malloc.h +180 -0
  280. data/vendor/FreeImage/Source/LibOpenJPEG/opj_stdint.h +47 -0
  281. data/vendor/FreeImage/Source/LibOpenJPEG/phix_manager.c +191 -0
  282. data/vendor/FreeImage/Source/LibOpenJPEG/pi.c +1870 -0
  283. data/vendor/FreeImage/Source/LibOpenJPEG/pi.h +182 -0
  284. data/vendor/FreeImage/Source/LibOpenJPEG/ppix_manager.c +194 -0
  285. data/vendor/FreeImage/Source/LibOpenJPEG/raw.c +89 -0
  286. data/vendor/FreeImage/Source/LibOpenJPEG/raw.h +100 -0
  287. data/vendor/FreeImage/Source/LibOpenJPEG/t1.c +1751 -0
  288. data/vendor/FreeImage/Source/LibOpenJPEG/t1.h +157 -0
  289. data/vendor/FreeImage/Source/LibOpenJPEG/t1_generate_luts.c +276 -0
  290. data/vendor/FreeImage/Source/LibOpenJPEG/t1_luts.h +143 -0
  291. data/vendor/FreeImage/Source/LibOpenJPEG/t2.c +1334 -0
  292. data/vendor/FreeImage/Source/LibOpenJPEG/t2.h +127 -0
  293. data/vendor/FreeImage/Source/LibOpenJPEG/tcd.c +2123 -0
  294. data/vendor/FreeImage/Source/LibOpenJPEG/tcd.h +348 -0
  295. data/vendor/FreeImage/Source/LibOpenJPEG/tgt.c +331 -0
  296. data/vendor/FreeImage/Source/LibOpenJPEG/tgt.h +140 -0
  297. data/vendor/FreeImage/Source/LibOpenJPEG/thix_manager.c +134 -0
  298. data/vendor/FreeImage/Source/LibOpenJPEG/tpix_manager.c +185 -0
  299. data/vendor/FreeImage/Source/LibPNG/example.c +1061 -0
  300. data/vendor/FreeImage/Source/LibPNG/png.c +4493 -0
  301. data/vendor/FreeImage/Source/LibPNG/png.h +3282 -0
  302. data/vendor/FreeImage/Source/LibPNG/pngconf.h +644 -0
  303. data/vendor/FreeImage/Source/LibPNG/pngdebug.h +154 -0
  304. data/vendor/FreeImage/Source/LibPNG/pngerror.c +963 -0
  305. data/vendor/FreeImage/Source/LibPNG/pngget.c +1213 -0
  306. data/vendor/FreeImage/Source/LibPNG/pnginfo.h +260 -0
  307. data/vendor/FreeImage/Source/LibPNG/pnglibconf.h +218 -0
  308. data/vendor/FreeImage/Source/LibPNG/pngmem.c +281 -0
  309. data/vendor/FreeImage/Source/LibPNG/pngpread.c +1168 -0
  310. data/vendor/FreeImage/Source/LibPNG/pngpriv.h +1944 -0
  311. data/vendor/FreeImage/Source/LibPNG/pngread.c +4121 -0
  312. data/vendor/FreeImage/Source/LibPNG/pngrio.c +120 -0
  313. data/vendor/FreeImage/Source/LibPNG/pngrtran.c +4994 -0
  314. data/vendor/FreeImage/Source/LibPNG/pngrutil.c +4474 -0
  315. data/vendor/FreeImage/Source/LibPNG/pngset.c +1611 -0
  316. data/vendor/FreeImage/Source/LibPNG/pngstruct.h +489 -0
  317. data/vendor/FreeImage/Source/LibPNG/pngtest.c +2011 -0
  318. data/vendor/FreeImage/Source/LibPNG/pngtrans.c +849 -0
  319. data/vendor/FreeImage/Source/LibPNG/pngwio.c +168 -0
  320. data/vendor/FreeImage/Source/LibPNG/pngwrite.c +2455 -0
  321. data/vendor/FreeImage/Source/LibPNG/pngwtran.c +574 -0
  322. data/vendor/FreeImage/Source/LibPNG/pngwutil.c +3029 -0
  323. data/vendor/FreeImage/Source/LibRawLite/dcraw/dcraw.c +15462 -0
  324. data/vendor/FreeImage/Source/LibRawLite/internal/aahd_demosaic.cpp +706 -0
  325. data/vendor/FreeImage/Source/LibRawLite/internal/dcb_demosaicing.c +710 -0
  326. data/vendor/FreeImage/Source/LibRawLite/internal/dcraw_common.cpp +13593 -0
  327. data/vendor/FreeImage/Source/LibRawLite/internal/dcraw_fileio.cpp +240 -0
  328. data/vendor/FreeImage/Source/LibRawLite/internal/defines.h +167 -0
  329. data/vendor/FreeImage/Source/LibRawLite/internal/demosaic_packs.cpp +99 -0
  330. data/vendor/FreeImage/Source/LibRawLite/internal/dht_demosaic.cpp +873 -0
  331. data/vendor/FreeImage/Source/LibRawLite/internal/libraw_internal_funcs.h +282 -0
  332. data/vendor/FreeImage/Source/LibRawLite/internal/libraw_x3f.cpp +1919 -0
  333. data/vendor/FreeImage/Source/LibRawLite/internal/var_defines.h +216 -0
  334. data/vendor/FreeImage/Source/LibRawLite/internal/wf_filtering.cpp +1950 -0
  335. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw.h +338 -0
  336. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_alloc.h +99 -0
  337. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_const.h +233 -0
  338. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_datastream.h +238 -0
  339. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_internal.h +225 -0
  340. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_types.h +442 -0
  341. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_version.h +62 -0
  342. data/vendor/FreeImage/Source/LibRawLite/src/libraw_c_api.cpp +230 -0
  343. data/vendor/FreeImage/Source/LibRawLite/src/libraw_cxx.cpp +4533 -0
  344. data/vendor/FreeImage/Source/LibRawLite/src/libraw_datastream.cpp +703 -0
  345. data/vendor/FreeImage/Source/LibTIFF4/mkg3states.c +451 -0
  346. data/vendor/FreeImage/Source/LibTIFF4/mkspans.c +82 -0
  347. data/vendor/FreeImage/Source/LibTIFF4/t4.h +292 -0
  348. data/vendor/FreeImage/Source/LibTIFF4/tif_aux.c +358 -0
  349. data/vendor/FreeImage/Source/LibTIFF4/tif_close.c +140 -0
  350. data/vendor/FreeImage/Source/LibTIFF4/tif_codec.c +166 -0
  351. data/vendor/FreeImage/Source/LibTIFF4/tif_color.c +287 -0
  352. data/vendor/FreeImage/Source/LibTIFF4/tif_compress.c +304 -0
  353. data/vendor/FreeImage/Source/LibTIFF4/tif_config.h +97 -0
  354. data/vendor/FreeImage/Source/LibTIFF4/tif_config.vc.h +74 -0
  355. data/vendor/FreeImage/Source/LibTIFF4/tif_config.wince.h +71 -0
  356. data/vendor/FreeImage/Source/LibTIFF4/tif_dir.c +1700 -0
  357. data/vendor/FreeImage/Source/LibTIFF4/tif_dir.h +308 -0
  358. data/vendor/FreeImage/Source/LibTIFF4/tif_dirinfo.c +959 -0
  359. data/vendor/FreeImage/Source/LibTIFF4/tif_dirread.c +5640 -0
  360. data/vendor/FreeImage/Source/LibTIFF4/tif_dirwrite.c +2910 -0
  361. data/vendor/FreeImage/Source/LibTIFF4/tif_dumpmode.c +143 -0
  362. data/vendor/FreeImage/Source/LibTIFF4/tif_error.c +80 -0
  363. data/vendor/FreeImage/Source/LibTIFF4/tif_extension.c +118 -0
  364. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3.c +1595 -0
  365. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3.h +538 -0
  366. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3sm.c +1260 -0
  367. data/vendor/FreeImage/Source/LibTIFF4/tif_flush.c +118 -0
  368. data/vendor/FreeImage/Source/LibTIFF4/tif_getimage.c +2890 -0
  369. data/vendor/FreeImage/Source/LibTIFF4/tif_jbig.c +213 -0
  370. data/vendor/FreeImage/Source/LibTIFF4/tif_jpeg.c +2354 -0
  371. data/vendor/FreeImage/Source/LibTIFF4/tif_jpeg_12.c +65 -0
  372. data/vendor/FreeImage/Source/LibTIFF4/tif_luv.c +1683 -0
  373. data/vendor/FreeImage/Source/LibTIFF4/tif_lzma.c +495 -0
  374. data/vendor/FreeImage/Source/LibTIFF4/tif_lzw.c +1169 -0
  375. data/vendor/FreeImage/Source/LibTIFF4/tif_next.c +181 -0
  376. data/vendor/FreeImage/Source/LibTIFF4/tif_ojpeg.c +2501 -0
  377. data/vendor/FreeImage/Source/LibTIFF4/tif_open.c +725 -0
  378. data/vendor/FreeImage/Source/LibTIFF4/tif_packbits.c +300 -0
  379. data/vendor/FreeImage/Source/LibTIFF4/tif_pixarlog.c +1442 -0
  380. data/vendor/FreeImage/Source/LibTIFF4/tif_predict.c +764 -0
  381. data/vendor/FreeImage/Source/LibTIFF4/tif_predict.h +77 -0
  382. data/vendor/FreeImage/Source/LibTIFF4/tif_print.c +716 -0
  383. data/vendor/FreeImage/Source/LibTIFF4/tif_read.c +1086 -0
  384. data/vendor/FreeImage/Source/LibTIFF4/tif_strip.c +383 -0
  385. data/vendor/FreeImage/Source/LibTIFF4/tif_swab.c +310 -0
  386. data/vendor/FreeImage/Source/LibTIFF4/tif_thunder.c +207 -0
  387. data/vendor/FreeImage/Source/LibTIFF4/tif_tile.c +299 -0
  388. data/vendor/FreeImage/Source/LibTIFF4/tif_unix.c +325 -0
  389. data/vendor/FreeImage/Source/LibTIFF4/tif_version.c +40 -0
  390. data/vendor/FreeImage/Source/LibTIFF4/tif_vms.c +603 -0
  391. data/vendor/FreeImage/Source/LibTIFF4/tif_warning.c +81 -0
  392. data/vendor/FreeImage/Source/LibTIFF4/tif_win32.c +443 -0
  393. data/vendor/FreeImage/Source/LibTIFF4/tif_wince.c +293 -0
  394. data/vendor/FreeImage/Source/LibTIFF4/tif_write.c +771 -0
  395. data/vendor/FreeImage/Source/LibTIFF4/tif_zip.c +472 -0
  396. data/vendor/FreeImage/Source/LibTIFF4/tiff.h +681 -0
  397. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.h +170 -0
  398. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.vc.h +160 -0
  399. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.wince.h +121 -0
  400. data/vendor/FreeImage/Source/LibTIFF4/tiffio.h +557 -0
  401. data/vendor/FreeImage/Source/LibTIFF4/tiffiop.h +367 -0
  402. data/vendor/FreeImage/Source/LibTIFF4/tiffvers.h +9 -0
  403. data/vendor/FreeImage/Source/LibTIFF4/uvcode.h +180 -0
  404. data/vendor/FreeImage/Source/LibWebP/src/dec/alphai.h +55 -0
  405. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.alpha.c +167 -0
  406. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.buffer.c +249 -0
  407. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.frame.c +827 -0
  408. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.idec.c +857 -0
  409. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.io.c +640 -0
  410. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.quant.c +110 -0
  411. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.tree.c +525 -0
  412. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.vp8.c +663 -0
  413. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.vp8l.c +1584 -0
  414. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.webp.c +834 -0
  415. data/vendor/FreeImage/Source/LibWebP/src/dec/decode_vp8.h +185 -0
  416. data/vendor/FreeImage/Source/LibWebP/src/dec/vp8i.h +353 -0
  417. data/vendor/FreeImage/Source/LibWebP/src/dec/vp8li.h +136 -0
  418. data/vendor/FreeImage/Source/LibWebP/src/dec/webpi.h +120 -0
  419. data/vendor/FreeImage/Source/LibWebP/src/demux/demux.demux.c +957 -0
  420. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing.c +377 -0
  421. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing_mips_dsp_r2.c +139 -0
  422. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing_sse2.c +296 -0
  423. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb.c +68 -0
  424. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb_mips_dsp_r2.c +108 -0
  425. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb_sse2.c +62 -0
  426. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost.c +412 -0
  427. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_mips32.c +154 -0
  428. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_mips_dsp_r2.c +107 -0
  429. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_sse2.c +121 -0
  430. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cpu.c +138 -0
  431. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec.c +760 -0
  432. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_clip_tables.c +366 -0
  433. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_mips32.c +585 -0
  434. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c +992 -0
  435. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_neon.c +1489 -0
  436. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_sse2.c +1284 -0
  437. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc.c +788 -0
  438. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_avx2.c +24 -0
  439. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_mips32.c +670 -0
  440. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c +1510 -0
  441. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_neon.c +932 -0
  442. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_sse2.c +940 -0
  443. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters.c +240 -0
  444. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c +404 -0
  445. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters_sse2.c +349 -0
  446. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.h +434 -0
  447. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless.c +1838 -0
  448. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_mips32.c +416 -0
  449. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c +921 -0
  450. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_neon.c +357 -0
  451. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_sse2.c +535 -0
  452. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler.c +115 -0
  453. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler_mips32.c +192 -0
  454. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler_mips_dsp_r2.c +210 -0
  455. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling.c +252 -0
  456. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c +280 -0
  457. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_neon.c +267 -0
  458. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_sse2.c +214 -0
  459. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv.c +166 -0
  460. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_mips32.c +100 -0
  461. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c +131 -0
  462. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_sse2.c +322 -0
  463. data/vendor/FreeImage/Source/LibWebP/src/dsp/lossless.h +313 -0
  464. data/vendor/FreeImage/Source/LibWebP/src/dsp/mips_macro.h +200 -0
  465. data/vendor/FreeImage/Source/LibWebP/src/dsp/neon.h +82 -0
  466. data/vendor/FreeImage/Source/LibWebP/src/dsp/yuv.h +321 -0
  467. data/vendor/FreeImage/Source/LibWebP/src/dsp/yuv_tables_sse2.h +536 -0
  468. data/vendor/FreeImage/Source/LibWebP/src/enc/backward_references.h +202 -0
  469. data/vendor/FreeImage/Source/LibWebP/src/enc/cost.h +69 -0
  470. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.alpha.c +440 -0
  471. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.analysis.c +501 -0
  472. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.backward_references.c +1076 -0
  473. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.config.c +163 -0
  474. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.cost.c +355 -0
  475. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.filter.c +296 -0
  476. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.frame.c +850 -0
  477. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.histogram.c +897 -0
  478. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.iterator.c +456 -0
  479. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.near_lossless.c +160 -0
  480. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture.c +290 -0
  481. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_csp.c +1100 -0
  482. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_psnr.c +150 -0
  483. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_rescale.c +285 -0
  484. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_tools.c +206 -0
  485. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.quant.c +1191 -0
  486. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.syntax.c +383 -0
  487. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.token.c +285 -0
  488. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.tree.c +504 -0
  489. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.vp8l.c +1437 -0
  490. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.webpenc.c +379 -0
  491. data/vendor/FreeImage/Source/LibWebP/src/enc/histogram.h +114 -0
  492. data/vendor/FreeImage/Source/LibWebP/src/enc/vp8enci.h +551 -0
  493. data/vendor/FreeImage/Source/LibWebP/src/enc/vp8li.h +78 -0
  494. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.anim_encode.c +1241 -0
  495. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxedit.c +696 -0
  496. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxinternal.c +551 -0
  497. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxread.c +544 -0
  498. data/vendor/FreeImage/Source/LibWebP/src/mux/muxi.h +232 -0
  499. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_reader.h +168 -0
  500. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_reader_inl.h +172 -0
  501. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_writer.h +120 -0
  502. data/vendor/FreeImage/Source/LibWebP/src/utils/color_cache.h +74 -0
  503. data/vendor/FreeImage/Source/LibWebP/src/utils/endian_inl.h +100 -0
  504. data/vendor/FreeImage/Source/LibWebP/src/utils/filters.h +32 -0
  505. data/vendor/FreeImage/Source/LibWebP/src/utils/huffman.h +67 -0
  506. data/vendor/FreeImage/Source/LibWebP/src/utils/huffman_encode.h +60 -0
  507. data/vendor/FreeImage/Source/LibWebP/src/utils/quant_levels.h +36 -0
  508. data/vendor/FreeImage/Source/LibWebP/src/utils/quant_levels_dec.h +35 -0
  509. data/vendor/FreeImage/Source/LibWebP/src/utils/random.h +63 -0
  510. data/vendor/FreeImage/Source/LibWebP/src/utils/rescaler.h +78 -0
  511. data/vendor/FreeImage/Source/LibWebP/src/utils/thread.h +93 -0
  512. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.bit_reader.c +208 -0
  513. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.bit_writer.c +308 -0
  514. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.color_cache.c +49 -0
  515. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.filters.c +76 -0
  516. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.h +121 -0
  517. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.huffman.c +205 -0
  518. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.huffman_encode.c +417 -0
  519. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.quant_levels.c +140 -0
  520. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.quant_levels_dec.c +279 -0
  521. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.random.c +43 -0
  522. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.rescaler.c +82 -0
  523. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.thread.c +309 -0
  524. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.utils.c +211 -0
  525. data/vendor/FreeImage/Source/LibWebP/src/webp/decode.h +493 -0
  526. data/vendor/FreeImage/Source/LibWebP/src/webp/demux.h +224 -0
  527. data/vendor/FreeImage/Source/LibWebP/src/webp/encode.h +515 -0
  528. data/vendor/FreeImage/Source/LibWebP/src/webp/format_constants.h +88 -0
  529. data/vendor/FreeImage/Source/LibWebP/src/webp/mux.h +507 -0
  530. data/vendor/FreeImage/Source/LibWebP/src/webp/mux_types.h +97 -0
  531. data/vendor/FreeImage/Source/LibWebP/src/webp/types.h +52 -0
  532. data/vendor/FreeImage/Source/MapIntrospector.h +212 -0
  533. data/vendor/FreeImage/Source/Metadata/Exif.cpp +1253 -0
  534. data/vendor/FreeImage/Source/Metadata/FIRational.cpp +176 -0
  535. data/vendor/FreeImage/Source/Metadata/FIRational.h +108 -0
  536. data/vendor/FreeImage/Source/Metadata/FreeImageTag.cpp +353 -0
  537. data/vendor/FreeImage/Source/Metadata/FreeImageTag.h +500 -0
  538. data/vendor/FreeImage/Source/Metadata/IPTC.cpp +342 -0
  539. data/vendor/FreeImage/Source/Metadata/TagConversion.cpp +1094 -0
  540. data/vendor/FreeImage/Source/Metadata/TagLib.cpp +1618 -0
  541. data/vendor/FreeImage/Source/Metadata/XTIFF.cpp +766 -0
  542. data/vendor/FreeImage/Source/OpenEXR/Half/eLut.cpp +114 -0
  543. data/vendor/FreeImage/Source/OpenEXR/Half/eLut.h +71 -0
  544. data/vendor/FreeImage/Source/OpenEXR/Half/half.cpp +310 -0
  545. data/vendor/FreeImage/Source/OpenEXR/Half/half.h +757 -0
  546. data/vendor/FreeImage/Source/OpenEXR/Half/halfExport.h +27 -0
  547. data/vendor/FreeImage/Source/OpenEXR/Half/halfFunction.h +179 -0
  548. data/vendor/FreeImage/Source/OpenEXR/Half/halfLimits.h +102 -0
  549. data/vendor/FreeImage/Source/OpenEXR/Half/toFloat.cpp +164 -0
  550. data/vendor/FreeImage/Source/OpenEXR/Half/toFloat.h +16391 -0
  551. data/vendor/FreeImage/Source/OpenEXR/Iex/Iex.h +60 -0
  552. data/vendor/FreeImage/Source/OpenEXR/Iex/IexBaseExc.cpp +156 -0
  553. data/vendor/FreeImage/Source/OpenEXR/Iex/IexBaseExc.h +264 -0
  554. data/vendor/FreeImage/Source/OpenEXR/Iex/IexErrnoExc.h +208 -0
  555. data/vendor/FreeImage/Source/OpenEXR/Iex/IexExport.h +51 -0
  556. data/vendor/FreeImage/Source/OpenEXR/Iex/IexForward.h +229 -0
  557. data/vendor/FreeImage/Source/OpenEXR/Iex/IexMacros.h +170 -0
  558. data/vendor/FreeImage/Source/OpenEXR/Iex/IexMathExc.h +57 -0
  559. data/vendor/FreeImage/Source/OpenEXR/Iex/IexNamespace.h +112 -0
  560. data/vendor/FreeImage/Source/OpenEXR/Iex/IexThrowErrnoExc.cpp +873 -0
  561. data/vendor/FreeImage/Source/OpenEXR/Iex/IexThrowErrnoExc.h +97 -0
  562. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFloatExc.cpp +113 -0
  563. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFloatExc.h +146 -0
  564. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFpu.cpp +530 -0
  565. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFpu.h +91 -0
  566. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathIeeeExc.h +62 -0
  567. data/vendor/FreeImage/Source/OpenEXR/IlmBaseConfig.h +61 -0
  568. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAcesFile.cpp +633 -0
  569. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAcesFile.h +324 -0
  570. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfArray.h +285 -0
  571. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAttribute.cpp +158 -0
  572. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAttribute.h +407 -0
  573. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAutoArray.h +95 -0
  574. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfB44Compressor.cpp +1072 -0
  575. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfB44Compressor.h +118 -0
  576. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfBoxAttribute.cpp +111 -0
  577. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfBoxAttribute.h +87 -0
  578. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCRgbaFile.cpp +1438 -0
  579. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCRgbaFile.h +555 -0
  580. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelList.cpp +322 -0
  581. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelList.h +436 -0
  582. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelListAttribute.cpp +150 -0
  583. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelListAttribute.h +74 -0
  584. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCheckedArithmetic.h +163 -0
  585. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticities.cpp +151 -0
  586. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticities.h +131 -0
  587. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticitiesAttribute.cpp +87 -0
  588. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticitiesAttribute.h +73 -0
  589. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompositeDeepScanLine.cpp +591 -0
  590. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompositeDeepScanLine.h +142 -0
  591. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompression.h +84 -0
  592. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressionAttribute.cpp +78 -0
  593. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressionAttribute.h +64 -0
  594. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressor.cpp +226 -0
  595. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressor.h +265 -0
  596. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfConvert.cpp +143 -0
  597. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfConvert.h +107 -0
  598. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepCompositing.cpp +110 -0
  599. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepCompositing.h +132 -0
  600. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepFrameBuffer.cpp +230 -0
  601. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepFrameBuffer.h +339 -0
  602. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageState.h +96 -0
  603. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageStateAttribute.cpp +78 -0
  604. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageStateAttribute.h +68 -0
  605. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputFile.cpp +2025 -0
  606. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputFile.h +276 -0
  607. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputPart.cpp +149 -0
  608. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputPart.h +181 -0
  609. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputFile.cpp +1552 -0
  610. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputFile.h +244 -0
  611. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputPart.cpp +107 -0
  612. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputPart.h +168 -0
  613. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputFile.cpp +1979 -0
  614. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputFile.h +437 -0
  615. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputPart.cpp +273 -0
  616. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputPart.h +362 -0
  617. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputFile.cpp +2055 -0
  618. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputFile.h +475 -0
  619. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputPart.cpp +250 -0
  620. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputPart.h +394 -0
  621. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDoubleAttribute.cpp +57 -0
  622. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDoubleAttribute.h +59 -0
  623. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressor.cpp +3424 -0
  624. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressor.h +210 -0
  625. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressorSimd.h +2145 -0
  626. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmap.cpp +335 -0
  627. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmap.h +336 -0
  628. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmapAttribute.cpp +76 -0
  629. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmapAttribute.h +68 -0
  630. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfExport.h +46 -0
  631. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFastHuf.cpp +768 -0
  632. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFastHuf.h +148 -0
  633. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatAttribute.cpp +57 -0
  634. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatAttribute.h +58 -0
  635. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatVectorAttribute.cpp +84 -0
  636. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatVectorAttribute.h +76 -0
  637. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfForward.h +127 -0
  638. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFrameBuffer.cpp +228 -0
  639. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFrameBuffer.h +386 -0
  640. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFramesPerSecond.cpp +76 -0
  641. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFramesPerSecond.h +94 -0
  642. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericInputFile.cpp +76 -0
  643. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericInputFile.h +58 -0
  644. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericOutputFile.cpp +112 -0
  645. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericOutputFile.h +62 -0
  646. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHeader.cpp +1283 -0
  647. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHeader.h +699 -0
  648. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHuf.cpp +1114 -0
  649. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHuf.h +82 -0
  650. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIO.cpp +110 -0
  651. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIO.h +255 -0
  652. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputFile.cpp +895 -0
  653. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputFile.h +240 -0
  654. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPart.cpp +114 -0
  655. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPart.h +84 -0
  656. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPartData.cpp +51 -0
  657. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPartData.h +69 -0
  658. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputStreamMutex.h +68 -0
  659. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInt64.h +56 -0
  660. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIntAttribute.cpp +57 -0
  661. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIntAttribute.h +58 -0
  662. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCode.cpp +217 -0
  663. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCode.h +167 -0
  664. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCodeAttribute.cpp +99 -0
  665. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCodeAttribute.h +73 -0
  666. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrder.h +69 -0
  667. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrderAttribute.cpp +78 -0
  668. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrderAttribute.h +72 -0
  669. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLut.cpp +178 -0
  670. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLut.h +188 -0
  671. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMatrixAttribute.cpp +263 -0
  672. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMatrixAttribute.h +83 -0
  673. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMisc.cpp +1872 -0
  674. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMisc.h +466 -0
  675. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartInputFile.cpp +783 -0
  676. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartInputFile.h +128 -0
  677. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartOutputFile.cpp +519 -0
  678. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartOutputFile.h +118 -0
  679. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiView.cpp +435 -0
  680. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiView.h +187 -0
  681. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfName.h +150 -0
  682. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfNamespace.h +115 -0
  683. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOpaqueAttribute.cpp +126 -0
  684. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOpaqueAttribute.h +110 -0
  685. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOptimizedPixelReading.h +646 -0
  686. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputFile.cpp +1378 -0
  687. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputFile.h +263 -0
  688. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPart.cpp +105 -0
  689. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPart.h +77 -0
  690. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPartData.cpp +52 -0
  691. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPartData.h +62 -0
  692. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputStreamMutex.h +70 -0
  693. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartHelper.h +262 -0
  694. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartType.cpp +63 -0
  695. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartType.h +62 -0
  696. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPixelType.h +67 -0
  697. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPizCompressor.cpp +667 -0
  698. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPizCompressor.h +117 -0
  699. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImage.cpp +104 -0
  700. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImage.h +135 -0
  701. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImageAttribute.cpp +103 -0
  702. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImageAttribute.h +70 -0
  703. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPxr24Compressor.cpp +553 -0
  704. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPxr24Compressor.h +109 -0
  705. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRational.cpp +127 -0
  706. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRational.h +98 -0
  707. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRationalAttribute.cpp +74 -0
  708. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRationalAttribute.h +69 -0
  709. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgba.h +109 -0
  710. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaFile.cpp +1405 -0
  711. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaFile.h +346 -0
  712. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaYca.cpp +497 -0
  713. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaYca.h +259 -0
  714. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRle.cpp +157 -0
  715. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRle.h +63 -0
  716. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRleCompressor.cpp +220 -0
  717. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRleCompressor.h +80 -0
  718. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfScanLineInputFile.cpp +1702 -0
  719. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfScanLineInputFile.h +210 -0
  720. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSimd.h +59 -0
  721. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStandardAttributes.cpp +125 -0
  722. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStandardAttributes.h +382 -0
  723. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStdIO.cpp +242 -0
  724. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStdIO.h +160 -0
  725. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringAttribute.cpp +80 -0
  726. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringAttribute.h +71 -0
  727. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringVectorAttribute.cpp +100 -0
  728. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringVectorAttribute.h +74 -0
  729. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSystemSpecific.cpp +129 -0
  730. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSystemSpecific.h +172 -0
  731. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTestFile.cpp +216 -0
  732. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTestFile.h +97 -0
  733. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfThreading.cpp +62 -0
  734. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfThreading.h +95 -0
  735. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescription.h +107 -0
  736. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescriptionAttribute.cpp +86 -0
  737. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescriptionAttribute.h +72 -0
  738. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileOffsets.cpp +552 -0
  739. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileOffsets.h +125 -0
  740. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputFile.cpp +1533 -0
  741. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputFile.h +401 -0
  742. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputPart.cpp +208 -0
  743. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputPart.h +100 -0
  744. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledMisc.cpp +389 -0
  745. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledMisc.h +106 -0
  746. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputFile.cpp +1841 -0
  747. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputFile.h +495 -0
  748. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputPart.cpp +228 -0
  749. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputPart.h +105 -0
  750. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledRgbaFile.cpp +1163 -0
  751. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledRgbaFile.h +482 -0
  752. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCode.cpp +431 -0
  753. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCode.h +242 -0
  754. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCodeAttribute.cpp +79 -0
  755. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCodeAttribute.h +74 -0
  756. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVecAttribute.cpp +217 -0
  757. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVecAttribute.h +100 -0
  758. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVersion.cpp +60 -0
  759. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVersion.h +136 -0
  760. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfWav.cpp +391 -0
  761. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfWav.h +78 -0
  762. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfXdr.h +927 -0
  763. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZip.cpp +196 -0
  764. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZip.h +78 -0
  765. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZipCompressor.cpp +127 -0
  766. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZipCompressor.h +89 -0
  767. data/vendor/FreeImage/Source/OpenEXR/IlmImf/b44ExpLogTable.cpp +136 -0
  768. data/vendor/FreeImage/Source/OpenEXR/IlmImf/b44ExpLogTable.h +16396 -0
  769. data/vendor/FreeImage/Source/OpenEXR/IlmImf/dwaLookups.cpp +573 -0
  770. data/vendor/FreeImage/Source/OpenEXR/IlmImf/dwaLookups.h +98334 -0
  771. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThread.cpp +80 -0
  772. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThread.h +143 -0
  773. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadExport.h +46 -0
  774. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadForward.h +52 -0
  775. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutex.cpp +59 -0
  776. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutex.h +160 -0
  777. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutexPosix.cpp +85 -0
  778. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutexWin32.cpp +79 -0
  779. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadNamespace.h +114 -0
  780. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPool.cpp +483 -0
  781. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPool.h +160 -0
  782. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPosix.cpp +98 -0
  783. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphore.cpp +60 -0
  784. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphore.h +112 -0
  785. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphorePosix.cpp +106 -0
  786. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphorePosixCompat.cpp +155 -0
  787. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphoreWin32.cpp +153 -0
  788. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadWin32.cpp +100 -0
  789. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBox.cpp +37 -0
  790. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBox.h +849 -0
  791. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBoxAlgo.h +1016 -0
  792. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColor.h +736 -0
  793. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColorAlgo.cpp +178 -0
  794. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColorAlgo.h +257 -0
  795. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathEuler.h +926 -0
  796. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathExc.h +73 -0
  797. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathExport.h +46 -0
  798. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathForward.h +72 -0
  799. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrame.h +192 -0
  800. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrustum.h +741 -0
  801. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrustumTest.h +417 -0
  802. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFun.cpp +181 -0
  803. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFun.h +269 -0
  804. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathGL.h +166 -0
  805. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathGLU.h +54 -0
  806. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathHalfLimits.h +68 -0
  807. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathInt64.h +62 -0
  808. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathInterval.h +226 -0
  809. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLimits.h +268 -0
  810. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLine.h +185 -0
  811. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLineAlgo.h +288 -0
  812. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMath.h +208 -0
  813. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrix.h +3441 -0
  814. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrixAlgo.cpp +1252 -0
  815. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrixAlgo.h +1425 -0
  816. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathNamespace.h +115 -0
  817. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathPlane.h +257 -0
  818. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathPlatform.h +112 -0
  819. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathQuat.h +964 -0
  820. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRandom.cpp +194 -0
  821. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRandom.h +401 -0
  822. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRoots.h +219 -0
  823. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathShear.cpp +54 -0
  824. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathShear.h +656 -0
  825. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathSphere.h +177 -0
  826. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVec.cpp +583 -0
  827. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVec.h +2227 -0
  828. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVecAlgo.h +147 -0
  829. data/vendor/FreeImage/Source/OpenEXR/OpenEXRConfig.h +72 -0
  830. data/vendor/FreeImage/Source/Plugin.h +144 -0
  831. data/vendor/FreeImage/Source/Quantizers.h +354 -0
  832. data/vendor/FreeImage/Source/ToneMapping.h +44 -0
  833. data/vendor/FreeImage/Source/Utilities.h +516 -0
  834. data/vendor/FreeImage/Source/ZLib/adler32.c +179 -0
  835. data/vendor/FreeImage/Source/ZLib/compress.c +80 -0
  836. data/vendor/FreeImage/Source/ZLib/crc32.c +425 -0
  837. data/vendor/FreeImage/Source/ZLib/crc32.h +441 -0
  838. data/vendor/FreeImage/Source/ZLib/deflate.c +1967 -0
  839. data/vendor/FreeImage/Source/ZLib/deflate.h +346 -0
  840. data/vendor/FreeImage/Source/ZLib/gzclose.c +25 -0
  841. data/vendor/FreeImage/Source/ZLib/gzguts.h +209 -0
  842. data/vendor/FreeImage/Source/ZLib/gzlib.c +634 -0
  843. data/vendor/FreeImage/Source/ZLib/gzread.c +594 -0
  844. data/vendor/FreeImage/Source/ZLib/gzwrite.c +577 -0
  845. data/vendor/FreeImage/Source/ZLib/infback.c +640 -0
  846. data/vendor/FreeImage/Source/ZLib/inffast.c +340 -0
  847. data/vendor/FreeImage/Source/ZLib/inffast.h +11 -0
  848. data/vendor/FreeImage/Source/ZLib/inffixed.h +94 -0
  849. data/vendor/FreeImage/Source/ZLib/inflate.c +1512 -0
  850. data/vendor/FreeImage/Source/ZLib/inflate.h +122 -0
  851. data/vendor/FreeImage/Source/ZLib/inftrees.c +306 -0
  852. data/vendor/FreeImage/Source/ZLib/inftrees.h +62 -0
  853. data/vendor/FreeImage/Source/ZLib/trees.c +1226 -0
  854. data/vendor/FreeImage/Source/ZLib/trees.h +128 -0
  855. data/vendor/FreeImage/Source/ZLib/uncompr.c +59 -0
  856. data/vendor/FreeImage/Source/ZLib/zconf.h +511 -0
  857. data/vendor/FreeImage/Source/ZLib/zlib.h +1768 -0
  858. data/vendor/FreeImage/Source/ZLib/zutil.c +324 -0
  859. data/vendor/FreeImage/Source/ZLib/zutil.h +253 -0
  860. metadata +931 -0
@@ -0,0 +1,1489 @@
1
+ // Copyright 2012 Google Inc. All Rights Reserved.
2
+ //
3
+ // Use of this source code is governed by a BSD-style license
4
+ // that can be found in the COPYING file in the root of the source
5
+ // tree. An additional intellectual property rights grant can be found
6
+ // in the file PATENTS. All contributing project authors may
7
+ // be found in the AUTHORS file in the root of the source tree.
8
+ // -----------------------------------------------------------------------------
9
+ //
10
+ // ARM NEON version of dsp functions and loop filtering.
11
+ //
12
+ // Authors: Somnath Banerjee (somnath@google.com)
13
+ // Johann Koenig (johannkoenig@google.com)
14
+
15
+ #include "./dsp.h"
16
+
17
+ #if defined(WEBP_USE_NEON)
18
+
19
+ #include "./neon.h"
20
+ #include "../dec/vp8i.h"
21
+
22
+ //------------------------------------------------------------------------------
23
+ // NxM Loading functions
24
+
25
+ // Load/Store vertical edge
26
+ #define LOAD8x4(c1, c2, c3, c4, b1, b2, stride) \
27
+ "vld4.8 {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \
28
+ "vld4.8 {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \
29
+ "vld4.8 {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \
30
+ "vld4.8 {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \
31
+ "vld4.8 {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \
32
+ "vld4.8 {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \
33
+ "vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
34
+ "vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
35
+
36
+ #define STORE8x2(c1, c2, p, stride) \
37
+ "vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \
38
+ "vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \
39
+ "vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \
40
+ "vst2.8 {" #c1"[3], " #c2"[3]}," #p "," #stride " \n" \
41
+ "vst2.8 {" #c1"[4], " #c2"[4]}," #p "," #stride " \n" \
42
+ "vst2.8 {" #c1"[5], " #c2"[5]}," #p "," #stride " \n" \
43
+ "vst2.8 {" #c1"[6], " #c2"[6]}," #p "," #stride " \n" \
44
+ "vst2.8 {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
45
+
46
+ #if !defined(WORK_AROUND_GCC)
47
+
48
+ // This intrinsics version makes gcc-4.6.3 crash during Load4x??() compilation
49
+ // (register alloc, probably). The variants somewhat mitigate the problem, but
50
+ // not quite. HFilter16i() remains problematic.
51
+ static WEBP_INLINE uint8x8x4_t Load4x8(const uint8_t* const src, int stride) {
52
+ const uint8x8_t zero = vdup_n_u8(0);
53
+ uint8x8x4_t out;
54
+ INIT_VECTOR4(out, zero, zero, zero, zero);
55
+ out = vld4_lane_u8(src + 0 * stride, out, 0);
56
+ out = vld4_lane_u8(src + 1 * stride, out, 1);
57
+ out = vld4_lane_u8(src + 2 * stride, out, 2);
58
+ out = vld4_lane_u8(src + 3 * stride, out, 3);
59
+ out = vld4_lane_u8(src + 4 * stride, out, 4);
60
+ out = vld4_lane_u8(src + 5 * stride, out, 5);
61
+ out = vld4_lane_u8(src + 6 * stride, out, 6);
62
+ out = vld4_lane_u8(src + 7 * stride, out, 7);
63
+ return out;
64
+ }
65
+
66
+ static WEBP_INLINE void Load4x16(const uint8_t* const src, int stride,
67
+ uint8x16_t* const p1, uint8x16_t* const p0,
68
+ uint8x16_t* const q0, uint8x16_t* const q1) {
69
+ // row0 = p1[0..7]|p0[0..7]|q0[0..7]|q1[0..7]
70
+ // row8 = p1[8..15]|p0[8..15]|q0[8..15]|q1[8..15]
71
+ const uint8x8x4_t row0 = Load4x8(src - 2 + 0 * stride, stride);
72
+ const uint8x8x4_t row8 = Load4x8(src - 2 + 8 * stride, stride);
73
+ *p1 = vcombine_u8(row0.val[0], row8.val[0]);
74
+ *p0 = vcombine_u8(row0.val[1], row8.val[1]);
75
+ *q0 = vcombine_u8(row0.val[2], row8.val[2]);
76
+ *q1 = vcombine_u8(row0.val[3], row8.val[3]);
77
+ }
78
+
79
+ #else // WORK_AROUND_GCC
80
+
81
+ #define LOADQ_LANE_32b(VALUE, LANE) do { \
82
+ (VALUE) = vld1q_lane_u32((const uint32_t*)src, (VALUE), (LANE)); \
83
+ src += stride; \
84
+ } while (0)
85
+
86
+ static WEBP_INLINE void Load4x16(const uint8_t* src, int stride,
87
+ uint8x16_t* const p1, uint8x16_t* const p0,
88
+ uint8x16_t* const q0, uint8x16_t* const q1) {
89
+ const uint32x4_t zero = vdupq_n_u32(0);
90
+ uint32x4x4_t in;
91
+ INIT_VECTOR4(in, zero, zero, zero, zero);
92
+ src -= 2;
93
+ LOADQ_LANE_32b(in.val[0], 0);
94
+ LOADQ_LANE_32b(in.val[1], 0);
95
+ LOADQ_LANE_32b(in.val[2], 0);
96
+ LOADQ_LANE_32b(in.val[3], 0);
97
+ LOADQ_LANE_32b(in.val[0], 1);
98
+ LOADQ_LANE_32b(in.val[1], 1);
99
+ LOADQ_LANE_32b(in.val[2], 1);
100
+ LOADQ_LANE_32b(in.val[3], 1);
101
+ LOADQ_LANE_32b(in.val[0], 2);
102
+ LOADQ_LANE_32b(in.val[1], 2);
103
+ LOADQ_LANE_32b(in.val[2], 2);
104
+ LOADQ_LANE_32b(in.val[3], 2);
105
+ LOADQ_LANE_32b(in.val[0], 3);
106
+ LOADQ_LANE_32b(in.val[1], 3);
107
+ LOADQ_LANE_32b(in.val[2], 3);
108
+ LOADQ_LANE_32b(in.val[3], 3);
109
+ // Transpose four 4x4 parts:
110
+ {
111
+ const uint8x16x2_t row01 = vtrnq_u8(vreinterpretq_u8_u32(in.val[0]),
112
+ vreinterpretq_u8_u32(in.val[1]));
113
+ const uint8x16x2_t row23 = vtrnq_u8(vreinterpretq_u8_u32(in.val[2]),
114
+ vreinterpretq_u8_u32(in.val[3]));
115
+ const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]),
116
+ vreinterpretq_u16_u8(row23.val[0]));
117
+ const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]),
118
+ vreinterpretq_u16_u8(row23.val[1]));
119
+ *p1 = vreinterpretq_u8_u16(row02.val[0]);
120
+ *p0 = vreinterpretq_u8_u16(row13.val[0]);
121
+ *q0 = vreinterpretq_u8_u16(row02.val[1]);
122
+ *q1 = vreinterpretq_u8_u16(row13.val[1]);
123
+ }
124
+ }
125
+ #undef LOADQ_LANE_32b
126
+
127
+ #endif // !WORK_AROUND_GCC
128
+
129
+ static WEBP_INLINE void Load8x16(const uint8_t* const src, int stride,
130
+ uint8x16_t* const p3, uint8x16_t* const p2,
131
+ uint8x16_t* const p1, uint8x16_t* const p0,
132
+ uint8x16_t* const q0, uint8x16_t* const q1,
133
+ uint8x16_t* const q2, uint8x16_t* const q3) {
134
+ Load4x16(src - 2, stride, p3, p2, p1, p0);
135
+ Load4x16(src + 2, stride, q0, q1, q2, q3);
136
+ }
137
+
138
+ static WEBP_INLINE void Load16x4(const uint8_t* const src, int stride,
139
+ uint8x16_t* const p1, uint8x16_t* const p0,
140
+ uint8x16_t* const q0, uint8x16_t* const q1) {
141
+ *p1 = vld1q_u8(src - 2 * stride);
142
+ *p0 = vld1q_u8(src - 1 * stride);
143
+ *q0 = vld1q_u8(src + 0 * stride);
144
+ *q1 = vld1q_u8(src + 1 * stride);
145
+ }
146
+
147
+ static WEBP_INLINE void Load16x8(const uint8_t* const src, int stride,
148
+ uint8x16_t* const p3, uint8x16_t* const p2,
149
+ uint8x16_t* const p1, uint8x16_t* const p0,
150
+ uint8x16_t* const q0, uint8x16_t* const q1,
151
+ uint8x16_t* const q2, uint8x16_t* const q3) {
152
+ Load16x4(src - 2 * stride, stride, p3, p2, p1, p0);
153
+ Load16x4(src + 2 * stride, stride, q0, q1, q2, q3);
154
+ }
155
+
156
+ static WEBP_INLINE void Load8x8x2(const uint8_t* const u,
157
+ const uint8_t* const v,
158
+ int stride,
159
+ uint8x16_t* const p3, uint8x16_t* const p2,
160
+ uint8x16_t* const p1, uint8x16_t* const p0,
161
+ uint8x16_t* const q0, uint8x16_t* const q1,
162
+ uint8x16_t* const q2, uint8x16_t* const q3) {
163
+ // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
164
+ // and the v-samples on the higher half.
165
+ *p3 = vcombine_u8(vld1_u8(u - 4 * stride), vld1_u8(v - 4 * stride));
166
+ *p2 = vcombine_u8(vld1_u8(u - 3 * stride), vld1_u8(v - 3 * stride));
167
+ *p1 = vcombine_u8(vld1_u8(u - 2 * stride), vld1_u8(v - 2 * stride));
168
+ *p0 = vcombine_u8(vld1_u8(u - 1 * stride), vld1_u8(v - 1 * stride));
169
+ *q0 = vcombine_u8(vld1_u8(u + 0 * stride), vld1_u8(v + 0 * stride));
170
+ *q1 = vcombine_u8(vld1_u8(u + 1 * stride), vld1_u8(v + 1 * stride));
171
+ *q2 = vcombine_u8(vld1_u8(u + 2 * stride), vld1_u8(v + 2 * stride));
172
+ *q3 = vcombine_u8(vld1_u8(u + 3 * stride), vld1_u8(v + 3 * stride));
173
+ }
174
+
175
+ #if !defined(WORK_AROUND_GCC)
176
+
177
+ #define LOAD_UV_8(ROW) \
178
+ vcombine_u8(vld1_u8(u - 4 + (ROW) * stride), vld1_u8(v - 4 + (ROW) * stride))
179
+
180
+ static WEBP_INLINE void Load8x8x2T(const uint8_t* const u,
181
+ const uint8_t* const v,
182
+ int stride,
183
+ uint8x16_t* const p3, uint8x16_t* const p2,
184
+ uint8x16_t* const p1, uint8x16_t* const p0,
185
+ uint8x16_t* const q0, uint8x16_t* const q1,
186
+ uint8x16_t* const q2, uint8x16_t* const q3) {
187
+ // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
188
+ // and the v-samples on the higher half.
189
+ const uint8x16_t row0 = LOAD_UV_8(0);
190
+ const uint8x16_t row1 = LOAD_UV_8(1);
191
+ const uint8x16_t row2 = LOAD_UV_8(2);
192
+ const uint8x16_t row3 = LOAD_UV_8(3);
193
+ const uint8x16_t row4 = LOAD_UV_8(4);
194
+ const uint8x16_t row5 = LOAD_UV_8(5);
195
+ const uint8x16_t row6 = LOAD_UV_8(6);
196
+ const uint8x16_t row7 = LOAD_UV_8(7);
197
+ // Perform two side-by-side 8x8 transposes
198
+ // u00 u01 u02 u03 u04 u05 u06 u07 | v00 v01 v02 v03 v04 v05 v06 v07
199
+ // u10 u11 u12 u13 u14 u15 u16 u17 | v10 v11 v12 ...
200
+ // u20 u21 u22 u23 u24 u25 u26 u27 | v20 v21 ...
201
+ // u30 u31 u32 u33 u34 u35 u36 u37 | ...
202
+ // u40 u41 u42 u43 u44 u45 u46 u47 | ...
203
+ // u50 u51 u52 u53 u54 u55 u56 u57 | ...
204
+ // u60 u61 u62 u63 u64 u65 u66 u67 | v60 ...
205
+ // u70 u71 u72 u73 u74 u75 u76 u77 | v70 v71 v72 ...
206
+ const uint8x16x2_t row01 = vtrnq_u8(row0, row1); // u00 u10 u02 u12 ...
207
+ // u01 u11 u03 u13 ...
208
+ const uint8x16x2_t row23 = vtrnq_u8(row2, row3); // u20 u30 u22 u32 ...
209
+ // u21 u31 u23 u33 ...
210
+ const uint8x16x2_t row45 = vtrnq_u8(row4, row5); // ...
211
+ const uint8x16x2_t row67 = vtrnq_u8(row6, row7); // ...
212
+ const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]),
213
+ vreinterpretq_u16_u8(row23.val[0]));
214
+ const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]),
215
+ vreinterpretq_u16_u8(row23.val[1]));
216
+ const uint16x8x2_t row46 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[0]),
217
+ vreinterpretq_u16_u8(row67.val[0]));
218
+ const uint16x8x2_t row57 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[1]),
219
+ vreinterpretq_u16_u8(row67.val[1]));
220
+ const uint32x4x2_t row04 = vtrnq_u32(vreinterpretq_u32_u16(row02.val[0]),
221
+ vreinterpretq_u32_u16(row46.val[0]));
222
+ const uint32x4x2_t row26 = vtrnq_u32(vreinterpretq_u32_u16(row02.val[1]),
223
+ vreinterpretq_u32_u16(row46.val[1]));
224
+ const uint32x4x2_t row15 = vtrnq_u32(vreinterpretq_u32_u16(row13.val[0]),
225
+ vreinterpretq_u32_u16(row57.val[0]));
226
+ const uint32x4x2_t row37 = vtrnq_u32(vreinterpretq_u32_u16(row13.val[1]),
227
+ vreinterpretq_u32_u16(row57.val[1]));
228
+ *p3 = vreinterpretq_u8_u32(row04.val[0]);
229
+ *p2 = vreinterpretq_u8_u32(row15.val[0]);
230
+ *p1 = vreinterpretq_u8_u32(row26.val[0]);
231
+ *p0 = vreinterpretq_u8_u32(row37.val[0]);
232
+ *q0 = vreinterpretq_u8_u32(row04.val[1]);
233
+ *q1 = vreinterpretq_u8_u32(row15.val[1]);
234
+ *q2 = vreinterpretq_u8_u32(row26.val[1]);
235
+ *q3 = vreinterpretq_u8_u32(row37.val[1]);
236
+ }
237
+ #undef LOAD_UV_8
238
+
239
+ #endif // !WORK_AROUND_GCC
240
+
241
+ static WEBP_INLINE void Store2x8(const uint8x8x2_t v,
242
+ uint8_t* const dst, int stride) {
243
+ vst2_lane_u8(dst + 0 * stride, v, 0);
244
+ vst2_lane_u8(dst + 1 * stride, v, 1);
245
+ vst2_lane_u8(dst + 2 * stride, v, 2);
246
+ vst2_lane_u8(dst + 3 * stride, v, 3);
247
+ vst2_lane_u8(dst + 4 * stride, v, 4);
248
+ vst2_lane_u8(dst + 5 * stride, v, 5);
249
+ vst2_lane_u8(dst + 6 * stride, v, 6);
250
+ vst2_lane_u8(dst + 7 * stride, v, 7);
251
+ }
252
+
253
+ static WEBP_INLINE void Store2x16(const uint8x16_t p0, const uint8x16_t q0,
254
+ uint8_t* const dst, int stride) {
255
+ uint8x8x2_t lo, hi;
256
+ lo.val[0] = vget_low_u8(p0);
257
+ lo.val[1] = vget_low_u8(q0);
258
+ hi.val[0] = vget_high_u8(p0);
259
+ hi.val[1] = vget_high_u8(q0);
260
+ Store2x8(lo, dst - 1 + 0 * stride, stride);
261
+ Store2x8(hi, dst - 1 + 8 * stride, stride);
262
+ }
263
+
264
+ #if !defined(WORK_AROUND_GCC)
265
+ static WEBP_INLINE void Store4x8(const uint8x8x4_t v,
266
+ uint8_t* const dst, int stride) {
267
+ vst4_lane_u8(dst + 0 * stride, v, 0);
268
+ vst4_lane_u8(dst + 1 * stride, v, 1);
269
+ vst4_lane_u8(dst + 2 * stride, v, 2);
270
+ vst4_lane_u8(dst + 3 * stride, v, 3);
271
+ vst4_lane_u8(dst + 4 * stride, v, 4);
272
+ vst4_lane_u8(dst + 5 * stride, v, 5);
273
+ vst4_lane_u8(dst + 6 * stride, v, 6);
274
+ vst4_lane_u8(dst + 7 * stride, v, 7);
275
+ }
276
+
277
+ static WEBP_INLINE void Store4x16(const uint8x16_t p1, const uint8x16_t p0,
278
+ const uint8x16_t q0, const uint8x16_t q1,
279
+ uint8_t* const dst, int stride) {
280
+ uint8x8x4_t lo, hi;
281
+ INIT_VECTOR4(lo,
282
+ vget_low_u8(p1), vget_low_u8(p0),
283
+ vget_low_u8(q0), vget_low_u8(q1));
284
+ INIT_VECTOR4(hi,
285
+ vget_high_u8(p1), vget_high_u8(p0),
286
+ vget_high_u8(q0), vget_high_u8(q1));
287
+ Store4x8(lo, dst - 2 + 0 * stride, stride);
288
+ Store4x8(hi, dst - 2 + 8 * stride, stride);
289
+ }
290
+ #endif // !WORK_AROUND_GCC
291
+
292
+ static WEBP_INLINE void Store16x2(const uint8x16_t p0, const uint8x16_t q0,
293
+ uint8_t* const dst, int stride) {
294
+ vst1q_u8(dst - stride, p0);
295
+ vst1q_u8(dst, q0);
296
+ }
297
+
298
+ static WEBP_INLINE void Store16x4(const uint8x16_t p1, const uint8x16_t p0,
299
+ const uint8x16_t q0, const uint8x16_t q1,
300
+ uint8_t* const dst, int stride) {
301
+ Store16x2(p1, p0, dst - stride, stride);
302
+ Store16x2(q0, q1, dst + stride, stride);
303
+ }
304
+
305
+ static WEBP_INLINE void Store8x2x2(const uint8x16_t p0, const uint8x16_t q0,
306
+ uint8_t* const u, uint8_t* const v,
307
+ int stride) {
308
+ // p0 and q0 contain the u+v samples packed in low/high halves.
309
+ vst1_u8(u - stride, vget_low_u8(p0));
310
+ vst1_u8(u, vget_low_u8(q0));
311
+ vst1_u8(v - stride, vget_high_u8(p0));
312
+ vst1_u8(v, vget_high_u8(q0));
313
+ }
314
+
315
+ static WEBP_INLINE void Store8x4x2(const uint8x16_t p1, const uint8x16_t p0,
316
+ const uint8x16_t q0, const uint8x16_t q1,
317
+ uint8_t* const u, uint8_t* const v,
318
+ int stride) {
319
+ // The p1...q1 registers contain the u+v samples packed in low/high halves.
320
+ Store8x2x2(p1, p0, u - stride, v - stride, stride);
321
+ Store8x2x2(q0, q1, u + stride, v + stride, stride);
322
+ }
323
+
324
+ #if !defined(WORK_AROUND_GCC)
325
+
326
+ #define STORE6_LANE(DST, VAL0, VAL1, LANE) do { \
327
+ vst3_lane_u8((DST) - 3, (VAL0), (LANE)); \
328
+ vst3_lane_u8((DST) + 0, (VAL1), (LANE)); \
329
+ (DST) += stride; \
330
+ } while (0)
331
+
332
+ static WEBP_INLINE void Store6x8x2(const uint8x16_t p2, const uint8x16_t p1,
333
+ const uint8x16_t p0, const uint8x16_t q0,
334
+ const uint8x16_t q1, const uint8x16_t q2,
335
+ uint8_t* u, uint8_t* v,
336
+ int stride) {
337
+ uint8x8x3_t u0, u1, v0, v1;
338
+ INIT_VECTOR3(u0, vget_low_u8(p2), vget_low_u8(p1), vget_low_u8(p0));
339
+ INIT_VECTOR3(u1, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
340
+ INIT_VECTOR3(v0, vget_high_u8(p2), vget_high_u8(p1), vget_high_u8(p0));
341
+ INIT_VECTOR3(v1, vget_high_u8(q0), vget_high_u8(q1), vget_high_u8(q2));
342
+ STORE6_LANE(u, u0, u1, 0);
343
+ STORE6_LANE(u, u0, u1, 1);
344
+ STORE6_LANE(u, u0, u1, 2);
345
+ STORE6_LANE(u, u0, u1, 3);
346
+ STORE6_LANE(u, u0, u1, 4);
347
+ STORE6_LANE(u, u0, u1, 5);
348
+ STORE6_LANE(u, u0, u1, 6);
349
+ STORE6_LANE(u, u0, u1, 7);
350
+ STORE6_LANE(v, v0, v1, 0);
351
+ STORE6_LANE(v, v0, v1, 1);
352
+ STORE6_LANE(v, v0, v1, 2);
353
+ STORE6_LANE(v, v0, v1, 3);
354
+ STORE6_LANE(v, v0, v1, 4);
355
+ STORE6_LANE(v, v0, v1, 5);
356
+ STORE6_LANE(v, v0, v1, 6);
357
+ STORE6_LANE(v, v0, v1, 7);
358
+ }
359
+ #undef STORE6_LANE
360
+
361
+ static WEBP_INLINE void Store4x8x2(const uint8x16_t p1, const uint8x16_t p0,
362
+ const uint8x16_t q0, const uint8x16_t q1,
363
+ uint8_t* const u, uint8_t* const v,
364
+ int stride) {
365
+ uint8x8x4_t u0, v0;
366
+ INIT_VECTOR4(u0,
367
+ vget_low_u8(p1), vget_low_u8(p0),
368
+ vget_low_u8(q0), vget_low_u8(q1));
369
+ INIT_VECTOR4(v0,
370
+ vget_high_u8(p1), vget_high_u8(p0),
371
+ vget_high_u8(q0), vget_high_u8(q1));
372
+ vst4_lane_u8(u - 2 + 0 * stride, u0, 0);
373
+ vst4_lane_u8(u - 2 + 1 * stride, u0, 1);
374
+ vst4_lane_u8(u - 2 + 2 * stride, u0, 2);
375
+ vst4_lane_u8(u - 2 + 3 * stride, u0, 3);
376
+ vst4_lane_u8(u - 2 + 4 * stride, u0, 4);
377
+ vst4_lane_u8(u - 2 + 5 * stride, u0, 5);
378
+ vst4_lane_u8(u - 2 + 6 * stride, u0, 6);
379
+ vst4_lane_u8(u - 2 + 7 * stride, u0, 7);
380
+ vst4_lane_u8(v - 2 + 0 * stride, v0, 0);
381
+ vst4_lane_u8(v - 2 + 1 * stride, v0, 1);
382
+ vst4_lane_u8(v - 2 + 2 * stride, v0, 2);
383
+ vst4_lane_u8(v - 2 + 3 * stride, v0, 3);
384
+ vst4_lane_u8(v - 2 + 4 * stride, v0, 4);
385
+ vst4_lane_u8(v - 2 + 5 * stride, v0, 5);
386
+ vst4_lane_u8(v - 2 + 6 * stride, v0, 6);
387
+ vst4_lane_u8(v - 2 + 7 * stride, v0, 7);
388
+ }
389
+
390
+ #endif // !WORK_AROUND_GCC
391
+
392
+ // Zero extend 'v' to an int16x8_t.
393
+ static WEBP_INLINE int16x8_t ConvertU8ToS16(uint8x8_t v) {
394
+ return vreinterpretq_s16_u16(vmovl_u8(v));
395
+ }
396
+
397
+ // Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
398
+ // to the corresponding rows of 'dst'.
399
+ static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
400
+ const int16x8_t dst01,
401
+ const int16x8_t dst23) {
402
+ // Unsigned saturate to 8b.
403
+ const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
404
+ const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
405
+
406
+ // Store the results.
407
+ vst1_lane_u32((uint32_t*)(dst + 0 * BPS), vreinterpret_u32_u8(dst01_u8), 0);
408
+ vst1_lane_u32((uint32_t*)(dst + 1 * BPS), vreinterpret_u32_u8(dst01_u8), 1);
409
+ vst1_lane_u32((uint32_t*)(dst + 2 * BPS), vreinterpret_u32_u8(dst23_u8), 0);
410
+ vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
411
+ }
412
+
413
+ static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
414
+ uint8_t* const dst) {  // dst[4x4] = clip8(dst + ((rowXY + 4) >> 3)), rows are BPS bytes apart
415
+ uint32x2_t dst01 = vdup_n_u32(0);
416
+ uint32x2_t dst23 = vdup_n_u32(0);
417
+
418
+ // Load the four 4-pixel rows of 'dst' (rows 0/1 into dst01, rows 2/3 into dst23).
419
+ dst01 = vld1_lane_u32((uint32_t*)(dst + 0 * BPS), dst01, 0);
420
+ dst23 = vld1_lane_u32((uint32_t*)(dst + 2 * BPS), dst23, 0);
421
+ dst01 = vld1_lane_u32((uint32_t*)(dst + 1 * BPS), dst01, 1);
422
+ dst23 = vld1_lane_u32((uint32_t*)(dst + 3 * BPS), dst23, 1);
423
+
424
+ {
425
+ // Zero-extend the pixels to 16b.
426
+ const int16x8_t dst01_s16 = ConvertU8ToS16(vreinterpret_u8_u32(dst01));
427
+ const int16x8_t dst23_s16 = ConvertU8ToS16(vreinterpret_u8_u32(dst23));
428
+
429
+ // Descale the rows with rounding (>> 3) and accumulate them onto the pixels.
430
+ const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
431
+ const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
432
+ // Saturate to unsigned 8b and write the result back to 'dst'.
433
+ SaturateAndStore4x4(dst, out01, out23);
434
+ }
435
+ }
436
+
437
+ //-----------------------------------------------------------------------------
438
+ // Simple In-loop filtering (Paragraph 15.2)
439
+
440
+ static uint8x16_t NeedsFilter(const uint8x16_t p1, const uint8x16_t p0,
441
+ const uint8x16_t q0, const uint8x16_t q1,
442
+ int thresh) {  // returns per-lane 0xff where 2*|p0-q0| + |p1-q1|/2 <= thresh, else 0
443
+ const uint8x16_t thresh_v = vdupq_n_u8((uint8_t)thresh);  // thresh truncated to 8b, all lanes
444
+ const uint8x16_t a_p0_q0 = vabdq_u8(p0, q0); // abs(p0-q0)
445
+ const uint8x16_t a_p1_q1 = vabdq_u8(p1, q1); // abs(p1-q1)
446
+ const uint8x16_t a_p0_q0_2 = vqaddq_u8(a_p0_q0, a_p0_q0); // 2 * abs(p0-q0), saturating
447
+ const uint8x16_t a_p1_q1_2 = vshrq_n_u8(a_p1_q1, 1); // abs(p1-q1) / 2 (despite the '_2' suffix)
448
+ const uint8x16_t sum = vqaddq_u8(a_p0_q0_2, a_p1_q1_2);  // saturating sum of the two terms
449
+ const uint8x16_t mask = vcgeq_u8(thresh_v, sum);  // thresh >= sum
450
+ return mask;
451
+ }
452
+
453
+ static int8x16_t FlipSign(const uint8x16_t v) {  // u8 -> s8: toggles bit 7 of every lane, i.e. v - 128
454
+ const uint8x16_t sign_bit = vdupq_n_u8(0x80);
455
+ return vreinterpretq_s8_u8(veorq_u8(v, sign_bit));
456
+ }
457
+
458
+ static uint8x16_t FlipSignBack(const int8x16_t v) {  // s8 -> u8: toggles bit 7 of every lane, undoing FlipSign()
459
+ const int8x16_t sign_bit = vdupq_n_s8(0x80);
460
+ return vreinterpretq_u8_s8(veorq_s8(v, sign_bit));
461
+ }
462
+
463
+ static int8x16_t GetBaseDelta(const int8x16_t p1, const int8x16_t p0,
464
+ const int8x16_t q0, const int8x16_t q1) {  // (p1-q1) + 3*(q0-p0); every step saturates in s8
465
+ const int8x16_t q0_p0 = vqsubq_s8(q0, p0); // (q0-p0)
466
+ const int8x16_t p1_q1 = vqsubq_s8(p1, q1); // (p1-q1)
467
+ const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0); // (p1-q1) + 1 * (q0 - p0)
468
+ const int8x16_t s2 = vqaddq_s8(q0_p0, s1); // (p1-q1) + 2 * (q0 - p0)
469
+ const int8x16_t s3 = vqaddq_s8(q0_p0, s2); // (p1-q1) + 3 * (q0 - p0)
470
+ return s3;
471
+ }
472
+
473
+ static int8x16_t GetBaseDelta0(const int8x16_t p0, const int8x16_t q0) {  // 3*(q0-p0), no p1/q1 term; saturating
474
+ const int8x16_t q0_p0 = vqsubq_s8(q0, p0); // (q0-p0)
475
+ const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0); // 2 * (q0 - p0)
476
+ const int8x16_t s2 = vqaddq_s8(q0_p0, s1); // 3 * (q0 - p0)
477
+ return s2;
478
+ }
479
+
480
+ //------------------------------------------------------------------------------
481
+
482
+ static void ApplyFilter2(const int8x16_t p0s, const int8x16_t q0s,
483
+ const int8x16_t delta,
484
+ uint8x16_t* const op0, uint8x16_t* const oq0) {  // inputs are sign-flipped (s8); outputs are u8
485
+ const int8x16_t kCst3 = vdupq_n_s8(0x03);
486
+ const int8x16_t kCst4 = vdupq_n_s8(0x04);
487
+ const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);  // delta + 3, saturating
488
+ const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4);  // delta + 4, saturating
489
+ const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3);  // (delta + 3) >> 3, arithmetic shift
490
+ const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3);  // (delta + 4) >> 3, arithmetic shift
491
+ const int8x16_t sp0 = vqaddq_s8(p0s, delta3);  // p0 + ((delta + 3) >> 3), saturating
492
+ const int8x16_t sq0 = vqsubq_s8(q0s, delta4);  // q0 - ((delta + 4) >> 3), saturating
493
+ *op0 = FlipSignBack(sp0);
494
+ *oq0 = FlipSignBack(sq0);
495
+ }
496
+
497
+ #if defined(WEBP_USE_INTRINSICS)
498
+
499
+ static void DoFilter2(const uint8x16_t p1, const uint8x16_t p0,
500
+ const uint8x16_t q0, const uint8x16_t q1,
501
+ const uint8x16_t mask,
502
+ uint8x16_t* const op0, uint8x16_t* const oq0) {
503
+ const int8x16_t p1s = FlipSign(p1);
504
+ const int8x16_t p0s = FlipSign(p0);
505
+ const int8x16_t q0s = FlipSign(q0);
506
+ const int8x16_t q1s = FlipSign(q1);
507
+ const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s);
508
+ const int8x16_t delta1 = vandq_s8(delta0, vreinterpretq_s8_u8(mask));
509
+ ApplyFilter2(p0s, q0s, delta1, op0, oq0);
510
+ }
511
+
512
+ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
513
+ uint8x16_t p1, p0, q0, q1, op0, oq0;
514
+ Load16x4(p, stride, &p1, &p0, &q0, &q1);
515
+ {
516
+ const uint8x16_t mask = NeedsFilter(p1, p0, q0, q1, thresh);
517
+ DoFilter2(p1, p0, q0, q1, mask, &op0, &oq0);
518
+ }
519
+ Store16x2(op0, oq0, p, stride);
520
+ }
521
+
522
+ static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
523
+ uint8x16_t p1, p0, q0, q1, oq0, op0;
524
+ Load4x16(p, stride, &p1, &p0, &q0, &q1);
525
+ {
526
+ const uint8x16_t mask = NeedsFilter(p1, p0, q0, q1, thresh);
527
+ DoFilter2(p1, p0, q0, q1, mask, &op0, &oq0);
528
+ }
529
+ Store2x16(op0, oq0, p, stride);
530
+ }
531
+
532
+ #else
533
+
534
+ #define QRegs "q0", "q1", "q2", "q3", \
535
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
536
+
537
+ #define FLIP_SIGN_BIT2(a, b, s) \
538
+ "veor " #a "," #a "," #s " \n" \
539
+ "veor " #b "," #b "," #s " \n" \
540
+
541
+ #define FLIP_SIGN_BIT4(a, b, c, d, s) \
542
+ FLIP_SIGN_BIT2(a, b, s) \
543
+ FLIP_SIGN_BIT2(c, d, s) \
544
+
545
+ #define NEEDS_FILTER(p1, p0, q0, q1, thresh, mask) /* clobbers q14 and q15 */ \
546
+ "vabd.u8 q15," #p0 "," #q0 " \n" /* abs(p0 - q0) */ \
547
+ "vabd.u8 q14," #p1 "," #q1 " \n" /* abs(p1 - q1) */ \
548
+ "vqadd.u8 q15, q15, q15 \n" /* abs(p0 - q0) * 2 */ \
549
+ "vshr.u8 q14, q14, #1 \n" /* abs(p1 - q1) / 2 */ \
550
+ "vqadd.u8 q15, q15, q14 \n" /* sum = abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \
551
+ "vdup.8 q14, " #thresh " \n" /* broadcast thresh to every byte lane */ \
552
+ "vcge.u8 " #mask ", q14, q15 \n" /* mask = (thresh >= sum), all-ones per lane when true */
553
+
554
+ #define GET_BASE_DELTA(p1, p0, q0, q1, o) /* result in 'o'; clobbers q15 */ \
555
+ "vqsub.s8 q15," #q0 "," #p0 " \n" /* (q0 - p0) */ \
556
+ "vqsub.s8 " #o "," #p1 "," #q1 " \n" /* (p1 - q1) */ \
557
+ "vqadd.s8 " #o "," #o ", q15 \n" /* (p1 - q1) + 1 * (q0 - p0) */ \
558
+ "vqadd.s8 " #o "," #o ", q15 \n" /* (p1 - q1) + 2 * (q0 - p0) */ \
559
+ "vqadd.s8 " #o "," #o ", q15 \n" /* (p1 - q1) + 3 * (q0 - p0) */
560
+
561
+ #define DO_SIMPLE_FILTER(p0, q0, fl) /* updates p0 and q0 in place; clobbers q15 */ \
562
+ "vmov.i8 q15, #0x03 \n" \
563
+ "vqadd.s8 q15, q15, " #fl " \n" /* filter1 = filter + 3 */ \
564
+ "vshr.s8 q15, q15, #3 \n" /* filter1 >>= 3 */ \
565
+ "vqadd.s8 " #p0 "," #p0 ", q15 \n" /* p0 += filter1 */ \
566
+ \
567
+ "vmov.i8 q15, #0x04 \n" \
568
+ "vqadd.s8 q15, q15, " #fl " \n" /* filter2 = filter + 4 */ \
569
+ "vshr.s8 q15, q15, #3 \n" /* filter2 >>= 3 */ \
570
+ "vqsub.s8 " #q0 "," #q0 ", q15 \n" /* q0 -= filter2 */
571
+
572
+ // Applies filter on 2 pixels (p0 and q0)
573
+ #define DO_FILTER2(p1, p0, q0, q1, thresh) \
574
+ NEEDS_FILTER(p1, p0, q0, q1, thresh, q9) /* filter mask in q9 */ \
575
+ "vmov.i8 q10, #0x80 \n" /* sign bit */ \
576
+ FLIP_SIGN_BIT4(p1, p0, q0, q1, q10) /* convert to signed value */ \
577
+ GET_BASE_DELTA(p1, p0, q0, q1, q11) /* get filter level */ \
578
+ "vand q9, q9, q11 \n" /* apply filter mask */ \
579
+ DO_SIMPLE_FILTER(p0, q0, q9) /* apply filter */ \
580
+ FLIP_SIGN_BIT2(p0, q0, q10)
581
+
582
+ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
583
+ __asm__ volatile (
584
+ "sub %[p], %[p], %[stride], lsl #1 \n" // p -= 2 * stride
585
+
586
+ "vld1.u8 {q1}, [%[p]], %[stride] \n" // p1
587
+ "vld1.u8 {q2}, [%[p]], %[stride] \n" // p0
588
+ "vld1.u8 {q3}, [%[p]], %[stride] \n" // q0
589
+ "vld1.u8 {q12}, [%[p]] \n" // q1
590
+
591
+ DO_FILTER2(q1, q2, q3, q12, %[thresh])
592
+
593
+ "sub %[p], %[p], %[stride], lsl #1 \n" // p -= 2 * stride
594
+
595
+ "vst1.u8 {q2}, [%[p]], %[stride] \n" // store op0
596
+ "vst1.u8 {q3}, [%[p]] \n" // store oq0
597
+ : [p] "+r"(p)
598
+ : [stride] "r"(stride), [thresh] "r"(thresh)
599
+ : "memory", QRegs
600
+ );
601
+ }
602
+
603
+ static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
604
+ __asm__ volatile (
605
+ "sub r4, %[p], #2 \n" // base1 = p - 2
606
+ "lsl r6, %[stride], #1 \n" // r6 = 2 * stride
607
+ "add r5, r4, %[stride] \n" // base2 = base1 + stride
608
+
609
+ LOAD8x4(d2, d3, d4, d5, [r4], [r5], r6)
610
+ LOAD8x4(d24, d25, d26, d27, [r4], [r5], r6)
611
+ "vswp d3, d24 \n" // p1:q1 p0:q3
612
+ "vswp d5, d26 \n" // q0:q2 q1:q4
613
+ "vswp q2, q12 \n" // p1:q1 p0:q2 q0:q3 q1:q4
614
+
615
+ DO_FILTER2(q1, q2, q12, q13, %[thresh])
616
+
617
+ "sub %[p], %[p], #1 \n" // p - 1
618
+
619
+ "vswp d5, d24 \n"
620
+ STORE8x2(d4, d5, [%[p]], %[stride])
621
+ STORE8x2(d24, d25, [%[p]], %[stride])
622
+
623
+ : [p] "+r"(p)
624
+ : [stride] "r"(stride), [thresh] "r"(thresh)
625
+ : "memory", "r4", "r5", "r6", QRegs
626
+ );
627
+ }
628
+
629
+ #endif // WEBP_USE_INTRINSICS
630
+
631
+ static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
632
+ uint32_t k;
633
+ for (k = 3; k != 0; --k) {
634
+ p += 4 * stride;
635
+ SimpleVFilter16(p, stride, thresh);
636
+ }
637
+ }
638
+
639
+ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
640
+ uint32_t k;
641
+ for (k = 3; k != 0; --k) {
642
+ p += 4;
643
+ SimpleHFilter16(p, stride, thresh);
644
+ }
645
+ }
646
+
647
+ //------------------------------------------------------------------------------
648
+ // Complex In-loop filtering (Paragraph 15.3)
649
+
650
+ static uint8x16_t NeedsHev(const uint8x16_t p1, const uint8x16_t p0,
651
+ const uint8x16_t q0, const uint8x16_t q1,
652
+ int hev_thresh) {  // per-lane 0xff where |p1-p0| or |q1-q0| exceeds hev_thresh, else 0
653
+ const uint8x16_t hev_thresh_v = vdupq_n_u8((uint8_t)hev_thresh);  // truncated to 8b, all lanes
654
+ const uint8x16_t a_p1_p0 = vabdq_u8(p1, p0); // abs(p1 - p0)
655
+ const uint8x16_t a_q1_q0 = vabdq_u8(q1, q0); // abs(q1 - q0)
656
+ const uint8x16_t mask1 = vcgtq_u8(a_p1_p0, hev_thresh_v);  // abs(p1 - p0) > hev_thresh
657
+ const uint8x16_t mask2 = vcgtq_u8(a_q1_q0, hev_thresh_v);  // abs(q1 - q0) > hev_thresh
658
+ const uint8x16_t mask = vorrq_u8(mask1, mask2);
659
+ return mask;
660
+ }
661
+
662
+ static uint8x16_t NeedsFilter2(const uint8x16_t p3, const uint8x16_t p2,
663
+ const uint8x16_t p1, const uint8x16_t p0,
664
+ const uint8x16_t q0, const uint8x16_t q1,
665
+ const uint8x16_t q2, const uint8x16_t q3,
666
+ int ithresh, int thresh) {
667
+ const uint8x16_t ithresh_v = vdupq_n_u8((uint8_t)ithresh);
668
+ const uint8x16_t a_p3_p2 = vabdq_u8(p3, p2); // abs(p3 - p2)
669
+ const uint8x16_t a_p2_p1 = vabdq_u8(p2, p1); // abs(p2 - p1)
670
+ const uint8x16_t a_p1_p0 = vabdq_u8(p1, p0); // abs(p1 - p0)
671
+ const uint8x16_t a_q3_q2 = vabdq_u8(q3, q2); // abs(q3 - q2)
672
+ const uint8x16_t a_q2_q1 = vabdq_u8(q2, q1); // abs(q2 - q1)
673
+ const uint8x16_t a_q1_q0 = vabdq_u8(q1, q0); // abs(q1 - q0)
674
+ const uint8x16_t max1 = vmaxq_u8(a_p3_p2, a_p2_p1);
675
+ const uint8x16_t max2 = vmaxq_u8(a_p1_p0, a_q3_q2);
676
+ const uint8x16_t max3 = vmaxq_u8(a_q2_q1, a_q1_q0);
677
+ const uint8x16_t max12 = vmaxq_u8(max1, max2);
678
+ const uint8x16_t max123 = vmaxq_u8(max12, max3);
679
+ const uint8x16_t mask2 = vcgeq_u8(ithresh_v, max123);
680
+ const uint8x16_t mask1 = NeedsFilter(p1, p0, q0, q1, thresh);
681
+ const uint8x16_t mask = vandq_u8(mask1, mask2);
682
+ return mask;
683
+ }
684
+
685
+ // 4-points filter: adjusts p1, p0, q0, q1 (sign-flipped s8 inputs, u8 outputs).
686
+
687
+ static void ApplyFilter4(
688
+ const int8x16_t p1, const int8x16_t p0,
689
+ const int8x16_t q0, const int8x16_t q1,
690
+ const int8x16_t delta0,
691
+ uint8x16_t* const op1, uint8x16_t* const op0,
692
+ uint8x16_t* const oq0, uint8x16_t* const oq1) {
693
+ const int8x16_t kCst3 = vdupq_n_s8(0x03);
694
+ const int8x16_t kCst4 = vdupq_n_s8(0x04);
695
+ const int8x16_t delta1 = vqaddq_s8(delta0, kCst4);  // delta0 + 4, saturating
696
+ const int8x16_t delta2 = vqaddq_s8(delta0, kCst3);  // delta0 + 3, saturating
697
+ const int8x16_t a1 = vshrq_n_s8(delta1, 3);  // a1 = (delta0 + 4) >> 3
698
+ const int8x16_t a2 = vshrq_n_s8(delta2, 3);  // a2 = (delta0 + 3) >> 3
699
+ const int8x16_t a3 = vrshrq_n_s8(a1, 1); // a3 = (a1 + 1) >> 1
700
+ *op0 = FlipSignBack(vqaddq_s8(p0, a2)); // clip(p0 + a2)
701
+ *oq0 = FlipSignBack(vqsubq_s8(q0, a1)); // clip(q0 - a1)
702
+ *op1 = FlipSignBack(vqaddq_s8(p1, a3)); // clip(p1 + a3)
703
+ *oq1 = FlipSignBack(vqsubq_s8(q1, a3)); // clip(q1 - a3)
704
+ }
705
+
706
+ static void DoFilter4(
707
+ const uint8x16_t p1, const uint8x16_t p0,
708
+ const uint8x16_t q0, const uint8x16_t q1,
709
+ const uint8x16_t mask, const uint8x16_t hev_mask,
710
+ uint8x16_t* const op1, uint8x16_t* const op0,
711
+ uint8x16_t* const oq0, uint8x16_t* const oq1) {
712
+ // Fused DoFilter2() + 4-points filter: ApplyFilter2() runs on the 'mask & hev_mask' lanes, then ApplyFilter4() on the 'mask & ~hev_mask' lanes.
713
+ const int8x16_t p1s = FlipSign(p1);
714
+ int8x16_t p0s = FlipSign(p0);
715
+ int8x16_t q0s = FlipSign(q0);
716
+ const int8x16_t q1s = FlipSign(q1);
717
+ const uint8x16_t simple_lf_mask = vandq_u8(mask, hev_mask);
718
+
719
+ // do_filter2 part (simple loopfilter on pixels with hev)
720
+ {
721
+ const int8x16_t delta = GetBaseDelta(p1s, p0s, q0s, q1s);
722
+ const int8x16_t simple_lf_delta =
723
+ vandq_s8(delta, vreinterpretq_s8_u8(simple_lf_mask));
724
+ uint8x16_t tmp_p0, tmp_q0;
725
+ ApplyFilter2(p0s, q0s, simple_lf_delta, &tmp_p0, &tmp_q0);
726
+ // TODO(skal): avoid the double FlipSign() in ApplyFilter2() and here
727
+ p0s = FlipSign(tmp_p0);
728
+ q0s = FlipSign(tmp_q0);
729
+ }
730
+
731
+ // do_filter4 part (complex loopfilter on pixels without hev)
732
+ {
733
+ const int8x16_t delta0 = GetBaseDelta0(p0s, q0s);
734
+ // we use: (mask & hev_mask) ^ mask = mask & ~hev_mask
735
+ const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
736
+ const int8x16_t complex_lf_delta =
737
+ vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
738
+ ApplyFilter4(p1s, p0s, q0s, q1s, complex_lf_delta, op1, op0, oq0, oq1);
739
+ }
740
+ }
741
+
742
+ // 6-points filter: adjusts p2..q2 by a1/a2/a3 = (27|18|9 * delta + 63) >> 7 (sign-flipped s8 inputs, u8 outputs).
743
+
744
+ static void ApplyFilter6(
745
+ const int8x16_t p2, const int8x16_t p1, const int8x16_t p0,
746
+ const int8x16_t q0, const int8x16_t q1, const int8x16_t q2,
747
+ const int8x16_t delta,
748
+ uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
749
+ uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
750
+ const int16x8_t kCst63 = vdupq_n_s16(63);
751
+ const int8x8_t kCst27 = vdup_n_s8(27);
752
+ const int8x8_t kCst18 = vdup_n_s8(18);
753
+ const int8x8_t kCst9 = vdup_n_s8(9);
754
+ const int8x8_t delta_lo = vget_low_s8(delta);  // lanes 0..7
755
+ const int8x8_t delta_hi = vget_high_s8(delta);  // lanes 8..15
756
+ const int16x8_t s1_lo = vmlal_s8(kCst63, kCst27, delta_lo); // 63 + 27 * a
757
+ const int16x8_t s1_hi = vmlal_s8(kCst63, kCst27, delta_hi); // 63 + 27 * a
758
+ const int16x8_t s2_lo = vmlal_s8(kCst63, kCst18, delta_lo); // 63 + 18 * a
759
+ const int16x8_t s2_hi = vmlal_s8(kCst63, kCst18, delta_hi); // 63 + 18 * a
760
+ const int16x8_t s3_lo = vmlal_s8(kCst63, kCst9, delta_lo); // 63 + 9 * a
761
+ const int16x8_t s3_hi = vmlal_s8(kCst63, kCst9, delta_hi); // 63 + 9 * a
762
+ const int8x8_t a1_lo = vqshrn_n_s16(s1_lo, 7);  // (63 + 27 * a) >> 7, narrowed to s8 with saturation
763
+ const int8x8_t a1_hi = vqshrn_n_s16(s1_hi, 7);
764
+ const int8x8_t a2_lo = vqshrn_n_s16(s2_lo, 7);  // (63 + 18 * a) >> 7, narrowed to s8 with saturation
765
+ const int8x8_t a2_hi = vqshrn_n_s16(s2_hi, 7);
766
+ const int8x8_t a3_lo = vqshrn_n_s16(s3_lo, 7);  // (63 + 9 * a) >> 7, narrowed to s8 with saturation
767
+ const int8x8_t a3_hi = vqshrn_n_s16(s3_hi, 7);
768
+ const int8x16_t a1 = vcombine_s8(a1_lo, a1_hi);
769
+ const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi);
770
+ const int8x16_t a3 = vcombine_s8(a3_lo, a3_hi);
771
+
772
+ *op0 = FlipSignBack(vqaddq_s8(p0, a1)); // clip(p0 + a1)
773
+ *oq0 = FlipSignBack(vqsubq_s8(q0, a1)); // clip(q0 - a1)
774
+ *oq1 = FlipSignBack(vqsubq_s8(q1, a2)); // clip(q1 - a2)
775
+ *op1 = FlipSignBack(vqaddq_s8(p1, a2)); // clip(p1 + a2)
776
+ *oq2 = FlipSignBack(vqsubq_s8(q2, a3)); // clip(q2 - a3)
777
+ *op2 = FlipSignBack(vqaddq_s8(p2, a3)); // clip(p2 + a3)
778
+ }
779
+
780
// Fused 2-tap / 6-tap filter.
// Lanes selected by (mask & hev_mask) receive the simple ApplyFilter2()
// correction on p0/q0; lanes selected by (mask & ~hev_mask) receive the
// ApplyFilter6() correction. Both corrections are derived from the same base
// delta, computed once from the unfiltered p1, p0, q0, q1.
static void DoFilter6(
    const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
    const uint8x16_t mask, const uint8x16_t hev_mask,
    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
  const int8x16_t sp2 = FlipSign(p2);
  const int8x16_t sp1 = FlipSign(p1);
  const int8x16_t sp0 = FlipSign(p0);
  const int8x16_t sq0 = FlipSign(q0);
  const int8x16_t sq1 = FlipSign(q1);
  const int8x16_t sq2 = FlipSign(q2);
  const int8x16_t base_delta = GetBaseDelta(sp1, sp0, sq0, sq1);
  const uint8x16_t hev_lanes = vandq_u8(mask, hev_mask);
  // (mask & hev_mask) ^ mask == mask & ~hev_mask
  const uint8x16_t no_hev_lanes = veorq_u8(hev_lanes, mask);
  const int8x16_t delta2 =
      vandq_s8(base_delta, vreinterpretq_s8_u8(hev_lanes));
  const int8x16_t delta6 =
      vandq_s8(base_delta, vreinterpretq_s8_u8(no_hev_lanes));
  uint8x16_t p0_simple, q0_simple;

  // Simple loop filter on the lanes with high edge variance.
  ApplyFilter2(sp0, sq0, delta2, &p0_simple, &q0_simple);

  // Complex loop filter on the remaining lanes. ApplyFilter2() hands back
  // unsigned values, so they are sign-flipped again before being fed in.
  // TODO(skal): avoid the double FlipSign() in ApplyFilter2() and here
  ApplyFilter6(sp2, sp1, FlipSign(p0_simple), FlipSign(q0_simple), sq1, sq2,
               delta6, op2, op1, op0, oq0, oq1, oq2);
}
817
+
818
+ // on macroblock edges
819
+
820
// Macroblock-edge filter, 16-wide variant using row-oriented loads/stores.
// Loads eight vectors around 'p' with Load16x8(), runs DoFilter6() under the
// NeedsFilter2()/NeedsHev() masks, and writes the six filtered vectors back
// as three Store16x2() pairs.
static void VFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  uint8x16_t out_p2, out_p1, out_p0, out_q0, out_q1, out_q2;
  uint8x16_t filter_mask, hev;

  Load16x8(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
  hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
  DoFilter6(p2, p1, p0, q0, q1, q2, filter_mask, hev,
            &out_p2, &out_p1, &out_p0, &out_q0, &out_q1, &out_q2);
  Store16x2(out_p2, out_p1, p - 2 * stride, stride);
  Store16x2(out_p0, out_q0, p, stride);
  Store16x2(out_q1, out_q2, p + 2 * stride, stride);
}
836
+
837
// Macroblock-edge filter, 16-wide variant using column-oriented loads/stores.
// Loads eight vectors around 'p' with Load8x16(), runs DoFilter6() under the
// NeedsFilter2()/NeedsHev() masks, and writes the six filtered vectors back
// as three Store2x16() pairs at byte offsets -2, 0 and +2.
static void HFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  uint8x16_t out_p2, out_p1, out_p0, out_q0, out_q1, out_q2;
  uint8x16_t filter_mask, hev;

  Load8x16(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
  hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
  DoFilter6(p2, p1, p0, q0, q1, q2, filter_mask, hev,
            &out_p2, &out_p1, &out_p0, &out_q0, &out_q1, &out_q2);
  Store2x16(out_p2, out_p1, p - 2, stride);
  Store2x16(out_p0, out_q0, p, stride);
  Store2x16(out_q1, out_q2, p + 2, stride);
}
853
+
854
+ // on three inner edges
855
// Filters the three inner edges, row-oriented. 'p' advances by 4 * stride
// before each edge; each edge is filtered with DoFilter4().
static void VFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int edge;
  uint8x16_t p3, p2, p1, p0;

  Load16x4(p + 2 * stride, stride, &p3, &p2, &p1, &p0);
  for (edge = 0; edge < 3; ++edge) {
    uint8x16_t q0, q1, q2, q3;
    uint8x16_t out_p1, out_p0, out_q0, out_q1;
    uint8x16_t filter_mask, hev;

    p += 4 * stride;
    Load16x4(p + 2 * stride, stride, &q0, &q1, &q2, &q3);
    filter_mask =
        NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
    DoFilter4(p1, p0, q0, q1, filter_mask, hev,
              &out_p1, &out_p0, &out_q0, &out_q1);
    Store16x4(out_p1, out_p0, out_q0, out_q1, p, stride);
    // Slide the window for the next edge: the filtered q0/q1 become p3/p2
    // and the untouched q2/q3 become p1/p0.
    p3 = out_q0;
    p2 = out_q1;
    p1 = q2;
    p0 = q3;
  }
}
877
+
878
+ #if !defined(WORK_AROUND_GCC)
879
// Filters the three inner edges, column-oriented. 'p' advances by 4 bytes
// before each edge; each edge is filtered with DoFilter4().
static void HFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int edge;
  uint8x16_t p3, p2, p1, p0;

  Load4x16(p + 2, stride, &p3, &p2, &p1, &p0);
  for (edge = 0; edge < 3; ++edge) {
    uint8x16_t q0, q1, q2, q3;
    uint8x16_t out_p1, out_p0, out_q0, out_q1;
    uint8x16_t filter_mask, hev;

    p += 4;
    Load4x16(p + 2, stride, &q0, &q1, &q2, &q3);
    filter_mask =
        NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
    DoFilter4(p1, p0, q0, q1, filter_mask, hev,
              &out_p1, &out_p0, &out_q0, &out_q1);
    Store4x16(out_p1, out_p0, out_q0, out_q1, p, stride);
    // Slide the window for the next edge: the filtered q0/q1 become p3/p2
    // and the untouched q2/q3 become p1/p0.
    p3 = out_q0;
    p2 = out_q1;
    p1 = q2;
    p0 = q3;
  }
}
899
+ #endif // !WORK_AROUND_GCC
900
+
901
+ // 8-pixels wide variant, for chroma filtering
902
// 8-pixels wide edge filter for chroma, row-oriented: the 'u' and 'v' planes
// are loaded together by Load8x8x2(), filtered with DoFilter6(), and written
// back as three Store8x2x2() pairs.
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  uint8x16_t out_p2, out_p1, out_p0, out_q0, out_q1, out_q2;
  uint8x16_t filter_mask, hev;

  Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
  hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
  DoFilter6(p2, p1, p0, q0, q1, q2, filter_mask, hev,
            &out_p2, &out_p1, &out_p0, &out_q0, &out_q1, &out_q2);
  Store8x2x2(out_p2, out_p1, u - 2 * stride, v - 2 * stride, stride);
  Store8x2x2(out_p0, out_q0, u, v, stride);
  Store8x2x2(out_q1, out_q2, u + 2 * stride, v + 2 * stride, stride);
}
918
// Chroma inner-edge filter, row-oriented: both plane pointers are moved
// down by 4 rows, then the edge is filtered with DoFilter4() and the four
// resulting vectors are written with Store8x4x2().
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  uint8x16_t out_p1, out_p0, out_q0, out_q1;
  uint8x16_t filter_mask, hev;

  u += 4 * stride;
  v += 4 * stride;
  Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
  hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
  DoFilter4(p1, p0, q0, q1, filter_mask, hev,
            &out_p1, &out_p0, &out_q0, &out_q1);
  Store8x4x2(out_p1, out_p0, out_q0, out_q1, u, v, stride);
}
933
+
934
+ #if !defined(WORK_AROUND_GCC)
935
// 8-pixels wide edge filter for chroma, column-oriented: the 'u' and 'v'
// planes are loaded together by Load8x8x2T(), filtered with DoFilter6(), and
// the six resulting vectors are written with a single Store6x8x2() call.
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  uint8x16_t out_p2, out_p1, out_p0, out_q0, out_q1, out_q2;
  uint8x16_t filter_mask, hev;

  Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
  hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
  DoFilter6(p2, p1, p0, q0, q1, q2, filter_mask, hev,
            &out_p2, &out_p1, &out_p0, &out_q0, &out_q1, &out_q2);
  Store6x8x2(out_p2, out_p1, out_p0, out_q0, out_q1, out_q2, u, v, stride);
}
949
+
950
// Chroma inner-edge filter, column-oriented: both plane pointers are moved
// right by 4 bytes, then the edge is filtered with DoFilter4() and the four
// resulting vectors are written with Store4x8x2().
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  uint8x16_t out_p1, out_p0, out_q0, out_q1;
  uint8x16_t filter_mask, hev;

  u += 4;
  v += 4;
  Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
  hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
  DoFilter4(p1, p0, q0, q1, filter_mask, hev,
            &out_p1, &out_p0, &out_q0, &out_q1);
  Store4x8x2(out_p1, out_p0, out_q0, out_q1, u, v, stride);
}
965
+ #endif // !WORK_AROUND_GCC
966
+
967
+ //-----------------------------------------------------------------------------
968
+ // Inverse transforms (Paragraph 14.4)
969
+
970
+ // Technically these are unsigned but vqdmulh is only available in signed.
971
+ // vqdmulh returns high half (effectively >> 16) but also doubles the value,
972
+ // changing the >> 16 to >> 15 and requiring an additional >> 1.
973
+ // We use this to our advantage with kC2. The canonical value is 35468.
974
+ // However, the high bit is set so treating it as signed will give incorrect
975
+ // results. We avoid this by down shifting by 1 here to clear the highest bit.
976
+ // Combined with the doubling effect of vqdmulh we get >> 16.
977
+ // This can not be applied to kC1 because the lowest bit is set. Down shifting
978
+ // the constant would reduce precision.
979
+
980
+ // libwebp uses a trick to avoid some extra addition that libvpx does.
981
+ // Instead of:
982
+ // temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
983
+ // libwebp adds 1 << 16 to cospi8sqrt2minus1 (kC1). However, this causes the
984
+ // same issue with kC1 and vqdmulh that we work around by down-shifting kC2.
985
+
986
+ static const int16_t kC1 = 20091; // cospi8sqrt2minus1, without the extra 1 << 16. See comment above.
987
+ static const int16_t kC2 = 17734; // half of the canonical kC2 value (35468), actually. See comment above.
988
+
989
+ #if defined(WEBP_USE_INTRINSICS)
990
// Interleaves two 8-lane rows twice, which transposes the pair of 4x4 blocks
// they hold:
//   a0 a1 a2 a3 | b0 b1 b2 b3   =>   a0 b0 c0 d0 | a1 b1 c1 d1
//   c0 c1 c2 c3 | d0 d1 d2 d3        a2 b2 c2 d2 | a3 b3 c3 d3
static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
                                     int16x8x2_t* const out) {
  // First zip:  a0 c0 a1 c1 a2 c2 ... / b0 d0 b1 d1 b2 d2 ...
  const int16x8x2_t zipped = vzipq_s16(in0, in1);
  // Second zip finishes the transpose.
  *out = vzipq_s16(zipped.val[0], zipped.val[1]);
}
998
+
999
// One pass of the inverse transform, done in place on 'rows'.
// On entry:  rows->val[0] = in0 | in4,  rows->val[1] = in8 | in12.
// The result is transposed with Transpose8x2() so that calling this function
// a second time performs the other pass.
static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
  const int16x4_t in0 = vget_low_s16(rows->val[0]);
  const int16x4_t in4 = vget_high_s16(rows->val[0]);
  const int16x4_t in8 = vget_low_s16(rows->val[1]);
  const int16x4_t in12 = vget_high_s16(rows->val[1]);
  // odd = in4 | in12
  const int16x8_t odd = vcombine_s16(in4, in12);
  // by_c1 = kC1 * in4 | kC1 * in12   (odd + (vqdmulh(odd, kC1) >> 1))
  // by_c2 = kC2 * in4 | kC2 * in12
  const int16x8_t by_c1 = vsraq_n_s16(odd, vqdmulhq_n_s16(odd, kC1), 1);
  const int16x8_t by_c2 = vqdmulhq_n_s16(odd, kC2);
  const int16x4_t a = vqadd_s16(in0, in8);  // in0 + in8
  const int16x4_t b = vqsub_s16(in0, in8);  // in0 - in8
  // c = kC2 * in4 - kC1 * in12
  // d = kC1 * in4 + kC2 * in12
  const int16x4_t c = vqsub_s16(vget_low_s16(by_c2), vget_high_s16(by_c1));
  const int16x4_t d = vqadd_s16(vget_low_s16(by_c1), vget_high_s16(by_c2));
  const int16x8_t out0 =
      vcombine_s16(vqadd_s16(a, d), vqadd_s16(b, c));  // a+d | b+c
  const int16x8_t out1 =
      vcombine_s16(vqsub_s16(b, c), vqsub_s16(a, d));  // b-c | a-d
  Transpose8x2(out0, out1, rows);
}
1024
+
1025
// Intrinsics version: loads the 16 coefficients at 'in' as two 8-lane rows,
// applies TransformPass() twice, and hands both rows to Add4x4() together
// with 'dst'.
static void TransformOne(const int16_t* in, uint8_t* dst) {
  int16x8x2_t rows;
  int pass;
  INIT_VECTOR2(rows, vld1q_s16(in), vld1q_s16(in + 8));
  for (pass = 0; pass < 2; ++pass) {
    TransformPass(&rows);
  }
  Add4x4(rows.val[0], rows.val[1], dst);
}
1032
+
1033
+ #else
1034
+
1035
+ static void TransformOne(const int16_t* in, uint8_t* dst) {
1036
+ const int kBPS = BPS;
1037
+ // kC1, kC2. Padded because vld1.16 loads 8 bytes
1038
+ const int16_t constants[4] = { kC1, kC2, 0, 0 };
1039
+ /* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
1040
+ __asm__ volatile (
1041
+ "vld1.16 {q1, q2}, [%[in]] \n"
1042
+ "vld1.16 {d0}, [%[constants]] \n"
1043
+
1044
+ /* d2: in[0]
1045
+ * d3: in[8]
1046
+ * d4: in[4]
1047
+ * d5: in[12]
1048
+ */
1049
+ "vswp d3, d4 \n"
1050
+
1051
+ /* q8 = {in[4], in[12]} * kC1 * 2 >> 16
1052
+ * q9 = {in[4], in[12]} * kC2 >> 16
1053
+ */
1054
+ "vqdmulh.s16 q8, q2, d0[0] \n"
1055
+ "vqdmulh.s16 q9, q2, d0[1] \n"
1056
+
1057
+ /* d22 = a = in[0] + in[8]
1058
+ * d23 = b = in[0] - in[8]
1059
+ */
1060
+ "vqadd.s16 d22, d2, d3 \n"
1061
+ "vqsub.s16 d23, d2, d3 \n"
1062
+
1063
+ /* The multiplication should be x * kC1 >> 16
1064
+ * However, with vqdmulh we get x * kC1 * 2 >> 16
1065
+ * (multiply, double, return high half)
1066
+ * We avoided this in kC2 by pre-shifting the constant.
1067
+ * q8 = in[4]/[12] * kC1 >> 16
1068
+ */
1069
+ "vshr.s16 q8, q8, #1 \n"
1070
+
1071
+ /* Add {in[4], in[12]} back after the multiplication. This is handled by
1072
+ * adding 1 << 16 to kC1 in the libwebp C code.
1073
+ */
1074
+ "vqadd.s16 q8, q2, q8 \n"
1075
+
1076
+ /* d20 = c = in[4]*kC2 - in[12]*kC1
1077
+ * d21 = d = in[4]*kC1 + in[12]*kC2
1078
+ */
1079
+ "vqsub.s16 d20, d18, d17 \n"
1080
+ "vqadd.s16 d21, d19, d16 \n"
1081
+
1082
+ /* d2 = tmp[0] = a + d
1083
+ * d3 = tmp[1] = b + c
1084
+ * d4 = tmp[2] = b - c
1085
+ * d5 = tmp[3] = a - d
1086
+ */
1087
+ "vqadd.s16 d2, d22, d21 \n"
1088
+ "vqadd.s16 d3, d23, d20 \n"
1089
+ "vqsub.s16 d4, d23, d20 \n"
1090
+ "vqsub.s16 d5, d22, d21 \n"
1091
+
1092
+ "vzip.16 q1, q2 \n"
1093
+ "vzip.16 q1, q2 \n"
1094
+
1095
+ "vswp d3, d4 \n"
1096
+
1097
+ /* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
1098
+ * q9 = {tmp[4], tmp[12]} * kC2 >> 16
1099
+ */
1100
+ "vqdmulh.s16 q8, q2, d0[0] \n"
1101
+ "vqdmulh.s16 q9, q2, d0[1] \n"
1102
+
1103
+ /* d22 = a = tmp[0] + tmp[8]
1104
+ * d23 = b = tmp[0] - tmp[8]
1105
+ */
1106
+ "vqadd.s16 d22, d2, d3 \n"
1107
+ "vqsub.s16 d23, d2, d3 \n"
1108
+
1109
+ /* See long winded explanations prior */
1110
+ "vshr.s16 q8, q8, #1 \n"
1111
+ "vqadd.s16 q8, q2, q8 \n"
1112
+
1113
+ /* d20 = c = in[4]*kC2 - in[12]*kC1
1114
+ * d21 = d = in[4]*kC1 + in[12]*kC2
1115
+ */
1116
+ "vqsub.s16 d20, d18, d17 \n"
1117
+ "vqadd.s16 d21, d19, d16 \n"
1118
+
1119
+ /* d2 = tmp[0] = a + d
1120
+ * d3 = tmp[1] = b + c
1121
+ * d4 = tmp[2] = b - c
1122
+ * d5 = tmp[3] = a - d
1123
+ */
1124
+ "vqadd.s16 d2, d22, d21 \n"
1125
+ "vqadd.s16 d3, d23, d20 \n"
1126
+ "vqsub.s16 d4, d23, d20 \n"
1127
+ "vqsub.s16 d5, d22, d21 \n"
1128
+
1129
+ "vld1.32 d6[0], [%[dst]], %[kBPS] \n"
1130
+ "vld1.32 d6[1], [%[dst]], %[kBPS] \n"
1131
+ "vld1.32 d7[0], [%[dst]], %[kBPS] \n"
1132
+ "vld1.32 d7[1], [%[dst]], %[kBPS] \n"
1133
+
1134
+ "sub %[dst], %[dst], %[kBPS], lsl #2 \n"
1135
+
1136
+ /* (val) + 4 >> 3 */
1137
+ "vrshr.s16 d2, d2, #3 \n"
1138
+ "vrshr.s16 d3, d3, #3 \n"
1139
+ "vrshr.s16 d4, d4, #3 \n"
1140
+ "vrshr.s16 d5, d5, #3 \n"
1141
+
1142
+ "vzip.16 q1, q2 \n"
1143
+ "vzip.16 q1, q2 \n"
1144
+
1145
+ /* Must accumulate before saturating */
1146
+ "vmovl.u8 q8, d6 \n"
1147
+ "vmovl.u8 q9, d7 \n"
1148
+
1149
+ "vqadd.s16 q1, q1, q8 \n"
1150
+ "vqadd.s16 q2, q2, q9 \n"
1151
+
1152
+ "vqmovun.s16 d0, q1 \n"
1153
+ "vqmovun.s16 d1, q2 \n"
1154
+
1155
+ "vst1.32 d0[0], [%[dst]], %[kBPS] \n"
1156
+ "vst1.32 d0[1], [%[dst]], %[kBPS] \n"
1157
+ "vst1.32 d1[0], [%[dst]], %[kBPS] \n"
1158
+ "vst1.32 d1[1], [%[dst]] \n"
1159
+
1160
+ : [in] "+r"(in), [dst] "+r"(dst) /* modified registers */
1161
+ : [kBPS] "r"(kBPS), [constants] "r"(constants) /* constants */
1162
+ : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" /* clobbered */
1163
+ );
1164
+ }
1165
+
1166
+ #endif // WEBP_USE_INTRINSICS
1167
+
1168
// Runs TransformOne() on the block at (in, dst) and, when 'do_two' is
// non-zero, on the following block as well: 16 coefficients further in 'in'
// and 4 bytes further in 'dst'.
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
  TransformOne(in, dst);
  if (!do_two) return;
  TransformOne(in + 16, dst + 4);
}
1174
+
1175
// DC-only case: broadcasts in[0] to every lane and passes that vector as
// both rows to Add4x4() together with 'dst'.
static void TransformDC(const int16_t* in, uint8_t* dst) {
  const int16x8_t dc_lanes = vdupq_n_s16(in[0]);
  Add4x4(dc_lanes, dc_lanes, dst);
}
1179
+
1180
+ //------------------------------------------------------------------------------
1181
+
1182
// Writes lane 'col' of rows.val[0..3] to dst[0], dst[16], dst[32] and
// dst[48], leaving 'dst' advanced by 64 elements. 'col' must be a constant.
#define STORE_WHT(dst, col, rows) do {                               \
  *(dst) = vgetq_lane_s32((rows).val[0], (col)); (dst) += 16;        \
  *(dst) = vgetq_lane_s32((rows).val[1], (col)); (dst) += 16;        \
  *(dst) = vgetq_lane_s32((rows).val[2], (col)); (dst) += 16;        \
  *(dst) = vgetq_lane_s32((rows).val[3], (col)); (dst) += 16;        \
} while (0)

// Two-pass 4x4 butterfly transform on the 16 values at 'in'. The second pass
// adds a rounder of 3 and shifts right by 3. The 16 results are written to
// 'out' with a spacing of 16 elements between consecutive outputs.
static void TransformWHT(const int16_t* in, int16_t* out) {
  int32x4x4_t tmp;

  {
    // First pass: butterflies across the four input rows, widened to 32 bits.
    const int16x4_t row0 = vld1_s16(in + 0);
    const int16x4_t row1 = vld1_s16(in + 4);
    const int16x4_t row2 = vld1_s16(in + 8);
    const int16x4_t row3 = vld1_s16(in + 12);
    const int32x4_t sum03 = vaddl_s16(row0, row3);   // in[0..3] + in[12..15]
    const int32x4_t sum12 = vaddl_s16(row1, row2);   // in[4..7] + in[8..11]
    const int32x4_t diff12 = vsubl_s16(row1, row2);  // in[4..7] - in[8..11]
    const int32x4_t diff03 = vsubl_s16(row0, row3);  // in[0..3] - in[12..15]
    tmp.val[0] = vaddq_s32(sum03, sum12);
    tmp.val[1] = vaddq_s32(diff03, diff12);
    tmp.val[2] = vsubq_s32(sum03, sum12);
    tmp.val[3] = vsubq_s32(diff03, diff12);
    // Arrange the temporary results column-wise.
    tmp = Transpose4x4(tmp);
  }

  {
    // Second pass: same butterflies, with the rounder folded into the DC term.
    const int32x4_t rounder = vdupq_n_s32(3);
    const int32x4_t dc = vaddq_s32(tmp.val[0], rounder);
    const int32x4_t a0 = vaddq_s32(dc, tmp.val[3]);
    const int32x4_t a1 = vaddq_s32(tmp.val[1], tmp.val[2]);
    const int32x4_t a2 = vsubq_s32(tmp.val[1], tmp.val[2]);
    const int32x4_t a3 = vsubq_s32(dc, tmp.val[3]);

    // Combine and right shift the results by 3.
    tmp.val[0] = vshrq_n_s32(vaddq_s32(a0, a1), 3);
    tmp.val[1] = vshrq_n_s32(vaddq_s32(a3, a2), 3);
    tmp.val[2] = vshrq_n_s32(vsubq_s32(a0, a1), 3);
    tmp.val[3] = vshrq_n_s32(vsubq_s32(a3, a2), 3);

    STORE_WHT(out, 0, tmp);
    STORE_WHT(out, 1, tmp);
    STORE_WHT(out, 2, tmp);
    STORE_WHT(out, 3, tmp);
  }
}

#undef STORE_WHT
1237
+
1238
+ //------------------------------------------------------------------------------
1239
+
1240
#define MUL(a, b) (((a) * (b)) >> 16)
// Inverse transform for the case where only in[0], in[1] and in[4] are read.
// in[1] contributes a per-column offset {d1, c1, -c1, -d1} and in[4] a
// per-row offset {+d4, +c4, -c4, -d4}; the four resulting rows are passed to
// Add4x4() together with 'dst'.
static void TransformAC3(const int16_t* in, uint8_t* dst) {
  static const int kC1_full = 20091 + (1 << 16);
  static const int kC2_full = 35468;
  const int c1 = MUL(in[1], kC2_full);
  const int d1 = MUL(in[1], kC1_full);
  const int16x4_t dc = vdup_n_s16(in[0]);
  const int16x4_t row_c = vdup_n_s16(MUL(in[4], kC2_full));
  const int16x4_t row_d = vdup_n_s16(MUL(in[4], kC1_full));
  // Pack {d1, c1, -c1, -d1} as four 16-bit fields of a 64-bit value.
  const uint64_t field0 = (uint64_t)(d1 & 0xffff);
  const uint64_t field1 = (uint64_t)(c1 & 0xffff);
  const uint64_t field2 = (uint64_t)(-c1 & 0xffff);
  const uint64_t field3 = (uint64_t)(-d1 & 0xffff);
  const uint64_t packed =
      field0 | (field1 << 16) | (field2 << 32) | (field3 << 48);
  const int16x4_t col_offsets = vcreate_s16(packed);
  const int16x4_t base = vqadd_s16(dc, col_offsets);
  const int16x8_t rows01 =
      vcombine_s16(vqadd_s16(base, row_d), vqadd_s16(base, row_c));
  const int16x8_t rows23 =
      vcombine_s16(vqsub_s16(base, row_c), vqsub_s16(base, row_d));
  Add4x4(rows01, rows23, dst);
}
#undef MUL
1260
+
1261
+ //------------------------------------------------------------------------------
1262
+ // 4x4
1263
+
1264
// 4x4 DC predictor: sums the first four bytes of the row above 'dst' and the
// byte to the left of each of the four rows, computes (sum + 4) >> 3, and
// fills the 4x4 block with that value.
static void DC4(uint8_t* dst) {
  const uint8x8_t top = vld1_u8(dst - BPS);
  // Cascading pairwise summation: lane 0 ends up holding top[0..3] summed.
  const uint16x4_t top_pairs = vpaddl_u8(top);
  const uint16x4_t top_quads = vpadd_u16(top_pairs, top_pairs);
  uint16x8_t sum = vcombine_u16(top_quads, top_quads);
  uint8x8_t dc;
  int i;

  // Lane 0 of each load is the pixel immediately left of row i.
  for (i = 0; i < 4; ++i) {
    sum = vaddq_u16(sum, vmovl_u8(vld1_u8(dst + i * BPS - 1)));
  }
  // (sum + 4) >> 3, then broadcast lane 0.
  dc = vdup_lane_u8(vrshrn_n_u16(sum, 3), 0);
  for (i = 0; i < 4; ++i) {
    vst1_lane_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(dc), 0);
  }
}
1283
+
1284
+ // TrueMotion (4x4 + 8x8)
1285
// TrueMotion predictor. Every output pixel is L[r] + A[c] - A[-1], saturated
// to 8 bits, where A is the row above 'dst', A[-1] the top-left pixel and
// L[r] the pixel left of row r. Rows are produced four at a time; when
// 'size' is 4 only the first four bytes of each row are written, otherwise
// eight bytes are written.
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
  const uint8x8_t top_left = vdup_n_u8(dst[-BPS - 1]);  // 'A[-1]'
  const uint8x8_t top = vld1_u8(dst - BPS);             // 'A[0..]'
  // A[c] - A[-1]
  const int16x8_t delta = vreinterpretq_s16_u16(vsubl_u8(top, top_left));
  int y;
  for (y = 0; y < size; y += 4) {
    int r;
    for (r = 0; r < 4; ++r) {
      uint8_t* const row = dst + r * BPS;
      // Left edge: lane 0 of the load is L[r]; broadcast it to all lanes.
      const int16x8_t edge = ConvertU8ToS16(vld1_u8(row - 1));
      const int16x8_t left = vdupq_lane_s16(vget_low_s16(edge), 0);
      const int16x8_t pred = vaddq_s16(left, delta);
      // Saturate and store the result.
      const uint32x2_t packed = vreinterpret_u32_u8(vqmovun_s16(pred));
      if (size == 4) {
        vst1_lane_u32((uint32_t*)row, packed, 0);
      } else {
        vst1_u32((uint32_t*)row, packed);
      }
    }
    dst += 4 * BPS;
  }
}
1323
+
1324
// 4x4 entry point: TrueMotion() with size 4.
static void TM4(uint8_t* dst) {
  TrueMotion(dst, 4);
}
1325
+
1326
// 4x4 vertical predictor: smooths the row above 'dst' (starting one pixel to
// its left) with a 3-tap average, computed as a truncating halving add of
// the outer taps followed by a rounding halving add with the centre tap, and
// copies the first four results into each of the four rows.
static void VE4(uint8_t* dst) {
  // NB: avoid vld1_u64 here as an alignment hint may be added -> SIGBUS.
  const uint8x8_t top = vld1_u8(dst - BPS - 1);  // top row, from top-left
  const uint64x1_t top_u64 = vreinterpret_u64_u8(top);
  // The same eight bytes, shifted down by one and by two byte positions.
  const uint8x8_t centre = vreinterpret_u8_u64(vshr_n_u64(top_u64, 8));
  const uint8x8_t right = vreinterpret_u8_u64(vshr_n_u64(top_u64, 16));
  const uint8x8_t outer = vhadd_u8(top, right);
  const uint32x2_t smoothed = vreinterpret_u32_u8(vrhadd_u8(outer, centre));
  uint8_t* row = dst;
  int i;
  for (i = 0; i < 4; ++i, row += BPS) {
    vst1_lane_u32((uint32_t*)row, smoothed, 0);
  }
}
1341
+
1342
// 4x4 down-right predictor. Builds the edge vector L K J I X A B C from the
// four left pixels (I..L, top to bottom), the top-left pixel X and the top
// row A.., filters it with a 3-tap average (halving add of the outer taps,
// then rounding halving add with the centre tap), and writes a 4-byte window
// of the filtered edge to each row, moving by one byte per row.
static void RD4(uint8_t* dst) {
  const uint8x8_t top_u8 = vld1_u8(dst - BPS - 1);  // X A B C D ...
  const uint64x1_t top_u64 = vreinterpret_u64_u8(top_u8);
  const uint64x1_t top_in_high_half = vshl_n_u64(top_u64, 32);  // ____XABC
  const uint32_t I = dst[-1 + 0 * BPS];
  const uint32_t J = dst[-1 + 1 * BPS];
  const uint32_t K = dst[-1 + 2 * BPS];
  const uint32_t L = dst[-1 + 3 * BPS];
  const uint32_t left_word = L | (K << 8) | (J << 16) | (I << 24);
  const uint64x1_t left_in_low_half = vcreate_u64(left_word);   // LKJI____
  const uint64x1_t edge = vorr_u64(left_in_low_half, top_in_high_half);
  const uint8x8_t edge_u8 = vreinterpret_u8_u64(edge);          // LKJIXABC
  const uint8x8_t edge_s1 =
      vreinterpret_u8_u64(vshr_n_u64(edge, 8));                 // KJIXABC_
  const uint8x8_t edge_s2_raw =
      vreinterpret_u8_u64(vshr_n_u64(edge, 16));                // JIXABC__
  // Patch in D, which the 64-bit shift could not supply.
  const uint8_t D = vget_lane_u8(top_u8, 4);
  const uint8x8_t edge_s2 = vset_lane_u8(D, edge_s2_raw, 6);    // JIXABCD_
  const uint8x8_t outer = vhadd_u8(edge_s2, edge_u8);
  const uint8x8_t filtered = vrhadd_u8(outer, edge_s1);
  const uint64x1_t filtered_u64 = vreinterpret_u64_u8(filtered);
  const uint32x2_t row3 = vreinterpret_u32_u8(filtered);
  const uint32x2_t row2 = vreinterpret_u32_u64(vshr_n_u64(filtered_u64, 8));
  const uint32x2_t row1 = vreinterpret_u32_u64(vshr_n_u64(filtered_u64, 16));
  const uint32x2_t row0 = vreinterpret_u32_u64(vshr_n_u64(filtered_u64, 24));
  vst1_lane_u32((uint32_t*)(dst + 0 * BPS), row0, 0);
  vst1_lane_u32((uint32_t*)(dst + 1 * BPS), row1, 0);
  vst1_lane_u32((uint32_t*)(dst + 2 * BPS), row2, 0);
  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), row3, 0);
}
1369
+
1370
// 4x4 down-left predictor. Filters the row above 'dst' with a 3-tap average
// (halving add of the outer taps, then rounding halving add with the centre
// tap) and writes a 4-byte window of the result to each row, moving by one
// byte per row.
static void LD4(uint8_t* dst) {
  // Note using the same shift trick as VE4() is slower here.
  const uint8x8_t tap0 = vld1_u8(dst - BPS + 0);      // ABCDEFGH
  const uint8x8_t tap1 = vld1_u8(dst - BPS + 1);      // BCDEFGH0
  const uint8x8_t tap2_raw = vld1_u8(dst - BPS + 2);  // CDEFGH00
  // Replicate the byte at offset 7 of the top row into lane 6.
  const uint8x8_t tap2 = vset_lane_u8(dst[-BPS + 7], tap2_raw, 6);
  const uint8x8_t outer = vhadd_u8(tap0, tap2);
  const uint8x8_t filtered = vrhadd_u8(outer, tap1);
  const uint64x1_t filtered_u64 = vreinterpret_u64_u8(filtered);
  const uint32x2_t row0 = vreinterpret_u32_u8(filtered);
  const uint32x2_t row1 = vreinterpret_u32_u64(vshr_n_u64(filtered_u64, 8));
  const uint32x2_t row2 = vreinterpret_u32_u64(vshr_n_u64(filtered_u64, 16));
  const uint32x2_t row3 = vreinterpret_u32_u64(vshr_n_u64(filtered_u64, 24));
  vst1_lane_u32((uint32_t*)(dst + 0 * BPS), row0, 0);
  vst1_lane_u32((uint32_t*)(dst + 1 * BPS), row1, 0);
  vst1_lane_u32((uint32_t*)(dst + 2 * BPS), row2, 0);
  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), row3, 0);
}
1388
+
1389
+ //------------------------------------------------------------------------------
1390
+ // Chroma
1391
+
1392
// 8x8 DC predictor.
//   do_top:  include the 8 pixels of the row above 'dst' in the average.
//   do_left: include the pixel to the left of each of the 8 rows.
// With both edges the value is (sum + 8) >> 4, with a single edge it is
// (sum + 4) >> 3. With neither edge the block is filled with 0x80, so that
// no uninitialized sum is ever read.
// NOTE(review): 0x80 is the mid-grey value VP8 prescribes when no neighbours
// are available -- confirm against the C reference predictor.
static WEBP_INLINE void DC8(uint8_t* dst, int do_top, int do_left) {
  uint16x8_t sum_top = vdupq_n_u16(0);
  uint16x8_t sum_left = vdupq_n_u16(0);
  uint8x8_t dc0;

  if (do_top) {
    const uint8x8_t A = vld1_u8(dst - BPS);  // top row
    const uint16x4_t p0 = vpaddl_u8(A);  // cascading summation of the top
    const uint16x4_t p1 = vpadd_u16(p0, p0);
    const uint16x4_t p2 = vpadd_u16(p1, p1);
    sum_top = vcombine_u16(p2, p2);
  }

  if (do_left) {
    // Only lane 0 of each load (the pixel left of row i) matters below.
    int i;
    for (i = 0; i < 8; ++i) {
      sum_left = vaddq_u16(sum_left, vmovl_u8(vld1_u8(dst + i * BPS - 1)));
    }
  }

  if (do_top && do_left) {
    const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
    dc0 = vrshrn_n_u16(sum, 4);
  } else if (do_top) {
    dc0 = vrshrn_n_u16(sum_top, 3);
  } else if (do_left) {
    dc0 = vrshrn_n_u16(sum_left, 3);
  } else {
    dc0 = vdup_n_u8(0x80);
  }

  {
    const uint8x8_t dc = vdup_lane_u8(dc0, 0);
    int i;
    for (i = 0; i < 8; ++i) {
      vst1_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(dc));
    }
  }
}
1440
+
1441
// DC8() using both the top row and the left column.
static void DC8uv(uint8_t* dst) {
  DC8(dst, 1, 1);
}
1442
// DC8() using the left column only.
static void DC8uvNoTop(uint8_t* dst) {
  DC8(dst, 0, 1);
}
1443
// DC8() using the top row only.
static void DC8uvNoLeft(uint8_t* dst) {
  DC8(dst, 1, 0);
}
1444
+
1445
// 8x8 entry point: TrueMotion() with size 8.
static void TM8uv(uint8_t* dst) {
  TrueMotion(dst, 8);
}
1446
+
1447
+ #endif // WEBP_USE_NEON
1448
+
1449
+ //------------------------------------------------------------------------------
1450
+ // Entry point
1451
+
1452
+ extern void VP8DspInitNEON(void);
1453
+
1454
+ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitNEON(void) {
1455
+ #if defined(WEBP_USE_NEON)
1456
+ VP8Transform = TransformTwo;
1457
+ VP8TransformAC3 = TransformAC3;
1458
+ VP8TransformDC = TransformDC;
1459
+ VP8TransformWHT = TransformWHT;
1460
+
1461
+ VP8VFilter16 = VFilter16;
1462
+ VP8VFilter16i = VFilter16i;
1463
+ VP8HFilter16 = HFilter16;
1464
+ #if !defined(WORK_AROUND_GCC)
1465
+ VP8HFilter16i = HFilter16i;
1466
+ #endif
1467
+ VP8VFilter8 = VFilter8;
1468
+ VP8VFilter8i = VFilter8i;
1469
+ #if !defined(WORK_AROUND_GCC)
1470
+ VP8HFilter8 = HFilter8;
1471
+ VP8HFilter8i = HFilter8i;
1472
+ #endif
1473
+ VP8SimpleVFilter16 = SimpleVFilter16;
1474
+ VP8SimpleHFilter16 = SimpleHFilter16;
1475
+ VP8SimpleVFilter16i = SimpleVFilter16i;
1476
+ VP8SimpleHFilter16i = SimpleHFilter16i;
1477
+
1478
+ VP8PredLuma4[0] = DC4;
1479
+ VP8PredLuma4[1] = TM4;
1480
+ VP8PredLuma4[2] = VE4;
1481
+ VP8PredLuma4[4] = RD4;
1482
+ VP8PredLuma4[6] = LD4;
1483
+
1484
+ VP8PredChroma8[0] = DC8uv;
1485
+ VP8PredChroma8[1] = TM8uv;
1486
+ VP8PredChroma8[4] = DC8uvNoTop;
1487
+ VP8PredChroma8[5] = DC8uvNoLeft;
1488
+ #endif // WEBP_USE_NEON
1489
+ }