rfreeimage 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (860) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +1 -0
  4. data/Rakefile +34 -0
  5. data/ext/rfreeimage/extconf.rb +35 -0
  6. data/ext/rfreeimage/rfi_main.c +389 -0
  7. data/lib/rfreeimage/image.rb +26 -0
  8. data/lib/rfreeimage/version.rb +3 -0
  9. data/lib/rfreeimage.rb +3 -0
  10. data/rfreeimage.gemspec +32 -0
  11. data/vendor/FreeImage/Makefile +34 -0
  12. data/vendor/FreeImage/Makefile.cygwin +74 -0
  13. data/vendor/FreeImage/Makefile.fip +84 -0
  14. data/vendor/FreeImage/Makefile.gnu +83 -0
  15. data/vendor/FreeImage/Makefile.iphone +96 -0
  16. data/vendor/FreeImage/Makefile.mingw +136 -0
  17. data/vendor/FreeImage/Makefile.osx +115 -0
  18. data/vendor/FreeImage/Makefile.solaris +66 -0
  19. data/vendor/FreeImage/Makefile.srcs +6 -0
  20. data/vendor/FreeImage/README.iphone +19 -0
  21. data/vendor/FreeImage/README.linux +50 -0
  22. data/vendor/FreeImage/README.minGW +236 -0
  23. data/vendor/FreeImage/README.osx +44 -0
  24. data/vendor/FreeImage/README.solaris +67 -0
  25. data/vendor/FreeImage/Source/CacheFile.h +92 -0
  26. data/vendor/FreeImage/Source/DeprecationManager/Deprecated.cpp +36 -0
  27. data/vendor/FreeImage/Source/DeprecationManager/DeprecationMgr.cpp +103 -0
  28. data/vendor/FreeImage/Source/DeprecationManager/DeprecationMgr.h +83 -0
  29. data/vendor/FreeImage/Source/FreeImage/BitmapAccess.cpp +1573 -0
  30. data/vendor/FreeImage/Source/FreeImage/CacheFile.cpp +271 -0
  31. data/vendor/FreeImage/Source/FreeImage/ColorLookup.cpp +785 -0
  32. data/vendor/FreeImage/Source/FreeImage/Conversion.cpp +551 -0
  33. data/vendor/FreeImage/Source/FreeImage/Conversion16_555.cpp +209 -0
  34. data/vendor/FreeImage/Source/FreeImage/Conversion16_565.cpp +204 -0
  35. data/vendor/FreeImage/Source/FreeImage/Conversion24.cpp +252 -0
  36. data/vendor/FreeImage/Source/FreeImage/Conversion32.cpp +345 -0
  37. data/vendor/FreeImage/Source/FreeImage/Conversion4.cpp +246 -0
  38. data/vendor/FreeImage/Source/FreeImage/Conversion8.cpp +305 -0
  39. data/vendor/FreeImage/Source/FreeImage/ConversionFloat.cpp +194 -0
  40. data/vendor/FreeImage/Source/FreeImage/ConversionRGB16.cpp +144 -0
  41. data/vendor/FreeImage/Source/FreeImage/ConversionRGBA16.cpp +147 -0
  42. data/vendor/FreeImage/Source/FreeImage/ConversionRGBAF.cpp +250 -0
  43. data/vendor/FreeImage/Source/FreeImage/ConversionRGBF.cpp +243 -0
  44. data/vendor/FreeImage/Source/FreeImage/ConversionType.cpp +699 -0
  45. data/vendor/FreeImage/Source/FreeImage/ConversionUINT16.cpp +134 -0
  46. data/vendor/FreeImage/Source/FreeImage/FreeImage.cpp +226 -0
  47. data/vendor/FreeImage/Source/FreeImage/FreeImageC.c +22 -0
  48. data/vendor/FreeImage/Source/FreeImage/FreeImageIO.cpp +175 -0
  49. data/vendor/FreeImage/Source/FreeImage/GetType.cpp +92 -0
  50. data/vendor/FreeImage/Source/FreeImage/Halftoning.cpp +474 -0
  51. data/vendor/FreeImage/Source/FreeImage/J2KHelper.cpp +591 -0
  52. data/vendor/FreeImage/Source/FreeImage/J2KHelper.h +36 -0
  53. data/vendor/FreeImage/Source/FreeImage/LFPQuantizer.cpp +208 -0
  54. data/vendor/FreeImage/Source/FreeImage/MNGHelper.cpp +1320 -0
  55. data/vendor/FreeImage/Source/FreeImage/MemoryIO.cpp +237 -0
  56. data/vendor/FreeImage/Source/FreeImage/MultiPage.cpp +974 -0
  57. data/vendor/FreeImage/Source/FreeImage/NNQuantizer.cpp +507 -0
  58. data/vendor/FreeImage/Source/FreeImage/PSDParser.cpp +1057 -0
  59. data/vendor/FreeImage/Source/FreeImage/PSDParser.h +271 -0
  60. data/vendor/FreeImage/Source/FreeImage/PixelAccess.cpp +197 -0
  61. data/vendor/FreeImage/Source/FreeImage/Plugin.cpp +822 -0
  62. data/vendor/FreeImage/Source/FreeImage/PluginBMP.cpp +1494 -0
  63. data/vendor/FreeImage/Source/FreeImage/PluginCUT.cpp +240 -0
  64. data/vendor/FreeImage/Source/FreeImage/PluginDDS.cpp +655 -0
  65. data/vendor/FreeImage/Source/FreeImage/PluginEXR.cpp +773 -0
  66. data/vendor/FreeImage/Source/FreeImage/PluginG3.cpp +433 -0
  67. data/vendor/FreeImage/Source/FreeImage/PluginGIF.cpp +1407 -0
  68. data/vendor/FreeImage/Source/FreeImage/PluginHDR.cpp +722 -0
  69. data/vendor/FreeImage/Source/FreeImage/PluginICO.cpp +824 -0
  70. data/vendor/FreeImage/Source/FreeImage/PluginIFF.cpp +459 -0
  71. data/vendor/FreeImage/Source/FreeImage/PluginJ2K.cpp +328 -0
  72. data/vendor/FreeImage/Source/FreeImage/PluginJNG.cpp +162 -0
  73. data/vendor/FreeImage/Source/FreeImage/PluginJP2.cpp +328 -0
  74. data/vendor/FreeImage/Source/FreeImage/PluginJPEG.cpp +1706 -0
  75. data/vendor/FreeImage/Source/FreeImage/PluginJXR.cpp +1475 -0
  76. data/vendor/FreeImage/Source/FreeImage/PluginKOALA.cpp +243 -0
  77. data/vendor/FreeImage/Source/FreeImage/PluginMNG.cpp +153 -0
  78. data/vendor/FreeImage/Source/FreeImage/PluginPCD.cpp +251 -0
  79. data/vendor/FreeImage/Source/FreeImage/PluginPCX.cpp +659 -0
  80. data/vendor/FreeImage/Source/FreeImage/PluginPFM.cpp +409 -0
  81. data/vendor/FreeImage/Source/FreeImage/PluginPICT.cpp +1343 -0
  82. data/vendor/FreeImage/Source/FreeImage/PluginPNG.cpp +1115 -0
  83. data/vendor/FreeImage/Source/FreeImage/PluginPNM.cpp +838 -0
  84. data/vendor/FreeImage/Source/FreeImage/PluginPSD.cpp +131 -0
  85. data/vendor/FreeImage/Source/FreeImage/PluginRAS.cpp +512 -0
  86. data/vendor/FreeImage/Source/FreeImage/PluginRAW.cpp +793 -0
  87. data/vendor/FreeImage/Source/FreeImage/PluginSGI.cpp +425 -0
  88. data/vendor/FreeImage/Source/FreeImage/PluginTARGA.cpp +1591 -0
  89. data/vendor/FreeImage/Source/FreeImage/PluginTIFF.cpp +2631 -0
  90. data/vendor/FreeImage/Source/FreeImage/PluginWBMP.cpp +372 -0
  91. data/vendor/FreeImage/Source/FreeImage/PluginWebP.cpp +698 -0
  92. data/vendor/FreeImage/Source/FreeImage/PluginXBM.cpp +399 -0
  93. data/vendor/FreeImage/Source/FreeImage/PluginXPM.cpp +487 -0
  94. data/vendor/FreeImage/Source/FreeImage/TIFFLogLuv.cpp +65 -0
  95. data/vendor/FreeImage/Source/FreeImage/ToneMapping.cpp +75 -0
  96. data/vendor/FreeImage/Source/FreeImage/WuQuantizer.cpp +559 -0
  97. data/vendor/FreeImage/Source/FreeImage/ZLibInterface.cpp +223 -0
  98. data/vendor/FreeImage/Source/FreeImage/tmoColorConvert.cpp +479 -0
  99. data/vendor/FreeImage/Source/FreeImage/tmoDrago03.cpp +295 -0
  100. data/vendor/FreeImage/Source/FreeImage/tmoFattal02.cpp +689 -0
  101. data/vendor/FreeImage/Source/FreeImage/tmoReinhard05.cpp +260 -0
  102. data/vendor/FreeImage/Source/FreeImage.h +1153 -0
  103. data/vendor/FreeImage/Source/FreeImageIO.h +63 -0
  104. data/vendor/FreeImage/Source/FreeImageToolkit/BSplineRotate.cpp +730 -0
  105. data/vendor/FreeImage/Source/FreeImageToolkit/Background.cpp +895 -0
  106. data/vendor/FreeImage/Source/FreeImageToolkit/Channels.cpp +488 -0
  107. data/vendor/FreeImage/Source/FreeImageToolkit/ClassicRotate.cpp +917 -0
  108. data/vendor/FreeImage/Source/FreeImageToolkit/Colors.cpp +967 -0
  109. data/vendor/FreeImage/Source/FreeImageToolkit/CopyPaste.cpp +861 -0
  110. data/vendor/FreeImage/Source/FreeImageToolkit/Display.cpp +230 -0
  111. data/vendor/FreeImage/Source/FreeImageToolkit/Filters.h +287 -0
  112. data/vendor/FreeImage/Source/FreeImageToolkit/Flip.cpp +166 -0
  113. data/vendor/FreeImage/Source/FreeImageToolkit/JPEGTransform.cpp +623 -0
  114. data/vendor/FreeImage/Source/FreeImageToolkit/MultigridPoissonSolver.cpp +505 -0
  115. data/vendor/FreeImage/Source/FreeImageToolkit/Rescale.cpp +192 -0
  116. data/vendor/FreeImage/Source/FreeImageToolkit/Resize.cpp +2116 -0
  117. data/vendor/FreeImage/Source/FreeImageToolkit/Resize.h +196 -0
  118. data/vendor/FreeImage/Source/LibJPEG/ansi2knr.c +739 -0
  119. data/vendor/FreeImage/Source/LibJPEG/cderror.h +134 -0
  120. data/vendor/FreeImage/Source/LibJPEG/cdjpeg.c +181 -0
  121. data/vendor/FreeImage/Source/LibJPEG/cdjpeg.h +187 -0
  122. data/vendor/FreeImage/Source/LibJPEG/cjpeg.c +664 -0
  123. data/vendor/FreeImage/Source/LibJPEG/ckconfig.c +402 -0
  124. data/vendor/FreeImage/Source/LibJPEG/djpeg.c +617 -0
  125. data/vendor/FreeImage/Source/LibJPEG/example.c +433 -0
  126. data/vendor/FreeImage/Source/LibJPEG/jaricom.c +153 -0
  127. data/vendor/FreeImage/Source/LibJPEG/jcapimin.c +288 -0
  128. data/vendor/FreeImage/Source/LibJPEG/jcapistd.c +162 -0
  129. data/vendor/FreeImage/Source/LibJPEG/jcarith.c +944 -0
  130. data/vendor/FreeImage/Source/LibJPEG/jccoefct.c +454 -0
  131. data/vendor/FreeImage/Source/LibJPEG/jccolor.c +604 -0
  132. data/vendor/FreeImage/Source/LibJPEG/jcdctmgr.c +477 -0
  133. data/vendor/FreeImage/Source/LibJPEG/jchuff.c +1573 -0
  134. data/vendor/FreeImage/Source/LibJPEG/jcinit.c +84 -0
  135. data/vendor/FreeImage/Source/LibJPEG/jcmainct.c +297 -0
  136. data/vendor/FreeImage/Source/LibJPEG/jcmarker.c +719 -0
  137. data/vendor/FreeImage/Source/LibJPEG/jcmaster.c +856 -0
  138. data/vendor/FreeImage/Source/LibJPEG/jcomapi.c +106 -0
  139. data/vendor/FreeImage/Source/LibJPEG/jconfig.h +161 -0
  140. data/vendor/FreeImage/Source/LibJPEG/jcparam.c +675 -0
  141. data/vendor/FreeImage/Source/LibJPEG/jcprepct.c +358 -0
  142. data/vendor/FreeImage/Source/LibJPEG/jcsample.c +545 -0
  143. data/vendor/FreeImage/Source/LibJPEG/jctrans.c +385 -0
  144. data/vendor/FreeImage/Source/LibJPEG/jdapimin.c +399 -0
  145. data/vendor/FreeImage/Source/LibJPEG/jdapistd.c +276 -0
  146. data/vendor/FreeImage/Source/LibJPEG/jdarith.c +796 -0
  147. data/vendor/FreeImage/Source/LibJPEG/jdatadst.c +270 -0
  148. data/vendor/FreeImage/Source/LibJPEG/jdatasrc.c +275 -0
  149. data/vendor/FreeImage/Source/LibJPEG/jdcoefct.c +741 -0
  150. data/vendor/FreeImage/Source/LibJPEG/jdcolor.c +748 -0
  151. data/vendor/FreeImage/Source/LibJPEG/jdct.h +393 -0
  152. data/vendor/FreeImage/Source/LibJPEG/jddctmgr.c +384 -0
  153. data/vendor/FreeImage/Source/LibJPEG/jdhuff.c +1554 -0
  154. data/vendor/FreeImage/Source/LibJPEG/jdinput.c +662 -0
  155. data/vendor/FreeImage/Source/LibJPEG/jdmainct.c +513 -0
  156. data/vendor/FreeImage/Source/LibJPEG/jdmarker.c +1511 -0
  157. data/vendor/FreeImage/Source/LibJPEG/jdmaster.c +543 -0
  158. data/vendor/FreeImage/Source/LibJPEG/jdmerge.c +401 -0
  159. data/vendor/FreeImage/Source/LibJPEG/jdpostct.c +290 -0
  160. data/vendor/FreeImage/Source/LibJPEG/jdsample.c +361 -0
  161. data/vendor/FreeImage/Source/LibJPEG/jdtrans.c +140 -0
  162. data/vendor/FreeImage/Source/LibJPEG/jerror.c +253 -0
  163. data/vendor/FreeImage/Source/LibJPEG/jerror.h +304 -0
  164. data/vendor/FreeImage/Source/LibJPEG/jfdctflt.c +174 -0
  165. data/vendor/FreeImage/Source/LibJPEG/jfdctfst.c +230 -0
  166. data/vendor/FreeImage/Source/LibJPEG/jfdctint.c +4406 -0
  167. data/vendor/FreeImage/Source/LibJPEG/jidctflt.c +235 -0
  168. data/vendor/FreeImage/Source/LibJPEG/jidctfst.c +368 -0
  169. data/vendor/FreeImage/Source/LibJPEG/jidctint.c +5179 -0
  170. data/vendor/FreeImage/Source/LibJPEG/jinclude.h +91 -0
  171. data/vendor/FreeImage/Source/LibJPEG/jmemansi.c +167 -0
  172. data/vendor/FreeImage/Source/LibJPEG/jmemdos.c +638 -0
  173. data/vendor/FreeImage/Source/LibJPEG/jmemmac.c +289 -0
  174. data/vendor/FreeImage/Source/LibJPEG/jmemmgr.c +1119 -0
  175. data/vendor/FreeImage/Source/LibJPEG/jmemname.c +276 -0
  176. data/vendor/FreeImage/Source/LibJPEG/jmemnobs.c +109 -0
  177. data/vendor/FreeImage/Source/LibJPEG/jmemsys.h +198 -0
  178. data/vendor/FreeImage/Source/LibJPEG/jmorecfg.h +442 -0
  179. data/vendor/FreeImage/Source/LibJPEG/jpegint.h +426 -0
  180. data/vendor/FreeImage/Source/LibJPEG/jpeglib.h +1180 -0
  181. data/vendor/FreeImage/Source/LibJPEG/jpegtran.c +577 -0
  182. data/vendor/FreeImage/Source/LibJPEG/jquant1.c +857 -0
  183. data/vendor/FreeImage/Source/LibJPEG/jquant2.c +1311 -0
  184. data/vendor/FreeImage/Source/LibJPEG/jutils.c +227 -0
  185. data/vendor/FreeImage/Source/LibJPEG/jversion.h +14 -0
  186. data/vendor/FreeImage/Source/LibJPEG/rdbmp.c +480 -0
  187. data/vendor/FreeImage/Source/LibJPEG/rdcolmap.c +253 -0
  188. data/vendor/FreeImage/Source/LibJPEG/rdgif.c +38 -0
  189. data/vendor/FreeImage/Source/LibJPEG/rdjpgcom.c +515 -0
  190. data/vendor/FreeImage/Source/LibJPEG/rdppm.c +459 -0
  191. data/vendor/FreeImage/Source/LibJPEG/rdrle.c +387 -0
  192. data/vendor/FreeImage/Source/LibJPEG/rdswitch.c +365 -0
  193. data/vendor/FreeImage/Source/LibJPEG/rdtarga.c +500 -0
  194. data/vendor/FreeImage/Source/LibJPEG/transupp.c +1763 -0
  195. data/vendor/FreeImage/Source/LibJPEG/transupp.h +219 -0
  196. data/vendor/FreeImage/Source/LibJPEG/wrbmp.c +442 -0
  197. data/vendor/FreeImage/Source/LibJPEG/wrgif.c +399 -0
  198. data/vendor/FreeImage/Source/LibJPEG/wrjpgcom.c +583 -0
  199. data/vendor/FreeImage/Source/LibJPEG/wrppm.c +269 -0
  200. data/vendor/FreeImage/Source/LibJPEG/wrrle.c +305 -0
  201. data/vendor/FreeImage/Source/LibJPEG/wrtarga.c +253 -0
  202. data/vendor/FreeImage/Source/LibJXR/common/include/guiddef.h +230 -0
  203. data/vendor/FreeImage/Source/LibJXR/common/include/wmsal.h +757 -0
  204. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstring.h +342 -0
  205. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_adt.h +71 -0
  206. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_strict.h +1096 -0
  207. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_undef.h +406 -0
  208. data/vendor/FreeImage/Source/LibJXR/image/decode/JXRTranscode.c +987 -0
  209. data/vendor/FreeImage/Source/LibJXR/image/decode/decode.c +200 -0
  210. data/vendor/FreeImage/Source/LibJXR/image/decode/decode.h +143 -0
  211. data/vendor/FreeImage/Source/LibJXR/image/decode/postprocess.c +288 -0
  212. data/vendor/FreeImage/Source/LibJXR/image/decode/segdec.c +1205 -0
  213. data/vendor/FreeImage/Source/LibJXR/image/decode/strInvTransform.c +1888 -0
  214. data/vendor/FreeImage/Source/LibJXR/image/decode/strPredQuantDec.c +539 -0
  215. data/vendor/FreeImage/Source/LibJXR/image/decode/strdec.c +3628 -0
  216. data/vendor/FreeImage/Source/LibJXR/image/decode/strdec_x86.c +1640 -0
  217. data/vendor/FreeImage/Source/LibJXR/image/encode/encode.c +144 -0
  218. data/vendor/FreeImage/Source/LibJXR/image/encode/encode.h +113 -0
  219. data/vendor/FreeImage/Source/LibJXR/image/encode/segenc.c +1186 -0
  220. data/vendor/FreeImage/Source/LibJXR/image/encode/strFwdTransform.c +1111 -0
  221. data/vendor/FreeImage/Source/LibJXR/image/encode/strPredQuantEnc.c +511 -0
  222. data/vendor/FreeImage/Source/LibJXR/image/encode/strenc.c +2370 -0
  223. data/vendor/FreeImage/Source/LibJXR/image/encode/strenc_x86.c +409 -0
  224. data/vendor/FreeImage/Source/LibJXR/image/sys/adapthuff.c +511 -0
  225. data/vendor/FreeImage/Source/LibJXR/image/sys/ansi.h +61 -0
  226. data/vendor/FreeImage/Source/LibJXR/image/sys/common.h +131 -0
  227. data/vendor/FreeImage/Source/LibJXR/image/sys/image.c +183 -0
  228. data/vendor/FreeImage/Source/LibJXR/image/sys/perfTimer.h +115 -0
  229. data/vendor/FreeImage/Source/LibJXR/image/sys/perfTimerANSI.c +274 -0
  230. data/vendor/FreeImage/Source/LibJXR/image/sys/strPredQuant.c +306 -0
  231. data/vendor/FreeImage/Source/LibJXR/image/sys/strTransform.c +85 -0
  232. data/vendor/FreeImage/Source/LibJXR/image/sys/strTransform.h +50 -0
  233. data/vendor/FreeImage/Source/LibJXR/image/sys/strcodec.c +1251 -0
  234. data/vendor/FreeImage/Source/LibJXR/image/sys/strcodec.h +681 -0
  235. data/vendor/FreeImage/Source/LibJXR/image/sys/windowsmediaphoto.h +515 -0
  236. data/vendor/FreeImage/Source/LibJXR/image/sys/xplatform_image.h +84 -0
  237. data/vendor/FreeImage/Source/LibJXR/image/x86/x86.h +58 -0
  238. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlue.c +930 -0
  239. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlue.h +636 -0
  240. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlueJxr.c +2246 -0
  241. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGluePFC.c +2338 -0
  242. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRMeta.c +905 -0
  243. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRMeta.h +258 -0
  244. data/vendor/FreeImage/Source/LibOpenJPEG/bio.c +188 -0
  245. data/vendor/FreeImage/Source/LibOpenJPEG/bio.h +128 -0
  246. data/vendor/FreeImage/Source/LibOpenJPEG/cidx_manager.c +239 -0
  247. data/vendor/FreeImage/Source/LibOpenJPEG/cidx_manager.h +68 -0
  248. data/vendor/FreeImage/Source/LibOpenJPEG/cio.c +644 -0
  249. data/vendor/FreeImage/Source/LibOpenJPEG/cio.h +393 -0
  250. data/vendor/FreeImage/Source/LibOpenJPEG/dwt.c +919 -0
  251. data/vendor/FreeImage/Source/LibOpenJPEG/dwt.h +116 -0
  252. data/vendor/FreeImage/Source/LibOpenJPEG/event.c +141 -0
  253. data/vendor/FreeImage/Source/LibOpenJPEG/event.h +97 -0
  254. data/vendor/FreeImage/Source/LibOpenJPEG/function_list.c +114 -0
  255. data/vendor/FreeImage/Source/LibOpenJPEG/function_list.h +126 -0
  256. data/vendor/FreeImage/Source/LibOpenJPEG/image.c +235 -0
  257. data/vendor/FreeImage/Source/LibOpenJPEG/image.h +63 -0
  258. data/vendor/FreeImage/Source/LibOpenJPEG/indexbox_manager.h +148 -0
  259. data/vendor/FreeImage/Source/LibOpenJPEG/invert.c +289 -0
  260. data/vendor/FreeImage/Source/LibOpenJPEG/invert.h +59 -0
  261. data/vendor/FreeImage/Source/LibOpenJPEG/j2k.c +10238 -0
  262. data/vendor/FreeImage/Source/LibOpenJPEG/j2k.h +838 -0
  263. data/vendor/FreeImage/Source/LibOpenJPEG/jp2.c +2776 -0
  264. data/vendor/FreeImage/Source/LibOpenJPEG/jp2.h +490 -0
  265. data/vendor/FreeImage/Source/LibOpenJPEG/mct.c +319 -0
  266. data/vendor/FreeImage/Source/LibOpenJPEG/mct.h +149 -0
  267. data/vendor/FreeImage/Source/LibOpenJPEG/mqc.c +604 -0
  268. data/vendor/FreeImage/Source/LibOpenJPEG/mqc.h +201 -0
  269. data/vendor/FreeImage/Source/LibOpenJPEG/openjpeg.c +955 -0
  270. data/vendor/FreeImage/Source/LibOpenJPEG/openjpeg.h +1475 -0
  271. data/vendor/FreeImage/Source/LibOpenJPEG/opj_clock.c +59 -0
  272. data/vendor/FreeImage/Source/LibOpenJPEG/opj_clock.h +54 -0
  273. data/vendor/FreeImage/Source/LibOpenJPEG/opj_codec.h +160 -0
  274. data/vendor/FreeImage/Source/LibOpenJPEG/opj_config.h +9 -0
  275. data/vendor/FreeImage/Source/LibOpenJPEG/opj_config_private.h +16 -0
  276. data/vendor/FreeImage/Source/LibOpenJPEG/opj_includes.h +175 -0
  277. data/vendor/FreeImage/Source/LibOpenJPEG/opj_intmath.h +172 -0
  278. data/vendor/FreeImage/Source/LibOpenJPEG/opj_inttypes.h +43 -0
  279. data/vendor/FreeImage/Source/LibOpenJPEG/opj_malloc.h +180 -0
  280. data/vendor/FreeImage/Source/LibOpenJPEG/opj_stdint.h +47 -0
  281. data/vendor/FreeImage/Source/LibOpenJPEG/phix_manager.c +191 -0
  282. data/vendor/FreeImage/Source/LibOpenJPEG/pi.c +1870 -0
  283. data/vendor/FreeImage/Source/LibOpenJPEG/pi.h +182 -0
  284. data/vendor/FreeImage/Source/LibOpenJPEG/ppix_manager.c +194 -0
  285. data/vendor/FreeImage/Source/LibOpenJPEG/raw.c +89 -0
  286. data/vendor/FreeImage/Source/LibOpenJPEG/raw.h +100 -0
  287. data/vendor/FreeImage/Source/LibOpenJPEG/t1.c +1751 -0
  288. data/vendor/FreeImage/Source/LibOpenJPEG/t1.h +157 -0
  289. data/vendor/FreeImage/Source/LibOpenJPEG/t1_generate_luts.c +276 -0
  290. data/vendor/FreeImage/Source/LibOpenJPEG/t1_luts.h +143 -0
  291. data/vendor/FreeImage/Source/LibOpenJPEG/t2.c +1334 -0
  292. data/vendor/FreeImage/Source/LibOpenJPEG/t2.h +127 -0
  293. data/vendor/FreeImage/Source/LibOpenJPEG/tcd.c +2123 -0
  294. data/vendor/FreeImage/Source/LibOpenJPEG/tcd.h +348 -0
  295. data/vendor/FreeImage/Source/LibOpenJPEG/tgt.c +331 -0
  296. data/vendor/FreeImage/Source/LibOpenJPEG/tgt.h +140 -0
  297. data/vendor/FreeImage/Source/LibOpenJPEG/thix_manager.c +134 -0
  298. data/vendor/FreeImage/Source/LibOpenJPEG/tpix_manager.c +185 -0
  299. data/vendor/FreeImage/Source/LibPNG/example.c +1061 -0
  300. data/vendor/FreeImage/Source/LibPNG/png.c +4493 -0
  301. data/vendor/FreeImage/Source/LibPNG/png.h +3282 -0
  302. data/vendor/FreeImage/Source/LibPNG/pngconf.h +644 -0
  303. data/vendor/FreeImage/Source/LibPNG/pngdebug.h +154 -0
  304. data/vendor/FreeImage/Source/LibPNG/pngerror.c +963 -0
  305. data/vendor/FreeImage/Source/LibPNG/pngget.c +1213 -0
  306. data/vendor/FreeImage/Source/LibPNG/pnginfo.h +260 -0
  307. data/vendor/FreeImage/Source/LibPNG/pnglibconf.h +218 -0
  308. data/vendor/FreeImage/Source/LibPNG/pngmem.c +281 -0
  309. data/vendor/FreeImage/Source/LibPNG/pngpread.c +1168 -0
  310. data/vendor/FreeImage/Source/LibPNG/pngpriv.h +1944 -0
  311. data/vendor/FreeImage/Source/LibPNG/pngread.c +4121 -0
  312. data/vendor/FreeImage/Source/LibPNG/pngrio.c +120 -0
  313. data/vendor/FreeImage/Source/LibPNG/pngrtran.c +4994 -0
  314. data/vendor/FreeImage/Source/LibPNG/pngrutil.c +4474 -0
  315. data/vendor/FreeImage/Source/LibPNG/pngset.c +1611 -0
  316. data/vendor/FreeImage/Source/LibPNG/pngstruct.h +489 -0
  317. data/vendor/FreeImage/Source/LibPNG/pngtest.c +2011 -0
  318. data/vendor/FreeImage/Source/LibPNG/pngtrans.c +849 -0
  319. data/vendor/FreeImage/Source/LibPNG/pngwio.c +168 -0
  320. data/vendor/FreeImage/Source/LibPNG/pngwrite.c +2455 -0
  321. data/vendor/FreeImage/Source/LibPNG/pngwtran.c +574 -0
  322. data/vendor/FreeImage/Source/LibPNG/pngwutil.c +3029 -0
  323. data/vendor/FreeImage/Source/LibRawLite/dcraw/dcraw.c +15462 -0
  324. data/vendor/FreeImage/Source/LibRawLite/internal/aahd_demosaic.cpp +706 -0
  325. data/vendor/FreeImage/Source/LibRawLite/internal/dcb_demosaicing.c +710 -0
  326. data/vendor/FreeImage/Source/LibRawLite/internal/dcraw_common.cpp +13593 -0
  327. data/vendor/FreeImage/Source/LibRawLite/internal/dcraw_fileio.cpp +240 -0
  328. data/vendor/FreeImage/Source/LibRawLite/internal/defines.h +167 -0
  329. data/vendor/FreeImage/Source/LibRawLite/internal/demosaic_packs.cpp +99 -0
  330. data/vendor/FreeImage/Source/LibRawLite/internal/dht_demosaic.cpp +873 -0
  331. data/vendor/FreeImage/Source/LibRawLite/internal/libraw_internal_funcs.h +282 -0
  332. data/vendor/FreeImage/Source/LibRawLite/internal/libraw_x3f.cpp +1919 -0
  333. data/vendor/FreeImage/Source/LibRawLite/internal/var_defines.h +216 -0
  334. data/vendor/FreeImage/Source/LibRawLite/internal/wf_filtering.cpp +1950 -0
  335. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw.h +338 -0
  336. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_alloc.h +99 -0
  337. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_const.h +233 -0
  338. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_datastream.h +238 -0
  339. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_internal.h +225 -0
  340. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_types.h +442 -0
  341. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_version.h +62 -0
  342. data/vendor/FreeImage/Source/LibRawLite/src/libraw_c_api.cpp +230 -0
  343. data/vendor/FreeImage/Source/LibRawLite/src/libraw_cxx.cpp +4533 -0
  344. data/vendor/FreeImage/Source/LibRawLite/src/libraw_datastream.cpp +703 -0
  345. data/vendor/FreeImage/Source/LibTIFF4/mkg3states.c +451 -0
  346. data/vendor/FreeImage/Source/LibTIFF4/mkspans.c +82 -0
  347. data/vendor/FreeImage/Source/LibTIFF4/t4.h +292 -0
  348. data/vendor/FreeImage/Source/LibTIFF4/tif_aux.c +358 -0
  349. data/vendor/FreeImage/Source/LibTIFF4/tif_close.c +140 -0
  350. data/vendor/FreeImage/Source/LibTIFF4/tif_codec.c +166 -0
  351. data/vendor/FreeImage/Source/LibTIFF4/tif_color.c +287 -0
  352. data/vendor/FreeImage/Source/LibTIFF4/tif_compress.c +304 -0
  353. data/vendor/FreeImage/Source/LibTIFF4/tif_config.h +97 -0
  354. data/vendor/FreeImage/Source/LibTIFF4/tif_config.vc.h +74 -0
  355. data/vendor/FreeImage/Source/LibTIFF4/tif_config.wince.h +71 -0
  356. data/vendor/FreeImage/Source/LibTIFF4/tif_dir.c +1700 -0
  357. data/vendor/FreeImage/Source/LibTIFF4/tif_dir.h +308 -0
  358. data/vendor/FreeImage/Source/LibTIFF4/tif_dirinfo.c +959 -0
  359. data/vendor/FreeImage/Source/LibTIFF4/tif_dirread.c +5640 -0
  360. data/vendor/FreeImage/Source/LibTIFF4/tif_dirwrite.c +2910 -0
  361. data/vendor/FreeImage/Source/LibTIFF4/tif_dumpmode.c +143 -0
  362. data/vendor/FreeImage/Source/LibTIFF4/tif_error.c +80 -0
  363. data/vendor/FreeImage/Source/LibTIFF4/tif_extension.c +118 -0
  364. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3.c +1595 -0
  365. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3.h +538 -0
  366. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3sm.c +1260 -0
  367. data/vendor/FreeImage/Source/LibTIFF4/tif_flush.c +118 -0
  368. data/vendor/FreeImage/Source/LibTIFF4/tif_getimage.c +2890 -0
  369. data/vendor/FreeImage/Source/LibTIFF4/tif_jbig.c +213 -0
  370. data/vendor/FreeImage/Source/LibTIFF4/tif_jpeg.c +2354 -0
  371. data/vendor/FreeImage/Source/LibTIFF4/tif_jpeg_12.c +65 -0
  372. data/vendor/FreeImage/Source/LibTIFF4/tif_luv.c +1683 -0
  373. data/vendor/FreeImage/Source/LibTIFF4/tif_lzma.c +495 -0
  374. data/vendor/FreeImage/Source/LibTIFF4/tif_lzw.c +1169 -0
  375. data/vendor/FreeImage/Source/LibTIFF4/tif_next.c +181 -0
  376. data/vendor/FreeImage/Source/LibTIFF4/tif_ojpeg.c +2501 -0
  377. data/vendor/FreeImage/Source/LibTIFF4/tif_open.c +725 -0
  378. data/vendor/FreeImage/Source/LibTIFF4/tif_packbits.c +300 -0
  379. data/vendor/FreeImage/Source/LibTIFF4/tif_pixarlog.c +1442 -0
  380. data/vendor/FreeImage/Source/LibTIFF4/tif_predict.c +764 -0
  381. data/vendor/FreeImage/Source/LibTIFF4/tif_predict.h +77 -0
  382. data/vendor/FreeImage/Source/LibTIFF4/tif_print.c +716 -0
  383. data/vendor/FreeImage/Source/LibTIFF4/tif_read.c +1086 -0
  384. data/vendor/FreeImage/Source/LibTIFF4/tif_strip.c +383 -0
  385. data/vendor/FreeImage/Source/LibTIFF4/tif_swab.c +310 -0
  386. data/vendor/FreeImage/Source/LibTIFF4/tif_thunder.c +207 -0
  387. data/vendor/FreeImage/Source/LibTIFF4/tif_tile.c +299 -0
  388. data/vendor/FreeImage/Source/LibTIFF4/tif_unix.c +325 -0
  389. data/vendor/FreeImage/Source/LibTIFF4/tif_version.c +40 -0
  390. data/vendor/FreeImage/Source/LibTIFF4/tif_vms.c +603 -0
  391. data/vendor/FreeImage/Source/LibTIFF4/tif_warning.c +81 -0
  392. data/vendor/FreeImage/Source/LibTIFF4/tif_win32.c +443 -0
  393. data/vendor/FreeImage/Source/LibTIFF4/tif_wince.c +293 -0
  394. data/vendor/FreeImage/Source/LibTIFF4/tif_write.c +771 -0
  395. data/vendor/FreeImage/Source/LibTIFF4/tif_zip.c +472 -0
  396. data/vendor/FreeImage/Source/LibTIFF4/tiff.h +681 -0
  397. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.h +170 -0
  398. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.vc.h +160 -0
  399. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.wince.h +121 -0
  400. data/vendor/FreeImage/Source/LibTIFF4/tiffio.h +557 -0
  401. data/vendor/FreeImage/Source/LibTIFF4/tiffiop.h +367 -0
  402. data/vendor/FreeImage/Source/LibTIFF4/tiffvers.h +9 -0
  403. data/vendor/FreeImage/Source/LibTIFF4/uvcode.h +180 -0
  404. data/vendor/FreeImage/Source/LibWebP/src/dec/alphai.h +55 -0
  405. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.alpha.c +167 -0
  406. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.buffer.c +249 -0
  407. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.frame.c +827 -0
  408. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.idec.c +857 -0
  409. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.io.c +640 -0
  410. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.quant.c +110 -0
  411. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.tree.c +525 -0
  412. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.vp8.c +663 -0
  413. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.vp8l.c +1584 -0
  414. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.webp.c +834 -0
  415. data/vendor/FreeImage/Source/LibWebP/src/dec/decode_vp8.h +185 -0
  416. data/vendor/FreeImage/Source/LibWebP/src/dec/vp8i.h +353 -0
  417. data/vendor/FreeImage/Source/LibWebP/src/dec/vp8li.h +136 -0
  418. data/vendor/FreeImage/Source/LibWebP/src/dec/webpi.h +120 -0
  419. data/vendor/FreeImage/Source/LibWebP/src/demux/demux.demux.c +957 -0
  420. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing.c +377 -0
  421. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing_mips_dsp_r2.c +139 -0
  422. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing_sse2.c +296 -0
  423. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb.c +68 -0
  424. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb_mips_dsp_r2.c +108 -0
  425. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb_sse2.c +62 -0
  426. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost.c +412 -0
  427. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_mips32.c +154 -0
  428. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_mips_dsp_r2.c +107 -0
  429. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_sse2.c +121 -0
  430. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cpu.c +138 -0
  431. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec.c +760 -0
  432. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_clip_tables.c +366 -0
  433. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_mips32.c +585 -0
  434. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c +992 -0
  435. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_neon.c +1489 -0
  436. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_sse2.c +1284 -0
  437. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc.c +788 -0
  438. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_avx2.c +24 -0
  439. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_mips32.c +670 -0
  440. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c +1510 -0
  441. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_neon.c +932 -0
  442. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_sse2.c +940 -0
  443. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters.c +240 -0
  444. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c +404 -0
  445. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters_sse2.c +349 -0
  446. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.h +434 -0
  447. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless.c +1838 -0
  448. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_mips32.c +416 -0
  449. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c +921 -0
  450. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_neon.c +357 -0
  451. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_sse2.c +535 -0
  452. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler.c +115 -0
  453. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler_mips32.c +192 -0
  454. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler_mips_dsp_r2.c +210 -0
  455. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling.c +252 -0
  456. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c +280 -0
  457. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_neon.c +267 -0
  458. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_sse2.c +214 -0
  459. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv.c +166 -0
  460. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_mips32.c +100 -0
  461. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c +131 -0
  462. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_sse2.c +322 -0
  463. data/vendor/FreeImage/Source/LibWebP/src/dsp/lossless.h +313 -0
  464. data/vendor/FreeImage/Source/LibWebP/src/dsp/mips_macro.h +200 -0
  465. data/vendor/FreeImage/Source/LibWebP/src/dsp/neon.h +82 -0
  466. data/vendor/FreeImage/Source/LibWebP/src/dsp/yuv.h +321 -0
  467. data/vendor/FreeImage/Source/LibWebP/src/dsp/yuv_tables_sse2.h +536 -0
  468. data/vendor/FreeImage/Source/LibWebP/src/enc/backward_references.h +202 -0
  469. data/vendor/FreeImage/Source/LibWebP/src/enc/cost.h +69 -0
  470. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.alpha.c +440 -0
  471. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.analysis.c +501 -0
  472. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.backward_references.c +1076 -0
  473. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.config.c +163 -0
  474. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.cost.c +355 -0
  475. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.filter.c +296 -0
  476. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.frame.c +850 -0
  477. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.histogram.c +897 -0
  478. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.iterator.c +456 -0
  479. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.near_lossless.c +160 -0
  480. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture.c +290 -0
  481. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_csp.c +1100 -0
  482. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_psnr.c +150 -0
  483. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_rescale.c +285 -0
  484. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_tools.c +206 -0
  485. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.quant.c +1191 -0
  486. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.syntax.c +383 -0
  487. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.token.c +285 -0
  488. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.tree.c +504 -0
  489. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.vp8l.c +1437 -0
  490. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.webpenc.c +379 -0
  491. data/vendor/FreeImage/Source/LibWebP/src/enc/histogram.h +114 -0
  492. data/vendor/FreeImage/Source/LibWebP/src/enc/vp8enci.h +551 -0
  493. data/vendor/FreeImage/Source/LibWebP/src/enc/vp8li.h +78 -0
  494. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.anim_encode.c +1241 -0
  495. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxedit.c +696 -0
  496. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxinternal.c +551 -0
  497. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxread.c +544 -0
  498. data/vendor/FreeImage/Source/LibWebP/src/mux/muxi.h +232 -0
  499. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_reader.h +168 -0
  500. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_reader_inl.h +172 -0
  501. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_writer.h +120 -0
  502. data/vendor/FreeImage/Source/LibWebP/src/utils/color_cache.h +74 -0
  503. data/vendor/FreeImage/Source/LibWebP/src/utils/endian_inl.h +100 -0
  504. data/vendor/FreeImage/Source/LibWebP/src/utils/filters.h +32 -0
  505. data/vendor/FreeImage/Source/LibWebP/src/utils/huffman.h +67 -0
  506. data/vendor/FreeImage/Source/LibWebP/src/utils/huffman_encode.h +60 -0
  507. data/vendor/FreeImage/Source/LibWebP/src/utils/quant_levels.h +36 -0
  508. data/vendor/FreeImage/Source/LibWebP/src/utils/quant_levels_dec.h +35 -0
  509. data/vendor/FreeImage/Source/LibWebP/src/utils/random.h +63 -0
  510. data/vendor/FreeImage/Source/LibWebP/src/utils/rescaler.h +78 -0
  511. data/vendor/FreeImage/Source/LibWebP/src/utils/thread.h +93 -0
  512. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.bit_reader.c +208 -0
  513. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.bit_writer.c +308 -0
  514. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.color_cache.c +49 -0
  515. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.filters.c +76 -0
  516. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.h +121 -0
  517. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.huffman.c +205 -0
  518. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.huffman_encode.c +417 -0
  519. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.quant_levels.c +140 -0
  520. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.quant_levels_dec.c +279 -0
  521. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.random.c +43 -0
  522. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.rescaler.c +82 -0
  523. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.thread.c +309 -0
  524. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.utils.c +211 -0
  525. data/vendor/FreeImage/Source/LibWebP/src/webp/decode.h +493 -0
  526. data/vendor/FreeImage/Source/LibWebP/src/webp/demux.h +224 -0
  527. data/vendor/FreeImage/Source/LibWebP/src/webp/encode.h +515 -0
  528. data/vendor/FreeImage/Source/LibWebP/src/webp/format_constants.h +88 -0
  529. data/vendor/FreeImage/Source/LibWebP/src/webp/mux.h +507 -0
  530. data/vendor/FreeImage/Source/LibWebP/src/webp/mux_types.h +97 -0
  531. data/vendor/FreeImage/Source/LibWebP/src/webp/types.h +52 -0
  532. data/vendor/FreeImage/Source/MapIntrospector.h +212 -0
  533. data/vendor/FreeImage/Source/Metadata/Exif.cpp +1253 -0
  534. data/vendor/FreeImage/Source/Metadata/FIRational.cpp +176 -0
  535. data/vendor/FreeImage/Source/Metadata/FIRational.h +108 -0
  536. data/vendor/FreeImage/Source/Metadata/FreeImageTag.cpp +353 -0
  537. data/vendor/FreeImage/Source/Metadata/FreeImageTag.h +500 -0
  538. data/vendor/FreeImage/Source/Metadata/IPTC.cpp +342 -0
  539. data/vendor/FreeImage/Source/Metadata/TagConversion.cpp +1094 -0
  540. data/vendor/FreeImage/Source/Metadata/TagLib.cpp +1618 -0
  541. data/vendor/FreeImage/Source/Metadata/XTIFF.cpp +766 -0
  542. data/vendor/FreeImage/Source/OpenEXR/Half/eLut.cpp +114 -0
  543. data/vendor/FreeImage/Source/OpenEXR/Half/eLut.h +71 -0
  544. data/vendor/FreeImage/Source/OpenEXR/Half/half.cpp +310 -0
  545. data/vendor/FreeImage/Source/OpenEXR/Half/half.h +757 -0
  546. data/vendor/FreeImage/Source/OpenEXR/Half/halfExport.h +27 -0
  547. data/vendor/FreeImage/Source/OpenEXR/Half/halfFunction.h +179 -0
  548. data/vendor/FreeImage/Source/OpenEXR/Half/halfLimits.h +102 -0
  549. data/vendor/FreeImage/Source/OpenEXR/Half/toFloat.cpp +164 -0
  550. data/vendor/FreeImage/Source/OpenEXR/Half/toFloat.h +16391 -0
  551. data/vendor/FreeImage/Source/OpenEXR/Iex/Iex.h +60 -0
  552. data/vendor/FreeImage/Source/OpenEXR/Iex/IexBaseExc.cpp +156 -0
  553. data/vendor/FreeImage/Source/OpenEXR/Iex/IexBaseExc.h +264 -0
  554. data/vendor/FreeImage/Source/OpenEXR/Iex/IexErrnoExc.h +208 -0
  555. data/vendor/FreeImage/Source/OpenEXR/Iex/IexExport.h +51 -0
  556. data/vendor/FreeImage/Source/OpenEXR/Iex/IexForward.h +229 -0
  557. data/vendor/FreeImage/Source/OpenEXR/Iex/IexMacros.h +170 -0
  558. data/vendor/FreeImage/Source/OpenEXR/Iex/IexMathExc.h +57 -0
  559. data/vendor/FreeImage/Source/OpenEXR/Iex/IexNamespace.h +112 -0
  560. data/vendor/FreeImage/Source/OpenEXR/Iex/IexThrowErrnoExc.cpp +873 -0
  561. data/vendor/FreeImage/Source/OpenEXR/Iex/IexThrowErrnoExc.h +97 -0
  562. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFloatExc.cpp +113 -0
  563. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFloatExc.h +146 -0
  564. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFpu.cpp +530 -0
  565. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFpu.h +91 -0
  566. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathIeeeExc.h +62 -0
  567. data/vendor/FreeImage/Source/OpenEXR/IlmBaseConfig.h +61 -0
  568. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAcesFile.cpp +633 -0
  569. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAcesFile.h +324 -0
  570. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfArray.h +285 -0
  571. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAttribute.cpp +158 -0
  572. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAttribute.h +407 -0
  573. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAutoArray.h +95 -0
  574. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfB44Compressor.cpp +1072 -0
  575. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfB44Compressor.h +118 -0
  576. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfBoxAttribute.cpp +111 -0
  577. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfBoxAttribute.h +87 -0
  578. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCRgbaFile.cpp +1438 -0
  579. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCRgbaFile.h +555 -0
  580. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelList.cpp +322 -0
  581. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelList.h +436 -0
  582. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelListAttribute.cpp +150 -0
  583. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelListAttribute.h +74 -0
  584. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCheckedArithmetic.h +163 -0
  585. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticities.cpp +151 -0
  586. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticities.h +131 -0
  587. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticitiesAttribute.cpp +87 -0
  588. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticitiesAttribute.h +73 -0
  589. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompositeDeepScanLine.cpp +591 -0
  590. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompositeDeepScanLine.h +142 -0
  591. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompression.h +84 -0
  592. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressionAttribute.cpp +78 -0
  593. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressionAttribute.h +64 -0
  594. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressor.cpp +226 -0
  595. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressor.h +265 -0
  596. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfConvert.cpp +143 -0
  597. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfConvert.h +107 -0
  598. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepCompositing.cpp +110 -0
  599. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepCompositing.h +132 -0
  600. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepFrameBuffer.cpp +230 -0
  601. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepFrameBuffer.h +339 -0
  602. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageState.h +96 -0
  603. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageStateAttribute.cpp +78 -0
  604. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageStateAttribute.h +68 -0
  605. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputFile.cpp +2025 -0
  606. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputFile.h +276 -0
  607. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputPart.cpp +149 -0
  608. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputPart.h +181 -0
  609. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputFile.cpp +1552 -0
  610. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputFile.h +244 -0
  611. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputPart.cpp +107 -0
  612. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputPart.h +168 -0
  613. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputFile.cpp +1979 -0
  614. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputFile.h +437 -0
  615. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputPart.cpp +273 -0
  616. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputPart.h +362 -0
  617. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputFile.cpp +2055 -0
  618. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputFile.h +475 -0
  619. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputPart.cpp +250 -0
  620. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputPart.h +394 -0
  621. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDoubleAttribute.cpp +57 -0
  622. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDoubleAttribute.h +59 -0
  623. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressor.cpp +3424 -0
  624. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressor.h +210 -0
  625. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressorSimd.h +2145 -0
  626. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmap.cpp +335 -0
  627. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmap.h +336 -0
  628. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmapAttribute.cpp +76 -0
  629. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmapAttribute.h +68 -0
  630. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfExport.h +46 -0
  631. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFastHuf.cpp +768 -0
  632. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFastHuf.h +148 -0
  633. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatAttribute.cpp +57 -0
  634. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatAttribute.h +58 -0
  635. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatVectorAttribute.cpp +84 -0
  636. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatVectorAttribute.h +76 -0
  637. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfForward.h +127 -0
  638. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFrameBuffer.cpp +228 -0
  639. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFrameBuffer.h +386 -0
  640. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFramesPerSecond.cpp +76 -0
  641. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFramesPerSecond.h +94 -0
  642. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericInputFile.cpp +76 -0
  643. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericInputFile.h +58 -0
  644. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericOutputFile.cpp +112 -0
  645. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericOutputFile.h +62 -0
  646. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHeader.cpp +1283 -0
  647. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHeader.h +699 -0
  648. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHuf.cpp +1114 -0
  649. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHuf.h +82 -0
  650. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIO.cpp +110 -0
  651. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIO.h +255 -0
  652. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputFile.cpp +895 -0
  653. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputFile.h +240 -0
  654. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPart.cpp +114 -0
  655. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPart.h +84 -0
  656. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPartData.cpp +51 -0
  657. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPartData.h +69 -0
  658. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputStreamMutex.h +68 -0
  659. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInt64.h +56 -0
  660. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIntAttribute.cpp +57 -0
  661. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIntAttribute.h +58 -0
  662. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCode.cpp +217 -0
  663. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCode.h +167 -0
  664. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCodeAttribute.cpp +99 -0
  665. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCodeAttribute.h +73 -0
  666. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrder.h +69 -0
  667. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrderAttribute.cpp +78 -0
  668. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrderAttribute.h +72 -0
  669. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLut.cpp +178 -0
  670. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLut.h +188 -0
  671. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMatrixAttribute.cpp +263 -0
  672. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMatrixAttribute.h +83 -0
  673. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMisc.cpp +1872 -0
  674. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMisc.h +466 -0
  675. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartInputFile.cpp +783 -0
  676. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartInputFile.h +128 -0
  677. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartOutputFile.cpp +519 -0
  678. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartOutputFile.h +118 -0
  679. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiView.cpp +435 -0
  680. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiView.h +187 -0
  681. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfName.h +150 -0
  682. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfNamespace.h +115 -0
  683. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOpaqueAttribute.cpp +126 -0
  684. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOpaqueAttribute.h +110 -0
  685. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOptimizedPixelReading.h +646 -0
  686. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputFile.cpp +1378 -0
  687. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputFile.h +263 -0
  688. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPart.cpp +105 -0
  689. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPart.h +77 -0
  690. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPartData.cpp +52 -0
  691. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPartData.h +62 -0
  692. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputStreamMutex.h +70 -0
  693. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartHelper.h +262 -0
  694. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartType.cpp +63 -0
  695. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartType.h +62 -0
  696. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPixelType.h +67 -0
  697. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPizCompressor.cpp +667 -0
  698. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPizCompressor.h +117 -0
  699. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImage.cpp +104 -0
  700. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImage.h +135 -0
  701. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImageAttribute.cpp +103 -0
  702. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImageAttribute.h +70 -0
  703. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPxr24Compressor.cpp +553 -0
  704. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPxr24Compressor.h +109 -0
  705. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRational.cpp +127 -0
  706. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRational.h +98 -0
  707. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRationalAttribute.cpp +74 -0
  708. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRationalAttribute.h +69 -0
  709. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgba.h +109 -0
  710. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaFile.cpp +1405 -0
  711. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaFile.h +346 -0
  712. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaYca.cpp +497 -0
  713. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaYca.h +259 -0
  714. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRle.cpp +157 -0
  715. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRle.h +63 -0
  716. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRleCompressor.cpp +220 -0
  717. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRleCompressor.h +80 -0
  718. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfScanLineInputFile.cpp +1702 -0
  719. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfScanLineInputFile.h +210 -0
  720. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSimd.h +59 -0
  721. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStandardAttributes.cpp +125 -0
  722. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStandardAttributes.h +382 -0
  723. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStdIO.cpp +242 -0
  724. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStdIO.h +160 -0
  725. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringAttribute.cpp +80 -0
  726. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringAttribute.h +71 -0
  727. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringVectorAttribute.cpp +100 -0
  728. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringVectorAttribute.h +74 -0
  729. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSystemSpecific.cpp +129 -0
  730. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSystemSpecific.h +172 -0
  731. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTestFile.cpp +216 -0
  732. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTestFile.h +97 -0
  733. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfThreading.cpp +62 -0
  734. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfThreading.h +95 -0
  735. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescription.h +107 -0
  736. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescriptionAttribute.cpp +86 -0
  737. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescriptionAttribute.h +72 -0
  738. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileOffsets.cpp +552 -0
  739. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileOffsets.h +125 -0
  740. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputFile.cpp +1533 -0
  741. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputFile.h +401 -0
  742. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputPart.cpp +208 -0
  743. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputPart.h +100 -0
  744. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledMisc.cpp +389 -0
  745. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledMisc.h +106 -0
  746. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputFile.cpp +1841 -0
  747. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputFile.h +495 -0
  748. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputPart.cpp +228 -0
  749. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputPart.h +105 -0
  750. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledRgbaFile.cpp +1163 -0
  751. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledRgbaFile.h +482 -0
  752. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCode.cpp +431 -0
  753. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCode.h +242 -0
  754. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCodeAttribute.cpp +79 -0
  755. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCodeAttribute.h +74 -0
  756. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVecAttribute.cpp +217 -0
  757. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVecAttribute.h +100 -0
  758. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVersion.cpp +60 -0
  759. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVersion.h +136 -0
  760. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfWav.cpp +391 -0
  761. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfWav.h +78 -0
  762. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfXdr.h +927 -0
  763. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZip.cpp +196 -0
  764. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZip.h +78 -0
  765. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZipCompressor.cpp +127 -0
  766. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZipCompressor.h +89 -0
  767. data/vendor/FreeImage/Source/OpenEXR/IlmImf/b44ExpLogTable.cpp +136 -0
  768. data/vendor/FreeImage/Source/OpenEXR/IlmImf/b44ExpLogTable.h +16396 -0
  769. data/vendor/FreeImage/Source/OpenEXR/IlmImf/dwaLookups.cpp +573 -0
  770. data/vendor/FreeImage/Source/OpenEXR/IlmImf/dwaLookups.h +98334 -0
  771. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThread.cpp +80 -0
  772. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThread.h +143 -0
  773. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadExport.h +46 -0
  774. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadForward.h +52 -0
  775. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutex.cpp +59 -0
  776. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutex.h +160 -0
  777. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutexPosix.cpp +85 -0
  778. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutexWin32.cpp +79 -0
  779. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadNamespace.h +114 -0
  780. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPool.cpp +483 -0
  781. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPool.h +160 -0
  782. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPosix.cpp +98 -0
  783. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphore.cpp +60 -0
  784. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphore.h +112 -0
  785. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphorePosix.cpp +106 -0
  786. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphorePosixCompat.cpp +155 -0
  787. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphoreWin32.cpp +153 -0
  788. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadWin32.cpp +100 -0
  789. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBox.cpp +37 -0
  790. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBox.h +849 -0
  791. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBoxAlgo.h +1016 -0
  792. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColor.h +736 -0
  793. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColorAlgo.cpp +178 -0
  794. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColorAlgo.h +257 -0
  795. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathEuler.h +926 -0
  796. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathExc.h +73 -0
  797. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathExport.h +46 -0
  798. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathForward.h +72 -0
  799. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrame.h +192 -0
  800. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrustum.h +741 -0
  801. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrustumTest.h +417 -0
  802. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFun.cpp +181 -0
  803. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFun.h +269 -0
  804. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathGL.h +166 -0
  805. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathGLU.h +54 -0
  806. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathHalfLimits.h +68 -0
  807. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathInt64.h +62 -0
  808. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathInterval.h +226 -0
  809. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLimits.h +268 -0
  810. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLine.h +185 -0
  811. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLineAlgo.h +288 -0
  812. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMath.h +208 -0
  813. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrix.h +3441 -0
  814. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrixAlgo.cpp +1252 -0
  815. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrixAlgo.h +1425 -0
  816. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathNamespace.h +115 -0
  817. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathPlane.h +257 -0
  818. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathPlatform.h +112 -0
  819. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathQuat.h +964 -0
  820. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRandom.cpp +194 -0
  821. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRandom.h +401 -0
  822. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRoots.h +219 -0
  823. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathShear.cpp +54 -0
  824. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathShear.h +656 -0
  825. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathSphere.h +177 -0
  826. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVec.cpp +583 -0
  827. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVec.h +2227 -0
  828. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVecAlgo.h +147 -0
  829. data/vendor/FreeImage/Source/OpenEXR/OpenEXRConfig.h +72 -0
  830. data/vendor/FreeImage/Source/Plugin.h +144 -0
  831. data/vendor/FreeImage/Source/Quantizers.h +354 -0
  832. data/vendor/FreeImage/Source/ToneMapping.h +44 -0
  833. data/vendor/FreeImage/Source/Utilities.h +516 -0
  834. data/vendor/FreeImage/Source/ZLib/adler32.c +179 -0
  835. data/vendor/FreeImage/Source/ZLib/compress.c +80 -0
  836. data/vendor/FreeImage/Source/ZLib/crc32.c +425 -0
  837. data/vendor/FreeImage/Source/ZLib/crc32.h +441 -0
  838. data/vendor/FreeImage/Source/ZLib/deflate.c +1967 -0
  839. data/vendor/FreeImage/Source/ZLib/deflate.h +346 -0
  840. data/vendor/FreeImage/Source/ZLib/gzclose.c +25 -0
  841. data/vendor/FreeImage/Source/ZLib/gzguts.h +209 -0
  842. data/vendor/FreeImage/Source/ZLib/gzlib.c +634 -0
  843. data/vendor/FreeImage/Source/ZLib/gzread.c +594 -0
  844. data/vendor/FreeImage/Source/ZLib/gzwrite.c +577 -0
  845. data/vendor/FreeImage/Source/ZLib/infback.c +640 -0
  846. data/vendor/FreeImage/Source/ZLib/inffast.c +340 -0
  847. data/vendor/FreeImage/Source/ZLib/inffast.h +11 -0
  848. data/vendor/FreeImage/Source/ZLib/inffixed.h +94 -0
  849. data/vendor/FreeImage/Source/ZLib/inflate.c +1512 -0
  850. data/vendor/FreeImage/Source/ZLib/inflate.h +122 -0
  851. data/vendor/FreeImage/Source/ZLib/inftrees.c +306 -0
  852. data/vendor/FreeImage/Source/ZLib/inftrees.h +62 -0
  853. data/vendor/FreeImage/Source/ZLib/trees.c +1226 -0
  854. data/vendor/FreeImage/Source/ZLib/trees.h +128 -0
  855. data/vendor/FreeImage/Source/ZLib/uncompr.c +59 -0
  856. data/vendor/FreeImage/Source/ZLib/zconf.h +511 -0
  857. data/vendor/FreeImage/Source/ZLib/zlib.h +1768 -0
  858. data/vendor/FreeImage/Source/ZLib/zutil.c +324 -0
  859. data/vendor/FreeImage/Source/ZLib/zutil.h +253 -0
  860. metadata +931 -0
@@ -0,0 +1,2145 @@
1
+ ///////////////////////////////////////////////////////////////////////////
2
+ //
3
+ // Copyright (c) 2009-2014 DreamWorks Animation LLC.
4
+ //
5
+ // All rights reserved.
6
+ //
7
+ // Redistribution and use in source and binary forms, with or without
8
+ // modification, are permitted provided that the following conditions are
9
+ // met:
10
+ // * Redistributions of source code must retain the above copyright
11
+ // notice, this list of conditions and the following disclaimer.
12
+ // * Redistributions in binary form must reproduce the above
13
+ // copyright notice, this list of conditions and the following disclaimer
14
+ // in the documentation and/or other materials provided with the
15
+ // distribution.
16
+ // * Neither the name of DreamWorks Animation nor the names of
17
+ // its contributors may be used to endorse or promote products derived
18
+ // from this software without specific prior written permission.
19
+ //
20
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ //
32
+ ///////////////////////////////////////////////////////////////////////////
33
+
34
+ #ifndef IMF_DWACOMPRESSORSIMD_H_HAS_BEEN_INCLUDED
35
+ #define IMF_DWACOMPRESSORSIMD_H_HAS_BEEN_INCLUDED
36
+
37
+ //
38
+ // Various SSE accelerated functions, used by Imf::DwaCompressor.
39
+ // These have been separated into a separate .h file, as the fast
40
+ // paths are done with template specialization.
41
+ //
42
+ // Unless otherwise noted, all pointers are assumed to be 32-byte
43
+ // aligned. Unaligned pointers may risk seg-faulting.
44
+ //
45
+
46
+ #include "ImfNamespace.h"
47
+ #include "ImfSimd.h"
48
+ #include "ImfSystemSpecific.h"
49
+ #include "OpenEXRConfig.h"
50
+
51
+ #include <half.h>
52
+ #include <assert.h>
53
+
54
+ OPENEXR_IMF_INTERNAL_NAMESPACE_HEADER_ENTER
55
+
56
+ #define _SSE_ALIGNMENT 32
57
+ #define _SSE_ALIGNMENT_MASK 0x0F
58
+ #define _AVX_ALIGNMENT_MASK 0x1F
59
+
60
+ //
61
+ // Test if we should enable GCC inline asm paths for AVX
62
+ //
63
+
64
+ #ifdef OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX
65
+
66
+ #define IMF_HAVE_GCC_INLINEASM
67
+
68
+ #ifdef __LP64__
69
+ #define IMF_HAVE_GCC_INLINEASM_64
70
+ #endif /* __LP64__ */
71
+
72
+ #endif /* OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX */
73
+
74
+ //
75
+ // A simple 64-element array, aligned properly for SIMD access.
76
+ //
77
+
78
+ template <class T>
79
+ class SimdAlignedBuffer64
80
+ {
81
+ public:
82
+
83
+ SimdAlignedBuffer64(): _buffer (0), _handle (0)
84
+ {
85
+ alloc();
86
+ }
87
+
88
+ SimdAlignedBuffer64(const SimdAlignedBuffer64 &rhs): _handle(0)
89
+ {
90
+ alloc();
91
+ memcpy (_buffer, rhs._buffer, 64 * sizeof (T));
92
+ }
93
+
94
+ ~SimdAlignedBuffer64 ()
95
+ {
96
+ EXRFreeAligned (_handle);
97
+ _handle = 0;
98
+ _buffer = 0;
99
+ }
100
+
101
+ void alloc()
102
+ {
103
+ //
104
+ // Try EXRAllocAligned first - but it might fallback to
105
+ // unaligned allocs. If so, overalloc.
106
+ //
107
+
108
+ _handle = (char *) EXRAllocAligned
109
+ (64 * sizeof(T), _SSE_ALIGNMENT);
110
+
111
+ if (((size_t)_handle & (_SSE_ALIGNMENT - 1)) == 0)
112
+ {
113
+ _buffer = (T *)_handle;
114
+ return;
115
+ }
116
+
117
+ EXRFreeAligned(_handle);
118
+ _handle = (char *) EXRAllocAligned
119
+ (64 * sizeof(T) + _SSE_ALIGNMENT, _SSE_ALIGNMENT);
120
+
121
+ char *aligned = _handle;
122
+
123
+ while ((size_t)aligned & (_SSE_ALIGNMENT - 1))
124
+ aligned++;
125
+
126
+ _buffer = (T *)aligned;
127
+ }
128
+
129
+ T *_buffer;
130
+
131
+ private:
132
+
133
+ char *_handle;
134
+ };
135
+
136
+ typedef SimdAlignedBuffer64<float> SimdAlignedBuffer64f;
137
+ typedef SimdAlignedBuffer64<unsigned short> SimdAlignedBuffer64us;
138
+
139
+ namespace {
140
+
141
+ //
142
+ // Color space conversion, Inverse 709 CSC, Y'CbCr -> R'G'B'
143
+ //
144
+
145
//
// In-place inverse 709 matrix for one sample. On entry the three
// references hold Y', Cb, Cr; on return they hold R', G', B'.
// The inputs are copied first, so every output uses the original values.
//
void
csc709Inverse (float &comp0, float &comp1, float &comp2)
{
    const float luma = comp0;
    const float cb   = comp1;
    const float cr   = comp2;

    comp0 = luma + 1.5747f * cr;
    comp1 = luma - 0.1873f * cb - 0.4682f * cr;
    comp2 = luma + 1.8556f * cb;
}
158
+
159
+ #ifndef IMF_HAVE_SSE2
160
+
161
+
162
+ //
163
+ // Scalar color space conversion, based on 709 primary chromaticities.
164
+ // No scaling or offsets, just the matrix
165
+ //
166
+
167
+ void
168
+ csc709Inverse64 (float *comp0, float *comp1, float *comp2)
169
+ {
170
+ for (int i = 0; i < 64; ++i)
171
+ csc709Inverse (comp0[i], comp1[i], comp2[i]);
172
+ }
173
+
174
+ #else /* IMF_HAVE_SSE2 */
175
+
176
+ //
177
+ // SSE2 color space conversion
178
+ //
179
+
180
//
// SSE2 inverse 709 conversion of 64 samples, four at a time.
// comp0/comp1/comp2 hold Y', Cb, Cr on entry and R', G', B' on return.
// The planes are accessed as __m128, so each pointer must be at least
// 16-byte aligned.
//
// This is a plain loop rather than a function-like macro: a macro
// defined in a header and never #undef'd leaks into every includer.
//
void
csc709Inverse64 (float *comp0, float *comp1, float *comp2)
{
    const __m128 crToR = _mm_set1_ps ( 1.5747f);
    const __m128 cbToB = _mm_set1_ps ( 1.8556f);
    const __m128 cbToG = _mm_set1_ps (-0.1873f);
    const __m128 crToG = _mm_set1_ps (-0.4682f);

    __m128 *r = (__m128 *)comp0;
    __m128 *g = (__m128 *)comp1;
    __m128 *b = (__m128 *)comp2;

    for (int i = 0; i < 16; ++i)
    {
        //
        // Read all three inputs before any plane is overwritten.
        //

        const __m128 y  = r[i];
        const __m128 cb = g[i];
        const __m128 cr = b[i];

        r[i] = _mm_add_ps (y, _mm_mul_ps (cr, crToR));

        //
        // Same summation order as before: ((cb * k) + y) + (cr * k)
        //

        g[i] = _mm_add_ps (_mm_add_ps (_mm_mul_ps (cb, cbToG), y),
                           _mm_mul_ps (cr, crToG));

        b[i] = _mm_add_ps (_mm_mul_ps (cbToB, cb), y);
    }
}
228
+
229
+ #endif /* IMF_HAVE_SSE2 */
230
+
231
+
232
+ //
233
+ // Color space conversion, Forward 709 CSC, R'G'B' -> Y'CbCr
234
+ //
235
+ // Simple FPU color space conversion. Based on the 709
236
+ // primary chromaticities, with no scaling or offsets.
237
+ //
238
+
239
//
// Forward 709 matrix over 64 samples held as three planes.
// comp0/comp1/comp2 hold R', G', B' on entry and Y', Cb, Cr on return.
//
void
csc709Forward64 (float *comp0, float *comp1, float *comp2)
{
    for (int idx = 0; idx != 64; ++idx)
    {
        //
        // Snapshot the inputs; all three planes are overwritten below.
        //

        const float red   = comp0[idx];
        const float green = comp1[idx];
        const float blue  = comp2[idx];

        comp0[idx] =  0.2126f * red + 0.7152f * green + 0.0722f * blue;
        comp1[idx] = -0.1146f * red - 0.3854f * green + 0.5000f * blue;
        comp2[idx] =  0.5000f * red - 0.4542f * green - 0.0458f * blue;
    }
}
255
+
256
+
257
+ //
258
+ // Byte interleaving of 2 byte arrays:
259
+ // src0 = AAAA
260
+ // src1 = BBBB
261
+ // dst = ABABABAB
262
+ //
263
+ // numBytes is the size of each of the source buffers
264
+ //
265
+
266
+ #ifndef IMF_HAVE_SSE2
267
+
268
+ //
269
+ // Scalar default implementation
270
+ //
271
+
272
//
// Scalar interleave: writes src0[0], src1[0], src0[1], src1[1], ...
// to dst. dst receives 2 * numBytes bytes; nothing is written when
// numBytes <= 0.
//
void
interleaveByte2 (char *dst, char *src0, char *src1, int numBytes)
{
    char *out = dst;

    for (int i = 0; i < numBytes; ++i)
    {
        *out++ = src0[i];
        *out++ = src1[i];
    }
}
281
+
282
+ #else /* IMF_HAVE_SSE2 */
283
+
284
+ //
285
+ // SSE2 byte interleaving
286
+ //
287
+
288
//
// SSE2 interleave: dst[2*i] = src0[i] and dst[2*i + 1] = src1[i] for
// i in [0, numBytes). dst must have room for 2 * numBytes bytes.
//
// Three paths, chosen from the pointers' offsets within a 16-byte line:
//   1) everything 16-byte aligned      -> aligned loads, streaming stores
//   2) dst aligned, both sources at +8 -> copy up to 8 bytes by hand so
//                                         the rest is aligned, then as 1)
//   3) anything else                   -> unaligned loads and stores
//
// Bytes that do not fill a whole 16-byte vector are copied one at a time.
//
void
interleaveByte2 (char *dst, char *src0, char *src1, int numBytes)
{
    int dstAlignment  = (size_t)dst  % 16;
    int src0Alignment = (size_t)src0 % 16;
    int src1Alignment = (size_t)src1 % 16;

    __m128i *dst_epi8  = (__m128i*)dst;
    __m128i *src0_epi8 = (__m128i*)src0;
    __m128i *src1_epi8 = (__m128i*)src1;
    int sseWidth  =  numBytes / 16;

    //
    // Index of the first source byte not yet interleaved; the scalar
    // loop at the bottom finishes from here.
    //

    int tailStart = 0;

    if ((!dstAlignment) && (!src0Alignment) && (!src1Alignment))
    {
        __m128i tmp0, tmp1;

        //
        // Aligned loads and stores
        //

        for (int x = 0; x < sseWidth; ++x)
        {
            tmp0 = src0_epi8[x];
            tmp1 = src1_epi8[x];

            _mm_stream_si128 (&dst_epi8[2 * x],
                              _mm_unpacklo_epi8 (tmp0, tmp1));

            _mm_stream_si128 (&dst_epi8[2 * x + 1],
                              _mm_unpackhi_epi8 (tmp0, tmp1));
        }

        tailStart = 16 * sseWidth;
    }
    else if ((!dstAlignment) && (src0Alignment == 8) && (src1Alignment == 8))
    {
        //
        // Aligned stores, but catch up a few values so we can
        // use aligned loads.
        //
        // Never catch up more than numBytes: with fewer than 8 bytes of
        // input an unconditional 8-byte head would read past both
        // sources and write 16 bytes into dst.
        //

        const int headBytes = (numBytes < 8) ? numBytes : 8;

        for (int x = 0; x < headBytes; ++x)
        {
            dst[2 * x]     = src0[x];
            dst[2 * x + 1] = src1[x];
        }

        tailStart = headBytes;

        if (numBytes > 8)
        {
            dst_epi8  = (__m128i*)&dst[16];
            src0_epi8 = (__m128i*)&src0[8];
            src1_epi8 = (__m128i*)&src1[8];
            sseWidth  = (numBytes - 8) / 16;

            for (int x = 0; x < sseWidth; ++x)
            {
                _mm_stream_si128 (&dst_epi8[2 * x],
                    _mm_unpacklo_epi8 (src0_epi8[x], src1_epi8[x]));

                _mm_stream_si128 (&dst_epi8[2 * x + 1],
                    _mm_unpackhi_epi8 (src0_epi8[x], src1_epi8[x]));
            }

            tailStart = 16 * sseWidth + 8;
        }
    }
    else
    {
        //
        // Unaligned everything
        //

        for (int x = 0; x < sseWidth; ++x)
        {
            __m128i tmpSrc0_epi8 = _mm_loadu_si128 (&src0_epi8[x]);
            __m128i tmpSrc1_epi8 = _mm_loadu_si128 (&src1_epi8[x]);

            _mm_storeu_si128 (&dst_epi8[2 * x],
                              _mm_unpacklo_epi8 (tmpSrc0_epi8, tmpSrc1_epi8));

            _mm_storeu_si128 (&dst_epi8[2 * x + 1],
                              _mm_unpackhi_epi8 (tmpSrc0_epi8, tmpSrc1_epi8));
        }

        tailStart = 16 * sseWidth;
    }

    //
    // Then run the leftovers one at a time
    //

    for (int x = tailStart; x < numBytes; ++x)
    {
        dst[2 * x]     = src0[x];
        dst[2 * x + 1] = src1[x];
    }
}
396
+
397
+ #endif /* IMF_HAVE_SSE2 */
398
+
399
+
400
+ //
401
+ // Float -> half float conversion
402
+ //
403
+ // To enable F16C based conversion, we can't rely on compile-time
404
+ // detection, hence the multiple defined versions. Pick one based
405
+ // on runtime cpuid detection.
406
+ //
407
+
408
+ //
409
+ // Default boring conversion
410
+ //
411
+
412
+ void
413
+ convertFloatToHalf64_scalar (unsigned short *dst, float *src)
414
+ {
415
+ for (int i=0; i<64; ++i)
416
+ dst[i] = ((half)src[i]).bits();
417
+ }
418
+
419
+
420
+ //
421
+ // F16C conversion - Assumes aligned src and dst
422
+ //
423
+
424
+ void
425
+ convertFloatToHalf64_f16c (unsigned short *dst, float *src)
426
+ {
427
+ // Converts the 64 floats at src to half-float bit patterns at dst, using
428
+ // F16C (vcvtps2ph) when IMF_HAVE_GCC_INLINEASM is defined and the scalar
429
+ // routine otherwise. src is read with vmovaps on ymm registers and dst is
430
+ // written with vmovdqa, so src must be 32-byte and dst 16-byte aligned.
431
+ //
432
+ // Ordinarily, I'd avoid inline asm and prefer intrinsics. However, to get
433
+ // the intrinsics we need to tell the compiler to generate VEX instructions
434
+ // (on the GCC side, -mf16c goes ahead and activates -mavx, resulting in
435
+ // VEX code; without -mf16c, no intrinsics).
436
+ //
437
+ // It's quite likely that we'll want to build *without* VEX, in order to
438
+ // maintain maximum compatibility. To get there with intrinsics, we'd need
439
+ // to break this code out into a separate file. Bleh. I'll take the asm.
440
+ //
441
+
442
+ #if defined IMF_HAVE_GCC_INLINEASM
443
+ __asm__
444
+ ("vmovaps (%0), %%ymm0 \n"
445
+ "vmovaps 0x20(%0), %%ymm1 \n"
446
+ "vmovaps 0x40(%0), %%ymm2 \n"
447
+ "vmovaps 0x60(%0), %%ymm3 \n"
448
+ "vcvtps2ph $0, %%ymm0, %%xmm0 \n"
449
+ "vcvtps2ph $0, %%ymm1, %%xmm1 \n"
450
+ "vcvtps2ph $0, %%ymm2, %%xmm2 \n"
451
+ "vcvtps2ph $0, %%ymm3, %%xmm3 \n"
452
+ "vmovdqa %%xmm0, 0x00(%1) \n"
453
+ "vmovdqa %%xmm1, 0x10(%1) \n"
454
+ "vmovdqa %%xmm2, 0x20(%1) \n"
455
+ "vmovdqa %%xmm3, 0x30(%1) \n"
456
+ "vmovaps 0x80(%0), %%ymm0 \n"
457
+ "vmovaps 0xa0(%0), %%ymm1 \n"
458
+ "vmovaps 0xc0(%0), %%ymm2 \n"
459
+ "vmovaps 0xe0(%0), %%ymm3 \n"
460
+ "vcvtps2ph $0, %%ymm0, %%xmm0 \n"
461
+ "vcvtps2ph $0, %%ymm1, %%xmm1 \n"
462
+ "vcvtps2ph $0, %%ymm2, %%xmm2 \n"
463
+ "vcvtps2ph $0, %%ymm3, %%xmm3 \n"
464
+ "vmovdqa %%xmm0, 0x40(%1) \n"
465
+ "vmovdqa %%xmm1, 0x50(%1) \n"
466
+ "vmovdqa %%xmm2, 0x60(%1) \n"
467
+ "vmovdqa %%xmm3, 0x70(%1) \n"
468
+ #ifndef __AVX__
469
+ "vzeroupper \n"
470
+ #endif /* __AVX__ : vzeroupper is emitted only when the file is built without AVX */
471
+ : /* Output: none; results are stored through dst, covered by the memory clobber */
472
+ : /* Input: %0 = src, %1 = dst */ "r"(src), "r"(dst)
473
+ #ifndef __AVX__
474
+ : /* Clobber */ "%xmm0", "%xmm1", "%xmm2", "%xmm3", "memory"
475
+ #else
476
+ : /* Clobber */ "%ymm0", "%ymm1", "%ymm2", "%ymm3", "memory"
477
+ #endif /* __AVX__ */
478
+ );
479
+ #else
480
+ convertFloatToHalf64_scalar (dst, src);
481
+ #endif /* IMF_HAVE_GCC_INLINEASM */
482
+ }
483
+
484
+
485
+ //
486
+ // Convert an 8x8 block of HALF from zig-zag order to
487
+ // FLOAT in normal order. The order we want is:
488
+ //
489
+ // src dst
490
+ // 0 1 2 3 4 5 6 7 0 1 5 6 14 15 27 28
491
+ // 8 9 10 11 12 13 14 15 2 4 7 13 16 26 29 42
492
+ // 16 17 18 19 20 21 22 23 3 8 12 17 25 30 41 43
493
+ // 24 25 26 27 28 29 30 31 9 11 18 24 31 40 44 53
494
+ // 32 33 34 35 36 37 38 39 10 19 23 32 39 45 52 54
495
+ // 40 41 42 43 44 45 46 47 20 22 33 38 46 51 55 60
496
+ // 48 49 50 51 52 53 54 55 21 34 37 47 50 56 59 61
497
+ // 56 57 58 59 60 61 62 63 35 36 48 49 57 58 62 63
498
+ //
499
+
500
+ void
501
+ fromHalfZigZag_scalar (unsigned short *src, float *dst)
502
+ {
503
+ half *srcHalf = (half *)src;
504
+
505
+ dst[0] = (float)srcHalf[0];
506
+ dst[1] = (float)srcHalf[1];
507
+ dst[2] = (float)srcHalf[5];
508
+ dst[3] = (float)srcHalf[6];
509
+ dst[4] = (float)srcHalf[14];
510
+ dst[5] = (float)srcHalf[15];
511
+ dst[6] = (float)srcHalf[27];
512
+ dst[7] = (float)srcHalf[28];
513
+ dst[8] = (float)srcHalf[2];
514
+ dst[9] = (float)srcHalf[4];
515
+
516
+ dst[10] = (float)srcHalf[7];
517
+ dst[11] = (float)srcHalf[13];
518
+ dst[12] = (float)srcHalf[16];
519
+ dst[13] = (float)srcHalf[26];
520
+ dst[14] = (float)srcHalf[29];
521
+ dst[15] = (float)srcHalf[42];
522
+ dst[16] = (float)srcHalf[3];
523
+ dst[17] = (float)srcHalf[8];
524
+ dst[18] = (float)srcHalf[12];
525
+ dst[19] = (float)srcHalf[17];
526
+
527
+ dst[20] = (float)srcHalf[25];
528
+ dst[21] = (float)srcHalf[30];
529
+ dst[22] = (float)srcHalf[41];
530
+ dst[23] = (float)srcHalf[43];
531
+ dst[24] = (float)srcHalf[9];
532
+ dst[25] = (float)srcHalf[11];
533
+ dst[26] = (float)srcHalf[18];
534
+ dst[27] = (float)srcHalf[24];
535
+ dst[28] = (float)srcHalf[31];
536
+ dst[29] = (float)srcHalf[40];
537
+
538
+ dst[30] = (float)srcHalf[44];
539
+ dst[31] = (float)srcHalf[53];
540
+ dst[32] = (float)srcHalf[10];
541
+ dst[33] = (float)srcHalf[19];
542
+ dst[34] = (float)srcHalf[23];
543
+ dst[35] = (float)srcHalf[32];
544
+ dst[36] = (float)srcHalf[39];
545
+ dst[37] = (float)srcHalf[45];
546
+ dst[38] = (float)srcHalf[52];
547
+ dst[39] = (float)srcHalf[54];
548
+
549
+ dst[40] = (float)srcHalf[20];
550
+ dst[41] = (float)srcHalf[22];
551
+ dst[42] = (float)srcHalf[33];
552
+ dst[43] = (float)srcHalf[38];
553
+ dst[44] = (float)srcHalf[46];
554
+ dst[45] = (float)srcHalf[51];
555
+ dst[46] = (float)srcHalf[55];
556
+ dst[47] = (float)srcHalf[60];
557
+ dst[48] = (float)srcHalf[21];
558
+ dst[49] = (float)srcHalf[34];
559
+
560
+ dst[50] = (float)srcHalf[37];
561
+ dst[51] = (float)srcHalf[47];
562
+ dst[52] = (float)srcHalf[50];
563
+ dst[53] = (float)srcHalf[56];
564
+ dst[54] = (float)srcHalf[59];
565
+ dst[55] = (float)srcHalf[61];
566
+ dst[56] = (float)srcHalf[35];
567
+ dst[57] = (float)srcHalf[36];
568
+ dst[58] = (float)srcHalf[48];
569
+ dst[59] = (float)srcHalf[49];
570
+
571
+ dst[60] = (float)srcHalf[57];
572
+ dst[61] = (float)srcHalf[58];
573
+ dst[62] = (float)srcHalf[62];
574
+ dst[63] = (float)srcHalf[63];
575
+ }
576
+
577
+
578
+ //
579
+ // If we can form the correct ordering in xmm registers,
580
+ // we can use F16C to convert from HALF -> FLOAT. However,
581
+ // making the correct order isn't trivial.
582
+ //
583
+ // We want to re-order a source 8x8 matrix from:
584
+ //
585
+ // 0 1 2 3 4 5 6 7 0 1 5 6 14 15 27 28
586
+ // 8 9 10 11 12 13 14 15 2 4 7 13 16 26 29 42
587
+ // 16 17 18 19 20 21 22 23 3 8 12 17 25 30 41 43
588
+ // 24 25 26 27 28 29 30 31 9 11 18 24 31 40 44 53 (A)
589
+ // 32 33 34 35 36 37 38 39 --> 10 19 23 32 39 45 52 54
590
+ // 40 41 42 43 44 45 46 47 20 22 33 38 46 51 55 60
591
+ // 48 49 50 51 52 53 54 55 21 34 37 47 50 56 59 61
592
+ // 56 57 58 59 60 61 62 63 35 36 48 49 57 58 62 63
593
+ //
594
+ // Which looks like a mess, right?
595
+ //
596
+ // Now, check out the NE/SW diagonals of (A). Along those lines,
597
+ // we have runs of contiguous values! If we rewrite (A) a bit, we get:
598
+ //
599
+ // 0
600
+ // 1 2
601
+ // 5 4 3
602
+ // 6 7 8 9
603
+ // 14 13 12 11 10
604
+ // 15 16 17 18 19 20
605
+ // 27 26 25 24 23 22 21 (B)
606
+ // 28 29 30 31 32 33 34 35
607
+ // 42 41 40 39 38 37 36
608
+ // 43 44 45 46 47 48
609
+ // 53 52 51 50 49
610
+ // 54 55 56 57
611
+ // 60 59 58
612
+ // 61 62
613
+ // 63
614
+ //
615
+ // In this ordering, the columns are the rows (A). If we can 'transpose'
616
+ // (B), we'll achieve our goal. But we want this to fit nicely into
617
+ // xmm registers and still be able to load large runs efficiently.
618
+ // Also, notice that the odd rows are in ascending order, while
619
+ // the even rows are in descending order.
620
+ //
621
+ // If we 'fold' the bottom half up into the top, we can preserve ordered
622
+ // runs across rows, and still keep all the correct values in columns.
623
+ // After transposing, we'll need to rotate things back into place.
624
+ // This gives us:
625
+ //
626
+ // 0 | 42 41 40 39 38 37 36
627
+ // 1 2 | 43 44 45 46 47 48
628
+ // 5 4 3 | 53 52 51 50 49
629
+ // 6 7 8 9 | 54 55 56 57 (C)
630
+ // 14 13 12 11 10 | 60 59 58
631
+ // 15 16 17 18 19 20 | 61 62
632
+ // 27 26 25 24 23 22 21 | 63
633
+ // 28 29 30 31 32 33 34 35
634
+ //
635
+ // But hang on. We still have the backwards descending rows to deal with.
636
+ // Let's reverse the even rows so that all values are in ascending order
637
+ //
638
+ // 36 37 38 39 40 41 42 | 0
639
+ // 1 2 | 43 44 45 46 47 48
640
+ // 49 50 51 52 53 | 3 4 5
641
+ // 6 7 8 9 | 54 55 56 57 (D)
642
+ // 58 59 60 | 10 11 12 13 14
643
+ // 15 16 17 18 19 20 | 61 62
644
+ // 63 | 21 22 23 24 25 26 27
645
+ // 28 29 30 31 32 33 34 35
646
+ //
647
+ // If we can form (D), we will then:
648
+ // 1) Reverse the even rows
649
+ // 2) Transpose
650
+ // 3) Rotate the rows
651
+ //
652
+ // and we'll have (A).
653
+ //
654
+
655
+ void
656
+ fromHalfZigZag_f16c (unsigned short *src, float *dst)
657
+ { // F16C zig-zag reorder + half -> float; intended to match fromHalfZigZag_scalar, which is the fallback below
658
+ #if defined IMF_HAVE_GCC_INLINEASM_64
659
+ __asm__
660
+
661
+ /* Initial loads. %0 = src, %1 = dst; [i-j] are src element indices
662
+ * (byte offset = 2 * index) and xN means xmmN.
663
+ * x3 <- 0, x8 <- [ 0- 7], x6 <- [56-63] (aligned loads)
664
+ * x9 <- [21-28] (unaligned load)
665
+ * x7 <- [28-35] (unaligned load)
666
+ * x3 <- [ 6- 9] (lower half) */
667
+
668
+ ("vpxor %%xmm3, %%xmm3, %%xmm3 \n"
669
+ "vmovdqa (%0), %%xmm8 \n"
670
+ "vmovdqa 112(%0), %%xmm6 \n"
671
+ "vmovdqu 42(%0), %%xmm9 \n"
672
+ "vmovdqu 56(%0), %%xmm7 \n"
673
+ "vmovq 12(%0), %%xmm3 \n"
674
+
675
+ /* Setup rows 0-2 of A in xmm0-xmm2
676
+ * x1 <- x8 >> 16 (1 value)
677
+ * x2 <- x8 << 32 (2 values)
678
+ * x0 <- alignr([35-42], x8, 2)
679
+ * x1 <- blend(x1, [41-48])
680
+ * x2 <- blend(x2, [49-56]) */
681
+
682
+ "vpsrldq $2, %%xmm8, %%xmm1 \n"
683
+ "vpslldq $4, %%xmm8, %%xmm2 \n"
684
+ "vpalignr $2, 70(%0), %%xmm8, %%xmm0 \n"
685
+ "vpblendw $0xfc, 82(%0), %%xmm1, %%xmm1 \n"
686
+ "vpblendw $0x1f, 98(%0), %%xmm2, %%xmm2 \n"
687
+
688
+ /* Setup rows 4-6 of A in xmm4-xmm6
689
+ * x4 <- x6 >> 32 (2 values)
690
+ * x5 <- x6 << 16 (1 value)
691
+ * x6 <- alignr(x6,x9,14)
692
+ * x4 <- blend(x4, [ 7-14])
693
+ * x5 <- blend(x5, [15-22]) */
694
+
695
+ "vpsrldq $4, %%xmm6, %%xmm4 \n"
696
+ "vpslldq $2, %%xmm6, %%xmm5 \n"
697
+ "vpalignr $14, %%xmm6, %%xmm9, %%xmm6 \n"
698
+ "vpblendw $0xf8, 14(%0), %%xmm4, %%xmm4 \n"
699
+ "vpblendw $0x3f, 30(%0), %%xmm5, %%xmm5 \n"
700
+
701
+ /* Load the upper half of row 3 into xmm3
702
+ * x3 <- [54-57] (upper half) */
703
+
704
+ "vpinsrq $1, 108(%0), %%xmm3, %%xmm3\n"
705
+
706
+ /* Reverse the even rows (xmm0, xmm2, xmm4, xmm6). We're not using
707
+ * PSHUFB as that requires loading an extra constant all the time,
708
+ * and we're already pretty memory bound.
709
+ */
710
+
711
+ "vpshuflw $0x1b, %%xmm0, %%xmm0 \n"
712
+ "vpshuflw $0x1b, %%xmm2, %%xmm2 \n"
713
+ "vpshuflw $0x1b, %%xmm4, %%xmm4 \n"
714
+ "vpshuflw $0x1b, %%xmm6, %%xmm6 \n"
715
+
716
+ "vpshufhw $0x1b, %%xmm0, %%xmm0 \n"
717
+ "vpshufhw $0x1b, %%xmm2, %%xmm2 \n"
718
+ "vpshufhw $0x1b, %%xmm4, %%xmm4 \n"
719
+ "vpshufhw $0x1b, %%xmm6, %%xmm6 \n"
720
+
721
+ "vpshufd $0x4e, %%xmm0, %%xmm0 \n"
722
+ "vpshufd $0x4e, %%xmm2, %%xmm2 \n"
723
+ "vpshufd $0x4e, %%xmm4, %%xmm4 \n"
724
+ "vpshufd $0x4e, %%xmm6, %%xmm6 \n"
725
+
726
+ /* Transpose xmm0-xmm7 into xmm8-xmm15 (word, then dword, then qword unpacks) */
727
+
728
+ "vpunpcklwd %%xmm1, %%xmm0, %%xmm8 \n"
729
+ "vpunpcklwd %%xmm3, %%xmm2, %%xmm9 \n"
730
+ "vpunpcklwd %%xmm5, %%xmm4, %%xmm10 \n"
731
+ "vpunpcklwd %%xmm7, %%xmm6, %%xmm11 \n"
732
+ "vpunpckhwd %%xmm1, %%xmm0, %%xmm12 \n"
733
+ "vpunpckhwd %%xmm3, %%xmm2, %%xmm13 \n"
734
+ "vpunpckhwd %%xmm5, %%xmm4, %%xmm14 \n"
735
+ "vpunpckhwd %%xmm7, %%xmm6, %%xmm15 \n"
736
+
737
+ "vpunpckldq %%xmm9, %%xmm8, %%xmm0 \n"
738
+ "vpunpckldq %%xmm11, %%xmm10, %%xmm1 \n"
739
+ "vpunpckhdq %%xmm9, %%xmm8, %%xmm2 \n"
740
+ "vpunpckhdq %%xmm11, %%xmm10, %%xmm3 \n"
741
+ "vpunpckldq %%xmm13, %%xmm12, %%xmm4 \n"
742
+ "vpunpckldq %%xmm15, %%xmm14, %%xmm5 \n"
743
+ "vpunpckhdq %%xmm13, %%xmm12, %%xmm6 \n"
744
+ "vpunpckhdq %%xmm15, %%xmm14, %%xmm7 \n"
745
+
746
+ "vpunpcklqdq %%xmm1, %%xmm0, %%xmm8 \n"
747
+ "vpunpckhqdq %%xmm1, %%xmm0, %%xmm9 \n"
748
+ "vpunpcklqdq %%xmm3, %%xmm2, %%xmm10 \n"
749
+ "vpunpckhqdq %%xmm3, %%xmm2, %%xmm11 \n"
750
+ "vpunpcklqdq %%xmm4, %%xmm5, %%xmm12 \n"
751
+ "vpunpckhqdq %%xmm5, %%xmm4, %%xmm13 \n"
752
+ "vpunpcklqdq %%xmm7, %%xmm6, %%xmm14 \n"
753
+ "vpunpckhqdq %%xmm7, %%xmm6, %%xmm15 \n"
754
+
755
+ /* Rotate the rows to get the correct final order.
756
+ * Rotating xmm12 isn't needed, as we can handle
757
+ * the rotation in the PUNPCKLQDQ above (note its swapped
758
+ * source operands). Rotating xmm8 isn't needed as it's already in the right order
759
+ */
760
+
761
+ "vpalignr $2, %%xmm9, %%xmm9, %%xmm9 \n"
762
+ "vpalignr $4, %%xmm10, %%xmm10, %%xmm10 \n"
763
+ "vpalignr $6, %%xmm11, %%xmm11, %%xmm11 \n"
764
+ "vpalignr $10, %%xmm13, %%xmm13, %%xmm13 \n"
765
+ "vpalignr $12, %%xmm14, %%xmm14, %%xmm14 \n"
766
+ "vpalignr $14, %%xmm15, %%xmm15, %%xmm15 \n"
767
+
768
+ /* Convert from half -> float (8 halfs in xmmN become 8 floats in ymmN) */
769
+
770
+ "vcvtph2ps %%xmm8, %%ymm8 \n"
771
+ "vcvtph2ps %%xmm9, %%ymm9 \n"
772
+ "vcvtph2ps %%xmm10, %%ymm10 \n"
773
+ "vcvtph2ps %%xmm11, %%ymm11 \n"
774
+ "vcvtph2ps %%xmm12, %%ymm12 \n"
775
+ "vcvtph2ps %%xmm13, %%ymm13 \n"
776
+ "vcvtph2ps %%xmm14, %%ymm14 \n"
777
+ "vcvtph2ps %%xmm15, %%ymm15 \n"
778
+
779
+ /* Move float values to dst, one 8-float row per store (vmovaps: dst must be 32-byte aligned) */
780
+
781
+ "vmovaps %%ymm8, (%1) \n"
782
+ "vmovaps %%ymm9, 32(%1) \n"
783
+ "vmovaps %%ymm10, 64(%1) \n"
784
+ "vmovaps %%ymm11, 96(%1) \n"
785
+ "vmovaps %%ymm12, 128(%1) \n"
786
+ "vmovaps %%ymm13, 160(%1) \n"
787
+ "vmovaps %%ymm14, 192(%1) \n"
788
+ "vmovaps %%ymm15, 224(%1) \n"
789
+ #ifndef __AVX__
790
+ "vzeroupper \n"
791
+ #endif /* __AVX__ : vzeroupper is emitted only when the file is built without AVX */
792
+ : /* Output: none; results are stored through dst, covered by the memory clobber */
793
+ : /* Input: %0 = src, %1 = dst */ "r"(src), "r"(dst)
794
+ : /* Clobber */ "memory",
795
+ #ifndef __AVX__
796
+ "%xmm0", "%xmm1", "%xmm2", "%xmm3",
797
+ "%xmm4", "%xmm5", "%xmm6", "%xmm7",
798
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
799
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15"
800
+ #else
801
+ "%ymm0", "%ymm1", "%ymm2", "%ymm3",
802
+ "%ymm4", "%ymm5", "%ymm6", "%ymm7",
803
+ "%ymm8", "%ymm9", "%ymm10", "%ymm11",
804
+ "%ymm12", "%ymm13", "%ymm14", "%ymm15"
805
+ #endif /* __AVX__ */
806
+ );
807
+
808
+ #else
809
+ fromHalfZigZag_scalar(src, dst); // used when IMF_HAVE_GCC_INLINEASM_64 is not defined (the asm needs xmm8-xmm15)
810
+ #endif /* defined IMF_HAVE_GCC_INLINEASM_64 */
811
+ }
812
+
813
+
814
+ //
815
+ // Inverse 8x8 DCT, only inverting the DC. This assumes that
816
+ // all AC frequencies are 0.
817
+ //
818
+
819
+ #ifndef IMF_HAVE_SSE2
820
+
821
//
// DC-only inverse DCT: with every AC coefficient assumed to be zero,
// all 64 outputs equal data[0] scaled twice by 3.535536e-01 (once for
// each 1-D pass). The block is overwritten in place.
//
void
dctInverse8x8DcOnly (float *data)
{
    const float dc = data[0] * 3.535536e-01f * 3.535536e-01f;

    float       *cursor = data;
    float *const end    = data + 64;

    while (cursor != end)
        *cursor++ = dc;
}
829
+
830
+ #else /* IMF_HAVE_SSE2 */
831
+
832
//
// SSE2 DC-only inverse DCT: broadcasts the scaled DC term and writes it
// over the whole 8x8 block with sixteen aligned 4-float stores, so data
// must be at least 16-byte aligned.
//
void
dctInverse8x8DcOnly (float *data)
{
    const float  dc    = data[0] * 3.535536e-01f * 3.535536e-01f;
    const __m128 splat = _mm_set1_ps (dc);

    for (float *out = data; out != data + 64; out += 4)
        _mm_store_ps (out, splat);
}
841
+
842
+ #endif /* IMF_HAVE_SSE2 */
843
+
844
+
845
+ //
846
+ // Full 8x8 Inverse DCT:
847
+ //
848
+ // Simple inverse DCT on an 8x8 block, with scalar ops only.
849
+ // Operates on data in-place.
850
+ //
851
+ // This is based on the iDCT formulation (y = frequency domain,
852
+ // x = spatial domain)
853
+ //
854
+ // [x0] [ ][y0] [ ][y1]
855
+ // [x1] = [ M1 ][y2] + [ M2 ][y3]
856
+ // [x2] [ ][y4] [ ][y5]
857
+ // [x3] [ ][y6] [ ][y7]
858
+ //
859
+ // [x7] [ ][y0] [ ][y1]
860
+ // [x6] = [ M1 ][y2] - [ M2 ][y3]
861
+ // [x5] [ ][y4] [ ][y5]
862
+ // [x4] [ ][y6] [ ][y7]
863
+ //
864
+ // where M1: M2:
865
+ //
866
+ // [a c a f] [b d e g]
867
+ // [a f -a -c] [d -g -b -e]
868
+ // [a -f -a c] [e -b g d]
869
+ // [a -c a -f] [g -e d -b]
870
+ //
871
+ // and the constants are as defined below..
872
+ //
873
+ // If you know how many of the lower rows are zero, that can
874
+ // be passed in to help speed things up. If you don't know,
875
+ // just set zeroedRows=0.
876
+ //
877
+
878
+ //
879
+ // Default implementation
880
+ //
881
+
882
//
// Scalar in-place inverse DCT of an 8x8 block stored row-major.
//
// The transform is separable: a 1-D inverse DCT is applied to each row
// and then to each column. zeroedRows is the number of trailing rows
// the caller guarantees to be all zero; the row pass skips them. The
// column pass always visits all eight columns.
//
// Each 1-D pass splits into an even part (inputs 0, 2, 4, 6) and an
// odd part (inputs 1, 3, 5, 7); outputs k and 7-k are even[k] + odd[k]
// and even[k] - odd[k] respectively.
//
template <int zeroedRows>
void
dctInverse8x8_scalar (float *data)
{
    const float a = .5f * cosf (3.14159f / 4.0f);
    const float b = .5f * cosf (3.14159f / 16.0f);
    const float c = .5f * cosf (3.14159f / 8.0f);
    const float d = .5f * cosf (3.f*3.14159f / 16.0f);
    const float e = .5f * cosf (5.f*3.14159f / 16.0f);
    const float f = .5f * cosf (3.f*3.14159f / 8.0f);
    const float g = .5f * cosf (7.f*3.14159f / 16.0f);

    //
    // Pass 1: rows. Rows known to be zero are left untouched.
    //

    const int liveRows = 8 - zeroedRows;

    for (int r = 0; r < liveRows; ++r)
    {
        float *line = data + r * 8;

        // Products feeding the even part
        const float c2 = c * line[2];
        const float f2 = f * line[2];
        const float c6 = c * line[6];
        const float f6 = f * line[6];

        // Odd part (M2 times the odd-indexed inputs)
        const float odd0 = b * line[1] + d * line[3] + e * line[5] + g * line[7];
        const float odd1 = d * line[1] - g * line[3] - b * line[5] - e * line[7];
        const float odd2 = e * line[1] - b * line[3] + g * line[5] + d * line[7];
        const float odd3 = g * line[1] - e * line[3] + d * line[5] - b * line[7];

        const float sum04  = a * (line[0] + line[4]);
        const float diff04 = a * (line[0] - line[4]);

        const float mix26a = c2 + f6;
        const float mix26b = f2 - c6;

        // Even part (M1 times the even-indexed inputs)
        const float even0 = sum04  + mix26a;
        const float even1 = diff04 + mix26b;
        const float even2 = diff04 - mix26b;
        const float even3 = sum04  - mix26a;

        line[0] = even0 + odd0;
        line[1] = even1 + odd1;
        line[2] = even2 + odd2;
        line[3] = even3 + odd3;

        line[4] = even3 - odd3;
        line[5] = even2 - odd2;
        line[6] = even1 - odd1;
        line[7] = even0 - odd0;
    }

    //
    // Pass 2: columns. Same butterfly, stepping 8 floats per element.
    //

    for (int col = 0; col < 8; ++col)
    {
        float *top = data + col;

        const float c2 = c * top[16];
        const float f2 = f * top[16];
        const float c6 = c * top[48];
        const float f6 = f * top[48];

        const float odd0 = b * top[8] + d * top[24] +
                           e * top[40] + g * top[56];

        const float odd1 = d * top[8] - g * top[24] -
                           b * top[40] - e * top[56];

        const float odd2 = e * top[8] - b * top[24] +
                           g * top[40] + d * top[56];

        const float odd3 = g * top[8] - e * top[24] +
                           d * top[40] - b * top[56];

        const float sum04  = a * (top[0] + top[32]);
        const float diff04 = a * (top[0] - top[32]);

        const float mix26a = c2 + f6;
        const float mix26b = f2 - c6;

        const float even0 = sum04  + mix26a;
        const float even1 = diff04 + mix26b;
        const float even2 = diff04 - mix26b;
        const float even3 = sum04  - mix26a;

        top[ 0] = even0 + odd0;
        top[ 8] = even1 + odd1;
        top[16] = even2 + odd2;
        top[24] = even3 + odd3;

        top[32] = even3 - odd3;
        top[40] = even2 - odd2;
        top[48] = even1 - odd1;
        top[56] = even0 - odd0;
    }
}
988
+
989
+
990
+ //
991
+ // SSE2 Implementation
992
+ //
993
+
994
+ template <int zeroedRows>
995
+ void
996
+ dctInverse8x8_sse2 (float *data)
997
+ {
998
+ #ifdef IMF_HAVE_SSE2
999
+ __m128 a = {3.535536e-01f,3.535536e-01f,3.535536e-01f,3.535536e-01f};
1000
+ __m128 b = {4.903927e-01f,4.903927e-01f,4.903927e-01f,4.903927e-01f};
1001
+ __m128 c = {4.619398e-01f,4.619398e-01f,4.619398e-01f,4.619398e-01f};
1002
+ __m128 d = {4.157349e-01f,4.157349e-01f,4.157349e-01f,4.157349e-01f};
1003
+ __m128 e = {2.777855e-01f,2.777855e-01f,2.777855e-01f,2.777855e-01f};
1004
+ __m128 f = {1.913422e-01f,1.913422e-01f,1.913422e-01f,1.913422e-01f};
1005
+ __m128 g = {9.754573e-02f,9.754573e-02f,9.754573e-02f,9.754573e-02f};
1006
+
1007
+ __m128 c0 = {3.535536e-01f, 3.535536e-01f, 3.535536e-01f, 3.535536e-01f};
1008
+ __m128 c1 = {4.619398e-01f, 1.913422e-01f,-1.913422e-01f,-4.619398e-01f};
1009
+ __m128 c2 = {3.535536e-01f,-3.535536e-01f,-3.535536e-01f, 3.535536e-01f};
1010
+ __m128 c3 = {1.913422e-01f,-4.619398e-01f, 4.619398e-01f,-1.913422e-01f};
1011
+
1012
+ __m128 c4 = {4.903927e-01f, 4.157349e-01f, 2.777855e-01f, 9.754573e-02f};
1013
+ __m128 c5 = {4.157349e-01f,-9.754573e-02f,-4.903927e-01f,-2.777855e-01f};
1014
+ __m128 c6 = {2.777855e-01f,-4.903927e-01f, 9.754573e-02f, 4.157349e-01f};
1015
+ __m128 c7 = {9.754573e-02f,-2.777855e-01f, 4.157349e-01f,-4.903927e-01f};
1016
+
1017
+ __m128 *srcVec = (__m128 *)data;
1018
+ __m128 x[8], evenSum, oddSum;
1019
+ __m128 in[8], alpha[4], beta[4], theta[4], gamma[4];
1020
+
1021
+ //
1022
+ // Rows -
1023
+ //
1024
+ // Treat this just like matrix-vector multiplication. The
1025
+ // trick is to note that:
1026
+ //
1027
+ // [M00 M01 M02 M03][v0] [(v0 M00) + (v1 M01) + (v2 M02) + (v3 M03)]
1028
+ // [M10 M11 M12 M13][v1] = [(v0 M10) + (v1 M11) + (v2 M12) + (v3 M13)]
1029
+ // [M20 M21 M22 M23][v2] [(v0 M20) + (v1 M21) + (v2 M22) + (v3 M23)]
1030
+ // [M30 M31 M32 M33][v3] [(v0 M30) + (v1 M31) + (v2 M32) + (v3 M33)]
1031
+ //
1032
+ // Then, we can fill a register with v_i and multiply by the i-th column
1033
+ // of M, accumulating across all i-s.
1034
+ //
1035
+ // The kids refer to the populating of a register with a single value
1036
+ // "broadcasting", and it can be done with a shuffle instruction. It
1037
+ // seems to be the slowest part of the whole ordeal.
1038
+ //
1039
+ // Our matrix columns are stored above in c0-c7. c0-3 make up M1, and
1040
+ // c4-7 are from M2.
1041
+ //
1042
+
1043
+ #define DCT_INVERSE_8x8_SS2_ROW_LOOP(i) \
1044
+ /* \
1045
+ * Broadcast the components of the row \
1046
+ */ \
1047
+ \
1048
+ x[0] = _mm_shuffle_ps (srcVec[2 * i], \
1049
+ srcVec[2 * i], \
1050
+ _MM_SHUFFLE (0, 0, 0, 0)); \
1051
+ \
1052
+ x[1] = _mm_shuffle_ps (srcVec[2 * i], \
1053
+ srcVec[2 * i], \
1054
+ _MM_SHUFFLE (1, 1, 1, 1)); \
1055
+ \
1056
+ x[2] = _mm_shuffle_ps (srcVec[2 * i], \
1057
+ srcVec[2 * i], \
1058
+ _MM_SHUFFLE (2, 2, 2, 2)); \
1059
+ \
1060
+ x[3] = _mm_shuffle_ps (srcVec[2 * i], \
1061
+ srcVec[2 * i], \
1062
+ _MM_SHUFFLE (3, 3, 3, 3)); \
1063
+ \
1064
+ x[4] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1065
+ srcVec[2 * i + 1], \
1066
+ _MM_SHUFFLE (0, 0, 0, 0)); \
1067
+ \
1068
+ x[5] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1069
+ srcVec[2 * i + 1], \
1070
+ _MM_SHUFFLE (1, 1, 1, 1)); \
1071
+ \
1072
+ x[6] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1073
+ srcVec[2 * i + 1], \
1074
+ _MM_SHUFFLE (2, 2, 2, 2)); \
1075
+ \
1076
+ x[7] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1077
+ srcVec[2 * i + 1], \
1078
+ _MM_SHUFFLE (3, 3, 3, 3)); \
1079
+ /* \
1080
+ * Multiply the components by each column of the matrix \
1081
+ */ \
1082
+ \
1083
+ x[0] = _mm_mul_ps (x[0], c0); \
1084
+ x[2] = _mm_mul_ps (x[2], c1); \
1085
+ x[4] = _mm_mul_ps (x[4], c2); \
1086
+ x[6] = _mm_mul_ps (x[6], c3); \
1087
+ \
1088
+ x[1] = _mm_mul_ps (x[1], c4); \
1089
+ x[3] = _mm_mul_ps (x[3], c5); \
1090
+ x[5] = _mm_mul_ps (x[5], c6); \
1091
+ x[7] = _mm_mul_ps (x[7], c7); \
1092
+ \
1093
+ /* \
1094
+ * Add across \
1095
+ */ \
1096
+ \
1097
+ evenSum = _mm_setzero_ps(); \
1098
+ evenSum = _mm_add_ps (evenSum, x[0]); \
1099
+ evenSum = _mm_add_ps (evenSum, x[2]); \
1100
+ evenSum = _mm_add_ps (evenSum, x[4]); \
1101
+ evenSum = _mm_add_ps (evenSum, x[6]); \
1102
+ \
1103
+ oddSum = _mm_setzero_ps(); \
1104
+ oddSum = _mm_add_ps (oddSum, x[1]); \
1105
+ oddSum = _mm_add_ps (oddSum, x[3]); \
1106
+ oddSum = _mm_add_ps (oddSum, x[5]); \
1107
+ oddSum = _mm_add_ps (oddSum, x[7]); \
1108
+ \
1109
+ /* \
1110
+ * Final Sum: \
1111
+ * out [0, 1, 2, 3] = evenSum + oddSum \
1112
+ * out [7, 6, 5, 4] = evenSum - oddSum \
1113
+ */ \
1114
+ \
1115
+ srcVec[2 * i] = _mm_add_ps (evenSum, oddSum); \
1116
+ srcVec[2 * i + 1] = _mm_sub_ps (evenSum, oddSum); \
1117
+ srcVec[2 * i + 1] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1118
+ srcVec[2 * i + 1], \
1119
+ _MM_SHUFFLE (0, 1, 2, 3));
1120
+
1121
+ switch (zeroedRows)
1122
+ {
1123
+ case 0:
1124
+ default:
1125
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1126
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1127
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1128
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1129
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
1130
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
1131
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (6)
1132
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (7)
1133
+ break;
1134
+
1135
+ case 1:
1136
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1137
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1138
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1139
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1140
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
1141
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
1142
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (6)
1143
+ break;
1144
+
1145
+ case 2:
1146
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1147
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1148
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1149
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1150
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
1151
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
1152
+ break;
1153
+
1154
+ case 3:
1155
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1156
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1157
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1158
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1159
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
1160
+ break;
1161
+
1162
+ case 4:
1163
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1164
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1165
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1166
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1167
+ break;
1168
+
1169
+ case 5:
1170
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1171
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1172
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1173
+ break;
1174
+
1175
+ case 6:
1176
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1177
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1178
+ break;
1179
+
1180
+ case 7:
1181
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1182
+ break;
1183
+ }
1184
+
1185
+ //
1186
+ // Columns -
1187
+ //
1188
+ // This is slightly more straightforward, if less readable. Here
1189
+ // we just operate on 4 columns at a time, in two batches.
1190
+ //
1191
+ // The slight mess is to try and cache sub-expressions, which
1192
+ // we ignore in the row-wise pass.
1193
+ //
1194
+
1195
+ for (int col = 0; col < 2; ++col)
1196
+ {
1197
+
1198
+ for (int i = 0; i < 8; ++i)
1199
+ in[i] = srcVec[2 * i + col];
1200
+
1201
+ alpha[0] = _mm_mul_ps (c, in[2]);
1202
+ alpha[1] = _mm_mul_ps (f, in[2]);
1203
+ alpha[2] = _mm_mul_ps (c, in[6]);
1204
+ alpha[3] = _mm_mul_ps (f, in[6]);
1205
+
1206
+ beta[0] = _mm_add_ps (_mm_add_ps (_mm_mul_ps (in[1], b),
1207
+ _mm_mul_ps (in[3], d)),
1208
+ _mm_add_ps (_mm_mul_ps (in[5], e),
1209
+ _mm_mul_ps (in[7], g)));
1210
+
1211
+ beta[1] = _mm_sub_ps (_mm_sub_ps (_mm_mul_ps (in[1], d),
1212
+ _mm_mul_ps (in[3], g)),
1213
+ _mm_add_ps (_mm_mul_ps (in[5], b),
1214
+ _mm_mul_ps (in[7], e)));
1215
+
1216
+ beta[2] = _mm_add_ps (_mm_sub_ps (_mm_mul_ps (in[1], e),
1217
+ _mm_mul_ps (in[3], b)),
1218
+ _mm_add_ps (_mm_mul_ps (in[5], g),
1219
+ _mm_mul_ps (in[7], d)));
1220
+
1221
+ beta[3] = _mm_add_ps (_mm_sub_ps (_mm_mul_ps (in[1], g),
1222
+ _mm_mul_ps (in[3], e)),
1223
+ _mm_sub_ps (_mm_mul_ps (in[5], d),
1224
+ _mm_mul_ps (in[7], b)));
1225
+
1226
+ theta[0] = _mm_mul_ps (a, _mm_add_ps (in[0], in[4]));
1227
+ theta[3] = _mm_mul_ps (a, _mm_sub_ps (in[0], in[4]));
1228
+
1229
+ theta[1] = _mm_add_ps (alpha[0], alpha[3]);
1230
+ theta[2] = _mm_sub_ps (alpha[1], alpha[2]);
1231
+
1232
+ gamma[0] = _mm_add_ps (theta[0], theta[1]);
1233
+ gamma[1] = _mm_add_ps (theta[3], theta[2]);
1234
+ gamma[2] = _mm_sub_ps (theta[3], theta[2]);
1235
+ gamma[3] = _mm_sub_ps (theta[0], theta[1]);
1236
+
1237
+ srcVec[ col] = _mm_add_ps (gamma[0], beta[0]);
1238
+ srcVec[2+col] = _mm_add_ps (gamma[1], beta[1]);
1239
+ srcVec[4+col] = _mm_add_ps (gamma[2], beta[2]);
1240
+ srcVec[6+col] = _mm_add_ps (gamma[3], beta[3]);
1241
+
1242
+ srcVec[ 8+col] = _mm_sub_ps (gamma[3], beta[3]);
1243
+ srcVec[10+col] = _mm_sub_ps (gamma[2], beta[2]);
1244
+ srcVec[12+col] = _mm_sub_ps (gamma[1], beta[1]);
1245
+ srcVec[14+col] = _mm_sub_ps (gamma[0], beta[0]);
1246
+ }
1247
+
1248
+ #else /* IMF_HAVE_SSE2 */
1249
+
1250
+ dctInverse8x8_scalar<zeroedRows> (data);
1251
+
1252
+ #endif /* IMF_HAVE_SSE2 */
1253
+ }
1254
+
1255
+
1256
+ //
1257
+ // AVX Implementation
1258
+ //
1259
+
1260
+ #define STR(A) #A
1261
+
1262
+ #define IDCT_AVX_SETUP_2_ROWS(_DST0, _DST1, _TMP0, _TMP1, \
1263
+ _OFF00, _OFF01, _OFF10, _OFF11) \
1264
+ "vmovaps " STR(_OFF00) "(%0), %%xmm" STR(_TMP0) " \n" \
1265
+ "vmovaps " STR(_OFF01) "(%0), %%xmm" STR(_TMP1) " \n" \
1266
+ " \n" \
1267
+ "vinsertf128 $1, " STR(_OFF10) "(%0), %%ymm" STR(_TMP0) ", %%ymm" STR(_TMP0) " \n" \
1268
+ "vinsertf128 $1, " STR(_OFF11) "(%0), %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP1) " \n" \
1269
+ " \n" \
1270
+ "vunpcklpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST0) " \n" \
1271
+ "vunpckhpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST1) " \n" \
1272
+ " \n" \
1273
+ "vunpcklps %%ymm" STR(_DST1) ", %%ymm" STR(_DST0) ", %%ymm" STR(_TMP0) " \n" \
1274
+ "vunpckhps %%ymm" STR(_DST1) ", %%ymm" STR(_DST0) ", %%ymm" STR(_TMP1) " \n" \
1275
+ " \n" \
1276
+ "vunpcklpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST0) " \n" \
1277
+ "vunpckhpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST1) " \n"
1278
+
1279
+ #define IDCT_AVX_MMULT_ROWS(_SRC) \
1280
+ /* Broadcast the source values into y12-y15 */ \
1281
+ "vpermilps $0x00, " STR(_SRC) ", %%ymm12 \n" \
1282
+ "vpermilps $0x55, " STR(_SRC) ", %%ymm13 \n" \
1283
+ "vpermilps $0xaa, " STR(_SRC) ", %%ymm14 \n" \
1284
+ "vpermilps $0xff, " STR(_SRC) ", %%ymm15 \n" \
1285
+ \
1286
+ /* Multiply the coefs by the broadcast values */ \
1287
+ "vmulps %%ymm12, %%ymm8, %%ymm12 \n" \
1288
+ "vmulps %%ymm13, %%ymm9, %%ymm13 \n" \
1289
+ "vmulps %%ymm14, %%ymm10, %%ymm14 \n" \
1290
+ "vmulps %%ymm15, %%ymm11, %%ymm15 \n" \
1291
+ \
1292
+ /* Accumulate the result back into the source */ \
1293
+ "vaddps %%ymm13, %%ymm12, %%ymm12 \n" \
1294
+ "vaddps %%ymm15, %%ymm14, %%ymm14 \n" \
1295
+ "vaddps %%ymm14, %%ymm12, " STR(_SRC) "\n"
1296
+
1297
+ #define IDCT_AVX_EO_TO_ROW_HALVES(_EVEN, _ODD, _FRONT, _BACK) \
1298
+ "vsubps " STR(_ODD) "," STR(_EVEN) "," STR(_BACK) "\n" \
1299
+ "vaddps " STR(_ODD) "," STR(_EVEN) "," STR(_FRONT) "\n" \
1300
+ /* Reverse the back half */ \
1301
+ "vpermilps $0x1b," STR(_BACK) "," STR(_BACK) "\n"
1302
+
1303
+ /* In order to allow for fast paths when we know certain rows
1304
+ * of the 8x8 block are zero, most of the body of the DCT is
1305
+ * in the following macro. Statements are wrapped in a ROWn()
1306
+ * macro, where n is the lowest row in the 8x8 block in which
1307
+ * they depend.
1308
+ *
1309
+ * This should work for the cases where we have 2-8 full rows.
1310
+ * The 1-row case is special, and we'll handle it separately.
1311
+ */
1312
+ #define IDCT_AVX_BODY \
1313
+ /* ==============================================
1314
+ * Row 1D DCT
1315
+ * ----------------------------------------------
1316
+ */ \
1317
+ \
1318
+ /* Setup for the row-oriented 1D DCT. Assuming that (%0) holds
1319
+ * the row-major 8x8 block, load ymm0-3 with the even columns
1320
+ * and ymm4-7 with the odd columns. The lower half of the ymm
1321
+ * holds one row, while the upper half holds the next row.
1322
+ *
1323
+ * If our source is:
1324
+ * a0 a1 a2 a3 a4 a5 a6 a7
1325
+ * b0 b1 b2 b3 b4 b5 b6 b7
1326
+ *
1327
+ * We'll be forming:
1328
+ * a0 a2 a4 a6 b0 b2 b4 b6
1329
+ * a1 a3 a5 a7 b1 b3 b5 b7
1330
+ */ \
1331
+ ROW0( IDCT_AVX_SETUP_2_ROWS(0, 4, 14, 15, 0, 16, 32, 48) ) \
1332
+ ROW2( IDCT_AVX_SETUP_2_ROWS(1, 5, 12, 13, 64, 80, 96, 112) ) \
1333
+ ROW4( IDCT_AVX_SETUP_2_ROWS(2, 6, 10, 11, 128, 144, 160, 176) ) \
1334
+ ROW6( IDCT_AVX_SETUP_2_ROWS(3, 7, 8, 9, 192, 208, 224, 240) ) \
1335
+ \
1336
+ /* Multiply the even columns (ymm0-3) by the matrix M1
1337
+ * storing the results back in ymm0-3
1338
+ *
1339
+ * Assume that (%1) holds the matrix in column major order
1340
+ */ \
1341
+ "vbroadcastf128 (%1), %%ymm8 \n" \
1342
+ "vbroadcastf128 16(%1), %%ymm9 \n" \
1343
+ "vbroadcastf128 32(%1), %%ymm10 \n" \
1344
+ "vbroadcastf128 48(%1), %%ymm11 \n" \
1345
+ \
1346
+ ROW0( IDCT_AVX_MMULT_ROWS(%%ymm0) ) \
1347
+ ROW2( IDCT_AVX_MMULT_ROWS(%%ymm1) ) \
1348
+ ROW4( IDCT_AVX_MMULT_ROWS(%%ymm2) ) \
1349
+ ROW6( IDCT_AVX_MMULT_ROWS(%%ymm3) ) \
1350
+ \
1351
+ /* Repeat, but with the odd columns (ymm4-7) and the
1352
+ * matrix M2
1353
+ */ \
1354
+ "vbroadcastf128 64(%1), %%ymm8 \n" \
1355
+ "vbroadcastf128 80(%1), %%ymm9 \n" \
1356
+ "vbroadcastf128 96(%1), %%ymm10 \n" \
1357
+ "vbroadcastf128 112(%1), %%ymm11 \n" \
1358
+ \
1359
+ ROW0( IDCT_AVX_MMULT_ROWS(%%ymm4) ) \
1360
+ ROW2( IDCT_AVX_MMULT_ROWS(%%ymm5) ) \
1361
+ ROW4( IDCT_AVX_MMULT_ROWS(%%ymm6) ) \
1362
+ ROW6( IDCT_AVX_MMULT_ROWS(%%ymm7) ) \
1363
+ \
1364
+ /* Sum the M1 (ymm0-3) and M2 (ymm4-7) results to get the
1365
+ * front halves of the results, and difference to get the
1366
+ * back halves. The front halves end up in ymm0-3, the back
1367
+ * halves end up in ymm12-15.
1368
+ */ \
1369
+ ROW0( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm0, %%ymm4, %%ymm0, %%ymm12) ) \
1370
+ ROW2( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm1, %%ymm5, %%ymm1, %%ymm13) ) \
1371
+ ROW4( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm2, %%ymm6, %%ymm2, %%ymm14) ) \
1372
+ ROW6( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm3, %%ymm7, %%ymm3, %%ymm15) ) \
1373
+ \
1374
+ /* Reassemble the rows halves into ymm0-7 */ \
1375
+ ROW7( "vperm2f128 $0x13, %%ymm3, %%ymm15, %%ymm7 \n" ) \
1376
+ ROW6( "vperm2f128 $0x02, %%ymm3, %%ymm15, %%ymm6 \n" ) \
1377
+ ROW5( "vperm2f128 $0x13, %%ymm2, %%ymm14, %%ymm5 \n" ) \
1378
+ ROW4( "vperm2f128 $0x02, %%ymm2, %%ymm14, %%ymm4 \n" ) \
1379
+ ROW3( "vperm2f128 $0x13, %%ymm1, %%ymm13, %%ymm3 \n" ) \
1380
+ ROW2( "vperm2f128 $0x02, %%ymm1, %%ymm13, %%ymm2 \n" ) \
1381
+ ROW1( "vperm2f128 $0x13, %%ymm0, %%ymm12, %%ymm1 \n" ) \
1382
+ ROW0( "vperm2f128 $0x02, %%ymm0, %%ymm12, %%ymm0 \n" ) \
1383
+ \
1384
+ \
1385
+ /* ==============================================
1386
+ * Column 1D DCT
1387
+ * ----------------------------------------------
1388
+ */ \
1389
+ \
1390
+ /* Rows should be in ymm0-7, and M2 columns should still be
1391
+ * preserved in ymm8-11. M2 has 4 unique values (and +-
1392
+ * versions of each), and all (positive) values appear in
1393
+ * the first column (and row), which is in ymm8.
1394
+ *
1395
+ * For the column-wise DCT, we need to:
1396
+ * 1) Broadcast each element of a row of M2 into 4 vectors
1397
+ * 2) Multiply the odd rows (ymm1,3,5,7) by the broadcasts.
1398
+ * 3) Accumulate into ymm12-15 for the odd outputs.
1399
+ *
1400
+ * Instead of doing 16 broadcasts for each element in M2,
1401
+ * do 4, filling y8-11 with:
1402
+ *
1403
+ * ymm8: [ b b b b | b b b b ]
1404
+ * ymm9: [ d d d d | d d d d ]
1405
+ * ymm10: [ e e e e | e e e e ]
1406
+ * ymm11: [ g g g g | g g g g ]
1407
+ *
1408
+ * And deal with the negative values by subtracting during accum.
1409
+ */ \
1410
+ "vpermilps $0xff, %%ymm8, %%ymm11 \n" \
1411
+ "vpermilps $0xaa, %%ymm8, %%ymm10 \n" \
1412
+ "vpermilps $0x55, %%ymm8, %%ymm9 \n" \
1413
+ "vpermilps $0x00, %%ymm8, %%ymm8 \n" \
1414
+ \
1415
+ /* This one is easy, since we have ymm12-15 open for scratch
1416
+ * ymm12 = b ymm1 + d ymm3 + e ymm5 + g ymm7
1417
+ */ \
1418
+ ROW1( "vmulps %%ymm1, %%ymm8, %%ymm12 \n" ) \
1419
+ ROW3( "vmulps %%ymm3, %%ymm9, %%ymm13 \n" ) \
1420
+ ROW5( "vmulps %%ymm5, %%ymm10, %%ymm14 \n" ) \
1421
+ ROW7( "vmulps %%ymm7, %%ymm11, %%ymm15 \n" ) \
1422
+ \
1423
+ ROW3( "vaddps %%ymm12, %%ymm13, %%ymm12 \n" ) \
1424
+ ROW7( "vaddps %%ymm14, %%ymm15, %%ymm14 \n" ) \
1425
+ ROW5( "vaddps %%ymm12, %%ymm14, %%ymm12 \n" ) \
1426
+ \
1427
+ /* Trickier, since only y13-15 are open for scratch
1428
+ * ymm13 = d ymm1 - g ymm3 - b ymm5 - e ymm7
1429
+ */ \
1430
+ ROW1( "vmulps %%ymm1, %%ymm9, %%ymm13 \n" ) \
1431
+ ROW3( "vmulps %%ymm3, %%ymm11, %%ymm14 \n" ) \
1432
+ ROW5( "vmulps %%ymm5, %%ymm8, %%ymm15 \n" ) \
1433
+ \
1434
+ ROW5( "vaddps %%ymm14, %%ymm15, %%ymm14 \n" ) \
1435
+ ROW3( "vsubps %%ymm14, %%ymm13, %%ymm13 \n" ) \
1436
+ \
1437
+ ROW7( "vmulps %%ymm7, %%ymm10, %%ymm15 \n" ) \
1438
+ ROW7( "vsubps %%ymm15, %%ymm13, %%ymm13 \n" ) \
1439
+ \
1440
+ /* Trickier still, as only y14-15 are open for scratch
1441
+ * ymm14 = e ymm1 - b ymm3 + g ymm5 + d ymm7
1442
+ */ \
1443
+ ROW1( "vmulps %%ymm1, %%ymm10, %%ymm14 \n" ) \
1444
+ ROW3( "vmulps %%ymm3, %%ymm8, %%ymm15 \n" ) \
1445
+ \
1446
+ ROW3( "vsubps %%ymm15, %%ymm14, %%ymm14 \n" ) \
1447
+ \
1448
+ ROW5( "vmulps %%ymm5, %%ymm11, %%ymm15 \n" ) \
1449
+ ROW5( "vaddps %%ymm15, %%ymm14, %%ymm14 \n" ) \
1450
+ \
1451
+ ROW7( "vmulps %%ymm7, %%ymm9, %%ymm15 \n" ) \
1452
+ ROW7( "vaddps %%ymm15, %%ymm14, %%ymm14 \n" ) \
1453
+ \
1454
+ \
1455
+ /* Easy, as we can blow away ymm1,3,5,7 for scratch
1456
+ * ymm15 = g ymm1 - e ymm3 + d ymm5 - b ymm7
1457
+ */ \
1458
+ ROW1( "vmulps %%ymm1, %%ymm11, %%ymm15 \n" ) \
1459
+ ROW3( "vmulps %%ymm3, %%ymm10, %%ymm3 \n" ) \
1460
+ ROW5( "vmulps %%ymm5, %%ymm9, %%ymm5 \n" ) \
1461
+ ROW7( "vmulps %%ymm7, %%ymm8, %%ymm7 \n" ) \
1462
+ \
1463
+ ROW5( "vaddps %%ymm15, %%ymm5, %%ymm15 \n" ) \
1464
+ ROW7( "vaddps %%ymm3, %%ymm7, %%ymm3 \n" ) \
1465
+ ROW3( "vsubps %%ymm3, %%ymm15, %%ymm15 \n" ) \
1466
+ \
1467
+ \
1468
+ /* Load coefs for M1. Because we're going to broadcast
1469
+ * coefs, we don't need to load the actual structure from
1470
+ * M1. Instead, just load enough that we can broadcast.
1471
+ * There are only 6 unique values in M1, but they're in +-
1472
+ * pairs, leaving only 3 unique coefs if we add and subtract
1473
+ * properly.
1474
+ *
1475
+ * Fill ymm1 with coef[2] = [ a a c f | a a c f ]
1476
+ * Broadcast ymm5 with [ f f f f | f f f f ]
1477
+ * Broadcast ymm3 with [ c c c c | c c c c ]
1478
+ * Broadcast ymm1 with [ a a a a | a a a a ]
1479
+ */ \
1480
+ "vbroadcastf128 8(%1), %%ymm1 \n" \
1481
+ "vpermilps $0xff, %%ymm1, %%ymm5 \n" \
1482
+ "vpermilps $0xaa, %%ymm1, %%ymm3 \n" \
1483
+ "vpermilps $0x00, %%ymm1, %%ymm1 \n" \
1484
+ \
1485
+ /* If we expand E = [M1] [x0 x2 x4 x6]^t, we get the following
1486
+ * common expressions:
1487
+ *
1488
+ * E_0 = ymm8 = (a ymm0 + a ymm4) + (c ymm2 + f ymm6)
1489
+ * E_3 = ymm11 = (a ymm0 + a ymm4) - (c ymm2 + f ymm6)
1490
+ *
1491
+ * E_1 = ymm9 = (a ymm0 - a ymm4) + (f ymm2 - c ymm6)
1492
+ * E_2 = ymm10 = (a ymm0 - a ymm4) - (f ymm2 - c ymm6)
1493
+ *
1494
+ * Afterwards, ymm8-11 will hold the even outputs.
1495
+ */ \
1496
+ \
1497
+ /* ymm11 = (a ymm0 + a ymm4), ymm1 = (a ymm0 - a ymm4) */ \
1498
+ ROW0( "vmulps %%ymm1, %%ymm0, %%ymm11 \n" ) \
1499
+ ROW4( "vmulps %%ymm1, %%ymm4, %%ymm4 \n" ) \
1500
+ ROW0( "vmovaps %%ymm11, %%ymm1 \n" ) \
1501
+ ROW4( "vaddps %%ymm4, %%ymm11, %%ymm11 \n" ) \
1502
+ ROW4( "vsubps %%ymm4, %%ymm1, %%ymm1 \n" ) \
1503
+ \
1504
+ /* ymm7 = (c ymm2 + f ymm6) */ \
1505
+ ROW2( "vmulps %%ymm3, %%ymm2, %%ymm7 \n" ) \
1506
+ ROW6( "vmulps %%ymm5, %%ymm6, %%ymm9 \n" ) \
1507
+ ROW6( "vaddps %%ymm9, %%ymm7, %%ymm7 \n" ) \
1508
+ \
1509
+ /* E_0 = ymm8 = (a ymm0 + a ymm4) + (c ymm2 + f ymm6)
1510
+ * E_3 = ymm11 = (a ymm0 + a ymm4) - (c ymm2 + f ymm6)
1511
+ */ \
1512
+ ROW0( "vmovaps %%ymm11, %%ymm8 \n" ) \
1513
+ ROW2( "vaddps %%ymm7, %%ymm8, %%ymm8 \n" ) \
1514
+ ROW2( "vsubps %%ymm7, %%ymm11, %%ymm11 \n" ) \
1515
+ \
1516
+ /* ymm7 = (f ymm2 - c ymm6) */ \
1517
+ ROW2( "vmulps %%ymm5, %%ymm2, %%ymm7 \n" ) \
1518
+ ROW6( "vmulps %%ymm3, %%ymm6, %%ymm9 \n" ) \
1519
+ ROW6( "vsubps %%ymm9, %%ymm7, %%ymm7 \n" ) \
1520
+ \
1521
+ /* E_1 = ymm9 = (a ymm0 - a ymm4) + (f ymm2 - c ymm6)
1522
+ * E_2 = ymm10 = (a ymm0 - a ymm4) - (f ymm2 - c ymm6)
1523
+ */ \
1524
+ ROW0( "vmovaps %%ymm1, %%ymm9 \n" ) \
1525
+ ROW0( "vmovaps %%ymm1, %%ymm10 \n" ) \
1526
+ ROW2( "vaddps %%ymm7, %%ymm1, %%ymm9 \n" ) \
1527
+ ROW2( "vsubps %%ymm7, %%ymm1, %%ymm10 \n" ) \
1528
+ \
1529
+ /* Add the even (ymm8-11) and the odds (ymm12-15),
1530
+ * placing the results into ymm0-7
1531
+ */ \
1532
+ "vaddps %%ymm12, %%ymm8, %%ymm0 \n" \
1533
+ "vaddps %%ymm13, %%ymm9, %%ymm1 \n" \
1534
+ "vaddps %%ymm14, %%ymm10, %%ymm2 \n" \
1535
+ "vaddps %%ymm15, %%ymm11, %%ymm3 \n" \
1536
+ \
1537
+ "vsubps %%ymm12, %%ymm8, %%ymm7 \n" \
1538
+ "vsubps %%ymm13, %%ymm9, %%ymm6 \n" \
1539
+ "vsubps %%ymm14, %%ymm10, %%ymm5 \n" \
1540
+ "vsubps %%ymm15, %%ymm11, %%ymm4 \n" \
1541
+ \
1542
+ /* Copy out the results from ymm0-7 */ \
1543
+ "vmovaps %%ymm0, (%0) \n" \
1544
+ "vmovaps %%ymm1, 32(%0) \n" \
1545
+ "vmovaps %%ymm2, 64(%0) \n" \
1546
+ "vmovaps %%ymm3, 96(%0) \n" \
1547
+ "vmovaps %%ymm4, 128(%0) \n" \
1548
+ "vmovaps %%ymm5, 160(%0) \n" \
1549
+ "vmovaps %%ymm6, 192(%0) \n" \
1550
+ "vmovaps %%ymm7, 224(%0) \n"
1551
+
1552
+ /* Output, input, and clobber (OIC) sections of the inline asm */
1553
+ #define IDCT_AVX_OIC(_IN0) \
1554
+ : /* Output */ \
1555
+ : /* Input */ "r"(_IN0), "r"(sAvxCoef) \
1556
+ : /* Clobber */ "memory", \
1557
+ "%xmm0", "%xmm1", "%xmm2", "%xmm3", \
1558
+ "%xmm4", "%xmm5", "%xmm6", "%xmm7", \
1559
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",\
1560
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15"
1561
+
1562
+ /* Include vzeroupper for non-AVX builds */
1563
+ #ifndef __AVX__
1564
+ #define IDCT_AVX_ASM(_IN0) \
1565
+ __asm__( \
1566
+ IDCT_AVX_BODY \
1567
+ "vzeroupper \n" \
1568
+ IDCT_AVX_OIC(_IN0) \
1569
+ );
1570
+ #else /* __AVX__ */
1571
+ #define IDCT_AVX_ASM(_IN0) \
1572
+ __asm__( \
1573
+ IDCT_AVX_BODY \
1574
+ IDCT_AVX_OIC(_IN0) \
1575
+ );
1576
+ #endif /* __AVX__ */
1577
+
1578
+ template <int zeroedRows>
1579
+ void
1580
+ dctInverse8x8_avx (float *data)
1581
+ {
1582
+ #if defined IMF_HAVE_GCC_INLINEASM_64
1583
+
1584
+ /* The column-major version of M1, followed by the
1585
+ * column-major version of M2:
1586
+ *
1587
+ * [ a c a f ] [ b d e g ]
1588
+ * M1 = [ a f -a -c ] M2 = [ d -g -b -e ]
1589
+ * [ a -f -a c ] [ e -b g d ]
1590
+ * [ a -c a -f ] [ g -e d -b ]
1591
+ */
1592
+ const float sAvxCoef[32] __attribute__((aligned(32))) = {
1593
+ 3.535536e-01, 3.535536e-01, 3.535536e-01, 3.535536e-01, /* a a a a */
1594
+ 4.619398e-01, 1.913422e-01, -1.913422e-01, -4.619398e-01, /* c f -f -c */
1595
+ 3.535536e-01, -3.535536e-01, -3.535536e-01, 3.535536e-01, /* a -a -a a */
1596
+ 1.913422e-01, -4.619398e-01, 4.619398e-01, -1.913422e-01, /* f -c c -f */
1597
+
1598
+ 4.903927e-01, 4.157349e-01, 2.777855e-01, 9.754573e-02, /* b d e g */
1599
+ 4.157349e-01, -9.754573e-02, -4.903927e-01, -2.777855e-01, /* d -g -b -e */
1600
+ 2.777855e-01, -4.903927e-01, 9.754573e-02, 4.157349e-01, /* e -b g d */
1601
+ 9.754573e-02, -2.777855e-01, 4.157349e-01, -4.903927e-01 /* g -e d -b */
1602
+ };
1603
+
1604
+ #define ROW0(_X) _X
1605
+ #define ROW1(_X) _X
1606
+ #define ROW2(_X) _X
1607
+ #define ROW3(_X) _X
1608
+ #define ROW4(_X) _X
1609
+ #define ROW5(_X) _X
1610
+ #define ROW6(_X) _X
1611
+ #define ROW7(_X) _X
1612
+
1613
+ if (zeroedRows == 0) {
1614
+
1615
+ IDCT_AVX_ASM(data)
1616
+
1617
+ } else if (zeroedRows == 1) {
1618
+
1619
+ #undef ROW7
1620
+ #define ROW7(_X)
1621
+ IDCT_AVX_ASM(data)
1622
+
1623
+ } else if (zeroedRows == 2) {
1624
+
1625
+ #undef ROW6
1626
+ #define ROW6(_X)
1627
+ IDCT_AVX_ASM(data)
1628
+
1629
+ } else if (zeroedRows == 3) {
1630
+
1631
+ #undef ROW5
1632
+ #define ROW5(_X)
1633
+ IDCT_AVX_ASM(data)
1634
+
1635
+ } else if (zeroedRows == 4) {
1636
+
1637
+ #undef ROW4
1638
+ #define ROW4(_X)
1639
+ IDCT_AVX_ASM(data)
1640
+
1641
+ } else if (zeroedRows == 5) {
1642
+
1643
+ #undef ROW3
1644
+ #define ROW3(_X)
1645
+ IDCT_AVX_ASM(data)
1646
+
1647
+ } else if (zeroedRows == 6) {
1648
+
1649
+ #undef ROW2
1650
+ #define ROW2(_X)
1651
+ IDCT_AVX_ASM(data)
1652
+
1653
+ } else if (zeroedRows == 7) {
1654
+
1655
+ __asm__(
1656
+
1657
+ /* ==============================================
1658
+ * Row 1D DCT
1659
+ * ----------------------------------------------
1660
+ */
1661
+ IDCT_AVX_SETUP_2_ROWS(0, 4, 14, 15, 0, 16, 32, 48)
1662
+
1663
+ "vbroadcastf128 (%1), %%ymm8 \n"
1664
+ "vbroadcastf128 16(%1), %%ymm9 \n"
1665
+ "vbroadcastf128 32(%1), %%ymm10 \n"
1666
+ "vbroadcastf128 48(%1), %%ymm11 \n"
1667
+
1668
+ /* Stash a vector of [a a a a | a a a a] away in ymm2 */
1669
+ "vinsertf128 $1, %%xmm8, %%ymm8, %%ymm2 \n"
1670
+
1671
+ IDCT_AVX_MMULT_ROWS(%%ymm0)
1672
+
1673
+ "vbroadcastf128 64(%1), %%ymm8 \n"
1674
+ "vbroadcastf128 80(%1), %%ymm9 \n"
1675
+ "vbroadcastf128 96(%1), %%ymm10 \n"
1676
+ "vbroadcastf128 112(%1), %%ymm11 \n"
1677
+
1678
+ IDCT_AVX_MMULT_ROWS(%%ymm4)
1679
+
1680
+ IDCT_AVX_EO_TO_ROW_HALVES(%%ymm0, %%ymm4, %%ymm0, %%ymm12)
1681
+
1682
+ "vperm2f128 $0x02, %%ymm0, %%ymm12, %%ymm0 \n"
1683
+
1684
+ /* ==============================================
1685
+ * Column 1D DCT
1686
+ * ----------------------------------------------
1687
+ */
1688
+
1689
+ /* DC only, so multiple by a and we're done */
1690
+ "vmulps %%ymm2, %%ymm0, %%ymm0 \n"
1691
+
1692
+ /* Copy out results */
1693
+ "vmovaps %%ymm0, (%0) \n"
1694
+ "vmovaps %%ymm0, 32(%0) \n"
1695
+ "vmovaps %%ymm0, 64(%0) \n"
1696
+ "vmovaps %%ymm0, 96(%0) \n"
1697
+ "vmovaps %%ymm0, 128(%0) \n"
1698
+ "vmovaps %%ymm0, 160(%0) \n"
1699
+ "vmovaps %%ymm0, 192(%0) \n"
1700
+ "vmovaps %%ymm0, 224(%0) \n"
1701
+
1702
+ #ifndef __AVX__
1703
+ "vzeroupper \n"
1704
+ #endif /* __AVX__ */
1705
+ IDCT_AVX_OIC(data)
1706
+ );
1707
+ } else {
1708
+ assert(false); // Invalid template instance parameter
1709
+ }
1710
+ #else /* IMF_HAVE_GCC_INLINEASM_64 */
1711
+
1712
+ dctInverse8x8_scalar<zeroedRows>(data);
1713
+
1714
+ #endif /* IMF_HAVE_GCC_INLINEASM_64 */
1715
+ }
1716
+
1717
+
1718
+ //
1719
+ // Full 8x8 Forward DCT:
1720
+ //
1721
+ // Base forward 8x8 DCT implementation. Works on the data in-place
1722
+ //
1723
+ // This is the implementation described in Pennebaker + Mitchell,
1724
+ // section 4.3.2, and illustrated in figure 4-7
1725
+ //
1726
+ // The basic idea is that the 1D DCT math reduces to:
1727
+ //
1728
+ // 2*out_0 = c_4 [(s_07 + s_34) + (s_12 + s_56)]
1729
+ // 2*out_4 = c_4 [(s_07 + s_34) - (s_12 + s_56)]
1730
+ //
1731
+ // {2*out_2, 2*out_6} = rot_6 ((d_12 - d_56), (s_07 - s_34))
1732
+ //
1733
+ // {2*out_3, 2*out_5} = rot_-3 (d_07 - c_4 (s_12 - s_56),
1734
+ // d_34 - c_4 (d_12 + d_56))
1735
+ //
1736
+ // {2*out_1, 2*out_7} = rot_-1 (d_07 + c_4 (s_12 - s_56),
1737
+ // -d_34 - c_4 (d_12 + d_56))
1738
+ //
1739
+ // where:
1740
+ //
1741
+ // c_i = cos(i*pi/16)
1742
+ // s_i = sin(i*pi/16)
1743
+ //
1744
+ // s_ij = in_i + in_j
1745
+ // d_ij = in_i - in_j
1746
+ //
1747
+ // rot_i(x, y) = {c_i*x + s_i*y, -s_i*x + c_i*y}
1748
+ //
1749
+ // We'll run the DCT in two passes. First, run the 1D DCT on
1750
+ // the rows, in-place. Then, run over the columns in-place,
1751
+ // and be done with it.
1752
+ //
1753
+
1754
+ #ifndef IMF_HAVE_SSE2
1755
+
1756
+ //
1757
+ // Default implementation
1758
+ //
1759
+
1760
//
// One 8-point forward DCT, computed in place over the samples
//
//     p[0], p[stride], p[2 * stride], ... p[7 * stride]
//
// All eight inputs are read before any output is written, so the
// source and destination may be (and are) the same memory.
//
// The coefficients are pre-computed literals -- the same values the
// SSE2 build of dctForward8x8() uses -- rather than cosf() of a
// truncated pi evaluated on every call.  That keeps the scalar and
// SSE2 builds numerically aligned and removes seven libm calls per
// 8x8 block.
//
static inline void
dctForward8x8_1d (float *p, int stride)
{
    const float c4     = .70710678f;    //      cos (4 * pi / 16)
    const float c1Half = .490392640f;   // .5 * cos (1 * pi / 16)
    const float c2Half = .461939770f;   // .5 * cos (2 * pi / 16)
    const float c3Half = .415734810f;   // .5 * cos (3 * pi / 16)
    const float c5Half = .277785120f;   // .5 * cos (5 * pi / 16)
    const float c6Half = .191341720f;   // .5 * cos (6 * pi / 16)
    const float c7Half = .097545161f;   // .5 * cos (7 * pi / 16)

    const float in0 = p[0 * stride];
    const float in1 = p[1 * stride];
    const float in2 = p[2 * stride];
    const float in3 = p[3 * stride];
    const float in4 = p[4 * stride];
    const float in5 = p[5 * stride];
    const float in6 = p[6 * stride];
    const float in7 = p[7 * stride];

    //
    // s_ij = in_i + in_j,  d_ij = in_i - in_j
    //

    const float s07 = in0 + in7;
    const float d07 = in0 - in7;
    const float s12 = in1 + in2;
    const float d12 = in1 - in2;
    const float s34 = in3 + in4;
    const float d34 = in3 - in4;
    const float s56 = in5 + in6;
    const float d56 = in5 - in6;

    //
    // 2*out_0 = c_4 [(s_07 + s_34) + (s_12 + s_56)]
    // 2*out_4 = c_4 [(s_07 + s_34) - (s_12 + s_56)]
    //

    float K0 = c4 * (s07 + s34);
    float K1 = c4 * (s12 + s56);

    p[0 * stride] = .5f * (K0 + K1);
    p[4 * stride] = .5f * (K0 - K1);

    //
    // (2*out_2, 2*out_6) = rot 6 (d12 - d56, s07 - s34)
    //

    float rot_x = d12 - d56;
    float rot_y = s07 - s34;

    p[2 * stride] = c6Half * rot_x + c2Half * rot_y;
    p[6 * stride] = c6Half * rot_y - c2Half * rot_x;

    //
    // K0 and K1 are shared by the two remaining rotations
    //

    K0 =  c4 * (s12 - s56);
    K1 = -c4 * (d12 + d56);

    //
    // (2*out_3, 2*out_5) = rot -3 (d07 - K0, d34 + K1)
    //

    rot_x = d07 - K0;
    rot_y = d34 + K1;

    p[3 * stride] = c3Half * rot_x - c5Half * rot_y;
    p[5 * stride] = c5Half * rot_x + c3Half * rot_y;

    //
    // (2*out_1, 2*out_7) = rot -1 (d07 + K0, K1 - d34)
    //

    rot_x = d07 + K0;
    rot_y = K1 - d34;

    p[1 * stride] = c1Half * rot_x - c7Half * rot_y;
    p[7 * stride] = c7Half * rot_x + c1Half * rot_y;
}


//
// Default (non-SSE2) forward 8x8 DCT.
//
// `data` points at 64 floats holding the block in row-major order;
// the block is transformed in place.  The 2D transform is done as
// two passes of the same 1D transform: first along each of the 8
// rows, then along each of the 8 columns.
//
void
dctForward8x8 (float *data)
{
    //
    // First pass - 1D DCT over each row (consecutive samples)
    //

    for (int row = 0; row < 8; ++row)
        dctForward8x8_1d (data + 8 * row, 1);

    //
    // Second pass - 1D DCT over each column (samples 8 apart)
    //

    for (int column = 0; column < 8; ++column)
        dctForward8x8_1d (data + column, 8);
}
1931
+
1932
+ #else /* IMF_HAVE_SSE2 */
1933
+
1934
+ //
1935
+ // SSE2 implementation
1936
+ //
1937
+ // Here, we're always doing a column-wise operation
1938
+ // plus transposes. This might be faster to do differently
1939
+ // between rows-wise and column-wise
1940
+ //
1941
+
1942
+ void
1943
+ dctForward8x8 (float *data)
1944
+ {
1945
+ __m128 *srcVec = (__m128 *)data;
1946
+ __m128 a0Vec, a1Vec, a2Vec, a3Vec, a4Vec, a5Vec, a6Vec, a7Vec;
1947
+ __m128 k0Vec, k1Vec, rotXVec, rotYVec;
1948
+ __m128 transTmp[4], transTmp2[4];
1949
+
1950
+ __m128 c4Vec = { .70710678f, .70710678f, .70710678f, .70710678f};
1951
+ __m128 c4NegVec = {-.70710678f, -.70710678f, -.70710678f, -.70710678f};
1952
+
1953
+ __m128 c1HalfVec = {.490392640f, .490392640f, .490392640f, .490392640f};
1954
+ __m128 c2HalfVec = {.461939770f, .461939770f, .461939770f, .461939770f};
1955
+ __m128 c3HalfVec = {.415734810f, .415734810f, .415734810f, .415734810f};
1956
+ __m128 c5HalfVec = {.277785120f, .277785120f, .277785120f, .277785120f};
1957
+ __m128 c6HalfVec = {.191341720f, .191341720f, .191341720f, .191341720f};
1958
+ __m128 c7HalfVec = {.097545161f, .097545161f, .097545161f, .097545161f};
1959
+
1960
+ __m128 halfVec = {.5f, .5f, .5f, .5f};
1961
+
1962
+ for (int iter = 0; iter < 2; ++iter)
1963
+ {
1964
+ //
1965
+ // Operate on 4 columns at a time. The
1966
+ // offsets into our row-major array are:
1967
+ // 0: 0 1
1968
+ // 1: 2 3
1969
+ // 2: 4 5
1970
+ // 3: 6 7
1971
+ // 4: 8 9
1972
+ // 5: 10 11
1973
+ // 6: 12 13
1974
+ // 7: 14 15
1975
+ //
1976
+
1977
+ for (int pass=0; pass<2; ++pass)
1978
+ {
1979
+ a0Vec = _mm_add_ps (srcVec[ 0 + pass], srcVec[14 + pass]);
1980
+ a1Vec = _mm_add_ps (srcVec[ 2 + pass], srcVec[ 4 + pass]);
1981
+ a3Vec = _mm_add_ps (srcVec[ 6 + pass], srcVec[ 8 + pass]);
1982
+ a5Vec = _mm_add_ps (srcVec[10 + pass], srcVec[12 + pass]);
1983
+
1984
+ a7Vec = _mm_sub_ps (srcVec[ 0 + pass], srcVec[14 + pass]);
1985
+ a2Vec = _mm_sub_ps (srcVec[ 2 + pass], srcVec[ 4 + pass]);
1986
+ a4Vec = _mm_sub_ps (srcVec[ 6 + pass], srcVec[ 8 + pass]);
1987
+ a6Vec = _mm_sub_ps (srcVec[10 + pass], srcVec[12 + pass]);
1988
+
1989
+ //
1990
+ // First stage; Compute out_0 and out_4
1991
+ //
1992
+
1993
+ k0Vec = _mm_add_ps (a0Vec, a3Vec);
1994
+ k1Vec = _mm_add_ps (a1Vec, a5Vec);
1995
+
1996
+ k0Vec = _mm_mul_ps (c4Vec, k0Vec);
1997
+ k1Vec = _mm_mul_ps (c4Vec, k1Vec);
1998
+
1999
+ srcVec[0 + pass] = _mm_add_ps (k0Vec, k1Vec);
2000
+ srcVec[8 + pass] = _mm_sub_ps (k0Vec, k1Vec);
2001
+
2002
+ srcVec[0 + pass] = _mm_mul_ps (srcVec[0 + pass], halfVec );
2003
+ srcVec[8 + pass] = _mm_mul_ps (srcVec[8 + pass], halfVec );
2004
+
2005
+
2006
+ //
2007
+ // Second stage; Compute out_2 and out_6
2008
+ //
2009
+
2010
+ k0Vec = _mm_sub_ps (a2Vec, a6Vec);
2011
+ k1Vec = _mm_sub_ps (a0Vec, a3Vec);
2012
+
2013
+ srcVec[ 4 + pass] = _mm_add_ps (_mm_mul_ps (c6HalfVec, k0Vec),
2014
+ _mm_mul_ps (c2HalfVec, k1Vec));
2015
+
2016
+ srcVec[12 + pass] = _mm_sub_ps (_mm_mul_ps (c6HalfVec, k1Vec),
2017
+ _mm_mul_ps (c2HalfVec, k0Vec));
2018
+
2019
+ //
2020
+ // Precompute K0 and K1 for the remaining stages
2021
+ //
2022
+
2023
+ k0Vec = _mm_mul_ps (_mm_sub_ps (a1Vec, a5Vec), c4Vec);
2024
+ k1Vec = _mm_mul_ps (_mm_add_ps (a2Vec, a6Vec), c4NegVec);
2025
+
2026
+ //
2027
+ // Third Stage, compute out_3 and out_5
2028
+ //
2029
+
2030
+ rotXVec = _mm_sub_ps (a7Vec, k0Vec);
2031
+ rotYVec = _mm_add_ps (a4Vec, k1Vec);
2032
+
2033
+ srcVec[ 6 + pass] = _mm_sub_ps (_mm_mul_ps (c3HalfVec, rotXVec),
2034
+ _mm_mul_ps (c5HalfVec, rotYVec));
2035
+
2036
+ srcVec[10 + pass] = _mm_add_ps (_mm_mul_ps (c5HalfVec, rotXVec),
2037
+ _mm_mul_ps (c3HalfVec, rotYVec));
2038
+
2039
+ //
2040
+ // Fourth Stage, compute out_1 and out_7
2041
+ //
2042
+
2043
+ rotXVec = _mm_add_ps (a7Vec, k0Vec);
2044
+ rotYVec = _mm_sub_ps (k1Vec, a4Vec);
2045
+
2046
+ srcVec[ 2 + pass] = _mm_sub_ps (_mm_mul_ps (c1HalfVec, rotXVec),
2047
+ _mm_mul_ps (c7HalfVec, rotYVec));
2048
+
2049
+ srcVec[14 + pass] = _mm_add_ps (_mm_mul_ps (c7HalfVec, rotXVec),
2050
+ _mm_mul_ps (c1HalfVec, rotYVec));
2051
+ }
2052
+
2053
+ //
2054
+ // Transpose the matrix, in 4x4 blocks. So, if we have our
2055
+ // 8x8 matrix divied into 4x4 blocks:
2056
+ //
2057
+ // M0 | M1 M0t | M2t
2058
+ // ----+--- --> -----+------
2059
+ // M2 | M3 M1t | M3t
2060
+ //
2061
+
2062
+ //
2063
+ // M0t, done in place, the first half.
2064
+ //
2065
+
2066
+ transTmp[0] = _mm_shuffle_ps (srcVec[0], srcVec[2], 0x44);
2067
+ transTmp[1] = _mm_shuffle_ps (srcVec[4], srcVec[6], 0x44);
2068
+ transTmp[3] = _mm_shuffle_ps (srcVec[4], srcVec[6], 0xEE);
2069
+ transTmp[2] = _mm_shuffle_ps (srcVec[0], srcVec[2], 0xEE);
2070
+
2071
+ //
2072
+ // M3t, also done in place, the first half.
2073
+ //
2074
+
2075
+ transTmp2[0] = _mm_shuffle_ps (srcVec[ 9], srcVec[11], 0x44);
2076
+ transTmp2[1] = _mm_shuffle_ps (srcVec[13], srcVec[15], 0x44);
2077
+ transTmp2[2] = _mm_shuffle_ps (srcVec[ 9], srcVec[11], 0xEE);
2078
+ transTmp2[3] = _mm_shuffle_ps (srcVec[13], srcVec[15], 0xEE);
2079
+
2080
+ //
2081
+ // M0t, the second half.
2082
+ //
2083
+
2084
+ srcVec[0] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0x88);
2085
+ srcVec[4] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0x88);
2086
+ srcVec[2] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0xDD);
2087
+ srcVec[6] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0xDD);
2088
+
2089
+ //
2090
+ // M3t, the second half.
2091
+ //
2092
+
2093
+ srcVec[ 9] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0x88);
2094
+ srcVec[13] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0x88);
2095
+ srcVec[11] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0xDD);
2096
+ srcVec[15] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0xDD);
2097
+
2098
+ //
2099
+ // M1 and M2 need to be done at the same time, because we're
2100
+ // swapping.
2101
+ //
2102
+ // First, the first half of M1t
2103
+ //
2104
+
2105
+ transTmp[0] = _mm_shuffle_ps (srcVec[1], srcVec[3], 0x44);
2106
+ transTmp[1] = _mm_shuffle_ps (srcVec[5], srcVec[7], 0x44);
2107
+ transTmp[2] = _mm_shuffle_ps (srcVec[1], srcVec[3], 0xEE);
2108
+ transTmp[3] = _mm_shuffle_ps (srcVec[5], srcVec[7], 0xEE);
2109
+
2110
+ //
2111
+ // And the first half of M2t
2112
+ //
2113
+
2114
+ transTmp2[0] = _mm_shuffle_ps (srcVec[ 8], srcVec[10], 0x44);
2115
+ transTmp2[1] = _mm_shuffle_ps (srcVec[12], srcVec[14], 0x44);
2116
+ transTmp2[2] = _mm_shuffle_ps (srcVec[ 8], srcVec[10], 0xEE);
2117
+ transTmp2[3] = _mm_shuffle_ps (srcVec[12], srcVec[14], 0xEE);
2118
+
2119
+ //
2120
+ // Second half of M1t
2121
+ //
2122
+
2123
+ srcVec[ 8] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0x88);
2124
+ srcVec[12] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0x88);
2125
+ srcVec[10] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0xDD);
2126
+ srcVec[14] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0xDD);
2127
+
2128
+ //
2129
+ // Second half of M2
2130
+ //
2131
+
2132
+ srcVec[1] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0x88);
2133
+ srcVec[5] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0x88);
2134
+ srcVec[3] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0xDD);
2135
+ srcVec[7] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0xDD);
2136
+ }
2137
+ }
2138
+
2139
+ #endif /* IMF_HAVE_SSE2 */
2140
+
2141
+ } // anonymous namespace
2142
+
2143
+ OPENEXR_IMF_INTERNAL_NAMESPACE_HEADER_EXIT
2144
+
2145
+ #endif