rfreeimage 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (860) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +1 -0
  4. data/Rakefile +34 -0
  5. data/ext/rfreeimage/extconf.rb +35 -0
  6. data/ext/rfreeimage/rfi_main.c +389 -0
  7. data/lib/rfreeimage/image.rb +26 -0
  8. data/lib/rfreeimage/version.rb +3 -0
  9. data/lib/rfreeimage.rb +3 -0
  10. data/rfreeimage.gemspec +32 -0
  11. data/vendor/FreeImage/Makefile +34 -0
  12. data/vendor/FreeImage/Makefile.cygwin +74 -0
  13. data/vendor/FreeImage/Makefile.fip +84 -0
  14. data/vendor/FreeImage/Makefile.gnu +83 -0
  15. data/vendor/FreeImage/Makefile.iphone +96 -0
  16. data/vendor/FreeImage/Makefile.mingw +136 -0
  17. data/vendor/FreeImage/Makefile.osx +115 -0
  18. data/vendor/FreeImage/Makefile.solaris +66 -0
  19. data/vendor/FreeImage/Makefile.srcs +6 -0
  20. data/vendor/FreeImage/README.iphone +19 -0
  21. data/vendor/FreeImage/README.linux +50 -0
  22. data/vendor/FreeImage/README.minGW +236 -0
  23. data/vendor/FreeImage/README.osx +44 -0
  24. data/vendor/FreeImage/README.solaris +67 -0
  25. data/vendor/FreeImage/Source/CacheFile.h +92 -0
  26. data/vendor/FreeImage/Source/DeprecationManager/Deprecated.cpp +36 -0
  27. data/vendor/FreeImage/Source/DeprecationManager/DeprecationMgr.cpp +103 -0
  28. data/vendor/FreeImage/Source/DeprecationManager/DeprecationMgr.h +83 -0
  29. data/vendor/FreeImage/Source/FreeImage/BitmapAccess.cpp +1573 -0
  30. data/vendor/FreeImage/Source/FreeImage/CacheFile.cpp +271 -0
  31. data/vendor/FreeImage/Source/FreeImage/ColorLookup.cpp +785 -0
  32. data/vendor/FreeImage/Source/FreeImage/Conversion.cpp +551 -0
  33. data/vendor/FreeImage/Source/FreeImage/Conversion16_555.cpp +209 -0
  34. data/vendor/FreeImage/Source/FreeImage/Conversion16_565.cpp +204 -0
  35. data/vendor/FreeImage/Source/FreeImage/Conversion24.cpp +252 -0
  36. data/vendor/FreeImage/Source/FreeImage/Conversion32.cpp +345 -0
  37. data/vendor/FreeImage/Source/FreeImage/Conversion4.cpp +246 -0
  38. data/vendor/FreeImage/Source/FreeImage/Conversion8.cpp +305 -0
  39. data/vendor/FreeImage/Source/FreeImage/ConversionFloat.cpp +194 -0
  40. data/vendor/FreeImage/Source/FreeImage/ConversionRGB16.cpp +144 -0
  41. data/vendor/FreeImage/Source/FreeImage/ConversionRGBA16.cpp +147 -0
  42. data/vendor/FreeImage/Source/FreeImage/ConversionRGBAF.cpp +250 -0
  43. data/vendor/FreeImage/Source/FreeImage/ConversionRGBF.cpp +243 -0
  44. data/vendor/FreeImage/Source/FreeImage/ConversionType.cpp +699 -0
  45. data/vendor/FreeImage/Source/FreeImage/ConversionUINT16.cpp +134 -0
  46. data/vendor/FreeImage/Source/FreeImage/FreeImage.cpp +226 -0
  47. data/vendor/FreeImage/Source/FreeImage/FreeImageC.c +22 -0
  48. data/vendor/FreeImage/Source/FreeImage/FreeImageIO.cpp +175 -0
  49. data/vendor/FreeImage/Source/FreeImage/GetType.cpp +92 -0
  50. data/vendor/FreeImage/Source/FreeImage/Halftoning.cpp +474 -0
  51. data/vendor/FreeImage/Source/FreeImage/J2KHelper.cpp +591 -0
  52. data/vendor/FreeImage/Source/FreeImage/J2KHelper.h +36 -0
  53. data/vendor/FreeImage/Source/FreeImage/LFPQuantizer.cpp +208 -0
  54. data/vendor/FreeImage/Source/FreeImage/MNGHelper.cpp +1320 -0
  55. data/vendor/FreeImage/Source/FreeImage/MemoryIO.cpp +237 -0
  56. data/vendor/FreeImage/Source/FreeImage/MultiPage.cpp +974 -0
  57. data/vendor/FreeImage/Source/FreeImage/NNQuantizer.cpp +507 -0
  58. data/vendor/FreeImage/Source/FreeImage/PSDParser.cpp +1057 -0
  59. data/vendor/FreeImage/Source/FreeImage/PSDParser.h +271 -0
  60. data/vendor/FreeImage/Source/FreeImage/PixelAccess.cpp +197 -0
  61. data/vendor/FreeImage/Source/FreeImage/Plugin.cpp +822 -0
  62. data/vendor/FreeImage/Source/FreeImage/PluginBMP.cpp +1494 -0
  63. data/vendor/FreeImage/Source/FreeImage/PluginCUT.cpp +240 -0
  64. data/vendor/FreeImage/Source/FreeImage/PluginDDS.cpp +655 -0
  65. data/vendor/FreeImage/Source/FreeImage/PluginEXR.cpp +773 -0
  66. data/vendor/FreeImage/Source/FreeImage/PluginG3.cpp +433 -0
  67. data/vendor/FreeImage/Source/FreeImage/PluginGIF.cpp +1407 -0
  68. data/vendor/FreeImage/Source/FreeImage/PluginHDR.cpp +722 -0
  69. data/vendor/FreeImage/Source/FreeImage/PluginICO.cpp +824 -0
  70. data/vendor/FreeImage/Source/FreeImage/PluginIFF.cpp +459 -0
  71. data/vendor/FreeImage/Source/FreeImage/PluginJ2K.cpp +328 -0
  72. data/vendor/FreeImage/Source/FreeImage/PluginJNG.cpp +162 -0
  73. data/vendor/FreeImage/Source/FreeImage/PluginJP2.cpp +328 -0
  74. data/vendor/FreeImage/Source/FreeImage/PluginJPEG.cpp +1706 -0
  75. data/vendor/FreeImage/Source/FreeImage/PluginJXR.cpp +1475 -0
  76. data/vendor/FreeImage/Source/FreeImage/PluginKOALA.cpp +243 -0
  77. data/vendor/FreeImage/Source/FreeImage/PluginMNG.cpp +153 -0
  78. data/vendor/FreeImage/Source/FreeImage/PluginPCD.cpp +251 -0
  79. data/vendor/FreeImage/Source/FreeImage/PluginPCX.cpp +659 -0
  80. data/vendor/FreeImage/Source/FreeImage/PluginPFM.cpp +409 -0
  81. data/vendor/FreeImage/Source/FreeImage/PluginPICT.cpp +1343 -0
  82. data/vendor/FreeImage/Source/FreeImage/PluginPNG.cpp +1115 -0
  83. data/vendor/FreeImage/Source/FreeImage/PluginPNM.cpp +838 -0
  84. data/vendor/FreeImage/Source/FreeImage/PluginPSD.cpp +131 -0
  85. data/vendor/FreeImage/Source/FreeImage/PluginRAS.cpp +512 -0
  86. data/vendor/FreeImage/Source/FreeImage/PluginRAW.cpp +793 -0
  87. data/vendor/FreeImage/Source/FreeImage/PluginSGI.cpp +425 -0
  88. data/vendor/FreeImage/Source/FreeImage/PluginTARGA.cpp +1591 -0
  89. data/vendor/FreeImage/Source/FreeImage/PluginTIFF.cpp +2631 -0
  90. data/vendor/FreeImage/Source/FreeImage/PluginWBMP.cpp +372 -0
  91. data/vendor/FreeImage/Source/FreeImage/PluginWebP.cpp +698 -0
  92. data/vendor/FreeImage/Source/FreeImage/PluginXBM.cpp +399 -0
  93. data/vendor/FreeImage/Source/FreeImage/PluginXPM.cpp +487 -0
  94. data/vendor/FreeImage/Source/FreeImage/TIFFLogLuv.cpp +65 -0
  95. data/vendor/FreeImage/Source/FreeImage/ToneMapping.cpp +75 -0
  96. data/vendor/FreeImage/Source/FreeImage/WuQuantizer.cpp +559 -0
  97. data/vendor/FreeImage/Source/FreeImage/ZLibInterface.cpp +223 -0
  98. data/vendor/FreeImage/Source/FreeImage/tmoColorConvert.cpp +479 -0
  99. data/vendor/FreeImage/Source/FreeImage/tmoDrago03.cpp +295 -0
  100. data/vendor/FreeImage/Source/FreeImage/tmoFattal02.cpp +689 -0
  101. data/vendor/FreeImage/Source/FreeImage/tmoReinhard05.cpp +260 -0
  102. data/vendor/FreeImage/Source/FreeImage.h +1153 -0
  103. data/vendor/FreeImage/Source/FreeImageIO.h +63 -0
  104. data/vendor/FreeImage/Source/FreeImageToolkit/BSplineRotate.cpp +730 -0
  105. data/vendor/FreeImage/Source/FreeImageToolkit/Background.cpp +895 -0
  106. data/vendor/FreeImage/Source/FreeImageToolkit/Channels.cpp +488 -0
  107. data/vendor/FreeImage/Source/FreeImageToolkit/ClassicRotate.cpp +917 -0
  108. data/vendor/FreeImage/Source/FreeImageToolkit/Colors.cpp +967 -0
  109. data/vendor/FreeImage/Source/FreeImageToolkit/CopyPaste.cpp +861 -0
  110. data/vendor/FreeImage/Source/FreeImageToolkit/Display.cpp +230 -0
  111. data/vendor/FreeImage/Source/FreeImageToolkit/Filters.h +287 -0
  112. data/vendor/FreeImage/Source/FreeImageToolkit/Flip.cpp +166 -0
  113. data/vendor/FreeImage/Source/FreeImageToolkit/JPEGTransform.cpp +623 -0
  114. data/vendor/FreeImage/Source/FreeImageToolkit/MultigridPoissonSolver.cpp +505 -0
  115. data/vendor/FreeImage/Source/FreeImageToolkit/Rescale.cpp +192 -0
  116. data/vendor/FreeImage/Source/FreeImageToolkit/Resize.cpp +2116 -0
  117. data/vendor/FreeImage/Source/FreeImageToolkit/Resize.h +196 -0
  118. data/vendor/FreeImage/Source/LibJPEG/ansi2knr.c +739 -0
  119. data/vendor/FreeImage/Source/LibJPEG/cderror.h +134 -0
  120. data/vendor/FreeImage/Source/LibJPEG/cdjpeg.c +181 -0
  121. data/vendor/FreeImage/Source/LibJPEG/cdjpeg.h +187 -0
  122. data/vendor/FreeImage/Source/LibJPEG/cjpeg.c +664 -0
  123. data/vendor/FreeImage/Source/LibJPEG/ckconfig.c +402 -0
  124. data/vendor/FreeImage/Source/LibJPEG/djpeg.c +617 -0
  125. data/vendor/FreeImage/Source/LibJPEG/example.c +433 -0
  126. data/vendor/FreeImage/Source/LibJPEG/jaricom.c +153 -0
  127. data/vendor/FreeImage/Source/LibJPEG/jcapimin.c +288 -0
  128. data/vendor/FreeImage/Source/LibJPEG/jcapistd.c +162 -0
  129. data/vendor/FreeImage/Source/LibJPEG/jcarith.c +944 -0
  130. data/vendor/FreeImage/Source/LibJPEG/jccoefct.c +454 -0
  131. data/vendor/FreeImage/Source/LibJPEG/jccolor.c +604 -0
  132. data/vendor/FreeImage/Source/LibJPEG/jcdctmgr.c +477 -0
  133. data/vendor/FreeImage/Source/LibJPEG/jchuff.c +1573 -0
  134. data/vendor/FreeImage/Source/LibJPEG/jcinit.c +84 -0
  135. data/vendor/FreeImage/Source/LibJPEG/jcmainct.c +297 -0
  136. data/vendor/FreeImage/Source/LibJPEG/jcmarker.c +719 -0
  137. data/vendor/FreeImage/Source/LibJPEG/jcmaster.c +856 -0
  138. data/vendor/FreeImage/Source/LibJPEG/jcomapi.c +106 -0
  139. data/vendor/FreeImage/Source/LibJPEG/jconfig.h +161 -0
  140. data/vendor/FreeImage/Source/LibJPEG/jcparam.c +675 -0
  141. data/vendor/FreeImage/Source/LibJPEG/jcprepct.c +358 -0
  142. data/vendor/FreeImage/Source/LibJPEG/jcsample.c +545 -0
  143. data/vendor/FreeImage/Source/LibJPEG/jctrans.c +385 -0
  144. data/vendor/FreeImage/Source/LibJPEG/jdapimin.c +399 -0
  145. data/vendor/FreeImage/Source/LibJPEG/jdapistd.c +276 -0
  146. data/vendor/FreeImage/Source/LibJPEG/jdarith.c +796 -0
  147. data/vendor/FreeImage/Source/LibJPEG/jdatadst.c +270 -0
  148. data/vendor/FreeImage/Source/LibJPEG/jdatasrc.c +275 -0
  149. data/vendor/FreeImage/Source/LibJPEG/jdcoefct.c +741 -0
  150. data/vendor/FreeImage/Source/LibJPEG/jdcolor.c +748 -0
  151. data/vendor/FreeImage/Source/LibJPEG/jdct.h +393 -0
  152. data/vendor/FreeImage/Source/LibJPEG/jddctmgr.c +384 -0
  153. data/vendor/FreeImage/Source/LibJPEG/jdhuff.c +1554 -0
  154. data/vendor/FreeImage/Source/LibJPEG/jdinput.c +662 -0
  155. data/vendor/FreeImage/Source/LibJPEG/jdmainct.c +513 -0
  156. data/vendor/FreeImage/Source/LibJPEG/jdmarker.c +1511 -0
  157. data/vendor/FreeImage/Source/LibJPEG/jdmaster.c +543 -0
  158. data/vendor/FreeImage/Source/LibJPEG/jdmerge.c +401 -0
  159. data/vendor/FreeImage/Source/LibJPEG/jdpostct.c +290 -0
  160. data/vendor/FreeImage/Source/LibJPEG/jdsample.c +361 -0
  161. data/vendor/FreeImage/Source/LibJPEG/jdtrans.c +140 -0
  162. data/vendor/FreeImage/Source/LibJPEG/jerror.c +253 -0
  163. data/vendor/FreeImage/Source/LibJPEG/jerror.h +304 -0
  164. data/vendor/FreeImage/Source/LibJPEG/jfdctflt.c +174 -0
  165. data/vendor/FreeImage/Source/LibJPEG/jfdctfst.c +230 -0
  166. data/vendor/FreeImage/Source/LibJPEG/jfdctint.c +4406 -0
  167. data/vendor/FreeImage/Source/LibJPEG/jidctflt.c +235 -0
  168. data/vendor/FreeImage/Source/LibJPEG/jidctfst.c +368 -0
  169. data/vendor/FreeImage/Source/LibJPEG/jidctint.c +5179 -0
  170. data/vendor/FreeImage/Source/LibJPEG/jinclude.h +91 -0
  171. data/vendor/FreeImage/Source/LibJPEG/jmemansi.c +167 -0
  172. data/vendor/FreeImage/Source/LibJPEG/jmemdos.c +638 -0
  173. data/vendor/FreeImage/Source/LibJPEG/jmemmac.c +289 -0
  174. data/vendor/FreeImage/Source/LibJPEG/jmemmgr.c +1119 -0
  175. data/vendor/FreeImage/Source/LibJPEG/jmemname.c +276 -0
  176. data/vendor/FreeImage/Source/LibJPEG/jmemnobs.c +109 -0
  177. data/vendor/FreeImage/Source/LibJPEG/jmemsys.h +198 -0
  178. data/vendor/FreeImage/Source/LibJPEG/jmorecfg.h +442 -0
  179. data/vendor/FreeImage/Source/LibJPEG/jpegint.h +426 -0
  180. data/vendor/FreeImage/Source/LibJPEG/jpeglib.h +1180 -0
  181. data/vendor/FreeImage/Source/LibJPEG/jpegtran.c +577 -0
  182. data/vendor/FreeImage/Source/LibJPEG/jquant1.c +857 -0
  183. data/vendor/FreeImage/Source/LibJPEG/jquant2.c +1311 -0
  184. data/vendor/FreeImage/Source/LibJPEG/jutils.c +227 -0
  185. data/vendor/FreeImage/Source/LibJPEG/jversion.h +14 -0
  186. data/vendor/FreeImage/Source/LibJPEG/rdbmp.c +480 -0
  187. data/vendor/FreeImage/Source/LibJPEG/rdcolmap.c +253 -0
  188. data/vendor/FreeImage/Source/LibJPEG/rdgif.c +38 -0
  189. data/vendor/FreeImage/Source/LibJPEG/rdjpgcom.c +515 -0
  190. data/vendor/FreeImage/Source/LibJPEG/rdppm.c +459 -0
  191. data/vendor/FreeImage/Source/LibJPEG/rdrle.c +387 -0
  192. data/vendor/FreeImage/Source/LibJPEG/rdswitch.c +365 -0
  193. data/vendor/FreeImage/Source/LibJPEG/rdtarga.c +500 -0
  194. data/vendor/FreeImage/Source/LibJPEG/transupp.c +1763 -0
  195. data/vendor/FreeImage/Source/LibJPEG/transupp.h +219 -0
  196. data/vendor/FreeImage/Source/LibJPEG/wrbmp.c +442 -0
  197. data/vendor/FreeImage/Source/LibJPEG/wrgif.c +399 -0
  198. data/vendor/FreeImage/Source/LibJPEG/wrjpgcom.c +583 -0
  199. data/vendor/FreeImage/Source/LibJPEG/wrppm.c +269 -0
  200. data/vendor/FreeImage/Source/LibJPEG/wrrle.c +305 -0
  201. data/vendor/FreeImage/Source/LibJPEG/wrtarga.c +253 -0
  202. data/vendor/FreeImage/Source/LibJXR/common/include/guiddef.h +230 -0
  203. data/vendor/FreeImage/Source/LibJXR/common/include/wmsal.h +757 -0
  204. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstring.h +342 -0
  205. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_adt.h +71 -0
  206. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_strict.h +1096 -0
  207. data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_undef.h +406 -0
  208. data/vendor/FreeImage/Source/LibJXR/image/decode/JXRTranscode.c +987 -0
  209. data/vendor/FreeImage/Source/LibJXR/image/decode/decode.c +200 -0
  210. data/vendor/FreeImage/Source/LibJXR/image/decode/decode.h +143 -0
  211. data/vendor/FreeImage/Source/LibJXR/image/decode/postprocess.c +288 -0
  212. data/vendor/FreeImage/Source/LibJXR/image/decode/segdec.c +1205 -0
  213. data/vendor/FreeImage/Source/LibJXR/image/decode/strInvTransform.c +1888 -0
  214. data/vendor/FreeImage/Source/LibJXR/image/decode/strPredQuantDec.c +539 -0
  215. data/vendor/FreeImage/Source/LibJXR/image/decode/strdec.c +3628 -0
  216. data/vendor/FreeImage/Source/LibJXR/image/decode/strdec_x86.c +1640 -0
  217. data/vendor/FreeImage/Source/LibJXR/image/encode/encode.c +144 -0
  218. data/vendor/FreeImage/Source/LibJXR/image/encode/encode.h +113 -0
  219. data/vendor/FreeImage/Source/LibJXR/image/encode/segenc.c +1186 -0
  220. data/vendor/FreeImage/Source/LibJXR/image/encode/strFwdTransform.c +1111 -0
  221. data/vendor/FreeImage/Source/LibJXR/image/encode/strPredQuantEnc.c +511 -0
  222. data/vendor/FreeImage/Source/LibJXR/image/encode/strenc.c +2370 -0
  223. data/vendor/FreeImage/Source/LibJXR/image/encode/strenc_x86.c +409 -0
  224. data/vendor/FreeImage/Source/LibJXR/image/sys/adapthuff.c +511 -0
  225. data/vendor/FreeImage/Source/LibJXR/image/sys/ansi.h +61 -0
  226. data/vendor/FreeImage/Source/LibJXR/image/sys/common.h +131 -0
  227. data/vendor/FreeImage/Source/LibJXR/image/sys/image.c +183 -0
  228. data/vendor/FreeImage/Source/LibJXR/image/sys/perfTimer.h +115 -0
  229. data/vendor/FreeImage/Source/LibJXR/image/sys/perfTimerANSI.c +274 -0
  230. data/vendor/FreeImage/Source/LibJXR/image/sys/strPredQuant.c +306 -0
  231. data/vendor/FreeImage/Source/LibJXR/image/sys/strTransform.c +85 -0
  232. data/vendor/FreeImage/Source/LibJXR/image/sys/strTransform.h +50 -0
  233. data/vendor/FreeImage/Source/LibJXR/image/sys/strcodec.c +1251 -0
  234. data/vendor/FreeImage/Source/LibJXR/image/sys/strcodec.h +681 -0
  235. data/vendor/FreeImage/Source/LibJXR/image/sys/windowsmediaphoto.h +515 -0
  236. data/vendor/FreeImage/Source/LibJXR/image/sys/xplatform_image.h +84 -0
  237. data/vendor/FreeImage/Source/LibJXR/image/x86/x86.h +58 -0
  238. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlue.c +930 -0
  239. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlue.h +636 -0
  240. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlueJxr.c +2246 -0
  241. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGluePFC.c +2338 -0
  242. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRMeta.c +905 -0
  243. data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRMeta.h +258 -0
  244. data/vendor/FreeImage/Source/LibOpenJPEG/bio.c +188 -0
  245. data/vendor/FreeImage/Source/LibOpenJPEG/bio.h +128 -0
  246. data/vendor/FreeImage/Source/LibOpenJPEG/cidx_manager.c +239 -0
  247. data/vendor/FreeImage/Source/LibOpenJPEG/cidx_manager.h +68 -0
  248. data/vendor/FreeImage/Source/LibOpenJPEG/cio.c +644 -0
  249. data/vendor/FreeImage/Source/LibOpenJPEG/cio.h +393 -0
  250. data/vendor/FreeImage/Source/LibOpenJPEG/dwt.c +919 -0
  251. data/vendor/FreeImage/Source/LibOpenJPEG/dwt.h +116 -0
  252. data/vendor/FreeImage/Source/LibOpenJPEG/event.c +141 -0
  253. data/vendor/FreeImage/Source/LibOpenJPEG/event.h +97 -0
  254. data/vendor/FreeImage/Source/LibOpenJPEG/function_list.c +114 -0
  255. data/vendor/FreeImage/Source/LibOpenJPEG/function_list.h +126 -0
  256. data/vendor/FreeImage/Source/LibOpenJPEG/image.c +235 -0
  257. data/vendor/FreeImage/Source/LibOpenJPEG/image.h +63 -0
  258. data/vendor/FreeImage/Source/LibOpenJPEG/indexbox_manager.h +148 -0
  259. data/vendor/FreeImage/Source/LibOpenJPEG/invert.c +289 -0
  260. data/vendor/FreeImage/Source/LibOpenJPEG/invert.h +59 -0
  261. data/vendor/FreeImage/Source/LibOpenJPEG/j2k.c +10238 -0
  262. data/vendor/FreeImage/Source/LibOpenJPEG/j2k.h +838 -0
  263. data/vendor/FreeImage/Source/LibOpenJPEG/jp2.c +2776 -0
  264. data/vendor/FreeImage/Source/LibOpenJPEG/jp2.h +490 -0
  265. data/vendor/FreeImage/Source/LibOpenJPEG/mct.c +319 -0
  266. data/vendor/FreeImage/Source/LibOpenJPEG/mct.h +149 -0
  267. data/vendor/FreeImage/Source/LibOpenJPEG/mqc.c +604 -0
  268. data/vendor/FreeImage/Source/LibOpenJPEG/mqc.h +201 -0
  269. data/vendor/FreeImage/Source/LibOpenJPEG/openjpeg.c +955 -0
  270. data/vendor/FreeImage/Source/LibOpenJPEG/openjpeg.h +1475 -0
  271. data/vendor/FreeImage/Source/LibOpenJPEG/opj_clock.c +59 -0
  272. data/vendor/FreeImage/Source/LibOpenJPEG/opj_clock.h +54 -0
  273. data/vendor/FreeImage/Source/LibOpenJPEG/opj_codec.h +160 -0
  274. data/vendor/FreeImage/Source/LibOpenJPEG/opj_config.h +9 -0
  275. data/vendor/FreeImage/Source/LibOpenJPEG/opj_config_private.h +16 -0
  276. data/vendor/FreeImage/Source/LibOpenJPEG/opj_includes.h +175 -0
  277. data/vendor/FreeImage/Source/LibOpenJPEG/opj_intmath.h +172 -0
  278. data/vendor/FreeImage/Source/LibOpenJPEG/opj_inttypes.h +43 -0
  279. data/vendor/FreeImage/Source/LibOpenJPEG/opj_malloc.h +180 -0
  280. data/vendor/FreeImage/Source/LibOpenJPEG/opj_stdint.h +47 -0
  281. data/vendor/FreeImage/Source/LibOpenJPEG/phix_manager.c +191 -0
  282. data/vendor/FreeImage/Source/LibOpenJPEG/pi.c +1870 -0
  283. data/vendor/FreeImage/Source/LibOpenJPEG/pi.h +182 -0
  284. data/vendor/FreeImage/Source/LibOpenJPEG/ppix_manager.c +194 -0
  285. data/vendor/FreeImage/Source/LibOpenJPEG/raw.c +89 -0
  286. data/vendor/FreeImage/Source/LibOpenJPEG/raw.h +100 -0
  287. data/vendor/FreeImage/Source/LibOpenJPEG/t1.c +1751 -0
  288. data/vendor/FreeImage/Source/LibOpenJPEG/t1.h +157 -0
  289. data/vendor/FreeImage/Source/LibOpenJPEG/t1_generate_luts.c +276 -0
  290. data/vendor/FreeImage/Source/LibOpenJPEG/t1_luts.h +143 -0
  291. data/vendor/FreeImage/Source/LibOpenJPEG/t2.c +1334 -0
  292. data/vendor/FreeImage/Source/LibOpenJPEG/t2.h +127 -0
  293. data/vendor/FreeImage/Source/LibOpenJPEG/tcd.c +2123 -0
  294. data/vendor/FreeImage/Source/LibOpenJPEG/tcd.h +348 -0
  295. data/vendor/FreeImage/Source/LibOpenJPEG/tgt.c +331 -0
  296. data/vendor/FreeImage/Source/LibOpenJPEG/tgt.h +140 -0
  297. data/vendor/FreeImage/Source/LibOpenJPEG/thix_manager.c +134 -0
  298. data/vendor/FreeImage/Source/LibOpenJPEG/tpix_manager.c +185 -0
  299. data/vendor/FreeImage/Source/LibPNG/example.c +1061 -0
  300. data/vendor/FreeImage/Source/LibPNG/png.c +4493 -0
  301. data/vendor/FreeImage/Source/LibPNG/png.h +3282 -0
  302. data/vendor/FreeImage/Source/LibPNG/pngconf.h +644 -0
  303. data/vendor/FreeImage/Source/LibPNG/pngdebug.h +154 -0
  304. data/vendor/FreeImage/Source/LibPNG/pngerror.c +963 -0
  305. data/vendor/FreeImage/Source/LibPNG/pngget.c +1213 -0
  306. data/vendor/FreeImage/Source/LibPNG/pnginfo.h +260 -0
  307. data/vendor/FreeImage/Source/LibPNG/pnglibconf.h +218 -0
  308. data/vendor/FreeImage/Source/LibPNG/pngmem.c +281 -0
  309. data/vendor/FreeImage/Source/LibPNG/pngpread.c +1168 -0
  310. data/vendor/FreeImage/Source/LibPNG/pngpriv.h +1944 -0
  311. data/vendor/FreeImage/Source/LibPNG/pngread.c +4121 -0
  312. data/vendor/FreeImage/Source/LibPNG/pngrio.c +120 -0
  313. data/vendor/FreeImage/Source/LibPNG/pngrtran.c +4994 -0
  314. data/vendor/FreeImage/Source/LibPNG/pngrutil.c +4474 -0
  315. data/vendor/FreeImage/Source/LibPNG/pngset.c +1611 -0
  316. data/vendor/FreeImage/Source/LibPNG/pngstruct.h +489 -0
  317. data/vendor/FreeImage/Source/LibPNG/pngtest.c +2011 -0
  318. data/vendor/FreeImage/Source/LibPNG/pngtrans.c +849 -0
  319. data/vendor/FreeImage/Source/LibPNG/pngwio.c +168 -0
  320. data/vendor/FreeImage/Source/LibPNG/pngwrite.c +2455 -0
  321. data/vendor/FreeImage/Source/LibPNG/pngwtran.c +574 -0
  322. data/vendor/FreeImage/Source/LibPNG/pngwutil.c +3029 -0
  323. data/vendor/FreeImage/Source/LibRawLite/dcraw/dcraw.c +15462 -0
  324. data/vendor/FreeImage/Source/LibRawLite/internal/aahd_demosaic.cpp +706 -0
  325. data/vendor/FreeImage/Source/LibRawLite/internal/dcb_demosaicing.c +710 -0
  326. data/vendor/FreeImage/Source/LibRawLite/internal/dcraw_common.cpp +13593 -0
  327. data/vendor/FreeImage/Source/LibRawLite/internal/dcraw_fileio.cpp +240 -0
  328. data/vendor/FreeImage/Source/LibRawLite/internal/defines.h +167 -0
  329. data/vendor/FreeImage/Source/LibRawLite/internal/demosaic_packs.cpp +99 -0
  330. data/vendor/FreeImage/Source/LibRawLite/internal/dht_demosaic.cpp +873 -0
  331. data/vendor/FreeImage/Source/LibRawLite/internal/libraw_internal_funcs.h +282 -0
  332. data/vendor/FreeImage/Source/LibRawLite/internal/libraw_x3f.cpp +1919 -0
  333. data/vendor/FreeImage/Source/LibRawLite/internal/var_defines.h +216 -0
  334. data/vendor/FreeImage/Source/LibRawLite/internal/wf_filtering.cpp +1950 -0
  335. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw.h +338 -0
  336. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_alloc.h +99 -0
  337. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_const.h +233 -0
  338. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_datastream.h +238 -0
  339. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_internal.h +225 -0
  340. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_types.h +442 -0
  341. data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_version.h +62 -0
  342. data/vendor/FreeImage/Source/LibRawLite/src/libraw_c_api.cpp +230 -0
  343. data/vendor/FreeImage/Source/LibRawLite/src/libraw_cxx.cpp +4533 -0
  344. data/vendor/FreeImage/Source/LibRawLite/src/libraw_datastream.cpp +703 -0
  345. data/vendor/FreeImage/Source/LibTIFF4/mkg3states.c +451 -0
  346. data/vendor/FreeImage/Source/LibTIFF4/mkspans.c +82 -0
  347. data/vendor/FreeImage/Source/LibTIFF4/t4.h +292 -0
  348. data/vendor/FreeImage/Source/LibTIFF4/tif_aux.c +358 -0
  349. data/vendor/FreeImage/Source/LibTIFF4/tif_close.c +140 -0
  350. data/vendor/FreeImage/Source/LibTIFF4/tif_codec.c +166 -0
  351. data/vendor/FreeImage/Source/LibTIFF4/tif_color.c +287 -0
  352. data/vendor/FreeImage/Source/LibTIFF4/tif_compress.c +304 -0
  353. data/vendor/FreeImage/Source/LibTIFF4/tif_config.h +97 -0
  354. data/vendor/FreeImage/Source/LibTIFF4/tif_config.vc.h +74 -0
  355. data/vendor/FreeImage/Source/LibTIFF4/tif_config.wince.h +71 -0
  356. data/vendor/FreeImage/Source/LibTIFF4/tif_dir.c +1700 -0
  357. data/vendor/FreeImage/Source/LibTIFF4/tif_dir.h +308 -0
  358. data/vendor/FreeImage/Source/LibTIFF4/tif_dirinfo.c +959 -0
  359. data/vendor/FreeImage/Source/LibTIFF4/tif_dirread.c +5640 -0
  360. data/vendor/FreeImage/Source/LibTIFF4/tif_dirwrite.c +2910 -0
  361. data/vendor/FreeImage/Source/LibTIFF4/tif_dumpmode.c +143 -0
  362. data/vendor/FreeImage/Source/LibTIFF4/tif_error.c +80 -0
  363. data/vendor/FreeImage/Source/LibTIFF4/tif_extension.c +118 -0
  364. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3.c +1595 -0
  365. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3.h +538 -0
  366. data/vendor/FreeImage/Source/LibTIFF4/tif_fax3sm.c +1260 -0
  367. data/vendor/FreeImage/Source/LibTIFF4/tif_flush.c +118 -0
  368. data/vendor/FreeImage/Source/LibTIFF4/tif_getimage.c +2890 -0
  369. data/vendor/FreeImage/Source/LibTIFF4/tif_jbig.c +213 -0
  370. data/vendor/FreeImage/Source/LibTIFF4/tif_jpeg.c +2354 -0
  371. data/vendor/FreeImage/Source/LibTIFF4/tif_jpeg_12.c +65 -0
  372. data/vendor/FreeImage/Source/LibTIFF4/tif_luv.c +1683 -0
  373. data/vendor/FreeImage/Source/LibTIFF4/tif_lzma.c +495 -0
  374. data/vendor/FreeImage/Source/LibTIFF4/tif_lzw.c +1169 -0
  375. data/vendor/FreeImage/Source/LibTIFF4/tif_next.c +181 -0
  376. data/vendor/FreeImage/Source/LibTIFF4/tif_ojpeg.c +2501 -0
  377. data/vendor/FreeImage/Source/LibTIFF4/tif_open.c +725 -0
  378. data/vendor/FreeImage/Source/LibTIFF4/tif_packbits.c +300 -0
  379. data/vendor/FreeImage/Source/LibTIFF4/tif_pixarlog.c +1442 -0
  380. data/vendor/FreeImage/Source/LibTIFF4/tif_predict.c +764 -0
  381. data/vendor/FreeImage/Source/LibTIFF4/tif_predict.h +77 -0
  382. data/vendor/FreeImage/Source/LibTIFF4/tif_print.c +716 -0
  383. data/vendor/FreeImage/Source/LibTIFF4/tif_read.c +1086 -0
  384. data/vendor/FreeImage/Source/LibTIFF4/tif_strip.c +383 -0
  385. data/vendor/FreeImage/Source/LibTIFF4/tif_swab.c +310 -0
  386. data/vendor/FreeImage/Source/LibTIFF4/tif_thunder.c +207 -0
  387. data/vendor/FreeImage/Source/LibTIFF4/tif_tile.c +299 -0
  388. data/vendor/FreeImage/Source/LibTIFF4/tif_unix.c +325 -0
  389. data/vendor/FreeImage/Source/LibTIFF4/tif_version.c +40 -0
  390. data/vendor/FreeImage/Source/LibTIFF4/tif_vms.c +603 -0
  391. data/vendor/FreeImage/Source/LibTIFF4/tif_warning.c +81 -0
  392. data/vendor/FreeImage/Source/LibTIFF4/tif_win32.c +443 -0
  393. data/vendor/FreeImage/Source/LibTIFF4/tif_wince.c +293 -0
  394. data/vendor/FreeImage/Source/LibTIFF4/tif_write.c +771 -0
  395. data/vendor/FreeImage/Source/LibTIFF4/tif_zip.c +472 -0
  396. data/vendor/FreeImage/Source/LibTIFF4/tiff.h +681 -0
  397. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.h +170 -0
  398. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.vc.h +160 -0
  399. data/vendor/FreeImage/Source/LibTIFF4/tiffconf.wince.h +121 -0
  400. data/vendor/FreeImage/Source/LibTIFF4/tiffio.h +557 -0
  401. data/vendor/FreeImage/Source/LibTIFF4/tiffiop.h +367 -0
  402. data/vendor/FreeImage/Source/LibTIFF4/tiffvers.h +9 -0
  403. data/vendor/FreeImage/Source/LibTIFF4/uvcode.h +180 -0
  404. data/vendor/FreeImage/Source/LibWebP/src/dec/alphai.h +55 -0
  405. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.alpha.c +167 -0
  406. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.buffer.c +249 -0
  407. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.frame.c +827 -0
  408. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.idec.c +857 -0
  409. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.io.c +640 -0
  410. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.quant.c +110 -0
  411. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.tree.c +525 -0
  412. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.vp8.c +663 -0
  413. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.vp8l.c +1584 -0
  414. data/vendor/FreeImage/Source/LibWebP/src/dec/dec.webp.c +834 -0
  415. data/vendor/FreeImage/Source/LibWebP/src/dec/decode_vp8.h +185 -0
  416. data/vendor/FreeImage/Source/LibWebP/src/dec/vp8i.h +353 -0
  417. data/vendor/FreeImage/Source/LibWebP/src/dec/vp8li.h +136 -0
  418. data/vendor/FreeImage/Source/LibWebP/src/dec/webpi.h +120 -0
  419. data/vendor/FreeImage/Source/LibWebP/src/demux/demux.demux.c +957 -0
  420. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing.c +377 -0
  421. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing_mips_dsp_r2.c +139 -0
  422. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing_sse2.c +296 -0
  423. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb.c +68 -0
  424. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb_mips_dsp_r2.c +108 -0
  425. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb_sse2.c +62 -0
  426. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost.c +412 -0
  427. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_mips32.c +154 -0
  428. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_mips_dsp_r2.c +107 -0
  429. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_sse2.c +121 -0
  430. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cpu.c +138 -0
  431. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec.c +760 -0
  432. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_clip_tables.c +366 -0
  433. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_mips32.c +585 -0
  434. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c +992 -0
  435. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_neon.c +1489 -0
  436. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_sse2.c +1284 -0
  437. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc.c +788 -0
  438. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_avx2.c +24 -0
  439. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_mips32.c +670 -0
  440. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c +1510 -0
  441. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_neon.c +932 -0
  442. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_sse2.c +940 -0
  443. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters.c +240 -0
  444. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c +404 -0
  445. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters_sse2.c +349 -0
  446. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.h +434 -0
  447. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless.c +1838 -0
  448. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_mips32.c +416 -0
  449. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c +921 -0
  450. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_neon.c +357 -0
  451. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_sse2.c +535 -0
  452. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler.c +115 -0
  453. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler_mips32.c +192 -0
  454. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler_mips_dsp_r2.c +210 -0
  455. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling.c +252 -0
  456. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c +280 -0
  457. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_neon.c +267 -0
  458. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_sse2.c +214 -0
  459. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv.c +166 -0
  460. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_mips32.c +100 -0
  461. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c +131 -0
  462. data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_sse2.c +322 -0
  463. data/vendor/FreeImage/Source/LibWebP/src/dsp/lossless.h +313 -0
  464. data/vendor/FreeImage/Source/LibWebP/src/dsp/mips_macro.h +200 -0
  465. data/vendor/FreeImage/Source/LibWebP/src/dsp/neon.h +82 -0
  466. data/vendor/FreeImage/Source/LibWebP/src/dsp/yuv.h +321 -0
  467. data/vendor/FreeImage/Source/LibWebP/src/dsp/yuv_tables_sse2.h +536 -0
  468. data/vendor/FreeImage/Source/LibWebP/src/enc/backward_references.h +202 -0
  469. data/vendor/FreeImage/Source/LibWebP/src/enc/cost.h +69 -0
  470. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.alpha.c +440 -0
  471. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.analysis.c +501 -0
  472. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.backward_references.c +1076 -0
  473. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.config.c +163 -0
  474. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.cost.c +355 -0
  475. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.filter.c +296 -0
  476. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.frame.c +850 -0
  477. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.histogram.c +897 -0
  478. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.iterator.c +456 -0
  479. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.near_lossless.c +160 -0
  480. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture.c +290 -0
  481. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_csp.c +1100 -0
  482. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_psnr.c +150 -0
  483. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_rescale.c +285 -0
  484. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_tools.c +206 -0
  485. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.quant.c +1191 -0
  486. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.syntax.c +383 -0
  487. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.token.c +285 -0
  488. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.tree.c +504 -0
  489. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.vp8l.c +1437 -0
  490. data/vendor/FreeImage/Source/LibWebP/src/enc/enc.webpenc.c +379 -0
  491. data/vendor/FreeImage/Source/LibWebP/src/enc/histogram.h +114 -0
  492. data/vendor/FreeImage/Source/LibWebP/src/enc/vp8enci.h +551 -0
  493. data/vendor/FreeImage/Source/LibWebP/src/enc/vp8li.h +78 -0
  494. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.anim_encode.c +1241 -0
  495. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxedit.c +696 -0
  496. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxinternal.c +551 -0
  497. data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxread.c +544 -0
  498. data/vendor/FreeImage/Source/LibWebP/src/mux/muxi.h +232 -0
  499. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_reader.h +168 -0
  500. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_reader_inl.h +172 -0
  501. data/vendor/FreeImage/Source/LibWebP/src/utils/bit_writer.h +120 -0
  502. data/vendor/FreeImage/Source/LibWebP/src/utils/color_cache.h +74 -0
  503. data/vendor/FreeImage/Source/LibWebP/src/utils/endian_inl.h +100 -0
  504. data/vendor/FreeImage/Source/LibWebP/src/utils/filters.h +32 -0
  505. data/vendor/FreeImage/Source/LibWebP/src/utils/huffman.h +67 -0
  506. data/vendor/FreeImage/Source/LibWebP/src/utils/huffman_encode.h +60 -0
  507. data/vendor/FreeImage/Source/LibWebP/src/utils/quant_levels.h +36 -0
  508. data/vendor/FreeImage/Source/LibWebP/src/utils/quant_levels_dec.h +35 -0
  509. data/vendor/FreeImage/Source/LibWebP/src/utils/random.h +63 -0
  510. data/vendor/FreeImage/Source/LibWebP/src/utils/rescaler.h +78 -0
  511. data/vendor/FreeImage/Source/LibWebP/src/utils/thread.h +93 -0
  512. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.bit_reader.c +208 -0
  513. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.bit_writer.c +308 -0
  514. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.color_cache.c +49 -0
  515. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.filters.c +76 -0
  516. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.h +121 -0
  517. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.huffman.c +205 -0
  518. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.huffman_encode.c +417 -0
  519. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.quant_levels.c +140 -0
  520. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.quant_levels_dec.c +279 -0
  521. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.random.c +43 -0
  522. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.rescaler.c +82 -0
  523. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.thread.c +309 -0
  524. data/vendor/FreeImage/Source/LibWebP/src/utils/utils.utils.c +211 -0
  525. data/vendor/FreeImage/Source/LibWebP/src/webp/decode.h +493 -0
  526. data/vendor/FreeImage/Source/LibWebP/src/webp/demux.h +224 -0
  527. data/vendor/FreeImage/Source/LibWebP/src/webp/encode.h +515 -0
  528. data/vendor/FreeImage/Source/LibWebP/src/webp/format_constants.h +88 -0
  529. data/vendor/FreeImage/Source/LibWebP/src/webp/mux.h +507 -0
  530. data/vendor/FreeImage/Source/LibWebP/src/webp/mux_types.h +97 -0
  531. data/vendor/FreeImage/Source/LibWebP/src/webp/types.h +52 -0
  532. data/vendor/FreeImage/Source/MapIntrospector.h +212 -0
  533. data/vendor/FreeImage/Source/Metadata/Exif.cpp +1253 -0
  534. data/vendor/FreeImage/Source/Metadata/FIRational.cpp +176 -0
  535. data/vendor/FreeImage/Source/Metadata/FIRational.h +108 -0
  536. data/vendor/FreeImage/Source/Metadata/FreeImageTag.cpp +353 -0
  537. data/vendor/FreeImage/Source/Metadata/FreeImageTag.h +500 -0
  538. data/vendor/FreeImage/Source/Metadata/IPTC.cpp +342 -0
  539. data/vendor/FreeImage/Source/Metadata/TagConversion.cpp +1094 -0
  540. data/vendor/FreeImage/Source/Metadata/TagLib.cpp +1618 -0
  541. data/vendor/FreeImage/Source/Metadata/XTIFF.cpp +766 -0
  542. data/vendor/FreeImage/Source/OpenEXR/Half/eLut.cpp +114 -0
  543. data/vendor/FreeImage/Source/OpenEXR/Half/eLut.h +71 -0
  544. data/vendor/FreeImage/Source/OpenEXR/Half/half.cpp +310 -0
  545. data/vendor/FreeImage/Source/OpenEXR/Half/half.h +757 -0
  546. data/vendor/FreeImage/Source/OpenEXR/Half/halfExport.h +27 -0
  547. data/vendor/FreeImage/Source/OpenEXR/Half/halfFunction.h +179 -0
  548. data/vendor/FreeImage/Source/OpenEXR/Half/halfLimits.h +102 -0
  549. data/vendor/FreeImage/Source/OpenEXR/Half/toFloat.cpp +164 -0
  550. data/vendor/FreeImage/Source/OpenEXR/Half/toFloat.h +16391 -0
  551. data/vendor/FreeImage/Source/OpenEXR/Iex/Iex.h +60 -0
  552. data/vendor/FreeImage/Source/OpenEXR/Iex/IexBaseExc.cpp +156 -0
  553. data/vendor/FreeImage/Source/OpenEXR/Iex/IexBaseExc.h +264 -0
  554. data/vendor/FreeImage/Source/OpenEXR/Iex/IexErrnoExc.h +208 -0
  555. data/vendor/FreeImage/Source/OpenEXR/Iex/IexExport.h +51 -0
  556. data/vendor/FreeImage/Source/OpenEXR/Iex/IexForward.h +229 -0
  557. data/vendor/FreeImage/Source/OpenEXR/Iex/IexMacros.h +170 -0
  558. data/vendor/FreeImage/Source/OpenEXR/Iex/IexMathExc.h +57 -0
  559. data/vendor/FreeImage/Source/OpenEXR/Iex/IexNamespace.h +112 -0
  560. data/vendor/FreeImage/Source/OpenEXR/Iex/IexThrowErrnoExc.cpp +873 -0
  561. data/vendor/FreeImage/Source/OpenEXR/Iex/IexThrowErrnoExc.h +97 -0
  562. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFloatExc.cpp +113 -0
  563. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFloatExc.h +146 -0
  564. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFpu.cpp +530 -0
  565. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFpu.h +91 -0
  566. data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathIeeeExc.h +62 -0
  567. data/vendor/FreeImage/Source/OpenEXR/IlmBaseConfig.h +61 -0
  568. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAcesFile.cpp +633 -0
  569. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAcesFile.h +324 -0
  570. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfArray.h +285 -0
  571. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAttribute.cpp +158 -0
  572. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAttribute.h +407 -0
  573. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAutoArray.h +95 -0
  574. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfB44Compressor.cpp +1072 -0
  575. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfB44Compressor.h +118 -0
  576. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfBoxAttribute.cpp +111 -0
  577. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfBoxAttribute.h +87 -0
  578. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCRgbaFile.cpp +1438 -0
  579. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCRgbaFile.h +555 -0
  580. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelList.cpp +322 -0
  581. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelList.h +436 -0
  582. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelListAttribute.cpp +150 -0
  583. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelListAttribute.h +74 -0
  584. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCheckedArithmetic.h +163 -0
  585. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticities.cpp +151 -0
  586. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticities.h +131 -0
  587. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticitiesAttribute.cpp +87 -0
  588. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticitiesAttribute.h +73 -0
  589. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompositeDeepScanLine.cpp +591 -0
  590. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompositeDeepScanLine.h +142 -0
  591. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompression.h +84 -0
  592. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressionAttribute.cpp +78 -0
  593. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressionAttribute.h +64 -0
  594. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressor.cpp +226 -0
  595. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressor.h +265 -0
  596. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfConvert.cpp +143 -0
  597. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfConvert.h +107 -0
  598. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepCompositing.cpp +110 -0
  599. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepCompositing.h +132 -0
  600. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepFrameBuffer.cpp +230 -0
  601. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepFrameBuffer.h +339 -0
  602. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageState.h +96 -0
  603. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageStateAttribute.cpp +78 -0
  604. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageStateAttribute.h +68 -0
  605. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputFile.cpp +2025 -0
  606. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputFile.h +276 -0
  607. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputPart.cpp +149 -0
  608. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputPart.h +181 -0
  609. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputFile.cpp +1552 -0
  610. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputFile.h +244 -0
  611. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputPart.cpp +107 -0
  612. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputPart.h +168 -0
  613. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputFile.cpp +1979 -0
  614. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputFile.h +437 -0
  615. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputPart.cpp +273 -0
  616. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputPart.h +362 -0
  617. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputFile.cpp +2055 -0
  618. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputFile.h +475 -0
  619. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputPart.cpp +250 -0
  620. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputPart.h +394 -0
  621. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDoubleAttribute.cpp +57 -0
  622. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDoubleAttribute.h +59 -0
  623. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressor.cpp +3424 -0
  624. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressor.h +210 -0
  625. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressorSimd.h +2145 -0
  626. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmap.cpp +335 -0
  627. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmap.h +336 -0
  628. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmapAttribute.cpp +76 -0
  629. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmapAttribute.h +68 -0
  630. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfExport.h +46 -0
  631. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFastHuf.cpp +768 -0
  632. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFastHuf.h +148 -0
  633. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatAttribute.cpp +57 -0
  634. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatAttribute.h +58 -0
  635. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatVectorAttribute.cpp +84 -0
  636. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatVectorAttribute.h +76 -0
  637. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfForward.h +127 -0
  638. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFrameBuffer.cpp +228 -0
  639. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFrameBuffer.h +386 -0
  640. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFramesPerSecond.cpp +76 -0
  641. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFramesPerSecond.h +94 -0
  642. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericInputFile.cpp +76 -0
  643. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericInputFile.h +58 -0
  644. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericOutputFile.cpp +112 -0
  645. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericOutputFile.h +62 -0
  646. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHeader.cpp +1283 -0
  647. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHeader.h +699 -0
  648. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHuf.cpp +1114 -0
  649. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHuf.h +82 -0
  650. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIO.cpp +110 -0
  651. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIO.h +255 -0
  652. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputFile.cpp +895 -0
  653. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputFile.h +240 -0
  654. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPart.cpp +114 -0
  655. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPart.h +84 -0
  656. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPartData.cpp +51 -0
  657. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPartData.h +69 -0
  658. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputStreamMutex.h +68 -0
  659. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInt64.h +56 -0
  660. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIntAttribute.cpp +57 -0
  661. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIntAttribute.h +58 -0
  662. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCode.cpp +217 -0
  663. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCode.h +167 -0
  664. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCodeAttribute.cpp +99 -0
  665. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCodeAttribute.h +73 -0
  666. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrder.h +69 -0
  667. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrderAttribute.cpp +78 -0
  668. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrderAttribute.h +72 -0
  669. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLut.cpp +178 -0
  670. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLut.h +188 -0
  671. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMatrixAttribute.cpp +263 -0
  672. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMatrixAttribute.h +83 -0
  673. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMisc.cpp +1872 -0
  674. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMisc.h +466 -0
  675. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartInputFile.cpp +783 -0
  676. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartInputFile.h +128 -0
  677. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartOutputFile.cpp +519 -0
  678. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartOutputFile.h +118 -0
  679. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiView.cpp +435 -0
  680. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiView.h +187 -0
  681. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfName.h +150 -0
  682. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfNamespace.h +115 -0
  683. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOpaqueAttribute.cpp +126 -0
  684. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOpaqueAttribute.h +110 -0
  685. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOptimizedPixelReading.h +646 -0
  686. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputFile.cpp +1378 -0
  687. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputFile.h +263 -0
  688. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPart.cpp +105 -0
  689. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPart.h +77 -0
  690. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPartData.cpp +52 -0
  691. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPartData.h +62 -0
  692. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputStreamMutex.h +70 -0
  693. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartHelper.h +262 -0
  694. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartType.cpp +63 -0
  695. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartType.h +62 -0
  696. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPixelType.h +67 -0
  697. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPizCompressor.cpp +667 -0
  698. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPizCompressor.h +117 -0
  699. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImage.cpp +104 -0
  700. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImage.h +135 -0
  701. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImageAttribute.cpp +103 -0
  702. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImageAttribute.h +70 -0
  703. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPxr24Compressor.cpp +553 -0
  704. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPxr24Compressor.h +109 -0
  705. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRational.cpp +127 -0
  706. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRational.h +98 -0
  707. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRationalAttribute.cpp +74 -0
  708. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRationalAttribute.h +69 -0
  709. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgba.h +109 -0
  710. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaFile.cpp +1405 -0
  711. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaFile.h +346 -0
  712. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaYca.cpp +497 -0
  713. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaYca.h +259 -0
  714. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRle.cpp +157 -0
  715. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRle.h +63 -0
  716. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRleCompressor.cpp +220 -0
  717. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRleCompressor.h +80 -0
  718. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfScanLineInputFile.cpp +1702 -0
  719. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfScanLineInputFile.h +210 -0
  720. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSimd.h +59 -0
  721. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStandardAttributes.cpp +125 -0
  722. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStandardAttributes.h +382 -0
  723. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStdIO.cpp +242 -0
  724. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStdIO.h +160 -0
  725. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringAttribute.cpp +80 -0
  726. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringAttribute.h +71 -0
  727. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringVectorAttribute.cpp +100 -0
  728. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringVectorAttribute.h +74 -0
  729. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSystemSpecific.cpp +129 -0
  730. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSystemSpecific.h +172 -0
  731. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTestFile.cpp +216 -0
  732. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTestFile.h +97 -0
  733. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfThreading.cpp +62 -0
  734. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfThreading.h +95 -0
  735. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescription.h +107 -0
  736. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescriptionAttribute.cpp +86 -0
  737. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescriptionAttribute.h +72 -0
  738. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileOffsets.cpp +552 -0
  739. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileOffsets.h +125 -0
  740. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputFile.cpp +1533 -0
  741. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputFile.h +401 -0
  742. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputPart.cpp +208 -0
  743. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputPart.h +100 -0
  744. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledMisc.cpp +389 -0
  745. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledMisc.h +106 -0
  746. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputFile.cpp +1841 -0
  747. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputFile.h +495 -0
  748. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputPart.cpp +228 -0
  749. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputPart.h +105 -0
  750. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledRgbaFile.cpp +1163 -0
  751. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledRgbaFile.h +482 -0
  752. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCode.cpp +431 -0
  753. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCode.h +242 -0
  754. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCodeAttribute.cpp +79 -0
  755. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCodeAttribute.h +74 -0
  756. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVecAttribute.cpp +217 -0
  757. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVecAttribute.h +100 -0
  758. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVersion.cpp +60 -0
  759. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVersion.h +136 -0
  760. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfWav.cpp +391 -0
  761. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfWav.h +78 -0
  762. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfXdr.h +927 -0
  763. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZip.cpp +196 -0
  764. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZip.h +78 -0
  765. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZipCompressor.cpp +127 -0
  766. data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZipCompressor.h +89 -0
  767. data/vendor/FreeImage/Source/OpenEXR/IlmImf/b44ExpLogTable.cpp +136 -0
  768. data/vendor/FreeImage/Source/OpenEXR/IlmImf/b44ExpLogTable.h +16396 -0
  769. data/vendor/FreeImage/Source/OpenEXR/IlmImf/dwaLookups.cpp +573 -0
  770. data/vendor/FreeImage/Source/OpenEXR/IlmImf/dwaLookups.h +98334 -0
  771. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThread.cpp +80 -0
  772. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThread.h +143 -0
  773. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadExport.h +46 -0
  774. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadForward.h +52 -0
  775. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutex.cpp +59 -0
  776. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutex.h +160 -0
  777. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutexPosix.cpp +85 -0
  778. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutexWin32.cpp +79 -0
  779. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadNamespace.h +114 -0
  780. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPool.cpp +483 -0
  781. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPool.h +160 -0
  782. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPosix.cpp +98 -0
  783. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphore.cpp +60 -0
  784. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphore.h +112 -0
  785. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphorePosix.cpp +106 -0
  786. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphorePosixCompat.cpp +155 -0
  787. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphoreWin32.cpp +153 -0
  788. data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadWin32.cpp +100 -0
  789. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBox.cpp +37 -0
  790. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBox.h +849 -0
  791. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBoxAlgo.h +1016 -0
  792. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColor.h +736 -0
  793. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColorAlgo.cpp +178 -0
  794. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColorAlgo.h +257 -0
  795. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathEuler.h +926 -0
  796. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathExc.h +73 -0
  797. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathExport.h +46 -0
  798. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathForward.h +72 -0
  799. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrame.h +192 -0
  800. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrustum.h +741 -0
  801. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrustumTest.h +417 -0
  802. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFun.cpp +181 -0
  803. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFun.h +269 -0
  804. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathGL.h +166 -0
  805. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathGLU.h +54 -0
  806. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathHalfLimits.h +68 -0
  807. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathInt64.h +62 -0
  808. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathInterval.h +226 -0
  809. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLimits.h +268 -0
  810. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLine.h +185 -0
  811. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLineAlgo.h +288 -0
  812. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMath.h +208 -0
  813. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrix.h +3441 -0
  814. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrixAlgo.cpp +1252 -0
  815. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrixAlgo.h +1425 -0
  816. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathNamespace.h +115 -0
  817. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathPlane.h +257 -0
  818. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathPlatform.h +112 -0
  819. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathQuat.h +964 -0
  820. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRandom.cpp +194 -0
  821. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRandom.h +401 -0
  822. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRoots.h +219 -0
  823. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathShear.cpp +54 -0
  824. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathShear.h +656 -0
  825. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathSphere.h +177 -0
  826. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVec.cpp +583 -0
  827. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVec.h +2227 -0
  828. data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVecAlgo.h +147 -0
  829. data/vendor/FreeImage/Source/OpenEXR/OpenEXRConfig.h +72 -0
  830. data/vendor/FreeImage/Source/Plugin.h +144 -0
  831. data/vendor/FreeImage/Source/Quantizers.h +354 -0
  832. data/vendor/FreeImage/Source/ToneMapping.h +44 -0
  833. data/vendor/FreeImage/Source/Utilities.h +516 -0
  834. data/vendor/FreeImage/Source/ZLib/adler32.c +179 -0
  835. data/vendor/FreeImage/Source/ZLib/compress.c +80 -0
  836. data/vendor/FreeImage/Source/ZLib/crc32.c +425 -0
  837. data/vendor/FreeImage/Source/ZLib/crc32.h +441 -0
  838. data/vendor/FreeImage/Source/ZLib/deflate.c +1967 -0
  839. data/vendor/FreeImage/Source/ZLib/deflate.h +346 -0
  840. data/vendor/FreeImage/Source/ZLib/gzclose.c +25 -0
  841. data/vendor/FreeImage/Source/ZLib/gzguts.h +209 -0
  842. data/vendor/FreeImage/Source/ZLib/gzlib.c +634 -0
  843. data/vendor/FreeImage/Source/ZLib/gzread.c +594 -0
  844. data/vendor/FreeImage/Source/ZLib/gzwrite.c +577 -0
  845. data/vendor/FreeImage/Source/ZLib/infback.c +640 -0
  846. data/vendor/FreeImage/Source/ZLib/inffast.c +340 -0
  847. data/vendor/FreeImage/Source/ZLib/inffast.h +11 -0
  848. data/vendor/FreeImage/Source/ZLib/inffixed.h +94 -0
  849. data/vendor/FreeImage/Source/ZLib/inflate.c +1512 -0
  850. data/vendor/FreeImage/Source/ZLib/inflate.h +122 -0
  851. data/vendor/FreeImage/Source/ZLib/inftrees.c +306 -0
  852. data/vendor/FreeImage/Source/ZLib/inftrees.h +62 -0
  853. data/vendor/FreeImage/Source/ZLib/trees.c +1226 -0
  854. data/vendor/FreeImage/Source/ZLib/trees.h +128 -0
  855. data/vendor/FreeImage/Source/ZLib/uncompr.c +59 -0
  856. data/vendor/FreeImage/Source/ZLib/zconf.h +511 -0
  857. data/vendor/FreeImage/Source/ZLib/zlib.h +1768 -0
  858. data/vendor/FreeImage/Source/ZLib/zutil.c +324 -0
  859. data/vendor/FreeImage/Source/ZLib/zutil.h +253 -0
  860. metadata +931 -0
@@ -0,0 +1,2145 @@
1
+ ///////////////////////////////////////////////////////////////////////////
2
+ //
3
+ // Copyright (c) 2009-2014 DreamWorks Animation LLC.
4
+ //
5
+ // All rights reserved.
6
+ //
7
+ // Redistribution and use in source and binary forms, with or without
8
+ // modification, are permitted provided that the following conditions are
9
+ // met:
10
+ // * Redistributions of source code must retain the above copyright
11
+ // notice, this list of conditions and the following disclaimer.
12
+ // * Redistributions in binary form must reproduce the above
13
+ // copyright notice, this list of conditions and the following disclaimer
14
+ // in the documentation and/or other materials provided with the
15
+ // distribution.
16
+ // * Neither the name of DreamWorks Animation nor the names of
17
+ // its contributors may be used to endorse or promote products derived
18
+ // from this software without specific prior written permission.
19
+ //
20
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ //
32
+ ///////////////////////////////////////////////////////////////////////////
33
+
34
+ #ifndef IMF_DWACOMPRESSORSIMD_H_HAS_BEEN_INCLUDED
35
+ #define IMF_DWACOMPRESSORSIMD_H_HAS_BEEN_INCLUDED
36
+
37
+ //
38
+ // Various SSE accelerated functions, used by Imf::DwaCompressor.
39
+ // These have been separated into a separate .h file, as the fast
40
+ // paths are done with template specialization.
41
+ //
42
+ // Unless otherwise noted, all pointers are assumed to be 32-byte
43
+ // aligned. Unaligned pointers may risk seg-faulting.
44
+ //
45
+
46
+ #include "ImfNamespace.h"
47
+ #include "ImfSimd.h"
48
+ #include "ImfSystemSpecific.h"
49
+ #include "OpenEXRConfig.h"
50
+
51
+ #include <half.h>
52
+ #include <assert.h>
53
+
54
+ OPENEXR_IMF_INTERNAL_NAMESPACE_HEADER_ENTER
55
+
56
+ #define _SSE_ALIGNMENT 32 /* byte alignment requested for SimdAlignedBuffer64 storage */
57
+ #define _SSE_ALIGNMENT_MASK 0x0F /* low four address bits (16 - 1); NOTE(review): users are outside this section - confirm intent */
58
+ #define _AVX_ALIGNMENT_MASK 0x1F /* low five address bits (32 - 1); NOTE(review): users are outside this section - confirm intent */
59
+
60
+ //
61
+ // Test if we should enable GCC inline asm paths for AVX
62
+ //
63
+
64
+ #ifdef OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX
65
+
66
+ #define IMF_HAVE_GCC_INLINEASM
67
+
68
+ #ifdef __LP64__
69
+ #define IMF_HAVE_GCC_INLINEASM_64
70
+ #endif /* __LP64__ */
71
+
72
+ #endif /* OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX */
73
+
74
+ //
75
+ // A simple 64-element array, aligned properly for SIMD access.
76
+ //
77
+
78
+ template <class T>
79
+ class SimdAlignedBuffer64
80
+ {
81
+ public:
82
+
83
+ SimdAlignedBuffer64(): _buffer (0), _handle (0)
84
+ {
85
+ alloc();
86
+ }
87
+
88
+ SimdAlignedBuffer64(const SimdAlignedBuffer64 &rhs): _handle(0)
89
+ {
90
+ alloc();
91
+ memcpy (_buffer, rhs._buffer, 64 * sizeof (T));
92
+ }
93
+
94
+ ~SimdAlignedBuffer64 ()
95
+ {
96
+ EXRFreeAligned (_handle);
97
+ _handle = 0;
98
+ _buffer = 0;
99
+ }
100
+
101
+ void alloc()
102
+ {
103
+ //
104
+ // Try EXRAllocAligned first - but it might fallback to
105
+ // unaligned allocs. If so, overalloc.
106
+ //
107
+
108
+ _handle = (char *) EXRAllocAligned
109
+ (64 * sizeof(T), _SSE_ALIGNMENT);
110
+
111
+ if (((size_t)_handle & (_SSE_ALIGNMENT - 1)) == 0)
112
+ {
113
+ _buffer = (T *)_handle;
114
+ return;
115
+ }
116
+
117
+ EXRFreeAligned(_handle);
118
+ _handle = (char *) EXRAllocAligned
119
+ (64 * sizeof(T) + _SSE_ALIGNMENT, _SSE_ALIGNMENT);
120
+
121
+ char *aligned = _handle;
122
+
123
+ while ((size_t)aligned & (_SSE_ALIGNMENT - 1))
124
+ aligned++;
125
+
126
+ _buffer = (T *)aligned;
127
+ }
128
+
129
+ T *_buffer;
130
+
131
+ private:
132
+
133
+ char *_handle;
134
+ };
135
+
136
+ typedef SimdAlignedBuffer64<float> SimdAlignedBuffer64f;
137
+ typedef SimdAlignedBuffer64<unsigned short> SimdAlignedBuffer64us;
138
+
139
+ namespace {
140
+
141
+ //
142
+ // Color space conversion, Inverse 709 CSC, Y'CbCr -> R'G'B'
143
+ //
144
+
145
void
csc709Inverse (float &comp0, float &comp1, float &comp2)
{
    //
    // Snapshot the inputs first: the three references are both the
    // source (Y', Cb, Cr) and the destination (R', G', B').
    //

    const float luma = comp0;
    const float cb   = comp1;
    const float cr   = comp2;

    comp0 = luma + 1.5747f * cr;
    comp1 = luma - 0.1873f * cb - 0.4682f * cr;
    comp2 = luma + 1.8556f * cb;
}
158
+
159
+ #ifndef IMF_HAVE_SSE2
160
+
161
+
162
+ //
163
+ // Scalar color space conversion, based on 709 primary chromaticities.
164
+ // No scaling or offsets, just the matrix
165
+ //
166
+
167
+ void
168
+ csc709Inverse64 (float *comp0, float *comp1, float *comp2)
169
+ {
170
+ for (int i = 0; i < 64; ++i)
171
+ csc709Inverse (comp0[i], comp1[i], comp2[i]);
172
+ }
173
+
174
+ #else /* IMF_HAVE_SSE2 */
175
+
176
+ //
177
+ // SSE2 color space conversion
178
+ //
179
+
180
void
csc709Inverse64 (float *comp0, float *comp1, float *comp2)
{
    //
    // Matrix coefficients, broadcast to all four lanes.
    //

    const __m128 crToR = _mm_set1_ps ( 1.5747f);
    const __m128 cbToB = _mm_set1_ps ( 1.8556f);
    const __m128 cbToG = _mm_set1_ps (-0.1873f);
    const __m128 crToG = _mm_set1_ps (-0.4682f);

    //
    // The planes are accessed as 16 four-float vectors each, so all
    // three pointers must be 16-byte aligned. The conversion is done
    // in place.
    //

    __m128 *r = (__m128 *) comp0;
    __m128 *g = (__m128 *) comp1;
    __m128 *b = (__m128 *) comp2;

    for (int i = 0; i < 16; ++i)
    {
        const __m128 y  = r[i];
        const __m128 cb = g[i];
        const __m128 cr = b[i];

        r[i] = _mm_add_ps (y, _mm_mul_ps (cr, crToR));

        // (cb * cbToG + y) + cr * crToG - same summation order as before.
        g[i] = _mm_add_ps (_mm_add_ps (_mm_mul_ps (cb, cbToG), y),
                           _mm_mul_ps (cr, crToG));

        b[i] = _mm_add_ps (_mm_mul_ps (cbToB, cb), y);
    }
}
228
+
229
+ #endif /* IMF_HAVE_SSE2 */
230
+
231
+
232
+ //
233
+ // Color space conversion, Forward 709 CSC, R'G'B' -> Y'CbCr
234
+ //
235
+ // Simple FPU color space conversion. Based on the 709
236
+ // primary chromaticities, with no scaling or offsets.
237
+ //
238
+
239
void
csc709Forward64 (float *comp0, float *comp1, float *comp2)
{
    //
    // In-place conversion of 64 R'G'B' triples (one value per plane)
    // to Y'CbCr.
    //

    float *p0 = comp0;
    float *p1 = comp1;
    float *p2 = comp2;

    for (float *const end = comp0 + 64; p0 != end; ++p0, ++p1, ++p2)
    {
        const float red   = *p0;
        const float green = *p1;
        const float blue  = *p2;

        *p0 =  0.2126f * red + 0.7152f * green + 0.0722f * blue;
        *p1 = -0.1146f * red - 0.3854f * green + 0.5000f * blue;
        *p2 =  0.5000f * red - 0.4542f * green - 0.0458f * blue;
    }
}
255
+
256
+
257
+ //
258
+ // Byte interleaving of 2 byte arrays:
259
+ // src0 = AAAA
260
+ // src1 = BBBB
261
+ // dst = ABABABAB
262
+ //
263
+ // numBytes is the size of each of the source buffers
264
+ //
265
+
266
+ #ifndef IMF_HAVE_SSE2
267
+
268
+ //
269
+ // Scalar default implementation
270
+ //
271
+
272
void
interleaveByte2 (char *dst, char *src0, char *src1, int numBytes)
{
    //
    // Emit one byte from each source in turn; dst receives
    // 2 * numBytes bytes in total.
    //

    char *out = dst;

    for (int i = 0; i < numBytes; ++i)
    {
        *out++ = src0[i];
        *out++ = src1[i];
    }
}
281
+
282
+ #else /* IMF_HAVE_SSE2 */
283
+
284
+ //
285
+ // SSE2 byte interleaving
286
+ //
287
+
288
//
// Interleave numBytes bytes of src0 with numBytes bytes of src1 into
// dst (2 * numBytes bytes): dst = s0[0] s1[0] s0[1] s1[1] ...
//
// No alignment is required. Three SIMD strategies are picked from the
// pointer alignments; whatever the 16-byte vector loop does not cover
// is finished with a scalar loop.
//

void
interleaveByte2 (char *dst, char *src0, char *src1, int numBytes)
{
    const int dstAlignment  = (int) ((size_t) dst  % 16);
    const int src0Alignment = (int) ((size_t) src0 % 16);
    const int src1Alignment = (int) ((size_t) src1 % 16);

    // Number of bytes of each source that have been interleaved so far.
    int done = 0;

    if ((!dstAlignment) && (!src0Alignment) && (!src1Alignment))
    {
        //
        // Aligned loads and stores
        //

        const __m128i *in0 = (const __m128i *) src0;
        const __m128i *in1 = (const __m128i *) src1;
        __m128i       *out = (__m128i *) dst;

        const int sseWidth = numBytes / 16;

        for (int x = 0; x < sseWidth; ++x)
        {
            const __m128i v0 = _mm_load_si128 (&in0[x]);
            const __m128i v1 = _mm_load_si128 (&in1[x]);

            _mm_stream_si128 (&out[2 * x],     _mm_unpacklo_epi8 (v0, v1));
            _mm_stream_si128 (&out[2 * x + 1], _mm_unpackhi_epi8 (v0, v1));
        }

        done = 16 * sseWidth;
    }
    else if ((!dstAlignment) && (src0Alignment == 8) &&
             (src1Alignment == 8) && (numBytes > 8))
    {
        //
        // Aligned stores, but catch up 8 values so we can use aligned
        // loads. This path is only safe when at least 8 bytes exist;
        // shorter inputs take the generic path below, which never
        // touches memory beyond numBytes.
        //

        for (int x = 0; x < 8; ++x)
        {
            dst[2 * x]     = src0[x];
            dst[2 * x + 1] = src1[x];
        }

        const __m128i *in0 = (const __m128i *) &src0[8];
        const __m128i *in1 = (const __m128i *) &src1[8];
        __m128i       *out = (__m128i *) &dst[16];

        const int sseWidth = (numBytes - 8) / 16;

        for (int x = 0; x < sseWidth; ++x)
        {
            const __m128i v0 = _mm_load_si128 (&in0[x]);
            const __m128i v1 = _mm_load_si128 (&in1[x]);

            _mm_stream_si128 (&out[2 * x],     _mm_unpacklo_epi8 (v0, v1));
            _mm_stream_si128 (&out[2 * x + 1], _mm_unpackhi_epi8 (v0, v1));
        }

        done = 8 + 16 * sseWidth;
    }
    else
    {
        //
        // Unaligned everything
        //

        const __m128i *in0 = (const __m128i *) src0;
        const __m128i *in1 = (const __m128i *) src1;
        __m128i       *out = (__m128i *) dst;

        const int sseWidth = numBytes / 16;

        for (int x = 0; x < sseWidth; ++x)
        {
            const __m128i v0 = _mm_loadu_si128 (&in0[x]);
            const __m128i v1 = _mm_loadu_si128 (&in1[x]);

            _mm_storeu_si128 (&out[2 * x],     _mm_unpacklo_epi8 (v0, v1));
            _mm_storeu_si128 (&out[2 * x + 1], _mm_unpackhi_epi8 (v0, v1));
        }

        done = 16 * sseWidth;
    }

    //
    // Then run the leftovers one at a time
    //

    for (int x = done; x < numBytes; ++x)
    {
        dst[2 * x]     = src0[x];
        dst[2 * x + 1] = src1[x];
    }
}
396
+
397
+ #endif /* IMF_HAVE_SSE2 */
398
+
399
+
400
+ //
401
+ // Float -> half float conversion
402
+ //
403
+ // To enable F16C based conversion, we can't rely on compile-time
404
+ // detection, hence the multiple defined versions. Pick one based
405
+ // on runtime cpuid detection.
406
+ //
407
+
408
+ //
409
+ // Default boring conversion
410
+ //
411
+
412
+ void
413
+ convertFloatToHalf64_scalar (unsigned short *dst, float *src)
414
+ {
415
+ for (int i=0; i<64; ++i)
416
+ dst[i] = ((half)src[i]).bits();
417
+ }
418
+
419
+
420
+ //
421
+ // F16C conversion - Assumes aligned src and dst
422
+ //
423
+
424
+ void
424
+ convertFloatToHalf64_f16c (unsigned short *dst, float *src)
425
+ {
426
+ //
427
+ // Convert the 64 floats at src into 64 half-float bit patterns at dst using the
428
+ // F16C VCVTPS2PH instruction. The asm uses aligned 32-byte loads from src and aligned
429
+ // 16-byte stores to dst. Without IMF_HAVE_GCC_INLINEASM it calls convertFloatToHalf64_scalar.
430
+ //
431
+ // Ordinarily, I'd avoid using inline asm and prefer intrinsics. However, in order to
432
+ // get the intrinsics, we need to tell the compiler to generate VEX instructions.
433
+ // (On the GCC side, -mf16c goes ahead and activates -mavx, resulting in VEX code. Without -mf16c, no intrinsics..)
434
+ // Now, it's quite likely that we'll find ourselves in situations
435
+ // where we want to build *without* VEX, in order to maintain
436
+ // maximum compatibility. But to get there with intrinsics,
437
+ // we'd need to break out code into a separate file. Bleh.
438
+ // I'll take the asm.
439
+ //
440
+
441
+ #if defined IMF_HAVE_GCC_INLINEASM
442
+ __asm__
443
+ ("vmovaps (%0), %%ymm0 \n" /* first half: floats 0-31, eight per ymm register */
444
+ "vmovaps 0x20(%0), %%ymm1 \n"
445
+ "vmovaps 0x40(%0), %%ymm2 \n"
446
+ "vmovaps 0x60(%0), %%ymm3 \n"
447
+ "vcvtps2ph $0, %%ymm0, %%xmm0 \n" /* immediate 0 selects round-to-nearest-even */
448
+ "vcvtps2ph $0, %%ymm1, %%xmm1 \n"
449
+ "vcvtps2ph $0, %%ymm2, %%xmm2 \n"
450
+ "vcvtps2ph $0, %%ymm3, %%xmm3 \n"
451
+ "vmovdqa %%xmm0, 0x00(%1) \n" /* eight halves (16 bytes) per store */
452
+ "vmovdqa %%xmm1, 0x10(%1) \n"
453
+ "vmovdqa %%xmm2, 0x20(%1) \n"
454
+ "vmovdqa %%xmm3, 0x30(%1) \n"
455
+ "vmovaps 0x80(%0), %%ymm0 \n" /* second half: floats 32-63 */
456
+ "vmovaps 0xa0(%0), %%ymm1 \n"
457
+ "vmovaps 0xc0(%0), %%ymm2 \n"
458
+ "vmovaps 0xe0(%0), %%ymm3 \n"
459
+ "vcvtps2ph $0, %%ymm0, %%xmm0 \n"
460
+ "vcvtps2ph $0, %%ymm1, %%xmm1 \n"
461
+ "vcvtps2ph $0, %%ymm2, %%xmm2 \n"
462
+ "vcvtps2ph $0, %%ymm3, %%xmm3 \n"
463
+ "vmovdqa %%xmm0, 0x40(%1) \n"
464
+ "vmovdqa %%xmm1, 0x50(%1) \n"
465
+ "vmovdqa %%xmm2, 0x60(%1) \n"
466
+ "vmovdqa %%xmm3, 0x70(%1) \n"
467
+ #ifndef __AVX__
468
+ "vzeroupper \n" /* only emitted when the translation unit is not compiled for AVX */
469
+ #endif /* __AVX__ */
470
+ : /* Output: none declared; dst is written through %1 and covered by the "memory" clobber */
471
+ : /* Input: %0 = src, %1 = dst */ "r"(src), "r"(dst)
472
+ #ifndef __AVX__
473
+ : /* Clobber */ "%xmm0", "%xmm1", "%xmm2", "%xmm3", "memory"
474
+ #else
475
+ : /* Clobber */ "%ymm0", "%ymm1", "%ymm2", "%ymm3", "memory"
476
+ #endif /* __AVX__ */
477
+ );
478
+ #else
479
+ convertFloatToHalf64_scalar (dst, src);
480
+ #endif /* IMF_HAVE_GCC_INLINEASM */
481
+ }
483
+
484
+
485
+ //
486
+ // Convert an 8x8 block of HALF from zig-zag order to
487
+ // FLOAT in normal order. The order we want is:
488
+ //
489
+ // src dst
490
+ // 0 1 2 3 4 5 6 7 0 1 5 6 14 15 27 28
491
+ // 8 9 10 11 12 13 14 15 2 4 7 13 16 26 29 42
492
+ // 16 17 18 19 20 21 22 23 3 8 12 17 25 30 41 43
493
+ // 24 25 26 27 28 29 30 31 9 11 18 24 31 40 44 53
494
+ // 32 33 34 35 36 37 38 39 10 19 23 32 39 45 52 54
495
+ // 40 41 42 43 44 45 46 47 20 22 33 38 46 51 55 60
496
+ // 48 49 50 51 52 53 54 55 21 34 37 47 50 56 59 61
497
+ // 56 57 58 59 60 61 62 63 35 36 48 49 57 58 62 63
498
+ //
499
+
500
+ void
501
+ fromHalfZigZag_scalar (unsigned short *src, float *dst)
502
+ {
503
+ half *srcHalf = (half *)src;
504
+
505
+ dst[0] = (float)srcHalf[0];
506
+ dst[1] = (float)srcHalf[1];
507
+ dst[2] = (float)srcHalf[5];
508
+ dst[3] = (float)srcHalf[6];
509
+ dst[4] = (float)srcHalf[14];
510
+ dst[5] = (float)srcHalf[15];
511
+ dst[6] = (float)srcHalf[27];
512
+ dst[7] = (float)srcHalf[28];
513
+ dst[8] = (float)srcHalf[2];
514
+ dst[9] = (float)srcHalf[4];
515
+
516
+ dst[10] = (float)srcHalf[7];
517
+ dst[11] = (float)srcHalf[13];
518
+ dst[12] = (float)srcHalf[16];
519
+ dst[13] = (float)srcHalf[26];
520
+ dst[14] = (float)srcHalf[29];
521
+ dst[15] = (float)srcHalf[42];
522
+ dst[16] = (float)srcHalf[3];
523
+ dst[17] = (float)srcHalf[8];
524
+ dst[18] = (float)srcHalf[12];
525
+ dst[19] = (float)srcHalf[17];
526
+
527
+ dst[20] = (float)srcHalf[25];
528
+ dst[21] = (float)srcHalf[30];
529
+ dst[22] = (float)srcHalf[41];
530
+ dst[23] = (float)srcHalf[43];
531
+ dst[24] = (float)srcHalf[9];
532
+ dst[25] = (float)srcHalf[11];
533
+ dst[26] = (float)srcHalf[18];
534
+ dst[27] = (float)srcHalf[24];
535
+ dst[28] = (float)srcHalf[31];
536
+ dst[29] = (float)srcHalf[40];
537
+
538
+ dst[30] = (float)srcHalf[44];
539
+ dst[31] = (float)srcHalf[53];
540
+ dst[32] = (float)srcHalf[10];
541
+ dst[33] = (float)srcHalf[19];
542
+ dst[34] = (float)srcHalf[23];
543
+ dst[35] = (float)srcHalf[32];
544
+ dst[36] = (float)srcHalf[39];
545
+ dst[37] = (float)srcHalf[45];
546
+ dst[38] = (float)srcHalf[52];
547
+ dst[39] = (float)srcHalf[54];
548
+
549
+ dst[40] = (float)srcHalf[20];
550
+ dst[41] = (float)srcHalf[22];
551
+ dst[42] = (float)srcHalf[33];
552
+ dst[43] = (float)srcHalf[38];
553
+ dst[44] = (float)srcHalf[46];
554
+ dst[45] = (float)srcHalf[51];
555
+ dst[46] = (float)srcHalf[55];
556
+ dst[47] = (float)srcHalf[60];
557
+ dst[48] = (float)srcHalf[21];
558
+ dst[49] = (float)srcHalf[34];
559
+
560
+ dst[50] = (float)srcHalf[37];
561
+ dst[51] = (float)srcHalf[47];
562
+ dst[52] = (float)srcHalf[50];
563
+ dst[53] = (float)srcHalf[56];
564
+ dst[54] = (float)srcHalf[59];
565
+ dst[55] = (float)srcHalf[61];
566
+ dst[56] = (float)srcHalf[35];
567
+ dst[57] = (float)srcHalf[36];
568
+ dst[58] = (float)srcHalf[48];
569
+ dst[59] = (float)srcHalf[49];
570
+
571
+ dst[60] = (float)srcHalf[57];
572
+ dst[61] = (float)srcHalf[58];
573
+ dst[62] = (float)srcHalf[62];
574
+ dst[63] = (float)srcHalf[63];
575
+ }
576
+
577
+
578
+ //
579
+ // If we can form the correct ordering in xmm registers,
580
+ // we can use F16C to convert from HALF -> FLOAT. However,
581
+ // making the correct order isn't trivial.
582
+ //
583
+ // We want to re-order a source 8x8 matrix from:
584
+ //
585
+ // 0 1 2 3 4 5 6 7 0 1 5 6 14 15 27 28
586
+ // 8 9 10 11 12 13 14 15 2 4 7 13 16 26 29 42
587
+ // 16 17 18 19 20 21 22 23 3 8 12 17 25 30 41 43
588
+ // 24 25 26 27 28 29 30 31 9 11 18 24 31 40 44 53 (A)
589
+ // 32 33 34 35 36 37 38 39 --> 10 19 23 32 39 45 52 54
590
+ // 40 41 42 43 44 45 46 47 20 22 33 38 46 51 55 60
591
+ // 48 49 50 51 52 53 54 55 21 34 37 47 50 56 59 61
592
+ // 56 57 58 59 60 61 62 63 35 36 48 49 57 58 62 63
593
+ //
594
+ // Which looks like a mess, right?
595
+ //
596
+ // Now, check out the NE/SW diagonals of (A). Along those lines,
597
+ // we have runs of contiguous values! If we rewrite (A) a bit, we get:
598
+ //
599
+ // 0
600
+ // 1 2
601
+ // 5 4 3
602
+ // 6 7 8 9
603
+ // 14 13 12 11 10
604
+ // 15 16 17 18 19 20
605
+ // 27 26 25 24 23 22 21 (B)
606
+ // 28 29 30 31 32 33 34 35
607
+ // 42 41 40 39 38 37 36
608
+ // 43 44 45 46 47 48
609
+ // 53 52 51 50 49
610
+ // 54 55 56 57
611
+ // 60 59 58
612
+ // 61 62
613
+ // 63
614
+ //
615
+ // In this ordering, the columns are the rows (A). If we can 'transpose'
616
+ // (B), we'll achieve our goal. But we want this to fit nicely into
617
+ // xmm registers and still be able to load large runs efficiently.
618
+ // Also, notice that the odd rows are in ascending order, while
619
+ // the even rows are in descending order.
620
+ //
621
+ // If we 'fold' the bottom half up into the top, we can preserve ordered
622
+ // runs across rows, and still keep all the correct values in columns.
623
+ // After transposing, we'll need to rotate things back into place.
624
+ // This gives us:
625
+ //
626
+ // 0 | 42 41 40 39 38 37 36
627
+ // 1 2 | 43 44 45 46 47 48
628
+ // 5 4 3 | 53 52 51 50 49
629
+ // 6 7 8 9 | 54 55 56 57 (C)
630
+ // 14 13 12 11 10 | 60 59 58
631
+ // 15 16 17 18 19 20 | 61 62
632
+ // 27 26 25 24 23 22 21 | 63
633
+ // 28 29 30 31 32 33 34 35
634
+ //
635
+ // But hang on. We still have the backwards descending rows to deal with.
636
+ // Let's reverse the even rows so that all values are in ascending order:
637
+ //
638
+ // 36 37 38 39 40 41 42 | 0
639
+ // 1 2 | 43 44 45 46 47 48
640
+ // 49 50 51 52 53 | 3 4 5
641
+ // 6 7 8 9 | 54 55 56 57 (D)
642
+ // 58 59 60 | 10 11 12 13 14
643
+ // 15 16 17 18 19 20 | 61 62
644
+ // 63 | 21 22 23 24 25 26 27
645
+ // 28 29 30 31 32 33 34 35
646
+ //
647
+ // If we can form (D), we will then:
648
+ // 1) Reverse the even rows
649
+ // 2) Transpose
650
+ // 3) Rotate the rows
651
+ //
652
+ // and we'll have (A).
653
+ //
654
+
655
+ void
656
+ fromHalfZigZag_f16c (unsigned short *src, float *dst)
657
+ {
658
+ #if defined IMF_HAVE_GCC_INLINEASM_64 // the asm below uses xmm8-xmm15; otherwise the scalar version is called
659
+ __asm__
660
+
661
+ /* x3 <- 0
662
+ * x8 <- [ 0- 7]
663
+ * x6 <- [56-63]
664
+ * x9 <- [21-28]
665
+ * x7 <- [28-35]
666
+ * x3 <- [ 6- 9] (lower half) */
667
+
668
+ ("vpxor %%xmm3, %%xmm3, %%xmm3 \n"
669
+ "vmovdqa (%0), %%xmm8 \n"
670
+ "vmovdqa 112(%0), %%xmm6 \n"
671
+ "vmovdqu 42(%0), %%xmm9 \n"
672
+ "vmovdqu 56(%0), %%xmm7 \n"
673
+ "vmovq 12(%0), %%xmm3 \n"
674
+
675
+ /* Setup rows 0-2 of A in xmm0-xmm2
676
+ * x1 <- x8 >> 16 (1 value)
677
+ * x2 <- x8 << 32 (2 values)
678
+ * x0 <- alignr([35-42], x8, 2)
679
+ * x1 <- blend(x1, [41-48])
680
+ * x2 <- blend(x2, [49-56]) */
681
+
682
+ "vpsrldq $2, %%xmm8, %%xmm1 \n"
683
+ "vpslldq $4, %%xmm8, %%xmm2 \n"
684
+ "vpalignr $2, 70(%0), %%xmm8, %%xmm0 \n"
685
+ "vpblendw $0xfc, 82(%0), %%xmm1, %%xmm1 \n"
686
+ "vpblendw $0x1f, 98(%0), %%xmm2, %%xmm2 \n"
687
+
688
+ /* Setup rows 4-6 of A in xmm4-xmm6
689
+ * x4 <- x6 >> 32 (2 values)
690
+ * x5 <- x6 << 16 (1 value)
691
+ * x6 <- alignr(x6,x9,14)
692
+ * x4 <- blend(x4, [ 7-14])
693
+ * x5 <- blend(x5, [15-22]) */
694
+
695
+ "vpsrldq $4, %%xmm6, %%xmm4 \n"
696
+ "vpslldq $2, %%xmm6, %%xmm5 \n"
697
+ "vpalignr $14, %%xmm6, %%xmm9, %%xmm6 \n"
698
+ "vpblendw $0xf8, 14(%0), %%xmm4, %%xmm4 \n"
699
+ "vpblendw $0x3f, 30(%0), %%xmm5, %%xmm5 \n"
700
+
701
+ /* Load the upper half of row 3 into xmm3
702
+ * x3 <- [54-57] (upper half) */
703
+
704
+ "vpinsrq $1, 108(%0), %%xmm3, %%xmm3\n"
705
+
706
+ /* Reverse the even rows. We're not using PSHUFB as
707
+ * that requires loading an extra constant all the time,
708
+ * and we're already pretty memory bound.
709
+ */
710
+
711
+ "vpshuflw $0x1b, %%xmm0, %%xmm0 \n"
712
+ "vpshuflw $0x1b, %%xmm2, %%xmm2 \n"
713
+ "vpshuflw $0x1b, %%xmm4, %%xmm4 \n"
714
+ "vpshuflw $0x1b, %%xmm6, %%xmm6 \n"
715
+
716
+ "vpshufhw $0x1b, %%xmm0, %%xmm0 \n"
717
+ "vpshufhw $0x1b, %%xmm2, %%xmm2 \n"
718
+ "vpshufhw $0x1b, %%xmm4, %%xmm4 \n"
719
+ "vpshufhw $0x1b, %%xmm6, %%xmm6 \n"
720
+
721
+ "vpshufd $0x4e, %%xmm0, %%xmm0 \n"
722
+ "vpshufd $0x4e, %%xmm2, %%xmm2 \n"
723
+ "vpshufd $0x4e, %%xmm4, %%xmm4 \n"
724
+ "vpshufd $0x4e, %%xmm6, %%xmm6 \n"
725
+
726
+ /* Transpose xmm0-xmm7 into xmm8-xmm15 */
727
+
728
+ "vpunpcklwd %%xmm1, %%xmm0, %%xmm8 \n"
729
+ "vpunpcklwd %%xmm3, %%xmm2, %%xmm9 \n"
730
+ "vpunpcklwd %%xmm5, %%xmm4, %%xmm10 \n"
731
+ "vpunpcklwd %%xmm7, %%xmm6, %%xmm11 \n"
732
+ "vpunpckhwd %%xmm1, %%xmm0, %%xmm12 \n"
733
+ "vpunpckhwd %%xmm3, %%xmm2, %%xmm13 \n"
734
+ "vpunpckhwd %%xmm5, %%xmm4, %%xmm14 \n"
735
+ "vpunpckhwd %%xmm7, %%xmm6, %%xmm15 \n"
736
+
737
+ "vpunpckldq %%xmm9, %%xmm8, %%xmm0 \n"
738
+ "vpunpckldq %%xmm11, %%xmm10, %%xmm1 \n"
739
+ "vpunpckhdq %%xmm9, %%xmm8, %%xmm2 \n"
740
+ "vpunpckhdq %%xmm11, %%xmm10, %%xmm3 \n"
741
+ "vpunpckldq %%xmm13, %%xmm12, %%xmm4 \n"
742
+ "vpunpckldq %%xmm15, %%xmm14, %%xmm5 \n"
743
+ "vpunpckhdq %%xmm13, %%xmm12, %%xmm6 \n"
744
+ "vpunpckhdq %%xmm15, %%xmm14, %%xmm7 \n"
745
+
746
+ "vpunpcklqdq %%xmm1, %%xmm0, %%xmm8 \n"
747
+ "vpunpckhqdq %%xmm1, %%xmm0, %%xmm9 \n"
748
+ "vpunpcklqdq %%xmm3, %%xmm2, %%xmm10 \n"
749
+ "vpunpckhqdq %%xmm3, %%xmm2, %%xmm11 \n"
750
+ "vpunpcklqdq %%xmm4, %%xmm5, %%xmm12 \n"
751
+ "vpunpckhqdq %%xmm5, %%xmm4, %%xmm13 \n"
752
+ "vpunpcklqdq %%xmm7, %%xmm6, %%xmm14 \n"
753
+ "vpunpckhqdq %%xmm7, %%xmm6, %%xmm15 \n"
754
+
755
+ /* Rotate the rows to get the correct final order.
756
+ * Rotating xmm12 isn't needed, as we can handle
757
+ * the rotation in the PUNPCKLQDQ above (note its swapped
758
+ * source operands). Rotating xmm8 isn't needed as it's already in the right order
759
+ */
760
+
761
+ "vpalignr $2, %%xmm9, %%xmm9, %%xmm9 \n"
762
+ "vpalignr $4, %%xmm10, %%xmm10, %%xmm10 \n"
763
+ "vpalignr $6, %%xmm11, %%xmm11, %%xmm11 \n"
764
+ "vpalignr $10, %%xmm13, %%xmm13, %%xmm13 \n"
765
+ "vpalignr $12, %%xmm14, %%xmm14, %%xmm14 \n"
766
+ "vpalignr $14, %%xmm15, %%xmm15, %%xmm15 \n"
767
+
768
+ /* Convert from half -> float: each xmm row of 8 halves becomes a ymm row of 8 floats */
769
+
770
+ "vcvtph2ps %%xmm8, %%ymm8 \n"
771
+ "vcvtph2ps %%xmm9, %%ymm9 \n"
772
+ "vcvtph2ps %%xmm10, %%ymm10 \n"
773
+ "vcvtph2ps %%xmm11, %%ymm11 \n"
774
+ "vcvtph2ps %%xmm12, %%ymm12 \n"
775
+ "vcvtph2ps %%xmm13, %%ymm13 \n"
776
+ "vcvtph2ps %%xmm14, %%ymm14 \n"
777
+ "vcvtph2ps %%xmm15, %%ymm15 \n"
778
+
779
+ /* Move float values to dst, one aligned 32-byte row per store */
780
+
781
+ "vmovaps %%ymm8, (%1) \n"
782
+ "vmovaps %%ymm9, 32(%1) \n"
783
+ "vmovaps %%ymm10, 64(%1) \n"
784
+ "vmovaps %%ymm11, 96(%1) \n"
785
+ "vmovaps %%ymm12, 128(%1) \n"
786
+ "vmovaps %%ymm13, 160(%1) \n"
787
+ "vmovaps %%ymm14, 192(%1) \n"
788
+ "vmovaps %%ymm15, 224(%1) \n"
789
+ #ifndef __AVX__
790
+ "vzeroupper \n"
791
+ #endif /* __AVX__ */
792
+ : /* Output: none declared; dst is written through %1 and covered by the "memory" clobber */
793
+ : /* Input: %0 = src, %1 = dst */ "r"(src), "r"(dst)
794
+ : /* Clobber */ "memory",
795
+ #ifndef __AVX__
796
+ "%xmm0", "%xmm1", "%xmm2", "%xmm3",
797
+ "%xmm4", "%xmm5", "%xmm6", "%xmm7",
798
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
799
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15"
800
+ #else
801
+ "%ymm0", "%ymm1", "%ymm2", "%ymm3",
802
+ "%ymm4", "%ymm5", "%ymm6", "%ymm7",
803
+ "%ymm8", "%ymm9", "%ymm10", "%ymm11",
804
+ "%ymm12", "%ymm13", "%ymm14", "%ymm15"
805
+ #endif /* __AVX__ */
806
+ );
807
+
808
+ #else
809
+ fromHalfZigZag_scalar(src, dst);
810
+ #endif /* defined IMF_HAVE_GCC_INLINEASM_64 */
811
+ }
812
+
813
+
814
+ //
815
+ // Inverse 8x8 DCT, only inverting the DC. This assumes that
816
+ // all AC frequencies are 0.
817
+ //
818
+
819
+ #ifndef IMF_HAVE_SSE2
820
+
821
void
dctInverse8x8DcOnly (float *data)
{
    //
    // With every AC coefficient zero the inverse transform is flat:
    // all 64 outputs equal the DC term scaled once per 1-D pass.
    //

    const float dc = data[0] * 3.535536e-01f * 3.535536e-01f;

    for (float *p = data, *const end = data + 64; p != end; ++p)
        *p = dc;
}
829
+
830
+ #else /* IMF_HAVE_SSE2 */
831
+
832
void
dctInverse8x8DcOnly (float *data)
{
    //
    // Broadcast the scaled DC term and write it over the whole block,
    // four floats per aligned store (data must be 16-byte aligned).
    //

    const __m128 dc = _mm_set1_ps (data[0] * 3.535536e-01f * 3.535536e-01f);

    for (float *p = data, *const end = data + 64; p != end; p += 4)
        _mm_store_ps (p, dc);
}
841
+
842
+ #endif /* IMF_HAVE_SSE2 */
843
+
844
+
845
+ //
846
+ // Full 8x8 Inverse DCT:
847
+ //
848
+ // Simple inverse DCT on an 8x8 block, with scalar ops only.
849
+ // Operates on data in-place.
850
+ //
851
+ // This is based on the iDCT formulation (y = frequency domain,
852
+ // x = spatial domain)
853
+ //
854
+ // [x0] [ ][y0] [ ][y1]
855
+ // [x1] = [ M1 ][y2] + [ M2 ][y3]
856
+ // [x2] [ ][y4] [ ][y5]
857
+ // [x3] [ ][y6] [ ][y7]
858
+ //
859
+ // [x7] [ ][y0] [ ][y1]
860
+ // [x6] = [ M1 ][y2] - [ M2 ][y3]
861
+ // [x5] [ ][y4] [ ][y5]
862
+ // [x4] [ ][y6] [ ][y7]
863
+ //
864
+ // where M1: M2:
865
+ //
866
+ // [a c a f] [b d e g]
867
+ // [a f -a -c] [d -g -b -e]
868
+ // [a -f -a c] [e -b g d]
869
+ // [a -c a -f] [g -e d -b]
870
+ //
871
+ // and the constants are as defined below..
872
+ //
873
+ // If you know how many of the lower rows are zero, that can
874
+ // be passed in to help speed things up. If you don't know,
875
+ // just set zeroedRows=0.
876
+ //
877
+
878
+ //
879
+ // Default implementation
880
+ //
881
+
882
template <int zeroedRows>
void
dctInverse8x8_scalar (float *data)
{
    //
    // Matrix constants. NOTE(review): pi is deliberately left as
    // 3.14159f; the constant table of the SSE2 implementation in this
    // file matches these values to its printed precision.
    //

    const float a = .5f * cosf (3.14159f / 4.0f);
    const float b = .5f * cosf (3.14159f / 16.0f);
    const float c = .5f * cosf (3.14159f / 8.0f);
    const float d = .5f * cosf (3.f*3.14159f / 16.0f);
    const float e = .5f * cosf (5.f*3.14159f / 16.0f);
    const float f = .5f * cosf (3.f*3.14159f / 8.0f);
    const float g = .5f * cosf (7.f*3.14159f / 16.0f);

    //
    // Two passes of the same 8-point inverse transform, in place:
    //
    //   pass 0 - along each row; the last zeroedRows rows are known
    //            to be zero and are skipped.
    //   pass 1 - down each of the 8 columns.
    //
    // The arithmetic is grouped to share common sub-expressions, so
    // it looks less compact than the matrix description above.
    //

    for (int pass = 0; pass < 2; ++pass)
    {
        const bool rowPass    = (pass == 0);
        const int  lineCount  = rowPass ? 8 - zeroedRows : 8;
        const int  lineStride = rowPass ? 8 : 1;  // start of the next line
        const int  s          = rowPass ? 1 : 8;  // step between samples

        for (int line = 0; line < lineCount; ++line)
        {
            float *x = data + line * lineStride;

            const float y0 = x[0];
            const float y1 = x[s];
            const float y2 = x[2 * s];
            const float y3 = x[3 * s];
            const float y4 = x[4 * s];
            const float y5 = x[5 * s];
            const float y6 = x[6 * s];
            const float y7 = x[7 * s];

            // Products of the y2 / y6 terms with c and f.
            const float cy2 = c * y2;
            const float fy2 = f * y2;
            const float cy6 = c * y6;
            const float fy6 = f * y6;

            // Odd-indexed inputs multiplied by M2.
            const float odd0 = b * y1 + d * y3 + e * y5 + g * y7;
            const float odd1 = d * y1 - g * y3 - b * y5 - e * y7;
            const float odd2 = e * y1 - b * y3 + g * y5 + d * y7;
            const float odd3 = g * y1 - e * y3 + d * y5 - b * y7;

            // Even-indexed inputs multiplied by M1.
            const float sum04 = a * (y0 + y4);
            const float dif04 = a * (y0 - y4);
            const float sum26 = cy2 + fy6;
            const float dif26 = fy2 - cy6;

            const float even0 = sum04 + sum26;
            const float even1 = dif04 + dif26;
            const float even2 = dif04 - dif26;
            const float even3 = sum04 - sum26;

            // Outputs 0-3 are even + odd, outputs 7-4 are even - odd.
            x[0]     = even0 + odd0;
            x[s]     = even1 + odd1;
            x[2 * s] = even2 + odd2;
            x[3 * s] = even3 + odd3;

            x[4 * s] = even3 - odd3;
            x[5 * s] = even2 - odd2;
            x[6 * s] = even1 - odd1;
            x[7 * s] = even0 - odd0;
        }
    }
}
988
+
989
+
990
+ //
991
+ // SSE2 Implementation
992
+ //
993
+
994
+ template <int zeroedRows>
995
+ void
996
+ dctInverse8x8_sse2 (float *data)
997
+ {
998
+ #ifdef IMF_HAVE_SSE2
999
+ __m128 a = {3.535536e-01f,3.535536e-01f,3.535536e-01f,3.535536e-01f};
1000
+ __m128 b = {4.903927e-01f,4.903927e-01f,4.903927e-01f,4.903927e-01f};
1001
+ __m128 c = {4.619398e-01f,4.619398e-01f,4.619398e-01f,4.619398e-01f};
1002
+ __m128 d = {4.157349e-01f,4.157349e-01f,4.157349e-01f,4.157349e-01f};
1003
+ __m128 e = {2.777855e-01f,2.777855e-01f,2.777855e-01f,2.777855e-01f};
1004
+ __m128 f = {1.913422e-01f,1.913422e-01f,1.913422e-01f,1.913422e-01f};
1005
+ __m128 g = {9.754573e-02f,9.754573e-02f,9.754573e-02f,9.754573e-02f};
1006
+
1007
+ __m128 c0 = {3.535536e-01f, 3.535536e-01f, 3.535536e-01f, 3.535536e-01f};
1008
+ __m128 c1 = {4.619398e-01f, 1.913422e-01f,-1.913422e-01f,-4.619398e-01f};
1009
+ __m128 c2 = {3.535536e-01f,-3.535536e-01f,-3.535536e-01f, 3.535536e-01f};
1010
+ __m128 c3 = {1.913422e-01f,-4.619398e-01f, 4.619398e-01f,-1.913422e-01f};
1011
+
1012
+ __m128 c4 = {4.903927e-01f, 4.157349e-01f, 2.777855e-01f, 9.754573e-02f};
1013
+ __m128 c5 = {4.157349e-01f,-9.754573e-02f,-4.903927e-01f,-2.777855e-01f};
1014
+ __m128 c6 = {2.777855e-01f,-4.903927e-01f, 9.754573e-02f, 4.157349e-01f};
1015
+ __m128 c7 = {9.754573e-02f,-2.777855e-01f, 4.157349e-01f,-4.903927e-01f};
1016
+
1017
+ __m128 *srcVec = (__m128 *)data;
1018
+ __m128 x[8], evenSum, oddSum;
1019
+ __m128 in[8], alpha[4], beta[4], theta[4], gamma[4];
1020
+
1021
+ //
1022
+ // Rows -
1023
+ //
1024
+ // Treat this just like matrix-vector multiplication. The
1025
+ // trick is to note that:
1026
+ //
1027
+ // [M00 M01 M02 M03][v0] [(v0 M00) + (v1 M01) + (v2 M02) + (v3 M03)]
1028
+ // [M10 M11 M12 M13][v1] = [(v0 M10) + (v1 M11) + (v2 M12) + (v3 M13)]
1029
+ // [M20 M21 M22 M23][v2] [(v0 M20) + (v1 M21) + (v2 M22) + (v3 M23)]
1030
+ // [M30 M31 M32 M33][v3] [(v0 M30) + (v1 M31) + (v2 M32) + (v3 M33)]
1031
+ //
1032
+ // Then, we can fill a register with v_i and multiply by the i-th column
1033
+ // of M, accumulating across all i-s.
1034
+ //
1035
+ // The kids refer to the populating of a register with a single value
1036
+ // "broadcasting", and it can be done with a shuffle instruction. It
1037
+ // seems to be the slowest part of the whole ordeal.
1038
+ //
1039
+ // Our matrix columns are stored above in c0-c7. c0-3 make up M1, and
1040
+ // c4-7 are from M2.
1041
+ //
1042
+
1043
+ #define DCT_INVERSE_8x8_SS2_ROW_LOOP(i) \
1044
+ /* \
1045
+ * Broadcast the components of the row \
1046
+ */ \
1047
+ \
1048
+ x[0] = _mm_shuffle_ps (srcVec[2 * i], \
1049
+ srcVec[2 * i], \
1050
+ _MM_SHUFFLE (0, 0, 0, 0)); \
1051
+ \
1052
+ x[1] = _mm_shuffle_ps (srcVec[2 * i], \
1053
+ srcVec[2 * i], \
1054
+ _MM_SHUFFLE (1, 1, 1, 1)); \
1055
+ \
1056
+ x[2] = _mm_shuffle_ps (srcVec[2 * i], \
1057
+ srcVec[2 * i], \
1058
+ _MM_SHUFFLE (2, 2, 2, 2)); \
1059
+ \
1060
+ x[3] = _mm_shuffle_ps (srcVec[2 * i], \
1061
+ srcVec[2 * i], \
1062
+ _MM_SHUFFLE (3, 3, 3, 3)); \
1063
+ \
1064
+ x[4] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1065
+ srcVec[2 * i + 1], \
1066
+ _MM_SHUFFLE (0, 0, 0, 0)); \
1067
+ \
1068
+ x[5] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1069
+ srcVec[2 * i + 1], \
1070
+ _MM_SHUFFLE (1, 1, 1, 1)); \
1071
+ \
1072
+ x[6] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1073
+ srcVec[2 * i + 1], \
1074
+ _MM_SHUFFLE (2, 2, 2, 2)); \
1075
+ \
1076
+ x[7] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1077
+ srcVec[2 * i + 1], \
1078
+ _MM_SHUFFLE (3, 3, 3, 3)); \
1079
+ /* \
1080
+ * Multiply the components by each column of the matrix \
1081
+ */ \
1082
+ \
1083
+ x[0] = _mm_mul_ps (x[0], c0); \
1084
+ x[2] = _mm_mul_ps (x[2], c1); \
1085
+ x[4] = _mm_mul_ps (x[4], c2); \
1086
+ x[6] = _mm_mul_ps (x[6], c3); \
1087
+ \
1088
+ x[1] = _mm_mul_ps (x[1], c4); \
1089
+ x[3] = _mm_mul_ps (x[3], c5); \
1090
+ x[5] = _mm_mul_ps (x[5], c6); \
1091
+ x[7] = _mm_mul_ps (x[7], c7); \
1092
+ \
1093
+ /* \
1094
+ * Add across \
1095
+ */ \
1096
+ \
1097
+ evenSum = _mm_setzero_ps(); \
1098
+ evenSum = _mm_add_ps (evenSum, x[0]); \
1099
+ evenSum = _mm_add_ps (evenSum, x[2]); \
1100
+ evenSum = _mm_add_ps (evenSum, x[4]); \
1101
+ evenSum = _mm_add_ps (evenSum, x[6]); \
1102
+ \
1103
+ oddSum = _mm_setzero_ps(); \
1104
+ oddSum = _mm_add_ps (oddSum, x[1]); \
1105
+ oddSum = _mm_add_ps (oddSum, x[3]); \
1106
+ oddSum = _mm_add_ps (oddSum, x[5]); \
1107
+ oddSum = _mm_add_ps (oddSum, x[7]); \
1108
+ \
1109
+ /* \
1110
+ * Final Sum: \
1111
+ * out [0, 1, 2, 3] = evenSum + oddSum \
1112
+ * out [7, 6, 5, 4] = evenSum - oddSum \
1113
+ */ \
1114
+ \
1115
+ srcVec[2 * i] = _mm_add_ps (evenSum, oddSum); \
1116
+ srcVec[2 * i + 1] = _mm_sub_ps (evenSum, oddSum); \
1117
+ srcVec[2 * i + 1] = _mm_shuffle_ps (srcVec[2 * i + 1], \
1118
+ srcVec[2 * i + 1], \
1119
+ _MM_SHUFFLE (0, 1, 2, 3));
1120
+
1121
+ switch (zeroedRows)
1122
+ {
1123
+ case 0:
1124
+ default:
1125
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1126
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1127
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1128
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1129
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
1130
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
1131
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (6)
1132
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (7)
1133
+ break;
1134
+
1135
+ case 1:
1136
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1137
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1138
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1139
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1140
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
1141
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
1142
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (6)
1143
+ break;
1144
+
1145
+ case 2:
1146
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1147
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1148
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1149
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1150
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
1151
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
1152
+ break;
1153
+
1154
+ case 3:
1155
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1156
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1157
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1158
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1159
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
1160
+ break;
1161
+
1162
+ case 4:
1163
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1164
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1165
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1166
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
1167
+ break;
1168
+
1169
+ case 5:
1170
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1171
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1172
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
1173
+ break;
1174
+
1175
+ case 6:
1176
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1177
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
1178
+ break;
1179
+
1180
+ case 7:
1181
+ DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
1182
+ break;
1183
+ }
1184
+
1185
+ //
1186
+ // Columns -
1187
+ //
1188
+ // This is slightly more straightforward, if less readable. Here
1189
+ // we just operate on 4 columns at a time, in two batches.
1190
+ //
1191
+ // The slight mess is to try and cache sub-expressions, which
1192
+ // we ignore in the row-wise pass.
1193
+ //
1194
+
1195
+ for (int col = 0; col < 2; ++col)
1196
+ {
1197
+
1198
+ for (int i = 0; i < 8; ++i)
1199
+ in[i] = srcVec[2 * i + col];
1200
+
1201
+ alpha[0] = _mm_mul_ps (c, in[2]);
1202
+ alpha[1] = _mm_mul_ps (f, in[2]);
1203
+ alpha[2] = _mm_mul_ps (c, in[6]);
1204
+ alpha[3] = _mm_mul_ps (f, in[6]);
1205
+
1206
+ beta[0] = _mm_add_ps (_mm_add_ps (_mm_mul_ps (in[1], b),
1207
+ _mm_mul_ps (in[3], d)),
1208
+ _mm_add_ps (_mm_mul_ps (in[5], e),
1209
+ _mm_mul_ps (in[7], g)));
1210
+
1211
+ beta[1] = _mm_sub_ps (_mm_sub_ps (_mm_mul_ps (in[1], d),
1212
+ _mm_mul_ps (in[3], g)),
1213
+ _mm_add_ps (_mm_mul_ps (in[5], b),
1214
+ _mm_mul_ps (in[7], e)));
1215
+
1216
+ beta[2] = _mm_add_ps (_mm_sub_ps (_mm_mul_ps (in[1], e),
1217
+ _mm_mul_ps (in[3], b)),
1218
+ _mm_add_ps (_mm_mul_ps (in[5], g),
1219
+ _mm_mul_ps (in[7], d)));
1220
+
1221
+ beta[3] = _mm_add_ps (_mm_sub_ps (_mm_mul_ps (in[1], g),
1222
+ _mm_mul_ps (in[3], e)),
1223
+ _mm_sub_ps (_mm_mul_ps (in[5], d),
1224
+ _mm_mul_ps (in[7], b)));
1225
+
1226
+ theta[0] = _mm_mul_ps (a, _mm_add_ps (in[0], in[4]));
1227
+ theta[3] = _mm_mul_ps (a, _mm_sub_ps (in[0], in[4]));
1228
+
1229
+ theta[1] = _mm_add_ps (alpha[0], alpha[3]);
1230
+ theta[2] = _mm_sub_ps (alpha[1], alpha[2]);
1231
+
1232
+ gamma[0] = _mm_add_ps (theta[0], theta[1]);
1233
+ gamma[1] = _mm_add_ps (theta[3], theta[2]);
1234
+ gamma[2] = _mm_sub_ps (theta[3], theta[2]);
1235
+ gamma[3] = _mm_sub_ps (theta[0], theta[1]);
1236
+
1237
+ srcVec[ col] = _mm_add_ps (gamma[0], beta[0]);
1238
+ srcVec[2+col] = _mm_add_ps (gamma[1], beta[1]);
1239
+ srcVec[4+col] = _mm_add_ps (gamma[2], beta[2]);
1240
+ srcVec[6+col] = _mm_add_ps (gamma[3], beta[3]);
1241
+
1242
+ srcVec[ 8+col] = _mm_sub_ps (gamma[3], beta[3]);
1243
+ srcVec[10+col] = _mm_sub_ps (gamma[2], beta[2]);
1244
+ srcVec[12+col] = _mm_sub_ps (gamma[1], beta[1]);
1245
+ srcVec[14+col] = _mm_sub_ps (gamma[0], beta[0]);
1246
+ }
1247
+
1248
+ #else /* IMF_HAVE_SSE2 */
1249
+
1250
+ dctInverse8x8_scalar<zeroedRows> (data);
1251
+
1252
+ #endif /* IMF_HAVE_SSE2 */
1253
+ }
1254
+
1255
+
1256
+ //
1257
+ // AVX Implementation
1258
+ //
1259
+
1260
+ #define STR(A) #A
1261
+
1262
+ #define IDCT_AVX_SETUP_2_ROWS(_DST0, _DST1, _TMP0, _TMP1, \
1263
+ _OFF00, _OFF01, _OFF10, _OFF11) \
1264
+ "vmovaps " STR(_OFF00) "(%0), %%xmm" STR(_TMP0) " \n" \
1265
+ "vmovaps " STR(_OFF01) "(%0), %%xmm" STR(_TMP1) " \n" \
1266
+ " \n" \
1267
+ "vinsertf128 $1, " STR(_OFF10) "(%0), %%ymm" STR(_TMP0) ", %%ymm" STR(_TMP0) " \n" \
1268
+ "vinsertf128 $1, " STR(_OFF11) "(%0), %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP1) " \n" \
1269
+ " \n" \
1270
+ "vunpcklpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST0) " \n" \
1271
+ "vunpckhpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST1) " \n" \
1272
+ " \n" \
1273
+ "vunpcklps %%ymm" STR(_DST1) ", %%ymm" STR(_DST0) ", %%ymm" STR(_TMP0) " \n" \
1274
+ "vunpckhps %%ymm" STR(_DST1) ", %%ymm" STR(_DST0) ", %%ymm" STR(_TMP1) " \n" \
1275
+ " \n" \
1276
+ "vunpcklpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST0) " \n" \
1277
+ "vunpckhpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST1) " \n"
1278
+
1279
+ #define IDCT_AVX_MMULT_ROWS(_SRC) \
1280
+ /* Broadcast the source values into y12-y15 */ \
1281
+ "vpermilps $0x00, " STR(_SRC) ", %%ymm12 \n" \
1282
+ "vpermilps $0x55, " STR(_SRC) ", %%ymm13 \n" \
1283
+ "vpermilps $0xaa, " STR(_SRC) ", %%ymm14 \n" \
1284
+ "vpermilps $0xff, " STR(_SRC) ", %%ymm15 \n" \
1285
+ \
1286
+ /* Multiply the coefs by the broadcast values */ \
1287
+ "vmulps %%ymm12, %%ymm8, %%ymm12 \n" \
1288
+ "vmulps %%ymm13, %%ymm9, %%ymm13 \n" \
1289
+ "vmulps %%ymm14, %%ymm10, %%ymm14 \n" \
1290
+ "vmulps %%ymm15, %%ymm11, %%ymm15 \n" \
1291
+ \
1292
+ /* Accumulate the result back into the source */ \
1293
+ "vaddps %%ymm13, %%ymm12, %%ymm12 \n" \
1294
+ "vaddps %%ymm15, %%ymm14, %%ymm14 \n" \
1295
+ "vaddps %%ymm14, %%ymm12, " STR(_SRC) "\n"
1296
+
1297
+ #define IDCT_AVX_EO_TO_ROW_HALVES(_EVEN, _ODD, _FRONT, _BACK) \
1298
+ "vsubps " STR(_ODD) "," STR(_EVEN) "," STR(_BACK) "\n" \
1299
+ "vaddps " STR(_ODD) "," STR(_EVEN) "," STR(_FRONT) "\n" \
1300
+ /* Reverse the back half */ \
1301
+ "vpermilps $0x1b," STR(_BACK) "," STR(_BACK) "\n"
1302
+
1303
+ /* In order to allow for fast paths when we know certain rows
1304
+ * of the 8x8 block are zero, most of the body of the DCT is
1305
+ * in the following macro. Statements are wrapped in a ROWn()
1306
+ * macro, where n is the lowest row in the 8x8 block in which
1307
+ * they depend.
1308
+ *
1309
+ * This should work for the cases where we have 2-8 full rows.
1310
+ * The 1-row case is special, and we'll handle it separately.
1311
+ */
1312
+ #define IDCT_AVX_BODY \
1313
+ /* ==============================================
1314
+ * Row 1D DCT
1315
+ * ----------------------------------------------
1316
+ */ \
1317
+ \
1318
+ /* Setup for the row-oriented 1D DCT. Assuming that (%0) holds
1319
+ * the row-major 8x8 block, load ymm0-3 with the even columns
1320
+ * and ymm4-7 with the odd columns. The lower half of the ymm
1321
+ * holds one row, while the upper half holds the next row.
1322
+ *
1323
+ * If our source is:
1324
+ * a0 a1 a2 a3 a4 a5 a6 a7
1325
+ * b0 b1 b2 b3 b4 b5 b6 b7
1326
+ *
1327
+ * We'll be forming:
1328
+ * a0 a2 a4 a6 b0 b2 b4 b6
1329
+ * a1 a3 a5 a7 b1 b3 b5 b7
1330
+ */ \
1331
+ ROW0( IDCT_AVX_SETUP_2_ROWS(0, 4, 14, 15, 0, 16, 32, 48) ) \
1332
+ ROW2( IDCT_AVX_SETUP_2_ROWS(1, 5, 12, 13, 64, 80, 96, 112) ) \
1333
+ ROW4( IDCT_AVX_SETUP_2_ROWS(2, 6, 10, 11, 128, 144, 160, 176) ) \
1334
+ ROW6( IDCT_AVX_SETUP_2_ROWS(3, 7, 8, 9, 192, 208, 224, 240) ) \
1335
+ \
1336
+ /* Multiply the even columns (ymm0-3) by the matrix M1
1337
+ * storing the results back in ymm0-3
1338
+ *
1339
+ * Assume that (%1) holds the matrix in column major order
1340
+ */ \
1341
+ "vbroadcastf128 (%1), %%ymm8 \n" \
1342
+ "vbroadcastf128 16(%1), %%ymm9 \n" \
1343
+ "vbroadcastf128 32(%1), %%ymm10 \n" \
1344
+ "vbroadcastf128 48(%1), %%ymm11 \n" \
1345
+ \
1346
+ ROW0( IDCT_AVX_MMULT_ROWS(%%ymm0) ) \
1347
+ ROW2( IDCT_AVX_MMULT_ROWS(%%ymm1) ) \
1348
+ ROW4( IDCT_AVX_MMULT_ROWS(%%ymm2) ) \
1349
+ ROW6( IDCT_AVX_MMULT_ROWS(%%ymm3) ) \
1350
+ \
1351
+ /* Repeat, but with the odd columns (ymm4-7) and the
1352
+ * matrix M2
1353
+ */ \
1354
+ "vbroadcastf128 64(%1), %%ymm8 \n" \
1355
+ "vbroadcastf128 80(%1), %%ymm9 \n" \
1356
+ "vbroadcastf128 96(%1), %%ymm10 \n" \
1357
+ "vbroadcastf128 112(%1), %%ymm11 \n" \
1358
+ \
1359
+ ROW0( IDCT_AVX_MMULT_ROWS(%%ymm4) ) \
1360
+ ROW2( IDCT_AVX_MMULT_ROWS(%%ymm5) ) \
1361
+ ROW4( IDCT_AVX_MMULT_ROWS(%%ymm6) ) \
1362
+ ROW6( IDCT_AVX_MMULT_ROWS(%%ymm7) ) \
1363
+ \
1364
+ /* Sum the M1 (ymm0-3) and M2 (ymm4-7) results to get the
1365
+ * front halves of the results, and difference to get the
1366
+ * back halves. The front halves end up in ymm0-3, the back
1367
+ * halves end up in ymm12-15.
1368
+ */ \
1369
+ ROW0( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm0, %%ymm4, %%ymm0, %%ymm12) ) \
1370
+ ROW2( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm1, %%ymm5, %%ymm1, %%ymm13) ) \
1371
+ ROW4( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm2, %%ymm6, %%ymm2, %%ymm14) ) \
1372
+ ROW6( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm3, %%ymm7, %%ymm3, %%ymm15) ) \
1373
+ \
1374
+ /* Reassemble the rows halves into ymm0-7 */ \
1375
+ ROW7( "vperm2f128 $0x13, %%ymm3, %%ymm15, %%ymm7 \n" ) \
1376
+ ROW6( "vperm2f128 $0x02, %%ymm3, %%ymm15, %%ymm6 \n" ) \
1377
+ ROW5( "vperm2f128 $0x13, %%ymm2, %%ymm14, %%ymm5 \n" ) \
1378
+ ROW4( "vperm2f128 $0x02, %%ymm2, %%ymm14, %%ymm4 \n" ) \
1379
+ ROW3( "vperm2f128 $0x13, %%ymm1, %%ymm13, %%ymm3 \n" ) \
1380
+ ROW2( "vperm2f128 $0x02, %%ymm1, %%ymm13, %%ymm2 \n" ) \
1381
+ ROW1( "vperm2f128 $0x13, %%ymm0, %%ymm12, %%ymm1 \n" ) \
1382
+ ROW0( "vperm2f128 $0x02, %%ymm0, %%ymm12, %%ymm0 \n" ) \
1383
+ \
1384
+ \
1385
+ /* ==============================================
1386
+ * Column 1D DCT
1387
+ * ----------------------------------------------
1388
+ */ \
1389
+ \
1390
+ /* Rows should be in ymm0-7, and M2 columns should still be
1391
+ * preserved in ymm8-11. M2 has 4 unique values (and +-
1392
+ * versions of each), and all (positive) values appear in
1393
+ * the first column (and row), which is in ymm8.
1394
+ *
1395
+ * For the column-wise DCT, we need to:
1396
+ * 1) Broadcast each element of a row of M2 into 4 vectors
1397
+ * 2) Multiply the odd rows (ymm1,3,5,7) by the broadcasts.
1398
+ * 3) Accumulate into ymm12-15 for the odd outputs.
1399
+ *
1400
+ * Instead of doing 16 broadcasts for each element in M2,
1401
+ * do 4, filling y8-11 with:
1402
+ *
1403
+ * ymm8: [ b b b b | b b b b ]
1404
+ * ymm9: [ d d d d | d d d d ]
1405
+ * ymm10: [ e e e e | e e e e ]
1406
+ * ymm11: [ g g g g | g g g g ]
1407
+ *
1408
+ * And deal with the negative values by subtracting during accum.
1409
+ */ \
1410
+ "vpermilps $0xff, %%ymm8, %%ymm11 \n" \
1411
+ "vpermilps $0xaa, %%ymm8, %%ymm10 \n" \
1412
+ "vpermilps $0x55, %%ymm8, %%ymm9 \n" \
1413
+ "vpermilps $0x00, %%ymm8, %%ymm8 \n" \
1414
+ \
1415
+ /* This one is easy, since we have ymm12-15 open for scratch
1416
+ * ymm12 = b ymm1 + d ymm3 + e ymm5 + g ymm7
1417
+ */ \
1418
+ ROW1( "vmulps %%ymm1, %%ymm8, %%ymm12 \n" ) \
1419
+ ROW3( "vmulps %%ymm3, %%ymm9, %%ymm13 \n" ) \
1420
+ ROW5( "vmulps %%ymm5, %%ymm10, %%ymm14 \n" ) \
1421
+ ROW7( "vmulps %%ymm7, %%ymm11, %%ymm15 \n" ) \
1422
+ \
1423
+ ROW3( "vaddps %%ymm12, %%ymm13, %%ymm12 \n" ) \
1424
+ ROW7( "vaddps %%ymm14, %%ymm15, %%ymm14 \n" ) \
1425
+ ROW5( "vaddps %%ymm12, %%ymm14, %%ymm12 \n" ) \
1426
+ \
1427
+ /* Trickier, since only y13-15 are open for scratch
1428
+ * ymm13 = d ymm1 - g ymm3 - b ymm5 - e ymm7
1429
+ */ \
1430
+ ROW1( "vmulps %%ymm1, %%ymm9, %%ymm13 \n" ) \
1431
+ ROW3( "vmulps %%ymm3, %%ymm11, %%ymm14 \n" ) \
1432
+ ROW5( "vmulps %%ymm5, %%ymm8, %%ymm15 \n" ) \
1433
+ \
1434
+ ROW5( "vaddps %%ymm14, %%ymm15, %%ymm14 \n" ) \
1435
+ ROW3( "vsubps %%ymm14, %%ymm13, %%ymm13 \n" ) \
1436
+ \
1437
+ ROW7( "vmulps %%ymm7, %%ymm10, %%ymm15 \n" ) \
1438
+ ROW7( "vsubps %%ymm15, %%ymm13, %%ymm13 \n" ) \
1439
+ \
1440
+ /* Trickier still, as only y14-15 are open for scratch
1441
+ * ymm14 = e ymm1 - b ymm3 + g ymm5 + d ymm7
1442
+ */ \
1443
+ ROW1( "vmulps %%ymm1, %%ymm10, %%ymm14 \n" ) \
1444
+ ROW3( "vmulps %%ymm3, %%ymm8, %%ymm15 \n" ) \
1445
+ \
1446
+ ROW3( "vsubps %%ymm15, %%ymm14, %%ymm14 \n" ) \
1447
+ \
1448
+ ROW5( "vmulps %%ymm5, %%ymm11, %%ymm15 \n" ) \
1449
+ ROW5( "vaddps %%ymm15, %%ymm14, %%ymm14 \n" ) \
1450
+ \
1451
+ ROW7( "vmulps %%ymm7, %%ymm9, %%ymm15 \n" ) \
1452
+ ROW7( "vaddps %%ymm15, %%ymm14, %%ymm14 \n" ) \
1453
+ \
1454
+ \
1455
+ /* Easy, as we can blow away ymm1,3,5,7 for scratch
1456
+ * ymm15 = g ymm1 - e ymm3 + d ymm5 - b ymm7
1457
+ */ \
1458
+ ROW1( "vmulps %%ymm1, %%ymm11, %%ymm15 \n" ) \
1459
+ ROW3( "vmulps %%ymm3, %%ymm10, %%ymm3 \n" ) \
1460
+ ROW5( "vmulps %%ymm5, %%ymm9, %%ymm5 \n" ) \
1461
+ ROW7( "vmulps %%ymm7, %%ymm8, %%ymm7 \n" ) \
1462
+ \
1463
+ ROW5( "vaddps %%ymm15, %%ymm5, %%ymm15 \n" ) \
1464
+ ROW7( "vaddps %%ymm3, %%ymm7, %%ymm3 \n" ) \
1465
+ ROW3( "vsubps %%ymm3, %%ymm15, %%ymm15 \n" ) \
1466
+ \
1467
+ \
1468
+ /* Load coefs for M1. Because we're going to broadcast
1469
+ * coefs, we don't need to load the actual structure from
1470
+ * M1. Instead, just load enough that we can broadcast.
1471
+ * There are only 6 unique values in M1, but they're in +-
1472
+ * pairs, leaving only 3 unique coefs if we add and subtract
1473
+ * properly.
1474
+ *
1475
+ * Fill ymm1 with coef[2] = [ a a c f | a a c f ]
1476
+ * Broadcast ymm5 with [ f f f f | f f f f ]
1477
+ * Broadcast ymm3 with [ c c c c | c c c c ]
1478
+ * Broadcast ymm1 with [ a a a a | a a a a ]
1479
+ */ \
1480
+ "vbroadcastf128 8(%1), %%ymm1 \n" \
1481
+ "vpermilps $0xff, %%ymm1, %%ymm5 \n" \
1482
+ "vpermilps $0xaa, %%ymm1, %%ymm3 \n" \
1483
+ "vpermilps $0x00, %%ymm1, %%ymm1 \n" \
1484
+ \
1485
+ /* If we expand E = [M1] [x0 x2 x4 x6]^t, we get the following
1486
+ * common expressions:
1487
+ *
1488
+ * E_0 = ymm8 = (a ymm0 + a ymm4) + (c ymm2 + f ymm6)
1489
+ * E_3 = ymm11 = (a ymm0 + a ymm4) - (c ymm2 + f ymm6)
1490
+ *
1491
+ * E_1 = ymm9 = (a ymm0 - a ymm4) + (f ymm2 - c ymm6)
1492
+ * E_2 = ymm10 = (a ymm0 - a ymm4) - (f ymm2 - c ymm6)
1493
+ *
1494
+ * Afterwards, ymm8-11 will hold the even outputs.
1495
+ */ \
1496
+ \
1497
+ /* ymm11 = (a ymm0 + a ymm4), ymm1 = (a ymm0 - a ymm4) */ \
1498
+ ROW0( "vmulps %%ymm1, %%ymm0, %%ymm11 \n" ) \
1499
+ ROW4( "vmulps %%ymm1, %%ymm4, %%ymm4 \n" ) \
1500
+ ROW0( "vmovaps %%ymm11, %%ymm1 \n" ) \
1501
+ ROW4( "vaddps %%ymm4, %%ymm11, %%ymm11 \n" ) \
1502
+ ROW4( "vsubps %%ymm4, %%ymm1, %%ymm1 \n" ) \
1503
+ \
1504
+ /* ymm7 = (c ymm2 + f ymm6) */ \
1505
+ ROW2( "vmulps %%ymm3, %%ymm2, %%ymm7 \n" ) \
1506
+ ROW6( "vmulps %%ymm5, %%ymm6, %%ymm9 \n" ) \
1507
+ ROW6( "vaddps %%ymm9, %%ymm7, %%ymm7 \n" ) \
1508
+ \
1509
+ /* E_0 = ymm8 = (a ymm0 + a ymm4) + (c ymm2 + f ymm6)
1510
+ * E_3 = ymm11 = (a ymm0 + a ymm4) - (c ymm2 + f ymm6)
1511
+ */ \
1512
+ ROW0( "vmovaps %%ymm11, %%ymm8 \n" ) \
1513
+ ROW2( "vaddps %%ymm7, %%ymm8, %%ymm8 \n" ) \
1514
+ ROW2( "vsubps %%ymm7, %%ymm11, %%ymm11 \n" ) \
1515
+ \
1516
+ /* ymm7 = (f ymm2 - c ymm6) */ \
1517
+ ROW2( "vmulps %%ymm5, %%ymm2, %%ymm7 \n" ) \
1518
+ ROW6( "vmulps %%ymm3, %%ymm6, %%ymm9 \n" ) \
1519
+ ROW6( "vsubps %%ymm9, %%ymm7, %%ymm7 \n" ) \
1520
+ \
1521
+ /* E_1 = ymm9 = (a ymm0 - a ymm4) + (f ymm2 - c ymm6)
1522
+ * E_2 = ymm10 = (a ymm0 - a ymm4) - (f ymm2 - c ymm6)
1523
+ */ \
1524
+ ROW0( "vmovaps %%ymm1, %%ymm9 \n" ) \
1525
+ ROW0( "vmovaps %%ymm1, %%ymm10 \n" ) \
1526
+ ROW2( "vaddps %%ymm7, %%ymm1, %%ymm9 \n" ) \
1527
+ ROW2( "vsubps %%ymm7, %%ymm1, %%ymm10 \n" ) \
1528
+ \
1529
+ /* Add the even (ymm8-11) and the odds (ymm12-15),
1530
+ * placing the results into ymm0-7
1531
+ */ \
1532
+ "vaddps %%ymm12, %%ymm8, %%ymm0 \n" \
1533
+ "vaddps %%ymm13, %%ymm9, %%ymm1 \n" \
1534
+ "vaddps %%ymm14, %%ymm10, %%ymm2 \n" \
1535
+ "vaddps %%ymm15, %%ymm11, %%ymm3 \n" \
1536
+ \
1537
+ "vsubps %%ymm12, %%ymm8, %%ymm7 \n" \
1538
+ "vsubps %%ymm13, %%ymm9, %%ymm6 \n" \
1539
+ "vsubps %%ymm14, %%ymm10, %%ymm5 \n" \
1540
+ "vsubps %%ymm15, %%ymm11, %%ymm4 \n" \
1541
+ \
1542
+ /* Copy out the results from ymm0-7 */ \
1543
+ "vmovaps %%ymm0, (%0) \n" \
1544
+ "vmovaps %%ymm1, 32(%0) \n" \
1545
+ "vmovaps %%ymm2, 64(%0) \n" \
1546
+ "vmovaps %%ymm3, 96(%0) \n" \
1547
+ "vmovaps %%ymm4, 128(%0) \n" \
1548
+ "vmovaps %%ymm5, 160(%0) \n" \
1549
+ "vmovaps %%ymm6, 192(%0) \n" \
1550
+ "vmovaps %%ymm7, 224(%0) \n"
1551
+
1552
+ /* Output, input, and clobber (OIC) sections of the inline asm */
1553
+ #define IDCT_AVX_OIC(_IN0) \
1554
+ : /* Output */ \
1555
+ : /* Input */ "r"(_IN0), "r"(sAvxCoef) \
1556
+ : /* Clobber */ "memory", \
1557
+ "%xmm0", "%xmm1", "%xmm2", "%xmm3", \
1558
+ "%xmm4", "%xmm5", "%xmm6", "%xmm7", \
1559
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",\
1560
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15"
1561
+
1562
+ /* Include vzeroupper for non-AVX builds */
1563
+ #ifndef __AVX__
1564
+ #define IDCT_AVX_ASM(_IN0) \
1565
+ __asm__( \
1566
+ IDCT_AVX_BODY \
1567
+ "vzeroupper \n" \
1568
+ IDCT_AVX_OIC(_IN0) \
1569
+ );
1570
+ #else /* __AVX__ */
1571
+ #define IDCT_AVX_ASM(_IN0) \
1572
+ __asm__( \
1573
+ IDCT_AVX_BODY \
1574
+ IDCT_AVX_OIC(_IN0) \
1575
+ );
1576
+ #endif /* __AVX__ */
1577
+
1578
+ template <int zeroedRows>
1579
+ void
1580
+ dctInverse8x8_avx (float *data)
1581
+ {
1582
+ #if defined IMF_HAVE_GCC_INLINEASM_64
1583
+
1584
+ /* The column-major version of M1, followed by the
1585
+ * column-major version of M2:
1586
+ *
1587
+ * [ a c a f ] [ b d e g ]
1588
+ * M1 = [ a f -a -c ] M2 = [ d -g -b -e ]
1589
+ * [ a -f -a c ] [ e -b g d ]
1590
+ * [ a -c a -f ] [ g -e d -b ]
1591
+ */
1592
+ const float sAvxCoef[32] __attribute__((aligned(32))) = {
1593
+ 3.535536e-01, 3.535536e-01, 3.535536e-01, 3.535536e-01, /* a a a a */
1594
+ 4.619398e-01, 1.913422e-01, -1.913422e-01, -4.619398e-01, /* c f -f -c */
1595
+ 3.535536e-01, -3.535536e-01, -3.535536e-01, 3.535536e-01, /* a -a -a a */
1596
+ 1.913422e-01, -4.619398e-01, 4.619398e-01, -1.913422e-01, /* f -c c -f */
1597
+
1598
+ 4.903927e-01, 4.157349e-01, 2.777855e-01, 9.754573e-02, /* b d e g */
1599
+ 4.157349e-01, -9.754573e-02, -4.903927e-01, -2.777855e-01, /* d -g -b -e */
1600
+ 2.777855e-01, -4.903927e-01, 9.754573e-02, 4.157349e-01, /* e -b g d */
1601
+ 9.754573e-02, -2.777855e-01, 4.157349e-01, -4.903927e-01 /* g -e d -b */
1602
+ };
1603
+
1604
+ #define ROW0(_X) _X
1605
+ #define ROW1(_X) _X
1606
+ #define ROW2(_X) _X
1607
+ #define ROW3(_X) _X
1608
+ #define ROW4(_X) _X
1609
+ #define ROW5(_X) _X
1610
+ #define ROW6(_X) _X
1611
+ #define ROW7(_X) _X
1612
+
1613
+ if (zeroedRows == 0) {
1614
+
1615
+ IDCT_AVX_ASM(data)
1616
+
1617
+ } else if (zeroedRows == 1) {
1618
+
1619
+ #undef ROW7
1620
+ #define ROW7(_X)
1621
+ IDCT_AVX_ASM(data)
1622
+
1623
+ } else if (zeroedRows == 2) {
1624
+
1625
+ #undef ROW6
1626
+ #define ROW6(_X)
1627
+ IDCT_AVX_ASM(data)
1628
+
1629
+ } else if (zeroedRows == 3) {
1630
+
1631
+ #undef ROW5
1632
+ #define ROW5(_X)
1633
+ IDCT_AVX_ASM(data)
1634
+
1635
+ } else if (zeroedRows == 4) {
1636
+
1637
+ #undef ROW4
1638
+ #define ROW4(_X)
1639
+ IDCT_AVX_ASM(data)
1640
+
1641
+ } else if (zeroedRows == 5) {
1642
+
1643
+ #undef ROW3
1644
+ #define ROW3(_X)
1645
+ IDCT_AVX_ASM(data)
1646
+
1647
+ } else if (zeroedRows == 6) {
1648
+
1649
+ #undef ROW2
1650
+ #define ROW2(_X)
1651
+ IDCT_AVX_ASM(data)
1652
+
1653
+ } else if (zeroedRows == 7) {
1654
+
1655
+ __asm__(
1656
+
1657
+ /* ==============================================
1658
+ * Row 1D DCT
1659
+ * ----------------------------------------------
1660
+ */
1661
+ IDCT_AVX_SETUP_2_ROWS(0, 4, 14, 15, 0, 16, 32, 48)
1662
+
1663
+ "vbroadcastf128 (%1), %%ymm8 \n"
1664
+ "vbroadcastf128 16(%1), %%ymm9 \n"
1665
+ "vbroadcastf128 32(%1), %%ymm10 \n"
1666
+ "vbroadcastf128 48(%1), %%ymm11 \n"
1667
+
1668
+ /* Stash a vector of [a a a a | a a a a] away in ymm2 */
1669
+ "vinsertf128 $1, %%xmm8, %%ymm8, %%ymm2 \n"
1670
+
1671
+ IDCT_AVX_MMULT_ROWS(%%ymm0)
1672
+
1673
+ "vbroadcastf128 64(%1), %%ymm8 \n"
1674
+ "vbroadcastf128 80(%1), %%ymm9 \n"
1675
+ "vbroadcastf128 96(%1), %%ymm10 \n"
1676
+ "vbroadcastf128 112(%1), %%ymm11 \n"
1677
+
1678
+ IDCT_AVX_MMULT_ROWS(%%ymm4)
1679
+
1680
+ IDCT_AVX_EO_TO_ROW_HALVES(%%ymm0, %%ymm4, %%ymm0, %%ymm12)
1681
+
1682
+ "vperm2f128 $0x02, %%ymm0, %%ymm12, %%ymm0 \n"
1683
+
1684
+ /* ==============================================
1685
+ * Column 1D DCT
1686
+ * ----------------------------------------------
1687
+ */
1688
+
1689
+ /* DC only, so multiple by a and we're done */
1690
+ "vmulps %%ymm2, %%ymm0, %%ymm0 \n"
1691
+
1692
+ /* Copy out results */
1693
+ "vmovaps %%ymm0, (%0) \n"
1694
+ "vmovaps %%ymm0, 32(%0) \n"
1695
+ "vmovaps %%ymm0, 64(%0) \n"
1696
+ "vmovaps %%ymm0, 96(%0) \n"
1697
+ "vmovaps %%ymm0, 128(%0) \n"
1698
+ "vmovaps %%ymm0, 160(%0) \n"
1699
+ "vmovaps %%ymm0, 192(%0) \n"
1700
+ "vmovaps %%ymm0, 224(%0) \n"
1701
+
1702
+ #ifndef __AVX__
1703
+ "vzeroupper \n"
1704
+ #endif /* __AVX__ */
1705
+ IDCT_AVX_OIC(data)
1706
+ );
1707
+ } else {
1708
+ assert(false); // Invalid template instance parameter
1709
+ }
1710
+ #else /* IMF_HAVE_GCC_INLINEASM_64 */
1711
+
1712
+ dctInverse8x8_scalar<zeroedRows>(data);
1713
+
1714
+ #endif /* IMF_HAVE_GCC_INLINEASM_64 */
1715
+ }
1716
+
1717
+
1718
+ //
1719
+ // Full 8x8 Forward DCT:
1720
+ //
1721
+ // Base forward 8x8 DCT implementation. Works on the data in-place
1722
+ //
1723
+ // The implementation is described in Pennebaker + Mitchell,
1724
+ // section 4.3.2, and illustrated in figure 4-7
1725
+ //
1726
+ // The basic idea is that the 1D DCT math reduces to:
1727
+ //
1728
+ // 2*out_0 = c_4 [(s_07 + s_34) + (s_12 + s_56)]
1729
+ // 2*out_4 = c_4 [(s_07 + s_34) - (s_12 + s_56)]
1730
+ //
1731
+ // {2*out_2, 2*out_6} = rot_6 ((d_12 - d_56), (s_07 - s_34))
1732
+ //
1733
+ // {2*out_3, 2*out_5} = rot_-3 (d_07 - c_4 (s_12 - s_56),
1734
+ // d_34 - c_4 (d_12 + d_56))
1735
+ //
1736
+ // {2*out_1, 2*out_7} = rot_-1 (d_07 + c_4 (s_12 - s_56),
1737
+ // -d_34 - c_4 (d_12 + d_56))
1738
+ //
1739
+ // where:
1740
+ //
1741
+ // c_i = cos(i*pi/16)
1742
+ // s_i = sin(i*pi/16)
1743
+ //
1744
+ // s_ij = in_i + in_j
1745
+ // d_ij = in_i - in_j
1746
+ //
1747
+ // rot_i(x, y) = {c_i*x + s_i*y, -s_i*x + c_i*y}
1748
+ //
1749
+ // We'll run the DCT in two passes. First, run the 1D DCT on
1750
+ // the rows, in-place. Then, run over the columns in-place,
1751
+ // and be done with it.
1752
+ //
1753
+
1754
+ #ifndef IMF_HAVE_SSE2
1755
+
1756
+ //
1757
+ // Default implementation
1758
+ //
1759
+
1760
//
// One 8-point forward DCT, in place, over the elements
//
//     v[0], v[stride], v[2 * stride], ... , v[7 * stride]
//
// stride == 1 transforms a row of a row-major 8x8 block,
// stride == 8 transforms a column.
//
// With c_i = cos (i * pi / 16), s_ij = in_i + in_j and
// d_ij = in_i - in_j, the outputs are:
//
//     2*out_0 = c_4 [(s_07 + s_34) + (s_12 + s_56)]
//     2*out_4 = c_4 [(s_07 + s_34) - (s_12 + s_56)]
//
//     {2*out_2, 2*out_6} = rot_6  (d_12 - d_56, s_07 - s_34)
//     {2*out_3, 2*out_5} = rot_-3 (d_07 - K0,   d_34 + K1)
//     {2*out_1, 2*out_7} = rot_-1 (d_07 + K0,   K1 - d_34)
//
// where K0 = c_4 (s_12 - s_56) and K1 = -c_4 (d_12 + d_56).
//
static void
dctForward8x8_scalar1D (float *v, int stride)
{
    //
    // c_4, and c_i / 2 for the remaining angles.  These are the same
    // literal values the SSE2 build of dctForward8x8 uses, so both
    // builds work from identical coefficients and nothing has to be
    // recomputed with cosf() for every block.
    //

    const float c4     = .70710678f;

    const float c1Half = .490392640f;
    const float c2Half = .461939770f;
    const float c3Half = .415734810f;
    const float c5Half = .277785120f;
    const float c6Half = .191341720f;
    const float c7Half = .097545161f;

    //
    // Read all eight inputs first; the outputs land on the same slots.
    //

    const float in0 = v[0 * stride];
    const float in1 = v[1 * stride];
    const float in2 = v[2 * stride];
    const float in3 = v[3 * stride];
    const float in4 = v[4 * stride];
    const float in5 = v[5 * stride];
    const float in6 = v[6 * stride];
    const float in7 = v[7 * stride];

    const float s07 = in0 + in7;
    const float d07 = in0 - in7;

    const float s12 = in1 + in2;
    const float d12 = in1 - in2;

    const float s34 = in3 + in4;
    const float d34 = in3 - in4;

    const float s56 = in5 + in6;
    const float d56 = in5 - in6;

    //
    // out_0 and out_4
    //

    const float evenA = c4 * (s07 + s34);
    const float evenB = c4 * (s12 + s56);

    v[0 * stride] = .5f * (evenA + evenB);
    v[4 * stride] = .5f * (evenA - evenB);

    //
    // (2*out_2, 2*out_6) = rot 6 (d12 - d56, s07 - s34)
    //

    float rotX = d12 - d56;
    float rotY = s07 - s34;

    v[2 * stride] = c6Half * rotX + c2Half * rotY;
    v[6 * stride] = c6Half * rotY - c2Half * rotX;

    //
    // K0 and K1 feed both of the remaining rotations
    //

    const float K0 =  c4 * (s12 - s56);
    const float K1 = -c4 * (d12 + d56);

    //
    // (2*out_3, 2*out_5) = rot -3 (d07 - K0, d34 + K1)
    //

    rotX = d07 - K0;
    rotY = d34 + K1;

    v[3 * stride] = c3Half * rotX - c5Half * rotY;
    v[5 * stride] = c5Half * rotX + c3Half * rotY;

    //
    // (2*out_1, 2*out_7) = rot -1 (d07 + K0, K1 - d34)
    //

    rotX = d07 + K0;
    rotY = K1 - d34;

    v[1 * stride] = c1Half * rotX - c7Half * rotY;
    v[7 * stride] = c7Half * rotX + c1Half * rotY;
}


//
// Forward DCT of one 8x8 block, default (scalar) implementation.
//
//   data   64 floats in row-major order, transformed in place.
//
// Runs the 1D transform over the eight rows first, then over the
// eight columns of the intermediate result.
//
void
dctForward8x8 (float *data)
{
    //
    // First pass - rows
    //

    for (int row = 0; row < 8; ++row)
        dctForward8x8_scalar1D (data + 8 * row, 1);

    //
    // Second pass - the same, but on the columns
    //

    for (int column = 0; column < 8; ++column)
        dctForward8x8_scalar1D (data + column, 8);
}
1931
+
1932
+ #else /* IMF_HAVE_SSE2 */
1933
+
1934
+ // SSE2 implementation of dctForward8x8.
1935
+ // Transforms the 64 floats at data (a row-major 8x8 block) in place.
1936
+ // data is accessed through __m128 pointers (presumably needs 16-byte alignment).
1937
+ // Here, we're always doing a column-wise operation
1938
+ // plus transposes. This might be faster to do differently
1939
+ // between row-wise and column-wise
1940
+ //
1941
+
1942
+ void
1943
+ dctForward8x8 (float *data)
1944
+ {
1945
+ __m128 *srcVec = (__m128 *)data;
1946
+ __m128 a0Vec, a1Vec, a2Vec, a3Vec, a4Vec, a5Vec, a6Vec, a7Vec;
1947
+ __m128 k0Vec, k1Vec, rotXVec, rotYVec;
1948
+ __m128 transTmp[4], transTmp2[4];
1949
+
1950
+ __m128 c4Vec = { .70710678f, .70710678f, .70710678f, .70710678f};
1951
+ __m128 c4NegVec = {-.70710678f, -.70710678f, -.70710678f, -.70710678f};
1952
+
1953
+ __m128 c1HalfVec = {.490392640f, .490392640f, .490392640f, .490392640f};
1954
+ __m128 c2HalfVec = {.461939770f, .461939770f, .461939770f, .461939770f};
1955
+ __m128 c3HalfVec = {.415734810f, .415734810f, .415734810f, .415734810f};
1956
+ __m128 c5HalfVec = {.277785120f, .277785120f, .277785120f, .277785120f};
1957
+ __m128 c6HalfVec = {.191341720f, .191341720f, .191341720f, .191341720f};
1958
+ __m128 c7HalfVec = {.097545161f, .097545161f, .097545161f, .097545161f};
1959
+
1960
+ __m128 halfVec = {.5f, .5f, .5f, .5f};
1961
+
1962
+ for (int iter = 0; iter < 2; ++iter)
1963
+ {
1964
+ // Each iter: 1D transform down every column, then transpose (two iters = both axes).
1965
+ // Operate on 4 columns at a time (pass picks the left or right half). The
1966
+ // __m128 offsets into our row-major array, per row, are:
1967
+ // 0: 0 1
1968
+ // 1: 2 3
1969
+ // 2: 4 5
1970
+ // 3: 6 7
1971
+ // 4: 8 9
1972
+ // 5: 10 11
1973
+ // 6: 12 13
1974
+ // 7: 14 15
1975
+ // a0..a7 are the sums / differences of row pairs (0,7), (1,2), (3,4), (5,6).
1976
+
1977
+ for (int pass=0; pass<2; ++pass)
1978
+ {
1979
+ a0Vec = _mm_add_ps (srcVec[ 0 + pass], srcVec[14 + pass]);
1980
+ a1Vec = _mm_add_ps (srcVec[ 2 + pass], srcVec[ 4 + pass]);
1981
+ a3Vec = _mm_add_ps (srcVec[ 6 + pass], srcVec[ 8 + pass]);
1982
+ a5Vec = _mm_add_ps (srcVec[10 + pass], srcVec[12 + pass]);
1983
+
1984
+ a7Vec = _mm_sub_ps (srcVec[ 0 + pass], srcVec[14 + pass]);
1985
+ a2Vec = _mm_sub_ps (srcVec[ 2 + pass], srcVec[ 4 + pass]);
1986
+ a4Vec = _mm_sub_ps (srcVec[ 6 + pass], srcVec[ 8 + pass]);
1987
+ a6Vec = _mm_sub_ps (srcVec[10 + pass], srcVec[12 + pass]);
1988
+
1989
+ //
1990
+ // First stage; Compute out_0 and out_4
1991
+ // (stored in rows 0 and 4, then scaled by halfVec)
1992
+
1993
+ k0Vec = _mm_add_ps (a0Vec, a3Vec);
1994
+ k1Vec = _mm_add_ps (a1Vec, a5Vec);
1995
+
1996
+ k0Vec = _mm_mul_ps (c4Vec, k0Vec);
1997
+ k1Vec = _mm_mul_ps (c4Vec, k1Vec);
1998
+
1999
+ srcVec[0 + pass] = _mm_add_ps (k0Vec, k1Vec);
2000
+ srcVec[8 + pass] = _mm_sub_ps (k0Vec, k1Vec);
2001
+
2002
+ srcVec[0 + pass] = _mm_mul_ps (srcVec[0 + pass], halfVec );
2003
+ srcVec[8 + pass] = _mm_mul_ps (srcVec[8 + pass], halfVec );
2004
+
2005
+
2006
+ //
2007
+ // Second stage; Compute out_2 and out_6
2008
+ // (stored in rows 2 and 6; the c*HalfVec constants fold in the .5f factor of the scalar version)
2009
+
2010
+ k0Vec = _mm_sub_ps (a2Vec, a6Vec);
2011
+ k1Vec = _mm_sub_ps (a0Vec, a3Vec);
2012
+
2013
+ srcVec[ 4 + pass] = _mm_add_ps (_mm_mul_ps (c6HalfVec, k0Vec),
2014
+ _mm_mul_ps (c2HalfVec, k1Vec));
2015
+
2016
+ srcVec[12 + pass] = _mm_sub_ps (_mm_mul_ps (c6HalfVec, k1Vec),
2017
+ _mm_mul_ps (c2HalfVec, k0Vec));
2018
+
2019
+ //
2020
+ // Precompute K0 and K1 for the remaining stages
2021
+ //
2022
+
2023
+ k0Vec = _mm_mul_ps (_mm_sub_ps (a1Vec, a5Vec), c4Vec);
2024
+ k1Vec = _mm_mul_ps (_mm_add_ps (a2Vec, a6Vec), c4NegVec);
2025
+
2026
+ //
2027
+ // Third Stage, compute out_3 and out_5
2028
+ // (a rotation of (a7 - K0, a4 + K1), stored in rows 3 and 5)
2029
+
2030
+ rotXVec = _mm_sub_ps (a7Vec, k0Vec);
2031
+ rotYVec = _mm_add_ps (a4Vec, k1Vec);
2032
+
2033
+ srcVec[ 6 + pass] = _mm_sub_ps (_mm_mul_ps (c3HalfVec, rotXVec),
2034
+ _mm_mul_ps (c5HalfVec, rotYVec));
2035
+
2036
+ srcVec[10 + pass] = _mm_add_ps (_mm_mul_ps (c5HalfVec, rotXVec),
2037
+ _mm_mul_ps (c3HalfVec, rotYVec));
2038
+
2039
+ //
2040
+ // Fourth Stage, compute out_1 and out_7
2041
+ // (a rotation of (a7 + K0, K1 - a4), stored in rows 1 and 7)
2042
+
2043
+ rotXVec = _mm_add_ps (a7Vec, k0Vec);
2044
+ rotYVec = _mm_sub_ps (k1Vec, a4Vec);
2045
+
2046
+ srcVec[ 2 + pass] = _mm_sub_ps (_mm_mul_ps (c1HalfVec, rotXVec),
2047
+ _mm_mul_ps (c7HalfVec, rotYVec));
2048
+
2049
+ srcVec[14 + pass] = _mm_add_ps (_mm_mul_ps (c7HalfVec, rotXVec),
2050
+ _mm_mul_ps (c1HalfVec, rotYVec));
2051
+ }
2052
+
2053
+ //
2054
+ // Transpose the matrix, in 4x4 blocks. So, if we have our
2055
+ // 8x8 matrix divided into 4x4 blocks:
2056
+ //
2057
+ // M0 | M1 M0t | M2t
2058
+ // ----+--- --> -----+------
2059
+ // M2 | M3 M1t | M3t
2060
+ //
2061
+
2062
+ //
2063
+ // M0t, done in place, the first half.
2064
+ // (0x44 / 0xEE gather the low / high float pairs of two rows)
2065
+
2066
+ transTmp[0] = _mm_shuffle_ps (srcVec[0], srcVec[2], 0x44);
2067
+ transTmp[1] = _mm_shuffle_ps (srcVec[4], srcVec[6], 0x44);
2068
+ transTmp[3] = _mm_shuffle_ps (srcVec[4], srcVec[6], 0xEE);
2069
+ transTmp[2] = _mm_shuffle_ps (srcVec[0], srcVec[2], 0xEE);
2070
+
2071
+ //
2072
+ // M3t, also done in place, the first half.
2073
+ //
2074
+
2075
+ transTmp2[0] = _mm_shuffle_ps (srcVec[ 9], srcVec[11], 0x44);
2076
+ transTmp2[1] = _mm_shuffle_ps (srcVec[13], srcVec[15], 0x44);
2077
+ transTmp2[2] = _mm_shuffle_ps (srcVec[ 9], srcVec[11], 0xEE);
2078
+ transTmp2[3] = _mm_shuffle_ps (srcVec[13], srcVec[15], 0xEE);
2079
+
2080
+ //
2081
+ // M0t, the second half.
2082
+ // (0x88 / 0xDD pick the even / odd elements, yielding the transposed rows)
2083
+
2084
+ srcVec[0] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0x88);
2085
+ srcVec[4] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0x88);
2086
+ srcVec[2] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0xDD);
2087
+ srcVec[6] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0xDD);
2088
+
2089
+ //
2090
+ // M3t, the second half.
2091
+ //
2092
+
2093
+ srcVec[ 9] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0x88);
2094
+ srcVec[13] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0x88);
2095
+ srcVec[11] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0xDD);
2096
+ srcVec[15] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0xDD);
2097
+
2098
+ //
2099
+ // M1 and M2 need to be done at the same time, because we're
2100
+ // swapping.
2101
+ //
2102
+ // First, the first half of M1t
2103
+ //
2104
+
2105
+ transTmp[0] = _mm_shuffle_ps (srcVec[1], srcVec[3], 0x44);
2106
+ transTmp[1] = _mm_shuffle_ps (srcVec[5], srcVec[7], 0x44);
2107
+ transTmp[2] = _mm_shuffle_ps (srcVec[1], srcVec[3], 0xEE);
2108
+ transTmp[3] = _mm_shuffle_ps (srcVec[5], srcVec[7], 0xEE);
2109
+
2110
+ //
2111
+ // And the first half of M2t
2112
+ //
2113
+
2114
+ transTmp2[0] = _mm_shuffle_ps (srcVec[ 8], srcVec[10], 0x44);
2115
+ transTmp2[1] = _mm_shuffle_ps (srcVec[12], srcVec[14], 0x44);
2116
+ transTmp2[2] = _mm_shuffle_ps (srcVec[ 8], srcVec[10], 0xEE);
2117
+ transTmp2[3] = _mm_shuffle_ps (srcVec[12], srcVec[14], 0xEE);
2118
+
2119
+ //
2120
+ // Second half of M1t
2121
+ // (written to the slots M2 occupied)
2122
+
2123
+ srcVec[ 8] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0x88);
2124
+ srcVec[12] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0x88);
2125
+ srcVec[10] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0xDD);
2126
+ srcVec[14] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0xDD);
2127
+
2128
+ //
2129
+ // Second half of M2t
2130
+ // (written to the slots M1 occupied)
2131
+
2132
+ srcVec[1] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0x88);
2133
+ srcVec[5] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0x88);
2134
+ srcVec[3] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0xDD);
2135
+ srcVec[7] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0xDD);
2136
+ }
2137
+ }
2138
+
2139
+ #endif /* IMF_HAVE_SSE2 */
2140
+
2141
+ } // anonymous namespace
2142
+
2143
+ OPENEXR_IMF_INTERNAL_NAMESPACE_HEADER_EXIT
2144
+
2145
+ #endif