rfreeimage 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +1 -0
- data/Rakefile +34 -0
- data/ext/rfreeimage/extconf.rb +35 -0
- data/ext/rfreeimage/rfi_main.c +389 -0
- data/lib/rfreeimage/image.rb +26 -0
- data/lib/rfreeimage/version.rb +3 -0
- data/lib/rfreeimage.rb +3 -0
- data/rfreeimage.gemspec +32 -0
- data/vendor/FreeImage/Makefile +34 -0
- data/vendor/FreeImage/Makefile.cygwin +74 -0
- data/vendor/FreeImage/Makefile.fip +84 -0
- data/vendor/FreeImage/Makefile.gnu +83 -0
- data/vendor/FreeImage/Makefile.iphone +96 -0
- data/vendor/FreeImage/Makefile.mingw +136 -0
- data/vendor/FreeImage/Makefile.osx +115 -0
- data/vendor/FreeImage/Makefile.solaris +66 -0
- data/vendor/FreeImage/Makefile.srcs +6 -0
- data/vendor/FreeImage/README.iphone +19 -0
- data/vendor/FreeImage/README.linux +50 -0
- data/vendor/FreeImage/README.minGW +236 -0
- data/vendor/FreeImage/README.osx +44 -0
- data/vendor/FreeImage/README.solaris +67 -0
- data/vendor/FreeImage/Source/CacheFile.h +92 -0
- data/vendor/FreeImage/Source/DeprecationManager/Deprecated.cpp +36 -0
- data/vendor/FreeImage/Source/DeprecationManager/DeprecationMgr.cpp +103 -0
- data/vendor/FreeImage/Source/DeprecationManager/DeprecationMgr.h +83 -0
- data/vendor/FreeImage/Source/FreeImage/BitmapAccess.cpp +1573 -0
- data/vendor/FreeImage/Source/FreeImage/CacheFile.cpp +271 -0
- data/vendor/FreeImage/Source/FreeImage/ColorLookup.cpp +785 -0
- data/vendor/FreeImage/Source/FreeImage/Conversion.cpp +551 -0
- data/vendor/FreeImage/Source/FreeImage/Conversion16_555.cpp +209 -0
- data/vendor/FreeImage/Source/FreeImage/Conversion16_565.cpp +204 -0
- data/vendor/FreeImage/Source/FreeImage/Conversion24.cpp +252 -0
- data/vendor/FreeImage/Source/FreeImage/Conversion32.cpp +345 -0
- data/vendor/FreeImage/Source/FreeImage/Conversion4.cpp +246 -0
- data/vendor/FreeImage/Source/FreeImage/Conversion8.cpp +305 -0
- data/vendor/FreeImage/Source/FreeImage/ConversionFloat.cpp +194 -0
- data/vendor/FreeImage/Source/FreeImage/ConversionRGB16.cpp +144 -0
- data/vendor/FreeImage/Source/FreeImage/ConversionRGBA16.cpp +147 -0
- data/vendor/FreeImage/Source/FreeImage/ConversionRGBAF.cpp +250 -0
- data/vendor/FreeImage/Source/FreeImage/ConversionRGBF.cpp +243 -0
- data/vendor/FreeImage/Source/FreeImage/ConversionType.cpp +699 -0
- data/vendor/FreeImage/Source/FreeImage/ConversionUINT16.cpp +134 -0
- data/vendor/FreeImage/Source/FreeImage/FreeImage.cpp +226 -0
- data/vendor/FreeImage/Source/FreeImage/FreeImageC.c +22 -0
- data/vendor/FreeImage/Source/FreeImage/FreeImageIO.cpp +175 -0
- data/vendor/FreeImage/Source/FreeImage/GetType.cpp +92 -0
- data/vendor/FreeImage/Source/FreeImage/Halftoning.cpp +474 -0
- data/vendor/FreeImage/Source/FreeImage/J2KHelper.cpp +591 -0
- data/vendor/FreeImage/Source/FreeImage/J2KHelper.h +36 -0
- data/vendor/FreeImage/Source/FreeImage/LFPQuantizer.cpp +208 -0
- data/vendor/FreeImage/Source/FreeImage/MNGHelper.cpp +1320 -0
- data/vendor/FreeImage/Source/FreeImage/MemoryIO.cpp +237 -0
- data/vendor/FreeImage/Source/FreeImage/MultiPage.cpp +974 -0
- data/vendor/FreeImage/Source/FreeImage/NNQuantizer.cpp +507 -0
- data/vendor/FreeImage/Source/FreeImage/PSDParser.cpp +1057 -0
- data/vendor/FreeImage/Source/FreeImage/PSDParser.h +271 -0
- data/vendor/FreeImage/Source/FreeImage/PixelAccess.cpp +197 -0
- data/vendor/FreeImage/Source/FreeImage/Plugin.cpp +822 -0
- data/vendor/FreeImage/Source/FreeImage/PluginBMP.cpp +1494 -0
- data/vendor/FreeImage/Source/FreeImage/PluginCUT.cpp +240 -0
- data/vendor/FreeImage/Source/FreeImage/PluginDDS.cpp +655 -0
- data/vendor/FreeImage/Source/FreeImage/PluginEXR.cpp +773 -0
- data/vendor/FreeImage/Source/FreeImage/PluginG3.cpp +433 -0
- data/vendor/FreeImage/Source/FreeImage/PluginGIF.cpp +1407 -0
- data/vendor/FreeImage/Source/FreeImage/PluginHDR.cpp +722 -0
- data/vendor/FreeImage/Source/FreeImage/PluginICO.cpp +824 -0
- data/vendor/FreeImage/Source/FreeImage/PluginIFF.cpp +459 -0
- data/vendor/FreeImage/Source/FreeImage/PluginJ2K.cpp +328 -0
- data/vendor/FreeImage/Source/FreeImage/PluginJNG.cpp +162 -0
- data/vendor/FreeImage/Source/FreeImage/PluginJP2.cpp +328 -0
- data/vendor/FreeImage/Source/FreeImage/PluginJPEG.cpp +1706 -0
- data/vendor/FreeImage/Source/FreeImage/PluginJXR.cpp +1475 -0
- data/vendor/FreeImage/Source/FreeImage/PluginKOALA.cpp +243 -0
- data/vendor/FreeImage/Source/FreeImage/PluginMNG.cpp +153 -0
- data/vendor/FreeImage/Source/FreeImage/PluginPCD.cpp +251 -0
- data/vendor/FreeImage/Source/FreeImage/PluginPCX.cpp +659 -0
- data/vendor/FreeImage/Source/FreeImage/PluginPFM.cpp +409 -0
- data/vendor/FreeImage/Source/FreeImage/PluginPICT.cpp +1343 -0
- data/vendor/FreeImage/Source/FreeImage/PluginPNG.cpp +1115 -0
- data/vendor/FreeImage/Source/FreeImage/PluginPNM.cpp +838 -0
- data/vendor/FreeImage/Source/FreeImage/PluginPSD.cpp +131 -0
- data/vendor/FreeImage/Source/FreeImage/PluginRAS.cpp +512 -0
- data/vendor/FreeImage/Source/FreeImage/PluginRAW.cpp +793 -0
- data/vendor/FreeImage/Source/FreeImage/PluginSGI.cpp +425 -0
- data/vendor/FreeImage/Source/FreeImage/PluginTARGA.cpp +1591 -0
- data/vendor/FreeImage/Source/FreeImage/PluginTIFF.cpp +2631 -0
- data/vendor/FreeImage/Source/FreeImage/PluginWBMP.cpp +372 -0
- data/vendor/FreeImage/Source/FreeImage/PluginWebP.cpp +698 -0
- data/vendor/FreeImage/Source/FreeImage/PluginXBM.cpp +399 -0
- data/vendor/FreeImage/Source/FreeImage/PluginXPM.cpp +487 -0
- data/vendor/FreeImage/Source/FreeImage/TIFFLogLuv.cpp +65 -0
- data/vendor/FreeImage/Source/FreeImage/ToneMapping.cpp +75 -0
- data/vendor/FreeImage/Source/FreeImage/WuQuantizer.cpp +559 -0
- data/vendor/FreeImage/Source/FreeImage/ZLibInterface.cpp +223 -0
- data/vendor/FreeImage/Source/FreeImage/tmoColorConvert.cpp +479 -0
- data/vendor/FreeImage/Source/FreeImage/tmoDrago03.cpp +295 -0
- data/vendor/FreeImage/Source/FreeImage/tmoFattal02.cpp +689 -0
- data/vendor/FreeImage/Source/FreeImage/tmoReinhard05.cpp +260 -0
- data/vendor/FreeImage/Source/FreeImage.h +1153 -0
- data/vendor/FreeImage/Source/FreeImageIO.h +63 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/BSplineRotate.cpp +730 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Background.cpp +895 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Channels.cpp +488 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/ClassicRotate.cpp +917 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Colors.cpp +967 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/CopyPaste.cpp +861 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Display.cpp +230 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Filters.h +287 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Flip.cpp +166 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/JPEGTransform.cpp +623 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/MultigridPoissonSolver.cpp +505 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Rescale.cpp +192 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Resize.cpp +2116 -0
- data/vendor/FreeImage/Source/FreeImageToolkit/Resize.h +196 -0
- data/vendor/FreeImage/Source/LibJPEG/ansi2knr.c +739 -0
- data/vendor/FreeImage/Source/LibJPEG/cderror.h +134 -0
- data/vendor/FreeImage/Source/LibJPEG/cdjpeg.c +181 -0
- data/vendor/FreeImage/Source/LibJPEG/cdjpeg.h +187 -0
- data/vendor/FreeImage/Source/LibJPEG/cjpeg.c +664 -0
- data/vendor/FreeImage/Source/LibJPEG/ckconfig.c +402 -0
- data/vendor/FreeImage/Source/LibJPEG/djpeg.c +617 -0
- data/vendor/FreeImage/Source/LibJPEG/example.c +433 -0
- data/vendor/FreeImage/Source/LibJPEG/jaricom.c +153 -0
- data/vendor/FreeImage/Source/LibJPEG/jcapimin.c +288 -0
- data/vendor/FreeImage/Source/LibJPEG/jcapistd.c +162 -0
- data/vendor/FreeImage/Source/LibJPEG/jcarith.c +944 -0
- data/vendor/FreeImage/Source/LibJPEG/jccoefct.c +454 -0
- data/vendor/FreeImage/Source/LibJPEG/jccolor.c +604 -0
- data/vendor/FreeImage/Source/LibJPEG/jcdctmgr.c +477 -0
- data/vendor/FreeImage/Source/LibJPEG/jchuff.c +1573 -0
- data/vendor/FreeImage/Source/LibJPEG/jcinit.c +84 -0
- data/vendor/FreeImage/Source/LibJPEG/jcmainct.c +297 -0
- data/vendor/FreeImage/Source/LibJPEG/jcmarker.c +719 -0
- data/vendor/FreeImage/Source/LibJPEG/jcmaster.c +856 -0
- data/vendor/FreeImage/Source/LibJPEG/jcomapi.c +106 -0
- data/vendor/FreeImage/Source/LibJPEG/jconfig.h +161 -0
- data/vendor/FreeImage/Source/LibJPEG/jcparam.c +675 -0
- data/vendor/FreeImage/Source/LibJPEG/jcprepct.c +358 -0
- data/vendor/FreeImage/Source/LibJPEG/jcsample.c +545 -0
- data/vendor/FreeImage/Source/LibJPEG/jctrans.c +385 -0
- data/vendor/FreeImage/Source/LibJPEG/jdapimin.c +399 -0
- data/vendor/FreeImage/Source/LibJPEG/jdapistd.c +276 -0
- data/vendor/FreeImage/Source/LibJPEG/jdarith.c +796 -0
- data/vendor/FreeImage/Source/LibJPEG/jdatadst.c +270 -0
- data/vendor/FreeImage/Source/LibJPEG/jdatasrc.c +275 -0
- data/vendor/FreeImage/Source/LibJPEG/jdcoefct.c +741 -0
- data/vendor/FreeImage/Source/LibJPEG/jdcolor.c +748 -0
- data/vendor/FreeImage/Source/LibJPEG/jdct.h +393 -0
- data/vendor/FreeImage/Source/LibJPEG/jddctmgr.c +384 -0
- data/vendor/FreeImage/Source/LibJPEG/jdhuff.c +1554 -0
- data/vendor/FreeImage/Source/LibJPEG/jdinput.c +662 -0
- data/vendor/FreeImage/Source/LibJPEG/jdmainct.c +513 -0
- data/vendor/FreeImage/Source/LibJPEG/jdmarker.c +1511 -0
- data/vendor/FreeImage/Source/LibJPEG/jdmaster.c +543 -0
- data/vendor/FreeImage/Source/LibJPEG/jdmerge.c +401 -0
- data/vendor/FreeImage/Source/LibJPEG/jdpostct.c +290 -0
- data/vendor/FreeImage/Source/LibJPEG/jdsample.c +361 -0
- data/vendor/FreeImage/Source/LibJPEG/jdtrans.c +140 -0
- data/vendor/FreeImage/Source/LibJPEG/jerror.c +253 -0
- data/vendor/FreeImage/Source/LibJPEG/jerror.h +304 -0
- data/vendor/FreeImage/Source/LibJPEG/jfdctflt.c +174 -0
- data/vendor/FreeImage/Source/LibJPEG/jfdctfst.c +230 -0
- data/vendor/FreeImage/Source/LibJPEG/jfdctint.c +4406 -0
- data/vendor/FreeImage/Source/LibJPEG/jidctflt.c +235 -0
- data/vendor/FreeImage/Source/LibJPEG/jidctfst.c +368 -0
- data/vendor/FreeImage/Source/LibJPEG/jidctint.c +5179 -0
- data/vendor/FreeImage/Source/LibJPEG/jinclude.h +91 -0
- data/vendor/FreeImage/Source/LibJPEG/jmemansi.c +167 -0
- data/vendor/FreeImage/Source/LibJPEG/jmemdos.c +638 -0
- data/vendor/FreeImage/Source/LibJPEG/jmemmac.c +289 -0
- data/vendor/FreeImage/Source/LibJPEG/jmemmgr.c +1119 -0
- data/vendor/FreeImage/Source/LibJPEG/jmemname.c +276 -0
- data/vendor/FreeImage/Source/LibJPEG/jmemnobs.c +109 -0
- data/vendor/FreeImage/Source/LibJPEG/jmemsys.h +198 -0
- data/vendor/FreeImage/Source/LibJPEG/jmorecfg.h +442 -0
- data/vendor/FreeImage/Source/LibJPEG/jpegint.h +426 -0
- data/vendor/FreeImage/Source/LibJPEG/jpeglib.h +1180 -0
- data/vendor/FreeImage/Source/LibJPEG/jpegtran.c +577 -0
- data/vendor/FreeImage/Source/LibJPEG/jquant1.c +857 -0
- data/vendor/FreeImage/Source/LibJPEG/jquant2.c +1311 -0
- data/vendor/FreeImage/Source/LibJPEG/jutils.c +227 -0
- data/vendor/FreeImage/Source/LibJPEG/jversion.h +14 -0
- data/vendor/FreeImage/Source/LibJPEG/rdbmp.c +480 -0
- data/vendor/FreeImage/Source/LibJPEG/rdcolmap.c +253 -0
- data/vendor/FreeImage/Source/LibJPEG/rdgif.c +38 -0
- data/vendor/FreeImage/Source/LibJPEG/rdjpgcom.c +515 -0
- data/vendor/FreeImage/Source/LibJPEG/rdppm.c +459 -0
- data/vendor/FreeImage/Source/LibJPEG/rdrle.c +387 -0
- data/vendor/FreeImage/Source/LibJPEG/rdswitch.c +365 -0
- data/vendor/FreeImage/Source/LibJPEG/rdtarga.c +500 -0
- data/vendor/FreeImage/Source/LibJPEG/transupp.c +1763 -0
- data/vendor/FreeImage/Source/LibJPEG/transupp.h +219 -0
- data/vendor/FreeImage/Source/LibJPEG/wrbmp.c +442 -0
- data/vendor/FreeImage/Source/LibJPEG/wrgif.c +399 -0
- data/vendor/FreeImage/Source/LibJPEG/wrjpgcom.c +583 -0
- data/vendor/FreeImage/Source/LibJPEG/wrppm.c +269 -0
- data/vendor/FreeImage/Source/LibJPEG/wrrle.c +305 -0
- data/vendor/FreeImage/Source/LibJPEG/wrtarga.c +253 -0
- data/vendor/FreeImage/Source/LibJXR/common/include/guiddef.h +230 -0
- data/vendor/FreeImage/Source/LibJXR/common/include/wmsal.h +757 -0
- data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstring.h +342 -0
- data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_adt.h +71 -0
- data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_strict.h +1096 -0
- data/vendor/FreeImage/Source/LibJXR/common/include/wmspecstrings_undef.h +406 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/JXRTranscode.c +987 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/decode.c +200 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/decode.h +143 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/postprocess.c +288 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/segdec.c +1205 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/strInvTransform.c +1888 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/strPredQuantDec.c +539 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/strdec.c +3628 -0
- data/vendor/FreeImage/Source/LibJXR/image/decode/strdec_x86.c +1640 -0
- data/vendor/FreeImage/Source/LibJXR/image/encode/encode.c +144 -0
- data/vendor/FreeImage/Source/LibJXR/image/encode/encode.h +113 -0
- data/vendor/FreeImage/Source/LibJXR/image/encode/segenc.c +1186 -0
- data/vendor/FreeImage/Source/LibJXR/image/encode/strFwdTransform.c +1111 -0
- data/vendor/FreeImage/Source/LibJXR/image/encode/strPredQuantEnc.c +511 -0
- data/vendor/FreeImage/Source/LibJXR/image/encode/strenc.c +2370 -0
- data/vendor/FreeImage/Source/LibJXR/image/encode/strenc_x86.c +409 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/adapthuff.c +511 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/ansi.h +61 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/common.h +131 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/image.c +183 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/perfTimer.h +115 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/perfTimerANSI.c +274 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/strPredQuant.c +306 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/strTransform.c +85 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/strTransform.h +50 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/strcodec.c +1251 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/strcodec.h +681 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/windowsmediaphoto.h +515 -0
- data/vendor/FreeImage/Source/LibJXR/image/sys/xplatform_image.h +84 -0
- data/vendor/FreeImage/Source/LibJXR/image/x86/x86.h +58 -0
- data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlue.c +930 -0
- data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlue.h +636 -0
- data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGlueJxr.c +2246 -0
- data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRGluePFC.c +2338 -0
- data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRMeta.c +905 -0
- data/vendor/FreeImage/Source/LibJXR/jxrgluelib/JXRMeta.h +258 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/bio.c +188 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/bio.h +128 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/cidx_manager.c +239 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/cidx_manager.h +68 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/cio.c +644 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/cio.h +393 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/dwt.c +919 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/dwt.h +116 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/event.c +141 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/event.h +97 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/function_list.c +114 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/function_list.h +126 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/image.c +235 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/image.h +63 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/indexbox_manager.h +148 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/invert.c +289 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/invert.h +59 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/j2k.c +10238 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/j2k.h +838 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/jp2.c +2776 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/jp2.h +490 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/mct.c +319 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/mct.h +149 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/mqc.c +604 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/mqc.h +201 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/openjpeg.c +955 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/openjpeg.h +1475 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_clock.c +59 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_clock.h +54 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_codec.h +160 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_config.h +9 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_config_private.h +16 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_includes.h +175 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_intmath.h +172 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_inttypes.h +43 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_malloc.h +180 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/opj_stdint.h +47 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/phix_manager.c +191 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/pi.c +1870 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/pi.h +182 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/ppix_manager.c +194 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/raw.c +89 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/raw.h +100 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/t1.c +1751 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/t1.h +157 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/t1_generate_luts.c +276 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/t1_luts.h +143 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/t2.c +1334 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/t2.h +127 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/tcd.c +2123 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/tcd.h +348 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/tgt.c +331 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/tgt.h +140 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/thix_manager.c +134 -0
- data/vendor/FreeImage/Source/LibOpenJPEG/tpix_manager.c +185 -0
- data/vendor/FreeImage/Source/LibPNG/example.c +1061 -0
- data/vendor/FreeImage/Source/LibPNG/png.c +4493 -0
- data/vendor/FreeImage/Source/LibPNG/png.h +3282 -0
- data/vendor/FreeImage/Source/LibPNG/pngconf.h +644 -0
- data/vendor/FreeImage/Source/LibPNG/pngdebug.h +154 -0
- data/vendor/FreeImage/Source/LibPNG/pngerror.c +963 -0
- data/vendor/FreeImage/Source/LibPNG/pngget.c +1213 -0
- data/vendor/FreeImage/Source/LibPNG/pnginfo.h +260 -0
- data/vendor/FreeImage/Source/LibPNG/pnglibconf.h +218 -0
- data/vendor/FreeImage/Source/LibPNG/pngmem.c +281 -0
- data/vendor/FreeImage/Source/LibPNG/pngpread.c +1168 -0
- data/vendor/FreeImage/Source/LibPNG/pngpriv.h +1944 -0
- data/vendor/FreeImage/Source/LibPNG/pngread.c +4121 -0
- data/vendor/FreeImage/Source/LibPNG/pngrio.c +120 -0
- data/vendor/FreeImage/Source/LibPNG/pngrtran.c +4994 -0
- data/vendor/FreeImage/Source/LibPNG/pngrutil.c +4474 -0
- data/vendor/FreeImage/Source/LibPNG/pngset.c +1611 -0
- data/vendor/FreeImage/Source/LibPNG/pngstruct.h +489 -0
- data/vendor/FreeImage/Source/LibPNG/pngtest.c +2011 -0
- data/vendor/FreeImage/Source/LibPNG/pngtrans.c +849 -0
- data/vendor/FreeImage/Source/LibPNG/pngwio.c +168 -0
- data/vendor/FreeImage/Source/LibPNG/pngwrite.c +2455 -0
- data/vendor/FreeImage/Source/LibPNG/pngwtran.c +574 -0
- data/vendor/FreeImage/Source/LibPNG/pngwutil.c +3029 -0
- data/vendor/FreeImage/Source/LibRawLite/dcraw/dcraw.c +15462 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/aahd_demosaic.cpp +706 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/dcb_demosaicing.c +710 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/dcraw_common.cpp +13593 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/dcraw_fileio.cpp +240 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/defines.h +167 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/demosaic_packs.cpp +99 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/dht_demosaic.cpp +873 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/libraw_internal_funcs.h +282 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/libraw_x3f.cpp +1919 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/var_defines.h +216 -0
- data/vendor/FreeImage/Source/LibRawLite/internal/wf_filtering.cpp +1950 -0
- data/vendor/FreeImage/Source/LibRawLite/libraw/libraw.h +338 -0
- data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_alloc.h +99 -0
- data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_const.h +233 -0
- data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_datastream.h +238 -0
- data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_internal.h +225 -0
- data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_types.h +442 -0
- data/vendor/FreeImage/Source/LibRawLite/libraw/libraw_version.h +62 -0
- data/vendor/FreeImage/Source/LibRawLite/src/libraw_c_api.cpp +230 -0
- data/vendor/FreeImage/Source/LibRawLite/src/libraw_cxx.cpp +4533 -0
- data/vendor/FreeImage/Source/LibRawLite/src/libraw_datastream.cpp +703 -0
- data/vendor/FreeImage/Source/LibTIFF4/mkg3states.c +451 -0
- data/vendor/FreeImage/Source/LibTIFF4/mkspans.c +82 -0
- data/vendor/FreeImage/Source/LibTIFF4/t4.h +292 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_aux.c +358 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_close.c +140 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_codec.c +166 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_color.c +287 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_compress.c +304 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_config.h +97 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_config.vc.h +74 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_config.wince.h +71 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_dir.c +1700 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_dir.h +308 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_dirinfo.c +959 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_dirread.c +5640 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_dirwrite.c +2910 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_dumpmode.c +143 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_error.c +80 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_extension.c +118 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_fax3.c +1595 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_fax3.h +538 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_fax3sm.c +1260 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_flush.c +118 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_getimage.c +2890 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_jbig.c +213 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_jpeg.c +2354 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_jpeg_12.c +65 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_luv.c +1683 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_lzma.c +495 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_lzw.c +1169 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_next.c +181 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_ojpeg.c +2501 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_open.c +725 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_packbits.c +300 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_pixarlog.c +1442 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_predict.c +764 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_predict.h +77 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_print.c +716 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_read.c +1086 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_strip.c +383 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_swab.c +310 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_thunder.c +207 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_tile.c +299 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_unix.c +325 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_version.c +40 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_vms.c +603 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_warning.c +81 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_win32.c +443 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_wince.c +293 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_write.c +771 -0
- data/vendor/FreeImage/Source/LibTIFF4/tif_zip.c +472 -0
- data/vendor/FreeImage/Source/LibTIFF4/tiff.h +681 -0
- data/vendor/FreeImage/Source/LibTIFF4/tiffconf.h +170 -0
- data/vendor/FreeImage/Source/LibTIFF4/tiffconf.vc.h +160 -0
- data/vendor/FreeImage/Source/LibTIFF4/tiffconf.wince.h +121 -0
- data/vendor/FreeImage/Source/LibTIFF4/tiffio.h +557 -0
- data/vendor/FreeImage/Source/LibTIFF4/tiffiop.h +367 -0
- data/vendor/FreeImage/Source/LibTIFF4/tiffvers.h +9 -0
- data/vendor/FreeImage/Source/LibTIFF4/uvcode.h +180 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/alphai.h +55 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.alpha.c +167 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.buffer.c +249 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.frame.c +827 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.idec.c +857 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.io.c +640 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.quant.c +110 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.tree.c +525 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.vp8.c +663 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.vp8l.c +1584 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/dec.webp.c +834 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/decode_vp8.h +185 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/vp8i.h +353 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/vp8li.h +136 -0
- data/vendor/FreeImage/Source/LibWebP/src/dec/webpi.h +120 -0
- data/vendor/FreeImage/Source/LibWebP/src/demux/demux.demux.c +957 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing.c +377 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing_mips_dsp_r2.c +139 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.alpha_processing_sse2.c +296 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb.c +68 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb_mips_dsp_r2.c +108 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.argb_sse2.c +62 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost.c +412 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_mips32.c +154 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_mips_dsp_r2.c +107 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cost_sse2.c +121 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.cpu.c +138 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec.c +760 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_clip_tables.c +366 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_mips32.c +585 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c +992 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_neon.c +1489 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.dec_sse2.c +1284 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc.c +788 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_avx2.c +24 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_mips32.c +670 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c +1510 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_neon.c +932 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.enc_sse2.c +940 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters.c +240 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c +404 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.filters_sse2.c +349 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.h +434 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless.c +1838 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_mips32.c +416 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c +921 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_neon.c +357 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.lossless_sse2.c +535 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler.c +115 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler_mips32.c +192 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.rescaler_mips_dsp_r2.c +210 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling.c +252 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c +280 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_neon.c +267 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.upsampling_sse2.c +214 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv.c +166 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_mips32.c +100 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c +131 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/dsp.yuv_sse2.c +322 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/lossless.h +313 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/mips_macro.h +200 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/neon.h +82 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/yuv.h +321 -0
- data/vendor/FreeImage/Source/LibWebP/src/dsp/yuv_tables_sse2.h +536 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/backward_references.h +202 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/cost.h +69 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.alpha.c +440 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.analysis.c +501 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.backward_references.c +1076 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.config.c +163 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.cost.c +355 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.filter.c +296 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.frame.c +850 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.histogram.c +897 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.iterator.c +456 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.near_lossless.c +160 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture.c +290 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_csp.c +1100 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_psnr.c +150 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_rescale.c +285 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.picture_tools.c +206 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.quant.c +1191 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.syntax.c +383 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.token.c +285 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.tree.c +504 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.vp8l.c +1437 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/enc.webpenc.c +379 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/histogram.h +114 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/vp8enci.h +551 -0
- data/vendor/FreeImage/Source/LibWebP/src/enc/vp8li.h +78 -0
- data/vendor/FreeImage/Source/LibWebP/src/mux/mux.anim_encode.c +1241 -0
- data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxedit.c +696 -0
- data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxinternal.c +551 -0
- data/vendor/FreeImage/Source/LibWebP/src/mux/mux.muxread.c +544 -0
- data/vendor/FreeImage/Source/LibWebP/src/mux/muxi.h +232 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/bit_reader.h +168 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/bit_reader_inl.h +172 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/bit_writer.h +120 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/color_cache.h +74 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/endian_inl.h +100 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/filters.h +32 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/huffman.h +67 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/huffman_encode.h +60 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/quant_levels.h +36 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/quant_levels_dec.h +35 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/random.h +63 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/rescaler.h +78 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/thread.h +93 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.bit_reader.c +208 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.bit_writer.c +308 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.color_cache.c +49 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.filters.c +76 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.h +121 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.huffman.c +205 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.huffman_encode.c +417 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.quant_levels.c +140 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.quant_levels_dec.c +279 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.random.c +43 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.rescaler.c +82 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.thread.c +309 -0
- data/vendor/FreeImage/Source/LibWebP/src/utils/utils.utils.c +211 -0
- data/vendor/FreeImage/Source/LibWebP/src/webp/decode.h +493 -0
- data/vendor/FreeImage/Source/LibWebP/src/webp/demux.h +224 -0
- data/vendor/FreeImage/Source/LibWebP/src/webp/encode.h +515 -0
- data/vendor/FreeImage/Source/LibWebP/src/webp/format_constants.h +88 -0
- data/vendor/FreeImage/Source/LibWebP/src/webp/mux.h +507 -0
- data/vendor/FreeImage/Source/LibWebP/src/webp/mux_types.h +97 -0
- data/vendor/FreeImage/Source/LibWebP/src/webp/types.h +52 -0
- data/vendor/FreeImage/Source/MapIntrospector.h +212 -0
- data/vendor/FreeImage/Source/Metadata/Exif.cpp +1253 -0
- data/vendor/FreeImage/Source/Metadata/FIRational.cpp +176 -0
- data/vendor/FreeImage/Source/Metadata/FIRational.h +108 -0
- data/vendor/FreeImage/Source/Metadata/FreeImageTag.cpp +353 -0
- data/vendor/FreeImage/Source/Metadata/FreeImageTag.h +500 -0
- data/vendor/FreeImage/Source/Metadata/IPTC.cpp +342 -0
- data/vendor/FreeImage/Source/Metadata/TagConversion.cpp +1094 -0
- data/vendor/FreeImage/Source/Metadata/TagLib.cpp +1618 -0
- data/vendor/FreeImage/Source/Metadata/XTIFF.cpp +766 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/eLut.cpp +114 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/eLut.h +71 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/half.cpp +310 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/half.h +757 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/halfExport.h +27 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/halfFunction.h +179 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/halfLimits.h +102 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/toFloat.cpp +164 -0
- data/vendor/FreeImage/Source/OpenEXR/Half/toFloat.h +16391 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/Iex.h +60 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexBaseExc.cpp +156 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexBaseExc.h +264 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexErrnoExc.h +208 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexExport.h +51 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexForward.h +229 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexMacros.h +170 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexMathExc.h +57 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexNamespace.h +112 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexThrowErrnoExc.cpp +873 -0
- data/vendor/FreeImage/Source/OpenEXR/Iex/IexThrowErrnoExc.h +97 -0
- data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFloatExc.cpp +113 -0
- data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFloatExc.h +146 -0
- data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFpu.cpp +530 -0
- data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathFpu.h +91 -0
- data/vendor/FreeImage/Source/OpenEXR/IexMath/IexMathIeeeExc.h +62 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmBaseConfig.h +61 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAcesFile.cpp +633 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAcesFile.h +324 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfArray.h +285 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAttribute.cpp +158 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAttribute.h +407 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfAutoArray.h +95 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfB44Compressor.cpp +1072 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfB44Compressor.h +118 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfBoxAttribute.cpp +111 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfBoxAttribute.h +87 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCRgbaFile.cpp +1438 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCRgbaFile.h +555 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelList.cpp +322 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelList.h +436 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelListAttribute.cpp +150 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChannelListAttribute.h +74 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCheckedArithmetic.h +163 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticities.cpp +151 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticities.h +131 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticitiesAttribute.cpp +87 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfChromaticitiesAttribute.h +73 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompositeDeepScanLine.cpp +591 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompositeDeepScanLine.h +142 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompression.h +84 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressionAttribute.cpp +78 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressionAttribute.h +64 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressor.cpp +226 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfCompressor.h +265 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfConvert.cpp +143 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfConvert.h +107 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepCompositing.cpp +110 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepCompositing.h +132 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepFrameBuffer.cpp +230 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepFrameBuffer.h +339 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageState.h +96 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageStateAttribute.cpp +78 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepImageStateAttribute.h +68 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputFile.cpp +2025 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputFile.h +276 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputPart.cpp +149 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineInputPart.h +181 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputFile.cpp +1552 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputFile.h +244 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputPart.cpp +107 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepScanLineOutputPart.h +168 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputFile.cpp +1979 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputFile.h +437 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputPart.cpp +273 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledInputPart.h +362 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputFile.cpp +2055 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputFile.h +475 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputPart.cpp +250 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDeepTiledOutputPart.h +394 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDoubleAttribute.cpp +57 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDoubleAttribute.h +59 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressor.cpp +3424 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressor.h +210 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfDwaCompressorSimd.h +2145 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmap.cpp +335 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmap.h +336 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmapAttribute.cpp +76 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfEnvmapAttribute.h +68 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfExport.h +46 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFastHuf.cpp +768 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFastHuf.h +148 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatAttribute.cpp +57 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatAttribute.h +58 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatVectorAttribute.cpp +84 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFloatVectorAttribute.h +76 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfForward.h +127 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFrameBuffer.cpp +228 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFrameBuffer.h +386 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFramesPerSecond.cpp +76 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfFramesPerSecond.h +94 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericInputFile.cpp +76 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericInputFile.h +58 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericOutputFile.cpp +112 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfGenericOutputFile.h +62 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHeader.cpp +1283 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHeader.h +699 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHuf.cpp +1114 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfHuf.h +82 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIO.cpp +110 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIO.h +255 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputFile.cpp +895 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputFile.h +240 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPart.cpp +114 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPart.h +84 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPartData.cpp +51 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputPartData.h +69 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInputStreamMutex.h +68 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfInt64.h +56 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIntAttribute.cpp +57 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfIntAttribute.h +58 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCode.cpp +217 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCode.h +167 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCodeAttribute.cpp +99 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfKeyCodeAttribute.h +73 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrder.h +69 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrderAttribute.cpp +78 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLineOrderAttribute.h +72 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLut.cpp +178 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfLut.h +188 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMatrixAttribute.cpp +263 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMatrixAttribute.h +83 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMisc.cpp +1872 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMisc.h +466 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartInputFile.cpp +783 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartInputFile.h +128 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartOutputFile.cpp +519 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiPartOutputFile.h +118 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiView.cpp +435 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfMultiView.h +187 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfName.h +150 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfNamespace.h +115 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOpaqueAttribute.cpp +126 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOpaqueAttribute.h +110 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOptimizedPixelReading.h +646 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputFile.cpp +1378 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputFile.h +263 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPart.cpp +105 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPart.h +77 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPartData.cpp +52 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputPartData.h +62 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfOutputStreamMutex.h +70 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartHelper.h +262 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartType.cpp +63 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPartType.h +62 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPixelType.h +67 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPizCompressor.cpp +667 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPizCompressor.h +117 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImage.cpp +104 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImage.h +135 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImageAttribute.cpp +103 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPreviewImageAttribute.h +70 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPxr24Compressor.cpp +553 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfPxr24Compressor.h +109 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRational.cpp +127 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRational.h +98 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRationalAttribute.cpp +74 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRationalAttribute.h +69 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgba.h +109 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaFile.cpp +1405 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaFile.h +346 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaYca.cpp +497 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRgbaYca.h +259 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRle.cpp +157 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRle.h +63 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRleCompressor.cpp +220 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfRleCompressor.h +80 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfScanLineInputFile.cpp +1702 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfScanLineInputFile.h +210 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSimd.h +59 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStandardAttributes.cpp +125 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStandardAttributes.h +382 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStdIO.cpp +242 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStdIO.h +160 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringAttribute.cpp +80 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringAttribute.h +71 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringVectorAttribute.cpp +100 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfStringVectorAttribute.h +74 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSystemSpecific.cpp +129 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfSystemSpecific.h +172 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTestFile.cpp +216 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTestFile.h +97 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfThreading.cpp +62 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfThreading.h +95 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescription.h +107 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescriptionAttribute.cpp +86 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileDescriptionAttribute.h +72 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileOffsets.cpp +552 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTileOffsets.h +125 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputFile.cpp +1533 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputFile.h +401 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputPart.cpp +208 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledInputPart.h +100 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledMisc.cpp +389 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledMisc.h +106 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputFile.cpp +1841 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputFile.h +495 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputPart.cpp +228 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledOutputPart.h +105 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledRgbaFile.cpp +1163 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTiledRgbaFile.h +482 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCode.cpp +431 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCode.h +242 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCodeAttribute.cpp +79 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfTimeCodeAttribute.h +74 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVecAttribute.cpp +217 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVecAttribute.h +100 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVersion.cpp +60 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfVersion.h +136 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfWav.cpp +391 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfWav.h +78 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfXdr.h +927 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZip.cpp +196 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZip.h +78 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZipCompressor.cpp +127 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/ImfZipCompressor.h +89 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/b44ExpLogTable.cpp +136 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/b44ExpLogTable.h +16396 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/dwaLookups.cpp +573 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmImf/dwaLookups.h +98334 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThread.cpp +80 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThread.h +143 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadExport.h +46 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadForward.h +52 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutex.cpp +59 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutex.h +160 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutexPosix.cpp +85 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadMutexWin32.cpp +79 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadNamespace.h +114 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPool.cpp +483 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPool.h +160 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadPosix.cpp +98 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphore.cpp +60 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphore.h +112 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphorePosix.cpp +106 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphorePosixCompat.cpp +155 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadSemaphoreWin32.cpp +153 -0
- data/vendor/FreeImage/Source/OpenEXR/IlmThread/IlmThreadWin32.cpp +100 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBox.cpp +37 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBox.h +849 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathBoxAlgo.h +1016 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColor.h +736 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColorAlgo.cpp +178 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathColorAlgo.h +257 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathEuler.h +926 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathExc.h +73 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathExport.h +46 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathForward.h +72 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrame.h +192 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrustum.h +741 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFrustumTest.h +417 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFun.cpp +181 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathFun.h +269 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathGL.h +166 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathGLU.h +54 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathHalfLimits.h +68 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathInt64.h +62 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathInterval.h +226 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLimits.h +268 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLine.h +185 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathLineAlgo.h +288 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMath.h +208 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrix.h +3441 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrixAlgo.cpp +1252 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathMatrixAlgo.h +1425 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathNamespace.h +115 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathPlane.h +257 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathPlatform.h +112 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathQuat.h +964 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRandom.cpp +194 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRandom.h +401 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathRoots.h +219 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathShear.cpp +54 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathShear.h +656 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathSphere.h +177 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVec.cpp +583 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVec.h +2227 -0
- data/vendor/FreeImage/Source/OpenEXR/Imath/ImathVecAlgo.h +147 -0
- data/vendor/FreeImage/Source/OpenEXR/OpenEXRConfig.h +72 -0
- data/vendor/FreeImage/Source/Plugin.h +144 -0
- data/vendor/FreeImage/Source/Quantizers.h +354 -0
- data/vendor/FreeImage/Source/ToneMapping.h +44 -0
- data/vendor/FreeImage/Source/Utilities.h +516 -0
- data/vendor/FreeImage/Source/ZLib/adler32.c +179 -0
- data/vendor/FreeImage/Source/ZLib/compress.c +80 -0
- data/vendor/FreeImage/Source/ZLib/crc32.c +425 -0
- data/vendor/FreeImage/Source/ZLib/crc32.h +441 -0
- data/vendor/FreeImage/Source/ZLib/deflate.c +1967 -0
- data/vendor/FreeImage/Source/ZLib/deflate.h +346 -0
- data/vendor/FreeImage/Source/ZLib/gzclose.c +25 -0
- data/vendor/FreeImage/Source/ZLib/gzguts.h +209 -0
- data/vendor/FreeImage/Source/ZLib/gzlib.c +634 -0
- data/vendor/FreeImage/Source/ZLib/gzread.c +594 -0
- data/vendor/FreeImage/Source/ZLib/gzwrite.c +577 -0
- data/vendor/FreeImage/Source/ZLib/infback.c +640 -0
- data/vendor/FreeImage/Source/ZLib/inffast.c +340 -0
- data/vendor/FreeImage/Source/ZLib/inffast.h +11 -0
- data/vendor/FreeImage/Source/ZLib/inffixed.h +94 -0
- data/vendor/FreeImage/Source/ZLib/inflate.c +1512 -0
- data/vendor/FreeImage/Source/ZLib/inflate.h +122 -0
- data/vendor/FreeImage/Source/ZLib/inftrees.c +306 -0
- data/vendor/FreeImage/Source/ZLib/inftrees.h +62 -0
- data/vendor/FreeImage/Source/ZLib/trees.c +1226 -0
- data/vendor/FreeImage/Source/ZLib/trees.h +128 -0
- data/vendor/FreeImage/Source/ZLib/uncompr.c +59 -0
- data/vendor/FreeImage/Source/ZLib/zconf.h +511 -0
- data/vendor/FreeImage/Source/ZLib/zlib.h +1768 -0
- data/vendor/FreeImage/Source/ZLib/zutil.c +324 -0
- data/vendor/FreeImage/Source/ZLib/zutil.h +253 -0
- metadata +931 -0
@@ -0,0 +1,2145 @@
|
|
1
|
+
///////////////////////////////////////////////////////////////////////////
|
2
|
+
//
|
3
|
+
// Copyright (c) 2009-2014 DreamWorks Animation LLC.
|
4
|
+
//
|
5
|
+
// All rights reserved.
|
6
|
+
//
|
7
|
+
// Redistribution and use in source and binary forms, with or without
|
8
|
+
// modification, are permitted provided that the following conditions are
|
9
|
+
// met:
|
10
|
+
// * Redistributions of source code must retain the above copyright
|
11
|
+
// notice, this list of conditions and the following disclaimer.
|
12
|
+
// * Redistributions in binary form must reproduce the above
|
13
|
+
// copyright notice, this list of conditions and the following disclaimer
|
14
|
+
// in the documentation and/or other materials provided with the
|
15
|
+
// distribution.
|
16
|
+
// * Neither the name of DreamWorks Animation nor the names of
|
17
|
+
// its contributors may be used to endorse or promote products derived
|
18
|
+
// from this software without specific prior written permission.
|
19
|
+
//
|
20
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
21
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
22
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
23
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
24
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
25
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
26
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
27
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
28
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
29
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
30
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
//
|
32
|
+
///////////////////////////////////////////////////////////////////////////
|
33
|
+
|
34
|
+
#ifndef IMF_DWACOMPRESSORSIMD_H_HAS_BEEN_INCLUDED
|
35
|
+
#define IMF_DWACOMPRESSORSIMD_H_HAS_BEEN_INCLUDED
|
36
|
+
|
37
|
+
//
|
38
|
+
// Various SSE accelerated functions, used by Imf::DwaCompressor.
|
39
|
+
// These have been separated into a separate .h file, as the fast
|
40
|
+
// paths are done with template specialization.
|
41
|
+
//
|
42
|
+
// Unless otherwise noted, all pointers are assumed to be 32-byte
|
43
|
+
// aligned. Unaligned pointers may risk seg-faulting.
|
44
|
+
//
|
45
|
+
|
46
|
+
#include "ImfNamespace.h"
|
47
|
+
#include "ImfSimd.h"
|
48
|
+
#include "ImfSystemSpecific.h"
|
49
|
+
#include "OpenEXRConfig.h"
|
50
|
+
|
51
|
+
#include <half.h>
|
52
|
+
#include <assert.h>
|
53
|
+
|
54
|
+
OPENEXR_IMF_INTERNAL_NAMESPACE_HEADER_ENTER
|
55
|
+
|
56
|
+
#define _SSE_ALIGNMENT 32
|
57
|
+
#define _SSE_ALIGNMENT_MASK 0x0F
|
58
|
+
#define _AVX_ALIGNMENT_MASK 0x1F
|
59
|
+
|
60
|
+
//
|
61
|
+
// Test if we should enable GCC inline asm paths for AVX
|
62
|
+
//
|
63
|
+
|
64
|
+
#ifdef OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX
|
65
|
+
|
66
|
+
#define IMF_HAVE_GCC_INLINEASM
|
67
|
+
|
68
|
+
#ifdef __LP64__
|
69
|
+
#define IMF_HAVE_GCC_INLINEASM_64
|
70
|
+
#endif /* __LP64__ */
|
71
|
+
|
72
|
+
#endif /* OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX */
|
73
|
+
|
74
|
+
//
|
75
|
+
// A simple 64-element array, aligned properly for SIMD access.
|
76
|
+
//
|
77
|
+
|
78
|
+
template <class T>
|
79
|
+
class SimdAlignedBuffer64
|
80
|
+
{
|
81
|
+
public:
|
82
|
+
|
83
|
+
SimdAlignedBuffer64(): _buffer (0), _handle (0)
|
84
|
+
{
|
85
|
+
alloc();
|
86
|
+
}
|
87
|
+
|
88
|
+
SimdAlignedBuffer64(const SimdAlignedBuffer64 &rhs): _handle(0)
|
89
|
+
{
|
90
|
+
alloc();
|
91
|
+
memcpy (_buffer, rhs._buffer, 64 * sizeof (T));
|
92
|
+
}
|
93
|
+
|
94
|
+
~SimdAlignedBuffer64 ()
|
95
|
+
{
|
96
|
+
EXRFreeAligned (_handle);
|
97
|
+
_handle = 0;
|
98
|
+
_buffer = 0;
|
99
|
+
}
|
100
|
+
|
101
|
+
void alloc()
|
102
|
+
{
|
103
|
+
//
|
104
|
+
// Try EXRAllocAligned first - but it might fallback to
|
105
|
+
// unaligned allocs. If so, overalloc.
|
106
|
+
//
|
107
|
+
|
108
|
+
_handle = (char *) EXRAllocAligned
|
109
|
+
(64 * sizeof(T), _SSE_ALIGNMENT);
|
110
|
+
|
111
|
+
if (((size_t)_handle & (_SSE_ALIGNMENT - 1)) == 0)
|
112
|
+
{
|
113
|
+
_buffer = (T *)_handle;
|
114
|
+
return;
|
115
|
+
}
|
116
|
+
|
117
|
+
EXRFreeAligned(_handle);
|
118
|
+
_handle = (char *) EXRAllocAligned
|
119
|
+
(64 * sizeof(T) + _SSE_ALIGNMENT, _SSE_ALIGNMENT);
|
120
|
+
|
121
|
+
char *aligned = _handle;
|
122
|
+
|
123
|
+
while ((size_t)aligned & (_SSE_ALIGNMENT - 1))
|
124
|
+
aligned++;
|
125
|
+
|
126
|
+
_buffer = (T *)aligned;
|
127
|
+
}
|
128
|
+
|
129
|
+
T *_buffer;
|
130
|
+
|
131
|
+
private:
|
132
|
+
|
133
|
+
char *_handle;
|
134
|
+
};
|
135
|
+
|
136
|
+
typedef SimdAlignedBuffer64<float> SimdAlignedBuffer64f;
|
137
|
+
typedef SimdAlignedBuffer64<unsigned short> SimdAlignedBuffer64us;
|
138
|
+
|
139
|
+
namespace {
|
140
|
+
|
141
|
+
//
|
142
|
+
// Color space conversion, Inverse 709 CSC, Y'CbCr -> R'G'B'
|
143
|
+
//
|
144
|
+
|
145
|
+
void
|
146
|
+
csc709Inverse (float &comp0, float &comp1, float &comp2)
|
147
|
+
{
|
148
|
+
float src[3];
|
149
|
+
|
150
|
+
src[0] = comp0;
|
151
|
+
src[1] = comp1;
|
152
|
+
src[2] = comp2;
|
153
|
+
|
154
|
+
comp0 = src[0] + 1.5747f * src[2];
|
155
|
+
comp1 = src[0] - 0.1873f * src[1] - 0.4682f * src[2];
|
156
|
+
comp2 = src[0] + 1.8556f * src[1];
|
157
|
+
}
|
158
|
+
|
159
|
+
#ifndef IMF_HAVE_SSE2
|
160
|
+
|
161
|
+
|
162
|
+
//
|
163
|
+
// Scalar color space conversion, based on 709 primiary chromaticies.
|
164
|
+
// No scaling or offsets, just the matrix
|
165
|
+
//
|
166
|
+
|
167
|
+
void
|
168
|
+
csc709Inverse64 (float *comp0, float *comp1, float *comp2)
|
169
|
+
{
|
170
|
+
for (int i = 0; i < 64; ++i)
|
171
|
+
csc709Inverse (comp0[i], comp1[i], comp2[i]);
|
172
|
+
}
|
173
|
+
|
174
|
+
#else /* IMF_HAVE_SSE2 */
|
175
|
+
|
176
|
+
//
|
177
|
+
// SSE2 color space conversion
|
178
|
+
//
|
179
|
+
|
180
|
+
void
|
181
|
+
csc709Inverse64 (float *comp0, float *comp1, float *comp2)
|
182
|
+
{
|
183
|
+
__m128 c0 = { 1.5747f, 1.5747f, 1.5747f, 1.5747f};
|
184
|
+
__m128 c1 = { 1.8556f, 1.8556f, 1.8556f, 1.8556f};
|
185
|
+
__m128 c2 = {-0.1873f, -0.1873f, -0.1873f, -0.1873f};
|
186
|
+
__m128 c3 = {-0.4682f, -0.4682f, -0.4682f, -0.4682f};
|
187
|
+
|
188
|
+
__m128 *r = (__m128 *)comp0;
|
189
|
+
__m128 *g = (__m128 *)comp1;
|
190
|
+
__m128 *b = (__m128 *)comp2;
|
191
|
+
__m128 src[3];
|
192
|
+
|
193
|
+
#define CSC_INVERSE_709_SSE2_LOOP(i) \
|
194
|
+
src[0] = r[i]; \
|
195
|
+
src[1] = g[i]; \
|
196
|
+
src[2] = b[i]; \
|
197
|
+
\
|
198
|
+
r[i] = _mm_add_ps (r[i], _mm_mul_ps (src[2], c0)); \
|
199
|
+
\
|
200
|
+
g[i] = _mm_mul_ps (g[i], c2); \
|
201
|
+
src[2] = _mm_mul_ps (src[2], c3); \
|
202
|
+
g[i] = _mm_add_ps (g[i], src[0]); \
|
203
|
+
g[i] = _mm_add_ps (g[i], src[2]); \
|
204
|
+
\
|
205
|
+
b[i] = _mm_mul_ps (c1, src[1]); \
|
206
|
+
b[i] = _mm_add_ps (b[i], src[0]);
|
207
|
+
|
208
|
+
CSC_INVERSE_709_SSE2_LOOP (0)
|
209
|
+
CSC_INVERSE_709_SSE2_LOOP (1)
|
210
|
+
CSC_INVERSE_709_SSE2_LOOP (2)
|
211
|
+
CSC_INVERSE_709_SSE2_LOOP (3)
|
212
|
+
|
213
|
+
CSC_INVERSE_709_SSE2_LOOP (4)
|
214
|
+
CSC_INVERSE_709_SSE2_LOOP (5)
|
215
|
+
CSC_INVERSE_709_SSE2_LOOP (6)
|
216
|
+
CSC_INVERSE_709_SSE2_LOOP (7)
|
217
|
+
|
218
|
+
CSC_INVERSE_709_SSE2_LOOP (8)
|
219
|
+
CSC_INVERSE_709_SSE2_LOOP (9)
|
220
|
+
CSC_INVERSE_709_SSE2_LOOP (10)
|
221
|
+
CSC_INVERSE_709_SSE2_LOOP (11)
|
222
|
+
|
223
|
+
CSC_INVERSE_709_SSE2_LOOP (12)
|
224
|
+
CSC_INVERSE_709_SSE2_LOOP (13)
|
225
|
+
CSC_INVERSE_709_SSE2_LOOP (14)
|
226
|
+
CSC_INVERSE_709_SSE2_LOOP (15)
|
227
|
+
}
|
228
|
+
|
229
|
+
#endif /* IMF_HAVE_SSE2 */
|
230
|
+
|
231
|
+
|
232
|
+
//
|
233
|
+
// Color space conversion, Forward 709 CSC, R'G'B' -> Y'CbCr
|
234
|
+
//
|
235
|
+
// Simple FPU color space conversion. Based on the 709
|
236
|
+
// primary chromaticies, with no scaling or offsets.
|
237
|
+
//
|
238
|
+
|
239
|
+
void
|
240
|
+
csc709Forward64 (float *comp0, float *comp1, float *comp2)
|
241
|
+
{
|
242
|
+
float src[3];
|
243
|
+
|
244
|
+
for (int i = 0; i<64; ++i)
|
245
|
+
{
|
246
|
+
src[0] = comp0[i];
|
247
|
+
src[1] = comp1[i];
|
248
|
+
src[2] = comp2[i];
|
249
|
+
|
250
|
+
comp0[i] = 0.2126f * src[0] + 0.7152f * src[1] + 0.0722f * src[2];
|
251
|
+
comp1[i] = -0.1146f * src[0] - 0.3854f * src[1] + 0.5000f * src[2];
|
252
|
+
comp2[i] = 0.5000f * src[0] - 0.4542f * src[1] - 0.0458f * src[2];
|
253
|
+
}
|
254
|
+
}
|
255
|
+
|
256
|
+
|
257
|
+
//
|
258
|
+
// Byte interleaving of 2 byte arrays:
|
259
|
+
// src0 = AAAA
|
260
|
+
// src1 = BBBB
|
261
|
+
// dst = ABABABAB
|
262
|
+
//
|
263
|
+
// numBytes is the size of each of the source buffers
|
264
|
+
//
|
265
|
+
|
266
|
+
#ifndef IMF_HAVE_SSE2
|
267
|
+
|
268
|
+
//
|
269
|
+
// Scalar default implementation
|
270
|
+
//
|
271
|
+
|
272
|
+
void
|
273
|
+
interleaveByte2 (char *dst, char *src0, char *src1, int numBytes)
|
274
|
+
{
|
275
|
+
for (int x = 0; x < numBytes; ++x)
|
276
|
+
{
|
277
|
+
dst[2 * x] = src0[x];
|
278
|
+
dst[2 * x + 1] = src1[x];
|
279
|
+
}
|
280
|
+
}
|
281
|
+
|
282
|
+
#else /* IMF_HAVE_SSE2 */
|
283
|
+
|
284
|
+
//
|
285
|
+
// SSE2 byte interleaving
|
286
|
+
//
|
287
|
+
|
288
|
+
void
|
289
|
+
interleaveByte2 (char *dst, char *src0, char *src1, int numBytes)
|
290
|
+
{
|
291
|
+
int dstAlignment = (size_t)dst % 16;
|
292
|
+
int src0Alignment = (size_t)src0 % 16;
|
293
|
+
int src1Alignment = (size_t)src1 % 16;
|
294
|
+
|
295
|
+
__m128i *dst_epi8 = (__m128i*)dst;
|
296
|
+
__m128i *src0_epi8 = (__m128i*)src0;
|
297
|
+
__m128i *src1_epi8 = (__m128i*)src1;
|
298
|
+
int sseWidth = numBytes / 16;
|
299
|
+
|
300
|
+
if ((!dstAlignment) && (!src0Alignment) && (!src1Alignment))
|
301
|
+
{
|
302
|
+
__m128i tmp0, tmp1;
|
303
|
+
|
304
|
+
//
|
305
|
+
// Aligned loads and stores
|
306
|
+
//
|
307
|
+
|
308
|
+
for (int x = 0; x < sseWidth; ++x)
|
309
|
+
{
|
310
|
+
tmp0 = src0_epi8[x];
|
311
|
+
tmp1 = src1_epi8[x];
|
312
|
+
|
313
|
+
_mm_stream_si128 (&dst_epi8[2 * x],
|
314
|
+
_mm_unpacklo_epi8 (tmp0, tmp1));
|
315
|
+
|
316
|
+
_mm_stream_si128 (&dst_epi8[2 * x + 1],
|
317
|
+
_mm_unpackhi_epi8 (tmp0, tmp1));
|
318
|
+
}
|
319
|
+
|
320
|
+
//
|
321
|
+
// Then do run the leftovers one at a time
|
322
|
+
//
|
323
|
+
|
324
|
+
for (int x = 16 * sseWidth; x < numBytes; ++x)
|
325
|
+
{
|
326
|
+
dst[2 * x] = src0[x];
|
327
|
+
dst[2 * x + 1] = src1[x];
|
328
|
+
}
|
329
|
+
}
|
330
|
+
else if ((!dstAlignment) && (src0Alignment == 8) && (src1Alignment == 8))
|
331
|
+
{
|
332
|
+
//
|
333
|
+
// Aligned stores, but catch up a few values so we can
|
334
|
+
// use aligned loads
|
335
|
+
//
|
336
|
+
|
337
|
+
for (int x = 0; x < 8; ++x)
|
338
|
+
{
|
339
|
+
dst[2 * x] = src0[x];
|
340
|
+
dst[2 * x + 1] = src1[x];
|
341
|
+
}
|
342
|
+
|
343
|
+
dst_epi8 = (__m128i*)&dst[16];
|
344
|
+
src0_epi8 = (__m128i*)&src0[8];
|
345
|
+
src1_epi8 = (__m128i*)&src1[8];
|
346
|
+
sseWidth = (numBytes - 8) / 16;
|
347
|
+
|
348
|
+
for (int x=0; x<sseWidth; ++x)
|
349
|
+
{
|
350
|
+
_mm_stream_si128 (&dst_epi8[2 * x],
|
351
|
+
_mm_unpacklo_epi8 (src0_epi8[x], src1_epi8[x]));
|
352
|
+
|
353
|
+
_mm_stream_si128 (&dst_epi8[2 * x + 1],
|
354
|
+
_mm_unpackhi_epi8 (src0_epi8[x], src1_epi8[x]));
|
355
|
+
}
|
356
|
+
|
357
|
+
//
|
358
|
+
// Then do run the leftovers one at a time
|
359
|
+
//
|
360
|
+
|
361
|
+
for (int x = 16 * sseWidth + 8; x < numBytes; ++x)
|
362
|
+
{
|
363
|
+
dst[2 * x] = src0[x];
|
364
|
+
dst[2 * x + 1] = src1[x];
|
365
|
+
}
|
366
|
+
}
|
367
|
+
else
|
368
|
+
{
|
369
|
+
//
|
370
|
+
// Unaligned everything
|
371
|
+
//
|
372
|
+
|
373
|
+
for (int x = 0; x < sseWidth; ++x)
|
374
|
+
{
|
375
|
+
__m128i tmpSrc0_epi8 = _mm_loadu_si128 (&src0_epi8[x]);
|
376
|
+
__m128i tmpSrc1_epi8 = _mm_loadu_si128 (&src1_epi8[x]);
|
377
|
+
|
378
|
+
_mm_storeu_si128 (&dst_epi8[2 * x],
|
379
|
+
_mm_unpacklo_epi8 (tmpSrc0_epi8, tmpSrc1_epi8));
|
380
|
+
|
381
|
+
_mm_storeu_si128 (&dst_epi8[2 * x + 1],
|
382
|
+
_mm_unpackhi_epi8 (tmpSrc0_epi8, tmpSrc1_epi8));
|
383
|
+
}
|
384
|
+
|
385
|
+
//
|
386
|
+
// Then do run the leftovers one at a time
|
387
|
+
//
|
388
|
+
|
389
|
+
for (int x = 16 * sseWidth; x < numBytes; ++x)
|
390
|
+
{
|
391
|
+
dst[2 * x] = src0[x];
|
392
|
+
dst[2 * x + 1] = src1[x];
|
393
|
+
}
|
394
|
+
}
|
395
|
+
}
|
396
|
+
|
397
|
+
#endif /* IMF_HAVE_SSE2 */
|
398
|
+
|
399
|
+
|
400
|
+
//
|
401
|
+
// Float -> half float conversion
|
402
|
+
//
|
403
|
+
// To enable F16C based conversion, we can't rely on compile-time
|
404
|
+
// detection, hence the multiple defined versions. Pick one based
|
405
|
+
// on runtime cpuid detection.
|
406
|
+
//
|
407
|
+
|
408
|
+
//
|
409
|
+
// Default boring conversion
|
410
|
+
//
|
411
|
+
|
412
|
+
void
|
413
|
+
convertFloatToHalf64_scalar (unsigned short *dst, float *src)
|
414
|
+
{
|
415
|
+
for (int i=0; i<64; ++i)
|
416
|
+
dst[i] = ((half)src[i]).bits();
|
417
|
+
}
|
418
|
+
|
419
|
+
|
420
|
+
//
|
421
|
+
// F16C conversion - Assumes aligned src and dst
|
422
|
+
//
|
423
|
+
|
424
|
+
void
|
425
|
+
convertFloatToHalf64_f16c (unsigned short *dst, float *src)
|
426
|
+
{
|
427
|
+
//
|
428
|
+
// Ordinarly, I'd avoid using inline asm and prefer intrinsics.
|
429
|
+
// However, in order to get the intrinsics, we need to tell
|
430
|
+
// the compiler to generate VEX instructions.
|
431
|
+
//
|
432
|
+
// (On the GCC side, -mf16c goes ahead and activates -mavc,
|
433
|
+
// resulting in VEX code. Without -mf16c, no intrinsics..)
|
434
|
+
//
|
435
|
+
// Now, it's quite likely that we'll find ourselves in situations
|
436
|
+
// where we want to build *without* VEX, in order to maintain
|
437
|
+
// maximum compatability. But to get there with intrinsics,
|
438
|
+
// we'd need to break out code into a separate file. Bleh.
|
439
|
+
// I'll take the asm.
|
440
|
+
//
|
441
|
+
|
442
|
+
#if defined IMF_HAVE_GCC_INLINEASM
|
443
|
+
__asm__
|
444
|
+
("vmovaps (%0), %%ymm0 \n"
|
445
|
+
"vmovaps 0x20(%0), %%ymm1 \n"
|
446
|
+
"vmovaps 0x40(%0), %%ymm2 \n"
|
447
|
+
"vmovaps 0x60(%0), %%ymm3 \n"
|
448
|
+
"vcvtps2ph $0, %%ymm0, %%xmm0 \n"
|
449
|
+
"vcvtps2ph $0, %%ymm1, %%xmm1 \n"
|
450
|
+
"vcvtps2ph $0, %%ymm2, %%xmm2 \n"
|
451
|
+
"vcvtps2ph $0, %%ymm3, %%xmm3 \n"
|
452
|
+
"vmovdqa %%xmm0, 0x00(%1) \n"
|
453
|
+
"vmovdqa %%xmm1, 0x10(%1) \n"
|
454
|
+
"vmovdqa %%xmm2, 0x20(%1) \n"
|
455
|
+
"vmovdqa %%xmm3, 0x30(%1) \n"
|
456
|
+
"vmovaps 0x80(%0), %%ymm0 \n"
|
457
|
+
"vmovaps 0xa0(%0), %%ymm1 \n"
|
458
|
+
"vmovaps 0xc0(%0), %%ymm2 \n"
|
459
|
+
"vmovaps 0xe0(%0), %%ymm3 \n"
|
460
|
+
"vcvtps2ph $0, %%ymm0, %%xmm0 \n"
|
461
|
+
"vcvtps2ph $0, %%ymm1, %%xmm1 \n"
|
462
|
+
"vcvtps2ph $0, %%ymm2, %%xmm2 \n"
|
463
|
+
"vcvtps2ph $0, %%ymm3, %%xmm3 \n"
|
464
|
+
"vmovdqa %%xmm0, 0x40(%1) \n"
|
465
|
+
"vmovdqa %%xmm1, 0x50(%1) \n"
|
466
|
+
"vmovdqa %%xmm2, 0x60(%1) \n"
|
467
|
+
"vmovdqa %%xmm3, 0x70(%1) \n"
|
468
|
+
#ifndef __AVX__
|
469
|
+
"vzeroupper \n"
|
470
|
+
#endif /* __AVX__ */
|
471
|
+
: /* Output */
|
472
|
+
: /* Input */ "r"(src), "r"(dst)
|
473
|
+
#ifndef __AVX__
|
474
|
+
: /* Clobber */ "%xmm0", "%xmm1", "%xmm2", "%xmm3", "memory"
|
475
|
+
#else
|
476
|
+
: /* Clobber */ "%ymm0", "%ymm1", "%ymm2", "%ymm3", "memory"
|
477
|
+
#endif /* __AVX__ */
|
478
|
+
);
|
479
|
+
#else
|
480
|
+
convertFloatToHalf64_scalar (dst, src);
|
481
|
+
#endif /* IMF_HAVE_GCC_INLINEASM */
|
482
|
+
}
|
483
|
+
|
484
|
+
|
485
|
+
//
|
486
|
+
// Convert an 8x8 block of HALF from zig-zag order to
|
487
|
+
// FLOAT in normal order. The order we want is:
|
488
|
+
//
|
489
|
+
// src dst
|
490
|
+
// 0 1 2 3 4 5 6 7 0 1 5 6 14 15 27 28
|
491
|
+
// 8 9 10 11 12 13 14 15 2 4 7 13 16 26 29 42
|
492
|
+
// 16 17 18 19 20 21 22 23 3 8 12 17 25 30 41 43
|
493
|
+
// 24 25 26 27 28 29 30 31 9 11 18 24 31 40 44 53
|
494
|
+
// 32 33 34 35 36 37 38 39 10 19 23 32 39 45 52 54
|
495
|
+
// 40 41 42 43 44 45 46 47 20 22 33 38 46 51 55 60
|
496
|
+
// 48 49 50 51 52 53 54 55 21 34 37 47 50 56 59 61
|
497
|
+
// 56 57 58 59 60 61 62 63 35 36 48 49 57 58 62 63
|
498
|
+
//
|
499
|
+
|
500
|
+
void
|
501
|
+
fromHalfZigZag_scalar (unsigned short *src, float *dst)
|
502
|
+
{
|
503
|
+
half *srcHalf = (half *)src;
|
504
|
+
|
505
|
+
dst[0] = (float)srcHalf[0];
|
506
|
+
dst[1] = (float)srcHalf[1];
|
507
|
+
dst[2] = (float)srcHalf[5];
|
508
|
+
dst[3] = (float)srcHalf[6];
|
509
|
+
dst[4] = (float)srcHalf[14];
|
510
|
+
dst[5] = (float)srcHalf[15];
|
511
|
+
dst[6] = (float)srcHalf[27];
|
512
|
+
dst[7] = (float)srcHalf[28];
|
513
|
+
dst[8] = (float)srcHalf[2];
|
514
|
+
dst[9] = (float)srcHalf[4];
|
515
|
+
|
516
|
+
dst[10] = (float)srcHalf[7];
|
517
|
+
dst[11] = (float)srcHalf[13];
|
518
|
+
dst[12] = (float)srcHalf[16];
|
519
|
+
dst[13] = (float)srcHalf[26];
|
520
|
+
dst[14] = (float)srcHalf[29];
|
521
|
+
dst[15] = (float)srcHalf[42];
|
522
|
+
dst[16] = (float)srcHalf[3];
|
523
|
+
dst[17] = (float)srcHalf[8];
|
524
|
+
dst[18] = (float)srcHalf[12];
|
525
|
+
dst[19] = (float)srcHalf[17];
|
526
|
+
|
527
|
+
dst[20] = (float)srcHalf[25];
|
528
|
+
dst[21] = (float)srcHalf[30];
|
529
|
+
dst[22] = (float)srcHalf[41];
|
530
|
+
dst[23] = (float)srcHalf[43];
|
531
|
+
dst[24] = (float)srcHalf[9];
|
532
|
+
dst[25] = (float)srcHalf[11];
|
533
|
+
dst[26] = (float)srcHalf[18];
|
534
|
+
dst[27] = (float)srcHalf[24];
|
535
|
+
dst[28] = (float)srcHalf[31];
|
536
|
+
dst[29] = (float)srcHalf[40];
|
537
|
+
|
538
|
+
dst[30] = (float)srcHalf[44];
|
539
|
+
dst[31] = (float)srcHalf[53];
|
540
|
+
dst[32] = (float)srcHalf[10];
|
541
|
+
dst[33] = (float)srcHalf[19];
|
542
|
+
dst[34] = (float)srcHalf[23];
|
543
|
+
dst[35] = (float)srcHalf[32];
|
544
|
+
dst[36] = (float)srcHalf[39];
|
545
|
+
dst[37] = (float)srcHalf[45];
|
546
|
+
dst[38] = (float)srcHalf[52];
|
547
|
+
dst[39] = (float)srcHalf[54];
|
548
|
+
|
549
|
+
dst[40] = (float)srcHalf[20];
|
550
|
+
dst[41] = (float)srcHalf[22];
|
551
|
+
dst[42] = (float)srcHalf[33];
|
552
|
+
dst[43] = (float)srcHalf[38];
|
553
|
+
dst[44] = (float)srcHalf[46];
|
554
|
+
dst[45] = (float)srcHalf[51];
|
555
|
+
dst[46] = (float)srcHalf[55];
|
556
|
+
dst[47] = (float)srcHalf[60];
|
557
|
+
dst[48] = (float)srcHalf[21];
|
558
|
+
dst[49] = (float)srcHalf[34];
|
559
|
+
|
560
|
+
dst[50] = (float)srcHalf[37];
|
561
|
+
dst[51] = (float)srcHalf[47];
|
562
|
+
dst[52] = (float)srcHalf[50];
|
563
|
+
dst[53] = (float)srcHalf[56];
|
564
|
+
dst[54] = (float)srcHalf[59];
|
565
|
+
dst[55] = (float)srcHalf[61];
|
566
|
+
dst[56] = (float)srcHalf[35];
|
567
|
+
dst[57] = (float)srcHalf[36];
|
568
|
+
dst[58] = (float)srcHalf[48];
|
569
|
+
dst[59] = (float)srcHalf[49];
|
570
|
+
|
571
|
+
dst[60] = (float)srcHalf[57];
|
572
|
+
dst[61] = (float)srcHalf[58];
|
573
|
+
dst[62] = (float)srcHalf[62];
|
574
|
+
dst[63] = (float)srcHalf[63];
|
575
|
+
}
|
576
|
+
|
577
|
+
|
578
|
+
//
|
579
|
+
// If we can form the correct ordering in xmm registers,
|
580
|
+
// we can use F16C to convert from HALF -> FLOAT. However,
|
581
|
+
// making the correct order isn't trivial.
|
582
|
+
//
|
583
|
+
// We want to re-order a source 8x8 matrix from:
|
584
|
+
//
|
585
|
+
// 0 1 2 3 4 5 6 7 0 1 5 6 14 15 27 28
|
586
|
+
// 8 9 10 11 12 13 14 15 2 4 7 13 16 26 29 42
|
587
|
+
// 16 17 18 19 20 21 22 23 3 8 12 17 25 30 41 43
|
588
|
+
// 24 25 26 27 28 29 30 31 9 11 18 24 31 40 44 53 (A)
|
589
|
+
// 32 33 34 35 36 37 38 39 --> 10 19 23 32 39 45 52 54
|
590
|
+
// 40 41 42 43 44 45 46 47 20 22 33 38 46 51 55 60
|
591
|
+
// 48 49 50 51 52 53 54 55 21 34 37 47 50 56 59 61
|
592
|
+
// 56 57 58 59 60 61 62 63 35 36 48 49 57 58 62 63
|
593
|
+
//
|
594
|
+
// Which looks like a mess, right?
|
595
|
+
//
|
596
|
+
// Now, check out the NE/SW diagonals of (A). Along those lines,
|
597
|
+
// we have runs of contiguous values! If we rewrite (A) a bit, we get:
|
598
|
+
//
|
599
|
+
// 0
|
600
|
+
// 1 2
|
601
|
+
// 5 4 3
|
602
|
+
// 6 7 8 9
|
603
|
+
// 14 13 12 11 10
|
604
|
+
// 15 16 17 18 19 20
|
605
|
+
// 27 26 25 24 23 22 21 (B)
|
606
|
+
// 28 29 30 31 32 33 34 35
|
607
|
+
// 42 41 40 39 38 37 36
|
608
|
+
// 43 44 45 46 47 48
|
609
|
+
// 53 52 51 50 49
|
610
|
+
// 54 55 56 57
|
611
|
+
// 60 59 58
|
612
|
+
// 61 62
|
613
|
+
// 63
|
614
|
+
//
|
615
|
+
// In this ordering, the columns are the rows (A). If we can 'transpose'
|
616
|
+
// (B), we'll achieve our goal. But we want this to fit nicely into
|
617
|
+
// xmm registers and still be able to load large runs efficiently.
|
618
|
+
// Also, notice that the odd rows are in ascending order, while
|
619
|
+
// the even rows are in descending order.
|
620
|
+
//
|
621
|
+
// If we 'fold' the bottom half up into the top, we can preserve ordered
|
622
|
+
// runs accross rows, and still keep all the correct values in columns.
|
623
|
+
// After transposing, we'll need to rotate things back into place.
|
624
|
+
// This gives us:
|
625
|
+
//
|
626
|
+
// 0 | 42 41 40 39 38 37 36
|
627
|
+
// 1 2 | 43 44 45 46 47 48
|
628
|
+
// 5 4 3 | 53 52 51 50 49
|
629
|
+
// 6 7 8 9 | 54 55 56 57 (C)
|
630
|
+
// 14 13 12 11 10 | 60 59 58
|
631
|
+
// 15 16 17 18 19 20 | 61 62
|
632
|
+
// 27 26 25 24 23 22 21 | 61
|
633
|
+
// 28 29 30 31 32 33 34 35
|
634
|
+
//
|
635
|
+
// But hang on. We still have the backwards descending rows to deal with.
|
636
|
+
// Lets reverse the even rows so that all values are in ascending order
|
637
|
+
//
|
638
|
+
// 36 37 38 39 40 41 42 | 0
|
639
|
+
// 1 2 | 43 44 45 46 47 48
|
640
|
+
// 49 50 51 52 53 | 3 4 5
|
641
|
+
// 6 7 8 9 | 54 55 56 57 (D)
|
642
|
+
// 58 59 60 | 10 11 12 13 14
|
643
|
+
// 15 16 17 18 19 20 | 61 62
|
644
|
+
// 61 | 21 22 23 24 25 26 27
|
645
|
+
// 28 29 30 31 32 33 34 35
|
646
|
+
//
|
647
|
+
// If we can form (D), we will then:
|
648
|
+
// 1) Reverse the even rows
|
649
|
+
// 2) Transpose
|
650
|
+
// 3) Rotate the rows
|
651
|
+
//
|
652
|
+
// and we'll have (A).
|
653
|
+
//
|
654
|
+
|
655
|
+
void
|
656
|
+
fromHalfZigZag_f16c (unsigned short *src, float *dst)
|
657
|
+
{
|
658
|
+
#if defined IMF_HAVE_GCC_INLINEASM_64
|
659
|
+
__asm__
|
660
|
+
|
661
|
+
/* x3 <- 0
|
662
|
+
* x8 <- [ 0- 7]
|
663
|
+
* x6 <- [56-63]
|
664
|
+
* x9 <- [21-28]
|
665
|
+
* x7 <- [28-35]
|
666
|
+
* x3 <- [ 6- 9] (lower half) */
|
667
|
+
|
668
|
+
("vpxor %%xmm3, %%xmm3, %%xmm3 \n"
|
669
|
+
"vmovdqa (%0), %%xmm8 \n"
|
670
|
+
"vmovdqa 112(%0), %%xmm6 \n"
|
671
|
+
"vmovdqu 42(%0), %%xmm9 \n"
|
672
|
+
"vmovdqu 56(%0), %%xmm7 \n"
|
673
|
+
"vmovq 12(%0), %%xmm3 \n"
|
674
|
+
|
675
|
+
/* Setup rows 0-2 of A in xmm0-xmm2
|
676
|
+
* x1 <- x8 >> 16 (1 value)
|
677
|
+
* x2 <- x8 << 32 (2 values)
|
678
|
+
* x0 <- alignr([35-42], x8, 2)
|
679
|
+
* x1 <- blend(x1, [41-48])
|
680
|
+
* x2 <- blend(x2, [49-56]) */
|
681
|
+
|
682
|
+
"vpsrldq $2, %%xmm8, %%xmm1 \n"
|
683
|
+
"vpslldq $4, %%xmm8, %%xmm2 \n"
|
684
|
+
"vpalignr $2, 70(%0), %%xmm8, %%xmm0 \n"
|
685
|
+
"vpblendw $0xfc, 82(%0), %%xmm1, %%xmm1 \n"
|
686
|
+
"vpblendw $0x1f, 98(%0), %%xmm2, %%xmm2 \n"
|
687
|
+
|
688
|
+
/* Setup rows 4-6 of A in xmm4-xmm6
|
689
|
+
* x4 <- x6 >> 32 (2 values)
|
690
|
+
* x5 <- x6 << 16 (1 value)
|
691
|
+
* x6 <- alignr(x6,x9,14)
|
692
|
+
* x4 <- blend(x4, [ 7-14])
|
693
|
+
* x5 <- blend(x5, [15-22]) */
|
694
|
+
|
695
|
+
"vpsrldq $4, %%xmm6, %%xmm4 \n"
|
696
|
+
"vpslldq $2, %%xmm6, %%xmm5 \n"
|
697
|
+
"vpalignr $14, %%xmm6, %%xmm9, %%xmm6 \n"
|
698
|
+
"vpblendw $0xf8, 14(%0), %%xmm4, %%xmm4 \n"
|
699
|
+
"vpblendw $0x3f, 30(%0), %%xmm5, %%xmm5 \n"
|
700
|
+
|
701
|
+
/* Load the upper half of row 3 into xmm3
|
702
|
+
* x3 <- [54-57] (upper half) */
|
703
|
+
|
704
|
+
"vpinsrq $1, 108(%0), %%xmm3, %%xmm3\n"
|
705
|
+
|
706
|
+
/* Reverse the even rows. We're not using PSHUFB as
|
707
|
+
* that requires loading an extra constant all the time,
|
708
|
+
* and we're alreadly pretty memory bound.
|
709
|
+
*/
|
710
|
+
|
711
|
+
"vpshuflw $0x1b, %%xmm0, %%xmm0 \n"
|
712
|
+
"vpshuflw $0x1b, %%xmm2, %%xmm2 \n"
|
713
|
+
"vpshuflw $0x1b, %%xmm4, %%xmm4 \n"
|
714
|
+
"vpshuflw $0x1b, %%xmm6, %%xmm6 \n"
|
715
|
+
|
716
|
+
"vpshufhw $0x1b, %%xmm0, %%xmm0 \n"
|
717
|
+
"vpshufhw $0x1b, %%xmm2, %%xmm2 \n"
|
718
|
+
"vpshufhw $0x1b, %%xmm4, %%xmm4 \n"
|
719
|
+
"vpshufhw $0x1b, %%xmm6, %%xmm6 \n"
|
720
|
+
|
721
|
+
"vpshufd $0x4e, %%xmm0, %%xmm0 \n"
|
722
|
+
"vpshufd $0x4e, %%xmm2, %%xmm2 \n"
|
723
|
+
"vpshufd $0x4e, %%xmm4, %%xmm4 \n"
|
724
|
+
"vpshufd $0x4e, %%xmm6, %%xmm6 \n"
|
725
|
+
|
726
|
+
/* Transpose xmm0-xmm7 into xmm8-xmm15 */
|
727
|
+
|
728
|
+
"vpunpcklwd %%xmm1, %%xmm0, %%xmm8 \n"
|
729
|
+
"vpunpcklwd %%xmm3, %%xmm2, %%xmm9 \n"
|
730
|
+
"vpunpcklwd %%xmm5, %%xmm4, %%xmm10 \n"
|
731
|
+
"vpunpcklwd %%xmm7, %%xmm6, %%xmm11 \n"
|
732
|
+
"vpunpckhwd %%xmm1, %%xmm0, %%xmm12 \n"
|
733
|
+
"vpunpckhwd %%xmm3, %%xmm2, %%xmm13 \n"
|
734
|
+
"vpunpckhwd %%xmm5, %%xmm4, %%xmm14 \n"
|
735
|
+
"vpunpckhwd %%xmm7, %%xmm6, %%xmm15 \n"
|
736
|
+
|
737
|
+
"vpunpckldq %%xmm9, %%xmm8, %%xmm0 \n"
|
738
|
+
"vpunpckldq %%xmm11, %%xmm10, %%xmm1 \n"
|
739
|
+
"vpunpckhdq %%xmm9, %%xmm8, %%xmm2 \n"
|
740
|
+
"vpunpckhdq %%xmm11, %%xmm10, %%xmm3 \n"
|
741
|
+
"vpunpckldq %%xmm13, %%xmm12, %%xmm4 \n"
|
742
|
+
"vpunpckldq %%xmm15, %%xmm14, %%xmm5 \n"
|
743
|
+
"vpunpckhdq %%xmm13, %%xmm12, %%xmm6 \n"
|
744
|
+
"vpunpckhdq %%xmm15, %%xmm14, %%xmm7 \n"
|
745
|
+
|
746
|
+
"vpunpcklqdq %%xmm1, %%xmm0, %%xmm8 \n"
|
747
|
+
"vpunpckhqdq %%xmm1, %%xmm0, %%xmm9 \n"
|
748
|
+
"vpunpcklqdq %%xmm3, %%xmm2, %%xmm10 \n"
|
749
|
+
"vpunpckhqdq %%xmm3, %%xmm2, %%xmm11 \n"
|
750
|
+
"vpunpcklqdq %%xmm4, %%xmm5, %%xmm12 \n"
|
751
|
+
"vpunpckhqdq %%xmm5, %%xmm4, %%xmm13 \n"
|
752
|
+
"vpunpcklqdq %%xmm7, %%xmm6, %%xmm14 \n"
|
753
|
+
"vpunpckhqdq %%xmm7, %%xmm6, %%xmm15 \n"
|
754
|
+
|
755
|
+
/* Rotate the rows to get the correct final order.
|
756
|
+
* Rotating xmm12 isn't needed, as we can handle
|
757
|
+
* the rotation in the PUNPCKLQDQ above. Rotating
|
758
|
+
* xmm8 isn't needed as it's already in the right order
|
759
|
+
*/
|
760
|
+
|
761
|
+
"vpalignr $2, %%xmm9, %%xmm9, %%xmm9 \n"
|
762
|
+
"vpalignr $4, %%xmm10, %%xmm10, %%xmm10 \n"
|
763
|
+
"vpalignr $6, %%xmm11, %%xmm11, %%xmm11 \n"
|
764
|
+
"vpalignr $10, %%xmm13, %%xmm13, %%xmm13 \n"
|
765
|
+
"vpalignr $12, %%xmm14, %%xmm14, %%xmm14 \n"
|
766
|
+
"vpalignr $14, %%xmm15, %%xmm15, %%xmm15 \n"
|
767
|
+
|
768
|
+
/* Convert from half -> float */
|
769
|
+
|
770
|
+
"vcvtph2ps %%xmm8, %%ymm8 \n"
|
771
|
+
"vcvtph2ps %%xmm9, %%ymm9 \n"
|
772
|
+
"vcvtph2ps %%xmm10, %%ymm10 \n"
|
773
|
+
"vcvtph2ps %%xmm11, %%ymm11 \n"
|
774
|
+
"vcvtph2ps %%xmm12, %%ymm12 \n"
|
775
|
+
"vcvtph2ps %%xmm13, %%ymm13 \n"
|
776
|
+
"vcvtph2ps %%xmm14, %%ymm14 \n"
|
777
|
+
"vcvtph2ps %%xmm15, %%ymm15 \n"
|
778
|
+
|
779
|
+
/* Move float values to dst */
|
780
|
+
|
781
|
+
"vmovaps %%ymm8, (%1) \n"
|
782
|
+
"vmovaps %%ymm9, 32(%1) \n"
|
783
|
+
"vmovaps %%ymm10, 64(%1) \n"
|
784
|
+
"vmovaps %%ymm11, 96(%1) \n"
|
785
|
+
"vmovaps %%ymm12, 128(%1) \n"
|
786
|
+
"vmovaps %%ymm13, 160(%1) \n"
|
787
|
+
"vmovaps %%ymm14, 192(%1) \n"
|
788
|
+
"vmovaps %%ymm15, 224(%1) \n"
|
789
|
+
#ifndef __AVX__
|
790
|
+
"vzeroupper \n"
|
791
|
+
#endif /* __AVX__ */
|
792
|
+
: /* Output */
|
793
|
+
: /* Input */ "r"(src), "r"(dst)
|
794
|
+
: /* Clobber */ "memory",
|
795
|
+
#ifndef __AVX__
|
796
|
+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
|
797
|
+
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
|
798
|
+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
|
799
|
+
"%xmm12", "%xmm13", "%xmm14", "%xmm15"
|
800
|
+
#else
|
801
|
+
"%ymm0", "%ymm1", "%ymm2", "%ymm3",
|
802
|
+
"%ymm4", "%ymm5", "%ymm6", "%ymm7",
|
803
|
+
"%ymm8", "%ymm9", "%ymm10", "%ymm11",
|
804
|
+
"%ymm12", "%ymm13", "%ymm14", "%ymm15"
|
805
|
+
#endif /* __AVX__ */
|
806
|
+
);
|
807
|
+
|
808
|
+
#else
|
809
|
+
fromHalfZigZag_scalar(src, dst);
|
810
|
+
#endif /* defined IMF_HAVE_GCC_INLINEASM_64 */
|
811
|
+
}
|
812
|
+
|
813
|
+
|
814
|
+
//
|
815
|
+
// Inverse 8x8 DCT, only inverting the DC. This assumes that
|
816
|
+
// all AC frequencies are 0.
|
817
|
+
//
|
818
|
+
|
819
|
+
#ifndef IMF_HAVE_SSE2
|
820
|
+
|
821
|
+
void
|
822
|
+
dctInverse8x8DcOnly (float *data)
|
823
|
+
{
|
824
|
+
float val = data[0] * 3.535536e-01f * 3.535536e-01f;
|
825
|
+
|
826
|
+
for (int i = 0; i < 64; ++i)
|
827
|
+
data[i] = val;
|
828
|
+
}
|
829
|
+
|
830
|
+
#else /* IMF_HAVE_SSE2 */
|
831
|
+
|
832
|
+
void
|
833
|
+
dctInverse8x8DcOnly (float *data)
|
834
|
+
{
|
835
|
+
__m128 src = _mm_set1_ps (data[0] * 3.535536e-01f * 3.535536e-01f);
|
836
|
+
__m128 *dst = (__m128 *)data;
|
837
|
+
|
838
|
+
for (int i = 0; i < 16; ++i)
|
839
|
+
dst[i] = src;
|
840
|
+
}
|
841
|
+
|
842
|
+
#endif /* IMF_HAVE_SSE2 */
|
843
|
+
|
844
|
+
|
845
|
+
//
|
846
|
+
// Full 8x8 Inverse DCT:
|
847
|
+
//
|
848
|
+
// Simple inverse DCT on an 8x8 block, with scalar ops only.
|
849
|
+
// Operates on data in-place.
|
850
|
+
//
|
851
|
+
// This is based on the iDCT formuation (y = frequency domain,
|
852
|
+
// x = spatial domain)
|
853
|
+
//
|
854
|
+
// [x0] [ ][y0] [ ][y1]
|
855
|
+
// [x1] = [ M1 ][y2] + [ M2 ][y3]
|
856
|
+
// [x2] [ ][y4] [ ][y5]
|
857
|
+
// [x3] [ ][y6] [ ][y7]
|
858
|
+
//
|
859
|
+
// [x7] [ ][y0] [ ][y1]
|
860
|
+
// [x6] = [ M1 ][y2] - [ M2 ][y3]
|
861
|
+
// [x5] [ ][y4] [ ][y5]
|
862
|
+
// [x4] [ ][y6] [ ][y7]
|
863
|
+
//
|
864
|
+
// where M1: M2:
|
865
|
+
//
|
866
|
+
// [a c a f] [b d e g]
|
867
|
+
// [a f -a -c] [d -g -b -e]
|
868
|
+
// [a -f -a c] [e -b g d]
|
869
|
+
// [a -c a -f] [g -e d -b]
|
870
|
+
//
|
871
|
+
// and the constants are as defined below..
|
872
|
+
//
|
873
|
+
// If you know how many of the lower rows are zero, that can
|
874
|
+
// be passed in to help speed things up. If you don't know,
|
875
|
+
// just set zeroedRows=0.
|
876
|
+
//
|
877
|
+
|
878
|
+
//
|
879
|
+
// Default implementation
|
880
|
+
//
|
881
|
+
|
882
|
+
template <int zeroedRows>
|
883
|
+
void
|
884
|
+
dctInverse8x8_scalar (float *data)
|
885
|
+
{
|
886
|
+
const float a = .5f * cosf (3.14159f / 4.0f);
|
887
|
+
const float b = .5f * cosf (3.14159f / 16.0f);
|
888
|
+
const float c = .5f * cosf (3.14159f / 8.0f);
|
889
|
+
const float d = .5f * cosf (3.f*3.14159f / 16.0f);
|
890
|
+
const float e = .5f * cosf (5.f*3.14159f / 16.0f);
|
891
|
+
const float f = .5f * cosf (3.f*3.14159f / 8.0f);
|
892
|
+
const float g = .5f * cosf (7.f*3.14159f / 16.0f);
|
893
|
+
|
894
|
+
float alpha[4], beta[4], theta[4], gamma[4];
|
895
|
+
|
896
|
+
float *rowPtr = NULL;
|
897
|
+
|
898
|
+
//
|
899
|
+
// First pass - row wise.
|
900
|
+
//
|
901
|
+
// This looks less-compact than the description above in
|
902
|
+
// an attempt to fold together common sub-expressions.
|
903
|
+
//
|
904
|
+
|
905
|
+
for (int row = 0; row < 8 - zeroedRows; ++row)
|
906
|
+
{
|
907
|
+
rowPtr = data + row * 8;
|
908
|
+
|
909
|
+
alpha[0] = c * rowPtr[2];
|
910
|
+
alpha[1] = f * rowPtr[2];
|
911
|
+
alpha[2] = c * rowPtr[6];
|
912
|
+
alpha[3] = f * rowPtr[6];
|
913
|
+
|
914
|
+
beta[0] = b * rowPtr[1] + d * rowPtr[3] + e * rowPtr[5] + g * rowPtr[7];
|
915
|
+
beta[1] = d * rowPtr[1] - g * rowPtr[3] - b * rowPtr[5] - e * rowPtr[7];
|
916
|
+
beta[2] = e * rowPtr[1] - b * rowPtr[3] + g * rowPtr[5] + d * rowPtr[7];
|
917
|
+
beta[3] = g * rowPtr[1] - e * rowPtr[3] + d * rowPtr[5] - b * rowPtr[7];
|
918
|
+
|
919
|
+
theta[0] = a * (rowPtr[0] + rowPtr[4]);
|
920
|
+
theta[3] = a * (rowPtr[0] - rowPtr[4]);
|
921
|
+
|
922
|
+
theta[1] = alpha[0] + alpha[3];
|
923
|
+
theta[2] = alpha[1] - alpha[2];
|
924
|
+
|
925
|
+
|
926
|
+
gamma[0] = theta[0] + theta[1];
|
927
|
+
gamma[1] = theta[3] + theta[2];
|
928
|
+
gamma[2] = theta[3] - theta[2];
|
929
|
+
gamma[3] = theta[0] - theta[1];
|
930
|
+
|
931
|
+
|
932
|
+
rowPtr[0] = gamma[0] + beta[0];
|
933
|
+
rowPtr[1] = gamma[1] + beta[1];
|
934
|
+
rowPtr[2] = gamma[2] + beta[2];
|
935
|
+
rowPtr[3] = gamma[3] + beta[3];
|
936
|
+
|
937
|
+
rowPtr[4] = gamma[3] - beta[3];
|
938
|
+
rowPtr[5] = gamma[2] - beta[2];
|
939
|
+
rowPtr[6] = gamma[1] - beta[1];
|
940
|
+
rowPtr[7] = gamma[0] - beta[0];
|
941
|
+
}
|
942
|
+
|
943
|
+
//
|
944
|
+
// Second pass - column wise.
|
945
|
+
//
|
946
|
+
|
947
|
+
for (int column = 0; column < 8; ++column)
|
948
|
+
{
|
949
|
+
alpha[0] = c * data[16+column];
|
950
|
+
alpha[1] = f * data[16+column];
|
951
|
+
alpha[2] = c * data[48+column];
|
952
|
+
alpha[3] = f * data[48+column];
|
953
|
+
|
954
|
+
beta[0] = b * data[8+column] + d * data[24+column] +
|
955
|
+
e * data[40+column] + g * data[56+column];
|
956
|
+
|
957
|
+
beta[1] = d * data[8+column] - g * data[24+column] -
|
958
|
+
b * data[40+column] - e * data[56+column];
|
959
|
+
|
960
|
+
beta[2] = e * data[8+column] - b * data[24+column] +
|
961
|
+
g * data[40+column] + d * data[56+column];
|
962
|
+
|
963
|
+
beta[3] = g * data[8+column] - e * data[24+column] +
|
964
|
+
d * data[40+column] - b * data[56+column];
|
965
|
+
|
966
|
+
theta[0] = a * (data[column] + data[32+column]);
|
967
|
+
theta[3] = a * (data[column] - data[32+column]);
|
968
|
+
|
969
|
+
theta[1] = alpha[0] + alpha[3];
|
970
|
+
theta[2] = alpha[1] - alpha[2];
|
971
|
+
|
972
|
+
gamma[0] = theta[0] + theta[1];
|
973
|
+
gamma[1] = theta[3] + theta[2];
|
974
|
+
gamma[2] = theta[3] - theta[2];
|
975
|
+
gamma[3] = theta[0] - theta[1];
|
976
|
+
|
977
|
+
data[ column] = gamma[0] + beta[0];
|
978
|
+
data[ 8 + column] = gamma[1] + beta[1];
|
979
|
+
data[16 + column] = gamma[2] + beta[2];
|
980
|
+
data[24 + column] = gamma[3] + beta[3];
|
981
|
+
|
982
|
+
data[32 + column] = gamma[3] - beta[3];
|
983
|
+
data[40 + column] = gamma[2] - beta[2];
|
984
|
+
data[48 + column] = gamma[1] - beta[1];
|
985
|
+
data[56 + column] = gamma[0] - beta[0];
|
986
|
+
}
|
987
|
+
}
|
988
|
+
|
989
|
+
|
990
|
+
//
|
991
|
+
// SSE2 Implementation
|
992
|
+
//
|
993
|
+
|
994
|
+
template <int zeroedRows>
|
995
|
+
void
|
996
|
+
dctInverse8x8_sse2 (float *data)
|
997
|
+
{
|
998
|
+
#ifdef IMF_HAVE_SSE2
|
999
|
+
__m128 a = {3.535536e-01f,3.535536e-01f,3.535536e-01f,3.535536e-01f};
|
1000
|
+
__m128 b = {4.903927e-01f,4.903927e-01f,4.903927e-01f,4.903927e-01f};
|
1001
|
+
__m128 c = {4.619398e-01f,4.619398e-01f,4.619398e-01f,4.619398e-01f};
|
1002
|
+
__m128 d = {4.157349e-01f,4.157349e-01f,4.157349e-01f,4.157349e-01f};
|
1003
|
+
__m128 e = {2.777855e-01f,2.777855e-01f,2.777855e-01f,2.777855e-01f};
|
1004
|
+
__m128 f = {1.913422e-01f,1.913422e-01f,1.913422e-01f,1.913422e-01f};
|
1005
|
+
__m128 g = {9.754573e-02f,9.754573e-02f,9.754573e-02f,9.754573e-02f};
|
1006
|
+
|
1007
|
+
__m128 c0 = {3.535536e-01f, 3.535536e-01f, 3.535536e-01f, 3.535536e-01f};
|
1008
|
+
__m128 c1 = {4.619398e-01f, 1.913422e-01f,-1.913422e-01f,-4.619398e-01f};
|
1009
|
+
__m128 c2 = {3.535536e-01f,-3.535536e-01f,-3.535536e-01f, 3.535536e-01f};
|
1010
|
+
__m128 c3 = {1.913422e-01f,-4.619398e-01f, 4.619398e-01f,-1.913422e-01f};
|
1011
|
+
|
1012
|
+
__m128 c4 = {4.903927e-01f, 4.157349e-01f, 2.777855e-01f, 9.754573e-02f};
|
1013
|
+
__m128 c5 = {4.157349e-01f,-9.754573e-02f,-4.903927e-01f,-2.777855e-01f};
|
1014
|
+
__m128 c6 = {2.777855e-01f,-4.903927e-01f, 9.754573e-02f, 4.157349e-01f};
|
1015
|
+
__m128 c7 = {9.754573e-02f,-2.777855e-01f, 4.157349e-01f,-4.903927e-01f};
|
1016
|
+
|
1017
|
+
__m128 *srcVec = (__m128 *)data;
|
1018
|
+
__m128 x[8], evenSum, oddSum;
|
1019
|
+
__m128 in[8], alpha[4], beta[4], theta[4], gamma[4];
|
1020
|
+
|
1021
|
+
//
|
1022
|
+
// Rows -
|
1023
|
+
//
|
1024
|
+
// Treat this just like matrix-vector multiplication. The
|
1025
|
+
// trick is to note that:
|
1026
|
+
//
|
1027
|
+
// [M00 M01 M02 M03][v0] [(v0 M00) + (v1 M01) + (v2 M02) + (v3 M03)]
|
1028
|
+
// [M10 M11 M12 M13][v1] = [(v0 M10) + (v1 M11) + (v2 M12) + (v3 M13)]
|
1029
|
+
// [M20 M21 M22 M23][v2] [(v0 M20) + (v1 M21) + (v2 M22) + (v3 M23)]
|
1030
|
+
// [M30 M31 M32 M33][v3] [(v0 M30) + (v1 M31) + (v2 M32) + (v3 M33)]
|
1031
|
+
//
|
1032
|
+
// Then, we can fill a register with v_i and multiply by the i-th column
|
1033
|
+
// of M, accumulating across all i-s.
|
1034
|
+
//
|
1035
|
+
// The kids refer to the populating of a register with a single value
|
1036
|
+
// "broadcasting", and it can be done with a shuffle instruction. It
|
1037
|
+
// seems to be the slowest part of the whole ordeal.
|
1038
|
+
//
|
1039
|
+
// Our matrix columns are stored above in c0-c7. c0-3 make up M1, and
|
1040
|
+
// c4-7 are from M2.
|
1041
|
+
//
|
1042
|
+
|
1043
|
+
#define DCT_INVERSE_8x8_SS2_ROW_LOOP(i) \
|
1044
|
+
/* \
|
1045
|
+
* Broadcast the components of the row \
|
1046
|
+
*/ \
|
1047
|
+
\
|
1048
|
+
x[0] = _mm_shuffle_ps (srcVec[2 * i], \
|
1049
|
+
srcVec[2 * i], \
|
1050
|
+
_MM_SHUFFLE (0, 0, 0, 0)); \
|
1051
|
+
\
|
1052
|
+
x[1] = _mm_shuffle_ps (srcVec[2 * i], \
|
1053
|
+
srcVec[2 * i], \
|
1054
|
+
_MM_SHUFFLE (1, 1, 1, 1)); \
|
1055
|
+
\
|
1056
|
+
x[2] = _mm_shuffle_ps (srcVec[2 * i], \
|
1057
|
+
srcVec[2 * i], \
|
1058
|
+
_MM_SHUFFLE (2, 2, 2, 2)); \
|
1059
|
+
\
|
1060
|
+
x[3] = _mm_shuffle_ps (srcVec[2 * i], \
|
1061
|
+
srcVec[2 * i], \
|
1062
|
+
_MM_SHUFFLE (3, 3, 3, 3)); \
|
1063
|
+
\
|
1064
|
+
x[4] = _mm_shuffle_ps (srcVec[2 * i + 1], \
|
1065
|
+
srcVec[2 * i + 1], \
|
1066
|
+
_MM_SHUFFLE (0, 0, 0, 0)); \
|
1067
|
+
\
|
1068
|
+
x[5] = _mm_shuffle_ps (srcVec[2 * i + 1], \
|
1069
|
+
srcVec[2 * i + 1], \
|
1070
|
+
_MM_SHUFFLE (1, 1, 1, 1)); \
|
1071
|
+
\
|
1072
|
+
x[6] = _mm_shuffle_ps (srcVec[2 * i + 1], \
|
1073
|
+
srcVec[2 * i + 1], \
|
1074
|
+
_MM_SHUFFLE (2, 2, 2, 2)); \
|
1075
|
+
\
|
1076
|
+
x[7] = _mm_shuffle_ps (srcVec[2 * i + 1], \
|
1077
|
+
srcVec[2 * i + 1], \
|
1078
|
+
_MM_SHUFFLE (3, 3, 3, 3)); \
|
1079
|
+
/* \
|
1080
|
+
* Multiply the components by each column of the matrix \
|
1081
|
+
*/ \
|
1082
|
+
\
|
1083
|
+
x[0] = _mm_mul_ps (x[0], c0); \
|
1084
|
+
x[2] = _mm_mul_ps (x[2], c1); \
|
1085
|
+
x[4] = _mm_mul_ps (x[4], c2); \
|
1086
|
+
x[6] = _mm_mul_ps (x[6], c3); \
|
1087
|
+
\
|
1088
|
+
x[1] = _mm_mul_ps (x[1], c4); \
|
1089
|
+
x[3] = _mm_mul_ps (x[3], c5); \
|
1090
|
+
x[5] = _mm_mul_ps (x[5], c6); \
|
1091
|
+
x[7] = _mm_mul_ps (x[7], c7); \
|
1092
|
+
\
|
1093
|
+
/* \
|
1094
|
+
* Add across \
|
1095
|
+
*/ \
|
1096
|
+
\
|
1097
|
+
evenSum = _mm_setzero_ps(); \
|
1098
|
+
evenSum = _mm_add_ps (evenSum, x[0]); \
|
1099
|
+
evenSum = _mm_add_ps (evenSum, x[2]); \
|
1100
|
+
evenSum = _mm_add_ps (evenSum, x[4]); \
|
1101
|
+
evenSum = _mm_add_ps (evenSum, x[6]); \
|
1102
|
+
\
|
1103
|
+
oddSum = _mm_setzero_ps(); \
|
1104
|
+
oddSum = _mm_add_ps (oddSum, x[1]); \
|
1105
|
+
oddSum = _mm_add_ps (oddSum, x[3]); \
|
1106
|
+
oddSum = _mm_add_ps (oddSum, x[5]); \
|
1107
|
+
oddSum = _mm_add_ps (oddSum, x[7]); \
|
1108
|
+
\
|
1109
|
+
/* \
|
1110
|
+
* Final Sum: \
|
1111
|
+
* out [0, 1, 2, 3] = evenSum + oddSum \
|
1112
|
+
* out [7, 6, 5, 4] = evenSum - oddSum \
|
1113
|
+
*/ \
|
1114
|
+
\
|
1115
|
+
srcVec[2 * i] = _mm_add_ps (evenSum, oddSum); \
|
1116
|
+
srcVec[2 * i + 1] = _mm_sub_ps (evenSum, oddSum); \
|
1117
|
+
srcVec[2 * i + 1] = _mm_shuffle_ps (srcVec[2 * i + 1], \
|
1118
|
+
srcVec[2 * i + 1], \
|
1119
|
+
_MM_SHUFFLE (0, 1, 2, 3));
|
1120
|
+
|
1121
|
+
switch (zeroedRows)
|
1122
|
+
{
|
1123
|
+
case 0:
|
1124
|
+
default:
|
1125
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
|
1126
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
|
1127
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
|
1128
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
|
1129
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
|
1130
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
|
1131
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (6)
|
1132
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (7)
|
1133
|
+
break;
|
1134
|
+
|
1135
|
+
case 1:
|
1136
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
|
1137
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
|
1138
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
|
1139
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
|
1140
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
|
1141
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
|
1142
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (6)
|
1143
|
+
break;
|
1144
|
+
|
1145
|
+
case 2:
|
1146
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
|
1147
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
|
1148
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
|
1149
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
|
1150
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
|
1151
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (5)
|
1152
|
+
break;
|
1153
|
+
|
1154
|
+
case 3:
|
1155
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
|
1156
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
|
1157
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
|
1158
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
|
1159
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (4)
|
1160
|
+
break;
|
1161
|
+
|
1162
|
+
case 4:
|
1163
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
|
1164
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
|
1165
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
|
1166
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (3)
|
1167
|
+
break;
|
1168
|
+
|
1169
|
+
case 5:
|
1170
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
|
1171
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
|
1172
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (2)
|
1173
|
+
break;
|
1174
|
+
|
1175
|
+
case 6:
|
1176
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
|
1177
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (1)
|
1178
|
+
break;
|
1179
|
+
|
1180
|
+
case 7:
|
1181
|
+
DCT_INVERSE_8x8_SS2_ROW_LOOP (0)
|
1182
|
+
break;
|
1183
|
+
}
|
1184
|
+
|
1185
|
+
//
|
1186
|
+
// Columns -
|
1187
|
+
//
|
1188
|
+
// This is slightly more straightforward, if less readable. Here
|
1189
|
+
// we just operate on 4 columns at a time, in two batches.
|
1190
|
+
//
|
1191
|
+
// The slight mess is to try and cache sub-expressions, which
|
1192
|
+
// we ignore in the row-wise pass.
|
1193
|
+
//
|
1194
|
+
|
1195
|
+
for (int col = 0; col < 2; ++col)
|
1196
|
+
{
|
1197
|
+
|
1198
|
+
for (int i = 0; i < 8; ++i)
|
1199
|
+
in[i] = srcVec[2 * i + col];
|
1200
|
+
|
1201
|
+
alpha[0] = _mm_mul_ps (c, in[2]);
|
1202
|
+
alpha[1] = _mm_mul_ps (f, in[2]);
|
1203
|
+
alpha[2] = _mm_mul_ps (c, in[6]);
|
1204
|
+
alpha[3] = _mm_mul_ps (f, in[6]);
|
1205
|
+
|
1206
|
+
beta[0] = _mm_add_ps (_mm_add_ps (_mm_mul_ps (in[1], b),
|
1207
|
+
_mm_mul_ps (in[3], d)),
|
1208
|
+
_mm_add_ps (_mm_mul_ps (in[5], e),
|
1209
|
+
_mm_mul_ps (in[7], g)));
|
1210
|
+
|
1211
|
+
beta[1] = _mm_sub_ps (_mm_sub_ps (_mm_mul_ps (in[1], d),
|
1212
|
+
_mm_mul_ps (in[3], g)),
|
1213
|
+
_mm_add_ps (_mm_mul_ps (in[5], b),
|
1214
|
+
_mm_mul_ps (in[7], e)));
|
1215
|
+
|
1216
|
+
beta[2] = _mm_add_ps (_mm_sub_ps (_mm_mul_ps (in[1], e),
|
1217
|
+
_mm_mul_ps (in[3], b)),
|
1218
|
+
_mm_add_ps (_mm_mul_ps (in[5], g),
|
1219
|
+
_mm_mul_ps (in[7], d)));
|
1220
|
+
|
1221
|
+
beta[3] = _mm_add_ps (_mm_sub_ps (_mm_mul_ps (in[1], g),
|
1222
|
+
_mm_mul_ps (in[3], e)),
|
1223
|
+
_mm_sub_ps (_mm_mul_ps (in[5], d),
|
1224
|
+
_mm_mul_ps (in[7], b)));
|
1225
|
+
|
1226
|
+
theta[0] = _mm_mul_ps (a, _mm_add_ps (in[0], in[4]));
|
1227
|
+
theta[3] = _mm_mul_ps (a, _mm_sub_ps (in[0], in[4]));
|
1228
|
+
|
1229
|
+
theta[1] = _mm_add_ps (alpha[0], alpha[3]);
|
1230
|
+
theta[2] = _mm_sub_ps (alpha[1], alpha[2]);
|
1231
|
+
|
1232
|
+
gamma[0] = _mm_add_ps (theta[0], theta[1]);
|
1233
|
+
gamma[1] = _mm_add_ps (theta[3], theta[2]);
|
1234
|
+
gamma[2] = _mm_sub_ps (theta[3], theta[2]);
|
1235
|
+
gamma[3] = _mm_sub_ps (theta[0], theta[1]);
|
1236
|
+
|
1237
|
+
srcVec[ col] = _mm_add_ps (gamma[0], beta[0]);
|
1238
|
+
srcVec[2+col] = _mm_add_ps (gamma[1], beta[1]);
|
1239
|
+
srcVec[4+col] = _mm_add_ps (gamma[2], beta[2]);
|
1240
|
+
srcVec[6+col] = _mm_add_ps (gamma[3], beta[3]);
|
1241
|
+
|
1242
|
+
srcVec[ 8+col] = _mm_sub_ps (gamma[3], beta[3]);
|
1243
|
+
srcVec[10+col] = _mm_sub_ps (gamma[2], beta[2]);
|
1244
|
+
srcVec[12+col] = _mm_sub_ps (gamma[1], beta[1]);
|
1245
|
+
srcVec[14+col] = _mm_sub_ps (gamma[0], beta[0]);
|
1246
|
+
}
|
1247
|
+
|
1248
|
+
#else /* IMF_HAVE_SSE2 */
|
1249
|
+
|
1250
|
+
dctInverse8x8_scalar<zeroedRows> (data);
|
1251
|
+
|
1252
|
+
#endif /* IMF_HAVE_SSE2 */
|
1253
|
+
}
|
1254
|
+
|
1255
|
+
|
1256
|
+
//
|
1257
|
+
// AVX Implementation
|
1258
|
+
//
|
1259
|
+
|
1260
|
+
#define STR(A) #A
|
1261
|
+
|
1262
|
+
#define IDCT_AVX_SETUP_2_ROWS(_DST0, _DST1, _TMP0, _TMP1, \
|
1263
|
+
_OFF00, _OFF01, _OFF10, _OFF11) \
|
1264
|
+
"vmovaps " STR(_OFF00) "(%0), %%xmm" STR(_TMP0) " \n" \
|
1265
|
+
"vmovaps " STR(_OFF01) "(%0), %%xmm" STR(_TMP1) " \n" \
|
1266
|
+
" \n" \
|
1267
|
+
"vinsertf128 $1, " STR(_OFF10) "(%0), %%ymm" STR(_TMP0) ", %%ymm" STR(_TMP0) " \n" \
|
1268
|
+
"vinsertf128 $1, " STR(_OFF11) "(%0), %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP1) " \n" \
|
1269
|
+
" \n" \
|
1270
|
+
"vunpcklpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST0) " \n" \
|
1271
|
+
"vunpckhpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST1) " \n" \
|
1272
|
+
" \n" \
|
1273
|
+
"vunpcklps %%ymm" STR(_DST1) ", %%ymm" STR(_DST0) ", %%ymm" STR(_TMP0) " \n" \
|
1274
|
+
"vunpckhps %%ymm" STR(_DST1) ", %%ymm" STR(_DST0) ", %%ymm" STR(_TMP1) " \n" \
|
1275
|
+
" \n" \
|
1276
|
+
"vunpcklpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST0) " \n" \
|
1277
|
+
"vunpckhpd %%ymm" STR(_TMP1) ", %%ymm" STR(_TMP0) ", %%ymm" STR(_DST1) " \n"
|
1278
|
+
|
1279
|
+
#define IDCT_AVX_MMULT_ROWS(_SRC) \
|
1280
|
+
/* Broadcast the source values into y12-y15 */ \
|
1281
|
+
"vpermilps $0x00, " STR(_SRC) ", %%ymm12 \n" \
|
1282
|
+
"vpermilps $0x55, " STR(_SRC) ", %%ymm13 \n" \
|
1283
|
+
"vpermilps $0xaa, " STR(_SRC) ", %%ymm14 \n" \
|
1284
|
+
"vpermilps $0xff, " STR(_SRC) ", %%ymm15 \n" \
|
1285
|
+
\
|
1286
|
+
/* Multiple coefs and the broadcasted values */ \
|
1287
|
+
"vmulps %%ymm12, %%ymm8, %%ymm12 \n" \
|
1288
|
+
"vmulps %%ymm13, %%ymm9, %%ymm13 \n" \
|
1289
|
+
"vmulps %%ymm14, %%ymm10, %%ymm14 \n" \
|
1290
|
+
"vmulps %%ymm15, %%ymm11, %%ymm15 \n" \
|
1291
|
+
\
|
1292
|
+
/* Accumulate the result back into the source */ \
|
1293
|
+
"vaddps %%ymm13, %%ymm12, %%ymm12 \n" \
|
1294
|
+
"vaddps %%ymm15, %%ymm14, %%ymm14 \n" \
|
1295
|
+
"vaddps %%ymm14, %%ymm12, " STR(_SRC) "\n"
|
1296
|
+
|
1297
|
+
#define IDCT_AVX_EO_TO_ROW_HALVES(_EVEN, _ODD, _FRONT, _BACK) \
|
1298
|
+
"vsubps " STR(_ODD) "," STR(_EVEN) "," STR(_BACK) "\n" \
|
1299
|
+
"vaddps " STR(_ODD) "," STR(_EVEN) "," STR(_FRONT) "\n" \
|
1300
|
+
/* Reverse the back half */ \
|
1301
|
+
"vpermilps $0x1b," STR(_BACK) "," STR(_BACK) "\n"
|
1302
|
+
|
1303
|
+
/* In order to allow for path paths when we know certain rows
|
1304
|
+
* of the 8x8 block are zero, most of the body of the DCT is
|
1305
|
+
* in the following macro. Statements are wrapped in a ROWn()
|
1306
|
+
* macro, where n is the lowest row in the 8x8 block in which
|
1307
|
+
* they depend.
|
1308
|
+
*
|
1309
|
+
* This should work for the cases where we have 2-8 full rows.
|
1310
|
+
* the 1-row case is special, and we'll handle it seperately.
|
1311
|
+
*/
|
1312
|
+
#define IDCT_AVX_BODY \
|
1313
|
+
/* ==============================================
|
1314
|
+
* Row 1D DCT
|
1315
|
+
* ----------------------------------------------
|
1316
|
+
*/ \
|
1317
|
+
\
|
1318
|
+
/* Setup for the row-oriented 1D DCT. Assuming that (%0) holds
|
1319
|
+
* the row-major 8x8 block, load ymm0-3 with the even columns
|
1320
|
+
* and ymm4-7 with the odd columns. The lower half of the ymm
|
1321
|
+
* holds one row, while the upper half holds the next row.
|
1322
|
+
*
|
1323
|
+
* If our source is:
|
1324
|
+
* a0 a1 a2 a3 a4 a5 a6 a7
|
1325
|
+
* b0 b1 b2 b3 b4 b5 b6 b7
|
1326
|
+
*
|
1327
|
+
* We'll be forming:
|
1328
|
+
* a0 a2 a4 a6 b0 b2 b4 b6
|
1329
|
+
* a1 a3 a5 a7 b1 b3 b5 b7
|
1330
|
+
*/ \
|
1331
|
+
ROW0( IDCT_AVX_SETUP_2_ROWS(0, 4, 14, 15, 0, 16, 32, 48) ) \
|
1332
|
+
ROW2( IDCT_AVX_SETUP_2_ROWS(1, 5, 12, 13, 64, 80, 96, 112) ) \
|
1333
|
+
ROW4( IDCT_AVX_SETUP_2_ROWS(2, 6, 10, 11, 128, 144, 160, 176) ) \
|
1334
|
+
ROW6( IDCT_AVX_SETUP_2_ROWS(3, 7, 8, 9, 192, 208, 224, 240) ) \
|
1335
|
+
\
|
1336
|
+
/* Multiple the even columns (ymm0-3) by the matrix M1
|
1337
|
+
* storing the results back in ymm0-3
|
1338
|
+
*
|
1339
|
+
* Assume that (%1) holds the matrix in column major order
|
1340
|
+
*/ \
|
1341
|
+
"vbroadcastf128 (%1), %%ymm8 \n" \
|
1342
|
+
"vbroadcastf128 16(%1), %%ymm9 \n" \
|
1343
|
+
"vbroadcastf128 32(%1), %%ymm10 \n" \
|
1344
|
+
"vbroadcastf128 48(%1), %%ymm11 \n" \
|
1345
|
+
\
|
1346
|
+
ROW0( IDCT_AVX_MMULT_ROWS(%%ymm0) ) \
|
1347
|
+
ROW2( IDCT_AVX_MMULT_ROWS(%%ymm1) ) \
|
1348
|
+
ROW4( IDCT_AVX_MMULT_ROWS(%%ymm2) ) \
|
1349
|
+
ROW6( IDCT_AVX_MMULT_ROWS(%%ymm3) ) \
|
1350
|
+
\
|
1351
|
+
/* Repeat, but with the odd columns (ymm4-7) and the
|
1352
|
+
* matrix M2
|
1353
|
+
*/ \
|
1354
|
+
"vbroadcastf128 64(%1), %%ymm8 \n" \
|
1355
|
+
"vbroadcastf128 80(%1), %%ymm9 \n" \
|
1356
|
+
"vbroadcastf128 96(%1), %%ymm10 \n" \
|
1357
|
+
"vbroadcastf128 112(%1), %%ymm11 \n" \
|
1358
|
+
\
|
1359
|
+
ROW0( IDCT_AVX_MMULT_ROWS(%%ymm4) ) \
|
1360
|
+
ROW2( IDCT_AVX_MMULT_ROWS(%%ymm5) ) \
|
1361
|
+
ROW4( IDCT_AVX_MMULT_ROWS(%%ymm6) ) \
|
1362
|
+
ROW6( IDCT_AVX_MMULT_ROWS(%%ymm7) ) \
|
1363
|
+
\
|
1364
|
+
/* Sum the M1 (ymm0-3) and M2 (ymm4-7) results to get the
|
1365
|
+
* front halves of the results, and difference to get the
|
1366
|
+
* back halves. The front halfs end up in ymm0-3, the back
|
1367
|
+
* halves end up in ymm12-15.
|
1368
|
+
*/ \
|
1369
|
+
ROW0( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm0, %%ymm4, %%ymm0, %%ymm12) ) \
|
1370
|
+
ROW2( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm1, %%ymm5, %%ymm1, %%ymm13) ) \
|
1371
|
+
ROW4( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm2, %%ymm6, %%ymm2, %%ymm14) ) \
|
1372
|
+
ROW6( IDCT_AVX_EO_TO_ROW_HALVES(%%ymm3, %%ymm7, %%ymm3, %%ymm15) ) \
|
1373
|
+
\
|
1374
|
+
/* Reassemble the rows halves into ymm0-7 */ \
|
1375
|
+
ROW7( "vperm2f128 $0x13, %%ymm3, %%ymm15, %%ymm7 \n" ) \
|
1376
|
+
ROW6( "vperm2f128 $0x02, %%ymm3, %%ymm15, %%ymm6 \n" ) \
|
1377
|
+
ROW5( "vperm2f128 $0x13, %%ymm2, %%ymm14, %%ymm5 \n" ) \
|
1378
|
+
ROW4( "vperm2f128 $0x02, %%ymm2, %%ymm14, %%ymm4 \n" ) \
|
1379
|
+
ROW3( "vperm2f128 $0x13, %%ymm1, %%ymm13, %%ymm3 \n" ) \
|
1380
|
+
ROW2( "vperm2f128 $0x02, %%ymm1, %%ymm13, %%ymm2 \n" ) \
|
1381
|
+
ROW1( "vperm2f128 $0x13, %%ymm0, %%ymm12, %%ymm1 \n" ) \
|
1382
|
+
ROW0( "vperm2f128 $0x02, %%ymm0, %%ymm12, %%ymm0 \n" ) \
|
1383
|
+
\
|
1384
|
+
\
|
1385
|
+
/* ==============================================
|
1386
|
+
* Column 1D DCT
|
1387
|
+
* ----------------------------------------------
|
1388
|
+
*/ \
|
1389
|
+
\
|
1390
|
+
/* Rows should be in ymm0-7, and M2 columns should still be
|
1391
|
+
* preserved in ymm8-11. M2 has 4 unique values (and +-
|
1392
|
+
* versions of each), and all (positive) values appear in
|
1393
|
+
* the first column (and row), which is in ymm8.
|
1394
|
+
*
|
1395
|
+
* For the column-wise DCT, we need to:
|
1396
|
+
* 1) Broadcast each element a row of M2 into 4 vectors
|
1397
|
+
* 2) Multiple the odd rows (ymm1,3,5,7) by the broadcasts.
|
1398
|
+
* 3) Accumulate into ymm12-15 for the odd outputs.
|
1399
|
+
*
|
1400
|
+
* Instead of doing 16 broadcasts for each element in M2,
|
1401
|
+
* do 4, filling y8-11 with:
|
1402
|
+
*
|
1403
|
+
* ymm8: [ b b b b | b b b b ]
|
1404
|
+
* ymm9: [ d d d d | d d d d ]
|
1405
|
+
* ymm10: [ e e e e | e e e e ]
|
1406
|
+
* ymm11: [ g g g g | g g g g ]
|
1407
|
+
*
|
1408
|
+
* And deal with the negative values by subtracting during accum.
|
1409
|
+
*/ \
|
1410
|
+
"vpermilps $0xff, %%ymm8, %%ymm11 \n" \
|
1411
|
+
"vpermilps $0xaa, %%ymm8, %%ymm10 \n" \
|
1412
|
+
"vpermilps $0x55, %%ymm8, %%ymm9 \n" \
|
1413
|
+
"vpermilps $0x00, %%ymm8, %%ymm8 \n" \
|
1414
|
+
\
|
1415
|
+
/* This one is easy, since we have ymm12-15 open for scratch
|
1416
|
+
* ymm12 = b ymm1 + d ymm3 + e ymm5 + g ymm7
|
1417
|
+
*/ \
|
1418
|
+
ROW1( "vmulps %%ymm1, %%ymm8, %%ymm12 \n" ) \
|
1419
|
+
ROW3( "vmulps %%ymm3, %%ymm9, %%ymm13 \n" ) \
|
1420
|
+
ROW5( "vmulps %%ymm5, %%ymm10, %%ymm14 \n" ) \
|
1421
|
+
ROW7( "vmulps %%ymm7, %%ymm11, %%ymm15 \n" ) \
|
1422
|
+
\
|
1423
|
+
ROW3( "vaddps %%ymm12, %%ymm13, %%ymm12 \n" ) \
|
1424
|
+
ROW7( "vaddps %%ymm14, %%ymm15, %%ymm14 \n" ) \
|
1425
|
+
ROW5( "vaddps %%ymm12, %%ymm14, %%ymm12 \n" ) \
|
1426
|
+
\
|
1427
|
+
/* Tricker, since only y13-15 are open for scratch
|
1428
|
+
* ymm13 = d ymm1 - g ymm3 - b ymm5 - e ymm7
|
1429
|
+
*/ \
|
1430
|
+
ROW1( "vmulps %%ymm1, %%ymm9, %%ymm13 \n" ) \
|
1431
|
+
ROW3( "vmulps %%ymm3, %%ymm11, %%ymm14 \n" ) \
|
1432
|
+
ROW5( "vmulps %%ymm5, %%ymm8, %%ymm15 \n" ) \
|
1433
|
+
\
|
1434
|
+
ROW5( "vaddps %%ymm14, %%ymm15, %%ymm14 \n" ) \
|
1435
|
+
ROW3( "vsubps %%ymm14, %%ymm13, %%ymm13 \n" ) \
|
1436
|
+
\
|
1437
|
+
ROW7( "vmulps %%ymm7, %%ymm10, %%ymm15 \n" ) \
|
1438
|
+
ROW7( "vsubps %%ymm15, %%ymm13, %%ymm13 \n" ) \
|
1439
|
+
\
|
1440
|
+
/* Tricker still, as only y14-15 are open for scratch
|
1441
|
+
* ymm14 = e ymm1 - b ymm3 + g ymm5 + d ymm7
|
1442
|
+
*/ \
|
1443
|
+
ROW1( "vmulps %%ymm1, %%ymm10, %%ymm14 \n" ) \
|
1444
|
+
ROW3( "vmulps %%ymm3, %%ymm8, %%ymm15 \n" ) \
|
1445
|
+
\
|
1446
|
+
ROW3( "vsubps %%ymm15, %%ymm14, %%ymm14 \n" ) \
|
1447
|
+
\
|
1448
|
+
ROW5( "vmulps %%ymm5, %%ymm11, %%ymm15 \n" ) \
|
1449
|
+
ROW5( "vaddps %%ymm15, %%ymm14, %%ymm14 \n" ) \
|
1450
|
+
\
|
1451
|
+
ROW7( "vmulps %%ymm7, %%ymm9, %%ymm15 \n" ) \
|
1452
|
+
ROW7( "vaddps %%ymm15, %%ymm14, %%ymm14 \n" ) \
|
1453
|
+
\
|
1454
|
+
\
|
1455
|
+
/* Easy, as we can blow away ymm1,3,5,7 for scratch
|
1456
|
+
* ymm15 = g ymm1 - e ymm3 + d ymm5 - b ymm7
|
1457
|
+
*/ \
|
1458
|
+
ROW1( "vmulps %%ymm1, %%ymm11, %%ymm15 \n" ) \
|
1459
|
+
ROW3( "vmulps %%ymm3, %%ymm10, %%ymm3 \n" ) \
|
1460
|
+
ROW5( "vmulps %%ymm5, %%ymm9, %%ymm5 \n" ) \
|
1461
|
+
ROW7( "vmulps %%ymm7, %%ymm8, %%ymm7 \n" ) \
|
1462
|
+
\
|
1463
|
+
ROW5( "vaddps %%ymm15, %%ymm5, %%ymm15 \n" ) \
|
1464
|
+
ROW7( "vaddps %%ymm3, %%ymm7, %%ymm3 \n" ) \
|
1465
|
+
ROW3( "vsubps %%ymm3, %%ymm15, %%ymm15 \n" ) \
|
1466
|
+
\
|
1467
|
+
\
|
1468
|
+
/* Load coefs for M1. Because we're going to broadcast
|
1469
|
+
* coefs, we don't need to load the actual structure from
|
1470
|
+
* M1. Instead, just load enough that we can broadcast.
|
1471
|
+
* There are only 6 unique values in M1, but they're in +-
|
1472
|
+
* pairs, leaving only 3 unique coefs if we add and subtract
|
1473
|
+
* properly.
|
1474
|
+
*
|
1475
|
+
* Fill ymm1 with coef[2] = [ a a c f | a a c f ]
|
1476
|
+
* Broadcast ymm5 with [ f f f f | f f f f ]
|
1477
|
+
* Broadcast ymm3 with [ c c c c | c c c c ]
|
1478
|
+
* Broadcast ymm1 with [ a a a a | a a a a ]
|
1479
|
+
*/ \
|
1480
|
+
"vbroadcastf128 8(%1), %%ymm1 \n" \
|
1481
|
+
"vpermilps $0xff, %%ymm1, %%ymm5 \n" \
|
1482
|
+
"vpermilps $0xaa, %%ymm1, %%ymm3 \n" \
|
1483
|
+
"vpermilps $0x00, %%ymm1, %%ymm1 \n" \
|
1484
|
+
\
|
1485
|
+
/* If we expand E = [M1] [x0 x2 x4 x6]^t, we get the following
|
1486
|
+
* common expressions:
|
1487
|
+
*
|
1488
|
+
* E_0 = ymm8 = (a ymm0 + a ymm4) + (c ymm2 + f ymm6)
|
1489
|
+
* E_3 = ymm11 = (a ymm0 + a ymm4) - (c ymm2 + f ymm6)
|
1490
|
+
*
|
1491
|
+
* E_1 = ymm9 = (a ymm0 - a ymm4) + (f ymm2 - c ymm6)
|
1492
|
+
* E_2 = ymm10 = (a ymm0 - a ymm4) - (f ymm2 - c ymm6)
|
1493
|
+
*
|
1494
|
+
* Afterwards, ymm8-11 will hold the even outputs.
|
1495
|
+
*/ \
|
1496
|
+
\
|
1497
|
+
/* ymm11 = (a ymm0 + a ymm4), ymm1 = (a ymm0 - a ymm4) */ \
|
1498
|
+
ROW0( "vmulps %%ymm1, %%ymm0, %%ymm11 \n" ) \
|
1499
|
+
ROW4( "vmulps %%ymm1, %%ymm4, %%ymm4 \n" ) \
|
1500
|
+
ROW0( "vmovaps %%ymm11, %%ymm1 \n" ) \
|
1501
|
+
ROW4( "vaddps %%ymm4, %%ymm11, %%ymm11 \n" ) \
|
1502
|
+
ROW4( "vsubps %%ymm4, %%ymm1, %%ymm1 \n" ) \
|
1503
|
+
\
|
1504
|
+
/* ymm7 = (c ymm2 + f ymm6) */ \
|
1505
|
+
ROW2( "vmulps %%ymm3, %%ymm2, %%ymm7 \n" ) \
|
1506
|
+
ROW6( "vmulps %%ymm5, %%ymm6, %%ymm9 \n" ) \
|
1507
|
+
ROW6( "vaddps %%ymm9, %%ymm7, %%ymm7 \n" ) \
|
1508
|
+
\
|
1509
|
+
/* E_0 = ymm8 = (a ymm0 + a ymm4) + (c ymm2 + f ymm6)
|
1510
|
+
* E_3 = ymm11 = (a ymm0 + a ymm4) - (c ymm2 + f ymm6)
|
1511
|
+
*/ \
|
1512
|
+
ROW0( "vmovaps %%ymm11, %%ymm8 \n" ) \
|
1513
|
+
ROW2( "vaddps %%ymm7, %%ymm8, %%ymm8 \n" ) \
|
1514
|
+
ROW2( "vsubps %%ymm7, %%ymm11, %%ymm11 \n" ) \
|
1515
|
+
\
|
1516
|
+
/* ymm7 = (f ymm2 - c ymm6) */ \
|
1517
|
+
ROW2( "vmulps %%ymm5, %%ymm2, %%ymm7 \n" ) \
|
1518
|
+
ROW6( "vmulps %%ymm3, %%ymm6, %%ymm9 \n" ) \
|
1519
|
+
ROW6( "vsubps %%ymm9, %%ymm7, %%ymm7 \n" ) \
|
1520
|
+
\
|
1521
|
+
/* E_1 = ymm9 = (a ymm0 - a ymm4) + (f ymm2 - c ymm6)
|
1522
|
+
* E_2 = ymm10 = (a ymm0 - a ymm4) - (f ymm2 - c ymm6)
|
1523
|
+
*/ \
|
1524
|
+
ROW0( "vmovaps %%ymm1, %%ymm9 \n" ) \
|
1525
|
+
ROW0( "vmovaps %%ymm1, %%ymm10 \n" ) \
|
1526
|
+
ROW2( "vaddps %%ymm7, %%ymm1, %%ymm9 \n" ) \
|
1527
|
+
ROW2( "vsubps %%ymm7, %%ymm1, %%ymm10 \n" ) \
|
1528
|
+
\
|
1529
|
+
/* Add the even (ymm8-11) and the odds (ymm12-15),
|
1530
|
+
* placing the results into ymm0-7
|
1531
|
+
*/ \
|
1532
|
+
"vaddps %%ymm12, %%ymm8, %%ymm0 \n" \
|
1533
|
+
"vaddps %%ymm13, %%ymm9, %%ymm1 \n" \
|
1534
|
+
"vaddps %%ymm14, %%ymm10, %%ymm2 \n" \
|
1535
|
+
"vaddps %%ymm15, %%ymm11, %%ymm3 \n" \
|
1536
|
+
\
|
1537
|
+
"vsubps %%ymm12, %%ymm8, %%ymm7 \n" \
|
1538
|
+
"vsubps %%ymm13, %%ymm9, %%ymm6 \n" \
|
1539
|
+
"vsubps %%ymm14, %%ymm10, %%ymm5 \n" \
|
1540
|
+
"vsubps %%ymm15, %%ymm11, %%ymm4 \n" \
|
1541
|
+
\
|
1542
|
+
/* Copy out the results from ymm0-7 */ \
|
1543
|
+
"vmovaps %%ymm0, (%0) \n" \
|
1544
|
+
"vmovaps %%ymm1, 32(%0) \n" \
|
1545
|
+
"vmovaps %%ymm2, 64(%0) \n" \
|
1546
|
+
"vmovaps %%ymm3, 96(%0) \n" \
|
1547
|
+
"vmovaps %%ymm4, 128(%0) \n" \
|
1548
|
+
"vmovaps %%ymm5, 160(%0) \n" \
|
1549
|
+
"vmovaps %%ymm6, 192(%0) \n" \
|
1550
|
+
"vmovaps %%ymm7, 224(%0) \n"
|
1551
|
+
|
1552
|
+
/* Output, input, and clobber (OIC) sections of the inline asm */
|
1553
|
+
#define IDCT_AVX_OIC(_IN0) \
|
1554
|
+
: /* Output */ \
|
1555
|
+
: /* Input */ "r"(_IN0), "r"(sAvxCoef) \
|
1556
|
+
: /* Clobber */ "memory", \
|
1557
|
+
"%xmm0", "%xmm1", "%xmm2", "%xmm3", \
|
1558
|
+
"%xmm4", "%xmm5", "%xmm6", "%xmm7", \
|
1559
|
+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",\
|
1560
|
+
"%xmm12", "%xmm13", "%xmm14", "%xmm15"
|
1561
|
+
|
1562
|
+
/* Include vzeroupper for non-AVX builds */
|
1563
|
+
#ifndef __AVX__
|
1564
|
+
#define IDCT_AVX_ASM(_IN0) \
|
1565
|
+
__asm__( \
|
1566
|
+
IDCT_AVX_BODY \
|
1567
|
+
"vzeroupper \n" \
|
1568
|
+
IDCT_AVX_OIC(_IN0) \
|
1569
|
+
);
|
1570
|
+
#else /* __AVX__ */
|
1571
|
+
#define IDCT_AVX_ASM(_IN0) \
|
1572
|
+
__asm__( \
|
1573
|
+
IDCT_AVX_BODY \
|
1574
|
+
IDCT_AVX_OIC(_IN0) \
|
1575
|
+
);
|
1576
|
+
#endif /* __AVX__ */
|
1577
|
+
|
1578
|
+
template <int zeroedRows>
|
1579
|
+
void
|
1580
|
+
dctInverse8x8_avx (float *data)
|
1581
|
+
{
|
1582
|
+
#if defined IMF_HAVE_GCC_INLINEASM_64
|
1583
|
+
|
1584
|
+
/* The column-major version of M1, followed by the
|
1585
|
+
* column-major version of M2:
|
1586
|
+
*
|
1587
|
+
* [ a c a f ] [ b d e g ]
|
1588
|
+
* M1 = [ a f -a -c ] M2 = [ d -g -b -e ]
|
1589
|
+
* [ a -f -a c ] [ e -b g d ]
|
1590
|
+
* [ a -c a -f ] [ g -e d -b ]
|
1591
|
+
*/
|
1592
|
+
const float sAvxCoef[32] __attribute__((aligned(32))) = {
|
1593
|
+
3.535536e-01, 3.535536e-01, 3.535536e-01, 3.535536e-01, /* a a a a */
|
1594
|
+
4.619398e-01, 1.913422e-01, -1.913422e-01, -4.619398e-01, /* c f -f -c */
|
1595
|
+
3.535536e-01, -3.535536e-01, -3.535536e-01, 3.535536e-01, /* a -a -a a */
|
1596
|
+
1.913422e-01, -4.619398e-01, 4.619398e-01, -1.913422e-01, /* f -c c -f */
|
1597
|
+
|
1598
|
+
4.903927e-01, 4.157349e-01, 2.777855e-01, 9.754573e-02, /* b d e g */
|
1599
|
+
4.157349e-01, -9.754573e-02, -4.903927e-01, -2.777855e-01, /* d -g -b -e */
|
1600
|
+
2.777855e-01, -4.903927e-01, 9.754573e-02, 4.157349e-01, /* e -b g d */
|
1601
|
+
9.754573e-02, -2.777855e-01, 4.157349e-01, -4.903927e-01 /* g -e d -b */
|
1602
|
+
};
|
1603
|
+
|
1604
|
+
#define ROW0(_X) _X
|
1605
|
+
#define ROW1(_X) _X
|
1606
|
+
#define ROW2(_X) _X
|
1607
|
+
#define ROW3(_X) _X
|
1608
|
+
#define ROW4(_X) _X
|
1609
|
+
#define ROW5(_X) _X
|
1610
|
+
#define ROW6(_X) _X
|
1611
|
+
#define ROW7(_X) _X
|
1612
|
+
|
1613
|
+
if (zeroedRows == 0) {
|
1614
|
+
|
1615
|
+
IDCT_AVX_ASM(data)
|
1616
|
+
|
1617
|
+
} else if (zeroedRows == 1) {
|
1618
|
+
|
1619
|
+
#undef ROW7
|
1620
|
+
#define ROW7(_X)
|
1621
|
+
IDCT_AVX_ASM(data)
|
1622
|
+
|
1623
|
+
} else if (zeroedRows == 2) {
|
1624
|
+
|
1625
|
+
#undef ROW6
|
1626
|
+
#define ROW6(_X)
|
1627
|
+
IDCT_AVX_ASM(data)
|
1628
|
+
|
1629
|
+
} else if (zeroedRows == 3) {
|
1630
|
+
|
1631
|
+
#undef ROW5
|
1632
|
+
#define ROW5(_X)
|
1633
|
+
IDCT_AVX_ASM(data)
|
1634
|
+
|
1635
|
+
} else if (zeroedRows == 4) {
|
1636
|
+
|
1637
|
+
#undef ROW4
|
1638
|
+
#define ROW4(_X)
|
1639
|
+
IDCT_AVX_ASM(data)
|
1640
|
+
|
1641
|
+
} else if (zeroedRows == 5) {
|
1642
|
+
|
1643
|
+
#undef ROW3
|
1644
|
+
#define ROW3(_X)
|
1645
|
+
IDCT_AVX_ASM(data)
|
1646
|
+
|
1647
|
+
} else if (zeroedRows == 6) {
|
1648
|
+
|
1649
|
+
#undef ROW2
|
1650
|
+
#define ROW2(_X)
|
1651
|
+
IDCT_AVX_ASM(data)
|
1652
|
+
|
1653
|
+
} else if (zeroedRows == 7) {
|
1654
|
+
|
1655
|
+
__asm__(
|
1656
|
+
|
1657
|
+
/* ==============================================
|
1658
|
+
* Row 1D DCT
|
1659
|
+
* ----------------------------------------------
|
1660
|
+
*/
|
1661
|
+
IDCT_AVX_SETUP_2_ROWS(0, 4, 14, 15, 0, 16, 32, 48)
|
1662
|
+
|
1663
|
+
"vbroadcastf128 (%1), %%ymm8 \n"
|
1664
|
+
"vbroadcastf128 16(%1), %%ymm9 \n"
|
1665
|
+
"vbroadcastf128 32(%1), %%ymm10 \n"
|
1666
|
+
"vbroadcastf128 48(%1), %%ymm11 \n"
|
1667
|
+
|
1668
|
+
/* Stash a vector of [a a a a | a a a a] away in ymm2 */
|
1669
|
+
"vinsertf128 $1, %%xmm8, %%ymm8, %%ymm2 \n"
|
1670
|
+
|
1671
|
+
IDCT_AVX_MMULT_ROWS(%%ymm0)
|
1672
|
+
|
1673
|
+
"vbroadcastf128 64(%1), %%ymm8 \n"
|
1674
|
+
"vbroadcastf128 80(%1), %%ymm9 \n"
|
1675
|
+
"vbroadcastf128 96(%1), %%ymm10 \n"
|
1676
|
+
"vbroadcastf128 112(%1), %%ymm11 \n"
|
1677
|
+
|
1678
|
+
IDCT_AVX_MMULT_ROWS(%%ymm4)
|
1679
|
+
|
1680
|
+
IDCT_AVX_EO_TO_ROW_HALVES(%%ymm0, %%ymm4, %%ymm0, %%ymm12)
|
1681
|
+
|
1682
|
+
"vperm2f128 $0x02, %%ymm0, %%ymm12, %%ymm0 \n"
|
1683
|
+
|
1684
|
+
/* ==============================================
|
1685
|
+
* Column 1D DCT
|
1686
|
+
* ----------------------------------------------
|
1687
|
+
*/
|
1688
|
+
|
1689
|
+
/* DC only, so multiple by a and we're done */
|
1690
|
+
"vmulps %%ymm2, %%ymm0, %%ymm0 \n"
|
1691
|
+
|
1692
|
+
/* Copy out results */
|
1693
|
+
"vmovaps %%ymm0, (%0) \n"
|
1694
|
+
"vmovaps %%ymm0, 32(%0) \n"
|
1695
|
+
"vmovaps %%ymm0, 64(%0) \n"
|
1696
|
+
"vmovaps %%ymm0, 96(%0) \n"
|
1697
|
+
"vmovaps %%ymm0, 128(%0) \n"
|
1698
|
+
"vmovaps %%ymm0, 160(%0) \n"
|
1699
|
+
"vmovaps %%ymm0, 192(%0) \n"
|
1700
|
+
"vmovaps %%ymm0, 224(%0) \n"
|
1701
|
+
|
1702
|
+
#ifndef __AVX__
|
1703
|
+
"vzeroupper \n"
|
1704
|
+
#endif /* __AVX__ */
|
1705
|
+
IDCT_AVX_OIC(data)
|
1706
|
+
);
|
1707
|
+
} else {
|
1708
|
+
assert(false); // Invalid template instance parameter
|
1709
|
+
}
|
1710
|
+
#else /* IMF_HAVE_GCC_INLINEASM_64 */
|
1711
|
+
|
1712
|
+
dctInverse8x8_scalar<zeroedRows>(data);
|
1713
|
+
|
1714
|
+
#endif /* IMF_HAVE_GCC_INLINEASM_64 */
|
1715
|
+
}
|
1716
|
+
|
1717
|
+
|
1718
|
+
//
|
1719
|
+
// Full 8x8 Forward DCT:
|
1720
|
+
//
|
1721
|
+
// Base forward 8x8 DCT implementation. Works on the data in-place
|
1722
|
+
//
|
1723
|
+
// The implementation describedin Pennebaker + Mitchell,
|
1724
|
+
// section 4.3.2, and illustrated in figure 4-7
|
1725
|
+
//
|
1726
|
+
// The basic idea is that the 1D DCT math reduces to:
|
1727
|
+
//
|
1728
|
+
// 2*out_0 = c_4 [(s_07 + s_34) + (s_12 + s_56)]
|
1729
|
+
// 2*out_4 = c_4 [(s_07 + s_34) - (s_12 + s_56)]
|
1730
|
+
//
|
1731
|
+
// {2*out_2, 2*out_6} = rot_6 ((d_12 - d_56), (s_07 - s_34))
|
1732
|
+
//
|
1733
|
+
// {2*out_3, 2*out_5} = rot_-3 (d_07 - c_4 (s_12 - s_56),
|
1734
|
+
// d_34 - c_4 (d_12 + d_56))
|
1735
|
+
//
|
1736
|
+
// {2*out_1, 2*out_7} = rot_-1 (d_07 + c_4 (s_12 - s_56),
|
1737
|
+
// -d_34 - c_4 (d_12 + d_56))
|
1738
|
+
//
|
1739
|
+
// where:
|
1740
|
+
//
|
1741
|
+
// c_i = cos(i*pi/16)
|
1742
|
+
// s_i = sin(i*pi/16)
|
1743
|
+
//
|
1744
|
+
// s_ij = in_i + in_j
|
1745
|
+
// d_ij = in_i - in_j
|
1746
|
+
//
|
1747
|
+
// rot_i(x, y) = {c_i*x + s_i*y, -s_i*x + c_i*y}
|
1748
|
+
//
|
1749
|
+
// We'll run the DCT in two passes. First, run the 1D DCT on
|
1750
|
+
// the rows, in-place. Then, run over the columns in-place,
|
1751
|
+
// and be done with it.
|
1752
|
+
//
|
1753
|
+
|
1754
|
+
#ifndef IMF_HAVE_SSE2
|
1755
|
+
|
1756
|
+
//
|
1757
|
+
// Default implementation
|
1758
|
+
//
|
1759
|
+
|
1760
|
+
void
|
1761
|
+
dctForward8x8 (float *data)
|
1762
|
+
{
|
1763
|
+
float A0, A1, A2, A3, A4, A5, A6, A7;
|
1764
|
+
float K0, K1, rot_x, rot_y;
|
1765
|
+
|
1766
|
+
float *srcPtr = data;
|
1767
|
+
float *dstPtr = data;
|
1768
|
+
|
1769
|
+
const float c1 = cosf (3.14159f * 1.0f / 16.0f);
|
1770
|
+
const float c2 = cosf (3.14159f * 2.0f / 16.0f);
|
1771
|
+
const float c3 = cosf (3.14159f * 3.0f / 16.0f);
|
1772
|
+
const float c4 = cosf (3.14159f * 4.0f / 16.0f);
|
1773
|
+
const float c5 = cosf (3.14159f * 5.0f / 16.0f);
|
1774
|
+
const float c6 = cosf (3.14159f * 6.0f / 16.0f);
|
1775
|
+
const float c7 = cosf (3.14159f * 7.0f / 16.0f);
|
1776
|
+
|
1777
|
+
const float c1Half = .5f * c1;
|
1778
|
+
const float c2Half = .5f * c2;
|
1779
|
+
const float c3Half = .5f * c3;
|
1780
|
+
const float c5Half = .5f * c5;
|
1781
|
+
const float c6Half = .5f * c6;
|
1782
|
+
const float c7Half = .5f * c7;
|
1783
|
+
|
1784
|
+
//
|
1785
|
+
// First pass - do a 1D DCT over the rows and write the
|
1786
|
+
// results back in place
|
1787
|
+
//
|
1788
|
+
|
1789
|
+
for (int row=0; row<8; ++row)
|
1790
|
+
{
|
1791
|
+
float *srcRowPtr = srcPtr + 8 * row;
|
1792
|
+
float *dstRowPtr = dstPtr + 8 * row;
|
1793
|
+
|
1794
|
+
A0 = srcRowPtr[0] + srcRowPtr[7];
|
1795
|
+
A1 = srcRowPtr[1] + srcRowPtr[2];
|
1796
|
+
A2 = srcRowPtr[1] - srcRowPtr[2];
|
1797
|
+
A3 = srcRowPtr[3] + srcRowPtr[4];
|
1798
|
+
A4 = srcRowPtr[3] - srcRowPtr[4];
|
1799
|
+
A5 = srcRowPtr[5] + srcRowPtr[6];
|
1800
|
+
A6 = srcRowPtr[5] - srcRowPtr[6];
|
1801
|
+
A7 = srcRowPtr[0] - srcRowPtr[7];
|
1802
|
+
|
1803
|
+
K0 = c4 * (A0 + A3);
|
1804
|
+
K1 = c4 * (A1 + A5);
|
1805
|
+
|
1806
|
+
dstRowPtr[0] = .5f * (K0 + K1);
|
1807
|
+
dstRowPtr[4] = .5f * (K0 - K1);
|
1808
|
+
|
1809
|
+
//
|
1810
|
+
// (2*dst2, 2*dst6) = rot 6 (d12 - d56, s07 - s34)
|
1811
|
+
//
|
1812
|
+
|
1813
|
+
rot_x = A2 - A6;
|
1814
|
+
rot_y = A0 - A3;
|
1815
|
+
|
1816
|
+
dstRowPtr[2] = c6Half * rot_x + c2Half * rot_y;
|
1817
|
+
dstRowPtr[6] = c6Half * rot_y - c2Half * rot_x;
|
1818
|
+
|
1819
|
+
//
|
1820
|
+
// K0, K1 are active until after dst[1],dst[7]
|
1821
|
+
// as well as dst[3], dst[5] are computed.
|
1822
|
+
//
|
1823
|
+
|
1824
|
+
K0 = c4 * (A1 - A5);
|
1825
|
+
K1 = -1 * c4 * (A2 + A6);
|
1826
|
+
|
1827
|
+
//
|
1828
|
+
// Two ways to do a rotation:
|
1829
|
+
//
|
1830
|
+
// rot i (x, y) =
|
1831
|
+
// X = c_i*x + s_i*y
|
1832
|
+
// Y = -s_i*x + c_i*y
|
1833
|
+
//
|
1834
|
+
// OR
|
1835
|
+
//
|
1836
|
+
// X = c_i*(x+y) + (s_i-c_i)*y
|
1837
|
+
// Y = c_i*y - (s_i+c_i)*x
|
1838
|
+
//
|
1839
|
+
// the first case has 4 multiplies, but fewer constants,
|
1840
|
+
// while the 2nd case has fewer multiplies but takes more space.
|
1841
|
+
|
1842
|
+
//
|
1843
|
+
// (2*dst3, 2*dst5) = rot -3 ( d07 - K0, d34 + K1 )
|
1844
|
+
//
|
1845
|
+
|
1846
|
+
rot_x = A7 - K0;
|
1847
|
+
rot_y = A4 + K1;
|
1848
|
+
|
1849
|
+
dstRowPtr[3] = c3Half * rot_x - c5Half * rot_y;
|
1850
|
+
dstRowPtr[5] = c5Half * rot_x + c3Half * rot_y;
|
1851
|
+
|
1852
|
+
//
|
1853
|
+
// (2*dst1, 2*dst7) = rot -1 ( d07 + K0, K1 - d34 )
|
1854
|
+
//
|
1855
|
+
|
1856
|
+
rot_x = A7 + K0;
|
1857
|
+
rot_y = K1 - A4;
|
1858
|
+
|
1859
|
+
//
|
1860
|
+
// A: 4, 7 are inactive. All A's are inactive
|
1861
|
+
//
|
1862
|
+
|
1863
|
+
dstRowPtr[1] = c1Half * rot_x - c7Half * rot_y;
|
1864
|
+
dstRowPtr[7] = c7Half * rot_x + c1Half * rot_y;
|
1865
|
+
}
|
1866
|
+
|
1867
|
+
//
|
1868
|
+
// Second pass - do the same, but on the columns
|
1869
|
+
//
|
1870
|
+
|
1871
|
+
for (int column = 0; column < 8; ++column)
|
1872
|
+
{
|
1873
|
+
|
1874
|
+
A0 = srcPtr[ column] + srcPtr[56 + column];
|
1875
|
+
A7 = srcPtr[ column] - srcPtr[56 + column];
|
1876
|
+
|
1877
|
+
A1 = srcPtr[ 8 + column] + srcPtr[16 + column];
|
1878
|
+
A2 = srcPtr[ 8 + column] - srcPtr[16 + column];
|
1879
|
+
|
1880
|
+
A3 = srcPtr[24 + column] + srcPtr[32 + column];
|
1881
|
+
A4 = srcPtr[24 + column] - srcPtr[32 + column];
|
1882
|
+
|
1883
|
+
A5 = srcPtr[40 + column] + srcPtr[48 + column];
|
1884
|
+
A6 = srcPtr[40 + column] - srcPtr[48 + column];
|
1885
|
+
|
1886
|
+
K0 = c4 * (A0 + A3);
|
1887
|
+
K1 = c4 * (A1 + A5);
|
1888
|
+
|
1889
|
+
dstPtr[ column] = .5f * (K0 + K1);
|
1890
|
+
dstPtr[32+column] = .5f * (K0 - K1);
|
1891
|
+
|
1892
|
+
//
|
1893
|
+
// (2*dst2, 2*dst6) = rot 6 ( d12 - d56, s07 - s34 )
|
1894
|
+
//
|
1895
|
+
|
1896
|
+
rot_x = A2 - A6;
|
1897
|
+
rot_y = A0 - A3;
|
1898
|
+
|
1899
|
+
dstPtr[16+column] = .5f * (c6 * rot_x + c2 * rot_y);
|
1900
|
+
dstPtr[48+column] = .5f * (c6 * rot_y - c2 * rot_x);
|
1901
|
+
|
1902
|
+
//
|
1903
|
+
// K0, K1 are active until after dst[1],dst[7]
|
1904
|
+
// as well as dst[3], dst[5] are computed.
|
1905
|
+
//
|
1906
|
+
|
1907
|
+
K0 = c4 * (A1 - A5);
|
1908
|
+
K1 = -1 * c4 * (A2 + A6);
|
1909
|
+
|
1910
|
+
//
|
1911
|
+
// (2*dst3, 2*dst5) = rot -3 ( d07 - K0, d34 + K1 )
|
1912
|
+
//
|
1913
|
+
|
1914
|
+
rot_x = A7 - K0;
|
1915
|
+
rot_y = A4 + K1;
|
1916
|
+
|
1917
|
+
dstPtr[24+column] = .5f * (c3 * rot_x - c5 * rot_y);
|
1918
|
+
dstPtr[40+column] = .5f * (c5 * rot_x + c3 * rot_y);
|
1919
|
+
|
1920
|
+
//
|
1921
|
+
// (2*dst1, 2*dst7) = rot -1 ( d07 + K0, K1 - d34 )
|
1922
|
+
//
|
1923
|
+
|
1924
|
+
rot_x = A7 + K0;
|
1925
|
+
rot_y = K1 - A4;
|
1926
|
+
|
1927
|
+
dstPtr[ 8+column] = .5f * (c1 * rot_x - c7 * rot_y);
|
1928
|
+
dstPtr[56+column] = .5f * (c7 * rot_x + c1 * rot_y);
|
1929
|
+
}
|
1930
|
+
}
|
1931
|
+
|
1932
|
+
#else /* IMF_HAVE_SSE2 */
|
1933
|
+
|
1934
|
+
//
|
1935
|
+
// SSE2 implementation
|
1936
|
+
//
|
1937
|
+
// Here, we're always doing a column-wise operation
|
1938
|
+
// plus transposes. This might be faster to do differently
|
1939
|
+
// between rows-wise and column-wise
|
1940
|
+
//
|
1941
|
+
|
1942
|
+
void
|
1943
|
+
dctForward8x8 (float *data)
|
1944
|
+
{
|
1945
|
+
__m128 *srcVec = (__m128 *)data;
|
1946
|
+
__m128 a0Vec, a1Vec, a2Vec, a3Vec, a4Vec, a5Vec, a6Vec, a7Vec;
|
1947
|
+
__m128 k0Vec, k1Vec, rotXVec, rotYVec;
|
1948
|
+
__m128 transTmp[4], transTmp2[4];
|
1949
|
+
|
1950
|
+
__m128 c4Vec = { .70710678f, .70710678f, .70710678f, .70710678f};
|
1951
|
+
__m128 c4NegVec = {-.70710678f, -.70710678f, -.70710678f, -.70710678f};
|
1952
|
+
|
1953
|
+
__m128 c1HalfVec = {.490392640f, .490392640f, .490392640f, .490392640f};
|
1954
|
+
__m128 c2HalfVec = {.461939770f, .461939770f, .461939770f, .461939770f};
|
1955
|
+
__m128 c3HalfVec = {.415734810f, .415734810f, .415734810f, .415734810f};
|
1956
|
+
__m128 c5HalfVec = {.277785120f, .277785120f, .277785120f, .277785120f};
|
1957
|
+
__m128 c6HalfVec = {.191341720f, .191341720f, .191341720f, .191341720f};
|
1958
|
+
__m128 c7HalfVec = {.097545161f, .097545161f, .097545161f, .097545161f};
|
1959
|
+
|
1960
|
+
__m128 halfVec = {.5f, .5f, .5f, .5f};
|
1961
|
+
|
1962
|
+
for (int iter = 0; iter < 2; ++iter)
|
1963
|
+
{
|
1964
|
+
//
|
1965
|
+
// Operate on 4 columns at a time. The
|
1966
|
+
// offsets into our row-major array are:
|
1967
|
+
// 0: 0 1
|
1968
|
+
// 1: 2 3
|
1969
|
+
// 2: 4 5
|
1970
|
+
// 3: 6 7
|
1971
|
+
// 4: 8 9
|
1972
|
+
// 5: 10 11
|
1973
|
+
// 6: 12 13
|
1974
|
+
// 7: 14 15
|
1975
|
+
//
|
1976
|
+
|
1977
|
+
for (int pass=0; pass<2; ++pass)
|
1978
|
+
{
|
1979
|
+
a0Vec = _mm_add_ps (srcVec[ 0 + pass], srcVec[14 + pass]);
|
1980
|
+
a1Vec = _mm_add_ps (srcVec[ 2 + pass], srcVec[ 4 + pass]);
|
1981
|
+
a3Vec = _mm_add_ps (srcVec[ 6 + pass], srcVec[ 8 + pass]);
|
1982
|
+
a5Vec = _mm_add_ps (srcVec[10 + pass], srcVec[12 + pass]);
|
1983
|
+
|
1984
|
+
a7Vec = _mm_sub_ps (srcVec[ 0 + pass], srcVec[14 + pass]);
|
1985
|
+
a2Vec = _mm_sub_ps (srcVec[ 2 + pass], srcVec[ 4 + pass]);
|
1986
|
+
a4Vec = _mm_sub_ps (srcVec[ 6 + pass], srcVec[ 8 + pass]);
|
1987
|
+
a6Vec = _mm_sub_ps (srcVec[10 + pass], srcVec[12 + pass]);
|
1988
|
+
|
1989
|
+
//
|
1990
|
+
// First stage; Compute out_0 and out_4
|
1991
|
+
//
|
1992
|
+
|
1993
|
+
k0Vec = _mm_add_ps (a0Vec, a3Vec);
|
1994
|
+
k1Vec = _mm_add_ps (a1Vec, a5Vec);
|
1995
|
+
|
1996
|
+
k0Vec = _mm_mul_ps (c4Vec, k0Vec);
|
1997
|
+
k1Vec = _mm_mul_ps (c4Vec, k1Vec);
|
1998
|
+
|
1999
|
+
srcVec[0 + pass] = _mm_add_ps (k0Vec, k1Vec);
|
2000
|
+
srcVec[8 + pass] = _mm_sub_ps (k0Vec, k1Vec);
|
2001
|
+
|
2002
|
+
srcVec[0 + pass] = _mm_mul_ps (srcVec[0 + pass], halfVec );
|
2003
|
+
srcVec[8 + pass] = _mm_mul_ps (srcVec[8 + pass], halfVec );
|
2004
|
+
|
2005
|
+
|
2006
|
+
//
|
2007
|
+
// Second stage; Compute out_2 and out_6
|
2008
|
+
//
|
2009
|
+
|
2010
|
+
k0Vec = _mm_sub_ps (a2Vec, a6Vec);
|
2011
|
+
k1Vec = _mm_sub_ps (a0Vec, a3Vec);
|
2012
|
+
|
2013
|
+
srcVec[ 4 + pass] = _mm_add_ps (_mm_mul_ps (c6HalfVec, k0Vec),
|
2014
|
+
_mm_mul_ps (c2HalfVec, k1Vec));
|
2015
|
+
|
2016
|
+
srcVec[12 + pass] = _mm_sub_ps (_mm_mul_ps (c6HalfVec, k1Vec),
|
2017
|
+
_mm_mul_ps (c2HalfVec, k0Vec));
|
2018
|
+
|
2019
|
+
//
|
2020
|
+
// Precompute K0 and K1 for the remaining stages
|
2021
|
+
//
|
2022
|
+
|
2023
|
+
k0Vec = _mm_mul_ps (_mm_sub_ps (a1Vec, a5Vec), c4Vec);
|
2024
|
+
k1Vec = _mm_mul_ps (_mm_add_ps (a2Vec, a6Vec), c4NegVec);
|
2025
|
+
|
2026
|
+
//
|
2027
|
+
// Third Stage, compute out_3 and out_5
|
2028
|
+
//
|
2029
|
+
|
2030
|
+
rotXVec = _mm_sub_ps (a7Vec, k0Vec);
|
2031
|
+
rotYVec = _mm_add_ps (a4Vec, k1Vec);
|
2032
|
+
|
2033
|
+
srcVec[ 6 + pass] = _mm_sub_ps (_mm_mul_ps (c3HalfVec, rotXVec),
|
2034
|
+
_mm_mul_ps (c5HalfVec, rotYVec));
|
2035
|
+
|
2036
|
+
srcVec[10 + pass] = _mm_add_ps (_mm_mul_ps (c5HalfVec, rotXVec),
|
2037
|
+
_mm_mul_ps (c3HalfVec, rotYVec));
|
2038
|
+
|
2039
|
+
//
|
2040
|
+
// Fourth Stage, compute out_1 and out_7
|
2041
|
+
//
|
2042
|
+
|
2043
|
+
rotXVec = _mm_add_ps (a7Vec, k0Vec);
|
2044
|
+
rotYVec = _mm_sub_ps (k1Vec, a4Vec);
|
2045
|
+
|
2046
|
+
srcVec[ 2 + pass] = _mm_sub_ps (_mm_mul_ps (c1HalfVec, rotXVec),
|
2047
|
+
_mm_mul_ps (c7HalfVec, rotYVec));
|
2048
|
+
|
2049
|
+
srcVec[14 + pass] = _mm_add_ps (_mm_mul_ps (c7HalfVec, rotXVec),
|
2050
|
+
_mm_mul_ps (c1HalfVec, rotYVec));
|
2051
|
+
}
|
2052
|
+
|
2053
|
+
//
|
2054
|
+
// Transpose the matrix, in 4x4 blocks. So, if we have our
|
2055
|
+
// 8x8 matrix divied into 4x4 blocks:
|
2056
|
+
//
|
2057
|
+
// M0 | M1 M0t | M2t
|
2058
|
+
// ----+--- --> -----+------
|
2059
|
+
// M2 | M3 M1t | M3t
|
2060
|
+
//
|
2061
|
+
|
2062
|
+
//
|
2063
|
+
// M0t, done in place, the first half.
|
2064
|
+
//
|
2065
|
+
|
2066
|
+
transTmp[0] = _mm_shuffle_ps (srcVec[0], srcVec[2], 0x44);
|
2067
|
+
transTmp[1] = _mm_shuffle_ps (srcVec[4], srcVec[6], 0x44);
|
2068
|
+
transTmp[3] = _mm_shuffle_ps (srcVec[4], srcVec[6], 0xEE);
|
2069
|
+
transTmp[2] = _mm_shuffle_ps (srcVec[0], srcVec[2], 0xEE);
|
2070
|
+
|
2071
|
+
//
|
2072
|
+
// M3t, also done in place, the first half.
|
2073
|
+
//
|
2074
|
+
|
2075
|
+
transTmp2[0] = _mm_shuffle_ps (srcVec[ 9], srcVec[11], 0x44);
|
2076
|
+
transTmp2[1] = _mm_shuffle_ps (srcVec[13], srcVec[15], 0x44);
|
2077
|
+
transTmp2[2] = _mm_shuffle_ps (srcVec[ 9], srcVec[11], 0xEE);
|
2078
|
+
transTmp2[3] = _mm_shuffle_ps (srcVec[13], srcVec[15], 0xEE);
|
2079
|
+
|
2080
|
+
//
|
2081
|
+
// M0t, the second half.
|
2082
|
+
//
|
2083
|
+
|
2084
|
+
srcVec[0] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0x88);
|
2085
|
+
srcVec[4] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0x88);
|
2086
|
+
srcVec[2] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0xDD);
|
2087
|
+
srcVec[6] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0xDD);
|
2088
|
+
|
2089
|
+
//
|
2090
|
+
// M3t, the second half.
|
2091
|
+
//
|
2092
|
+
|
2093
|
+
srcVec[ 9] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0x88);
|
2094
|
+
srcVec[13] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0x88);
|
2095
|
+
srcVec[11] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0xDD);
|
2096
|
+
srcVec[15] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0xDD);
|
2097
|
+
|
2098
|
+
//
|
2099
|
+
// M1 and M2 need to be done at the same time, because we're
|
2100
|
+
// swapping.
|
2101
|
+
//
|
2102
|
+
// First, the first half of M1t
|
2103
|
+
//
|
2104
|
+
|
2105
|
+
transTmp[0] = _mm_shuffle_ps (srcVec[1], srcVec[3], 0x44);
|
2106
|
+
transTmp[1] = _mm_shuffle_ps (srcVec[5], srcVec[7], 0x44);
|
2107
|
+
transTmp[2] = _mm_shuffle_ps (srcVec[1], srcVec[3], 0xEE);
|
2108
|
+
transTmp[3] = _mm_shuffle_ps (srcVec[5], srcVec[7], 0xEE);
|
2109
|
+
|
2110
|
+
//
|
2111
|
+
// And the first half of M2t
|
2112
|
+
//
|
2113
|
+
|
2114
|
+
transTmp2[0] = _mm_shuffle_ps (srcVec[ 8], srcVec[10], 0x44);
|
2115
|
+
transTmp2[1] = _mm_shuffle_ps (srcVec[12], srcVec[14], 0x44);
|
2116
|
+
transTmp2[2] = _mm_shuffle_ps (srcVec[ 8], srcVec[10], 0xEE);
|
2117
|
+
transTmp2[3] = _mm_shuffle_ps (srcVec[12], srcVec[14], 0xEE);
|
2118
|
+
|
2119
|
+
//
|
2120
|
+
// Second half of M1t
|
2121
|
+
//
|
2122
|
+
|
2123
|
+
srcVec[ 8] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0x88);
|
2124
|
+
srcVec[12] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0x88);
|
2125
|
+
srcVec[10] = _mm_shuffle_ps (transTmp[0], transTmp[1], 0xDD);
|
2126
|
+
srcVec[14] = _mm_shuffle_ps (transTmp[2], transTmp[3], 0xDD);
|
2127
|
+
|
2128
|
+
//
|
2129
|
+
// Second half of M2
|
2130
|
+
//
|
2131
|
+
|
2132
|
+
srcVec[1] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0x88);
|
2133
|
+
srcVec[5] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0x88);
|
2134
|
+
srcVec[3] = _mm_shuffle_ps (transTmp2[0], transTmp2[1], 0xDD);
|
2135
|
+
srcVec[7] = _mm_shuffle_ps (transTmp2[2], transTmp2[3], 0xDD);
|
2136
|
+
}
|
2137
|
+
}
|
2138
|
+
|
2139
|
+
#endif /* IMF_HAVE_SSE2 */
|
2140
|
+
|
2141
|
+
} // anonymous namespace
|
2142
|
+
|
2143
|
+
OPENEXR_IMF_INTERNAL_NAMESPACE_HEADER_EXIT
|
2144
|
+
|
2145
|
+
#endif
|