tomoto 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
|
@@ -0,0 +1,619 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file MorePacketMath.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
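* @brief AVX/AVX2 specializations of the extra packet-math helpers (bit reinterpretation, split/combine of 256-bit packets, 64-bit integer arithmetic, shifts and comparisons)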
|
|
5
|
+
* @version 0.3.5
|
|
6
|
+
* @date 2021-07-16
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020-2021
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_MORE_PACKET_MATH_AVX_H
|
|
13
|
+
#define EIGENRAND_MORE_PACKET_MATH_AVX_H
|
|
14
|
+
|
|
15
|
+
#include <immintrin.h>
|
|
16
|
+
|
|
17
|
+
namespace Eigen
|
|
18
|
+
{
|
|
19
|
+
namespace internal
|
|
20
|
+
{
|
|
21
|
+
template<>
|
|
22
|
+
struct IsIntPacket<Packet8i> : std::true_type {};
|
|
23
|
+
|
|
24
|
+
template<>
|
|
25
|
+
struct HalfPacket<Packet8i>
|
|
26
|
+
{
|
|
27
|
+
using type = Packet4i;
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
template<>
|
|
31
|
+
struct HalfPacket<Packet8f>
|
|
32
|
+
{
|
|
33
|
+
using type = Packet4f;
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
template<>
|
|
37
|
+
struct IsFloatPacket<Packet8f> : std::true_type {};
|
|
38
|
+
|
|
39
|
+
template<>
|
|
40
|
+
struct IsDoublePacket<Packet4d> : std::true_type {};
|
|
41
|
+
|
|
42
|
+
template<>
|
|
43
|
+
struct reinterpreter<Packet8i>
|
|
44
|
+
{
|
|
45
|
+
EIGEN_STRONG_INLINE Packet8f to_float(const Packet8i& x)
|
|
46
|
+
{
|
|
47
|
+
return _mm256_castsi256_ps(x);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
EIGEN_STRONG_INLINE Packet4d to_double(const Packet8i& x)
|
|
51
|
+
{
|
|
52
|
+
return _mm256_castsi256_pd(x);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
EIGEN_STRONG_INLINE Packet8i to_int(const Packet8i& x)
|
|
56
|
+
{
|
|
57
|
+
return x;
|
|
58
|
+
}
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
template<>
|
|
62
|
+
struct reinterpreter<Packet8f>
|
|
63
|
+
{
|
|
64
|
+
EIGEN_STRONG_INLINE Packet8f to_float(const Packet8f& x)
|
|
65
|
+
{
|
|
66
|
+
return x;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
EIGEN_STRONG_INLINE Packet4d to_double(const Packet8f& x)
|
|
70
|
+
{
|
|
71
|
+
return _mm256_castps_pd(x);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
EIGEN_STRONG_INLINE Packet8i to_int(const Packet8f& x)
|
|
75
|
+
{
|
|
76
|
+
return _mm256_castps_si256(x);
|
|
77
|
+
}
|
|
78
|
+
};
|
|
79
|
+
|
|
80
|
+
template<>
|
|
81
|
+
struct reinterpreter<Packet4d>
|
|
82
|
+
{
|
|
83
|
+
EIGEN_STRONG_INLINE Packet8f to_float(const Packet4d& x)
|
|
84
|
+
{
|
|
85
|
+
return _mm256_castpd_ps(x);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
EIGEN_STRONG_INLINE Packet4d to_double(const Packet4d& x)
|
|
89
|
+
{
|
|
90
|
+
return x;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
EIGEN_STRONG_INLINE Packet8i to_int(const Packet4d& x)
|
|
94
|
+
{
|
|
95
|
+
return _mm256_castpd_si256(x);
|
|
96
|
+
}
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
template<>
|
|
100
|
+
EIGEN_STRONG_INLINE void split_two<Packet8i>(const Packet8i& x, Packet4i& a, Packet4i& b)
|
|
101
|
+
{
|
|
102
|
+
a = _mm256_extractf128_si256(x, 0);
|
|
103
|
+
b = _mm256_extractf128_si256(x, 1);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
 * Joins two 128-bit integer packets into one 256-bit packet.
 *
 * @param a  becomes the lower 128 bits of the result
 * @param b  becomes the upper 128 bits of the result
 */
EIGEN_STRONG_INLINE Packet8i combine_two(const Packet4i& a, const Packet4i& b)
{
    // The widening cast leaves the upper lane unspecified; the insert
    // below overwrites that lane with b.
    const __m256i widened = _mm256_castsi128_si256(a);
    return _mm256_insertf128_si256(widened, b, 1);
}
|
|
110
|
+
|
|
111
|
+
template<>
|
|
112
|
+
EIGEN_STRONG_INLINE void split_two<Packet8f>(const Packet8f& x, Packet4f& a, Packet4f& b)
|
|
113
|
+
{
|
|
114
|
+
a = _mm256_extractf128_ps(x, 0);
|
|
115
|
+
b = _mm256_extractf128_ps(x, 1);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
 * Joins two 128-bit float packets into one 256-bit packet.
 *
 * @param a  becomes the lower 128 bits of the result
 * @param b  becomes the upper 128 bits of the result
 */
EIGEN_STRONG_INLINE Packet8f combine_two(const Packet4f& a, const Packet4f& b)
{
    // The widening cast leaves the upper lane unspecified; the insert
    // below overwrites that lane with b.
    const __m256 widened = _mm256_castps128_ps256(a);
    return _mm256_insertf128_ps(widened, b, 1);
}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
/**
 * Gathers the even-indexed 32-bit elements of a (indices 0, 2, 4, 6) into
 * a 128-bit packet, in that order.
 *
 * With AVX2 a single cross-lane permute moves those elements into the low
 * 128 bits. Without AVX2 each 128-bit lane is permuted separately and the
 * wanted pairs are merged with a blend.
 */
EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet8i& a)
{
#if defined(EIGEN_VECTORIZE_AVX2)
    const __m256i order = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7);
    const __m256i gathered = _mm256_permutevar8x32_epi32(a, order);
    return _mm256_castsi256_si128(gathered);
#else
    // Per-lane selectors: the low lane ends up as {e0, e2, e1, e3} and the
    // high lane as {e5, e7, e4, e6}.
    const __m256i order = _mm256_setr_epi32(0, 2, 1, 3, 1, 3, 0, 2);
    const __m256 shuffled = _mm256_permutevar_ps(_mm256_castsi256_ps(a), order);
    const __m128 low_lane = _mm256_extractf128_ps(shuffled, 0);
    const __m128 high_lane = _mm256_extractf128_ps(shuffled, 1);
    // Mask 0b1100: elements 0-1 from the low lane, elements 2-3 from the high lane.
    return _mm_castps_si128(_mm_blend_ps(low_lane, high_lane, 0b1100));
#endif
}
|
|
133
|
+
|
|
134
|
+
template<>
|
|
135
|
+
EIGEN_STRONG_INLINE Packet8i pseti64<Packet8i>(uint64_t a)
|
|
136
|
+
{
|
|
137
|
+
return _mm256_set1_epi64x(a);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
template<>
|
|
141
|
+
EIGEN_STRONG_INLINE Packet8i padd64<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
142
|
+
{
|
|
143
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
144
|
+
return _mm256_add_epi64(a, b);
|
|
145
|
+
#else
|
|
146
|
+
Packet4i a1, a2, b1, b2;
|
|
147
|
+
split_two(a, a1, a2);
|
|
148
|
+
split_two(b, b1, b2);
|
|
149
|
+
return combine_two((Packet4i)_mm_add_epi64(a1, b1), (Packet4i)_mm_add_epi64(a2, b2));
|
|
150
|
+
#endif
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
template<>
|
|
154
|
+
EIGEN_STRONG_INLINE Packet8i psub64<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
155
|
+
{
|
|
156
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
157
|
+
return _mm256_sub_epi64(a, b);
|
|
158
|
+
#else
|
|
159
|
+
Packet4i a1, a2, b1, b2;
|
|
160
|
+
split_two(a, a1, a2);
|
|
161
|
+
split_two(b, b1, b2);
|
|
162
|
+
return combine_two((Packet4i)_mm_sub_epi64(a1, b1), (Packet4i)_mm_sub_epi64(a2, b2));
|
|
163
|
+
#endif
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
template<>
|
|
167
|
+
EIGEN_STRONG_INLINE Packet8i pcmpeq<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
168
|
+
{
|
|
169
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
170
|
+
return _mm256_cmpeq_epi32(a, b);
|
|
171
|
+
#else
|
|
172
|
+
Packet4i a1, a2, b1, b2;
|
|
173
|
+
split_two(a, a1, a2);
|
|
174
|
+
split_two(b, b1, b2);
|
|
175
|
+
return combine_two((Packet4i)_mm_cmpeq_epi32(a1, b1), (Packet4i)_mm_cmpeq_epi32(a2, b2));
|
|
176
|
+
#endif
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
template<>
|
|
180
|
+
struct BitShifter<Packet8i>
|
|
181
|
+
{
|
|
182
|
+
template<int b>
|
|
183
|
+
EIGEN_STRONG_INLINE Packet8i sll(const Packet8i& a)
|
|
184
|
+
{
|
|
185
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
186
|
+
return _mm256_slli_epi32(a, b);
|
|
187
|
+
#else
|
|
188
|
+
Packet4i a1, a2;
|
|
189
|
+
split_two(a, a1, a2);
|
|
190
|
+
return combine_two((Packet4i)_mm_slli_epi32(a1, b), (Packet4i)_mm_slli_epi32(a2, b));
|
|
191
|
+
#endif
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
template<int b>
|
|
195
|
+
EIGEN_STRONG_INLINE Packet8i srl(const Packet8i& a, int _b = b)
|
|
196
|
+
{
|
|
197
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
198
|
+
if (b >= 0)
|
|
199
|
+
{
|
|
200
|
+
return _mm256_srli_epi32(a, b);
|
|
201
|
+
}
|
|
202
|
+
else
|
|
203
|
+
{
|
|
204
|
+
return _mm256_srli_epi32(a, _b);
|
|
205
|
+
}
|
|
206
|
+
#else
|
|
207
|
+
Packet4i a1, a2;
|
|
208
|
+
split_two(a, a1, a2);
|
|
209
|
+
if (b >= 0)
|
|
210
|
+
{
|
|
211
|
+
return combine_two((Packet4i)_mm_srli_epi32(a1, b), (Packet4i)_mm_srli_epi32(a2, b));
|
|
212
|
+
}
|
|
213
|
+
else
|
|
214
|
+
{
|
|
215
|
+
return combine_two((Packet4i)_mm_srli_epi32(a1, _b), (Packet4i)_mm_srli_epi32(a2, _b));
|
|
216
|
+
}
|
|
217
|
+
#endif
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
template<int b>
|
|
221
|
+
EIGEN_STRONG_INLINE Packet8i sll64(const Packet8i& a)
|
|
222
|
+
{
|
|
223
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
224
|
+
return _mm256_slli_epi64(a, b);
|
|
225
|
+
#else
|
|
226
|
+
Packet4i a1, a2;
|
|
227
|
+
split_two(a, a1, a2);
|
|
228
|
+
return combine_two((Packet4i)_mm_slli_epi64(a1, b), (Packet4i)_mm_slli_epi64(a2, b));
|
|
229
|
+
#endif
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
template<int b>
|
|
233
|
+
EIGEN_STRONG_INLINE Packet8i srl64(const Packet8i& a)
|
|
234
|
+
{
|
|
235
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
236
|
+
return _mm256_srli_epi64(a, b);
|
|
237
|
+
#else
|
|
238
|
+
Packet4i a1, a2;
|
|
239
|
+
split_two(a, a1, a2);
|
|
240
|
+
return combine_two((Packet4i)_mm_srli_epi64(a1, b), (Packet4i)_mm_srli_epi64(a2, b));
|
|
241
|
+
#endif
|
|
242
|
+
}
|
|
243
|
+
};
|
|
244
|
+
#ifdef EIGENRAND_EIGEN_33_MODE
|
|
245
|
+
/**
 * Element-wise addition of 32-bit integers.
 *
 * Without AVX2 the packet is split into 128-bit halves, each half is added
 * with the SSE intrinsic, and the halves are joined again.
 */
template<>
EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const Packet8i& b)
{
#if defined(EIGEN_VECTORIZE_AVX2)
    return _mm256_add_epi32(a, b);
#else
    Packet4i a_lo, a_hi, b_lo, b_hi;
    split_two(a, a_lo, a_hi);
    split_two(b, b_lo, b_hi);
    const Packet4i sum_lo = static_cast<Packet4i>(_mm_add_epi32(a_lo, b_lo));
    const Packet4i sum_hi = static_cast<Packet4i>(_mm_add_epi32(a_hi, b_hi));
    return combine_two(sum_lo, sum_hi);
#endif
}
|
|
256
|
+
|
|
257
|
+
/**
 * Element-wise subtraction (a - b) of 32-bit integers.
 *
 * Without AVX2 the packet is split into 128-bit halves, each half is
 * subtracted with the SSE intrinsic, and the halves are joined again.
 */
template<>
EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b)
{
#if defined(EIGEN_VECTORIZE_AVX2)
    return _mm256_sub_epi32(a, b);
#else
    Packet4i a_lo, a_hi, b_lo, b_hi;
    split_two(a, a_lo, a_hi);
    split_two(b, b_lo, b_hi);
    const Packet4i diff_lo = static_cast<Packet4i>(_mm_sub_epi32(a_lo, b_lo));
    const Packet4i diff_hi = static_cast<Packet4i>(_mm_sub_epi32(a_hi, b_hi));
    return combine_two(diff_lo, diff_hi);
#endif
}
|
|
268
|
+
|
|
269
|
+
/**
 * Bitwise AND of two integer packets.
 *
 * Without AVX2 the operands are reinterpreted as float packets so the AVX
 * float AND can be used, and the result is reinterpreted back to integers.
 */
template<>
EIGEN_STRONG_INLINE Packet8i pand<Packet8i>(const Packet8i& a, const Packet8i& b)
{
#if defined(EIGEN_VECTORIZE_AVX2)
    return _mm256_and_si256(a, b);
#else
    const Packet8f masked = static_cast<Packet8f>(
        _mm256_and_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
    return reinterpret_to_int(masked);
#endif
}
|
|
277
|
+
|
|
278
|
+
/**
 * Bitwise AND-NOT of two integer packets.
 *
 * Both branches pass (a, b) straight to the andnot intrinsic, which
 * computes (~a) & b. Without AVX2 the float variant is used on
 * reinterpreted operands and the result is reinterpreted back to integers.
 *
 * NOTE(review): Eigen's own pandnot specializations appear to use the
 * opposite operand convention (a & ~b). Confirm which one the callers of
 * this overload expect before changing the argument order.
 */
template<>
EIGEN_STRONG_INLINE Packet8i pandnot<Packet8i>(const Packet8i& a, const Packet8i& b)
{
#if defined(EIGEN_VECTORIZE_AVX2)
    return _mm256_andnot_si256(a, b);
#else
    const Packet8f masked = static_cast<Packet8f>(
        _mm256_andnot_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
    return reinterpret_to_int(masked);
#endif
}
|
|
286
|
+
|
|
287
|
+
/// Bitwise OR of two 256-bit integer packets.
template<> EIGEN_STRONG_INLINE Packet8i por<Packet8i>(const Packet8i& a, const Packet8i& b)
{
#ifdef EIGEN_VECTORIZE_AVX2
	return _mm256_or_si256(a, b);
#else
	// Without AVX2 the OR is done on the same bits viewed as floats.
	const auto lhs = reinterpret_to_float(a);
	const auto rhs = reinterpret_to_float(b);
	const Packet8f bits = (Packet8f)_mm256_or_ps(lhs, rhs);
	return reinterpret_to_int(bits);
#endif
}
|
|
295
|
+
|
|
296
|
+
/// Bitwise XOR of two 256-bit integer packets.
template<> EIGEN_STRONG_INLINE Packet8i pxor<Packet8i>(const Packet8i& a, const Packet8i& b)
{
#ifdef EIGEN_VECTORIZE_AVX2
	return _mm256_xor_si256(a, b);
#else
	// Without AVX2 the XOR is done on the same bits viewed as floats.
	const auto lhs = reinterpret_to_float(a);
	const auto rhs = reinterpret_to_float(b);
	const Packet8f bits = (Packet8f)_mm256_xor_ps(lhs, rhs);
	return reinterpret_to_int(bits);
#endif
}
|
|
304
|
+
#endif
|
|
305
|
+
template<>
|
|
306
|
+
EIGEN_STRONG_INLINE Packet8i pcmplt<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
307
|
+
{
|
|
308
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
309
|
+
return _mm256_cmpgt_epi32(b, a);
|
|
310
|
+
#else
|
|
311
|
+
Packet4i a1, a2, b1, b2;
|
|
312
|
+
split_two(a, a1, a2);
|
|
313
|
+
split_two(b, b1, b2);
|
|
314
|
+
return combine_two((Packet4i)_mm_cmpgt_epi32(b1, a1), (Packet4i)_mm_cmpgt_epi32(b2, a2));
|
|
315
|
+
#endif
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
template<>
|
|
319
|
+
EIGEN_STRONG_INLINE Packet8i pcmplt64<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
320
|
+
{
|
|
321
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
322
|
+
return _mm256_cmpgt_epi64(b, a);
|
|
323
|
+
#else
|
|
324
|
+
Packet4i a1, a2, b1, b2;
|
|
325
|
+
split_two(a, a1, a2);
|
|
326
|
+
split_two(b, b1, b2);
|
|
327
|
+
return combine_two((Packet4i)_mm_cmpgt_epi64(b1, a1), (Packet4i)_mm_cmpgt_epi64(b2, a2));
|
|
328
|
+
#endif
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
template<>
|
|
332
|
+
EIGEN_STRONG_INLINE Packet8f pcmplt<Packet8f>(const Packet8f& a, const Packet8f& b)
|
|
333
|
+
{
|
|
334
|
+
return _mm256_cmp_ps(a, b, _CMP_LT_OQ);
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
template<>
|
|
338
|
+
EIGEN_STRONG_INLINE Packet8f pcmple<Packet8f>(const Packet8f& a, const Packet8f& b)
|
|
339
|
+
{
|
|
340
|
+
return _mm256_cmp_ps(a, b, _CMP_LE_OQ);
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
template<>
|
|
344
|
+
EIGEN_STRONG_INLINE Packet4d pcmplt<Packet4d>(const Packet4d& a, const Packet4d& b)
|
|
345
|
+
{
|
|
346
|
+
return _mm256_cmp_pd(a, b, _CMP_LT_OQ);
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
template<>
|
|
350
|
+
EIGEN_STRONG_INLINE Packet4d pcmple<Packet4d>(const Packet4d& a, const Packet4d& b)
|
|
351
|
+
{
|
|
352
|
+
return _mm256_cmp_pd(a, b, _CMP_LE_OQ);
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
template<>
|
|
356
|
+
EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8f& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket)
|
|
357
|
+
{
|
|
358
|
+
return _mm256_blendv_ps(elsePacket, thenPacket, ifPacket);
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
template<>
|
|
362
|
+
EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8i& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket)
|
|
363
|
+
{
|
|
364
|
+
return pblendv(_mm256_castsi256_ps(ifPacket), thenPacket, elsePacket);
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
template<>
|
|
368
|
+
EIGEN_STRONG_INLINE Packet8i pblendv(const Packet8i& ifPacket, const Packet8i& thenPacket, const Packet8i& elsePacket)
|
|
369
|
+
{
|
|
370
|
+
return _mm256_castps_si256(_mm256_blendv_ps(
|
|
371
|
+
_mm256_castsi256_ps(elsePacket),
|
|
372
|
+
_mm256_castsi256_ps(thenPacket),
|
|
373
|
+
_mm256_castsi256_ps(ifPacket)
|
|
374
|
+
));
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
template<>
|
|
378
|
+
EIGEN_STRONG_INLINE Packet4d pblendv(const Packet4d& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket)
|
|
379
|
+
{
|
|
380
|
+
return _mm256_blendv_pd(elsePacket, thenPacket, ifPacket);
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
template<>
|
|
384
|
+
EIGEN_STRONG_INLINE Packet4d pblendv(const Packet8i& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket)
|
|
385
|
+
{
|
|
386
|
+
return pblendv(_mm256_castsi256_pd(ifPacket), thenPacket, elsePacket);
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
template<>
|
|
390
|
+
EIGEN_STRONG_INLINE Packet8i pgather<Packet8i>(const int* addr, const Packet8i& index)
|
|
391
|
+
{
|
|
392
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
393
|
+
return _mm256_i32gather_epi32(addr, index, 4);
|
|
394
|
+
#else
|
|
395
|
+
uint32_t u[8];
|
|
396
|
+
_mm256_storeu_si256((Packet8i*)u, index);
|
|
397
|
+
return _mm256_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
|
|
398
|
+
addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
|
|
399
|
+
#endif
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
template<>
|
|
403
|
+
EIGEN_STRONG_INLINE Packet8f pgather<Packet8i>(const float* addr, const Packet8i& index)
|
|
404
|
+
{
|
|
405
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
406
|
+
return _mm256_i32gather_ps(addr, index, 4);
|
|
407
|
+
#else
|
|
408
|
+
uint32_t u[8];
|
|
409
|
+
_mm256_storeu_si256((Packet8i*)u, index);
|
|
410
|
+
return _mm256_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
|
|
411
|
+
addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
|
|
412
|
+
#endif
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
template<>
|
|
416
|
+
EIGEN_STRONG_INLINE Packet4d pgather<Packet8i>(const double* addr, const Packet8i& index, bool upperhalf)
|
|
417
|
+
{
|
|
418
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
419
|
+
return _mm256_i32gather_pd(addr, _mm256_castsi256_si128(index), 8);
|
|
420
|
+
#else
|
|
421
|
+
uint32_t u[8];
|
|
422
|
+
_mm256_storeu_si256((Packet8i*)u, index);
|
|
423
|
+
if (upperhalf)
|
|
424
|
+
{
|
|
425
|
+
return _mm256_setr_pd(addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
|
|
426
|
+
}
|
|
427
|
+
else
|
|
428
|
+
{
|
|
429
|
+
return _mm256_setr_pd(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
|
|
430
|
+
}
|
|
431
|
+
#endif
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
template<>
|
|
435
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet8f>(const Packet8f& a)
|
|
436
|
+
{
|
|
437
|
+
return _mm256_movemask_ps(a);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
template<>
|
|
441
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet4d>(const Packet4d& a)
|
|
442
|
+
{
|
|
443
|
+
return _mm256_movemask_pd(a);
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
template<>
|
|
447
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet8i>(const Packet8i& a)
|
|
448
|
+
{
|
|
449
|
+
return pmovemask(_mm256_castsi256_ps(a));
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
template<>
|
|
453
|
+
EIGEN_STRONG_INLINE Packet8f ptruncate<Packet8f>(const Packet8f& a)
|
|
454
|
+
{
|
|
455
|
+
return _mm256_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
template<>
|
|
459
|
+
EIGEN_STRONG_INLINE Packet4d ptruncate<Packet4d>(const Packet4d& a)
|
|
460
|
+
{
|
|
461
|
+
return _mm256_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
template<>
|
|
465
|
+
EIGEN_STRONG_INLINE Packet8i pcmpeq64<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
466
|
+
{
|
|
467
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
468
|
+
return _mm256_cmpeq_epi64(a, b);
|
|
469
|
+
#else
|
|
470
|
+
Packet4i a1, a2, b1, b2;
|
|
471
|
+
split_two(a, a1, a2);
|
|
472
|
+
split_two(b, b1, b2);
|
|
473
|
+
return combine_two((Packet4i)_mm_cmpeq_epi64(a1, b1), (Packet4i)_mm_cmpeq_epi64(a2, b2));
|
|
474
|
+
#endif
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
template<>
|
|
478
|
+
EIGEN_STRONG_INLINE Packet8i pmuluadd64<Packet8i>(const Packet8i& a, uint64_t b, uint64_t c)
|
|
479
|
+
{
|
|
480
|
+
uint64_t u[4];
|
|
481
|
+
_mm256_storeu_si256((__m256i*)u, a);
|
|
482
|
+
u[0] = u[0] * b + c;
|
|
483
|
+
u[1] = u[1] * b + c;
|
|
484
|
+
u[2] = u[2] * b + c;
|
|
485
|
+
u[3] = u[3] * b + c;
|
|
486
|
+
return _mm256_loadu_si256((__m256i*)u);
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
/// Converts four unsigned 64-bit integers to doubles without a dedicated
/// conversion instruction.
/// The integer literal 0x0010000000000000 is converted by value to the double
/// 2^52. OR-ing the input bits into that double's bit pattern and then
/// subtracting 2^52 leaves the input's value.
/// NOTE(review): this looks exact only for inputs below 2^52 — confirm callers
/// stay in that range.
EIGEN_STRONG_INLINE __m256d uint64_to_double(__m256i x) {
	const __m256d magic = _mm256_set1_pd(0x0010000000000000);
	const __m256d biased = _mm256_or_pd(_mm256_castsi256_pd(x), magic);
	return _mm256_sub_pd(biased, _mm256_set1_pd(0x0010000000000000));
}
|
|
493
|
+
|
|
494
|
+
/// Converts four signed 64-bit integers to doubles without a dedicated
/// conversion instruction.
/// The integer literal 0x0018000000000000 is converted by value to the double
/// 2^52 + 2^51. Its bit pattern is added to the input with padd64, the sum is
/// reinterpreted as doubles, and the same constant is subtracted again.
/// NOTE(review): this looks valid only for magnitudes below 2^51 — confirm
/// callers stay in that range.
EIGEN_STRONG_INLINE __m256d int64_to_double(__m256i x) {
	const __m256d magic = _mm256_set1_pd(0x0018000000000000);
	x = padd64(x, _mm256_castpd_si256(magic));
	return _mm256_sub_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0018000000000000));
}
|
|
498
|
+
|
|
499
|
+
/// Converts four doubles to signed 64-bit integers, rounding toward negative
/// infinity (the input is floored first).
/// The floored value is added to the double 2^52 + 2^51 (written as the
/// integer literal 0x0018000000000000, converted by value). The bit pattern of
/// that constant is then removed from the sum's bits with psub64.
/// NOTE(review): this looks valid only for magnitudes below 2^51 — confirm
/// callers stay in that range.
EIGEN_STRONG_INLINE __m256i double_to_int64(__m256d x) {
	const __m256d magic = _mm256_set1_pd(0x0018000000000000);
	x = _mm256_add_pd(_mm256_floor_pd(x), magic);
	const __m256i sumBits = _mm256_castpd_si256(x);
	const __m256i magicBits = _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000));
	return psub64(sumBits, magicBits);
}
|
|
506
|
+
|
|
507
|
+
template<>
|
|
508
|
+
EIGEN_STRONG_INLINE Packet8i pcast64<Packet4d, Packet8i>(const Packet4d& a)
|
|
509
|
+
{
|
|
510
|
+
return double_to_int64(a);
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
template<>
|
|
514
|
+
EIGEN_STRONG_INLINE Packet4d pcast64<Packet8i, Packet4d>(const Packet8i& a)
|
|
515
|
+
{
|
|
516
|
+
return int64_to_double(a);
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
/// Sine of four doubles; forwards to the generic _psin implementation.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4d psin<Packet4d>(const Packet4d& x)
{
	const Packet4d sine = _psin(x);
	return sine;
}
|
|
524
|
+
|
|
525
|
+
#ifdef EIGENRAND_EIGEN_33_MODE
|
|
526
|
+
template <>
|
|
527
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
|
528
|
+
plog<Packet4d>(const Packet4d& _x) {
|
|
529
|
+
Packet4d x = _x;
|
|
530
|
+
_EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
|
|
531
|
+
_EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
|
|
532
|
+
|
|
533
|
+
auto inv_mant_mask = _mm256_castsi256_pd(pseti64<Packet8i>(~0x7ff0000000000000));
|
|
534
|
+
auto min_norm_pos = _mm256_castsi256_pd(pseti64<Packet8i>(0x10000000000000));
|
|
535
|
+
auto minus_inf = _mm256_castsi256_pd(pseti64<Packet8i>(0xfff0000000000000));
|
|
536
|
+
|
|
537
|
+
// Polynomial coefficients.
|
|
538
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_SQRTHF, 0.707106781186547524);
|
|
539
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p0, 7.0376836292E-2);
|
|
540
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p1, -1.1514610310E-1);
|
|
541
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p2, 1.1676998740E-1);
|
|
542
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p3, -1.2420140846E-1);
|
|
543
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p4, +1.4249322787E-1);
|
|
544
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p5, -1.6668057665E-1);
|
|
545
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p6, +2.0000714765E-1);
|
|
546
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p7, -2.4999993993E-1);
|
|
547
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_p8, +3.3333331174E-1);
|
|
548
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_q1, -2.12194440e-4);
|
|
549
|
+
_EIGEN_DECLARE_CONST_Packet4d(cephes_log_q2, 0.693359375);
|
|
550
|
+
|
|
551
|
+
Packet4d invalid_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_NGE_UQ); // not greater equal is true if x is NaN
|
|
552
|
+
Packet4d iszero_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_EQ_OQ);
|
|
553
|
+
|
|
554
|
+
// Truncate input values to the minimum positive normal.
|
|
555
|
+
x = pmax(x, min_norm_pos);
|
|
556
|
+
|
|
557
|
+
Packet4d emm0 = uint64_to_double(psrl64<52>(_mm256_castpd_si256(x)));
|
|
558
|
+
Packet4d e = psub(emm0, pset1<Packet4d>(1022));
|
|
559
|
+
|
|
560
|
+
// Set the exponents to -1, i.e. x are in the range [0.5,1).
|
|
561
|
+
x = _mm256_and_pd(x, inv_mant_mask);
|
|
562
|
+
x = _mm256_or_pd(x, p4d_half);
|
|
563
|
+
|
|
564
|
+
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
|
|
565
|
+
// and shift by -1. The values are then centered around 0, which improves
|
|
566
|
+
// the stability of the polynomial evaluation.
|
|
567
|
+
// if( x < SQRTHF ) {
|
|
568
|
+
// e -= 1;
|
|
569
|
+
// x = x + x - 1.0;
|
|
570
|
+
// } else { x = x - 1.0; }
|
|
571
|
+
Packet4d mask = _mm256_cmp_pd(x, p4d_cephes_SQRTHF, _CMP_LT_OQ);
|
|
572
|
+
Packet4d tmp = _mm256_and_pd(x, mask);
|
|
573
|
+
x = psub(x, p4d_1);
|
|
574
|
+
e = psub(e, _mm256_and_pd(p4d_1, mask));
|
|
575
|
+
x = padd(x, tmp);
|
|
576
|
+
|
|
577
|
+
Packet4d x2 = pmul(x, x);
|
|
578
|
+
Packet4d x3 = pmul(x2, x);
|
|
579
|
+
|
|
580
|
+
// Evaluate the polynomial approximant of degree 8 in three parts, probably
|
|
581
|
+
// to improve instruction-level parallelism.
|
|
582
|
+
Packet4d y, y1, y2;
|
|
583
|
+
y = pmadd(p4d_cephes_log_p0, x, p4d_cephes_log_p1);
|
|
584
|
+
y1 = pmadd(p4d_cephes_log_p3, x, p4d_cephes_log_p4);
|
|
585
|
+
y2 = pmadd(p4d_cephes_log_p6, x, p4d_cephes_log_p7);
|
|
586
|
+
y = pmadd(y, x, p4d_cephes_log_p2);
|
|
587
|
+
y1 = pmadd(y1, x, p4d_cephes_log_p5);
|
|
588
|
+
y2 = pmadd(y2, x, p4d_cephes_log_p8);
|
|
589
|
+
y = pmadd(y, x3, y1);
|
|
590
|
+
y = pmadd(y, x3, y2);
|
|
591
|
+
y = pmul(y, x3);
|
|
592
|
+
|
|
593
|
+
// Add the logarithm of the exponent back to the result of the interpolation.
|
|
594
|
+
y1 = pmul(e, p4d_cephes_log_q1);
|
|
595
|
+
tmp = pmul(x2, p4d_half);
|
|
596
|
+
y = padd(y, y1);
|
|
597
|
+
x = psub(x, tmp);
|
|
598
|
+
y2 = pmul(e, p4d_cephes_log_q2);
|
|
599
|
+
x = padd(x, y);
|
|
600
|
+
x = padd(x, y2);
|
|
601
|
+
|
|
602
|
+
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
|
|
603
|
+
return pblendv(iszero_mask, minus_inf, _mm256_or_pd(x, invalid_mask));
|
|
604
|
+
}
|
|
605
|
+
#endif
|
|
606
|
+
|
|
607
|
+
#if !(EIGEN_VERSION_AT_LEAST(3,3,5))
|
|
608
|
+
/// Converts four 32-bit integers to four floats.
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a)
{
	const Packet4f asFloat = _mm_cvtepi32_ps(a);
	return asFloat;
}
|
|
611
|
+
|
|
612
|
+
/// Converts four floats to four 32-bit integers, truncating toward zero.
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a)
{
	// cvtt = convert with truncation, regardless of the current rounding mode.
	return _mm_cvttps_epi32(a);
}
|
|
615
|
+
#endif
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
#endif
|