tomoto 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file RandUtils.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
* @brief
|
|
5
|
+
* @version 0.4.0
|
|
6
|
+
* @date 2021-09-17
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020-2021
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_RAND_UTILS_NEON_H
|
|
13
|
+
#define EIGENRAND_RAND_UTILS_NEON_H
|
|
14
|
+
|
|
15
|
+
#include <arm_neon.h>
|
|
16
|
+
|
|
17
|
+
namespace Eigen
|
|
18
|
+
{
|
|
19
|
+
namespace internal
|
|
20
|
+
{
|
|
21
|
+
template<typename Rng, typename RngResult>
|
|
22
|
+
struct RawbitsMaker<Packet4i, Rng, RngResult, Rand::RandomEngineType::scalar>
|
|
23
|
+
{
|
|
24
|
+
EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
|
|
25
|
+
{
|
|
26
|
+
if (sizeof(RngResult) == 8)
|
|
27
|
+
{
|
|
28
|
+
uint64_t v[2];
|
|
29
|
+
v[0] = rng();
|
|
30
|
+
v[1] = rng();
|
|
31
|
+
return vld1q_s32((int32_t*)v);
|
|
32
|
+
}
|
|
33
|
+
else
|
|
34
|
+
{
|
|
35
|
+
uint32_t v[4];
|
|
36
|
+
v[0] = rng();
|
|
37
|
+
v[1] = rng();
|
|
38
|
+
v[2] = rng();
|
|
39
|
+
v[3] = rng();
|
|
40
|
+
return vld1q_s32((int32_t*)v);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
|
|
45
|
+
{
|
|
46
|
+
if (sizeof(RngResult) == 8)
|
|
47
|
+
{
|
|
48
|
+
uint64_t v[2];
|
|
49
|
+
v[0] = rng();
|
|
50
|
+
v[1] = rng();
|
|
51
|
+
return vld1q_s32((int32_t*)v);
|
|
52
|
+
}
|
|
53
|
+
else
|
|
54
|
+
{
|
|
55
|
+
uint32_t v[4];
|
|
56
|
+
v[0] = rng();
|
|
57
|
+
v[1] = rng();
|
|
58
|
+
v[2] = rng();
|
|
59
|
+
v[3] = rng();
|
|
60
|
+
return vld1q_s32((int32_t*)v);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
|
|
65
|
+
{
|
|
66
|
+
if (sizeof(decltype(rng())) == 8)
|
|
67
|
+
{
|
|
68
|
+
uint64_t v[2];
|
|
69
|
+
v[0] = rng();
|
|
70
|
+
v[1] = 0;
|
|
71
|
+
return vld1q_s32((int32_t*)v);
|
|
72
|
+
}
|
|
73
|
+
else
|
|
74
|
+
{
|
|
75
|
+
uint32_t v[4];
|
|
76
|
+
v[0] = rng();
|
|
77
|
+
v[1] = rng();
|
|
78
|
+
v[2] = 0;
|
|
79
|
+
v[3] = 0;
|
|
80
|
+
return vld1q_s32((int32_t*)v);
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
template<typename Rng>
|
|
86
|
+
struct RawbitsMaker<Packet4i, Rng, Packet4i, Rand::RandomEngineType::packet>
|
|
87
|
+
{
|
|
88
|
+
EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
|
|
89
|
+
{
|
|
90
|
+
return rng();
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
|
|
94
|
+
{
|
|
95
|
+
return rng();
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
|
|
99
|
+
{
|
|
100
|
+
return rng();
|
|
101
|
+
}
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
template<typename Rng>
|
|
105
|
+
struct UniformRealUtils<Packet4f, Rng> : public RawbitsMaker<Packet4i, Rng>
|
|
106
|
+
{
|
|
107
|
+
EIGEN_STRONG_INLINE Packet4f zero_to_one(Rng& rng)
|
|
108
|
+
{
|
|
109
|
+
return pdiv((Packet4f)vcvtq_f32_s32(pand(this->rawbits(rng), pset1<Packet4i>(0x7FFFFFFF))),
|
|
110
|
+
pset1<Packet4f>(0x7FFFFFFF));
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
EIGEN_STRONG_INLINE Packet4f uniform_real(Rng& rng)
|
|
114
|
+
{
|
|
115
|
+
return bit_to_ur_float(this->rawbits_34(rng));
|
|
116
|
+
}
|
|
117
|
+
};
|
|
118
|
+
|
|
119
|
+
template<typename Gen, typename Urng, bool _mutable>
|
|
120
|
+
struct functor_traits<scalar_rng_adaptor<Gen, double, Urng, _mutable> >
|
|
121
|
+
{
|
|
122
|
+
enum { Cost = HugeCost, PacketAccess = 0, IsRepeatable = false };
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
#endif
|
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file MorePacketMath.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
* @brief
|
|
5
|
+
* @version 0.3.5
|
|
6
|
+
* @date 2021-07-16
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020-2021
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_MORE_PACKET_MATH_SSE_H
|
|
13
|
+
#define EIGENRAND_MORE_PACKET_MATH_SSE_H
|
|
14
|
+
|
|
15
|
+
#include <xmmintrin.h>
|
|
16
|
+
|
|
17
|
+
namespace Eigen
|
|
18
|
+
{
|
|
19
|
+
namespace internal
|
|
20
|
+
{
|
|
21
|
+
template<>
|
|
22
|
+
struct IsIntPacket<Packet4i> : std::true_type {};
|
|
23
|
+
|
|
24
|
+
template<>
|
|
25
|
+
struct IsFloatPacket<Packet4f> : std::true_type {};
|
|
26
|
+
|
|
27
|
+
template<>
|
|
28
|
+
struct IsDoublePacket<Packet2d> : std::true_type {};
|
|
29
|
+
|
|
30
|
+
template<>
|
|
31
|
+
struct HalfPacket<Packet4i>
|
|
32
|
+
{
|
|
33
|
+
using type = uint64_t;
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
#ifdef EIGEN_VECTORIZE_AVX
|
|
37
|
+
#else
|
|
38
|
+
template<>
|
|
39
|
+
struct HalfPacket<Packet4f>
|
|
40
|
+
{
|
|
41
|
+
//using type = Packet2f;
|
|
42
|
+
};
|
|
43
|
+
#endif
|
|
44
|
+
template<>
|
|
45
|
+
struct reinterpreter<Packet4i>
|
|
46
|
+
{
|
|
47
|
+
EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x)
|
|
48
|
+
{
|
|
49
|
+
return _mm_castsi128_ps(x);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
EIGEN_STRONG_INLINE Packet2d to_double(const Packet4i& x)
|
|
53
|
+
{
|
|
54
|
+
return _mm_castsi128_pd(x);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x)
|
|
58
|
+
{
|
|
59
|
+
return x;
|
|
60
|
+
}
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
template<>
|
|
64
|
+
struct reinterpreter<Packet4f>
|
|
65
|
+
{
|
|
66
|
+
EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x)
|
|
67
|
+
{
|
|
68
|
+
return x;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
EIGEN_STRONG_INLINE Packet2d to_double(const Packet4f& x)
|
|
72
|
+
{
|
|
73
|
+
return _mm_castps_pd(x);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x)
|
|
77
|
+
{
|
|
78
|
+
return _mm_castps_si128(x);
|
|
79
|
+
}
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
template<>
|
|
83
|
+
struct reinterpreter<Packet2d>
|
|
84
|
+
{
|
|
85
|
+
EIGEN_STRONG_INLINE Packet4f to_float(const Packet2d& x)
|
|
86
|
+
{
|
|
87
|
+
return _mm_castpd_ps(x);
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
EIGEN_STRONG_INLINE Packet2d to_double(const Packet2d& x)
|
|
91
|
+
{
|
|
92
|
+
return x;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
EIGEN_STRONG_INLINE Packet4i to_int(const Packet2d& x)
|
|
96
|
+
{
|
|
97
|
+
return _mm_castpd_si128(x);
|
|
98
|
+
}
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
template<>
|
|
102
|
+
EIGEN_STRONG_INLINE void split_two<Packet4i>(const Packet4i& x, uint64_t& a, uint64_t& b)
|
|
103
|
+
{
|
|
104
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
105
|
+
a = _mm_extract_epi64(x, 0);
|
|
106
|
+
b = _mm_extract_epi64(x, 1);
|
|
107
|
+
#else
|
|
108
|
+
uint64_t u[2];
|
|
109
|
+
_mm_storeu_si128((__m128i*)u, x);
|
|
110
|
+
a = u[0];
|
|
111
|
+
b = u[1];
|
|
112
|
+
#endif
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/**
 * Builds a packet from 32-bit lanes 0 and 2 of both inputs.
 * The resulting lanes are { a[0], a[2], b[0], b[2] }.
 */
EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet4i& a, const Packet4i& b)
{
    // Move a's lanes 0 and 2 into the lower half, b's lanes 0 and 2 into the upper half.
    auto from_a = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0));
    auto from_b = _mm_shuffle_epi32(b, _MM_SHUFFLE(2, 0, 3, 1));

    // Clear the half of each shuffled packet that is not wanted, then merge.
    from_a = _mm_and_si128(from_a, _mm_setr_epi32(-1, -1, 0, 0));
    from_b = _mm_and_si128(from_b, _mm_setr_epi32(0, 0, -1, -1));
    return _mm_or_si128(from_a, from_b);
}
|
|
123
|
+
|
|
124
|
+
template<>
|
|
125
|
+
EIGEN_STRONG_INLINE Packet4i pseti64<Packet4i>(uint64_t a)
|
|
126
|
+
{
|
|
127
|
+
return _mm_set1_epi64x(a);
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
template<>
|
|
131
|
+
EIGEN_STRONG_INLINE Packet4i padd64<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
132
|
+
{
|
|
133
|
+
return _mm_add_epi64(a, b);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
template<>
|
|
137
|
+
EIGEN_STRONG_INLINE Packet4i psub64<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
138
|
+
{
|
|
139
|
+
return _mm_sub_epi64(a, b);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
template<>
|
|
143
|
+
EIGEN_STRONG_INLINE Packet4i pcmpeq<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
144
|
+
{
|
|
145
|
+
return _mm_cmpeq_epi32(a, b);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
template<>
|
|
149
|
+
struct BitShifter<Packet4i>
|
|
150
|
+
{
|
|
151
|
+
template<int b>
|
|
152
|
+
EIGEN_STRONG_INLINE Packet4i sll(const Packet4i& a)
|
|
153
|
+
{
|
|
154
|
+
return _mm_slli_epi32(a, b);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
template<int b>
|
|
158
|
+
EIGEN_STRONG_INLINE Packet4i srl(const Packet4i& a, int _b = b)
|
|
159
|
+
{
|
|
160
|
+
if (b >= 0)
|
|
161
|
+
{
|
|
162
|
+
return _mm_srli_epi32(a, b);
|
|
163
|
+
}
|
|
164
|
+
else
|
|
165
|
+
{
|
|
166
|
+
return _mm_srli_epi32(a, _b);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
template<int b>
|
|
171
|
+
EIGEN_STRONG_INLINE Packet4i sll64(const Packet4i& a)
|
|
172
|
+
{
|
|
173
|
+
return _mm_slli_epi64(a, b);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
template<int b>
|
|
177
|
+
EIGEN_STRONG_INLINE Packet4i srl64(const Packet4i& a)
|
|
178
|
+
{
|
|
179
|
+
return _mm_srli_epi64(a, b);
|
|
180
|
+
}
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
template<>
|
|
184
|
+
EIGEN_STRONG_INLINE Packet4i pcmplt<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
185
|
+
{
|
|
186
|
+
return _mm_cmplt_epi32(a, b);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
template<>
|
|
190
|
+
EIGEN_STRONG_INLINE Packet4i pcmplt64<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
191
|
+
{
|
|
192
|
+
#ifdef EIGEN_VECTORIZE_SSE4_2
|
|
193
|
+
return _mm_cmpgt_epi64(b, a);
|
|
194
|
+
#else
|
|
195
|
+
int64_t u[2], v[2];
|
|
196
|
+
_mm_storeu_si128((__m128i*)u, a);
|
|
197
|
+
_mm_storeu_si128((__m128i*)v, b);
|
|
198
|
+
return _mm_set_epi64x(u[1] < v[1] ? -1 : 0, u[0] < v[0] ? -1 : 0);
|
|
199
|
+
#endif
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
template<>
|
|
203
|
+
EIGEN_STRONG_INLINE Packet4f pcmplt<Packet4f>(const Packet4f& a, const Packet4f& b)
|
|
204
|
+
{
|
|
205
|
+
return _mm_cmplt_ps(a, b);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
template<>
|
|
209
|
+
EIGEN_STRONG_INLINE Packet4f pcmple<Packet4f>(const Packet4f& a, const Packet4f& b)
|
|
210
|
+
{
|
|
211
|
+
return _mm_cmple_ps(a, b);
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
template<>
|
|
215
|
+
EIGEN_STRONG_INLINE Packet2d pcmplt<Packet2d>(const Packet2d& a, const Packet2d& b)
|
|
216
|
+
{
|
|
217
|
+
return _mm_cmplt_pd(a, b);
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
template<>
|
|
221
|
+
EIGEN_STRONG_INLINE Packet2d pcmple<Packet2d>(const Packet2d& a, const Packet2d& b)
|
|
222
|
+
{
|
|
223
|
+
return _mm_cmple_pd(a, b);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
template<>
|
|
227
|
+
EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
|
|
228
|
+
{
|
|
229
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
230
|
+
return _mm_blendv_ps(elsePacket, thenPacket, ifPacket);
|
|
231
|
+
#else
|
|
232
|
+
return _mm_or_ps(_mm_and_ps(ifPacket, thenPacket), _mm_andnot_ps(ifPacket, elsePacket));
|
|
233
|
+
#endif
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
template<>
|
|
237
|
+
EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4i& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
|
|
238
|
+
{
|
|
239
|
+
return pblendv(_mm_castsi128_ps(ifPacket), thenPacket, elsePacket);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
template<>
|
|
243
|
+
EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket)
|
|
244
|
+
{
|
|
245
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
246
|
+
return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(elsePacket), _mm_castsi128_ps(thenPacket), _mm_castsi128_ps(ifPacket)));
|
|
247
|
+
#else
|
|
248
|
+
return _mm_or_si128(_mm_and_si128(ifPacket, thenPacket), _mm_andnot_si128(ifPacket, elsePacket));
|
|
249
|
+
#endif
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
template<>
|
|
253
|
+
EIGEN_STRONG_INLINE Packet2d pblendv(const Packet2d& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
|
|
254
|
+
{
|
|
255
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
256
|
+
return _mm_blendv_pd(elsePacket, thenPacket, ifPacket);
|
|
257
|
+
#else
|
|
258
|
+
return _mm_or_pd(_mm_and_pd(ifPacket, thenPacket), _mm_andnot_pd(ifPacket, elsePacket));
|
|
259
|
+
#endif
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
template<>
|
|
264
|
+
EIGEN_STRONG_INLINE Packet2d pblendv(const Packet4i& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
|
|
265
|
+
{
|
|
266
|
+
return pblendv(_mm_castsi128_pd(ifPacket), thenPacket, elsePacket);
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
template<>
|
|
270
|
+
EIGEN_STRONG_INLINE Packet4i pgather<Packet4i>(const int* addr, const Packet4i& index)
|
|
271
|
+
{
|
|
272
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
273
|
+
return _mm_i32gather_epi32(addr, index, 4);
|
|
274
|
+
#else
|
|
275
|
+
uint32_t u[4];
|
|
276
|
+
_mm_storeu_si128((__m128i*)u, index);
|
|
277
|
+
return _mm_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
|
|
278
|
+
#endif
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
template<>
|
|
282
|
+
EIGEN_STRONG_INLINE Packet4f pgather<Packet4i>(const float* addr, const Packet4i& index)
|
|
283
|
+
{
|
|
284
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
285
|
+
return _mm_i32gather_ps(addr, index, 4);
|
|
286
|
+
#else
|
|
287
|
+
uint32_t u[4];
|
|
288
|
+
_mm_storeu_si128((__m128i*)u, index);
|
|
289
|
+
return _mm_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
|
|
290
|
+
#endif
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
template<>
|
|
294
|
+
EIGEN_STRONG_INLINE Packet2d pgather<Packet4i>(const double* addr, const Packet4i& index, bool upperhalf)
|
|
295
|
+
{
|
|
296
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
297
|
+
return _mm_i32gather_pd(addr, index, 8);
|
|
298
|
+
#else
|
|
299
|
+
uint32_t u[4];
|
|
300
|
+
_mm_storeu_si128((__m128i*)u, index);
|
|
301
|
+
if (upperhalf)
|
|
302
|
+
{
|
|
303
|
+
return _mm_setr_pd(addr[u[2]], addr[u[3]]);
|
|
304
|
+
}
|
|
305
|
+
else
|
|
306
|
+
{
|
|
307
|
+
return _mm_setr_pd(addr[u[0]], addr[u[1]]);
|
|
308
|
+
}
|
|
309
|
+
#endif
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
template<>
|
|
313
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet4f>(const Packet4f& a)
|
|
314
|
+
{
|
|
315
|
+
return _mm_movemask_ps(a);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
template<>
|
|
319
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet2d>(const Packet2d& a)
|
|
320
|
+
{
|
|
321
|
+
return _mm_movemask_pd(a);
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
template<>
|
|
325
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet4i>(const Packet4i& a)
|
|
326
|
+
{
|
|
327
|
+
return pmovemask((Packet4f)_mm_castsi128_ps(a));
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
template<>
|
|
331
|
+
EIGEN_STRONG_INLINE Packet4f ptruncate<Packet4f>(const Packet4f& a)
|
|
332
|
+
{
|
|
333
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
334
|
+
return _mm_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
|
335
|
+
#else
|
|
336
|
+
auto round = _MM_GET_ROUNDING_MODE();
|
|
337
|
+
_MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
|
|
338
|
+
auto ret = _mm_cvtepi32_ps(_mm_cvtps_epi32(a));
|
|
339
|
+
_MM_SET_ROUNDING_MODE(round);
|
|
340
|
+
return ret;
|
|
341
|
+
#endif
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
template<>
|
|
345
|
+
EIGEN_STRONG_INLINE Packet2d ptruncate<Packet2d>(const Packet2d& a)
|
|
346
|
+
{
|
|
347
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
348
|
+
return _mm_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
|
349
|
+
#else
|
|
350
|
+
auto round = _MM_GET_ROUNDING_MODE();
|
|
351
|
+
_MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
|
|
352
|
+
auto ret = _mm_cvtepi32_pd(_mm_cvtpd_epi32(a));
|
|
353
|
+
_MM_SET_ROUNDING_MODE(round);
|
|
354
|
+
return ret;
|
|
355
|
+
#endif
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
template<>
|
|
359
|
+
EIGEN_STRONG_INLINE Packet4i pcmpeq64<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
360
|
+
{
|
|
361
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
362
|
+
return _mm_cmpeq_epi64(a, b);
|
|
363
|
+
#else
|
|
364
|
+
Packet4i c = _mm_cmpeq_epi32(a, b);
|
|
365
|
+
return pand(c, (Packet4i)_mm_shuffle_epi32(c, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
366
|
+
#endif
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
template<>
|
|
370
|
+
EIGEN_STRONG_INLINE Packet4i pmuluadd64<Packet4i>(const Packet4i& a, uint64_t b, uint64_t c)
|
|
371
|
+
{
|
|
372
|
+
uint64_t u[2];
|
|
373
|
+
_mm_storeu_si128((__m128i*)u, a);
|
|
374
|
+
u[0] = u[0] * b + c;
|
|
375
|
+
u[1] = u[1] * b + c;
|
|
376
|
+
return _mm_loadu_si128((__m128i*)u);
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
/**
 * Converts two unsigned 64-bit integers to doubles using the "magic number"
 * technique: the integer bits are or-ed into the bit pattern of the double
 * 2^52 (0x0010000000000000 as a value), then 2^52 is subtracted again.
 * Intended for inputs small enough to fit in the 52-bit mantissa.
 */
EIGEN_STRONG_INLINE __m128d uint64_to_double(__m128i x) {
    const __m128d magic = _mm_set1_pd(0x0010000000000000);
    const __m128i biased = _mm_or_si128(x, _mm_castpd_si128(magic));
    return _mm_sub_pd(_mm_castsi128_pd(biased), magic);
}
|
|
383
|
+
|
|
384
|
+
/**
 * Converts two signed 64-bit integers to doubles using the "magic number"
 * technique: the integers are added (as 64-bit integers) to the bit pattern
 * of the double 2^52 + 2^51 (0x0018000000000000 as a value), and that same
 * double is then subtracted in floating point.
 * Intended for inputs of small enough magnitude to stay inside the mantissa.
 */
EIGEN_STRONG_INLINE __m128d int64_to_double(__m128i x) {
    const __m128d magic = _mm_set1_pd(0x0018000000000000);
    const __m128i biased = _mm_add_epi64(x, _mm_castpd_si128(magic));
    return _mm_sub_pd(_mm_castsi128_pd(biased), magic);
}
|
|
388
|
+
|
|
389
|
+
/**
 * Converts two doubles to signed 64-bit integers using the "magic number"
 * technique: 2^52 + 2^51 (0x0018000000000000 as a value) is added in floating
 * point and the bit pattern of that constant is subtracted as an integer.
 * The addition runs with the rounding mode set to round-down; the caller's
 * rounding mode is restored right after it.
 */
EIGEN_STRONG_INLINE __m128i double_to_int64(__m128d x) {
    const __m128d magic = _mm_set1_pd(0x0018000000000000);

    const int saved_rounding = _MM_GET_ROUNDING_MODE();
    _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN);
    const __m128d shifted = _mm_add_pd(x, magic);
    _MM_SET_ROUNDING_MODE(saved_rounding);

    return _mm_sub_epi64(_mm_castpd_si128(shifted), _mm_castpd_si128(magic));
}
|
|
399
|
+
|
|
400
|
+
template<>
|
|
401
|
+
EIGEN_STRONG_INLINE Packet4i pcast64<Packet2d, Packet4i>(const Packet2d& a)
|
|
402
|
+
{
|
|
403
|
+
return double_to_int64(a);
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
template<>
|
|
407
|
+
EIGEN_STRONG_INLINE Packet2d pcast64<Packet4i, Packet2d>(const Packet4i& a)
|
|
408
|
+
{
|
|
409
|
+
return int64_to_double(a);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
/** Sine of two packed doubles; forwards to the generic `_psin` implementation. */
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d psin<Packet2d>(const Packet2d& x)
{
    const Packet2d sine = _psin(x);
    return sine;
}
|
|
417
|
+
#ifdef EIGENRAND_EIGEN_33_MODE
|
|
418
|
+
/**
 * Natural logarithm of two packed doubles.
 *
 * The input is split into exponent and mantissa, the mantissa is mapped to a
 * small interval around zero, a polynomial is evaluated on it, and the
 * exponent contribution is added back as e * (q1 + q2).
 *
 * Special cases (see the final blend):
 *   - lanes that are negative or NaN return NaN,
 *   - lanes equal to zero return -infinity.
 *
 * NOTE(review): the polynomial coefficients look like the single-precision
 * Cephes set; confirm the resulting accuracy is adequate for double callers.
 */
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d plog<Packet2d>(const Packet2d& _x)
{
    Packet2d x = _x;
    _EIGEN_DECLARE_CONST_Packet2d(1, 1.0f);
    _EIGEN_DECLARE_CONST_Packet2d(half, 0.5f);

    // Bit masks / special values built from 64-bit integer patterns.
    auto inv_mant_mask = _mm_castsi128_pd(pseti64<Packet4i>(~0x7ff0000000000000)); // clears the exponent field
    auto min_norm_pos = _mm_castsi128_pd(pseti64<Packet4i>(0x10000000000000));     // smallest positive normal double
    auto minus_inf = _mm_castsi128_pd(pseti64<Packet4i>(0xfff0000000000000));      // -infinity

    /* natural logarithm computed for 2 simultaneous doubles
    return NaN for x < 0, -INF for x == 0
    */
    _EIGEN_DECLARE_CONST_Packet2d(cephes_SQRTHF, 0.707106781186547524);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p0, 7.0376836292E-2);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p1, -1.1514610310E-1);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p2, 1.1676998740E-1);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p3, -1.2420140846E-1);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p4, +1.4249322787E-1);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p5, -1.6668057665E-1);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p6, +2.0000714765E-1);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p7, -2.4999993993E-1);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p8, +3.3333331174E-1);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_q1, -2.12194440e-4);
    _EIGEN_DECLARE_CONST_Packet2d(cephes_log_q2, 0.693359375);

    Packet2d invalid_mask = _mm_cmpnge_pd(x, _mm_setzero_pd()); // not greater equal is true if x is NaN
    Packet2d iszero_mask = _mm_cmpeq_pd(x, _mm_setzero_pd());

    x = pmax(x, min_norm_pos);  /* cut off denormalized stuff */

    // Biased exponent of each lane (the sign bit is clear after the pmax above).
    Packet4i emm0 = _mm_srli_epi64(_mm_castpd_si128(x), 52);

    /* keep only the fractional part */
    x = _mm_and_pd(x, inv_mant_mask);
    x = _mm_or_pd(x, p2d_half);

    // Exponent relative to a mantissa in [0.5, 1): biased exponent minus 1022.
    Packet2d e = _mm_sub_pd(uint64_to_double(emm0), pset1<Packet2d>(1022));

    /* part2:
    if( x < SQRTHF ) {
    e -= 1;
    x = x + x - 1.0;
    } else { x = x - 1.0; }
    */
    Packet2d mask = _mm_cmplt_pd(x, p2d_cephes_SQRTHF);
    Packet2d tmp = pand(x, mask);
    x = psub(x, p2d_1);
    e = psub(e, pand(p2d_1, mask));
    x = padd(x, tmp);

    Packet2d x2 = pmul(x, x);
    Packet2d x3 = pmul(x2, x);

    // Polynomial evaluated as three interleaved sub-polynomials combined through x^3.
    Packet2d y, y1, y2;
    y = pmadd(p2d_cephes_log_p0, x, p2d_cephes_log_p1);
    y1 = pmadd(p2d_cephes_log_p3, x, p2d_cephes_log_p4);
    y2 = pmadd(p2d_cephes_log_p6, x, p2d_cephes_log_p7);
    y = pmadd(y, x, p2d_cephes_log_p2);
    y1 = pmadd(y1, x, p2d_cephes_log_p5);
    y2 = pmadd(y2, x, p2d_cephes_log_p8);
    y = pmadd(y, x3, y1);
    y = pmadd(y, x3, y2);
    y = pmul(y, x3);

    // Add the exponent contribution, split into the small (q1) and large (q2) parts.
    y1 = pmul(e, p2d_cephes_log_q1);
    tmp = pmul(x2, p2d_half);
    y = padd(y, y1);
    x = psub(x, tmp);
    y2 = pmul(e, p2d_cephes_log_q2);
    x = padd(x, y);
    x = padd(x, y2);
    // negative arg will be NAN, 0 will be -INF
    return pblendv(iszero_mask, minus_inf, _mm_or_pd(x, invalid_mask));
}
|
|
497
|
+
#endif
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
#endif
|