tomoto 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file MorePacketMath.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
* @brief Additional packet-math primitives for the NEON packet types (Packet4i / Packet4f)
|
|
5
|
+
* @version 0.4.0
|
|
6
|
+
* @date 2021-04-26
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020-2021
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_MORE_PACKET_MATH_NEON_H
|
|
13
|
+
#define EIGENRAND_MORE_PACKET_MATH_NEON_H
|
|
14
|
+
|
|
15
|
+
#include <arm_neon.h>
|
|
16
|
+
|
|
17
|
+
// pcast device-function specializations for Eigen 3.3.x (up to 3.3.9); only compiled in EIGENRAND_EIGEN_33_MODE
|
|
18
|
+
#ifdef EIGENRAND_EIGEN_33_MODE
|
|
19
|
+
namespace Eigen
|
|
20
|
+
{
|
|
21
|
+
namespace internal
|
|
22
|
+
{
|
|
23
|
+
template<>
|
|
24
|
+
EIGEN_DEVICE_FUNC inline Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a)
|
|
25
|
+
{
|
|
26
|
+
return vcvtq_f32_s32(a);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
template<>
|
|
30
|
+
EIGEN_DEVICE_FUNC inline Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a)
|
|
31
|
+
{
|
|
32
|
+
return vcvtq_s32_f32(a);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
namespace Eigen
|
|
40
|
+
{
|
|
41
|
+
namespace internal
|
|
42
|
+
{
|
|
43
|
+
template<>
|
|
44
|
+
struct IsIntPacket<Packet4i> : std::true_type {};
|
|
45
|
+
|
|
46
|
+
template<>
|
|
47
|
+
struct IsFloatPacket<Packet4f> : std::true_type {};
|
|
48
|
+
|
|
49
|
+
template<>
|
|
50
|
+
struct HalfPacket<Packet4i>
|
|
51
|
+
{
|
|
52
|
+
using type = uint64_t;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
template<>
|
|
56
|
+
struct reinterpreter<Packet4i>
|
|
57
|
+
{
|
|
58
|
+
EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x)
|
|
59
|
+
{
|
|
60
|
+
return (Packet4f)vreinterpretq_f32_s32(x);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x)
|
|
64
|
+
{
|
|
65
|
+
return x;
|
|
66
|
+
}
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
template<>
|
|
70
|
+
struct reinterpreter<Packet4f>
|
|
71
|
+
{
|
|
72
|
+
EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x)
|
|
73
|
+
{
|
|
74
|
+
return x;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x)
|
|
78
|
+
{
|
|
79
|
+
return (Packet4i)vreinterpretq_s32_f32(x);
|
|
80
|
+
}
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
template<>
|
|
84
|
+
EIGEN_STRONG_INLINE Packet4i pcmpeq<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
85
|
+
{
|
|
86
|
+
return vreinterpretq_s32_u32(vceqq_s32(a, b));
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
template<>
|
|
90
|
+
EIGEN_STRONG_INLINE Packet4f pcmpeq<Packet4f>(const Packet4f& a, const Packet4f& b)
|
|
91
|
+
{
|
|
92
|
+
return vreinterpretq_f32_u32(vceqq_f32(a, b));
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
template<>
|
|
96
|
+
EIGEN_STRONG_INLINE Packet4i pbitnot<Packet4i>(const Packet4i& a)
|
|
97
|
+
{
|
|
98
|
+
return vmvnq_s32(a);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
template<>
|
|
102
|
+
EIGEN_STRONG_INLINE Packet4f pbitnot<Packet4f>(const Packet4f& a)
|
|
103
|
+
{
|
|
104
|
+
return (Packet4f)vreinterpretq_f32_s32(pbitnot((Packet4i)vreinterpretq_s32_f32(a)));
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
template<>
|
|
108
|
+
struct BitShifter<Packet4i>
|
|
109
|
+
{
|
|
110
|
+
template<int b>
|
|
111
|
+
EIGEN_STRONG_INLINE Packet4i sll(const Packet4i& a)
|
|
112
|
+
{
|
|
113
|
+
return vreinterpretq_s32_u32(vshlq_n_u32(vreinterpretq_u32_s32(a), b));
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
template<int b>
|
|
117
|
+
EIGEN_STRONG_INLINE Packet4i srl(const Packet4i& a, int _b = b)
|
|
118
|
+
{
|
|
119
|
+
if (b > 0)
|
|
120
|
+
{
|
|
121
|
+
return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), b > 0 ? b : 1));
|
|
122
|
+
}
|
|
123
|
+
else
|
|
124
|
+
{
|
|
125
|
+
switch (_b)
|
|
126
|
+
{
|
|
127
|
+
case 0: return a;
|
|
128
|
+
case 1: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 1));
|
|
129
|
+
case 2: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 2));
|
|
130
|
+
case 3: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 3));
|
|
131
|
+
case 4: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 4));
|
|
132
|
+
case 5: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 5));
|
|
133
|
+
case 6: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 6));
|
|
134
|
+
case 7: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 7));
|
|
135
|
+
case 8: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 8));
|
|
136
|
+
case 9: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 9));
|
|
137
|
+
case 10: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 10));
|
|
138
|
+
case 11: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 11));
|
|
139
|
+
case 12: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 12));
|
|
140
|
+
case 13: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 13));
|
|
141
|
+
case 14: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 14));
|
|
142
|
+
case 15: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 15));
|
|
143
|
+
case 16: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 16));
|
|
144
|
+
case 17: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 17));
|
|
145
|
+
case 18: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 18));
|
|
146
|
+
case 19: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 19));
|
|
147
|
+
case 20: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 20));
|
|
148
|
+
case 21: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 21));
|
|
149
|
+
case 22: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 22));
|
|
150
|
+
case 23: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 23));
|
|
151
|
+
case 24: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 24));
|
|
152
|
+
case 25: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 25));
|
|
153
|
+
case 26: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 26));
|
|
154
|
+
case 27: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 27));
|
|
155
|
+
case 28: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 28));
|
|
156
|
+
case 29: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 29));
|
|
157
|
+
case 30: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 30));
|
|
158
|
+
case 31: return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 31));
|
|
159
|
+
}
|
|
160
|
+
return vdupq_n_s32(0);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
template<int b>
|
|
165
|
+
EIGEN_STRONG_INLINE Packet4i sll64(const Packet4i& a)
|
|
166
|
+
{
|
|
167
|
+
return vreinterpretq_s32_u64(vshlq_n_u64(vreinterpretq_u64_s32(a), b));
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
template<int b>
|
|
171
|
+
EIGEN_STRONG_INLINE Packet4i srl64(const Packet4i& a)
|
|
172
|
+
{
|
|
173
|
+
return vreinterpretq_s32_u64(vshrq_n_u64(vreinterpretq_u64_s32(a), b));
|
|
174
|
+
}
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
template<>
|
|
178
|
+
EIGEN_STRONG_INLINE Packet4i pcmplt<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
179
|
+
{
|
|
180
|
+
return vreinterpretq_s32_u32(vcltq_s32(a, b));
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
template<>
|
|
184
|
+
EIGEN_STRONG_INLINE Packet4f pcmplt<Packet4f>(const Packet4f& a, const Packet4f& b)
|
|
185
|
+
{
|
|
186
|
+
return vreinterpretq_f32_u32(vcltq_f32(a, b));
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
template<>
|
|
190
|
+
EIGEN_STRONG_INLINE Packet4f pcmple<Packet4f>(const Packet4f& a, const Packet4f& b)
|
|
191
|
+
{
|
|
192
|
+
return vreinterpretq_f32_u32(vcleq_f32(a, b));
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
template<>
|
|
196
|
+
EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
|
|
197
|
+
{
|
|
198
|
+
return vbslq_f32(vreinterpretq_u32_f32(ifPacket), thenPacket, elsePacket);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
template<>
|
|
202
|
+
EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4i& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
|
|
203
|
+
{
|
|
204
|
+
return vbslq_f32(vreinterpretq_u32_s32(ifPacket), thenPacket, elsePacket);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
template<>
|
|
208
|
+
EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket)
|
|
209
|
+
{
|
|
210
|
+
return vbslq_s32(vreinterpretq_u32_s32(ifPacket), thenPacket, elsePacket);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
template<>
|
|
214
|
+
EIGEN_STRONG_INLINE Packet4i pgather<Packet4i>(const int* addr, const Packet4i& index)
|
|
215
|
+
{
|
|
216
|
+
int32_t u[4];
|
|
217
|
+
vst1q_s32(u, index);
|
|
218
|
+
int32_t t[4];
|
|
219
|
+
t[0] = addr[u[0]];
|
|
220
|
+
t[1] = addr[u[1]];
|
|
221
|
+
t[2] = addr[u[2]];
|
|
222
|
+
t[3] = addr[u[3]];
|
|
223
|
+
return vld1q_s32(t);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
template<>
|
|
227
|
+
EIGEN_STRONG_INLINE Packet4f pgather<Packet4i>(const float* addr, const Packet4i& index)
|
|
228
|
+
{
|
|
229
|
+
int32_t u[4];
|
|
230
|
+
vst1q_s32(u, index);
|
|
231
|
+
float t[4];
|
|
232
|
+
t[0] = addr[u[0]];
|
|
233
|
+
t[1] = addr[u[1]];
|
|
234
|
+
t[2] = addr[u[2]];
|
|
235
|
+
t[3] = addr[u[3]];
|
|
236
|
+
return vld1q_f32(t);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
template<>
|
|
240
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet4f>(const Packet4f& a)
|
|
241
|
+
{
|
|
242
|
+
int32_t bits[4] = { 1, 2, 4, 8 };
|
|
243
|
+
auto r = vbslq_s32(vreinterpretq_u32_f32(a), vld1q_s32(bits), vdupq_n_s32(0));
|
|
244
|
+
auto s = vadd_s32(vget_low_s32(r), vget_high_s32(r));
|
|
245
|
+
return vget_lane_s32(vpadd_s32(s, s), 0);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
template<>
|
|
249
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet4i>(const Packet4i& a)
|
|
250
|
+
{
|
|
251
|
+
return pmovemask((Packet4f)vreinterpretq_f32_s32(a));
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
template<>
|
|
255
|
+
EIGEN_STRONG_INLINE Packet4f ptruncate<Packet4f>(const Packet4f& a)
|
|
256
|
+
{
|
|
257
|
+
return vrndq_f32(a);
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
template<>
|
|
261
|
+
EIGEN_STRONG_INLINE Packet4i pseti64<Packet4i>(uint64_t a)
|
|
262
|
+
{
|
|
263
|
+
return vreinterpretq_s32_u64(vdupq_n_u64(a));
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
template<>
|
|
267
|
+
EIGEN_STRONG_INLINE Packet4i pcmpeq64<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
268
|
+
{
|
|
269
|
+
return vreinterpretq_s32_u64(vceqq_s64(vreinterpretq_s64_s32(a), vreinterpretq_s64_s32(b)));
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
template<>
|
|
273
|
+
EIGEN_STRONG_INLINE Packet4i pmuluadd64<Packet4i>(const Packet4i& a, uint64_t b, uint64_t c)
|
|
274
|
+
{
|
|
275
|
+
uint64_t u[2];
|
|
276
|
+
vst1q_u64(u, vreinterpretq_u64_s32(a));
|
|
277
|
+
u[0] = u[0] * b + c;
|
|
278
|
+
u[1] = u[1] * b + c;
|
|
279
|
+
return vreinterpretq_s32_u64(vld1q_u64(u));
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
#ifdef EIGENRAND_EIGEN_33_MODE
|
|
283
|
+
template<>
|
|
284
|
+
EIGEN_STRONG_INLINE Packet4f plog<Packet4f>(const Packet4f& _x)
|
|
285
|
+
{
|
|
286
|
+
Packet4f x = _x;
|
|
287
|
+
_EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
|
|
288
|
+
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
|
289
|
+
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
|
290
|
+
|
|
291
|
+
const Packet4f p4f_inv_mant_mask = (Packet4f)vreinterpretq_f32_s32(pset1<Packet4i>(~0x7f800000));
|
|
292
|
+
|
|
293
|
+
/* the smallest non denormalized float number */
|
|
294
|
+
const Packet4f p4f_min_norm_pos = (Packet4f)vreinterpretq_f32_s32(pset1<Packet4i>(0x00800000));
|
|
295
|
+
const Packet4f p4f_minus_inf = (Packet4f)vreinterpretq_f32_s32(pset1<Packet4i>(0xff800000));
|
|
296
|
+
|
|
297
|
+
/* natural logarithm computed for 4 simultaneous float
|
|
298
|
+
return NaN for x <= 0
|
|
299
|
+
*/
|
|
300
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
|
|
301
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
|
|
302
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310E-1f);
|
|
303
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
|
|
304
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846E-1f);
|
|
305
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787E-1f);
|
|
306
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665E-1f);
|
|
307
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765E-1f);
|
|
308
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993E-1f);
|
|
309
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174E-1f);
|
|
310
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
|
|
311
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
Packet4i emm0;
|
|
315
|
+
|
|
316
|
+
Packet4f invalid_mask = pbitnot(pcmple(pset1<Packet4f>(0), x)); // not greater equal is true if x is NaN
|
|
317
|
+
Packet4f iszero_mask = pcmpeq(x, pset1<Packet4f>(0));
|
|
318
|
+
|
|
319
|
+
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
|
|
320
|
+
emm0 = BitShifter<Packet4i>{}.template srl<23>((Packet4i)vreinterpretq_s32_f32(x));
|
|
321
|
+
|
|
322
|
+
/* keep only the fractional part */
|
|
323
|
+
x = pand(x, p4f_inv_mant_mask);
|
|
324
|
+
x = por(x, p4f_half);
|
|
325
|
+
|
|
326
|
+
emm0 = psub(emm0, p4i_0x7f);
|
|
327
|
+
Packet4f e = padd(Packet4f(vcvtq_f32_s32(emm0)), p4f_1);
|
|
328
|
+
|
|
329
|
+
/* part2:
|
|
330
|
+
if( x < SQRTHF ) {
|
|
331
|
+
e -= 1;
|
|
332
|
+
x = x + x - 1.0;
|
|
333
|
+
} else { x = x - 1.0; }
|
|
334
|
+
*/
|
|
335
|
+
Packet4f mask = pcmplt(x, p4f_cephes_SQRTHF);
|
|
336
|
+
Packet4f tmp = pand(x, mask);
|
|
337
|
+
x = psub(x, p4f_1);
|
|
338
|
+
e = psub(e, pand(p4f_1, mask));
|
|
339
|
+
x = padd(x, tmp);
|
|
340
|
+
|
|
341
|
+
Packet4f x2 = pmul(x, x);
|
|
342
|
+
Packet4f x3 = pmul(x2, x);
|
|
343
|
+
|
|
344
|
+
Packet4f y, y1, y2;
|
|
345
|
+
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
|
|
346
|
+
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
|
|
347
|
+
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
|
|
348
|
+
y = pmadd(y, x, p4f_cephes_log_p2);
|
|
349
|
+
y1 = pmadd(y1, x, p4f_cephes_log_p5);
|
|
350
|
+
y2 = pmadd(y2, x, p4f_cephes_log_p8);
|
|
351
|
+
y = pmadd(y, x3, y1);
|
|
352
|
+
y = pmadd(y, x3, y2);
|
|
353
|
+
y = pmul(y, x3);
|
|
354
|
+
|
|
355
|
+
y1 = pmul(e, p4f_cephes_log_q1);
|
|
356
|
+
tmp = pmul(x2, p4f_half);
|
|
357
|
+
y = padd(y, y1);
|
|
358
|
+
x = psub(x, tmp);
|
|
359
|
+
y2 = pmul(e, p4f_cephes_log_q2);
|
|
360
|
+
x = padd(x, y);
|
|
361
|
+
x = padd(x, y2);
|
|
362
|
+
// negative arg will be NAN, 0 will be -INF
|
|
363
|
+
return pblendv(iszero_mask, p4f_minus_inf, por(x, invalid_mask));
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
template<>
|
|
367
|
+
EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x)
|
|
368
|
+
{
|
|
369
|
+
return vsqrtq_f32(x);
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
template<>
|
|
373
|
+
EIGEN_STRONG_INLINE Packet4f psin<Packet4f>(const Packet4f& _x)
|
|
374
|
+
{
|
|
375
|
+
Packet4f x = _x;
|
|
376
|
+
_EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
|
|
377
|
+
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
|
378
|
+
|
|
379
|
+
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
|
|
380
|
+
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
|
|
381
|
+
_EIGEN_DECLARE_CONST_Packet4i(2, 2);
|
|
382
|
+
_EIGEN_DECLARE_CONST_Packet4i(4, 4);
|
|
383
|
+
|
|
384
|
+
const Packet4f p4f_sign_mask = (Packet4f)vreinterpretq_f32_s32(pset1<Packet4i>(0x80000000));
|
|
385
|
+
|
|
386
|
+
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f);
|
|
387
|
+
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
|
|
388
|
+
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
|
|
389
|
+
_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
|
|
390
|
+
_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
|
|
391
|
+
_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
|
|
392
|
+
_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
|
|
393
|
+
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
|
|
394
|
+
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
|
|
395
|
+
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
|
|
396
|
+
|
|
397
|
+
Packet4f xmm1, xmm2, xmm3, sign_bit, y;
|
|
398
|
+
|
|
399
|
+
Packet4i emm0, emm2;
|
|
400
|
+
sign_bit = x;
|
|
401
|
+
/* take the absolute value */
|
|
402
|
+
x = pabs(x);
|
|
403
|
+
|
|
404
|
+
/* take the modulo */
|
|
405
|
+
|
|
406
|
+
/* extract the sign bit (upper one) */
|
|
407
|
+
sign_bit = pand(sign_bit, p4f_sign_mask);
|
|
408
|
+
|
|
409
|
+
/* scale by 4/Pi */
|
|
410
|
+
y = pmul(x, p4f_cephes_FOPI);
|
|
411
|
+
|
|
412
|
+
/* store the integer part of y in mm0 */
|
|
413
|
+
emm2 = vcvtq_s32_f32(y);
|
|
414
|
+
/* j=(j+1) & (~1) (see the cephes sources) */
|
|
415
|
+
emm2 = padd(emm2, p4i_1);
|
|
416
|
+
emm2 = pand(emm2, p4i_not1);
|
|
417
|
+
y = vcvtq_f32_s32(emm2);
|
|
418
|
+
/* get the swap sign flag */
|
|
419
|
+
emm0 = pand(emm2, p4i_4);
|
|
420
|
+
emm0 = BitShifter<Packet4i>{}.template sll<29>(emm0);
|
|
421
|
+
/* get the polynom selection mask
|
|
422
|
+
there is one polynom for 0 <= x <= Pi/4
|
|
423
|
+
and another one for Pi/4<x<=Pi/2
|
|
424
|
+
|
|
425
|
+
Both branches will be computed.
|
|
426
|
+
*/
|
|
427
|
+
emm2 = pand(emm2, p4i_2);
|
|
428
|
+
emm2 = pcmpeq(emm2, pset1<Packet4i>(0));
|
|
429
|
+
|
|
430
|
+
Packet4f swap_sign_bit = (Packet4f)vreinterpretq_f32_s32(emm0);
|
|
431
|
+
Packet4f poly_mask = (Packet4f)vreinterpretq_f32_s32(emm2);
|
|
432
|
+
sign_bit = pxor(sign_bit, swap_sign_bit);
|
|
433
|
+
|
|
434
|
+
/* The magic pass: "Extended precision modular arithmetic"
|
|
435
|
+
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
|
436
|
+
xmm1 = pmul(y, p4f_minus_cephes_DP1);
|
|
437
|
+
xmm2 = pmul(y, p4f_minus_cephes_DP2);
|
|
438
|
+
xmm3 = pmul(y, p4f_minus_cephes_DP3);
|
|
439
|
+
x = padd(x, xmm1);
|
|
440
|
+
x = padd(x, xmm2);
|
|
441
|
+
x = padd(x, xmm3);
|
|
442
|
+
|
|
443
|
+
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
|
444
|
+
y = p4f_coscof_p0;
|
|
445
|
+
Packet4f z = pmul(x, x);
|
|
446
|
+
|
|
447
|
+
y = pmadd(y, z, p4f_coscof_p1);
|
|
448
|
+
y = pmadd(y, z, p4f_coscof_p2);
|
|
449
|
+
y = pmul(y, z);
|
|
450
|
+
y = pmul(y, z);
|
|
451
|
+
Packet4f tmp = pmul(z, p4f_half);
|
|
452
|
+
y = psub(y, tmp);
|
|
453
|
+
y = padd(y, p4f_1);
|
|
454
|
+
|
|
455
|
+
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
|
456
|
+
|
|
457
|
+
Packet4f y2 = p4f_sincof_p0;
|
|
458
|
+
y2 = pmadd(y2, z, p4f_sincof_p1);
|
|
459
|
+
y2 = pmadd(y2, z, p4f_sincof_p2);
|
|
460
|
+
y2 = pmul(y2, z);
|
|
461
|
+
y2 = pmul(y2, x);
|
|
462
|
+
y2 = padd(y2, x);
|
|
463
|
+
|
|
464
|
+
/* select the correct result from the two polynoms */
|
|
465
|
+
y = pblendv(poly_mask, y2, y);
|
|
466
|
+
/* update the sign */
|
|
467
|
+
return pxor(y, sign_bit);
|
|
468
|
+
}
|
|
469
|
+
#endif
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
#endif
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file PacketFilter.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
* @brief
|
|
5
|
+
* @version 0.4.0
|
|
6
|
+
* @date 2021-09-17
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020-2021
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_PACKET_FILTER_NEON_H
|
|
13
|
+
#define EIGENRAND_PACKET_FILTER_NEON_H
|
|
14
|
+
|
|
15
|
+
#include <arm_neon.h>
|
|
16
|
+
|
|
17
|
+
namespace Eigen
|
|
18
|
+
{
|
|
19
|
+
namespace Rand
|
|
20
|
+
{
|
|
21
|
+
namespace detail
|
|
22
|
+
{
|
|
23
|
+
template<>
|
|
24
|
+
class CompressMask<16>
|
|
25
|
+
{
|
|
26
|
+
std::array<std::array<uint8_t, 16>, 7> idx;
|
|
27
|
+
std::array<internal::Packet4f, 4> selector;
|
|
28
|
+
std::array<uint8_t, 16> cnt;
|
|
29
|
+
|
|
30
|
+
static uint8_t make_compress(int mask, int offset = 0)
|
|
31
|
+
{
|
|
32
|
+
uint8_t ret = 0;
|
|
33
|
+
int n = offset;
|
|
34
|
+
for (int i = 0; i < 4; ++i)
|
|
35
|
+
{
|
|
36
|
+
int l = mask & 1;
|
|
37
|
+
mask >>= 1;
|
|
38
|
+
if (l)
|
|
39
|
+
{
|
|
40
|
+
if (n >= 0) ret |= (i & 3) << (2 * n);
|
|
41
|
+
if (++n >= 4) break;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
return ret;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
static uint8_t count(int mask)
|
|
48
|
+
{
|
|
49
|
+
uint8_t ret = 0;
|
|
50
|
+
for (int i = 0; i < 4; ++i)
|
|
51
|
+
{
|
|
52
|
+
ret += mask & 1;
|
|
53
|
+
mask >>= 1;
|
|
54
|
+
}
|
|
55
|
+
return ret;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
CompressMask()
|
|
59
|
+
{
|
|
60
|
+
for (int i = 0; i < 16; ++i)
|
|
61
|
+
{
|
|
62
|
+
for (int o = 0; o < 7; ++o)
|
|
63
|
+
{
|
|
64
|
+
idx[o][i] = make_compress(i, o < 4 ? o : o - 7);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
cnt[i] = count(i);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
uint32_t v[4] = { 0, };
|
|
71
|
+
|
|
72
|
+
selector[0] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
|
|
73
|
+
v[0] = -1;
|
|
74
|
+
selector[1] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
|
|
75
|
+
v[1] = -1;
|
|
76
|
+
selector[2] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
|
|
77
|
+
v[2] = -1;
|
|
78
|
+
selector[3] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
static EIGEN_STRONG_INLINE internal::Packet4f permute(const internal::Packet4f& p, uint8_t i)
|
|
82
|
+
{
|
|
83
|
+
float u[4];
|
|
84
|
+
vst1q_f32(u, p);
|
|
85
|
+
float t[4];
|
|
86
|
+
t[0] = u[i & 3];
|
|
87
|
+
t[1] = u[(i >> 2) & 3];
|
|
88
|
+
t[2] = u[(i >> 4) & 3];
|
|
89
|
+
t[3] = u[(i >> 6) & 3];
|
|
90
|
+
return vld1q_f32(t);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
public:
|
|
94
|
+
|
|
95
|
+
enum { full_size = 4 };
|
|
96
|
+
|
|
97
|
+
static const CompressMask& get_inst()
|
|
98
|
+
{
|
|
99
|
+
static CompressMask cm;
|
|
100
|
+
return cm;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
template<typename Packet>
|
|
104
|
+
EIGEN_STRONG_INLINE int compress_append(Packet& _value, const Packet& _mask,
|
|
105
|
+
Packet& _rest, int rest_cnt, bool& full) const
|
|
106
|
+
{
|
|
107
|
+
auto& value = reinterpret_cast<internal::Packet4f&>(_value);
|
|
108
|
+
auto& mask = reinterpret_cast<const internal::Packet4f&>(_mask);
|
|
109
|
+
auto& rest = reinterpret_cast<internal::Packet4f&>(_rest);
|
|
110
|
+
|
|
111
|
+
int m = internal::pmovemask(mask);
|
|
112
|
+
if (cnt[m] == full_size)
|
|
113
|
+
{
|
|
114
|
+
full = true;
|
|
115
|
+
return rest_cnt;
|
|
116
|
+
}
|
|
117
|
+
auto p1 = permute(value, idx[rest_cnt][m]);
|
|
118
|
+
p1 = internal::pblendv(selector[rest_cnt], rest, p1);
|
|
119
|
+
|
|
120
|
+
auto new_cnt = rest_cnt + cnt[m];
|
|
121
|
+
if (new_cnt >= full_size)
|
|
122
|
+
{
|
|
123
|
+
if (new_cnt > full_size)
|
|
124
|
+
{
|
|
125
|
+
rest = permute(value, idx[new_cnt - cnt[m] + full_size - 1][m]);
|
|
126
|
+
}
|
|
127
|
+
value = p1;
|
|
128
|
+
full = true;
|
|
129
|
+
return new_cnt - full_size;
|
|
130
|
+
}
|
|
131
|
+
else
|
|
132
|
+
{
|
|
133
|
+
rest = p1;
|
|
134
|
+
full = false;
|
|
135
|
+
return new_cnt;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
#endif
|