tomoto 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -0,0 +1,133 @@
|
|
1
|
+
/**
|
2
|
+
* @file PacketFilter.h
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
4
|
+
* @brief
|
5
|
+
* @version 0.4.0
|
6
|
+
* @date 2021-09-17
|
7
|
+
*
|
8
|
+
* @copyright Copyright (c) 2020-2021
|
9
|
+
*
|
10
|
+
*/
|
11
|
+
|
12
|
+
#ifndef EIGENRAND_PACKET_FILTER_SSE_H
|
13
|
+
#define EIGENRAND_PACKET_FILTER_SSE_H
|
14
|
+
|
15
|
+
#include <xmmintrin.h>
|
16
|
+
|
17
|
+
namespace Eigen
|
18
|
+
{
|
19
|
+
namespace Rand
|
20
|
+
{
|
21
|
+
namespace detail
|
22
|
+
{
|
23
|
+
template<>
|
24
|
+
class CompressMask<16>
|
25
|
+
{
|
26
|
+
std::array<std::array<uint8_t, 16>, 7> idx;
|
27
|
+
std::array<internal::Packet4f, 4> selector;
|
28
|
+
std::array<uint8_t, 16> cnt;
|
29
|
+
|
30
|
+
static uint8_t make_compress(int mask, int offset = 0)
|
31
|
+
{
|
32
|
+
uint8_t ret = 0;
|
33
|
+
int n = offset;
|
34
|
+
for (int i = 0; i < 4; ++i)
|
35
|
+
{
|
36
|
+
int l = mask & 1;
|
37
|
+
mask >>= 1;
|
38
|
+
if (l)
|
39
|
+
{
|
40
|
+
if (n >= 0) ret |= (i & 3) << (2 * n);
|
41
|
+
if (++n >= 4) break;
|
42
|
+
}
|
43
|
+
}
|
44
|
+
return ret;
|
45
|
+
}
|
46
|
+
|
47
|
+
static uint8_t count(int mask)
|
48
|
+
{
|
49
|
+
uint8_t ret = 0;
|
50
|
+
for (int i = 0; i < 4; ++i)
|
51
|
+
{
|
52
|
+
ret += mask & 1;
|
53
|
+
mask >>= 1;
|
54
|
+
}
|
55
|
+
return ret;
|
56
|
+
}
|
57
|
+
|
58
|
+
CompressMask()
|
59
|
+
{
|
60
|
+
for (int i = 0; i < 16; ++i)
|
61
|
+
{
|
62
|
+
for (int o = 0; o < 7; ++o)
|
63
|
+
{
|
64
|
+
idx[o][i] = make_compress(i, o < 4 ? o : o - 7);
|
65
|
+
}
|
66
|
+
|
67
|
+
cnt[i] = count(i);
|
68
|
+
}
|
69
|
+
|
70
|
+
selector[0] = _mm_castsi128_ps(_mm_setr_epi32(0, 0, 0, 0));
|
71
|
+
selector[1] = _mm_castsi128_ps(_mm_setr_epi32(-1, 0, 0, 0));
|
72
|
+
selector[2] = _mm_castsi128_ps(_mm_setr_epi32(-1, -1, 0, 0));
|
73
|
+
selector[3] = _mm_castsi128_ps(_mm_setr_epi32(-1, -1, -1, 0));
|
74
|
+
}
|
75
|
+
|
76
|
+
static EIGEN_STRONG_INLINE internal::Packet4f permute(const internal::Packet4f& p, uint8_t i)
|
77
|
+
{
|
78
|
+
float u[4];
|
79
|
+
_mm_storeu_ps(u, p);
|
80
|
+
return _mm_setr_ps(u[i & 3], u[(i >> 2) & 3], u[(i >> 4) & 3], u[(i >> 6) & 3]);
|
81
|
+
}
|
82
|
+
|
83
|
+
public:
|
84
|
+
|
85
|
+
enum { full_size = 4 };
|
86
|
+
|
87
|
+
static const CompressMask& get_inst()
|
88
|
+
{
|
89
|
+
static CompressMask cm;
|
90
|
+
return cm;
|
91
|
+
}
|
92
|
+
|
93
|
+
template<typename Packet>
|
94
|
+
EIGEN_STRONG_INLINE int compress_append(Packet& _value, const Packet& _mask,
|
95
|
+
Packet& _rest, int rest_cnt, bool& full) const
|
96
|
+
{
|
97
|
+
auto& value = reinterpret_cast<internal::Packet4f&>(_value);
|
98
|
+
auto& mask = reinterpret_cast<const internal::Packet4f&>(_mask);
|
99
|
+
auto& rest = reinterpret_cast<internal::Packet4f&>(_rest);
|
100
|
+
|
101
|
+
int m = _mm_movemask_ps(mask);
|
102
|
+
if (cnt[m] == full_size)
|
103
|
+
{
|
104
|
+
full = true;
|
105
|
+
return rest_cnt;
|
106
|
+
}
|
107
|
+
|
108
|
+
auto p1 = permute(value, idx[rest_cnt][m]);
|
109
|
+
p1 = internal::pblendv(selector[rest_cnt], rest, p1);
|
110
|
+
|
111
|
+
auto new_cnt = rest_cnt + cnt[m];
|
112
|
+
if (new_cnt >= full_size)
|
113
|
+
{
|
114
|
+
if (new_cnt > full_size)
|
115
|
+
{
|
116
|
+
rest = permute(value, idx[new_cnt - cnt[m] + full_size - 1][m]);
|
117
|
+
}
|
118
|
+
value = p1;
|
119
|
+
full = true;
|
120
|
+
return new_cnt - full_size;
|
121
|
+
}
|
122
|
+
else
|
123
|
+
{
|
124
|
+
rest = p1;
|
125
|
+
full = false;
|
126
|
+
return new_cnt;
|
127
|
+
}
|
128
|
+
}
|
129
|
+
};
|
130
|
+
}
|
131
|
+
}
|
132
|
+
}
|
133
|
+
#endif
|
@@ -0,0 +1,120 @@
|
|
1
|
+
/**
|
2
|
+
* @file RandUtils.h
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
4
|
+
* @brief
|
5
|
+
* @version 0.4.0
|
6
|
+
* @date 2021-09-17
|
7
|
+
*
|
8
|
+
* @copyright Copyright (c) 2020-2021
|
9
|
+
*
|
10
|
+
*/
|
11
|
+
|
12
|
+
#ifndef EIGENRAND_RAND_UTILS_SSE_H
|
13
|
+
#define EIGENRAND_RAND_UTILS_SSE_H
|
14
|
+
|
15
|
+
#include <xmmintrin.h>
|
16
|
+
|
17
|
+
namespace Eigen
|
18
|
+
{
|
19
|
+
namespace internal
|
20
|
+
{
|
21
|
+
template<typename Rng, typename RngResult>
|
22
|
+
struct RawbitsMaker<Packet4i, Rng, RngResult, Rand::RandomEngineType::scalar>
|
23
|
+
{
|
24
|
+
EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
|
25
|
+
{
|
26
|
+
if (sizeof(RngResult) == 8)
|
27
|
+
{
|
28
|
+
return _mm_set_epi64x(rng(), rng());
|
29
|
+
}
|
30
|
+
else
|
31
|
+
{
|
32
|
+
return _mm_set_epi32(rng(), rng(), rng(), rng());
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
|
37
|
+
{
|
38
|
+
if (sizeof(RngResult) == 8)
|
39
|
+
{
|
40
|
+
return _mm_set_epi64x(rng(), rng());
|
41
|
+
}
|
42
|
+
else
|
43
|
+
{
|
44
|
+
#ifdef EIGEN_VECTORIZE_SSSE3
|
45
|
+
Packet4i p = _mm_setr_epi32(rng(), rng(), rng(), 0);
|
46
|
+
return _mm_shuffle_epi8(p, _mm_setr_epi8(
|
47
|
+
0, 1, 2, 3,
|
48
|
+
4, 5, 6, 7,
|
49
|
+
8, 9, 10, 11,
|
50
|
+
3, 7, 11, 11));
|
51
|
+
#else
|
52
|
+
return _mm_set_epi32(rng(), rng(), rng(), rng());
|
53
|
+
#endif
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
|
58
|
+
{
|
59
|
+
if (sizeof(decltype(rng())) == 8)
|
60
|
+
{
|
61
|
+
return _mm_set_epi64x(0, rng());
|
62
|
+
}
|
63
|
+
else
|
64
|
+
{
|
65
|
+
return _mm_setr_epi32(rng(), rng(), 0, 0);
|
66
|
+
}
|
67
|
+
}
|
68
|
+
};
|
69
|
+
|
70
|
+
template<typename Rng>
|
71
|
+
struct RawbitsMaker<Packet4i, Rng, Packet4i, Rand::RandomEngineType::packet>
|
72
|
+
{
|
73
|
+
EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
|
74
|
+
{
|
75
|
+
return rng();
|
76
|
+
}
|
77
|
+
|
78
|
+
EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
|
79
|
+
{
|
80
|
+
return rng();
|
81
|
+
}
|
82
|
+
|
83
|
+
EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
|
84
|
+
{
|
85
|
+
return rng();
|
86
|
+
}
|
87
|
+
};
|
88
|
+
|
89
|
+
template<typename Rng>
|
90
|
+
struct UniformRealUtils<Packet4f, Rng> : public RawbitsMaker<Packet4i, Rng>
|
91
|
+
{
|
92
|
+
EIGEN_STRONG_INLINE Packet4f zero_to_one(Rng& rng)
|
93
|
+
{
|
94
|
+
return pdiv((Packet4f)_mm_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet4i>(0x7FFFFFFF))),
|
95
|
+
pset1<Packet4f>(0x7FFFFFFF));
|
96
|
+
}
|
97
|
+
|
98
|
+
EIGEN_STRONG_INLINE Packet4f uniform_real(Rng& rng)
|
99
|
+
{
|
100
|
+
return bit_to_ur_float(this->rawbits_34(rng));
|
101
|
+
}
|
102
|
+
};
|
103
|
+
|
104
|
+
template<typename Rng>
|
105
|
+
struct UniformRealUtils<Packet2d, Rng> : public RawbitsMaker<Packet4i, Rng>
|
106
|
+
{
|
107
|
+
EIGEN_STRONG_INLINE Packet2d zero_to_one(Rng& rng)
|
108
|
+
{
|
109
|
+
return pdiv((Packet2d)_mm_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
|
110
|
+
pset1<Packet2d>(0x7FFFFFFF));
|
111
|
+
}
|
112
|
+
|
113
|
+
EIGEN_STRONG_INLINE Packet2d uniform_real(Rng& rng)
|
114
|
+
{
|
115
|
+
return bit_to_ur_double(this->rawbits(rng));
|
116
|
+
}
|
117
|
+
};
|
118
|
+
}
|
119
|
+
}
|
120
|
+
#endif
|
@@ -12,7 +12,7 @@
|
|
12
12
|
You can get a 5~10x speedup by just replacing Eigen's old Random
|
13
13
|
or unvectorizable C++11 random number generators with EigenRand.
|
14
14
|
|
15
|
-
EigenRand currently supports only x86-64 architecture (SSE, AVX, AVX2).
|
15
|
+
EigenRand currently supports only the x86-64 architecture (SSE, AVX, AVX2) and ARM64 NEON (experimental).
|
16
16
|
|
17
17
|
EigenRand is distributed under the MIT License.
|
18
18
|
|
@@ -29,7 +29,7 @@
|
|
29
29
|
|
30
30
|
You can install EigenRand by just downloading the source code from [the repository](https://github.com/bab2min/EigenRand/releases).
|
31
31
|
Since EigenRand is a header-only library like Eigen, no binaries need to be installed.
|
32
|
-
All you need is [Eigen 3.3.
|
32
|
+
All you need is [Eigen 3.3.4 ~ 3.4.0](http://eigen.tuxfamily.org/index.php?title=Main_Page) and a C++11 compiler.
|
33
33
|
|
34
34
|
@section getting_started_2 Simple Random Matrix Generators
|
35
35
|
@code
|
@@ -43,7 +43,7 @@
|
|
43
43
|
{
|
44
44
|
// Initialize the random number generator with seed=42 for the following code.
|
45
45
|
// Or you can use C++11 RNG such as std::mt19937 or std::ranlux48.
|
46
|
-
Rand::
|
46
|
+
Rand::P8_mt19937_64 urng{ 42 };
|
47
47
|
|
48
48
|
// this will generate 4x4 real matrix with range [-1, 1]
|
49
49
|
MatrixXf mat = Rand::balanced<MatrixXf>(4, 4, urng);
|
@@ -69,7 +69,7 @@
|
|
69
69
|
|
70
70
|
int main()
|
71
71
|
{
|
72
|
-
Rand::
|
72
|
+
Rand::P8_mt19937_64 urng{ 42 };
|
73
73
|
|
74
74
|
MatrixXf mat{ 10, 10 };
|
75
75
|
// this will generate a random matrix in MatrixXf type with the shape (10, 10)
|
@@ -98,7 +98,7 @@
|
|
98
98
|
|
99
99
|
int main()
|
100
100
|
{
|
101
|
-
Rand::
|
101
|
+
Rand::P8_mt19937_64 urng{ 42 };
|
102
102
|
// constructs generator for normal distribution with mean=1.0, stdev=2.0
|
103
103
|
Rand::NormalGen<float> norm_gen{ 1.0, 2.0 };
|
104
104
|
|
@@ -126,7 +126,7 @@
|
|
126
126
|
|
127
127
|
int main()
|
128
128
|
{
|
129
|
-
Rand::
|
129
|
+
Rand::P8_mt19937_64 urng{ 42 };
|
130
130
|
|
131
131
|
Vector4f mean{ 0, 1, 2, 3 };
|
132
132
|
Matrix4f cov;
|
@@ -182,7 +182,7 @@
|
|
182
182
|
| `Eigen::Rand::lognormal` | `Eigen::Rand::LognormalGen` | float, double | generates real values on a [lognormal distribution](https://en.wikipedia.org/wiki/Lognormal_distribution). | `std::lognormal_distribution` |
|
183
183
|
| `Eigen::Rand::normal` | `Eigen::Rand::StdNormalGen`, `Eigen::Rand::NormalGen` | float, double | generates real values on a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). | `std::normal_distribution` |
|
184
184
|
| `Eigen::Rand::studentT` | `Eigen::Rand::StudentTGen` | float, double | generates real values on the [Student's t distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). | `std::student_t_distribution` |
|
185
|
-
| `Eigen::Rand::uniformReal` | `Eigen::Rand::UniformRealGen` | float, double | generates real values in the `[0, 1)` range. | `std::generate_canonical` |
|
185
|
+
| `Eigen::Rand::uniformReal` | `Eigen::Rand::StdUniformRealGen`, `Eigen::Rand::UniformRealGen` | float, double | generates real values in the `[0, 1)` range. | `std::generate_canonical` |
|
186
186
|
| `Eigen::Rand::weibull` | `Eigen::Rand::WeibullGen` | float, double | generates real values on the [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution). | `std::weibull_distribution` |
|
187
187
|
|
188
188
|
@section list_of_supported_distribution_2 Random Distributions for Integer Types
|
@@ -200,7 +200,7 @@
|
|
200
200
|
@section list_of_distribution_3 Multivariate Random Distributions
|
201
201
|
| Generator | Description | Equivalent to |
|
202
202
|
|:---:|:---:|:---:|
|
203
|
-
| `Eigen::Rand::MultinomialGen` | generates
|
203
|
+
| `Eigen::Rand::MultinomialGen` | generates integer vectors on a [multinomial distribution](https://en.wikipedia.org/wiki/Multinomial_distribution) | [scipy.stats.multinomial in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multinomial.html#scipy.stats.multinomial) |
|
204
204
|
| `Eigen::Rand::DirichletGen` | generates real vectors on a [Dirichlet distribution](https://en.wikipedia.org/wiki/Dirichlet_distribution) | [scipy.stats.dirichlet in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.dirichlet.html#scipy.stats.dirichlet) |
|
205
205
|
| `Eigen::Rand::MvNormalGen` | generates real vectors on a [multivariate normal distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution) | [scipy.stats.multivariate_normal in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multivariate_normal.html#scipy.stats.multivariate_normal) |
|
206
206
|
| `Eigen::Rand::WishartGen` | generates real matrices on a [Wishart distribution](https://en.wikipedia.org/wiki/Wishart_distribution) | [scipy.stats.wishart in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wishart.html#scipy.stats.wishart) |
|
@@ -211,11 +211,13 @@
|
|
211
211
|
| | Description | Equivalent to |
|
212
212
|
|:---:|:---:|:---:|
|
213
213
|
| `Eigen::Rand::Vmt19937_64` | a vectorized version of Mersenne Twister algorithm. It generates two 64bit random integers simultaneously with SSE2 and four integers with AVX2. | `std::mt19937_64` |
|
214
|
-
|
214
|
+
| `Eigen::Rand::P8_mt19937_64` | a vectorized version of Mersenne Twister algorithm. Since it generates eight 64bit random integers simultaneously, the random values are the same regardless of architecture. | |
|
215
215
|
*
|
216
216
|
* @page performance Performance
|
217
217
|
* The following charts show the relative speed-up of EigenRand compared to Reference(C++ std or Eigen functions). Detailed results are below the charts.
|
218
218
|
|
219
|
+
@section performance_1 Overview of Results at x86-64 Architecture
|
220
|
+
|
219
221
|
\image html perf_no_vect.png
|
220
222
|
|
221
223
|
\image html perf_sse2.png
|
@@ -230,7 +232,17 @@
|
|
230
232
|
|
231
233
|
* The following result is a measure of the time in seconds it takes to generate 1M random numbers. It shows the average of 20 times.
|
232
234
|
|
233
|
-
@section
|
235
|
+
@section performance_2 Overview of Results at ARM64 NEON (experimental)
|
236
|
+
|
237
|
+
\image html perf_neon_v0.3.90.png
|
238
|
+
|
239
|
+
\image html perf_mv_part1_neon_v0.3.90.png
|
240
|
+
|
241
|
+
\image html perf_mv_part2_neon_v0.3.90.png
|
242
|
+
|
243
|
+
* The following result is a measure of the time in seconds it takes to generate 1M random numbers. It shows the average of 20 times.
|
244
|
+
|
245
|
+
@section performance_3 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz (Ubuntu 16.04, gcc7.5)
|
234
246
|
|
235
247
|
| | C++ std (or Eigen) | EigenRand (No Vect.) | EigenRand (SSE2) | EigenRand (SSSE3) | EigenRand (AVX) | EigenRand (AVX2) |
|
236
248
|
|---|---:|---:|---:|---:|---:|---:|
|
@@ -289,7 +301,7 @@
|
|
289
301
|
| `Wishart(4)` | 71.19 | 5.28 | 2.70 | 2.93 | 2.04 | 1.94 |
|
290
302
|
| `Wishart(50)` | 1185.26 | 1360.49 | 492.91 | 517.44 | 359.03 | 324.60 |
|
291
303
|
|
292
|
-
@section
|
304
|
+
@section performance_4 AMD Ryzen 7 3700x CPU @ 3.60GHz (Windows 10, MSVC2017)
|
293
305
|
|
294
306
|
| | C++ std (or Eigen) | EigenRand (SSE2) | EigenRand (AVX) | EigenRand (AVX2) |
|
295
307
|
|---|---:|---:|---:|---:|
|
@@ -334,4 +346,4 @@
|
|
334
346
|
| Mersenne Twister(int64) | 5.1 | 3.9 | 3.9 | 3.3 |
|
335
347
|
|
336
348
|
*
|
337
|
-
*/
|
349
|
+
*/
|
data/vendor/EigenRand/README.md
CHANGED
@@ -16,13 +16,36 @@ You can get 5~10 times speed by just replacing old Eigen's Random or unvectoriza
|
|
16
16
|
* 5~10 times faster than non-vectorized functions
|
17
17
|
* Header-only (like Eigen)
|
18
18
|
* Can be easily integrated with Eigen's expressions
|
19
|
-
* Currently supports only x86
|
19
|
+
* Currently supports only x86, x86-64 (up to AVX2), and ARM64 NEON (experimental) architectures.
|
20
20
|
|
21
21
|
## Requirement
|
22
22
|
|
23
|
-
* Eigen 3.3.
|
23
|
+
* Eigen 3.3.4 ~ 3.4.0
|
24
24
|
* C++11-compatible compilers
|
25
25
|
|
26
|
+
## Build for Test & Benchmark
|
27
|
+
You can build a test binary to verify if EigenRand is working well.
|
28
|
+
First, make sure you have Eigen 3.3.4~3.4.0 installed in your compiler include folder. Also make sure you have cmake 3.9 or higher installed.
|
29
|
+
After that, you can build it as follows:
|
30
|
+
```console
|
31
|
+
$ git clone https://github.com/bab2min/EigenRand
|
32
|
+
$ cd EigenRand
|
33
|
+
$ git clone https://github.com/google/googletest
|
34
|
+
$ pushd googletest && git checkout v1.8.x && popd
|
35
|
+
$ mkdir build && cd build
|
36
|
+
$ cmake -DCMAKE_BUILD_TYPE=Release ..
|
37
|
+
$ make
|
38
|
+
$ ./test/EigenRand-test # Binary for unit test
|
39
|
+
$ ./EigenRand-accuracy # Binary for accuracy test of univariate random distributions
|
40
|
+
$ ./EigenRand-benchmark # Binary for performance test of univariate random distributions
|
41
|
+
$ ./EigenRand-benchmark-mv # Binary for performance test of multivariate random distributions
|
42
|
+
```
|
43
|
+
|
44
|
+
You can specify additional compiler arguments including target machine options (e.g. -mavx2, -march) like:
|
45
|
+
```console
|
46
|
+
$ cmake -DCMAKE_BUILD_TYPE=Release -DEIGENRAND_CXX_FLAGS="-march=native" ..
|
47
|
+
```
|
48
|
+
|
26
49
|
## Documentation
|
27
50
|
|
28
51
|
https://bab2min.github.io/eigenrand/
|
@@ -63,7 +86,7 @@ https://bab2min.github.io/eigenrand/
|
|
63
86
|
|
64
87
|
| Generator | Description | Equivalent to |
|
65
88
|
|:---:|:---:|:---:|
|
66
|
-
| `Eigen::Rand::MultinomialGen` | generates
|
89
|
+
| `Eigen::Rand::MultinomialGen` | generates integer vectors on a [multinomial distribution](https://en.wikipedia.org/wiki/Multinomial_distribution) | [scipy.stats.multinomial in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multinomial.html#scipy.stats.multinomial) |
|
67
90
|
| `Eigen::Rand::DirichletGen` | generates real vectors on a [Dirichlet distribution](https://en.wikipedia.org/wiki/Dirichlet_distribution) | [scipy.stats.dirichlet in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.dirichlet.html#scipy.stats.dirichlet) |
|
68
91
|
| `Eigen::Rand::MvNormalGen` | generates real vectors on a [multivariate normal distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution) | [scipy.stats.multivariate_normal in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multivariate_normal.html#scipy.stats.multivariate_normal) |
|
69
92
|
| `Eigen::Rand::WishartGen` | generates real matrices on a [Wishart distribution](https://en.wikipedia.org/wiki/Wishart_distribution) | [scipy.stats.wishart in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wishart.html#scipy.stats.wishart) |
|
@@ -74,7 +97,8 @@ https://bab2min.github.io/eigenrand/
|
|
74
97
|
|
75
98
|
| | Description | Equivalent to |
|
76
99
|
|:---:|:---:|:---:|
|
77
|
-
| `Eigen::Rand::Vmt19937_64` | a vectorized version of Mersenne Twister algorithm. It generates two 64bit random integers simultaneously with SSE2 and four integers with AVX2. | `std::mt19937_64` |
|
100
|
+
| `Eigen::Rand::Vmt19937_64` | a vectorized version of Mersenne Twister algorithm. It generates two 64bit random integers simultaneously with SSE2 & NEON and four integers with AVX2. | `std::mt19937_64` |
|
101
|
+
| `Eigen::Rand::P8_mt19937_64` | a vectorized version of Mersenne Twister algorithm. Since it generates eight 64bit random integers simultaneously, the random values are the same regardless of architecture. | |
|
78
102
|
|
79
103
|
## Performance
|
80
104
|
The following charts show the relative speed-up of EigenRand compared to references(equivalent functions of C++ std or Eigen).
|
@@ -318,6 +342,19 @@ It shows the average of 20 times.
|
|
318
342
|
| Mersenne Twister(int32) | 5.0 | 3.4 | 3.4 | 3.3 |
|
319
343
|
| Mersenne Twister(int64) | 5.1 | 3.9 | 3.9 | 3.3 |
|
320
344
|
|
345
|
+
### ARM64 NEON (Cortex-A73)
|
346
|
+
Currently, support for ARM64 NEON is experimental and the results may be sub-optimal.
|
347
|
+
Also keep in mind that NEON does not support vectorization of double type.
|
348
|
+
So if you use double type generators, they will fall back to scalar computations.
|
349
|
+
|
350
|
+
![Perf_no_vect](/doxygen/images/perf_neon_v0.3.90.png)
|
351
|
+
|
352
|
+
The following charts are about multivariate distributions.
|
353
|
+
![Perf_no_vect](/doxygen/images/perf_mv_part1_neon_v0.3.90.png)
|
354
|
+
![Perf_no_vect](/doxygen/images/perf_mv_part2_neon_v0.3.90.png)
|
355
|
+
|
356
|
+
Cases filled with orange are generators that are slower than reference functions.
|
357
|
+
|
321
358
|
## Accuracy
|
322
359
|
Since vectorized mathematical functions may have a loss of precision, I measured how well the generated random number fits its actual distribution.
|
323
360
|
32768 samples were generated and Earth Mover's Distance between samples and its actual distribution was calculated for each distribution.
|
@@ -348,6 +385,22 @@ MIT License
|
|
348
385
|
|
349
386
|
## History
|
350
387
|
|
388
|
+
### 0.4.0 alpha (2021-09-28)
|
389
|
+
* Now EigenRand supports ARM & ARM64 NEON architecture experimentally. Please report issues about ARM & ARM64 NEON.
|
390
|
+
* Now EigenRand is compatible with `Eigen 3.4.0`.
|
391
|
+
|
392
|
+
### 0.3.5 (2021-07-16)
|
393
|
+
* Now `UniformRealGen` generates accurate double values.
|
394
|
+
* Fixed a bug where non-vectorized double-type `NormalGen` would get stuck in an infinite loop.
|
395
|
+
* New overloads of `balanced` and `balancedLike` which generate values over `[a, b]` were added.
|
396
|
+
|
397
|
+
### 0.3.4 (2021-04-25)
|
398
|
+
* Now Eigen 3.3.4 - 3.3.6 versions are additionally supported.
|
399
|
+
|
400
|
+
### 0.3.3 (2021-03-30)
|
401
|
+
* A compilation failure with some RNGs in `double` type was fixed.
|
402
|
+
* A name conflict between the internal function `plgamma` and one in `SpecialFunctionsPacketMath.h` was fixed.
|
403
|
+
|
351
404
|
### 0.3.2 (2021-03-26)
|
352
405
|
* A default constructor for `DiscreteGen` was added.
|
353
406
|
|
@@ -0,0 +1,203 @@
|
|
1
|
+
/*
|
2
|
+
Apache License
|
3
|
+
Version 2.0, January 2004
|
4
|
+
http://www.apache.org/licenses/
|
5
|
+
|
6
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7
|
+
|
8
|
+
1. Definitions.
|
9
|
+
|
10
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
11
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
12
|
+
|
13
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14
|
+
the copyright owner that is granting the License.
|
15
|
+
|
16
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
17
|
+
other entities that control, are controlled by, or are under common
|
18
|
+
control with that entity. For the purposes of this definition,
|
19
|
+
"control" means (i) the power, direct or indirect, to cause the
|
20
|
+
direction or management of such entity, whether by contract or
|
21
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23
|
+
|
24
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25
|
+
exercising permissions granted by this License.
|
26
|
+
|
27
|
+
"Source" form shall mean the preferred form for making modifications,
|
28
|
+
including but not limited to software source code, documentation
|
29
|
+
source, and configuration files.
|
30
|
+
|
31
|
+
"Object" form shall mean any form resulting from mechanical
|
32
|
+
transformation or translation of a Source form, including but
|
33
|
+
not limited to compiled object code, generated documentation,
|
34
|
+
and conversions to other media types.
|
35
|
+
|
36
|
+
"Work" shall mean the work of authorship, whether in Source or
|
37
|
+
Object form, made available under the License, as indicated by a
|
38
|
+
copyright notice that is included in or attached to the work
|
39
|
+
(an example is provided in the Appendix below).
|
40
|
+
|
41
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42
|
+
form, that is based on (or derived from) the Work and for which the
|
43
|
+
editorial revisions, annotations, elaborations, or other modifications
|
44
|
+
represent, as a whole, an original work of authorship. For the purposes
|
45
|
+
of this License, Derivative Works shall not include works that remain
|
46
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47
|
+
the Work and Derivative Works thereof.
|
48
|
+
|
49
|
+
"Contribution" shall mean any work of authorship, including
|
50
|
+
the original version of the Work and any modifications or additions
|
51
|
+
to that Work or Derivative Works thereof, that is intentionally
|
52
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
55
|
+
means any form of electronic, verbal, or written communication sent
|
56
|
+
to the Licensor or its representatives, including but not limited to
|
57
|
+
communication on electronic mailing lists, source code control systems,
|
58
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
59
|
+
Licensor for the purpose of discussing and improving the Work, but
|
60
|
+
excluding communication that is conspicuously marked or otherwise
|
61
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
62
|
+
|
63
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64
|
+
on behalf of whom a Contribution has been received by Licensor and
|
65
|
+
subsequently incorporated within the Work.
|
66
|
+
|
67
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68
|
+
this License, each Contributor hereby grants to You a perpetual,
|
69
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70
|
+
copyright license to reproduce, prepare Derivative Works of,
|
71
|
+
publicly display, publicly perform, sublicense, and distribute the
|
72
|
+
Work and such Derivative Works in Source or Object form.
|
73
|
+
|
74
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75
|
+
this License, each Contributor hereby grants to You a perpetual,
|
76
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77
|
+
(except as stated in this section) patent license to make, have made,
|
78
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79
|
+
where such license applies only to those patent claims licensable
|
80
|
+
by such Contributor that are necessarily infringed by their
|
81
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
82
|
+
with the Work to which such Contribution(s) was submitted. If You
|
83
|
+
institute patent litigation against any entity (including a
|
84
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85
|
+
or a Contribution incorporated within the Work constitutes direct
|
86
|
+
or contributory patent infringement, then any patent licenses
|
87
|
+
granted to You under this License for that Work shall terminate
|
88
|
+
as of the date such litigation is filed.
|
89
|
+
|
90
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
91
|
+
Work or Derivative Works thereof in any medium, with or without
|
92
|
+
modifications, and in Source or Object form, provided that You
|
93
|
+
meet the following conditions:
|
94
|
+
|
95
|
+
(a) You must give any other recipients of the Work or
|
96
|
+
Derivative Works a copy of this License; and
|
97
|
+
|
98
|
+
(b) You must cause any modified files to carry prominent notices
|
99
|
+
stating that You changed the files; and
|
100
|
+
|
101
|
+
(c) You must retain, in the Source form of any Derivative Works
|
102
|
+
that You distribute, all copyright, patent, trademark, and
|
103
|
+
attribution notices from the Source form of the Work,
|
104
|
+
excluding those notices that do not pertain to any part of
|
105
|
+
the Derivative Works; and
|
106
|
+
|
107
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108
|
+
distribution, then any Derivative Works that You distribute must
|
109
|
+
include a readable copy of the attribution notices contained
|
110
|
+
within such NOTICE file, excluding those notices that do not
|
111
|
+
pertain to any part of the Derivative Works, in at least one
|
112
|
+
of the following places: within a NOTICE text file distributed
|
113
|
+
as part of the Derivative Works; within the Source form or
|
114
|
+
documentation, if provided along with the Derivative Works; or,
|
115
|
+
within a display generated by the Derivative Works, if and
|
116
|
+
wherever such third-party notices normally appear. The contents
|
117
|
+
of the NOTICE file are for informational purposes only and
|
118
|
+
do not modify the License. You may add Your own attribution
|
119
|
+
notices within Derivative Works that You distribute, alongside
|
120
|
+
or as an addendum to the NOTICE text from the Work, provided
|
121
|
+
that such additional attribution notices cannot be construed
|
122
|
+
as modifying the License.
|
123
|
+
|
124
|
+
You may add Your own copyright statement to Your modifications and
|
125
|
+
may provide additional or different license terms and conditions
|
126
|
+
for use, reproduction, or distribution of Your modifications, or
|
127
|
+
for any such Derivative Works as a whole, provided Your use,
|
128
|
+
reproduction, and distribution of the Work otherwise complies with
|
129
|
+
the conditions stated in this License.
|
130
|
+
|
131
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132
|
+
any Contribution intentionally submitted for inclusion in the Work
|
133
|
+
by You to the Licensor shall be under the terms and conditions of
|
134
|
+
this License, without any additional terms or conditions.
|
135
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136
|
+
the terms of any separate license agreement you may have executed
|
137
|
+
with Licensor regarding such Contributions.
|
138
|
+
|
139
|
+
6. Trademarks. This License does not grant permission to use the trade
|
140
|
+
names, trademarks, service marks, or product names of the Licensor,
|
141
|
+
except as required for reasonable and customary use in describing the
|
142
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
143
|
+
|
144
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145
|
+
agreed to in writing, Licensor provides the Work (and each
|
146
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148
|
+
implied, including, without limitation, any warranties or conditions
|
149
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151
|
+
appropriateness of using or redistributing the Work and assume any
|
152
|
+
risks associated with Your exercise of permissions under this License.
|
153
|
+
|
154
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
155
|
+
whether in tort (including negligence), contract, or otherwise,
|
156
|
+
unless required by applicable law (such as deliberate and grossly
|
157
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158
|
+
liable to You for damages, including any direct, indirect, special,
|
159
|
+
incidental, or consequential damages of any character arising as a
|
160
|
+
result of this License or out of the use or inability to use the
|
161
|
+
Work (including but not limited to damages for loss of goodwill,
|
162
|
+
work stoppage, computer failure or malfunction, or any and all
|
163
|
+
other commercial damages or losses), even if such Contributor
|
164
|
+
has been advised of the possibility of such damages.
|
165
|
+
|
166
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
168
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169
|
+
or other liability obligations and/or rights consistent with this
|
170
|
+
License. However, in accepting such obligations, You may act only
|
171
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172
|
+
of any other Contributor, and only if You agree to indemnify,
|
173
|
+
defend, and hold each Contributor harmless for any liability
|
174
|
+
incurred by, or claims asserted against, such Contributor by reason
|
175
|
+
of your accepting any such warranty or additional liability.
|
176
|
+
|
177
|
+
END OF TERMS AND CONDITIONS
|
178
|
+
|
179
|
+
APPENDIX: How to apply the Apache License to your work.
|
180
|
+
|
181
|
+
To apply the Apache License to your work, attach the following
|
182
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
183
|
+
replaced with your own identifying information. (Don't include
|
184
|
+
the brackets!) The text should be enclosed in the appropriate
|
185
|
+
comment syntax for the file format. We also recommend that a
|
186
|
+
file or class name and description of purpose be included on the
|
187
|
+
same "printed page" as the copyright notice for easier
|
188
|
+
identification within third-party archives.
|
189
|
+
|
190
|
+
Copyright [yyyy] [name of copyright owner]
|
191
|
+
|
192
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193
|
+
you may not use this file except in compliance with the License.
|
194
|
+
You may obtain a copy of the License at
|
195
|
+
|
196
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
197
|
+
|
198
|
+
Unless required by applicable law or agreed to in writing, software
|
199
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201
|
+
See the License for the specific language governing permissions and
|
202
|
+
limitations under the License.
|
203
|
+
*/
|
data/vendor/eigen/COPYING.BSD
CHANGED