tomoto 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
|
@@ -10,6 +10,10 @@
|
|
|
10
10
|
#ifndef EIGEN_PARALLELIZER_H
|
|
11
11
|
#define EIGEN_PARALLELIZER_H
|
|
12
12
|
|
|
13
|
+
#if EIGEN_HAS_CXX11_ATOMIC
|
|
14
|
+
#include <atomic>
|
|
15
|
+
#endif
|
|
16
|
+
|
|
13
17
|
namespace Eigen {
|
|
14
18
|
|
|
15
19
|
namespace internal {
|
|
@@ -17,7 +21,8 @@ namespace internal {
|
|
|
17
21
|
/** \internal */
|
|
18
22
|
inline void manage_multi_threading(Action action, int* v)
|
|
19
23
|
{
|
|
20
|
-
static
|
|
24
|
+
static int m_maxThreads = -1;
|
|
25
|
+
EIGEN_UNUSED_VARIABLE(m_maxThreads)
|
|
21
26
|
|
|
22
27
|
if(action==SetAction)
|
|
23
28
|
{
|
|
@@ -75,8 +80,17 @@ template<typename Index> struct GemmParallelInfo
|
|
|
75
80
|
{
|
|
76
81
|
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
|
77
82
|
|
|
83
|
+
// volatile is not enough on all architectures (see bug 1572)
|
|
84
|
+
// to guarantee that when thread A says to thread B that it is
|
|
85
|
+
// done with packing a block, then all writes have been really
|
|
86
|
+
// carried out... C++11 memory model+atomic guarantees this.
|
|
87
|
+
#if EIGEN_HAS_CXX11_ATOMIC
|
|
88
|
+
std::atomic<Index> sync;
|
|
89
|
+
std::atomic<int> users;
|
|
90
|
+
#else
|
|
78
91
|
Index volatile sync;
|
|
79
92
|
int volatile users;
|
|
93
|
+
#endif
|
|
80
94
|
|
|
81
95
|
Index lhs_start;
|
|
82
96
|
Index lhs_length;
|
|
@@ -87,11 +101,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
|
|
|
87
101
|
{
|
|
88
102
|
// TODO when EIGEN_USE_BLAS is defined,
|
|
89
103
|
// we should still enable OMP for other scalar types
|
|
90
|
-
|
|
104
|
+
// Without C++11, we have to disable GEMM's parallelization on
|
|
105
|
+
// non x86 architectures because there volatile is not enough for our purpose.
|
|
106
|
+
// See bug 1572.
|
|
107
|
+
#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
|
|
91
108
|
// FIXME the transpose variable is only needed to properly split
|
|
92
109
|
// the matrix product when multithreading is enabled. This is a temporary
|
|
93
110
|
// fix to support row-major destination matrices. This whole
|
|
94
|
-
// parallelizer mechanism has to be
|
|
111
|
+
// parallelizer mechanism has to be redesigned anyway.
|
|
95
112
|
EIGEN_UNUSED_VARIABLE(depth);
|
|
96
113
|
EIGEN_UNUSED_VARIABLE(transpose);
|
|
97
114
|
func(0,rows, 0,cols);
|
|
@@ -112,12 +129,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
|
|
|
112
129
|
double work = static_cast<double>(rows) * static_cast<double>(cols) *
|
|
113
130
|
static_cast<double>(depth);
|
|
114
131
|
double kMinTaskSize = 50000; // FIXME improve this heuristic.
|
|
115
|
-
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
|
|
132
|
+
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));
|
|
116
133
|
|
|
117
134
|
// compute the number of threads we are going to use
|
|
118
135
|
Index threads = std::min<Index>(nbThreads(), pb_max_threads);
|
|
119
136
|
|
|
120
|
-
// if multi-threading is
|
|
137
|
+
// if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
|
|
121
138
|
// then abort multi-threading
|
|
122
139
|
// FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
|
|
123
140
|
if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
|
|
@@ -45,14 +45,23 @@ struct symm_pack_lhs
|
|
|
45
45
|
}
|
|
46
46
|
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
|
|
47
47
|
{
|
|
48
|
-
|
|
48
|
+
typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
|
|
49
|
+
typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half QuarterPacket;
|
|
50
|
+
enum { PacketSize = packet_traits<Scalar>::size,
|
|
51
|
+
HalfPacketSize = unpacket_traits<HalfPacket>::size,
|
|
52
|
+
QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
|
|
53
|
+
HasHalf = (int)HalfPacketSize < (int)PacketSize,
|
|
54
|
+
HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};
|
|
55
|
+
|
|
49
56
|
const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
|
|
50
57
|
Index count = 0;
|
|
51
58
|
//Index peeled_mc3 = (rows/Pack1)*Pack1;
|
|
52
59
|
|
|
53
60
|
const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
|
|
54
61
|
const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
|
|
55
|
-
const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
|
|
62
|
+
const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;
|
|
63
|
+
const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;
|
|
64
|
+
const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? peeled_mc_half+((rows-peeled_mc_half)/(QuarterPacketSize))*(QuarterPacketSize) : 0;
|
|
56
65
|
|
|
57
66
|
if(Pack1>=3*PacketSize)
|
|
58
67
|
for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
|
|
@@ -66,8 +75,16 @@ struct symm_pack_lhs
|
|
|
66
75
|
for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
|
|
67
76
|
pack<1*PacketSize>(blockA, lhs, cols, i, count);
|
|
68
77
|
|
|
78
|
+
if(HasHalf && Pack1>=HalfPacketSize)
|
|
79
|
+
for(Index i=peeled_mc1; i<peeled_mc_half; i+=HalfPacketSize)
|
|
80
|
+
pack<HalfPacketSize>(blockA, lhs, cols, i, count);
|
|
81
|
+
|
|
82
|
+
if(HasQuarter && Pack1>=QuarterPacketSize)
|
|
83
|
+
for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)
|
|
84
|
+
pack<QuarterPacketSize>(blockA, lhs, cols, i, count);
|
|
85
|
+
|
|
69
86
|
// do the same with mr==1
|
|
70
|
-
for(Index i=
|
|
87
|
+
for(Index i=peeled_mc_quarter; i<rows; i++)
|
|
71
88
|
{
|
|
72
89
|
for(Index k=0; k<i; k++)
|
|
73
90
|
blockA[count++] = lhs(i, k); // normal
|
|
@@ -277,20 +294,21 @@ struct symm_pack_rhs
|
|
|
277
294
|
template <typename Scalar, typename Index,
|
|
278
295
|
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
|
279
296
|
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
|
|
280
|
-
int ResStorageOrder>
|
|
297
|
+
int ResStorageOrder, int ResInnerStride>
|
|
281
298
|
struct product_selfadjoint_matrix;
|
|
282
299
|
|
|
283
300
|
template <typename Scalar, typename Index,
|
|
284
301
|
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
|
285
|
-
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs
|
|
286
|
-
|
|
302
|
+
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
|
|
303
|
+
int ResInnerStride>
|
|
304
|
+
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
|
|
287
305
|
{
|
|
288
306
|
|
|
289
307
|
static EIGEN_STRONG_INLINE void run(
|
|
290
308
|
Index rows, Index cols,
|
|
291
309
|
const Scalar* lhs, Index lhsStride,
|
|
292
310
|
const Scalar* rhs, Index rhsStride,
|
|
293
|
-
Scalar* res, Index resStride,
|
|
311
|
+
Scalar* res, Index resIncr, Index resStride,
|
|
294
312
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
|
295
313
|
{
|
|
296
314
|
product_selfadjoint_matrix<Scalar, Index,
|
|
@@ -298,33 +316,35 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
|
|
|
298
316
|
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
|
|
299
317
|
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
|
|
300
318
|
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
|
|
301
|
-
ColMajor>
|
|
302
|
-
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
|
|
319
|
+
ColMajor,ResInnerStride>
|
|
320
|
+
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
|
303
321
|
}
|
|
304
322
|
};
|
|
305
323
|
|
|
306
324
|
template <typename Scalar, typename Index,
|
|
307
325
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
308
|
-
int RhsStorageOrder, bool ConjugateRhs
|
|
309
|
-
|
|
326
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
327
|
+
int ResInnerStride>
|
|
328
|
+
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
|
|
310
329
|
{
|
|
311
330
|
|
|
312
331
|
static EIGEN_DONT_INLINE void run(
|
|
313
332
|
Index rows, Index cols,
|
|
314
333
|
const Scalar* _lhs, Index lhsStride,
|
|
315
334
|
const Scalar* _rhs, Index rhsStride,
|
|
316
|
-
Scalar* res, Index resStride,
|
|
335
|
+
Scalar* res, Index resIncr, Index resStride,
|
|
317
336
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
318
337
|
};
|
|
319
338
|
|
|
320
339
|
template <typename Scalar, typename Index,
|
|
321
340
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
322
|
-
int RhsStorageOrder, bool ConjugateRhs
|
|
323
|
-
|
|
341
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
342
|
+
int ResInnerStride>
|
|
343
|
+
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
|
|
324
344
|
Index rows, Index cols,
|
|
325
345
|
const Scalar* _lhs, Index lhsStride,
|
|
326
346
|
const Scalar* _rhs, Index rhsStride,
|
|
327
|
-
Scalar* _res,
|
|
347
|
+
Scalar* _res, Index resIncr, Index resStride,
|
|
328
348
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
|
329
349
|
{
|
|
330
350
|
Index size = rows;
|
|
@@ -334,11 +354,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
|
|
|
334
354
|
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
335
355
|
typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
|
|
336
356
|
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
|
337
|
-
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
|
357
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
338
358
|
LhsMapper lhs(_lhs,lhsStride);
|
|
339
359
|
LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
|
|
340
360
|
RhsMapper rhs(_rhs,rhsStride);
|
|
341
|
-
ResMapper res(_res, resStride);
|
|
361
|
+
ResMapper res(_res, resStride, resIncr);
|
|
342
362
|
|
|
343
363
|
Index kc = blocking.kc(); // cache block size along the K direction
|
|
344
364
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
|
@@ -352,7 +372,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
|
|
|
352
372
|
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
353
373
|
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
|
354
374
|
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
|
|
355
|
-
gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
|
|
375
|
+
gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
|
|
356
376
|
|
|
357
377
|
for(Index k2=0; k2<size; k2+=kc)
|
|
358
378
|
{
|
|
@@ -387,7 +407,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
|
|
|
387
407
|
for(Index i2=k2+kc; i2<size; i2+=mc)
|
|
388
408
|
{
|
|
389
409
|
const Index actual_mc = (std::min)(i2+mc,size)-i2;
|
|
390
|
-
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
|
|
410
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()
|
|
391
411
|
(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
392
412
|
|
|
393
413
|
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
@@ -398,26 +418,28 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
|
|
|
398
418
|
// matrix * selfadjoint product
|
|
399
419
|
template <typename Scalar, typename Index,
|
|
400
420
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
401
|
-
int RhsStorageOrder, bool ConjugateRhs
|
|
402
|
-
|
|
421
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
422
|
+
int ResInnerStride>
|
|
423
|
+
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
|
|
403
424
|
{
|
|
404
425
|
|
|
405
426
|
static EIGEN_DONT_INLINE void run(
|
|
406
427
|
Index rows, Index cols,
|
|
407
428
|
const Scalar* _lhs, Index lhsStride,
|
|
408
429
|
const Scalar* _rhs, Index rhsStride,
|
|
409
|
-
Scalar* res, Index resStride,
|
|
430
|
+
Scalar* res, Index resIncr, Index resStride,
|
|
410
431
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
411
432
|
};
|
|
412
433
|
|
|
413
434
|
template <typename Scalar, typename Index,
|
|
414
435
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
415
|
-
int RhsStorageOrder, bool ConjugateRhs
|
|
416
|
-
|
|
436
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
437
|
+
int ResInnerStride>
|
|
438
|
+
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
|
|
417
439
|
Index rows, Index cols,
|
|
418
440
|
const Scalar* _lhs, Index lhsStride,
|
|
419
441
|
const Scalar* _rhs, Index rhsStride,
|
|
420
|
-
Scalar* _res,
|
|
442
|
+
Scalar* _res, Index resIncr, Index resStride,
|
|
421
443
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
|
422
444
|
{
|
|
423
445
|
Index size = cols;
|
|
@@ -425,9 +447,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
|
|
|
425
447
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
426
448
|
|
|
427
449
|
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
428
|
-
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
|
450
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
429
451
|
LhsMapper lhs(_lhs,lhsStride);
|
|
430
|
-
ResMapper res(_res,resStride);
|
|
452
|
+
ResMapper res(_res,resStride, resIncr);
|
|
431
453
|
|
|
432
454
|
Index kc = blocking.kc(); // cache block size along the K direction
|
|
433
455
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
|
@@ -437,7 +459,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
|
|
|
437
459
|
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
438
460
|
|
|
439
461
|
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
440
|
-
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
|
462
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
|
|
441
463
|
symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
|
|
442
464
|
|
|
443
465
|
for(Index k2=0; k2<size; k2+=kc)
|
|
@@ -503,12 +525,13 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
|
|
|
503
525
|
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
|
|
504
526
|
EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
|
|
505
527
|
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
|
|
506
|
-
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor
|
|
528
|
+
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
|
|
529
|
+
Dest::InnerStrideAtCompileTime>
|
|
507
530
|
::run(
|
|
508
531
|
lhs.rows(), rhs.cols(), // sizes
|
|
509
532
|
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
|
510
533
|
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
|
511
|
-
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
|
534
|
+
&dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
|
|
512
535
|
actualAlpha, blocking // alpha
|
|
513
536
|
);
|
|
514
537
|
}
|
|
@@ -44,16 +44,18 @@ namespace internal {
|
|
|
44
44
|
template <typename Index, \
|
|
45
45
|
int LhsStorageOrder, bool ConjugateLhs, \
|
|
46
46
|
int RhsStorageOrder, bool ConjugateRhs> \
|
|
47
|
-
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
|
|
47
|
+
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
|
|
48
48
|
{\
|
|
49
49
|
\
|
|
50
50
|
static void run( \
|
|
51
51
|
Index rows, Index cols, \
|
|
52
52
|
const EIGTYPE* _lhs, Index lhsStride, \
|
|
53
53
|
const EIGTYPE* _rhs, Index rhsStride, \
|
|
54
|
-
EIGTYPE* res, Index resStride, \
|
|
54
|
+
EIGTYPE* res, Index resIncr, Index resStride, \
|
|
55
55
|
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
|
56
56
|
{ \
|
|
57
|
+
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
|
58
|
+
eigen_assert(resIncr == 1); \
|
|
57
59
|
char side='L', uplo='L'; \
|
|
58
60
|
BlasIndex m, n, lda, ldb, ldc; \
|
|
59
61
|
const EIGTYPE *a, *b; \
|
|
@@ -91,15 +93,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
|
|
|
91
93
|
template <typename Index, \
|
|
92
94
|
int LhsStorageOrder, bool ConjugateLhs, \
|
|
93
95
|
int RhsStorageOrder, bool ConjugateRhs> \
|
|
94
|
-
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
|
|
96
|
+
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
|
|
95
97
|
{\
|
|
96
98
|
static void run( \
|
|
97
99
|
Index rows, Index cols, \
|
|
98
100
|
const EIGTYPE* _lhs, Index lhsStride, \
|
|
99
101
|
const EIGTYPE* _rhs, Index rhsStride, \
|
|
100
|
-
EIGTYPE* res, Index resStride, \
|
|
102
|
+
EIGTYPE* res, Index resIncr, Index resStride, \
|
|
101
103
|
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
|
102
104
|
{ \
|
|
105
|
+
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
|
106
|
+
eigen_assert(resIncr == 1); \
|
|
103
107
|
char side='L', uplo='L'; \
|
|
104
108
|
BlasIndex m, n, lda, ldb, ldc; \
|
|
105
109
|
const EIGTYPE *a, *b; \
|
|
@@ -167,16 +171,18 @@ EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
|
|
|
167
171
|
template <typename Index, \
|
|
168
172
|
int LhsStorageOrder, bool ConjugateLhs, \
|
|
169
173
|
int RhsStorageOrder, bool ConjugateRhs> \
|
|
170
|
-
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
|
|
174
|
+
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
|
|
171
175
|
{\
|
|
172
176
|
\
|
|
173
177
|
static void run( \
|
|
174
178
|
Index rows, Index cols, \
|
|
175
179
|
const EIGTYPE* _lhs, Index lhsStride, \
|
|
176
180
|
const EIGTYPE* _rhs, Index rhsStride, \
|
|
177
|
-
EIGTYPE* res, Index resStride, \
|
|
181
|
+
EIGTYPE* res, Index resIncr, Index resStride, \
|
|
178
182
|
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
|
179
183
|
{ \
|
|
184
|
+
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
|
185
|
+
eigen_assert(resIncr == 1); \
|
|
180
186
|
char side='R', uplo='L'; \
|
|
181
187
|
BlasIndex m, n, lda, ldb, ldc; \
|
|
182
188
|
const EIGTYPE *a, *b; \
|
|
@@ -213,15 +219,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
|
|
|
213
219
|
template <typename Index, \
|
|
214
220
|
int LhsStorageOrder, bool ConjugateLhs, \
|
|
215
221
|
int RhsStorageOrder, bool ConjugateRhs> \
|
|
216
|
-
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
|
|
222
|
+
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
|
|
217
223
|
{\
|
|
218
224
|
static void run( \
|
|
219
225
|
Index rows, Index cols, \
|
|
220
226
|
const EIGTYPE* _lhs, Index lhsStride, \
|
|
221
227
|
const EIGTYPE* _rhs, Index rhsStride, \
|
|
222
|
-
EIGTYPE* res, Index resStride, \
|
|
228
|
+
EIGTYPE* res, Index resIncr, Index resStride, \
|
|
223
229
|
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
|
224
230
|
{ \
|
|
231
|
+
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
|
232
|
+
eigen_assert(resIncr == 1); \
|
|
225
233
|
char side='R', uplo='L'; \
|
|
226
234
|
BlasIndex m, n, lda, ldb, ldc; \
|
|
227
235
|
const EIGTYPE *a, *b; \
|
|
@@ -15,7 +15,7 @@ namespace Eigen {
|
|
|
15
15
|
namespace internal {
|
|
16
16
|
|
|
17
17
|
/* Optimized selfadjoint matrix * vector product:
|
|
18
|
-
* This algorithm processes 2 columns at
|
|
18
|
+
* This algorithm processes 2 columns at once that allows to both reduce
|
|
19
19
|
* the number of load/stores of the result by a factor 2 and to reduce
|
|
20
20
|
* the instruction dependency.
|
|
21
21
|
*/
|
|
@@ -27,7 +27,8 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju
|
|
|
27
27
|
struct selfadjoint_matrix_vector_product
|
|
28
28
|
|
|
29
29
|
{
|
|
30
|
-
static EIGEN_DONT_INLINE
|
|
30
|
+
static EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC
|
|
31
|
+
void run(
|
|
31
32
|
Index size,
|
|
32
33
|
const Scalar* lhs, Index lhsStride,
|
|
33
34
|
const Scalar* rhs,
|
|
@@ -36,7 +37,8 @@ static EIGEN_DONT_INLINE void run(
|
|
|
36
37
|
};
|
|
37
38
|
|
|
38
39
|
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
|
|
39
|
-
EIGEN_DONT_INLINE
|
|
40
|
+
EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC
|
|
41
|
+
void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
|
|
40
42
|
Index size,
|
|
41
43
|
const Scalar* lhs, Index lhsStride,
|
|
42
44
|
const Scalar* rhs,
|
|
@@ -62,8 +64,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
|
|
62
64
|
|
|
63
65
|
Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
|
|
64
66
|
|
|
65
|
-
|
|
66
|
-
Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
|
|
67
|
+
Index bound = numext::maxi(Index(0), size-8) & 0xfffffffe;
|
|
67
68
|
if (FirstTriangular)
|
|
68
69
|
bound = size - bound;
|
|
69
70
|
|
|
@@ -175,7 +176,8 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
|
|
|
175
176
|
enum { LhsUpLo = LhsMode&(Upper|Lower) };
|
|
176
177
|
|
|
177
178
|
template<typename Dest>
|
|
178
|
-
static
|
|
179
|
+
static EIGEN_DEVICE_FUNC
|
|
180
|
+
void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
|
|
179
181
|
{
|
|
180
182
|
typedef typename Dest::Scalar ResScalar;
|
|
181
183
|
typedef typename Rhs::Scalar RhsScalar;
|
|
@@ -109,10 +109,10 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
|
|
|
109
109
|
internal::general_matrix_matrix_triangular_product<Index,
|
|
110
110
|
Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
|
|
111
111
|
Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
|
|
112
|
-
IsRowMajor ? RowMajor : ColMajor, UpLo>
|
|
112
|
+
IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo>
|
|
113
113
|
::run(size, depth,
|
|
114
|
-
|
|
115
|
-
mat.data(), mat.outerStride(), actualAlpha, blocking);
|
|
114
|
+
actualOther.data(), actualOther.outerStride(), actualOther.data(), actualOther.outerStride(),
|
|
115
|
+
mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
|
|
116
116
|
}
|
|
117
117
|
};
|
|
118
118
|
|
|
@@ -120,7 +120,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
|
|
|
120
120
|
|
|
121
121
|
template<typename MatrixType, unsigned int UpLo>
|
|
122
122
|
template<typename DerivedU>
|
|
123
|
-
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
|
|
123
|
+
EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
|
|
124
124
|
::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
|
|
125
125
|
{
|
|
126
126
|
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
|
|
@@ -24,7 +24,8 @@ struct selfadjoint_rank2_update_selector;
|
|
|
24
24
|
template<typename Scalar, typename Index, typename UType, typename VType>
|
|
25
25
|
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
|
|
26
26
|
{
|
|
27
|
-
static
|
|
27
|
+
static EIGEN_DEVICE_FUNC
|
|
28
|
+
void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
|
|
28
29
|
{
|
|
29
30
|
const Index size = u.size();
|
|
30
31
|
for (Index i=0; i<size; ++i)
|
|
@@ -57,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
|
|
|
57
58
|
|
|
58
59
|
template<typename MatrixType, unsigned int UpLo>
|
|
59
60
|
template<typename DerivedU, typename DerivedV>
|
|
60
|
-
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
|
|
61
|
+
EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
|
|
61
62
|
::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
|
|
62
63
|
{
|
|
63
64
|
typedef internal::blas_traits<DerivedU> UBlasTraits;
|
|
@@ -79,8 +80,8 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
|
|
|
79
80
|
if (IsRowMajor)
|
|
80
81
|
actualAlpha = numext::conj(actualAlpha);
|
|
81
82
|
|
|
82
|
-
typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType;
|
|
83
|
-
typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType;
|
|
83
|
+
typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(UBlasTraits::NeedToConjugate), _ActualUType>::type>::type UType;
|
|
84
|
+
typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(VBlasTraits::NeedToConjugate), _ActualVType>::type>::type VType;
|
|
84
85
|
internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
|
|
85
86
|
(IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
|
|
86
87
|
::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);
|