tomoto 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
|
@@ -45,22 +45,24 @@ template <typename Scalar, typename Index,
|
|
|
45
45
|
int Mode, bool LhsIsTriangular,
|
|
46
46
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
47
47
|
int RhsStorageOrder, bool ConjugateRhs,
|
|
48
|
-
int ResStorageOrder, int Version = Specialized>
|
|
48
|
+
int ResStorageOrder, int ResInnerStride,
|
|
49
|
+
int Version = Specialized>
|
|
49
50
|
struct product_triangular_matrix_matrix;
|
|
50
51
|
|
|
51
52
|
template <typename Scalar, typename Index,
|
|
52
53
|
int Mode, bool LhsIsTriangular,
|
|
53
54
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
54
|
-
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
|
55
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
56
|
+
int ResInnerStride, int Version>
|
|
55
57
|
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
|
56
58
|
LhsStorageOrder,ConjugateLhs,
|
|
57
|
-
RhsStorageOrder,ConjugateRhs,RowMajor,Version>
|
|
59
|
+
RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version>
|
|
58
60
|
{
|
|
59
61
|
static EIGEN_STRONG_INLINE void run(
|
|
60
62
|
Index rows, Index cols, Index depth,
|
|
61
63
|
const Scalar* lhs, Index lhsStride,
|
|
62
64
|
const Scalar* rhs, Index rhsStride,
|
|
63
|
-
Scalar* res, Index resStride,
|
|
65
|
+
Scalar* res, Index resIncr, Index resStride,
|
|
64
66
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
|
65
67
|
{
|
|
66
68
|
product_triangular_matrix_matrix<Scalar, Index,
|
|
@@ -70,18 +72,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
|
|
70
72
|
ConjugateRhs,
|
|
71
73
|
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
|
72
74
|
ConjugateLhs,
|
|
73
|
-
ColMajor>
|
|
74
|
-
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
|
|
75
|
+
ColMajor, ResInnerStride>
|
|
76
|
+
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
|
75
77
|
}
|
|
76
78
|
};
|
|
77
79
|
|
|
78
80
|
// implements col-major += alpha * op(triangular) * op(general)
|
|
79
81
|
template <typename Scalar, typename Index, int Mode,
|
|
80
82
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
81
|
-
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
|
83
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
84
|
+
int ResInnerStride, int Version>
|
|
82
85
|
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|
83
86
|
LhsStorageOrder,ConjugateLhs,
|
|
84
|
-
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
|
|
87
|
+
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
|
|
85
88
|
{
|
|
86
89
|
|
|
87
90
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
@@ -95,20 +98,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|
|
95
98
|
Index _rows, Index _cols, Index _depth,
|
|
96
99
|
const Scalar* _lhs, Index lhsStride,
|
|
97
100
|
const Scalar* _rhs, Index rhsStride,
|
|
98
|
-
Scalar* res, Index resStride,
|
|
101
|
+
Scalar* res, Index resIncr, Index resStride,
|
|
99
102
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
100
103
|
};
|
|
101
104
|
|
|
102
105
|
template <typename Scalar, typename Index, int Mode,
|
|
103
106
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
104
|
-
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
|
107
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
108
|
+
int ResInnerStride, int Version>
|
|
105
109
|
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|
106
110
|
LhsStorageOrder,ConjugateLhs,
|
|
107
|
-
RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
|
|
111
|
+
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
|
|
108
112
|
Index _rows, Index _cols, Index _depth,
|
|
109
113
|
const Scalar* _lhs, Index lhsStride,
|
|
110
114
|
const Scalar* _rhs, Index rhsStride,
|
|
111
|
-
Scalar* _res, Index resStride,
|
|
115
|
+
Scalar* _res, Index resIncr, Index resStride,
|
|
112
116
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
|
113
117
|
{
|
|
114
118
|
// strip zeros
|
|
@@ -119,10 +123,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|
|
119
123
|
|
|
120
124
|
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
121
125
|
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
|
122
|
-
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
|
126
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
123
127
|
LhsMapper lhs(_lhs,lhsStride);
|
|
124
128
|
RhsMapper rhs(_rhs,rhsStride);
|
|
125
|
-
ResMapper res(_res, resStride);
|
|
129
|
+
ResMapper res(_res, resStride, resIncr);
|
|
126
130
|
|
|
127
131
|
Index kc = blocking.kc(); // cache block size along the K direction
|
|
128
132
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
|
@@ -151,7 +155,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|
|
151
155
|
triangularBuffer.diagonal().setOnes();
|
|
152
156
|
|
|
153
157
|
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
154
|
-
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
|
158
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
|
|
155
159
|
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
|
|
156
160
|
|
|
157
161
|
for(Index k2=IsLower ? depth : 0;
|
|
@@ -222,7 +226,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|
|
222
226
|
for(Index i2=start; i2<end; i2+=mc)
|
|
223
227
|
{
|
|
224
228
|
const Index actual_mc = (std::min)(i2+mc,end)-i2;
|
|
225
|
-
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
|
|
229
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()
|
|
226
230
|
(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
|
|
227
231
|
|
|
228
232
|
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
|
|
@@ -235,10 +239,11 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|
|
235
239
|
// implements col-major += alpha * op(general) * op(triangular)
|
|
236
240
|
template <typename Scalar, typename Index, int Mode,
|
|
237
241
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
238
|
-
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
|
242
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
243
|
+
int ResInnerStride, int Version>
|
|
239
244
|
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|
240
245
|
LhsStorageOrder,ConjugateLhs,
|
|
241
|
-
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
|
|
246
|
+
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
|
|
242
247
|
{
|
|
243
248
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
244
249
|
enum {
|
|
@@ -251,20 +256,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|
|
251
256
|
Index _rows, Index _cols, Index _depth,
|
|
252
257
|
const Scalar* _lhs, Index lhsStride,
|
|
253
258
|
const Scalar* _rhs, Index rhsStride,
|
|
254
|
-
Scalar* res, Index resStride,
|
|
259
|
+
Scalar* res, Index resIncr, Index resStride,
|
|
255
260
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
256
261
|
};
|
|
257
262
|
|
|
258
263
|
template <typename Scalar, typename Index, int Mode,
|
|
259
264
|
int LhsStorageOrder, bool ConjugateLhs,
|
|
260
|
-
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
|
265
|
+
int RhsStorageOrder, bool ConjugateRhs,
|
|
266
|
+
int ResInnerStride, int Version>
|
|
261
267
|
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|
262
268
|
LhsStorageOrder,ConjugateLhs,
|
|
263
|
-
RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
|
|
269
|
+
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
|
|
264
270
|
Index _rows, Index _cols, Index _depth,
|
|
265
271
|
const Scalar* _lhs, Index lhsStride,
|
|
266
272
|
const Scalar* _rhs, Index rhsStride,
|
|
267
|
-
Scalar* _res, Index resStride,
|
|
273
|
+
Scalar* _res, Index resIncr, Index resStride,
|
|
268
274
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
|
269
275
|
{
|
|
270
276
|
const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
|
|
@@ -276,10 +282,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|
|
276
282
|
|
|
277
283
|
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
278
284
|
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
|
279
|
-
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
|
285
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
280
286
|
LhsMapper lhs(_lhs,lhsStride);
|
|
281
287
|
RhsMapper rhs(_rhs,rhsStride);
|
|
282
|
-
ResMapper res(_res, resStride);
|
|
288
|
+
ResMapper res(_res, resStride, resIncr);
|
|
283
289
|
|
|
284
290
|
Index kc = blocking.kc(); // cache block size along the K direction
|
|
285
291
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
|
@@ -299,7 +305,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|
|
299
305
|
triangularBuffer.diagonal().setOnes();
|
|
300
306
|
|
|
301
307
|
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
302
|
-
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
|
308
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
|
|
303
309
|
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
|
|
304
310
|
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
|
|
305
311
|
|
|
@@ -433,12 +439,12 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
|
|
|
433
439
|
Mode, LhsIsTriangular,
|
|
434
440
|
(internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
|
435
441
|
(internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
|
436
|
-
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
|
|
442
|
+
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>
|
|
437
443
|
::run(
|
|
438
444
|
stripedRows, stripedCols, stripedDepth, // sizes
|
|
439
445
|
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
|
440
446
|
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
|
441
|
-
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
|
447
|
+
&dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
|
|
442
448
|
actualAlpha, blocking
|
|
443
449
|
);
|
|
444
450
|
|
|
@@ -46,7 +46,7 @@ template <typename Scalar, typename Index,
|
|
|
46
46
|
struct product_triangular_matrix_matrix_trmm :
|
|
47
47
|
product_triangular_matrix_matrix<Scalar,Index,Mode,
|
|
48
48
|
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
|
|
49
|
-
RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
|
|
49
|
+
RhsStorageOrder, ConjugateRhs, ResStorageOrder, 1, BuiltIn> {};
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
// try to go to BLAS specialization
|
|
@@ -55,13 +55,15 @@ template <typename Index, int Mode, \
|
|
|
55
55
|
int LhsStorageOrder, bool ConjugateLhs, \
|
|
56
56
|
int RhsStorageOrder, bool ConjugateRhs> \
|
|
57
57
|
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
|
|
58
|
-
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
|
|
58
|
+
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \
|
|
59
59
|
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
|
|
60
|
-
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
|
|
60
|
+
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
|
|
61
|
+
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
|
62
|
+
eigen_assert(resIncr == 1); \
|
|
61
63
|
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
|
|
62
64
|
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
|
|
63
65
|
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
|
|
64
|
-
|
|
66
|
+
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
|
65
67
|
} \
|
|
66
68
|
};
|
|
67
69
|
|
|
@@ -115,8 +117,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
|
|
|
115
117
|
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
|
|
116
118
|
/* Most likely no benefit to call TRMM or GEMM from BLAS */ \
|
|
117
119
|
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
|
|
118
|
-
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
|
|
119
|
-
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
|
120
|
+
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
|
|
121
|
+
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
|
|
120
122
|
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
|
|
121
123
|
} else { \
|
|
122
124
|
/* Make sense to call GEMM */ \
|
|
@@ -124,8 +126,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
|
|
|
124
126
|
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
|
|
125
127
|
BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
|
|
126
128
|
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
|
|
127
|
-
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
|
|
128
|
-
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
|
|
129
|
+
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
|
|
130
|
+
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, 1, resStride, alpha, gemm_blocking, 0); \
|
|
129
131
|
\
|
|
130
132
|
/*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
|
|
131
133
|
} \
|
|
@@ -232,8 +234,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
|
|
|
232
234
|
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
|
|
233
235
|
/* Most likely no benefit to call TRMM or GEMM from BLAS*/ \
|
|
234
236
|
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
|
|
235
|
-
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
|
|
236
|
-
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
|
237
|
+
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
|
|
238
|
+
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
|
|
237
239
|
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
|
|
238
240
|
} else { \
|
|
239
241
|
/* Make sense to call GEMM */ \
|
|
@@ -241,8 +243,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
|
|
|
241
243
|
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
|
|
242
244
|
BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
|
|
243
245
|
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
|
|
244
|
-
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
|
|
245
|
-
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
|
|
246
|
+
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
|
|
247
|
+
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, 1, resStride, alpha, gemm_blocking, 0); \
|
|
246
248
|
\
|
|
247
249
|
/*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
|
|
248
250
|
} \
|
|
@@ -15,48 +15,48 @@ namespace Eigen {
|
|
|
15
15
|
namespace internal {
|
|
16
16
|
|
|
17
17
|
// if the rhs is row major, let's transpose the product
|
|
18
|
-
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
|
|
19
|
-
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
|
|
18
|
+
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
19
|
+
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride>
|
|
20
20
|
{
|
|
21
21
|
static void run(
|
|
22
22
|
Index size, Index cols,
|
|
23
23
|
const Scalar* tri, Index triStride,
|
|
24
|
-
Scalar* _other, Index otherStride,
|
|
24
|
+
Scalar* _other, Index otherIncr, Index otherStride,
|
|
25
25
|
level3_blocking<Scalar,Scalar>& blocking)
|
|
26
26
|
{
|
|
27
27
|
triangular_solve_matrix<
|
|
28
28
|
Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
|
|
29
29
|
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
|
|
30
30
|
NumTraits<Scalar>::IsComplex && Conjugate,
|
|
31
|
-
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
|
|
32
|
-
::run(size, cols, tri, triStride, _other, otherStride, blocking);
|
|
31
|
+
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
|
|
32
|
+
::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
|
|
33
33
|
}
|
|
34
34
|
};
|
|
35
35
|
|
|
36
36
|
/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
|
|
37
37
|
*/
|
|
38
|
-
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
|
39
|
-
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
|
|
38
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
|
|
39
|
+
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
|
|
40
40
|
{
|
|
41
41
|
static EIGEN_DONT_INLINE void run(
|
|
42
42
|
Index size, Index otherSize,
|
|
43
43
|
const Scalar* _tri, Index triStride,
|
|
44
|
-
Scalar* _other, Index otherStride,
|
|
44
|
+
Scalar* _other, Index otherIncr, Index otherStride,
|
|
45
45
|
level3_blocking<Scalar,Scalar>& blocking);
|
|
46
46
|
};
|
|
47
|
-
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
|
48
|
-
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
|
|
47
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
48
|
+
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
|
|
49
49
|
Index size, Index otherSize,
|
|
50
50
|
const Scalar* _tri, Index triStride,
|
|
51
|
-
Scalar* _other, Index otherStride,
|
|
51
|
+
Scalar* _other, Index otherIncr, Index otherStride,
|
|
52
52
|
level3_blocking<Scalar,Scalar>& blocking)
|
|
53
53
|
{
|
|
54
54
|
Index cols = otherSize;
|
|
55
55
|
|
|
56
56
|
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
|
57
|
-
typedef blas_data_mapper<Scalar, Index, ColMajor> OtherMapper;
|
|
57
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
|
|
58
58
|
TriMapper tri(_tri, triStride);
|
|
59
|
-
OtherMapper other(_other, otherStride);
|
|
59
|
+
OtherMapper other(_other, otherStride, otherIncr);
|
|
60
60
|
|
|
61
61
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
62
62
|
|
|
@@ -76,7 +76,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
|
|
76
76
|
|
|
77
77
|
conj_if<Conjugate> conj;
|
|
78
78
|
gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
|
|
79
|
-
gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
|
|
79
|
+
gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, TriStorageOrder> pack_lhs;
|
|
80
80
|
gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
|
|
81
81
|
|
|
82
82
|
// the goal here is to subdivise the Rhs panels such that we keep some cache
|
|
@@ -128,19 +128,21 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
|
|
128
128
|
{
|
|
129
129
|
Scalar b(0);
|
|
130
130
|
const Scalar* l = &tri(i,s);
|
|
131
|
-
|
|
131
|
+
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
|
|
132
132
|
for (Index i3=0; i3<k; ++i3)
|
|
133
|
-
b += conj(l[i3]) * r
|
|
133
|
+
b += conj(l[i3]) * r(i3);
|
|
134
134
|
|
|
135
135
|
other(i,j) = (other(i,j) - b)*a;
|
|
136
136
|
}
|
|
137
137
|
else
|
|
138
138
|
{
|
|
139
|
-
Scalar
|
|
140
|
-
|
|
141
|
-
|
|
139
|
+
Scalar& otherij = other(i,j);
|
|
140
|
+
otherij *= a;
|
|
141
|
+
Scalar b = otherij;
|
|
142
|
+
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
|
|
143
|
+
typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
|
|
142
144
|
for (Index i3=0;i3<rs;++i3)
|
|
143
|
-
r
|
|
145
|
+
r(i3) -= b * conj(l(i3));
|
|
144
146
|
}
|
|
145
147
|
}
|
|
146
148
|
}
|
|
@@ -185,28 +187,28 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
|
|
185
187
|
|
|
186
188
|
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
|
|
187
189
|
*/
|
|
188
|
-
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
|
189
|
-
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
|
|
190
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
191
|
+
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
|
|
190
192
|
{
|
|
191
193
|
static EIGEN_DONT_INLINE void run(
|
|
192
194
|
Index size, Index otherSize,
|
|
193
195
|
const Scalar* _tri, Index triStride,
|
|
194
|
-
Scalar* _other, Index otherStride,
|
|
196
|
+
Scalar* _other, Index otherIncr, Index otherStride,
|
|
195
197
|
level3_blocking<Scalar,Scalar>& blocking);
|
|
196
198
|
};
|
|
197
|
-
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
|
198
|
-
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
|
|
199
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
200
|
+
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
|
|
199
201
|
Index size, Index otherSize,
|
|
200
202
|
const Scalar* _tri, Index triStride,
|
|
201
|
-
Scalar* _other, Index otherStride,
|
|
203
|
+
Scalar* _other, Index otherIncr, Index otherStride,
|
|
202
204
|
level3_blocking<Scalar,Scalar>& blocking)
|
|
203
205
|
{
|
|
204
206
|
Index rows = otherSize;
|
|
205
207
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
|
206
208
|
|
|
207
|
-
typedef blas_data_mapper<Scalar, Index, ColMajor> LhsMapper;
|
|
209
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
|
|
208
210
|
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
|
209
|
-
LhsMapper lhs(_other, otherStride);
|
|
211
|
+
LhsMapper lhs(_other, otherStride, otherIncr);
|
|
210
212
|
RhsMapper rhs(_tri, triStride);
|
|
211
213
|
|
|
212
214
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
@@ -229,7 +231,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
|
|
229
231
|
gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
|
|
230
232
|
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
231
233
|
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;
|
|
232
|
-
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
|
|
234
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor, false, true> pack_lhs_panel;
|
|
233
235
|
|
|
234
236
|
for(Index k2=IsLower ? size : 0;
|
|
235
237
|
IsLower ? k2>0 : k2<size;
|
|
@@ -297,24 +299,24 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
|
|
297
299
|
{
|
|
298
300
|
Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
|
|
299
301
|
|
|
300
|
-
|
|
302
|
+
typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j);
|
|
301
303
|
for (Index k3=0; k3<k; ++k3)
|
|
302
304
|
{
|
|
303
305
|
Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
|
|
304
|
-
|
|
306
|
+
typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
|
|
305
307
|
for (Index i=0; i<actual_mc; ++i)
|
|
306
|
-
r
|
|
308
|
+
r(i) -= a(i) * b;
|
|
307
309
|
}
|
|
308
310
|
if((Mode & UnitDiag)==0)
|
|
309
311
|
{
|
|
310
312
|
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
|
|
311
313
|
for (Index i=0; i<actual_mc; ++i)
|
|
312
|
-
r
|
|
314
|
+
r(i) *= inv_rjj;
|
|
313
315
|
}
|
|
314
316
|
}
|
|
315
317
|
|
|
316
318
|
// pack the just computed part of lhs to A
|
|
317
|
-
pack_lhs_panel(blockA,
|
|
319
|
+
pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
|
|
318
320
|
actualPanelWidth, actual_mc,
|
|
319
321
|
actual_kc, j2);
|
|
320
322
|
}
|
|
@@ -40,7 +40,7 @@ namespace internal {
|
|
|
40
40
|
// implements LeftSide op(triangular)^-1 * general
|
|
41
41
|
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
|
|
42
42
|
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
|
43
|
-
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
|
|
43
|
+
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
|
|
44
44
|
{ \
|
|
45
45
|
enum { \
|
|
46
46
|
IsLower = (Mode&Lower) == Lower, \
|
|
@@ -51,8 +51,10 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
|
|
|
51
51
|
static void run( \
|
|
52
52
|
Index size, Index otherSize, \
|
|
53
53
|
const EIGTYPE* _tri, Index triStride, \
|
|
54
|
-
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
|
54
|
+
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
|
55
55
|
{ \
|
|
56
|
+
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
|
|
57
|
+
eigen_assert(otherIncr == 1); \
|
|
56
58
|
BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
|
|
57
59
|
char side = 'L', uplo, diag='N', transa; \
|
|
58
60
|
/* Set alpha_ */ \
|
|
@@ -99,7 +101,7 @@ EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
|
|
|
99
101
|
// implements RightSide general * op(triangular)^-1
|
|
100
102
|
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
|
|
101
103
|
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
|
102
|
-
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
|
|
104
|
+
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
|
|
103
105
|
{ \
|
|
104
106
|
enum { \
|
|
105
107
|
IsLower = (Mode&Lower) == Lower, \
|
|
@@ -110,8 +112,10 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
|
|
|
110
112
|
static void run( \
|
|
111
113
|
Index size, Index otherSize, \
|
|
112
114
|
const EIGTYPE* _tri, Index triStride, \
|
|
113
|
-
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
|
115
|
+
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
|
114
116
|
{ \
|
|
117
|
+
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
|
|
118
|
+
eigen_assert(otherIncr == 1); \
|
|
115
119
|
BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
|
|
116
120
|
char side = 'R', uplo, diag='N', transa; \
|
|
117
121
|
/* Set alpha_ */ \
|
|
@@ -58,7 +58,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
|
|
|
58
58
|
{
|
|
59
59
|
// let's directly call the low level product function because:
|
|
60
60
|
// 1 - it is faster to compile
|
|
61
|
-
// 2 - it is
|
|
61
|
+
// 2 - it is slightly faster at runtime
|
|
62
62
|
Index startRow = IsLower ? pi : pi-actualPanelWidth;
|
|
63
63
|
Index startCol = IsLower ? 0 : pi;
|
|
64
64
|
|
|
@@ -77,7 +77,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
|
|
|
77
77
|
if (k>0)
|
|
78
78
|
rhs[i] -= (cjLhs.row(i).segment(s,k).transpose().cwiseProduct(Map<const Matrix<RhsScalar,Dynamic,1> >(rhs+s,k))).sum();
|
|
79
79
|
|
|
80
|
-
if(!(Mode & UnitDiag))
|
|
80
|
+
if((!(Mode & UnitDiag)) && numext::not_equal_strict(rhs[i],RhsScalar(0)))
|
|
81
81
|
rhs[i] /= cjLhs(i,i);
|
|
82
82
|
}
|
|
83
83
|
}
|
|
@@ -114,20 +114,23 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
|
|
|
114
114
|
for(Index k=0; k<actualPanelWidth; ++k)
|
|
115
115
|
{
|
|
116
116
|
Index i = IsLower ? pi+k : pi-k-1;
|
|
117
|
-
if(
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
117
|
+
if(numext::not_equal_strict(rhs[i],RhsScalar(0)))
|
|
118
|
+
{
|
|
119
|
+
if(!(Mode & UnitDiag))
|
|
120
|
+
rhs[i] /= cjLhs.coeff(i,i);
|
|
121
|
+
|
|
122
|
+
Index r = actualPanelWidth - k - 1; // remaining size
|
|
123
|
+
Index s = IsLower ? i+1 : i-r;
|
|
124
|
+
if (r>0)
|
|
125
|
+
Map<Matrix<RhsScalar,Dynamic,1> >(rhs+s,r) -= rhs[i] * cjLhs.col(i).segment(s,r);
|
|
126
|
+
}
|
|
124
127
|
}
|
|
125
128
|
Index r = IsLower ? size - endBlock : startBlock; // remaining size
|
|
126
129
|
if (r > 0)
|
|
127
130
|
{
|
|
128
131
|
// let's directly call the low level product function because:
|
|
129
132
|
// 1 - it is faster to compile
|
|
130
|
-
// 2 - it is
|
|
133
|
+
// 2 - it is slightly faster at runtime
|
|
131
134
|
general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,Conjugate,RhsScalar,RhsMapper,false>::run(
|
|
132
135
|
r, actualPanelWidth,
|
|
133
136
|
LhsMapper(&lhs.coeffRef(endBlock,startBlock), lhsStride),
|