tomoto 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
|
@@ -20,8 +20,9 @@ template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
|
|
|
20
20
|
template<
|
|
21
21
|
typename Index,
|
|
22
22
|
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
|
23
|
-
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs
|
|
24
|
-
|
|
23
|
+
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
|
24
|
+
int ResInnerStride>
|
|
25
|
+
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride>
|
|
25
26
|
{
|
|
26
27
|
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
|
|
27
28
|
|
|
@@ -30,7 +31,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
|
|
|
30
31
|
Index rows, Index cols, Index depth,
|
|
31
32
|
const LhsScalar* lhs, Index lhsStride,
|
|
32
33
|
const RhsScalar* rhs, Index rhsStride,
|
|
33
|
-
ResScalar* res, Index resStride,
|
|
34
|
+
ResScalar* res, Index resIncr, Index resStride,
|
|
34
35
|
ResScalar alpha,
|
|
35
36
|
level3_blocking<RhsScalar,LhsScalar>& blocking,
|
|
36
37
|
GemmParallelInfo<Index>* info = 0)
|
|
@@ -39,8 +40,8 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
|
|
|
39
40
|
general_matrix_matrix_product<Index,
|
|
40
41
|
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
|
|
41
42
|
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
|
|
42
|
-
ColMajor>
|
|
43
|
-
::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
|
|
43
|
+
ColMajor,ResInnerStride>
|
|
44
|
+
::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking,info);
|
|
44
45
|
}
|
|
45
46
|
};
|
|
46
47
|
|
|
@@ -49,8 +50,9 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
|
|
|
49
50
|
template<
|
|
50
51
|
typename Index,
|
|
51
52
|
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
|
52
|
-
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs
|
|
53
|
-
|
|
53
|
+
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
|
54
|
+
int ResInnerStride>
|
|
55
|
+
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride>
|
|
54
56
|
{
|
|
55
57
|
|
|
56
58
|
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
|
@@ -59,23 +61,23 @@ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScala
|
|
|
59
61
|
static void run(Index rows, Index cols, Index depth,
|
|
60
62
|
const LhsScalar* _lhs, Index lhsStride,
|
|
61
63
|
const RhsScalar* _rhs, Index rhsStride,
|
|
62
|
-
ResScalar* _res, Index resStride,
|
|
64
|
+
ResScalar* _res, Index resIncr, Index resStride,
|
|
63
65
|
ResScalar alpha,
|
|
64
66
|
level3_blocking<LhsScalar,RhsScalar>& blocking,
|
|
65
67
|
GemmParallelInfo<Index>* info = 0)
|
|
66
68
|
{
|
|
67
69
|
typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
|
|
68
70
|
typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
|
|
69
|
-
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
|
70
|
-
LhsMapper lhs(_lhs,lhsStride);
|
|
71
|
-
RhsMapper rhs(_rhs,rhsStride);
|
|
72
|
-
ResMapper res(_res, resStride);
|
|
71
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor,Unaligned,ResInnerStride> ResMapper;
|
|
72
|
+
LhsMapper lhs(_lhs, lhsStride);
|
|
73
|
+
RhsMapper rhs(_rhs, rhsStride);
|
|
74
|
+
ResMapper res(_res, resStride, resIncr);
|
|
73
75
|
|
|
74
76
|
Index kc = blocking.kc(); // cache block size along the K direction
|
|
75
77
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
|
76
78
|
Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction
|
|
77
79
|
|
|
78
|
-
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
|
80
|
+
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
|
|
79
81
|
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
80
82
|
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
|
|
81
83
|
|
|
@@ -108,7 +110,7 @@ static void run(Index rows, Index cols, Index depth,
|
|
|
108
110
|
// i.e., we test that info[tid].users equals 0.
|
|
109
111
|
// Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
|
|
110
112
|
while(info[tid].users!=0) {}
|
|
111
|
-
info[tid].users
|
|
113
|
+
info[tid].users = threads;
|
|
112
114
|
|
|
113
115
|
pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
|
|
114
116
|
|
|
@@ -146,7 +148,9 @@ static void run(Index rows, Index cols, Index depth,
|
|
|
146
148
|
// Release all the sub blocks A'_i of A' for the current thread,
|
|
147
149
|
// i.e., we simply decrement the number of users by 1
|
|
148
150
|
for(Index i=0; i<threads; ++i)
|
|
151
|
+
#if !EIGEN_HAS_CXX11_ATOMIC
|
|
149
152
|
#pragma omp atomic
|
|
153
|
+
#endif
|
|
150
154
|
info[i].users -= 1;
|
|
151
155
|
}
|
|
152
156
|
}
|
|
@@ -226,7 +230,7 @@ struct gemm_functor
|
|
|
226
230
|
Gemm::run(rows, cols, m_lhs.cols(),
|
|
227
231
|
&m_lhs.coeffRef(row,0), m_lhs.outerStride(),
|
|
228
232
|
&m_rhs.coeffRef(0,col), m_rhs.outerStride(),
|
|
229
|
-
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
|
|
233
|
+
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.innerStride(), m_dest.outerStride(),
|
|
230
234
|
m_actualAlpha, m_blocking, info);
|
|
231
235
|
}
|
|
232
236
|
|
|
@@ -427,8 +431,14 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
|
|
427
431
|
template<typename Dst>
|
|
428
432
|
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
429
433
|
{
|
|
430
|
-
|
|
431
|
-
|
|
434
|
+
// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=404 for a discussion and helper program
|
|
435
|
+
// to determine the following heuristic.
|
|
436
|
+
// EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h,
|
|
437
|
+
// unless it has been specialized by the user or for a given architecture.
|
|
438
|
+
// Note that the condition rhs.rows()>0 was required because lazy product is (was?) not happy with empty inputs.
|
|
439
|
+
// I'm not sure it is still required.
|
|
440
|
+
if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
|
|
441
|
+
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar,Scalar>());
|
|
432
442
|
else
|
|
433
443
|
{
|
|
434
444
|
dst.setZero();
|
|
@@ -439,8 +449,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
|
|
439
449
|
template<typename Dst>
|
|
440
450
|
static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
441
451
|
{
|
|
442
|
-
if((rhs.rows()+dst.rows()+dst.cols())<
|
|
443
|
-
lazyproduct::
|
|
452
|
+
if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
|
|
453
|
+
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
|
444
454
|
else
|
|
445
455
|
scaleAndAddTo(dst,lhs, rhs, Scalar(1));
|
|
446
456
|
}
|
|
@@ -448,8 +458,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
|
|
448
458
|
template<typename Dst>
|
|
449
459
|
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
450
460
|
{
|
|
451
|
-
if((rhs.rows()+dst.rows()+dst.cols())<
|
|
452
|
-
lazyproduct::
|
|
461
|
+
if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
|
|
462
|
+
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
|
453
463
|
else
|
|
454
464
|
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
|
|
455
465
|
}
|
|
@@ -461,11 +471,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
|
|
461
471
|
if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
|
|
462
472
|
return;
|
|
463
473
|
|
|
474
|
+
if (dst.cols() == 1)
|
|
475
|
+
{
|
|
476
|
+
// Fallback to GEMV if either the lhs or rhs is a runtime vector
|
|
477
|
+
typename Dest::ColXpr dst_vec(dst.col(0));
|
|
478
|
+
return internal::generic_product_impl<Lhs,typename Rhs::ConstColXpr,DenseShape,DenseShape,GemvProduct>
|
|
479
|
+
::scaleAndAddTo(dst_vec, a_lhs, a_rhs.col(0), alpha);
|
|
480
|
+
}
|
|
481
|
+
else if (dst.rows() == 1)
|
|
482
|
+
{
|
|
483
|
+
// Fallback to GEMV if either the lhs or rhs is a runtime vector
|
|
484
|
+
typename Dest::RowXpr dst_vec(dst.row(0));
|
|
485
|
+
return internal::generic_product_impl<typename Lhs::ConstRowXpr,Rhs,DenseShape,DenseShape,GemvProduct>
|
|
486
|
+
::scaleAndAddTo(dst_vec, a_lhs.row(0), a_rhs, alpha);
|
|
487
|
+
}
|
|
488
|
+
|
|
464
489
|
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
|
|
465
490
|
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
|
|
466
491
|
|
|
467
|
-
Scalar actualAlpha = alpha
|
|
468
|
-
* RhsBlasTraits::extractScalarFactor(a_rhs);
|
|
492
|
+
Scalar actualAlpha = combine_scalar_factors(alpha, a_lhs, a_rhs);
|
|
469
493
|
|
|
470
494
|
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
|
|
471
495
|
Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
|
|
@@ -476,7 +500,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
|
|
476
500
|
Index,
|
|
477
501
|
LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
|
|
478
502
|
RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
|
|
479
|
-
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor
|
|
503
|
+
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,
|
|
504
|
+
Dest::InnerStrideAtCompileTime>,
|
|
480
505
|
ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
|
|
481
506
|
|
|
482
507
|
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
|
|
@@ -25,51 +25,54 @@ namespace internal {
|
|
|
25
25
|
**********************************************************************/
|
|
26
26
|
|
|
27
27
|
// forward declarations (defined at the end of this file)
|
|
28
|
-
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
|
|
28
|
+
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>
|
|
29
29
|
struct tribb_kernel;
|
|
30
30
|
|
|
31
31
|
/* Optimized matrix-matrix product evaluating only one triangular half */
|
|
32
32
|
template <typename Index,
|
|
33
33
|
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
|
34
34
|
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
|
35
|
-
int ResStorageOrder, int UpLo, int Version = Specialized>
|
|
35
|
+
int ResStorageOrder, int ResInnerStride, int UpLo, int Version = Specialized>
|
|
36
36
|
struct general_matrix_matrix_triangular_product;
|
|
37
37
|
|
|
38
38
|
// as usual if the result is row major => we transpose the product
|
|
39
39
|
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
|
40
|
-
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
|
41
|
-
|
|
40
|
+
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
|
41
|
+
int ResInnerStride, int UpLo, int Version>
|
|
42
|
+
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,UpLo,Version>
|
|
42
43
|
{
|
|
43
44
|
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
|
44
45
|
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
|
|
45
|
-
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
|
|
46
|
+
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr, Index resStride,
|
|
46
47
|
const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking)
|
|
47
48
|
{
|
|
48
49
|
general_matrix_matrix_triangular_product<Index,
|
|
49
50
|
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
|
|
50
51
|
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
|
|
51
|
-
ColMajor, UpLo==Lower?Upper:Lower>
|
|
52
|
-
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking);
|
|
52
|
+
ColMajor, ResInnerStride, UpLo==Lower?Upper:Lower>
|
|
53
|
+
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking);
|
|
53
54
|
}
|
|
54
55
|
};
|
|
55
56
|
|
|
56
57
|
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
|
57
|
-
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
|
58
|
-
|
|
58
|
+
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
|
59
|
+
int ResInnerStride, int UpLo, int Version>
|
|
60
|
+
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,UpLo,Version>
|
|
59
61
|
{
|
|
60
62
|
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
|
61
63
|
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
|
|
62
|
-
const RhsScalar* _rhs, Index rhsStride,
|
|
64
|
+
const RhsScalar* _rhs, Index rhsStride,
|
|
65
|
+
ResScalar* _res, Index resIncr, Index resStride,
|
|
63
66
|
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
|
|
64
67
|
{
|
|
65
68
|
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
|
66
69
|
|
|
67
70
|
typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
|
|
68
71
|
typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
|
|
69
|
-
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
|
72
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
70
73
|
LhsMapper lhs(_lhs,lhsStride);
|
|
71
74
|
RhsMapper rhs(_rhs,rhsStride);
|
|
72
|
-
ResMapper res(_res, resStride);
|
|
75
|
+
ResMapper res(_res, resStride, resIncr);
|
|
73
76
|
|
|
74
77
|
Index kc = blocking.kc();
|
|
75
78
|
Index mc = (std::min)(size,blocking.mc());
|
|
@@ -84,10 +87,10 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
|
|
84
87
|
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
|
|
85
88
|
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
|
|
86
89
|
|
|
87
|
-
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
|
90
|
+
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
|
|
88
91
|
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
89
92
|
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
|
|
90
|
-
tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb;
|
|
93
|
+
tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, ResInnerStride, UpLo> sybb;
|
|
91
94
|
|
|
92
95
|
for(Index k2=0; k2<depth; k2+=kc)
|
|
93
96
|
{
|
|
@@ -110,8 +113,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
|
|
110
113
|
gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc,
|
|
111
114
|
(std::min)(size,i2), alpha, -1, -1, 0, 0);
|
|
112
115
|
|
|
113
|
-
|
|
114
|
-
sybb(_res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
|
|
116
|
+
sybb(_res+resStride*i2 + resIncr*i2, resIncr, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
|
|
115
117
|
|
|
116
118
|
if (UpLo==Upper)
|
|
117
119
|
{
|
|
@@ -133,7 +135,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
|
|
133
135
|
// while the triangular block overlapping the diagonal is evaluated into a
|
|
134
136
|
// small temporary buffer which is then accumulated into the result using a
|
|
135
137
|
// triangular traversal.
|
|
136
|
-
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
|
|
138
|
+
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>
|
|
137
139
|
struct tribb_kernel
|
|
138
140
|
{
|
|
139
141
|
typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;
|
|
@@ -142,11 +144,13 @@ struct tribb_kernel
|
|
|
142
144
|
enum {
|
|
143
145
|
BlockSize = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret
|
|
144
146
|
};
|
|
145
|
-
void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
|
|
147
|
+
void operator()(ResScalar* _res, Index resIncr, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
|
|
146
148
|
{
|
|
147
|
-
typedef blas_data_mapper<ResScalar, Index, ColMajor> ResMapper;
|
|
148
|
-
|
|
149
|
-
|
|
149
|
+
typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
150
|
+
typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned> BufferMapper;
|
|
151
|
+
ResMapper res(_res, resStride, resIncr);
|
|
152
|
+
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel1;
|
|
153
|
+
gebp_kernel<LhsScalar, RhsScalar, Index, BufferMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel2;
|
|
150
154
|
|
|
151
155
|
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert()));
|
|
152
156
|
|
|
@@ -158,31 +162,32 @@ struct tribb_kernel
|
|
|
158
162
|
const RhsScalar* actual_b = blockB+j*depth;
|
|
159
163
|
|
|
160
164
|
if(UpLo==Upper)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
165
|
+
gebp_kernel1(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha,
|
|
166
|
+
-1, -1, 0, 0);
|
|
167
|
+
|
|
164
168
|
// selfadjoint micro block
|
|
165
169
|
{
|
|
166
170
|
Index i = j;
|
|
167
171
|
buffer.setZero();
|
|
168
172
|
// 1 - apply the kernel on the temporary buffer
|
|
169
|
-
|
|
170
|
-
|
|
173
|
+
gebp_kernel2(BufferMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
|
|
174
|
+
-1, -1, 0, 0);
|
|
175
|
+
|
|
171
176
|
// 2 - triangular accumulation
|
|
172
177
|
for(Index j1=0; j1<actualBlockSize; ++j1)
|
|
173
178
|
{
|
|
174
|
-
|
|
179
|
+
typename ResMapper::LinearMapper r = res.getLinearMapper(i,j+j1);
|
|
175
180
|
for(Index i1=UpLo==Lower ? j1 : 0;
|
|
176
181
|
UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
|
|
177
|
-
r
|
|
182
|
+
r(i1) += buffer(i1,j1);
|
|
178
183
|
}
|
|
179
184
|
}
|
|
180
185
|
|
|
181
186
|
if(UpLo==Lower)
|
|
182
187
|
{
|
|
183
188
|
Index i = j+actualBlockSize;
|
|
184
|
-
|
|
185
|
-
|
|
189
|
+
gebp_kernel1(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i,
|
|
190
|
+
depth, actualBlockSize, alpha, -1, -1, 0, 0);
|
|
186
191
|
}
|
|
187
192
|
}
|
|
188
193
|
}
|
|
@@ -286,23 +291,24 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
|
|
|
286
291
|
internal::general_matrix_matrix_triangular_product<Index,
|
|
287
292
|
typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
|
288
293
|
typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
|
289
|
-
IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)>
|
|
294
|
+
IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo&(Lower|Upper)>
|
|
290
295
|
::run(size, depth,
|
|
291
296
|
&actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
|
|
292
297
|
&actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
|
|
293
|
-
mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ?
|
|
298
|
+
mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? mat.innerStride() : mat.outerStride() ) : 0),
|
|
299
|
+
mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
|
|
294
300
|
}
|
|
295
301
|
};
|
|
296
302
|
|
|
297
303
|
template<typename MatrixType, unsigned int UpLo>
|
|
298
304
|
template<typename ProductType>
|
|
299
|
-
TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
|
|
305
|
+
EIGEN_DEVICE_FUNC TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
|
|
300
306
|
{
|
|
301
307
|
EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
|
|
302
308
|
eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
|
|
303
|
-
|
|
309
|
+
|
|
304
310
|
general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
|
|
305
|
-
|
|
311
|
+
|
|
306
312
|
return derived();
|
|
307
313
|
}
|
|
308
314
|
|
|
@@ -37,10 +37,10 @@ namespace Eigen {
|
|
|
37
37
|
|
|
38
38
|
namespace internal {
|
|
39
39
|
|
|
40
|
-
template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int
|
|
40
|
+
template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>
|
|
41
41
|
struct general_matrix_matrix_rankupdate :
|
|
42
42
|
general_matrix_matrix_triangular_product<
|
|
43
|
-
Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
|
|
43
|
+
Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,1,UpLo,BuiltIn> {};
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
// try to go to BLAS specialization
|
|
@@ -48,9 +48,9 @@ struct general_matrix_matrix_rankupdate :
|
|
|
48
48
|
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
|
|
49
49
|
int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
|
|
50
50
|
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
|
|
51
|
-
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
|
|
51
|
+
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,1,UpLo,Specialized> { \
|
|
52
52
|
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
|
|
53
|
-
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
|
|
53
|
+
const Scalar* rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
|
|
54
54
|
{ \
|
|
55
55
|
if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \
|
|
56
56
|
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
|
|
@@ -59,8 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
|
|
|
59
59
|
general_matrix_matrix_triangular_product<Index, \
|
|
60
60
|
Scalar, LhsStorageOrder, ConjugateLhs, \
|
|
61
61
|
Scalar, RhsStorageOrder, ConjugateRhs, \
|
|
62
|
-
ColMajor, UpLo, BuiltIn> \
|
|
63
|
-
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
|
|
62
|
+
ColMajor, 1, UpLo, BuiltIn> \
|
|
63
|
+
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resIncr,resStride,alpha,blocking); \
|
|
64
64
|
} \
|
|
65
65
|
} \
|
|
66
66
|
};
|
|
@@ -51,20 +51,22 @@ template< \
|
|
|
51
51
|
typename Index, \
|
|
52
52
|
int LhsStorageOrder, bool ConjugateLhs, \
|
|
53
53
|
int RhsStorageOrder, bool ConjugateRhs> \
|
|
54
|
-
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
|
|
54
|
+
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1> \
|
|
55
55
|
{ \
|
|
56
56
|
typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \
|
|
57
57
|
\
|
|
58
58
|
static void run(Index rows, Index cols, Index depth, \
|
|
59
59
|
const EIGTYPE* _lhs, Index lhsStride, \
|
|
60
60
|
const EIGTYPE* _rhs, Index rhsStride, \
|
|
61
|
-
EIGTYPE* res, Index resStride, \
|
|
61
|
+
EIGTYPE* res, Index resIncr, Index resStride, \
|
|
62
62
|
EIGTYPE alpha, \
|
|
63
63
|
level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \
|
|
64
64
|
GemmParallelInfo<Index>* /*info = 0*/) \
|
|
65
65
|
{ \
|
|
66
66
|
using std::conj; \
|
|
67
67
|
\
|
|
68
|
+
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
|
69
|
+
eigen_assert(resIncr == 1); \
|
|
68
70
|
char transa, transb; \
|
|
69
71
|
BlasIndex m, n, k, lda, ldb, ldc; \
|
|
70
72
|
const EIGTYPE *a, *b; \
|