tomoto 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
|
@@ -14,27 +14,27 @@
|
|
|
14
14
|
#define EIGEN_PRODUCTEVALUATORS_H
|
|
15
15
|
|
|
16
16
|
namespace Eigen {
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
namespace internal {
|
|
19
19
|
|
|
20
20
|
/** \internal
|
|
21
21
|
* Evaluator of a product expression.
|
|
22
22
|
* Since products require special treatments to handle all possible cases,
|
|
23
|
-
* we simply
|
|
23
|
+
* we simply defer the evaluation logic to a product_evaluator class
|
|
24
24
|
* which offers more partial specialization possibilities.
|
|
25
|
-
*
|
|
25
|
+
*
|
|
26
26
|
* \sa class product_evaluator
|
|
27
27
|
*/
|
|
28
28
|
template<typename Lhs, typename Rhs, int Options>
|
|
29
|
-
struct evaluator<Product<Lhs, Rhs, Options> >
|
|
29
|
+
struct evaluator<Product<Lhs, Rhs, Options> >
|
|
30
30
|
: public product_evaluator<Product<Lhs, Rhs, Options> >
|
|
31
31
|
{
|
|
32
32
|
typedef Product<Lhs, Rhs, Options> XprType;
|
|
33
33
|
typedef product_evaluator<XprType> Base;
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
|
36
36
|
};
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
|
|
39
39
|
// TODO we should apply that rule only if that's really helpful
|
|
40
40
|
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
|
@@ -62,12 +62,12 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
template<typename Lhs, typename Rhs, int DiagIndex>
|
|
65
|
-
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
|
|
65
|
+
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
|
|
66
66
|
: public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
|
|
67
67
|
{
|
|
68
68
|
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
|
|
69
69
|
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
|
|
70
|
-
|
|
70
|
+
|
|
71
71
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
|
|
72
72
|
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
|
|
73
73
|
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
|
|
@@ -108,27 +108,27 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
|
|
|
108
108
|
: m_result(xpr.rows(), xpr.cols())
|
|
109
109
|
{
|
|
110
110
|
::new (static_cast<Base*>(this)) Base(m_result);
|
|
111
|
-
|
|
111
|
+
|
|
112
112
|
// FIXME shall we handle nested_eval here?,
|
|
113
113
|
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
|
|
114
114
|
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
|
115
115
|
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
|
116
116
|
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
|
117
117
|
// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
|
118
|
-
//
|
|
118
|
+
//
|
|
119
119
|
// const LhsNested lhs(xpr.lhs());
|
|
120
120
|
// const RhsNested rhs(xpr.rhs());
|
|
121
|
-
//
|
|
121
|
+
//
|
|
122
122
|
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
|
|
123
123
|
|
|
124
124
|
generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
|
|
125
125
|
}
|
|
126
|
-
|
|
127
|
-
protected:
|
|
126
|
+
|
|
127
|
+
protected:
|
|
128
128
|
PlainObject m_result;
|
|
129
129
|
};
|
|
130
130
|
|
|
131
|
-
// The following three shortcuts are enabled only if the scalar types match
|
|
131
|
+
// The following three shortcuts are enabled only if the scalar types match exactly.
|
|
132
132
|
// TODO: we could enable them for different scalar types when the product is not vectorized.
|
|
133
133
|
|
|
134
134
|
// Dense = Product
|
|
@@ -137,7 +137,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
|
|
|
137
137
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
|
138
138
|
{
|
|
139
139
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
|
140
|
-
static EIGEN_STRONG_INLINE
|
|
140
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
141
141
|
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
|
142
142
|
{
|
|
143
143
|
Index dstRows = src.rows();
|
|
@@ -155,7 +155,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
|
|
155
155
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
|
156
156
|
{
|
|
157
157
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
|
158
|
-
static EIGEN_STRONG_INLINE
|
|
158
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
159
159
|
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
|
160
160
|
{
|
|
161
161
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
@@ -170,7 +170,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
|
|
170
170
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
|
171
171
|
{
|
|
172
172
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
|
173
|
-
static EIGEN_STRONG_INLINE
|
|
173
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
174
174
|
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
|
175
175
|
{
|
|
176
176
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
@@ -190,7 +190,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
|
|
|
190
190
|
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
|
|
191
191
|
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
|
|
192
192
|
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
|
|
193
|
-
static EIGEN_STRONG_INLINE
|
|
193
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
194
194
|
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
|
|
195
195
|
{
|
|
196
196
|
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
|
|
@@ -217,7 +217,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
|
|
|
217
217
|
struct assignment_from_xpr_op_product
|
|
218
218
|
{
|
|
219
219
|
template<typename SrcXprType, typename InitialFunc>
|
|
220
|
-
static EIGEN_STRONG_INLINE
|
|
220
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
221
221
|
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
|
|
222
222
|
{
|
|
223
223
|
call_assignment_no_alias(dst, src.lhs(), Func1());
|
|
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
|
|
|
246
246
|
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
|
247
247
|
{
|
|
248
248
|
template<typename Dst>
|
|
249
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
249
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
250
250
|
{
|
|
251
251
|
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
|
252
252
|
}
|
|
253
|
-
|
|
253
|
+
|
|
254
254
|
template<typename Dst>
|
|
255
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
255
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
256
256
|
{
|
|
257
257
|
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
|
|
258
258
|
}
|
|
259
|
-
|
|
259
|
+
|
|
260
260
|
template<typename Dst>
|
|
261
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
261
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
262
262
|
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
|
|
263
263
|
};
|
|
264
264
|
|
|
@@ -269,10 +269,10 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
|
|
269
269
|
|
|
270
270
|
// Column major result
|
|
271
271
|
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
|
272
|
-
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
|
272
|
+
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
|
273
273
|
{
|
|
274
274
|
evaluator<Rhs> rhsEval(rhs);
|
|
275
|
-
|
|
275
|
+
ei_declare_local_nested_eval(Lhs,lhs,Rhs::SizeAtCompileTime,actual_lhs);
|
|
276
276
|
// FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
|
|
277
277
|
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
|
278
278
|
const Index cols = dst.cols();
|
|
@@ -282,10 +282,10 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
|
|
|
282
282
|
|
|
283
283
|
// Row major result
|
|
284
284
|
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
|
285
|
-
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
|
285
|
+
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
|
286
286
|
{
|
|
287
287
|
evaluator<Lhs> lhsEval(lhs);
|
|
288
|
-
|
|
288
|
+
ei_declare_local_nested_eval(Rhs,rhs,Lhs::SizeAtCompileTime,actual_rhs);
|
|
289
289
|
// FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
|
|
290
290
|
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
|
291
291
|
const Index rows = dst.rows();
|
|
@@ -298,43 +298,43 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
|
|
|
298
298
|
{
|
|
299
299
|
template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
|
|
300
300
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
301
|
-
|
|
301
|
+
|
|
302
302
|
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
|
|
303
|
-
struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
|
|
304
|
-
struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
|
|
305
|
-
struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
|
|
303
|
+
struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
|
|
304
|
+
struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
|
|
305
|
+
struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
|
|
306
306
|
struct adds {
|
|
307
307
|
Scalar m_scale;
|
|
308
308
|
explicit adds(const Scalar& s) : m_scale(s) {}
|
|
309
|
-
template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
|
|
309
|
+
template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
|
|
310
310
|
dst.const_cast_derived() += m_scale * src;
|
|
311
311
|
}
|
|
312
312
|
};
|
|
313
|
-
|
|
313
|
+
|
|
314
314
|
template<typename Dst>
|
|
315
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
315
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
316
316
|
{
|
|
317
317
|
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
|
|
318
318
|
}
|
|
319
|
-
|
|
319
|
+
|
|
320
320
|
template<typename Dst>
|
|
321
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
321
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
322
322
|
{
|
|
323
323
|
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
|
|
324
324
|
}
|
|
325
|
-
|
|
325
|
+
|
|
326
326
|
template<typename Dst>
|
|
327
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
327
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
328
328
|
{
|
|
329
329
|
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
|
|
330
330
|
}
|
|
331
|
-
|
|
331
|
+
|
|
332
332
|
template<typename Dst>
|
|
333
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
333
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
334
334
|
{
|
|
335
335
|
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
|
|
336
336
|
}
|
|
337
|
-
|
|
337
|
+
|
|
338
338
|
};
|
|
339
339
|
|
|
340
340
|
|
|
@@ -343,21 +343,21 @@ template<typename Lhs, typename Rhs, typename Derived>
|
|
|
343
343
|
struct generic_product_impl_base
|
|
344
344
|
{
|
|
345
345
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
346
|
-
|
|
346
|
+
|
|
347
347
|
template<typename Dst>
|
|
348
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
348
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
349
349
|
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
|
|
350
350
|
|
|
351
351
|
template<typename Dst>
|
|
352
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
352
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
353
353
|
{ scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
|
|
354
354
|
|
|
355
355
|
template<typename Dst>
|
|
356
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
356
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
357
357
|
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
|
|
358
|
-
|
|
358
|
+
|
|
359
359
|
template<typename Dst>
|
|
360
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
360
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
361
361
|
{ Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
|
|
362
362
|
|
|
363
363
|
};
|
|
@@ -373,8 +373,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|
|
373
373
|
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
|
374
374
|
|
|
375
375
|
template<typename Dest>
|
|
376
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
376
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
377
377
|
{
|
|
378
|
+
// Fall back to the inner product if both the lhs and rhs are runtime vectors.
|
|
379
|
+
if (lhs.rows() == 1 && rhs.cols() == 1) {
|
|
380
|
+
dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
|
|
381
|
+
return;
|
|
382
|
+
}
|
|
378
383
|
LhsNested actual_lhs(lhs);
|
|
379
384
|
RhsNested actual_rhs(rhs);
|
|
380
385
|
internal::gemv_dense_selector<Side,
|
|
@@ -385,35 +390,84 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|
|
385
390
|
};
|
|
386
391
|
|
|
387
392
|
template<typename Lhs, typename Rhs>
|
|
388
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|
393
|
+
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|
389
394
|
{
|
|
390
395
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
391
|
-
|
|
396
|
+
|
|
392
397
|
template<typename Dst>
|
|
393
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
398
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
394
399
|
{
|
|
395
400
|
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
|
|
396
401
|
// but easier on the compiler side
|
|
397
402
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
|
|
398
403
|
}
|
|
399
|
-
|
|
404
|
+
|
|
400
405
|
template<typename Dst>
|
|
401
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
406
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
402
407
|
{
|
|
403
408
|
// dst.noalias() += lhs.lazyProduct(rhs);
|
|
404
409
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
|
405
410
|
}
|
|
406
|
-
|
|
411
|
+
|
|
407
412
|
template<typename Dst>
|
|
408
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
413
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
409
414
|
{
|
|
410
415
|
// dst.noalias() -= lhs.lazyProduct(rhs);
|
|
411
416
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
|
412
417
|
}
|
|
413
|
-
|
|
414
|
-
//
|
|
415
|
-
//
|
|
416
|
-
// {
|
|
418
|
+
|
|
419
|
+
// This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
|
|
420
|
+
// This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
|
|
421
|
+
// dst {,+,-}= (s1*A)*(B*s2)
|
|
422
|
+
// will be rewritten as:
|
|
423
|
+
// dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
|
|
424
|
+
// There are at least four benefits of doing so:
|
|
425
|
+
// 1 - huge performance gain for heap-allocated matrix types as it saves costly allocations.
|
|
426
|
+
// 2 - it is faster than simply by-passing the heap allocation through stack allocation.
|
|
427
|
+
// 3 - it makes this fallback consistent with the heavy GEMM routine.
|
|
428
|
+
// 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
|
|
429
|
+
// (see https://stackoverflow.com/questions/54738495)
|
|
430
|
+
// For small fixed-size matrices, however, the gains are less obvious: it is sometimes x2 faster, but sometimes x3 slower,
|
|
431
|
+
// and the behavior also depends a lot on the compiler... This is why this rewriting strategy is currently
|
|
432
|
+
// enabled only when falling back from the main GEMM.
|
|
433
|
+
template<typename Dst, typename Func>
|
|
434
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
435
|
+
void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func &func)
|
|
436
|
+
{
|
|
437
|
+
enum {
|
|
438
|
+
HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
|
|
439
|
+
ConjLhs = blas_traits<Lhs>::NeedToConjugate,
|
|
440
|
+
ConjRhs = blas_traits<Rhs>::NeedToConjugate
|
|
441
|
+
};
|
|
442
|
+
// FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
|
|
443
|
+
// this is important for real*complex_mat
|
|
444
|
+
Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
|
|
445
|
+
|
|
446
|
+
eval_dynamic_impl(dst,
|
|
447
|
+
blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
|
|
448
|
+
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
|
|
449
|
+
func,
|
|
450
|
+
actualAlpha,
|
|
451
|
+
typename conditional<HasScalarFactor,true_type,false_type>::type());
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
protected:
|
|
455
|
+
|
|
456
|
+
template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
|
|
457
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
458
|
+
void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type)
|
|
459
|
+
{
|
|
460
|
+
EIGEN_UNUSED_VARIABLE(s);
|
|
461
|
+
eigen_internal_assert(s==Scalar(1));
|
|
462
|
+
call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
|
|
466
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
467
|
+
void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s, true_type)
|
|
468
|
+
{
|
|
469
|
+
call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
|
|
470
|
+
}
|
|
417
471
|
};
|
|
418
472
|
|
|
419
473
|
// This specialization enforces the use of a coefficient-based evaluation strategy
|
|
@@ -471,7 +525,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
471
525
|
|
|
472
526
|
typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
|
473
527
|
typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
|
474
|
-
|
|
528
|
+
|
|
475
529
|
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
|
476
530
|
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
|
477
531
|
|
|
@@ -490,19 +544,19 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
490
544
|
typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
|
|
491
545
|
|
|
492
546
|
enum {
|
|
493
|
-
|
|
547
|
+
|
|
494
548
|
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
|
495
549
|
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
|
496
550
|
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
|
497
551
|
: InnerSize == Dynamic ? HugeCost
|
|
498
|
-
|
|
552
|
+
: InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
|
|
499
553
|
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
|
500
554
|
|
|
501
555
|
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
|
502
|
-
|
|
556
|
+
|
|
503
557
|
LhsFlags = LhsEtorType::Flags,
|
|
504
558
|
RhsFlags = RhsEtorType::Flags,
|
|
505
|
-
|
|
559
|
+
|
|
506
560
|
LhsRowMajor = LhsFlags & RowMajorBit,
|
|
507
561
|
RhsRowMajor = RhsFlags & RowMajorBit,
|
|
508
562
|
|
|
@@ -512,7 +566,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
512
566
|
// Here, we don't care about alignment larger than the usable packet size.
|
|
513
567
|
LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
|
|
514
568
|
RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
|
|
515
|
-
|
|
569
|
+
|
|
516
570
|
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
|
517
571
|
|
|
518
572
|
CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
|
|
@@ -522,12 +576,12 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
522
576
|
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
|
523
577
|
: (bool(RhsRowMajor) && !CanVectorizeLhs),
|
|
524
578
|
|
|
525
|
-
Flags = ((
|
|
579
|
+
Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
|
|
526
580
|
| (EvalToRowMajor ? RowMajorBit : 0)
|
|
527
581
|
// TODO enable vectorization for mixed types
|
|
528
582
|
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
|
|
529
583
|
| (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
|
|
530
|
-
|
|
584
|
+
|
|
531
585
|
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
|
|
532
586
|
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
|
|
533
587
|
|
|
@@ -543,10 +597,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
543
597
|
CanVectorizeInner = SameType
|
|
544
598
|
&& LhsRowMajor
|
|
545
599
|
&& (!RhsRowMajor)
|
|
546
|
-
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
|
547
|
-
&& (InnerSize % packet_traits<Scalar>::size == 0)
|
|
600
|
+
&& (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
|
|
601
|
+
&& (int(InnerSize) % packet_traits<Scalar>::size == 0)
|
|
548
602
|
};
|
|
549
|
-
|
|
603
|
+
|
|
550
604
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
|
|
551
605
|
{
|
|
552
606
|
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
|
|
@@ -556,7 +610,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
556
610
|
* which is why we don't set the LinearAccessBit.
|
|
557
611
|
* TODO: this seems possible when the result is a vector
|
|
558
612
|
*/
|
|
559
|
-
EIGEN_DEVICE_FUNC
|
|
613
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
614
|
+
const CoeffReturnType coeff(Index index) const
|
|
560
615
|
{
|
|
561
616
|
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
|
|
562
617
|
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
|
|
@@ -564,6 +619,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
564
619
|
}
|
|
565
620
|
|
|
566
621
|
template<int LoadMode, typename PacketType>
|
|
622
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
567
623
|
const PacketType packet(Index row, Index col) const
|
|
568
624
|
{
|
|
569
625
|
PacketType res;
|
|
@@ -575,6 +631,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
575
631
|
}
|
|
576
632
|
|
|
577
633
|
template<int LoadMode, typename PacketType>
|
|
634
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
578
635
|
const PacketType packet(Index index) const
|
|
579
636
|
{
|
|
580
637
|
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
|
|
@@ -585,7 +642,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
585
642
|
protected:
|
|
586
643
|
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
|
|
587
644
|
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
|
|
588
|
-
|
|
645
|
+
|
|
589
646
|
LhsEtorType m_lhsImpl;
|
|
590
647
|
RhsEtorType m_rhsImpl;
|
|
591
648
|
|
|
@@ -603,7 +660,8 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
|
|
|
603
660
|
enum {
|
|
604
661
|
Flags = Base::Flags | EvalBeforeNestingBit
|
|
605
662
|
};
|
|
606
|
-
EIGEN_DEVICE_FUNC
|
|
663
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
664
|
+
explicit product_evaluator(const XprType& xpr)
|
|
607
665
|
: Base(BaseProduct(xpr.lhs(),xpr.rhs()))
|
|
608
666
|
{}
|
|
609
667
|
};
|
|
@@ -615,7 +673,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
|
|
|
615
673
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
616
674
|
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
|
617
675
|
{
|
|
618
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
|
676
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
|
619
677
|
{
|
|
620
678
|
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
|
621
679
|
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
|
|
@@ -625,7 +683,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|
|
625
683
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
626
684
|
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
|
627
685
|
{
|
|
628
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
|
686
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
|
629
687
|
{
|
|
630
688
|
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
|
631
689
|
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
|
|
@@ -635,7 +693,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|
|
635
693
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
636
694
|
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
637
695
|
{
|
|
638
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
|
696
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
|
639
697
|
{
|
|
640
698
|
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
|
|
641
699
|
}
|
|
@@ -644,7 +702,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
|
644
702
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
645
703
|
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
646
704
|
{
|
|
647
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
|
705
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
|
648
706
|
{
|
|
649
707
|
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
|
650
708
|
}
|
|
@@ -653,7 +711,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
|
653
711
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
654
712
|
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
655
713
|
{
|
|
656
|
-
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
|
714
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
|
657
715
|
{
|
|
658
716
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
659
717
|
}
|
|
@@ -662,7 +720,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
|
662
720
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
663
721
|
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
664
722
|
{
|
|
665
|
-
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
|
723
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
|
666
724
|
{
|
|
667
725
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
668
726
|
}
|
|
@@ -671,7 +729,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
|
671
729
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
672
730
|
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
673
731
|
{
|
|
674
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
|
732
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
|
675
733
|
{
|
|
676
734
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
677
735
|
for(Index i = 0; i < innerDim; ++i)
|
|
@@ -682,7 +740,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
|
682
740
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
683
741
|
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
684
742
|
{
|
|
685
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
|
743
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
|
686
744
|
{
|
|
687
745
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
688
746
|
for(Index i = 0; i < innerDim; ++i)
|
|
@@ -704,7 +762,7 @@ struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
|
|
|
704
762
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
|
|
705
763
|
{
|
|
706
764
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
707
|
-
|
|
765
|
+
|
|
708
766
|
template<typename Dest>
|
|
709
767
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
710
768
|
{
|
|
@@ -718,7 +776,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
|
|
|
718
776
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
|
|
719
777
|
{
|
|
720
778
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
721
|
-
|
|
779
|
+
|
|
722
780
|
template<typename Dest>
|
|
723
781
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
724
782
|
{
|
|
@@ -739,9 +797,10 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
|
|
|
739
797
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
|
|
740
798
|
{
|
|
741
799
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
742
|
-
|
|
800
|
+
|
|
743
801
|
template<typename Dest>
|
|
744
|
-
static
|
|
802
|
+
static EIGEN_DEVICE_FUNC
|
|
803
|
+
void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
745
804
|
{
|
|
746
805
|
selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
|
|
747
806
|
}
|
|
@@ -752,7 +811,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
|
|
|
752
811
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
|
|
753
812
|
{
|
|
754
813
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
755
|
-
|
|
814
|
+
|
|
756
815
|
template<typename Dest>
|
|
757
816
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
758
817
|
{
|
|
@@ -764,7 +823,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
|
|
|
764
823
|
/***************************************************************************
|
|
765
824
|
* Diagonal products
|
|
766
825
|
***************************************************************************/
|
|
767
|
-
|
|
826
|
+
|
|
768
827
|
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
|
|
769
828
|
struct diagonal_product_evaluator_base
|
|
770
829
|
: evaluator_base<Derived>
|
|
@@ -772,17 +831,25 @@ struct diagonal_product_evaluator_base
|
|
|
772
831
|
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
|
773
832
|
public:
|
|
774
833
|
enum {
|
|
775
|
-
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
|
|
776
|
-
|
|
834
|
+
CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
|
|
835
|
+
|
|
777
836
|
MatrixFlags = evaluator<MatrixType>::Flags,
|
|
778
837
|
DiagFlags = evaluator<DiagonalType>::Flags,
|
|
779
|
-
|
|
838
|
+
|
|
839
|
+
_StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
|
|
840
|
+
: (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
|
|
841
|
+
: MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
|
|
842
|
+
_SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
|
|
843
|
+
|
|
780
844
|
_ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
|
|
781
845
|
||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
|
|
782
846
|
_SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
|
|
783
847
|
// FIXME currently we need same types, but in the future the next rule should be the one
|
|
784
848
|
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
|
|
785
|
-
_Vectorizable =
|
|
849
|
+
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit)
|
|
850
|
+
&& _SameTypes
|
|
851
|
+
&& (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
|
|
852
|
+
&& (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
|
|
786
853
|
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
|
|
787
854
|
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
|
|
788
855
|
Alignment = evaluator<MatrixType>::Alignment,
|
|
@@ -791,14 +858,14 @@ public:
|
|
|
791
858
|
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
|
|
792
859
|
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
|
|
793
860
|
};
|
|
794
|
-
|
|
795
|
-
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
|
861
|
+
|
|
862
|
+
EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
|
796
863
|
: m_diagImpl(diag), m_matImpl(mat)
|
|
797
864
|
{
|
|
798
865
|
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
|
799
866
|
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
|
800
867
|
}
|
|
801
|
-
|
|
868
|
+
|
|
802
869
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
|
|
803
870
|
{
|
|
804
871
|
if(AsScalarProduct)
|
|
@@ -806,7 +873,7 @@ public:
|
|
|
806
873
|
else
|
|
807
874
|
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
|
|
808
875
|
}
|
|
809
|
-
|
|
876
|
+
|
|
810
877
|
protected:
|
|
811
878
|
template<int LoadMode,typename PacketType>
|
|
812
879
|
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
|
|
@@ -814,7 +881,7 @@ protected:
|
|
|
814
881
|
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
|
815
882
|
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
|
|
816
883
|
}
|
|
817
|
-
|
|
884
|
+
|
|
818
885
|
template<int LoadMode,typename PacketType>
|
|
819
886
|
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
|
|
820
887
|
{
|
|
@@ -825,7 +892,7 @@ protected:
|
|
|
825
892
|
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
|
826
893
|
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
|
|
827
894
|
}
|
|
828
|
-
|
|
895
|
+
|
|
829
896
|
evaluator<DiagonalType> m_diagImpl;
|
|
830
897
|
evaluator<MatrixType> m_matImpl;
|
|
831
898
|
};
|
|
@@ -840,25 +907,25 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|
|
840
907
|
using Base::m_matImpl;
|
|
841
908
|
using Base::coeff;
|
|
842
909
|
typedef typename Base::Scalar Scalar;
|
|
843
|
-
|
|
910
|
+
|
|
844
911
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
|
845
912
|
typedef typename XprType::PlainObject PlainObject;
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
};
|
|
913
|
+
typedef typename Lhs::DiagonalVectorType DiagonalType;
|
|
914
|
+
|
|
915
|
+
|
|
916
|
+
enum { StorageOrder = Base::_StorageOrder };
|
|
850
917
|
|
|
851
918
|
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
|
852
919
|
: Base(xpr.rhs(), xpr.lhs().diagonal())
|
|
853
920
|
{
|
|
854
921
|
}
|
|
855
|
-
|
|
922
|
+
|
|
856
923
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
|
857
924
|
{
|
|
858
925
|
return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
|
|
859
926
|
}
|
|
860
|
-
|
|
861
|
-
#ifndef
|
|
927
|
+
|
|
928
|
+
#ifndef EIGEN_GPUCC
|
|
862
929
|
template<int LoadMode,typename PacketType>
|
|
863
930
|
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
|
864
931
|
{
|
|
@@ -867,7 +934,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|
|
867
934
|
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
|
|
868
935
|
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
|
|
869
936
|
}
|
|
870
|
-
|
|
937
|
+
|
|
871
938
|
template<int LoadMode,typename PacketType>
|
|
872
939
|
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
|
873
940
|
{
|
|
@@ -886,30 +953,30 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
|
|
886
953
|
using Base::m_matImpl;
|
|
887
954
|
using Base::coeff;
|
|
888
955
|
typedef typename Base::Scalar Scalar;
|
|
889
|
-
|
|
956
|
+
|
|
890
957
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
|
891
958
|
typedef typename XprType::PlainObject PlainObject;
|
|
892
|
-
|
|
893
|
-
enum { StorageOrder =
|
|
959
|
+
|
|
960
|
+
enum { StorageOrder = Base::_StorageOrder };
|
|
894
961
|
|
|
895
962
|
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
|
896
963
|
: Base(xpr.lhs(), xpr.rhs().diagonal())
|
|
897
964
|
{
|
|
898
965
|
}
|
|
899
|
-
|
|
966
|
+
|
|
900
967
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
|
901
968
|
{
|
|
902
969
|
return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
|
|
903
970
|
}
|
|
904
|
-
|
|
905
|
-
#ifndef
|
|
971
|
+
|
|
972
|
+
#ifndef EIGEN_GPUCC
|
|
906
973
|
template<int LoadMode,typename PacketType>
|
|
907
974
|
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
|
908
975
|
{
|
|
909
976
|
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
|
|
910
977
|
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
|
|
911
978
|
}
|
|
912
|
-
|
|
979
|
+
|
|
913
980
|
template<int LoadMode,typename PacketType>
|
|
914
981
|
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
|
915
982
|
{
|
|
@@ -937,7 +1004,7 @@ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
|
|
|
937
1004
|
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
|
938
1005
|
|
|
939
1006
|
template<typename Dest, typename PermutationType>
|
|
940
|
-
static
|
|
1007
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
|
|
941
1008
|
{
|
|
942
1009
|
MatrixType mat(xpr);
|
|
943
1010
|
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
|
|
@@ -991,7 +1058,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
991
1058
|
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
|
|
992
1059
|
{
|
|
993
1060
|
template<typename Dest>
|
|
994
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1061
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
995
1062
|
{
|
|
996
1063
|
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
|
997
1064
|
}
|
|
@@ -1001,7 +1068,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1001
1068
|
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
|
|
1002
1069
|
{
|
|
1003
1070
|
template<typename Dest>
|
|
1004
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1071
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1005
1072
|
{
|
|
1006
1073
|
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
|
1007
1074
|
}
|
|
@@ -1011,7 +1078,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1011
1078
|
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
|
|
1012
1079
|
{
|
|
1013
1080
|
template<typename Dest>
|
|
1014
|
-
static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
|
1081
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
|
1015
1082
|
{
|
|
1016
1083
|
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
|
1017
1084
|
}
|
|
@@ -1021,7 +1088,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1021
1088
|
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
|
|
1022
1089
|
{
|
|
1023
1090
|
template<typename Dest>
|
|
1024
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
|
1091
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
|
1025
1092
|
{
|
|
1026
1093
|
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
|
1027
1094
|
}
|
|
@@ -1043,9 +1110,9 @@ struct transposition_matrix_product
|
|
|
1043
1110
|
{
|
|
1044
1111
|
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
|
|
1045
1112
|
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
|
1046
|
-
|
|
1113
|
+
|
|
1047
1114
|
template<typename Dest, typename TranspositionType>
|
|
1048
|
-
static
|
|
1115
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
|
|
1049
1116
|
{
|
|
1050
1117
|
MatrixType mat(xpr);
|
|
1051
1118
|
typedef typename TranspositionType::StorageIndex StorageIndex;
|
|
@@ -1068,7 +1135,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1068
1135
|
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
|
1069
1136
|
{
|
|
1070
1137
|
template<typename Dest>
|
|
1071
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1138
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1072
1139
|
{
|
|
1073
1140
|
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
|
1074
1141
|
}
|
|
@@ -1078,7 +1145,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1078
1145
|
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
|
|
1079
1146
|
{
|
|
1080
1147
|
template<typename Dest>
|
|
1081
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1148
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1082
1149
|
{
|
|
1083
1150
|
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
|
1084
1151
|
}
|
|
@@ -1089,7 +1156,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1089
1156
|
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
|
1090
1157
|
{
|
|
1091
1158
|
template<typename Dest>
|
|
1092
|
-
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
|
1159
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
|
1093
1160
|
{
|
|
1094
1161
|
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
|
1095
1162
|
}
|
|
@@ -1099,7 +1166,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1099
1166
|
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
|
|
1100
1167
|
{
|
|
1101
1168
|
template<typename Dest>
|
|
1102
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
|
1169
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
|
1103
1170
|
{
|
|
1104
1171
|
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
|
1105
1172
|
}
|