tomoto 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +8 -10
- data/ext/tomoto/extconf.rb +6 -2
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +1 -1
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +21 -0
- data/vendor/tomotopy/README.rst +20 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +2 -1
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +2 -1
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +53 -2
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +16 -5
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +31 -1
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +7 -5
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +60 -14
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -14,27 +14,27 @@
|
|
14
14
|
#define EIGEN_PRODUCTEVALUATORS_H
|
15
15
|
|
16
16
|
namespace Eigen {
|
17
|
-
|
17
|
+
|
18
18
|
namespace internal {
|
19
19
|
|
20
20
|
/** \internal
|
21
21
|
* Evaluator of a product expression.
|
22
22
|
* Since products require special treatments to handle all possible cases,
|
23
|
-
* we simply
|
23
|
+
* we simply defer the evaluation logic to a product_evaluator class
|
24
24
|
* which offers more partial specialization possibilities.
|
25
|
-
*
|
25
|
+
*
|
26
26
|
* \sa class product_evaluator
|
27
27
|
*/
|
28
28
|
template<typename Lhs, typename Rhs, int Options>
|
29
|
-
struct evaluator<Product<Lhs, Rhs, Options> >
|
29
|
+
struct evaluator<Product<Lhs, Rhs, Options> >
|
30
30
|
: public product_evaluator<Product<Lhs, Rhs, Options> >
|
31
31
|
{
|
32
32
|
typedef Product<Lhs, Rhs, Options> XprType;
|
33
33
|
typedef product_evaluator<XprType> Base;
|
34
|
-
|
34
|
+
|
35
35
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
36
36
|
};
|
37
|
-
|
37
|
+
|
38
38
|
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
|
39
39
|
// TODO we should apply that rule only if that's really helpful
|
40
40
|
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
@@ -62,12 +62,12 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
|
62
62
|
|
63
63
|
|
64
64
|
template<typename Lhs, typename Rhs, int DiagIndex>
|
65
|
-
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
|
65
|
+
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
|
66
66
|
: public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
|
67
67
|
{
|
68
68
|
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
|
69
69
|
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
|
70
|
-
|
70
|
+
|
71
71
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
|
72
72
|
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
|
73
73
|
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
|
@@ -108,27 +108,27 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
|
|
108
108
|
: m_result(xpr.rows(), xpr.cols())
|
109
109
|
{
|
110
110
|
::new (static_cast<Base*>(this)) Base(m_result);
|
111
|
-
|
111
|
+
|
112
112
|
// FIXME shall we handle nested_eval here?,
|
113
113
|
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
|
114
114
|
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
115
115
|
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
116
116
|
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
117
117
|
// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
118
|
-
//
|
118
|
+
//
|
119
119
|
// const LhsNested lhs(xpr.lhs());
|
120
120
|
// const RhsNested rhs(xpr.rhs());
|
121
|
-
//
|
121
|
+
//
|
122
122
|
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
|
123
123
|
|
124
124
|
generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
|
125
125
|
}
|
126
|
-
|
127
|
-
protected:
|
126
|
+
|
127
|
+
protected:
|
128
128
|
PlainObject m_result;
|
129
129
|
};
|
130
130
|
|
131
|
-
// The following three shortcuts are enabled only if the scalar types match
|
131
|
+
// The following three shortcuts are enabled only if the scalar types match exactly.
|
132
132
|
// TODO: we could enable them for different scalar types when the product is not vectorized.
|
133
133
|
|
134
134
|
// Dense = Product
|
@@ -137,7 +137,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
|
|
137
137
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
138
138
|
{
|
139
139
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
140
|
-
static EIGEN_STRONG_INLINE
|
140
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
141
141
|
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
142
142
|
{
|
143
143
|
Index dstRows = src.rows();
|
@@ -155,7 +155,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
|
155
155
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
156
156
|
{
|
157
157
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
158
|
-
static EIGEN_STRONG_INLINE
|
158
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
159
159
|
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
160
160
|
{
|
161
161
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
@@ -170,7 +170,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
|
170
170
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
171
171
|
{
|
172
172
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
173
|
-
static EIGEN_STRONG_INLINE
|
173
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
174
174
|
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
175
175
|
{
|
176
176
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
@@ -190,7 +190,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
|
|
190
190
|
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
|
191
191
|
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
|
192
192
|
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
|
193
|
-
static EIGEN_STRONG_INLINE
|
193
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
194
194
|
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
|
195
195
|
{
|
196
196
|
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
|
@@ -217,7 +217,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
|
|
217
217
|
struct assignment_from_xpr_op_product
|
218
218
|
{
|
219
219
|
template<typename SrcXprType, typename InitialFunc>
|
220
|
-
static EIGEN_STRONG_INLINE
|
220
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
221
221
|
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
|
222
222
|
{
|
223
223
|
call_assignment_no_alias(dst, src.lhs(), Func1());
|
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
|
|
246
246
|
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
247
247
|
{
|
248
248
|
template<typename Dst>
|
249
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
249
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
250
250
|
{
|
251
251
|
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
252
252
|
}
|
253
|
-
|
253
|
+
|
254
254
|
template<typename Dst>
|
255
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
255
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
256
256
|
{
|
257
257
|
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
|
258
258
|
}
|
259
|
-
|
259
|
+
|
260
260
|
template<typename Dst>
|
261
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
261
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
262
262
|
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
|
263
263
|
};
|
264
264
|
|
@@ -269,10 +269,10 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
|
269
269
|
|
270
270
|
// Column major result
|
271
271
|
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
272
|
-
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
272
|
+
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
273
273
|
{
|
274
274
|
evaluator<Rhs> rhsEval(rhs);
|
275
|
-
|
275
|
+
ei_declare_local_nested_eval(Lhs,lhs,Rhs::SizeAtCompileTime,actual_lhs);
|
276
276
|
// FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
|
277
277
|
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
278
278
|
const Index cols = dst.cols();
|
@@ -282,10 +282,10 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
|
|
282
282
|
|
283
283
|
// Row major result
|
284
284
|
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
285
|
-
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
285
|
+
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
286
286
|
{
|
287
287
|
evaluator<Lhs> lhsEval(lhs);
|
288
|
-
|
288
|
+
ei_declare_local_nested_eval(Rhs,rhs,Lhs::SizeAtCompileTime,actual_rhs);
|
289
289
|
// FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
|
290
290
|
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
291
291
|
const Index rows = dst.rows();
|
@@ -298,43 +298,43 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
|
|
298
298
|
{
|
299
299
|
template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
|
300
300
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
301
|
-
|
301
|
+
|
302
302
|
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
|
303
|
-
struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
|
304
|
-
struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
|
305
|
-
struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
|
303
|
+
struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
|
304
|
+
struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
|
305
|
+
struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
|
306
306
|
struct adds {
|
307
307
|
Scalar m_scale;
|
308
308
|
explicit adds(const Scalar& s) : m_scale(s) {}
|
309
|
-
template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
|
309
|
+
template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
|
310
310
|
dst.const_cast_derived() += m_scale * src;
|
311
311
|
}
|
312
312
|
};
|
313
|
-
|
313
|
+
|
314
314
|
template<typename Dst>
|
315
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
315
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
316
316
|
{
|
317
317
|
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
|
318
318
|
}
|
319
|
-
|
319
|
+
|
320
320
|
template<typename Dst>
|
321
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
321
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
322
322
|
{
|
323
323
|
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
|
324
324
|
}
|
325
|
-
|
325
|
+
|
326
326
|
template<typename Dst>
|
327
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
327
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
328
328
|
{
|
329
329
|
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
|
330
330
|
}
|
331
|
-
|
331
|
+
|
332
332
|
template<typename Dst>
|
333
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
333
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
334
334
|
{
|
335
335
|
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
|
336
336
|
}
|
337
|
-
|
337
|
+
|
338
338
|
};
|
339
339
|
|
340
340
|
|
@@ -343,21 +343,21 @@ template<typename Lhs, typename Rhs, typename Derived>
|
|
343
343
|
struct generic_product_impl_base
|
344
344
|
{
|
345
345
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
346
|
-
|
346
|
+
|
347
347
|
template<typename Dst>
|
348
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
348
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
349
349
|
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
|
350
350
|
|
351
351
|
template<typename Dst>
|
352
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
352
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
353
353
|
{ scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
|
354
354
|
|
355
355
|
template<typename Dst>
|
356
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
356
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
357
357
|
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
|
358
|
-
|
358
|
+
|
359
359
|
template<typename Dst>
|
360
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
360
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
361
361
|
{ Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
|
362
362
|
|
363
363
|
};
|
@@ -373,8 +373,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|
373
373
|
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
374
374
|
|
375
375
|
template<typename Dest>
|
376
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
376
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
377
377
|
{
|
378
|
+
// Fall back to inner product if both the lhs and rhs are runtime vectors.
|
379
|
+
if (lhs.rows() == 1 && rhs.cols() == 1) {
|
380
|
+
dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
|
381
|
+
return;
|
382
|
+
}
|
378
383
|
LhsNested actual_lhs(lhs);
|
379
384
|
RhsNested actual_rhs(rhs);
|
380
385
|
internal::gemv_dense_selector<Side,
|
@@ -385,35 +390,84 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|
385
390
|
};
|
386
391
|
|
387
392
|
template<typename Lhs, typename Rhs>
|
388
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
393
|
+
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
389
394
|
{
|
390
395
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
391
|
-
|
396
|
+
|
392
397
|
template<typename Dst>
|
393
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
398
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
394
399
|
{
|
395
400
|
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
|
396
401
|
// but easier on the compiler side
|
397
402
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
|
398
403
|
}
|
399
|
-
|
404
|
+
|
400
405
|
template<typename Dst>
|
401
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
406
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
402
407
|
{
|
403
408
|
// dst.noalias() += lhs.lazyProduct(rhs);
|
404
409
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
405
410
|
}
|
406
|
-
|
411
|
+
|
407
412
|
template<typename Dst>
|
408
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
413
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
409
414
|
{
|
410
415
|
// dst.noalias() -= lhs.lazyProduct(rhs);
|
411
416
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
412
417
|
}
|
413
|
-
|
414
|
-
//
|
415
|
-
//
|
416
|
-
// {
|
418
|
+
|
419
|
+
// This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
|
420
|
+
// This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
|
421
|
+
// dst {,+,-}= (s1*A)*(B*s2)
|
422
|
+
// will be rewritten as:
|
423
|
+
// dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
|
424
|
+
// There are at least four benefits of doing so:
|
425
|
+
// 1 - huge performance gain for heap-allocated matrix types as it saves costly allocations.
|
426
|
+
// 2 - it is faster than simply by-passing the heap allocation through stack allocation.
|
427
|
+
// 3 - it makes this fallback consistent with the heavy GEMM routine.
|
428
|
+
// 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
|
429
|
+
// (see https://stackoverflow.com/questions/54738495)
|
430
|
+
// For small fixed-size matrices, however, the gains are less obvious: it is sometimes x2 faster, but sometimes x3 slower,
|
431
|
+
// and the behavior also depends a lot on the compiler... This is why this rewriting strategy is currently
|
432
|
+
// enabled only when falling back from the main GEMM.
|
433
|
+
template<typename Dst, typename Func>
|
434
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
435
|
+
void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func &func)
|
436
|
+
{
|
437
|
+
enum {
|
438
|
+
HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
|
439
|
+
ConjLhs = blas_traits<Lhs>::NeedToConjugate,
|
440
|
+
ConjRhs = blas_traits<Rhs>::NeedToConjugate
|
441
|
+
};
|
442
|
+
// FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
|
443
|
+
// this is important for real*complex_mat
|
444
|
+
Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
|
445
|
+
|
446
|
+
eval_dynamic_impl(dst,
|
447
|
+
blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
|
448
|
+
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
|
449
|
+
func,
|
450
|
+
actualAlpha,
|
451
|
+
typename conditional<HasScalarFactor,true_type,false_type>::type());
|
452
|
+
}
|
453
|
+
|
454
|
+
protected:
|
455
|
+
|
456
|
+
template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
|
457
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
458
|
+
void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type)
|
459
|
+
{
|
460
|
+
EIGEN_UNUSED_VARIABLE(s);
|
461
|
+
eigen_internal_assert(s==Scalar(1));
|
462
|
+
call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
|
463
|
+
}
|
464
|
+
|
465
|
+
template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
|
466
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
467
|
+
void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s, true_type)
|
468
|
+
{
|
469
|
+
call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
|
470
|
+
}
|
417
471
|
};
|
418
472
|
|
419
473
|
// This specialization enforces the use of a coefficient-based evaluation strategy
|
@@ -471,7 +525,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
471
525
|
|
472
526
|
typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
473
527
|
typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
474
|
-
|
528
|
+
|
475
529
|
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
476
530
|
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
477
531
|
|
@@ -490,19 +544,19 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
490
544
|
typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
|
491
545
|
|
492
546
|
enum {
|
493
|
-
|
547
|
+
|
494
548
|
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
495
549
|
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
496
550
|
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
497
551
|
: InnerSize == Dynamic ? HugeCost
|
498
|
-
|
552
|
+
: InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
|
499
553
|
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
500
554
|
|
501
555
|
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
502
|
-
|
556
|
+
|
503
557
|
LhsFlags = LhsEtorType::Flags,
|
504
558
|
RhsFlags = RhsEtorType::Flags,
|
505
|
-
|
559
|
+
|
506
560
|
LhsRowMajor = LhsFlags & RowMajorBit,
|
507
561
|
RhsRowMajor = RhsFlags & RowMajorBit,
|
508
562
|
|
@@ -512,7 +566,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
512
566
|
// Here, we don't care about alignment larger than the usable packet size.
|
513
567
|
LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
|
514
568
|
RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
|
515
|
-
|
569
|
+
|
516
570
|
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
517
571
|
|
518
572
|
CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
|
@@ -522,12 +576,12 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
522
576
|
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
523
577
|
: (bool(RhsRowMajor) && !CanVectorizeLhs),
|
524
578
|
|
525
|
-
Flags = ((
|
579
|
+
Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
|
526
580
|
| (EvalToRowMajor ? RowMajorBit : 0)
|
527
581
|
// TODO enable vectorization for mixed types
|
528
582
|
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
|
529
583
|
| (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
|
530
|
-
|
584
|
+
|
531
585
|
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
|
532
586
|
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
|
533
587
|
|
@@ -543,10 +597,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
543
597
|
CanVectorizeInner = SameType
|
544
598
|
&& LhsRowMajor
|
545
599
|
&& (!RhsRowMajor)
|
546
|
-
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
547
|
-
&& (InnerSize % packet_traits<Scalar>::size == 0)
|
600
|
+
&& (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
|
601
|
+
&& (int(InnerSize) % packet_traits<Scalar>::size == 0)
|
548
602
|
};
|
549
|
-
|
603
|
+
|
550
604
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
|
551
605
|
{
|
552
606
|
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
|
@@ -556,7 +610,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
556
610
|
* which is why we don't set the LinearAccessBit.
|
557
611
|
* TODO: this seems possible when the result is a vector
|
558
612
|
*/
|
559
|
-
EIGEN_DEVICE_FUNC
|
613
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
614
|
+
const CoeffReturnType coeff(Index index) const
|
560
615
|
{
|
561
616
|
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
|
562
617
|
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
|
@@ -564,6 +619,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
564
619
|
}
|
565
620
|
|
566
621
|
template<int LoadMode, typename PacketType>
|
622
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
567
623
|
const PacketType packet(Index row, Index col) const
|
568
624
|
{
|
569
625
|
PacketType res;
|
@@ -575,6 +631,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
575
631
|
}
|
576
632
|
|
577
633
|
template<int LoadMode, typename PacketType>
|
634
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
578
635
|
const PacketType packet(Index index) const
|
579
636
|
{
|
580
637
|
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
|
@@ -585,7 +642,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
585
642
|
protected:
|
586
643
|
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
|
587
644
|
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
|
588
|
-
|
645
|
+
|
589
646
|
LhsEtorType m_lhsImpl;
|
590
647
|
RhsEtorType m_rhsImpl;
|
591
648
|
|
@@ -603,7 +660,8 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
|
|
603
660
|
enum {
|
604
661
|
Flags = Base::Flags | EvalBeforeNestingBit
|
605
662
|
};
|
606
|
-
EIGEN_DEVICE_FUNC
|
663
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
664
|
+
explicit product_evaluator(const XprType& xpr)
|
607
665
|
: Base(BaseProduct(xpr.lhs(),xpr.rhs()))
|
608
666
|
{}
|
609
667
|
};
|
@@ -615,7 +673,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
|
|
615
673
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
616
674
|
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
617
675
|
{
|
618
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
676
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
619
677
|
{
|
620
678
|
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
621
679
|
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
|
@@ -625,7 +683,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|
625
683
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
626
684
|
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
627
685
|
{
|
628
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
686
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
629
687
|
{
|
630
688
|
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
631
689
|
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
|
@@ -635,7 +693,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|
635
693
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
636
694
|
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
637
695
|
{
|
638
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
696
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
639
697
|
{
|
640
698
|
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
|
641
699
|
}
|
@@ -644,7 +702,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
644
702
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
645
703
|
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
646
704
|
{
|
647
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
705
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
648
706
|
{
|
649
707
|
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
650
708
|
}
|
@@ -653,7 +711,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
653
711
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
654
712
|
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
655
713
|
{
|
656
|
-
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
714
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
657
715
|
{
|
658
716
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
659
717
|
}
|
@@ -662,7 +720,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
662
720
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
663
721
|
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
664
722
|
{
|
665
|
-
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
723
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
666
724
|
{
|
667
725
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
668
726
|
}
|
@@ -671,7 +729,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
671
729
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
672
730
|
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
673
731
|
{
|
674
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
732
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
675
733
|
{
|
676
734
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
677
735
|
for(Index i = 0; i < innerDim; ++i)
|
@@ -682,7 +740,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
682
740
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
683
741
|
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
684
742
|
{
|
685
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
743
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
686
744
|
{
|
687
745
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
688
746
|
for(Index i = 0; i < innerDim; ++i)
|
@@ -704,7 +762,7 @@ struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
|
|
704
762
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
|
705
763
|
{
|
706
764
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
707
|
-
|
765
|
+
|
708
766
|
template<typename Dest>
|
709
767
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
710
768
|
{
|
@@ -718,7 +776,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
|
|
718
776
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
|
719
777
|
{
|
720
778
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
721
|
-
|
779
|
+
|
722
780
|
template<typename Dest>
|
723
781
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
724
782
|
{
|
@@ -739,9 +797,10 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
|
|
739
797
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
|
740
798
|
{
|
741
799
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
742
|
-
|
800
|
+
|
743
801
|
template<typename Dest>
|
744
|
-
static
|
802
|
+
static EIGEN_DEVICE_FUNC
|
803
|
+
void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
745
804
|
{
|
746
805
|
selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
|
747
806
|
}
|
@@ -752,7 +811,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
|
|
752
811
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
|
753
812
|
{
|
754
813
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
755
|
-
|
814
|
+
|
756
815
|
template<typename Dest>
|
757
816
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
758
817
|
{
|
@@ -764,7 +823,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
|
|
764
823
|
/***************************************************************************
|
765
824
|
* Diagonal products
|
766
825
|
***************************************************************************/
|
767
|
-
|
826
|
+
|
768
827
|
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
|
769
828
|
struct diagonal_product_evaluator_base
|
770
829
|
: evaluator_base<Derived>
|
@@ -772,17 +831,25 @@ struct diagonal_product_evaluator_base
|
|
772
831
|
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
773
832
|
public:
|
774
833
|
enum {
|
775
|
-
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
|
776
|
-
|
834
|
+
CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
|
835
|
+
|
777
836
|
MatrixFlags = evaluator<MatrixType>::Flags,
|
778
837
|
DiagFlags = evaluator<DiagonalType>::Flags,
|
779
|
-
|
838
|
+
|
839
|
+
_StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
|
840
|
+
: (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
|
841
|
+
: MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
|
842
|
+
_SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
|
843
|
+
|
780
844
|
_ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
|
781
845
|
||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
|
782
846
|
_SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
|
783
847
|
// FIXME currently we need same types, but in the future the next rule should be the one
|
784
848
|
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
|
785
|
-
_Vectorizable =
|
849
|
+
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit)
|
850
|
+
&& _SameTypes
|
851
|
+
&& (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
|
852
|
+
&& (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
|
786
853
|
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
|
787
854
|
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
|
788
855
|
Alignment = evaluator<MatrixType>::Alignment,
|
@@ -791,14 +858,14 @@ public:
|
|
791
858
|
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
|
792
859
|
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
|
793
860
|
};
|
794
|
-
|
795
|
-
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
861
|
+
|
862
|
+
EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
796
863
|
: m_diagImpl(diag), m_matImpl(mat)
|
797
864
|
{
|
798
865
|
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
799
866
|
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
800
867
|
}
|
801
|
-
|
868
|
+
|
802
869
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
|
803
870
|
{
|
804
871
|
if(AsScalarProduct)
|
@@ -806,7 +873,7 @@ public:
|
|
806
873
|
else
|
807
874
|
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
|
808
875
|
}
|
809
|
-
|
876
|
+
|
810
877
|
protected:
|
811
878
|
template<int LoadMode,typename PacketType>
|
812
879
|
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
|
@@ -814,7 +881,7 @@ protected:
|
|
814
881
|
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
815
882
|
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
|
816
883
|
}
|
817
|
-
|
884
|
+
|
818
885
|
template<int LoadMode,typename PacketType>
|
819
886
|
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
|
820
887
|
{
|
@@ -825,7 +892,7 @@ protected:
|
|
825
892
|
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
826
893
|
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
|
827
894
|
}
|
828
|
-
|
895
|
+
|
829
896
|
evaluator<DiagonalType> m_diagImpl;
|
830
897
|
evaluator<MatrixType> m_matImpl;
|
831
898
|
};
|
@@ -840,25 +907,25 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|
840
907
|
using Base::m_matImpl;
|
841
908
|
using Base::coeff;
|
842
909
|
typedef typename Base::Scalar Scalar;
|
843
|
-
|
910
|
+
|
844
911
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
845
912
|
typedef typename XprType::PlainObject PlainObject;
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
};
|
913
|
+
typedef typename Lhs::DiagonalVectorType DiagonalType;
|
914
|
+
|
915
|
+
|
916
|
+
enum { StorageOrder = Base::_StorageOrder };
|
850
917
|
|
851
918
|
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
852
919
|
: Base(xpr.rhs(), xpr.lhs().diagonal())
|
853
920
|
{
|
854
921
|
}
|
855
|
-
|
922
|
+
|
856
923
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
857
924
|
{
|
858
925
|
return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
|
859
926
|
}
|
860
|
-
|
861
|
-
#ifndef
|
927
|
+
|
928
|
+
#ifndef EIGEN_GPUCC
|
862
929
|
template<int LoadMode,typename PacketType>
|
863
930
|
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
864
931
|
{
|
@@ -867,7 +934,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|
867
934
|
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
|
868
935
|
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
|
869
936
|
}
|
870
|
-
|
937
|
+
|
871
938
|
template<int LoadMode,typename PacketType>
|
872
939
|
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
873
940
|
{
|
@@ -886,30 +953,30 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
|
886
953
|
using Base::m_matImpl;
|
887
954
|
using Base::coeff;
|
888
955
|
typedef typename Base::Scalar Scalar;
|
889
|
-
|
956
|
+
|
890
957
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
891
958
|
typedef typename XprType::PlainObject PlainObject;
|
892
|
-
|
893
|
-
enum { StorageOrder =
|
959
|
+
|
960
|
+
enum { StorageOrder = Base::_StorageOrder };
|
894
961
|
|
895
962
|
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
896
963
|
: Base(xpr.lhs(), xpr.rhs().diagonal())
|
897
964
|
{
|
898
965
|
}
|
899
|
-
|
966
|
+
|
900
967
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
901
968
|
{
|
902
969
|
return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
|
903
970
|
}
|
904
|
-
|
905
|
-
#ifndef
|
971
|
+
|
972
|
+
#ifndef EIGEN_GPUCC
|
906
973
|
template<int LoadMode,typename PacketType>
|
907
974
|
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
908
975
|
{
|
909
976
|
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
|
910
977
|
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
|
911
978
|
}
|
912
|
-
|
979
|
+
|
913
980
|
template<int LoadMode,typename PacketType>
|
914
981
|
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
915
982
|
{
|
@@ -937,7 +1004,7 @@ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
|
|
937
1004
|
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
938
1005
|
|
939
1006
|
template<typename Dest, typename PermutationType>
|
940
|
-
static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
|
1007
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
|
941
1008
|
{
|
942
1009
|
MatrixType mat(xpr);
|
943
1010
|
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
|
@@ -991,7 +1058,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
991
1058
|
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
|
992
1059
|
{
|
993
1060
|
template<typename Dest>
|
994
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
1061
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
995
1062
|
{
|
996
1063
|
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
997
1064
|
}
|
@@ -1001,7 +1068,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1001
1068
|
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
|
1002
1069
|
{
|
1003
1070
|
template<typename Dest>
|
1004
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
1071
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
1005
1072
|
{
|
1006
1073
|
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
1007
1074
|
}
|
@@ -1011,7 +1078,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1011
1078
|
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
|
1012
1079
|
{
|
1013
1080
|
template<typename Dest>
|
1014
|
-
static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
1081
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
1015
1082
|
{
|
1016
1083
|
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
1017
1084
|
}
|
@@ -1021,7 +1088,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1021
1088
|
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
|
1022
1089
|
{
|
1023
1090
|
template<typename Dest>
|
1024
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
1091
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
1025
1092
|
{
|
1026
1093
|
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
1027
1094
|
}
|
@@ -1043,9 +1110,9 @@ struct transposition_matrix_product
|
|
1043
1110
|
{
|
1044
1111
|
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
|
1045
1112
|
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
1046
|
-
|
1113
|
+
|
1047
1114
|
template<typename Dest, typename TranspositionType>
|
1048
|
-
static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
|
1115
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
|
1049
1116
|
{
|
1050
1117
|
MatrixType mat(xpr);
|
1051
1118
|
typedef typename TranspositionType::StorageIndex StorageIndex;
|
@@ -1068,7 +1135,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1068
1135
|
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
1069
1136
|
{
|
1070
1137
|
template<typename Dest>
|
1071
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
1138
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
1072
1139
|
{
|
1073
1140
|
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
1074
1141
|
}
|
@@ -1078,7 +1145,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1078
1145
|
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
|
1079
1146
|
{
|
1080
1147
|
template<typename Dest>
|
1081
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
1148
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
1082
1149
|
{
|
1083
1150
|
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
1084
1151
|
}
|
@@ -1089,7 +1156,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1089
1156
|
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
1090
1157
|
{
|
1091
1158
|
template<typename Dest>
|
1092
|
-
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
1159
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
1093
1160
|
{
|
1094
1161
|
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
1095
1162
|
}
|
@@ -1099,7 +1166,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1099
1166
|
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
|
1100
1167
|
{
|
1101
1168
|
template<typename Dest>
|
1102
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
1169
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
1103
1170
|
{
|
1104
1171
|
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
1105
1172
|
}
|