@smake/eigen 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +1 -1
- package/eigen/COPYING.MINPACK +51 -52
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +2 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -14,27 +14,27 @@
|
|
|
14
14
|
#define EIGEN_PRODUCTEVALUATORS_H
|
|
15
15
|
|
|
16
16
|
namespace Eigen {
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
namespace internal {
|
|
19
19
|
|
|
20
20
|
/** \internal
|
|
21
21
|
* Evaluator of a product expression.
|
|
22
22
|
* Since products require special treatments to handle all possible cases,
|
|
23
|
-
* we simply
|
|
23
|
+
* we simply defer the evaluation logic to a product_evaluator class
|
|
24
24
|
* which offers more partial specialization possibilities.
|
|
25
|
-
*
|
|
25
|
+
*
|
|
26
26
|
* \sa class product_evaluator
|
|
27
27
|
*/
|
|
28
28
|
template<typename Lhs, typename Rhs, int Options>
|
|
29
|
-
struct evaluator<Product<Lhs, Rhs, Options> >
|
|
29
|
+
struct evaluator<Product<Lhs, Rhs, Options> >
|
|
30
30
|
: public product_evaluator<Product<Lhs, Rhs, Options> >
|
|
31
31
|
{
|
|
32
32
|
typedef Product<Lhs, Rhs, Options> XprType;
|
|
33
33
|
typedef product_evaluator<XprType> Base;
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
|
36
36
|
};
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
|
|
39
39
|
// TODO we should apply that rule only if that's really helpful
|
|
40
40
|
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
|
@@ -62,12 +62,12 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
template<typename Lhs, typename Rhs, int DiagIndex>
|
|
65
|
-
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
|
|
65
|
+
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
|
|
66
66
|
: public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
|
|
67
67
|
{
|
|
68
68
|
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
|
|
69
69
|
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
|
|
70
|
-
|
|
70
|
+
|
|
71
71
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
|
|
72
72
|
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
|
|
73
73
|
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
|
|
@@ -108,27 +108,27 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
|
|
|
108
108
|
: m_result(xpr.rows(), xpr.cols())
|
|
109
109
|
{
|
|
110
110
|
::new (static_cast<Base*>(this)) Base(m_result);
|
|
111
|
-
|
|
111
|
+
|
|
112
112
|
// FIXME shall we handle nested_eval here?,
|
|
113
113
|
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
|
|
114
114
|
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
|
115
115
|
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
|
116
116
|
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
|
117
117
|
// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
|
118
|
-
//
|
|
118
|
+
//
|
|
119
119
|
// const LhsNested lhs(xpr.lhs());
|
|
120
120
|
// const RhsNested rhs(xpr.rhs());
|
|
121
|
-
//
|
|
121
|
+
//
|
|
122
122
|
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
|
|
123
123
|
|
|
124
124
|
generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
|
|
125
125
|
}
|
|
126
|
-
|
|
127
|
-
protected:
|
|
126
|
+
|
|
127
|
+
protected:
|
|
128
128
|
PlainObject m_result;
|
|
129
129
|
};
|
|
130
130
|
|
|
131
|
-
// The following three shortcuts are enabled only if the scalar types match
|
|
131
|
+
// The following three shortcuts are enabled only if the scalar types match exactly.
|
|
132
132
|
// TODO: we could enable them for different scalar types when the product is not vectorized.
|
|
133
133
|
|
|
134
134
|
// Dense = Product
|
|
@@ -137,7 +137,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
|
|
|
137
137
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
|
138
138
|
{
|
|
139
139
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
|
140
|
-
static EIGEN_STRONG_INLINE
|
|
140
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
141
141
|
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
|
142
142
|
{
|
|
143
143
|
Index dstRows = src.rows();
|
|
@@ -155,7 +155,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
|
|
155
155
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
|
156
156
|
{
|
|
157
157
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
|
158
|
-
static EIGEN_STRONG_INLINE
|
|
158
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
159
159
|
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
|
160
160
|
{
|
|
161
161
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
@@ -170,7 +170,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
|
|
170
170
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
|
171
171
|
{
|
|
172
172
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
|
173
|
-
static EIGEN_STRONG_INLINE
|
|
173
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
174
174
|
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
|
175
175
|
{
|
|
176
176
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
@@ -190,7 +190,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
|
|
|
190
190
|
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
|
|
191
191
|
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
|
|
192
192
|
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
|
|
193
|
-
static EIGEN_STRONG_INLINE
|
|
193
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
194
194
|
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
|
|
195
195
|
{
|
|
196
196
|
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
|
|
@@ -217,7 +217,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
|
|
|
217
217
|
struct assignment_from_xpr_op_product
|
|
218
218
|
{
|
|
219
219
|
template<typename SrcXprType, typename InitialFunc>
|
|
220
|
-
static EIGEN_STRONG_INLINE
|
|
220
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
221
221
|
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
|
|
222
222
|
{
|
|
223
223
|
call_assignment_no_alias(dst, src.lhs(), Func1());
|
|
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
|
|
|
246
246
|
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
|
247
247
|
{
|
|
248
248
|
template<typename Dst>
|
|
249
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
249
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
250
250
|
{
|
|
251
251
|
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
|
252
252
|
}
|
|
253
|
-
|
|
253
|
+
|
|
254
254
|
template<typename Dst>
|
|
255
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
255
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
256
256
|
{
|
|
257
257
|
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
|
|
258
258
|
}
|
|
259
|
-
|
|
259
|
+
|
|
260
260
|
template<typename Dst>
|
|
261
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
261
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
262
262
|
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
|
|
263
263
|
};
|
|
264
264
|
|
|
@@ -269,10 +269,10 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
|
|
269
269
|
|
|
270
270
|
// Column major result
|
|
271
271
|
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
|
272
|
-
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
|
272
|
+
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
|
273
273
|
{
|
|
274
274
|
evaluator<Rhs> rhsEval(rhs);
|
|
275
|
-
|
|
275
|
+
ei_declare_local_nested_eval(Lhs,lhs,Rhs::SizeAtCompileTime,actual_lhs);
|
|
276
276
|
// FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
|
|
277
277
|
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
|
278
278
|
const Index cols = dst.cols();
|
|
@@ -282,10 +282,10 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
|
|
|
282
282
|
|
|
283
283
|
// Row major result
|
|
284
284
|
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
|
285
|
-
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
|
285
|
+
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
|
286
286
|
{
|
|
287
287
|
evaluator<Lhs> lhsEval(lhs);
|
|
288
|
-
|
|
288
|
+
ei_declare_local_nested_eval(Rhs,rhs,Lhs::SizeAtCompileTime,actual_rhs);
|
|
289
289
|
// FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
|
|
290
290
|
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
|
291
291
|
const Index rows = dst.rows();
|
|
@@ -298,43 +298,43 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
|
|
|
298
298
|
{
|
|
299
299
|
template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
|
|
300
300
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
301
|
-
|
|
301
|
+
|
|
302
302
|
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
|
|
303
|
-
struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
|
|
304
|
-
struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
|
|
305
|
-
struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
|
|
303
|
+
struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
|
|
304
|
+
struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
|
|
305
|
+
struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
|
|
306
306
|
struct adds {
|
|
307
307
|
Scalar m_scale;
|
|
308
308
|
explicit adds(const Scalar& s) : m_scale(s) {}
|
|
309
|
-
template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
|
|
309
|
+
template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
|
|
310
310
|
dst.const_cast_derived() += m_scale * src;
|
|
311
311
|
}
|
|
312
312
|
};
|
|
313
|
-
|
|
313
|
+
|
|
314
314
|
template<typename Dst>
|
|
315
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
315
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
316
316
|
{
|
|
317
317
|
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
|
|
318
318
|
}
|
|
319
|
-
|
|
319
|
+
|
|
320
320
|
template<typename Dst>
|
|
321
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
321
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
322
322
|
{
|
|
323
323
|
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
|
|
324
324
|
}
|
|
325
|
-
|
|
325
|
+
|
|
326
326
|
template<typename Dst>
|
|
327
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
327
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
328
328
|
{
|
|
329
329
|
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
|
|
330
330
|
}
|
|
331
|
-
|
|
331
|
+
|
|
332
332
|
template<typename Dst>
|
|
333
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
333
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
334
334
|
{
|
|
335
335
|
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
|
|
336
336
|
}
|
|
337
|
-
|
|
337
|
+
|
|
338
338
|
};
|
|
339
339
|
|
|
340
340
|
|
|
@@ -343,21 +343,21 @@ template<typename Lhs, typename Rhs, typename Derived>
|
|
|
343
343
|
struct generic_product_impl_base
|
|
344
344
|
{
|
|
345
345
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
346
|
-
|
|
346
|
+
|
|
347
347
|
template<typename Dst>
|
|
348
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
348
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
349
349
|
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
|
|
350
350
|
|
|
351
351
|
template<typename Dst>
|
|
352
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
352
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
353
353
|
{ scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
|
|
354
354
|
|
|
355
355
|
template<typename Dst>
|
|
356
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
356
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
357
357
|
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
|
|
358
|
-
|
|
358
|
+
|
|
359
359
|
template<typename Dst>
|
|
360
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
360
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
361
361
|
{ Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
|
|
362
362
|
|
|
363
363
|
};
|
|
@@ -373,8 +373,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|
|
373
373
|
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
|
374
374
|
|
|
375
375
|
template<typename Dest>
|
|
376
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
376
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
377
377
|
{
|
|
378
|
+
// Fall back to the inner product if both the lhs and rhs are runtime vectors.
|
|
379
|
+
if (lhs.rows() == 1 && rhs.cols() == 1) {
|
|
380
|
+
dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
|
|
381
|
+
return;
|
|
382
|
+
}
|
|
378
383
|
LhsNested actual_lhs(lhs);
|
|
379
384
|
RhsNested actual_rhs(rhs);
|
|
380
385
|
internal::gemv_dense_selector<Side,
|
|
@@ -385,12 +390,12 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|
|
385
390
|
};
|
|
386
391
|
|
|
387
392
|
template<typename Lhs, typename Rhs>
|
|
388
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|
393
|
+
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|
389
394
|
{
|
|
390
395
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
391
|
-
|
|
396
|
+
|
|
392
397
|
template<typename Dst>
|
|
393
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
398
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
394
399
|
{
|
|
395
400
|
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
|
|
396
401
|
// but easier on the compiler side
|
|
@@ -398,48 +403,71 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|
|
398
403
|
}
|
|
399
404
|
|
|
400
405
|
template<typename Dst>
|
|
401
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
406
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
402
407
|
{
|
|
403
408
|
// dst.noalias() += lhs.lazyProduct(rhs);
|
|
404
409
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
|
405
410
|
}
|
|
406
|
-
|
|
411
|
+
|
|
407
412
|
template<typename Dst>
|
|
408
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
413
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
409
414
|
{
|
|
410
415
|
// dst.noalias() -= lhs.lazyProduct(rhs);
|
|
411
416
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
|
412
417
|
}
|
|
413
418
|
|
|
414
|
-
//
|
|
415
|
-
//
|
|
416
|
-
//
|
|
417
|
-
//
|
|
418
|
-
//
|
|
419
|
-
//
|
|
420
|
-
//
|
|
421
|
-
//
|
|
422
|
-
|
|
419
|
+
// This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
|
|
420
|
+
// This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
|
|
421
|
+
// dst {,+,-}= (s1*A)*(B*s2)
|
|
422
|
+
// will be rewritten as:
|
|
423
|
+
// dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
|
|
424
|
+
// There are at least four benefits of doing so:
|
|
425
|
+
// 1 - huge performance gain for heap-allocated matrix types as it saves costly allocations.
|
|
426
|
+
// 2 - it is faster than simply by-passing the heap allocation through stack allocation.
|
|
427
|
+
// 3 - it makes this fallback consistent with the heavy GEMM routine.
|
|
428
|
+
// 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
|
|
429
|
+
// (see https://stackoverflow.com/questions/54738495)
|
|
430
|
+
// For small fixed-size matrices, however, the gains are less obvious: it is sometimes x2 faster, but sometimes x3 slower,
|
|
431
|
+
// and the behavior also depends a lot on the compiler... This is why this rewriting strategy is currently
|
|
432
|
+
// enabled only when falling back from the main GEMM.
|
|
433
|
+
template<typename Dst, typename Func>
|
|
423
434
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
424
|
-
void eval_dynamic(Dst& dst, const
|
|
425
|
-
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, Xpr2>& lhs, const Rhs& rhs, const Func &func)
|
|
435
|
+
void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func &func)
|
|
426
436
|
{
|
|
427
|
-
|
|
437
|
+
enum {
|
|
438
|
+
HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
|
|
439
|
+
ConjLhs = blas_traits<Lhs>::NeedToConjugate,
|
|
440
|
+
ConjRhs = blas_traits<Rhs>::NeedToConjugate
|
|
441
|
+
};
|
|
442
|
+
// FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
|
|
443
|
+
// this is important for real*complex_mat
|
|
444
|
+
Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
|
|
445
|
+
|
|
446
|
+
eval_dynamic_impl(dst,
|
|
447
|
+
blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
|
|
448
|
+
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
|
|
449
|
+
func,
|
|
450
|
+
actualAlpha,
|
|
451
|
+
typename conditional<HasScalarFactor,true_type,false_type>::type());
|
|
428
452
|
}
|
|
429
453
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
template<typename Dst, typename LhsT, typename Func>
|
|
454
|
+
protected:
|
|
455
|
+
|
|
456
|
+
template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
|
|
457
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
458
|
+
void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type)
|
|
459
|
+
{
|
|
460
|
+
EIGEN_UNUSED_VARIABLE(s);
|
|
461
|
+
eigen_internal_assert(s==Scalar(1));
|
|
462
|
+
call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
|
|
433
466
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
434
|
-
void
|
|
467
|
+
void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s, true_type)
|
|
435
468
|
{
|
|
436
|
-
|
|
469
|
+
call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
|
|
437
470
|
}
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
// template<typename Dst>
|
|
441
|
-
// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
442
|
-
// { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
|
|
443
471
|
};
|
|
444
472
|
|
|
445
473
|
// This specialization enforces the use of a coefficient-based evaluation strategy
|
|
@@ -497,7 +525,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
497
525
|
|
|
498
526
|
typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
|
499
527
|
typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
|
500
|
-
|
|
528
|
+
|
|
501
529
|
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
|
502
530
|
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
|
503
531
|
|
|
@@ -516,19 +544,19 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
516
544
|
typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
|
|
517
545
|
|
|
518
546
|
enum {
|
|
519
|
-
|
|
547
|
+
|
|
520
548
|
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
|
521
549
|
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
|
522
550
|
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
|
523
551
|
: InnerSize == Dynamic ? HugeCost
|
|
524
|
-
|
|
552
|
+
: InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
|
|
525
553
|
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
|
526
554
|
|
|
527
555
|
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
|
528
|
-
|
|
556
|
+
|
|
529
557
|
LhsFlags = LhsEtorType::Flags,
|
|
530
558
|
RhsFlags = RhsEtorType::Flags,
|
|
531
|
-
|
|
559
|
+
|
|
532
560
|
LhsRowMajor = LhsFlags & RowMajorBit,
|
|
533
561
|
RhsRowMajor = RhsFlags & RowMajorBit,
|
|
534
562
|
|
|
@@ -538,7 +566,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
538
566
|
// Here, we don't care about alignment larger than the usable packet size.
|
|
539
567
|
LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
|
|
540
568
|
RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
|
|
541
|
-
|
|
569
|
+
|
|
542
570
|
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
|
543
571
|
|
|
544
572
|
CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
|
|
@@ -548,12 +576,12 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
548
576
|
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
|
549
577
|
: (bool(RhsRowMajor) && !CanVectorizeLhs),
|
|
550
578
|
|
|
551
|
-
Flags = ((
|
|
579
|
+
Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
|
|
552
580
|
| (EvalToRowMajor ? RowMajorBit : 0)
|
|
553
581
|
// TODO enable vectorization for mixed types
|
|
554
582
|
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
|
|
555
583
|
| (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
|
|
556
|
-
|
|
584
|
+
|
|
557
585
|
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
|
|
558
586
|
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
|
|
559
587
|
|
|
@@ -569,10 +597,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
569
597
|
CanVectorizeInner = SameType
|
|
570
598
|
&& LhsRowMajor
|
|
571
599
|
&& (!RhsRowMajor)
|
|
572
|
-
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
|
573
|
-
&& (InnerSize % packet_traits<Scalar>::size == 0)
|
|
600
|
+
&& (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
|
|
601
|
+
&& (int(InnerSize) % packet_traits<Scalar>::size == 0)
|
|
574
602
|
};
|
|
575
|
-
|
|
603
|
+
|
|
576
604
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
|
|
577
605
|
{
|
|
578
606
|
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
|
|
@@ -582,7 +610,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
582
610
|
* which is why we don't set the LinearAccessBit.
|
|
583
611
|
* TODO: this seems possible when the result is a vector
|
|
584
612
|
*/
|
|
585
|
-
EIGEN_DEVICE_FUNC
|
|
613
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
614
|
+
const CoeffReturnType coeff(Index index) const
|
|
586
615
|
{
|
|
587
616
|
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
|
|
588
617
|
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
|
|
@@ -590,6 +619,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
590
619
|
}
|
|
591
620
|
|
|
592
621
|
template<int LoadMode, typename PacketType>
|
|
622
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
593
623
|
const PacketType packet(Index row, Index col) const
|
|
594
624
|
{
|
|
595
625
|
PacketType res;
|
|
@@ -601,6 +631,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
601
631
|
}
|
|
602
632
|
|
|
603
633
|
template<int LoadMode, typename PacketType>
|
|
634
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
604
635
|
const PacketType packet(Index index) const
|
|
605
636
|
{
|
|
606
637
|
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
|
|
@@ -611,7 +642,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
611
642
|
protected:
|
|
612
643
|
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
|
|
613
644
|
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
|
|
614
|
-
|
|
645
|
+
|
|
615
646
|
LhsEtorType m_lhsImpl;
|
|
616
647
|
RhsEtorType m_rhsImpl;
|
|
617
648
|
|
|
@@ -629,7 +660,8 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
|
|
|
629
660
|
enum {
|
|
630
661
|
Flags = Base::Flags | EvalBeforeNestingBit
|
|
631
662
|
};
|
|
632
|
-
EIGEN_DEVICE_FUNC
|
|
663
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
664
|
+
explicit product_evaluator(const XprType& xpr)
|
|
633
665
|
: Base(BaseProduct(xpr.lhs(),xpr.rhs()))
|
|
634
666
|
{}
|
|
635
667
|
};
|
|
@@ -641,7 +673,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
|
|
|
641
673
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
642
674
|
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
|
643
675
|
{
|
|
644
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
|
676
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
|
645
677
|
{
|
|
646
678
|
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
|
647
679
|
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
|
|
@@ -651,7 +683,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|
|
651
683
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
652
684
|
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
|
653
685
|
{
|
|
654
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
|
686
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
|
655
687
|
{
|
|
656
688
|
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
|
657
689
|
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
|
|
@@ -661,7 +693,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|
|
661
693
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
662
694
|
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
663
695
|
{
|
|
664
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
|
696
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
|
665
697
|
{
|
|
666
698
|
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
|
|
667
699
|
}
|
|
@@ -670,7 +702,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
|
670
702
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
671
703
|
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
672
704
|
{
|
|
673
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
|
705
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
|
674
706
|
{
|
|
675
707
|
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
|
676
708
|
}
|
|
@@ -679,7 +711,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
|
679
711
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
680
712
|
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
681
713
|
{
|
|
682
|
-
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
|
714
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
|
683
715
|
{
|
|
684
716
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
685
717
|
}
|
|
@@ -688,7 +720,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
|
688
720
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
689
721
|
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
690
722
|
{
|
|
691
|
-
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
|
723
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
|
|
692
724
|
{
|
|
693
725
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
694
726
|
}
|
|
@@ -697,7 +729,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
|
697
729
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
698
730
|
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
699
731
|
{
|
|
700
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
|
732
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
|
701
733
|
{
|
|
702
734
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
703
735
|
for(Index i = 0; i < innerDim; ++i)
|
|
@@ -708,7 +740,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
|
708
740
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
709
741
|
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
710
742
|
{
|
|
711
|
-
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
|
743
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
|
712
744
|
{
|
|
713
745
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
714
746
|
for(Index i = 0; i < innerDim; ++i)
|
|
@@ -730,7 +762,7 @@ struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
|
|
|
730
762
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
|
|
731
763
|
{
|
|
732
764
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
733
|
-
|
|
765
|
+
|
|
734
766
|
template<typename Dest>
|
|
735
767
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
736
768
|
{
|
|
@@ -744,7 +776,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
|
|
|
744
776
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
|
|
745
777
|
{
|
|
746
778
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
747
|
-
|
|
779
|
+
|
|
748
780
|
template<typename Dest>
|
|
749
781
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
750
782
|
{
|
|
@@ -765,9 +797,10 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
|
|
|
765
797
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
|
|
766
798
|
{
|
|
767
799
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
768
|
-
|
|
800
|
+
|
|
769
801
|
template<typename Dest>
|
|
770
|
-
static
|
|
802
|
+
static EIGEN_DEVICE_FUNC
|
|
803
|
+
void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
771
804
|
{
|
|
772
805
|
selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
|
|
773
806
|
}
|
|
@@ -778,7 +811,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
|
|
|
778
811
|
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
|
|
779
812
|
{
|
|
780
813
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
781
|
-
|
|
814
|
+
|
|
782
815
|
template<typename Dest>
|
|
783
816
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
784
817
|
{
|
|
@@ -790,7 +823,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
|
|
|
790
823
|
/***************************************************************************
|
|
791
824
|
* Diagonal products
|
|
792
825
|
***************************************************************************/
|
|
793
|
-
|
|
826
|
+
|
|
794
827
|
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
|
|
795
828
|
struct diagonal_product_evaluator_base
|
|
796
829
|
: evaluator_base<Derived>
|
|
@@ -798,17 +831,25 @@ struct diagonal_product_evaluator_base
|
|
|
798
831
|
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
|
799
832
|
public:
|
|
800
833
|
enum {
|
|
801
|
-
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
|
|
802
|
-
|
|
834
|
+
CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
|
|
835
|
+
|
|
803
836
|
MatrixFlags = evaluator<MatrixType>::Flags,
|
|
804
837
|
DiagFlags = evaluator<DiagonalType>::Flags,
|
|
805
|
-
|
|
838
|
+
|
|
839
|
+
_StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
|
|
840
|
+
: (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
|
|
841
|
+
: MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
|
|
842
|
+
_SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
|
|
843
|
+
|
|
806
844
|
_ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
|
|
807
845
|
||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
|
|
808
846
|
_SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
|
|
809
847
|
// FIXME currently we need same types, but in the future the next rule should be the one
|
|
810
848
|
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
|
|
811
|
-
_Vectorizable =
|
|
849
|
+
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit)
|
|
850
|
+
&& _SameTypes
|
|
851
|
+
&& (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
|
|
852
|
+
&& (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
|
|
812
853
|
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
|
|
813
854
|
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
|
|
814
855
|
Alignment = evaluator<MatrixType>::Alignment,
|
|
@@ -817,14 +858,14 @@ public:
|
|
|
817
858
|
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
|
|
818
859
|
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
|
|
819
860
|
};
|
|
820
|
-
|
|
821
|
-
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
|
861
|
+
|
|
862
|
+
EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
|
|
822
863
|
: m_diagImpl(diag), m_matImpl(mat)
|
|
823
864
|
{
|
|
824
865
|
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
|
825
866
|
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
|
826
867
|
}
|
|
827
|
-
|
|
868
|
+
|
|
828
869
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
|
|
829
870
|
{
|
|
830
871
|
if(AsScalarProduct)
|
|
@@ -832,7 +873,7 @@ public:
|
|
|
832
873
|
else
|
|
833
874
|
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
|
|
834
875
|
}
|
|
835
|
-
|
|
876
|
+
|
|
836
877
|
protected:
|
|
837
878
|
template<int LoadMode,typename PacketType>
|
|
838
879
|
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
|
|
@@ -840,7 +881,7 @@ protected:
|
|
|
840
881
|
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
|
841
882
|
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
|
|
842
883
|
}
|
|
843
|
-
|
|
884
|
+
|
|
844
885
|
template<int LoadMode,typename PacketType>
|
|
845
886
|
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
|
|
846
887
|
{
|
|
@@ -851,7 +892,7 @@ protected:
|
|
|
851
892
|
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
|
852
893
|
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
|
|
853
894
|
}
|
|
854
|
-
|
|
895
|
+
|
|
855
896
|
evaluator<DiagonalType> m_diagImpl;
|
|
856
897
|
evaluator<MatrixType> m_matImpl;
|
|
857
898
|
};
|
|
@@ -866,25 +907,25 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|
|
866
907
|
using Base::m_matImpl;
|
|
867
908
|
using Base::coeff;
|
|
868
909
|
typedef typename Base::Scalar Scalar;
|
|
869
|
-
|
|
910
|
+
|
|
870
911
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
|
871
912
|
typedef typename XprType::PlainObject PlainObject;
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
};
|
|
913
|
+
typedef typename Lhs::DiagonalVectorType DiagonalType;
|
|
914
|
+
|
|
915
|
+
|
|
916
|
+
enum { StorageOrder = Base::_StorageOrder };
|
|
876
917
|
|
|
877
918
|
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
|
878
919
|
: Base(xpr.rhs(), xpr.lhs().diagonal())
|
|
879
920
|
{
|
|
880
921
|
}
|
|
881
|
-
|
|
922
|
+
|
|
882
923
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
|
883
924
|
{
|
|
884
925
|
return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
|
|
885
926
|
}
|
|
886
|
-
|
|
887
|
-
#ifndef
|
|
927
|
+
|
|
928
|
+
#ifndef EIGEN_GPUCC
|
|
888
929
|
template<int LoadMode,typename PacketType>
|
|
889
930
|
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
|
890
931
|
{
|
|
@@ -893,7 +934,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|
|
893
934
|
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
|
|
894
935
|
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
|
|
895
936
|
}
|
|
896
|
-
|
|
937
|
+
|
|
897
938
|
template<int LoadMode,typename PacketType>
|
|
898
939
|
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
|
899
940
|
{
|
|
@@ -912,30 +953,30 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
|
|
912
953
|
using Base::m_matImpl;
|
|
913
954
|
using Base::coeff;
|
|
914
955
|
typedef typename Base::Scalar Scalar;
|
|
915
|
-
|
|
956
|
+
|
|
916
957
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
|
917
958
|
typedef typename XprType::PlainObject PlainObject;
|
|
918
|
-
|
|
919
|
-
enum { StorageOrder =
|
|
959
|
+
|
|
960
|
+
enum { StorageOrder = Base::_StorageOrder };
|
|
920
961
|
|
|
921
962
|
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
|
922
963
|
: Base(xpr.lhs(), xpr.rhs().diagonal())
|
|
923
964
|
{
|
|
924
965
|
}
|
|
925
|
-
|
|
966
|
+
|
|
926
967
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
|
927
968
|
{
|
|
928
969
|
return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
|
|
929
970
|
}
|
|
930
|
-
|
|
931
|
-
#ifndef
|
|
971
|
+
|
|
972
|
+
#ifndef EIGEN_GPUCC
|
|
932
973
|
template<int LoadMode,typename PacketType>
|
|
933
974
|
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
|
934
975
|
{
|
|
935
976
|
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
|
|
936
977
|
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
|
|
937
978
|
}
|
|
938
|
-
|
|
979
|
+
|
|
939
980
|
template<int LoadMode,typename PacketType>
|
|
940
981
|
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
|
941
982
|
{
|
|
@@ -963,7 +1004,7 @@ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
|
|
|
963
1004
|
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
|
964
1005
|
|
|
965
1006
|
template<typename Dest, typename PermutationType>
|
|
966
|
-
static
|
|
1007
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
|
|
967
1008
|
{
|
|
968
1009
|
MatrixType mat(xpr);
|
|
969
1010
|
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
|
|
@@ -1017,7 +1058,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1017
1058
|
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
|
|
1018
1059
|
{
|
|
1019
1060
|
template<typename Dest>
|
|
1020
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1061
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1021
1062
|
{
|
|
1022
1063
|
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
|
1023
1064
|
}
|
|
@@ -1027,7 +1068,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1027
1068
|
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
|
|
1028
1069
|
{
|
|
1029
1070
|
template<typename Dest>
|
|
1030
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1071
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1031
1072
|
{
|
|
1032
1073
|
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
|
1033
1074
|
}
|
|
@@ -1037,7 +1078,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1037
1078
|
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
|
|
1038
1079
|
{
|
|
1039
1080
|
template<typename Dest>
|
|
1040
|
-
static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
|
1081
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
|
1041
1082
|
{
|
|
1042
1083
|
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
|
1043
1084
|
}
|
|
@@ -1047,7 +1088,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1047
1088
|
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
|
|
1048
1089
|
{
|
|
1049
1090
|
template<typename Dest>
|
|
1050
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
|
1091
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
|
1051
1092
|
{
|
|
1052
1093
|
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
|
1053
1094
|
}
|
|
@@ -1069,9 +1110,9 @@ struct transposition_matrix_product
|
|
|
1069
1110
|
{
|
|
1070
1111
|
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
|
|
1071
1112
|
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
|
1072
|
-
|
|
1113
|
+
|
|
1073
1114
|
template<typename Dest, typename TranspositionType>
|
|
1074
|
-
static
|
|
1115
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
|
|
1075
1116
|
{
|
|
1076
1117
|
MatrixType mat(xpr);
|
|
1077
1118
|
typedef typename TranspositionType::StorageIndex StorageIndex;
|
|
@@ -1094,7 +1135,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1094
1135
|
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
|
1095
1136
|
{
|
|
1096
1137
|
template<typename Dest>
|
|
1097
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1138
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1098
1139
|
{
|
|
1099
1140
|
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
|
1100
1141
|
}
|
|
@@ -1104,7 +1145,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1104
1145
|
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
|
|
1105
1146
|
{
|
|
1106
1147
|
template<typename Dest>
|
|
1107
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1148
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1108
1149
|
{
|
|
1109
1150
|
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
|
1110
1151
|
}
|
|
@@ -1115,7 +1156,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1115
1156
|
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
|
1116
1157
|
{
|
|
1117
1158
|
template<typename Dest>
|
|
1118
|
-
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
|
1159
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
|
1119
1160
|
{
|
|
1120
1161
|
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
|
1121
1162
|
}
|
|
@@ -1125,7 +1166,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
|
1125
1166
|
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
|
|
1126
1167
|
{
|
|
1127
1168
|
template<typename Dest>
|
|
1128
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
|
1169
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
|
1129
1170
|
{
|
|
1130
1171
|
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
|
1131
1172
|
}
|