@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -9,155 +9,138 @@
|
|
|
9
9
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
10
10
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
11
11
|
|
|
12
|
-
|
|
13
12
|
#ifndef EIGEN_PRODUCTEVALUATORS_H
|
|
14
13
|
#define EIGEN_PRODUCTEVALUATORS_H
|
|
15
14
|
|
|
15
|
+
// IWYU pragma: private
|
|
16
|
+
#include "./InternalHeaderCheck.h"
|
|
17
|
+
|
|
16
18
|
namespace Eigen {
|
|
17
|
-
|
|
19
|
+
|
|
18
20
|
namespace internal {
|
|
19
21
|
|
|
20
22
|
/** \internal
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
template<typename Lhs, typename Rhs, int Options>
|
|
29
|
-
struct evaluator<Product<Lhs, Rhs, Options
|
|
30
|
-
: public product_evaluator<Product<Lhs, Rhs, Options> >
|
|
31
|
-
{
|
|
23
|
+
* Evaluator of a product expression.
|
|
24
|
+
* Since products require special treatments to handle all possible cases,
|
|
25
|
+
* we simply defer the evaluation logic to a product_evaluator class
|
|
26
|
+
* which offers more partial specialization possibilities.
|
|
27
|
+
*
|
|
28
|
+
* \sa class product_evaluator
|
|
29
|
+
*/
|
|
30
|
+
template <typename Lhs, typename Rhs, int Options>
|
|
31
|
+
struct evaluator<Product<Lhs, Rhs, Options>> : public product_evaluator<Product<Lhs, Rhs, Options>> {
|
|
32
32
|
typedef Product<Lhs, Rhs, Options> XprType;
|
|
33
33
|
typedef product_evaluator<XprType> Base;
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
|
|
36
36
|
};
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
|
|
39
39
|
// TODO we should apply that rule only if that's really helpful
|
|
40
|
-
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
|
41
|
-
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
|
40
|
+
template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
|
41
|
+
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
|
|
42
42
|
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
|
|
43
|
-
const Product<Lhs, Rhs, DefaultProduct
|
|
44
|
-
{
|
|
43
|
+
const Product<Lhs, Rhs, DefaultProduct>>> {
|
|
45
44
|
static const bool value = true;
|
|
46
45
|
};
|
|
47
|
-
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
|
48
|
-
struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
|
46
|
+
template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
|
|
47
|
+
struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
|
|
49
48
|
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
|
|
50
|
-
const Product<Lhs, Rhs, DefaultProduct
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct
|
|
49
|
+
const Product<Lhs, Rhs, DefaultProduct>>>
|
|
50
|
+
: public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> {
|
|
51
|
+
typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
|
|
52
|
+
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
|
|
53
|
+
const Product<Lhs, Rhs, DefaultProduct>>
|
|
54
|
+
XprType;
|
|
55
|
+
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> Base;
|
|
57
56
|
|
|
58
57
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
|
|
59
|
-
|
|
60
|
-
{}
|
|
58
|
+
: Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {}
|
|
61
59
|
};
|
|
62
60
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
: public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
|
|
67
|
-
{
|
|
61
|
+
template <typename Lhs, typename Rhs, int DiagIndex>
|
|
62
|
+
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex>>
|
|
63
|
+
: public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> {
|
|
68
64
|
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
|
|
69
|
-
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex
|
|
70
|
-
|
|
65
|
+
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> Base;
|
|
66
|
+
|
|
71
67
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
xpr.index() ))
|
|
75
|
-
{}
|
|
68
|
+
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
|
|
69
|
+
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index())) {}
|
|
76
70
|
};
|
|
77
71
|
|
|
78
|
-
|
|
79
72
|
// Helper class to perform a matrix product with the destination at hand.
|
|
80
73
|
// Depending on the sizes of the factors, there are different evaluation strategies
|
|
81
74
|
// as controlled by internal::product_type.
|
|
82
|
-
template<
|
|
83
|
-
typename LhsShape = typename evaluator_traits<Lhs>::Shape,
|
|
75
|
+
template <typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape,
|
|
84
76
|
typename RhsShape = typename evaluator_traits<Rhs>::Shape,
|
|
85
|
-
int ProductType = internal::product_type<Lhs,Rhs>::value>
|
|
77
|
+
int ProductType = internal::product_type<Lhs, Rhs>::value>
|
|
86
78
|
struct generic_product_impl;
|
|
87
79
|
|
|
88
|
-
template<typename Lhs, typename Rhs>
|
|
89
|
-
struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct
|
|
80
|
+
template <typename Lhs, typename Rhs>
|
|
81
|
+
struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct>> {
|
|
90
82
|
static const bool value = true;
|
|
91
83
|
};
|
|
92
84
|
|
|
93
85
|
// This is the default evaluator implementation for products:
|
|
94
86
|
// It creates a temporary and call generic_product_impl
|
|
95
|
-
template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
|
|
87
|
+
template <typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
|
|
96
88
|
struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
|
|
97
|
-
|
|
98
|
-
{
|
|
89
|
+
: public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject> {
|
|
99
90
|
typedef Product<Lhs, Rhs, Options> XprType;
|
|
100
91
|
typedef typename XprType::PlainObject PlainObject;
|
|
101
92
|
typedef evaluator<PlainObject> Base;
|
|
102
|
-
enum {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
//
|
|
113
|
-
//
|
|
114
|
-
// typedef
|
|
115
|
-
//
|
|
116
|
-
//
|
|
117
|
-
//
|
|
118
|
-
//
|
|
119
|
-
//
|
|
120
|
-
// const RhsNested rhs(xpr.rhs());
|
|
121
|
-
//
|
|
122
|
-
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
|
|
93
|
+
enum { Flags = Base::Flags | EvalBeforeNestingBit };
|
|
94
|
+
|
|
95
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
|
|
96
|
+
: m_result(xpr.rows(), xpr.cols()) {
|
|
97
|
+
internal::construct_at<Base>(this, m_result);
|
|
98
|
+
|
|
99
|
+
// FIXME shall we handle nested_eval here?,
|
|
100
|
+
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in
|
|
101
|
+
// permutation_matrix_product, transposition_matrix_product, etc.)
|
|
102
|
+
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
|
103
|
+
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
|
104
|
+
// typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
|
|
105
|
+
// typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
|
|
106
|
+
//
|
|
107
|
+
// const LhsNested lhs(xpr.lhs());
|
|
108
|
+
// const RhsNested rhs(xpr.rhs());
|
|
109
|
+
//
|
|
110
|
+
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
|
|
123
111
|
|
|
124
112
|
generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
|
|
125
113
|
}
|
|
126
|
-
|
|
127
|
-
protected:
|
|
114
|
+
|
|
115
|
+
protected:
|
|
128
116
|
PlainObject m_result;
|
|
129
117
|
};
|
|
130
118
|
|
|
131
|
-
// The following three shortcuts are enabled only if the scalar types match
|
|
119
|
+
// The following three shortcuts are enabled only if the scalar types match exactly.
|
|
132
120
|
// TODO: we could enable them for different scalar types when the product is not vectorized.
|
|
133
121
|
|
|
134
122
|
// Dense = Product
|
|
135
|
-
template<
|
|
136
|
-
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
|
142
|
-
{
|
|
123
|
+
template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
|
124
|
+
struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::assign_op<Scalar, Scalar>, Dense2Dense,
|
|
125
|
+
std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
|
|
126
|
+
typedef Product<Lhs, Rhs, Options> SrcXprType;
|
|
127
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
|
|
128
|
+
const internal::assign_op<Scalar, Scalar>&) {
|
|
143
129
|
Index dstRows = src.rows();
|
|
144
130
|
Index dstCols = src.cols();
|
|
145
|
-
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
|
146
|
-
dst.resize(dstRows, dstCols);
|
|
131
|
+
if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
|
|
147
132
|
// FIXME shall we handle nested_eval here?
|
|
148
133
|
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
|
|
149
134
|
}
|
|
150
135
|
};
|
|
151
136
|
|
|
152
137
|
// Dense += Product
|
|
153
|
-
template<
|
|
154
|
-
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
|
160
|
-
{
|
|
138
|
+
template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
|
139
|
+
struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::add_assign_op<Scalar, Scalar>, Dense2Dense,
|
|
140
|
+
std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
|
|
141
|
+
typedef Product<Lhs, Rhs, Options> SrcXprType;
|
|
142
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
|
|
143
|
+
const internal::add_assign_op<Scalar, Scalar>&) {
|
|
161
144
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
162
145
|
// FIXME shall we handle nested_eval here?
|
|
163
146
|
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
|
|
@@ -165,35 +148,35 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
|
|
165
148
|
};
|
|
166
149
|
|
|
167
150
|
// Dense -= Product
|
|
168
|
-
template<
|
|
169
|
-
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
|
175
|
-
{
|
|
151
|
+
template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
|
|
152
|
+
struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::sub_assign_op<Scalar, Scalar>, Dense2Dense,
|
|
153
|
+
std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
|
|
154
|
+
typedef Product<Lhs, Rhs, Options> SrcXprType;
|
|
155
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
|
|
156
|
+
const internal::sub_assign_op<Scalar, Scalar>&) {
|
|
176
157
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
177
158
|
// FIXME shall we handle nested_eval here?
|
|
178
159
|
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
|
|
179
160
|
}
|
|
180
161
|
};
|
|
181
162
|
|
|
182
|
-
|
|
183
163
|
// Dense ?= scalar * Product
|
|
184
164
|
// TODO we should apply that rule if that's really helpful
|
|
185
165
|
// for instance, this is not good for inner products
|
|
186
|
-
template<
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
166
|
+
template <typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis,
|
|
167
|
+
typename Plain>
|
|
168
|
+
struct Assignment<DstXprType,
|
|
169
|
+
CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
|
|
170
|
+
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
|
|
171
|
+
const Product<Lhs, Rhs, DefaultProduct>>,
|
|
172
|
+
AssignFunc, Dense2Dense> {
|
|
173
|
+
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
|
|
174
|
+
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
|
|
175
|
+
const Product<Lhs, Rhs, DefaultProduct>>
|
|
176
|
+
SrcXprType;
|
|
177
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
|
|
178
|
+
const AssignFunc& func) {
|
|
179
|
+
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs()) * src.rhs().rhs(), func);
|
|
197
180
|
}
|
|
198
181
|
};
|
|
199
182
|
|
|
@@ -201,251 +184,291 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
|
|
|
201
184
|
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
|
|
202
185
|
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
|
|
203
186
|
|
|
204
|
-
template<typename OtherXpr, typename Lhs, typename Rhs>
|
|
205
|
-
struct evaluator_assume_aliasing<
|
|
206
|
-
|
|
187
|
+
template <typename OtherXpr, typename Lhs, typename Rhs>
|
|
188
|
+
struct evaluator_assume_aliasing<
|
|
189
|
+
CwiseBinaryOp<
|
|
190
|
+
internal::scalar_sum_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
|
|
191
|
+
const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
|
|
192
|
+
DenseShape> {
|
|
207
193
|
static const bool value = true;
|
|
208
194
|
};
|
|
209
195
|
|
|
210
|
-
template<typename OtherXpr, typename Lhs, typename Rhs>
|
|
211
|
-
struct evaluator_assume_aliasing<
|
|
212
|
-
|
|
196
|
+
template <typename OtherXpr, typename Lhs, typename Rhs>
|
|
197
|
+
struct evaluator_assume_aliasing<
|
|
198
|
+
CwiseBinaryOp<
|
|
199
|
+
internal::scalar_difference_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
|
|
200
|
+
const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
|
|
201
|
+
DenseShape> {
|
|
213
202
|
static const bool value = true;
|
|
214
203
|
};
|
|
215
204
|
|
|
216
|
-
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
|
|
217
|
-
struct assignment_from_xpr_op_product
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
|
|
222
|
-
{
|
|
205
|
+
template <typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
|
|
206
|
+
struct assignment_from_xpr_op_product {
|
|
207
|
+
template <typename SrcXprType, typename InitialFunc>
|
|
208
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
|
|
209
|
+
const InitialFunc& /*func*/) {
|
|
223
210
|
call_assignment_no_alias(dst, src.lhs(), Func1());
|
|
224
211
|
call_assignment_no_alias(dst, src.rhs(), Func2());
|
|
225
212
|
}
|
|
226
213
|
};
|
|
227
214
|
|
|
228
|
-
#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2)
|
|
229
|
-
template<
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
215
|
+
#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP, BINOP, ASSIGN_OP2) \
|
|
216
|
+
template <typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, \
|
|
217
|
+
typename SrcScalar, typename OtherScalar, typename ProdScalar> \
|
|
218
|
+
struct Assignment<DstXprType, \
|
|
219
|
+
CwiseBinaryOp<internal::BINOP<OtherScalar, ProdScalar>, const OtherXpr, \
|
|
220
|
+
const Product<Lhs, Rhs, DefaultProduct>>, \
|
|
221
|
+
internal::ASSIGN_OP<DstScalar, SrcScalar>, Dense2Dense> \
|
|
222
|
+
: assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs, Rhs, DefaultProduct>, \
|
|
223
|
+
internal::ASSIGN_OP<DstScalar, OtherScalar>, \
|
|
224
|
+
internal::ASSIGN_OP2<DstScalar, ProdScalar>> {}
|
|
225
|
+
|
|
226
|
+
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op, add_assign_op);
|
|
227
|
+
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_sum_op, add_assign_op);
|
|
228
|
+
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_sum_op, sub_assign_op);
|
|
229
|
+
|
|
230
|
+
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op, sub_assign_op);
|
|
231
|
+
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_difference_op, sub_assign_op);
|
|
232
|
+
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_difference_op, add_assign_op);
|
|
234
233
|
|
|
235
|
-
|
|
236
|
-
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
|
|
237
|
-
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
|
|
234
|
+
//----------------------------------------
|
|
238
235
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
236
|
+
template <typename Lhs, typename Rhs>
|
|
237
|
+
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, InnerProduct> {
|
|
238
|
+
using impl = default_inner_product_impl<Lhs, Rhs, false>;
|
|
239
|
+
template <typename Dst>
|
|
240
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
241
|
+
dst.coeffRef(0, 0) = impl::run(lhs, rhs);
|
|
242
|
+
}
|
|
242
243
|
|
|
243
|
-
|
|
244
|
+
template <typename Dst>
|
|
245
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
246
|
+
dst.coeffRef(0, 0) += impl::run(lhs, rhs);
|
|
247
|
+
}
|
|
244
248
|
|
|
245
|
-
template<typename
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
250
|
-
{
|
|
251
|
-
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
template<typename Dst>
|
|
255
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
256
|
-
{
|
|
257
|
-
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
template<typename Dst>
|
|
261
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
262
|
-
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
|
|
249
|
+
template <typename Dst>
|
|
250
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
251
|
+
dst.coeffRef(0, 0) -= impl::run(lhs, rhs);
|
|
252
|
+
}
|
|
263
253
|
};
|
|
264
254
|
|
|
265
|
-
|
|
266
255
|
/***********************************************************************
|
|
267
|
-
* Implementation of outer dense * dense vector product
|
|
268
|
-
***********************************************************************/
|
|
256
|
+
* Implementation of outer dense * dense vector product
|
|
257
|
+
***********************************************************************/
|
|
269
258
|
|
|
270
259
|
// Column major result
|
|
271
|
-
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
|
272
|
-
void outer_product_selector_run(Dst& dst, const Lhs
|
|
273
|
-
{
|
|
260
|
+
template <typename Dst, typename Lhs, typename Rhs, typename Func>
|
|
261
|
+
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func,
|
|
262
|
+
const false_type&) {
|
|
274
263
|
evaluator<Rhs> rhsEval(rhs);
|
|
275
|
-
|
|
264
|
+
ei_declare_local_nested_eval(Lhs, lhs, Rhs::SizeAtCompileTime, actual_lhs);
|
|
276
265
|
// FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
|
|
277
266
|
// FIXME not very good if rhs is real and lhs complex while alpha is real too
|
|
278
267
|
const Index cols = dst.cols();
|
|
279
|
-
for (Index j=0; j<cols; ++j)
|
|
280
|
-
func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
|
|
268
|
+
for (Index j = 0; j < cols; ++j) func(dst.col(j), rhsEval.coeff(Index(0), j) * actual_lhs);
|
|
281
269
|
}
|
|
282
270
|
|
|
283
271
|
// Row major result
|
|
284
|
-
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
|
285
|
-
void outer_product_selector_run(Dst& dst, const Lhs
|
|
286
|
-
{
|
|
272
|
+
template <typename Dst, typename Lhs, typename Rhs, typename Func>
|
|
273
|
+
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func,
|
|
274
|
+
const true_type&) {
|
|
287
275
|
evaluator<Lhs> lhsEval(lhs);
|
|
288
|
-
|
|
276
|
+
ei_declare_local_nested_eval(Rhs, rhs, Lhs::SizeAtCompileTime, actual_rhs);
|
|
289
277
|
// FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
|
|
290
278
|
// FIXME not very good if lhs is real and rhs complex while alpha is real too
|
|
291
279
|
const Index rows = dst.rows();
|
|
292
|
-
for (Index i=0; i<rows; ++i)
|
|
293
|
-
func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
|
|
280
|
+
for (Index i = 0; i < rows; ++i) func(dst.row(i), lhsEval.coeff(i, Index(0)) * actual_rhs);
|
|
294
281
|
}
|
|
295
282
|
|
|
296
|
-
template<typename Lhs, typename Rhs>
|
|
297
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
301
|
-
|
|
283
|
+
template <typename Lhs, typename Rhs>
|
|
284
|
+
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, OuterProduct> {
|
|
285
|
+
template <typename T>
|
|
286
|
+
struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {};
|
|
287
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
288
|
+
|
|
302
289
|
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
|
|
303
|
-
struct set
|
|
304
|
-
|
|
305
|
-
|
|
290
|
+
struct set {
|
|
291
|
+
template <typename Dst, typename Src>
|
|
292
|
+
EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
|
|
293
|
+
dst.const_cast_derived() = src;
|
|
294
|
+
}
|
|
295
|
+
};
|
|
296
|
+
struct add {
|
|
297
|
+
/** Add to dst. */
|
|
298
|
+
template <typename Dst, typename Src>
|
|
299
|
+
EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
|
|
300
|
+
dst.const_cast_derived() += src;
|
|
301
|
+
}
|
|
302
|
+
};
|
|
303
|
+
struct sub {
|
|
304
|
+
template <typename Dst, typename Src>
|
|
305
|
+
EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
|
|
306
|
+
dst.const_cast_derived() -= src;
|
|
307
|
+
}
|
|
308
|
+
};
|
|
309
|
+
/** Scaled add. */
|
|
306
310
|
struct adds {
|
|
307
311
|
Scalar m_scale;
|
|
312
|
+
/** Constructor */
|
|
308
313
|
explicit adds(const Scalar& s) : m_scale(s) {}
|
|
309
|
-
|
|
314
|
+
/** Scaled add to dst. */
|
|
315
|
+
template <typename Dst, typename Src>
|
|
316
|
+
void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
|
|
310
317
|
dst.const_cast_derived() += m_scale * src;
|
|
311
318
|
}
|
|
312
319
|
};
|
|
313
|
-
|
|
314
|
-
template<typename Dst>
|
|
315
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
316
|
-
{
|
|
320
|
+
|
|
321
|
+
template <typename Dst>
|
|
322
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
317
323
|
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
|
|
318
324
|
}
|
|
319
|
-
|
|
320
|
-
template<typename Dst>
|
|
321
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
322
|
-
{
|
|
325
|
+
|
|
326
|
+
template <typename Dst>
|
|
327
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
323
328
|
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
|
|
324
329
|
}
|
|
325
|
-
|
|
326
|
-
template<typename Dst>
|
|
327
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
328
|
-
{
|
|
330
|
+
|
|
331
|
+
template <typename Dst>
|
|
332
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
329
333
|
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
|
|
330
334
|
}
|
|
331
|
-
|
|
332
|
-
template<typename Dst>
|
|
333
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
|
|
334
|
-
|
|
335
|
+
|
|
336
|
+
template <typename Dst>
|
|
337
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
|
|
338
|
+
const Scalar& alpha) {
|
|
335
339
|
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
|
|
336
340
|
}
|
|
337
|
-
|
|
338
341
|
};
|
|
339
342
|
|
|
340
|
-
|
|
341
343
|
// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
|
|
342
|
-
template<typename Lhs, typename Rhs, typename Derived>
|
|
343
|
-
struct generic_product_impl_base
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
357
|
-
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
|
|
358
|
-
|
|
359
|
-
template<typename Dst>
|
|
360
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
|
361
|
-
{ Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
|
|
344
|
+
template <typename Lhs, typename Rhs, typename Derived>
|
|
345
|
+
struct generic_product_impl_base {
|
|
346
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
347
|
+
|
|
348
|
+
template <typename Dst>
|
|
349
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
350
|
+
dst.setZero();
|
|
351
|
+
scaleAndAddTo(dst, lhs, rhs, Scalar(1));
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
template <typename Dst>
|
|
355
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
356
|
+
scaleAndAddTo(dst, lhs, rhs, Scalar(1));
|
|
357
|
+
}
|
|
362
358
|
|
|
359
|
+
template <typename Dst>
|
|
360
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
361
|
+
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
template <typename Dst>
|
|
365
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
|
|
366
|
+
const Scalar& alpha) {
|
|
367
|
+
Derived::scaleAndAddTo(dst, lhs, rhs, alpha);
|
|
368
|
+
}
|
|
363
369
|
};
|
|
364
370
|
|
|
365
|
-
template<typename Lhs, typename Rhs>
|
|
366
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
typedef typename nested_eval<
|
|
370
|
-
typedef typename
|
|
371
|
-
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
|
371
|
+
template <typename Lhs, typename Rhs>
|
|
372
|
+
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>
|
|
373
|
+
: generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>> {
|
|
374
|
+
typedef typename nested_eval<Lhs, 1>::type LhsNested;
|
|
375
|
+
typedef typename nested_eval<Rhs, 1>::type RhsNested;
|
|
376
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
372
377
|
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
|
|
373
|
-
typedef
|
|
374
|
-
|
|
375
|
-
template<typename Dest>
|
|
376
|
-
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
|
|
377
|
-
|
|
378
|
+
typedef internal::remove_all_t<std::conditional_t<int(Side) == OnTheRight, LhsNested, RhsNested>> MatrixType;
|
|
379
|
+
|
|
380
|
+
template <typename Dest>
|
|
381
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
|
|
382
|
+
const Scalar& alpha) {
|
|
383
|
+
// Fallback to inner product if both the lhs and rhs is a runtime vector.
|
|
384
|
+
if (lhs.rows() == 1 && rhs.cols() == 1) {
|
|
385
|
+
dst.coeffRef(0, 0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
|
|
386
|
+
return;
|
|
387
|
+
}
|
|
378
388
|
LhsNested actual_lhs(lhs);
|
|
379
389
|
RhsNested actual_rhs(rhs);
|
|
380
|
-
internal::gemv_dense_selector<Side,
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
390
|
+
internal::gemv_dense_selector<Side, (int(MatrixType::Flags) & RowMajorBit) ? RowMajor : ColMajor,
|
|
391
|
+
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(actual_lhs,
|
|
392
|
+
actual_rhs, dst,
|
|
393
|
+
alpha);
|
|
384
394
|
}
|
|
385
395
|
};
|
|
386
396
|
|
|
387
|
-
template<typename Lhs, typename Rhs>
|
|
388
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
394
|
-
{
|
|
397
|
+
template <typename Lhs, typename Rhs>
|
|
398
|
+
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {
|
|
399
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
400
|
+
|
|
401
|
+
template <typename Dst>
|
|
402
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
395
403
|
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
|
|
396
404
|
// but easier on the compiler side
|
|
397
|
-
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
|
|
405
|
+
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar, Scalar>());
|
|
398
406
|
}
|
|
399
407
|
|
|
400
|
-
template<typename Dst>
|
|
401
|
-
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
402
|
-
{
|
|
408
|
+
template <typename Dst>
|
|
409
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
403
410
|
// dst.noalias() += lhs.lazyProduct(rhs);
|
|
404
|
-
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
|
411
|
+
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar, Scalar>());
|
|
405
412
|
}
|
|
406
|
-
|
|
407
|
-
template<typename Dst>
|
|
408
|
-
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
|
409
|
-
{
|
|
413
|
+
|
|
414
|
+
template <typename Dst>
|
|
415
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
410
416
|
// dst.noalias() -= lhs.lazyProduct(rhs);
|
|
411
|
-
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
//
|
|
415
|
-
//
|
|
416
|
-
//
|
|
417
|
-
//
|
|
418
|
-
//
|
|
419
|
-
//
|
|
420
|
-
//
|
|
421
|
-
//
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
417
|
+
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar, Scalar>());
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
// This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
|
|
421
|
+
// This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
|
|
422
|
+
// dst {,+,-}= (s1*A)*(B*s2)
|
|
423
|
+
// will be rewritten as:
|
|
424
|
+
// dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
|
|
425
|
+
// There are at least four benefits of doing so:
|
|
426
|
+
// 1 - huge performance gain for heap-allocated matrix types as it save costly allocations.
|
|
427
|
+
// 2 - it is faster than simply by-passing the heap allocation through stack allocation.
|
|
428
|
+
// 3 - it makes this fallback consistent with the heavy GEMM routine.
|
|
429
|
+
// 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
|
|
430
|
+
// (see https://stackoverflow.com/questions/54738495)
|
|
431
|
+
// For small fixed sizes matrices, however, the gains are less obvious, it is sometimes x2 faster, but sometimes x3
|
|
432
|
+
// slower, and the behavior depends also a lot on the compiler... This is why this re-writing strategy is currently
|
|
433
|
+
// enabled only when falling back from the main GEMM.
|
|
434
|
+
template <typename Dst, typename Func>
|
|
435
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs,
|
|
436
|
+
const Func& func) {
|
|
437
|
+
enum {
|
|
438
|
+
HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
|
|
439
|
+
ConjLhs = blas_traits<Lhs>::NeedToConjugate,
|
|
440
|
+
ConjRhs = blas_traits<Rhs>::NeedToConjugate
|
|
441
|
+
};
|
|
442
|
+
// FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
|
|
443
|
+
// this is important for real*complex_mat
|
|
444
|
+
Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
|
|
445
|
+
|
|
446
|
+
eval_dynamic_impl(dst, blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
|
|
447
|
+
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(), func, actualAlpha,
|
|
448
|
+
bool_constant<HasScalarFactor>());
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
protected:
|
|
452
|
+
template <typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
|
|
453
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs,
|
|
454
|
+
const Func& func, const Scalar& s /* == 1 */,
|
|
455
|
+
false_type) {
|
|
456
|
+
EIGEN_UNUSED_VARIABLE(s);
|
|
457
|
+
eigen_internal_assert(numext::is_exactly_one(s));
|
|
458
|
+
call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
template <typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
|
|
462
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs,
|
|
463
|
+
const Func& func, const Scalar& s, true_type) {
|
|
464
|
+
call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
|
|
465
|
+
}
|
|
443
466
|
};
|
|
444
467
|
|
|
445
468
|
// This specialization enforces the use of a coefficient-based evaluation strategy
|
|
446
|
-
template<typename Lhs, typename Rhs>
|
|
447
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
|
|
448
|
-
|
|
469
|
+
template <typename Lhs, typename Rhs>
|
|
470
|
+
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, LazyCoeffBasedProductMode>
|
|
471
|
+
: generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {};
|
|
449
472
|
|
|
450
473
|
// Case 2: Evaluate coeff by coeff
|
|
451
474
|
//
|
|
@@ -453,29 +476,27 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductM
|
|
|
453
476
|
// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
|
|
454
477
|
// for the inner dimension of the product, because evaluator object do not know their size.
|
|
455
478
|
|
|
456
|
-
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
|
479
|
+
template <int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
|
457
480
|
struct etor_product_coeff_impl;
|
|
458
481
|
|
|
459
|
-
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
482
|
+
template <int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
460
483
|
struct etor_product_packet_impl;
|
|
461
484
|
|
|
462
|
-
template<typename Lhs, typename Rhs, int ProductTag>
|
|
485
|
+
template <typename Lhs, typename Rhs, int ProductTag>
|
|
463
486
|
struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
|
|
464
|
-
: evaluator_base<Product<Lhs, Rhs, LazyProduct
|
|
465
|
-
{
|
|
487
|
+
: evaluator_base<Product<Lhs, Rhs, LazyProduct>> {
|
|
466
488
|
typedef Product<Lhs, Rhs, LazyProduct> XprType;
|
|
467
489
|
typedef typename XprType::Scalar Scalar;
|
|
468
490
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
|
469
491
|
|
|
470
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
{
|
|
492
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
|
|
493
|
+
: m_lhs(xpr.lhs()),
|
|
494
|
+
m_rhs(xpr.rhs()),
|
|
495
|
+
m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
|
|
496
|
+
m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable
|
|
497
|
+
// them when not needed, or perhaps declare them on the fly on the packet method... We
|
|
498
|
+
// have experiment to check what's best.
|
|
499
|
+
m_innerDim(xpr.lhs().cols()) {
|
|
479
500
|
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
|
480
501
|
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
|
|
481
502
|
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
|
@@ -495,11 +516,11 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
495
516
|
|
|
496
517
|
// Everything below here is taken from CoeffBasedProduct.h
|
|
497
518
|
|
|
498
|
-
typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
|
499
|
-
typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
|
500
|
-
|
|
501
|
-
typedef
|
|
502
|
-
typedef
|
|
519
|
+
typedef typename internal::nested_eval<Lhs, Rhs::ColsAtCompileTime>::type LhsNested;
|
|
520
|
+
typedef typename internal::nested_eval<Rhs, Lhs::RowsAtCompileTime>::type RhsNested;
|
|
521
|
+
|
|
522
|
+
typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
|
|
523
|
+
typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
|
|
503
524
|
|
|
504
525
|
typedef evaluator<LhsNestedCleaned> LhsEtorType;
|
|
505
526
|
typedef evaluator<RhsNestedCleaned> RhsEtorType;
|
|
@@ -507,28 +528,29 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
507
528
|
enum {
|
|
508
529
|
RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
|
|
509
530
|
ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
|
|
510
|
-
InnerSize =
|
|
531
|
+
InnerSize = min_size_prefer_fixed(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
|
|
511
532
|
MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
|
|
512
533
|
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
|
|
513
534
|
};
|
|
514
535
|
|
|
515
|
-
typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
|
|
516
|
-
typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
|
|
536
|
+
typedef typename find_best_packet<Scalar, RowsAtCompileTime>::type LhsVecPacketType;
|
|
537
|
+
typedef typename find_best_packet<Scalar, ColsAtCompileTime>::type RhsVecPacketType;
|
|
517
538
|
|
|
518
539
|
enum {
|
|
519
|
-
|
|
540
|
+
|
|
520
541
|
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
|
521
542
|
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
|
522
|
-
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
543
|
+
CoeffReadCost = InnerSize == 0 ? NumTraits<Scalar>::ReadCost
|
|
544
|
+
: InnerSize == Dynamic
|
|
545
|
+
? HugeCost
|
|
546
|
+
: InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) +
|
|
547
|
+
(InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
|
526
548
|
|
|
527
549
|
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
|
528
|
-
|
|
550
|
+
|
|
529
551
|
LhsFlags = LhsEtorType::Flags,
|
|
530
552
|
RhsFlags = RhsEtorType::Flags,
|
|
531
|
-
|
|
553
|
+
|
|
532
554
|
LhsRowMajor = LhsFlags & RowMajorBit,
|
|
533
555
|
RhsRowMajor = RhsFlags & RowMajorBit,
|
|
534
556
|
|
|
@@ -536,82 +558,105 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|
|
536
558
|
RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
|
|
537
559
|
|
|
538
560
|
// Here, we don't care about alignment larger than the usable packet size.
|
|
539
|
-
LhsAlignment =
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
561
|
+
LhsAlignment =
|
|
562
|
+
plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar))),
|
|
563
|
+
RhsAlignment =
|
|
564
|
+
plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar))),
|
|
565
|
+
|
|
566
|
+
SameType = is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value,
|
|
567
|
+
|
|
568
|
+
CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1),
|
|
569
|
+
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1),
|
|
570
|
+
|
|
571
|
+
EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1
|
|
572
|
+
: (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1)
|
|
573
|
+
? 0
|
|
574
|
+
: (bool(RhsRowMajor) && !CanVectorizeLhs),
|
|
575
|
+
|
|
576
|
+
Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) |
|
|
577
|
+
(EvalToRowMajor ? RowMajorBit : 0)
|
|
578
|
+
// TODO enable vectorization for mixed types
|
|
579
|
+
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
|
|
580
|
+
(XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
|
|
581
|
+
|
|
582
|
+
LhsOuterStrideBytes =
|
|
583
|
+
int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
|
|
584
|
+
RhsOuterStrideBytes =
|
|
585
|
+
int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
|
|
586
|
+
|
|
587
|
+
Alignment = bool(CanVectorizeLhs)
|
|
588
|
+
? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0
|
|
589
|
+
? 0
|
|
590
|
+
: LhsAlignment)
|
|
591
|
+
: bool(CanVectorizeRhs)
|
|
592
|
+
? (RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0
|
|
593
|
+
? 0
|
|
594
|
+
: RhsAlignment)
|
|
595
|
+
: 0,
|
|
563
596
|
|
|
564
597
|
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
|
565
598
|
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
|
566
599
|
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
|
567
600
|
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
|
|
568
601
|
*/
|
|
569
|
-
CanVectorizeInner =
|
|
570
|
-
&&
|
|
571
|
-
|
|
572
|
-
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
|
573
|
-
&& (InnerSize % packet_traits<Scalar>::size == 0)
|
|
602
|
+
CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) &&
|
|
603
|
+
(int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) &&
|
|
604
|
+
(int(InnerSize) % packet_traits<Scalar>::size == 0)
|
|
574
605
|
};
|
|
575
|
-
|
|
576
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
|
|
577
|
-
|
|
578
|
-
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
|
|
606
|
+
|
|
607
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const {
|
|
608
|
+
return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
|
|
579
609
|
}
|
|
580
610
|
|
|
581
611
|
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
|
|
582
612
|
* which is why we don't set the LinearAccessBit.
|
|
583
613
|
* TODO: this seems possible when the result is a vector
|
|
584
614
|
*/
|
|
585
|
-
EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
|
|
586
|
-
|
|
587
|
-
const Index
|
|
588
|
-
|
|
589
|
-
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
|
|
615
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
|
|
616
|
+
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
|
|
617
|
+
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
|
|
618
|
+
return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
|
|
590
619
|
}
|
|
591
620
|
|
|
592
|
-
template<int LoadMode, typename PacketType>
|
|
593
|
-
const PacketType packet(Index row, Index col) const
|
|
594
|
-
{
|
|
621
|
+
template <int LoadMode, typename PacketType>
|
|
622
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index row, Index col) const {
|
|
595
623
|
PacketType res;
|
|
596
|
-
typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
|
597
|
-
Unroll ? int(InnerSize) : Dynamic,
|
|
598
|
-
|
|
624
|
+
typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
|
|
625
|
+
Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
|
|
626
|
+
PacketImpl;
|
|
599
627
|
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
|
600
628
|
return res;
|
|
601
629
|
}
|
|
602
630
|
|
|
603
|
-
template<int LoadMode, typename PacketType>
|
|
604
|
-
const PacketType packet(Index index) const
|
|
605
|
-
|
|
606
|
-
const Index
|
|
607
|
-
|
|
608
|
-
return packet<LoadMode,PacketType>(row,col);
|
|
631
|
+
template <int LoadMode, typename PacketType>
|
|
632
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index index) const {
|
|
633
|
+
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
|
|
634
|
+
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
|
|
635
|
+
return packet<LoadMode, PacketType>(row, col);
|
|
609
636
|
}
|
|
610
637
|
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
638
|
+
template <int LoadMode, typename PacketType>
|
|
639
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin,
|
|
640
|
+
Index count) const {
|
|
641
|
+
PacketType res;
|
|
642
|
+
typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
|
|
643
|
+
Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
|
|
644
|
+
PacketImpl;
|
|
645
|
+
PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count);
|
|
646
|
+
return res;
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
template <int LoadMode, typename PacketType>
|
|
650
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index index, Index begin, Index count) const {
|
|
651
|
+
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
|
|
652
|
+
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
|
|
653
|
+
return packetSegment<LoadMode, PacketType>(row, col, begin, count);
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
protected:
|
|
657
|
+
add_const_on_value_type_t<LhsNested> m_lhs;
|
|
658
|
+
add_const_on_value_type_t<RhsNested> m_rhs;
|
|
659
|
+
|
|
615
660
|
LhsEtorType m_lhsImpl;
|
|
616
661
|
RhsEtorType m_rhsImpl;
|
|
617
662
|
|
|
@@ -619,520 +664,624 @@ protected:
|
|
|
619
664
|
Index m_innerDim;
|
|
620
665
|
};
|
|
621
666
|
|
|
622
|
-
template<typename Lhs, typename Rhs>
|
|
667
|
+
template <typename Lhs, typename Rhs>
|
|
623
668
|
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
|
|
624
|
-
|
|
625
|
-
{
|
|
669
|
+
: product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape> {
|
|
626
670
|
typedef Product<Lhs, Rhs, DefaultProduct> XprType;
|
|
627
671
|
typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
|
|
628
672
|
typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
|
|
629
|
-
enum {
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
|
|
633
|
-
: Base(BaseProduct(xpr.lhs(),xpr.rhs()))
|
|
634
|
-
{}
|
|
673
|
+
enum { Flags = Base::Flags | EvalBeforeNestingBit };
|
|
674
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
|
|
675
|
+
: Base(BaseProduct(xpr.lhs(), xpr.rhs())) {}
|
|
635
676
|
};
|
|
636
677
|
|
|
637
678
|
/****************************************
|
|
638
679
|
*** Coeff based product, Packet path ***
|
|
639
680
|
****************************************/
|
|
640
681
|
|
|
641
|
-
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
642
|
-
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
res =
|
|
682
|
+
template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
683
|
+
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> {
|
|
684
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
685
|
+
Index innerDim, Packet& res) {
|
|
686
|
+
etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs,
|
|
687
|
+
innerDim, res);
|
|
688
|
+
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
|
|
689
|
+
rhs.template packet<LoadMode, Packet>(Index(UnrollingIndex - 1), col), res);
|
|
690
|
+
}
|
|
691
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
692
|
+
Index innerDim, Packet& res, Index begin, Index count) {
|
|
693
|
+
etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
|
|
694
|
+
row, col, lhs, rhs, innerDim, res, begin, count);
|
|
695
|
+
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
|
|
696
|
+
rhs.template packetSegment<LoadMode, Packet>(Index(UnrollingIndex - 1), col, begin, count), res);
|
|
648
697
|
}
|
|
649
698
|
};
|
|
650
699
|
|
|
651
|
-
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
652
|
-
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
res =
|
|
700
|
+
template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
701
|
+
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> {
|
|
702
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
703
|
+
Index innerDim, Packet& res) {
|
|
704
|
+
etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs,
|
|
705
|
+
innerDim, res);
|
|
706
|
+
res = pmadd(lhs.template packet<LoadMode, Packet>(row, Index(UnrollingIndex - 1)),
|
|
707
|
+
pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
|
|
708
|
+
}
|
|
709
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
710
|
+
Index innerDim, Packet& res, Index begin, Index count) {
|
|
711
|
+
etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
|
|
712
|
+
row, col, lhs, rhs, innerDim, res, begin, count);
|
|
713
|
+
res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, Index(UnrollingIndex - 1), begin, count),
|
|
714
|
+
pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
|
|
658
715
|
}
|
|
659
716
|
};
|
|
660
717
|
|
|
661
|
-
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
662
|
-
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
718
|
+
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
719
|
+
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> {
|
|
720
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
721
|
+
Index /*innerDim*/, Packet& res) {
|
|
722
|
+
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))), rhs.template packet<LoadMode, Packet>(Index(0), col));
|
|
723
|
+
}
|
|
724
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
725
|
+
Index /*innerDim*/, Packet& res, Index begin,
|
|
726
|
+
Index count) {
|
|
727
|
+
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),
|
|
728
|
+
rhs.template packetSegment<LoadMode, Packet>(Index(0), col, begin, count));
|
|
667
729
|
}
|
|
668
730
|
};
|
|
669
731
|
|
|
670
|
-
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
671
|
-
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
732
|
+
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
733
|
+
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> {
|
|
734
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
735
|
+
Index /*innerDim*/, Packet& res) {
|
|
736
|
+
res = pmul(lhs.template packet<LoadMode, Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
|
737
|
+
}
|
|
738
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
739
|
+
Index /*innerDim*/, Packet& res, Index begin,
|
|
740
|
+
Index count) {
|
|
741
|
+
res = pmul(lhs.template packetSegment<LoadMode, Packet>(row, Index(0), begin, count),
|
|
742
|
+
pset1<Packet>(rhs.coeff(Index(0), col)));
|
|
676
743
|
}
|
|
677
744
|
};
|
|
678
745
|
|
|
679
|
-
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
680
|
-
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
746
|
+
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
747
|
+
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> {
|
|
748
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
|
|
749
|
+
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
|
|
750
|
+
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
751
|
+
}
|
|
752
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
|
|
753
|
+
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
|
|
754
|
+
Index /*begin*/, Index /*count*/) {
|
|
684
755
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
685
756
|
}
|
|
686
757
|
};
|
|
687
758
|
|
|
688
|
-
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
689
|
-
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
759
|
+
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
760
|
+
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> {
|
|
761
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
|
|
762
|
+
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
|
|
763
|
+
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
764
|
+
}
|
|
765
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
|
|
766
|
+
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
|
|
767
|
+
Index /*begin*/, Index /*count*/) {
|
|
693
768
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
694
769
|
}
|
|
695
770
|
};
|
|
696
771
|
|
|
697
|
-
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
698
|
-
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
{
|
|
772
|
+
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
773
|
+
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
|
|
774
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
775
|
+
Index innerDim, Packet& res) {
|
|
702
776
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
703
|
-
for(Index i = 0; i < innerDim; ++i)
|
|
704
|
-
res =
|
|
777
|
+
for (Index i = 0; i < innerDim; ++i)
|
|
778
|
+
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode, Packet>(i, col), res);
|
|
779
|
+
}
|
|
780
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
781
|
+
Index innerDim, Packet& res, Index begin, Index count) {
|
|
782
|
+
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
783
|
+
for (Index i = 0; i < innerDim; ++i)
|
|
784
|
+
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packetSegment<LoadMode, Packet>(i, col, begin, count),
|
|
785
|
+
res);
|
|
705
786
|
}
|
|
706
787
|
};
|
|
707
788
|
|
|
708
|
-
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
709
|
-
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
789
|
+
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
|
790
|
+
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
|
|
791
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
792
|
+
Index innerDim, Packet& res) {
|
|
793
|
+
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
794
|
+
for (Index i = 0; i < innerDim; ++i)
|
|
795
|
+
res = pmadd(lhs.template packet<LoadMode, Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
|
796
|
+
}
|
|
797
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
|
798
|
+
Index innerDim, Packet& res, Index begin, Index count) {
|
|
713
799
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
|
714
|
-
for(Index i = 0; i < innerDim; ++i)
|
|
715
|
-
res =
|
|
800
|
+
for (Index i = 0; i < innerDim; ++i)
|
|
801
|
+
res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, i, begin, count), pset1<Packet>(rhs.coeff(i, col)),
|
|
802
|
+
res);
|
|
716
803
|
}
|
|
717
804
|
};
|
|
718
805
|
|
|
719
|
-
|
|
720
806
|
/***************************************************************************
|
|
721
|
-
* Triangular products
|
|
722
|
-
***************************************************************************/
|
|
723
|
-
template<int Mode, bool LhsIsTriangular,
|
|
724
|
-
typename Lhs, bool LhsIsVector,
|
|
725
|
-
typename Rhs, bool RhsIsVector>
|
|
807
|
+
* Triangular products
|
|
808
|
+
***************************************************************************/
|
|
809
|
+
template <int Mode, bool LhsIsTriangular, typename Lhs, bool LhsIsVector, typename Rhs, bool RhsIsVector>
|
|
726
810
|
struct triangular_product_impl;
|
|
727
811
|
|
|
728
|
-
template<typename Lhs, typename Rhs, int ProductTag>
|
|
729
|
-
struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
|
|
738
|
-
::run(dst, lhs.nestedExpression(), rhs, alpha);
|
|
812
|
+
template <typename Lhs, typename Rhs, int ProductTag>
|
|
813
|
+
struct generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>
|
|
814
|
+
: generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>> {
|
|
815
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
816
|
+
|
|
817
|
+
template <typename Dest>
|
|
818
|
+
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
|
|
819
|
+
triangular_product_impl<Lhs::Mode, true, typename Lhs::MatrixType, false, Rhs, Rhs::ColsAtCompileTime == 1>::run(
|
|
820
|
+
dst, lhs.nestedExpression(), rhs, alpha);
|
|
739
821
|
}
|
|
740
822
|
};
|
|
741
823
|
|
|
742
|
-
template<typename Lhs, typename Rhs, int ProductTag>
|
|
743
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
|
|
744
|
-
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
|
|
824
|
+
template <typename Lhs, typename Rhs, int ProductTag>
|
|
825
|
+
struct generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>
|
|
826
|
+
: generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>> {
|
|
827
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
828
|
+
|
|
829
|
+
template <typename Dest>
|
|
830
|
+
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
|
|
831
|
+
triangular_product_impl<Rhs::Mode, false, Lhs, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, false>::run(
|
|
832
|
+
dst, lhs, rhs.nestedExpression(), alpha);
|
|
752
833
|
}
|
|
753
834
|
};
|
|
754
835
|
|
|
755
|
-
|
|
756
836
|
/***************************************************************************
|
|
757
|
-
* SelfAdjoint products
|
|
758
|
-
***************************************************************************/
|
|
759
|
-
template <typename Lhs, int LhsMode, bool LhsIsVector,
|
|
760
|
-
typename Rhs, int RhsMode, bool RhsIsVector>
|
|
837
|
+
* SelfAdjoint products
|
|
838
|
+
***************************************************************************/
|
|
839
|
+
template <typename Lhs, int LhsMode, bool LhsIsVector, typename Rhs, int RhsMode, bool RhsIsVector>
|
|
761
840
|
struct selfadjoint_product_impl;
|
|
762
841
|
|
|
763
|
-
template<typename Lhs, typename Rhs, int ProductTag>
|
|
764
|
-
struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
|
|
842
|
+
template <typename Lhs, typename Rhs, int ProductTag>
|
|
843
|
+
struct generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>
|
|
844
|
+
: generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>> {
|
|
845
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
846
|
+
|
|
847
|
+
template <typename Dest>
|
|
848
|
+
static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
|
|
849
|
+
selfadjoint_product_impl<typename Lhs::MatrixType, Lhs::Mode, false, Rhs, 0, Rhs::ColsAtCompileTime == 1>::run(
|
|
850
|
+
dst, lhs.nestedExpression(), rhs, alpha);
|
|
773
851
|
}
|
|
774
852
|
};
|
|
775
853
|
|
|
776
|
-
template<typename Lhs, typename Rhs, int ProductTag>
|
|
777
|
-
struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
|
|
778
|
-
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
|
|
854
|
+
template <typename Lhs, typename Rhs, int ProductTag>
|
|
855
|
+
struct generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>
|
|
856
|
+
: generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>> {
|
|
857
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
858
|
+
|
|
859
|
+
template <typename Dest>
|
|
860
|
+
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
|
|
861
|
+
selfadjoint_product_impl<Lhs, 0, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, Rhs::Mode, false>::run(
|
|
862
|
+
dst, lhs, rhs.nestedExpression(), alpha);
|
|
786
863
|
}
|
|
787
864
|
};
|
|
788
865
|
|
|
789
|
-
|
|
790
866
|
/***************************************************************************
|
|
791
|
-
* Diagonal products
|
|
792
|
-
***************************************************************************/
|
|
793
|
-
|
|
794
|
-
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
|
|
795
|
-
struct diagonal_product_evaluator_base
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
public:
|
|
867
|
+
* Diagonal products
|
|
868
|
+
***************************************************************************/
|
|
869
|
+
|
|
870
|
+
template <typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
|
|
871
|
+
struct diagonal_product_evaluator_base : evaluator_base<Derived> {
|
|
872
|
+
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
|
873
|
+
|
|
874
|
+
public:
|
|
800
875
|
enum {
|
|
801
|
-
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost +
|
|
802
|
-
|
|
876
|
+
CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) +
|
|
877
|
+
int(evaluator<DiagonalType>::CoeffReadCost),
|
|
878
|
+
|
|
803
879
|
MatrixFlags = evaluator<MatrixType>::Flags,
|
|
804
880
|
DiagFlags = evaluator<DiagonalType>::Flags,
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
881
|
+
|
|
882
|
+
StorageOrder_ = (Derived::MaxRowsAtCompileTime == 1 && Derived::MaxColsAtCompileTime != 1) ? RowMajor
|
|
883
|
+
: (Derived::MaxColsAtCompileTime == 1 && Derived::MaxRowsAtCompileTime != 1) ? ColMajor
|
|
884
|
+
: MatrixFlags & RowMajorBit ? RowMajor
|
|
885
|
+
: ColMajor,
|
|
886
|
+
SameStorageOrder_ = int(StorageOrder_) == ((MatrixFlags & RowMajorBit) ? RowMajor : ColMajor),
|
|
887
|
+
|
|
888
|
+
ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft) ||
|
|
889
|
+
(int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)),
|
|
890
|
+
SameTypes_ = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
|
|
809
891
|
// FIXME currently we need same types, but in the future the next rule should be the one
|
|
810
|
-
//
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
892
|
+
// Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ &&
|
|
893
|
+
// bool(int(DiagFlags)&PacketAccessBit))),
|
|
894
|
+
Vectorizable_ = bool(int(MatrixFlags) & PacketAccessBit) && SameTypes_ &&
|
|
895
|
+
(SameStorageOrder_ || (MatrixFlags & LinearAccessBit) == LinearAccessBit) &&
|
|
896
|
+
(ScalarAccessOnDiag_ || (bool(int(DiagFlags) & PacketAccessBit))),
|
|
897
|
+
LinearAccessMask_ =
|
|
898
|
+
(MatrixType::RowsAtCompileTime == 1 || MatrixType::ColsAtCompileTime == 1) ? LinearAccessBit : 0,
|
|
899
|
+
Flags =
|
|
900
|
+
((HereditaryBits | LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0),
|
|
814
901
|
Alignment = evaluator<MatrixType>::Alignment,
|
|
815
902
|
|
|
816
|
-
AsScalarProduct =
|
|
817
|
-
|
|
818
|
-
|
|
903
|
+
AsScalarProduct =
|
|
904
|
+
(DiagonalType::SizeAtCompileTime == 1) ||
|
|
905
|
+
(DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::RowsAtCompileTime == 1 &&
|
|
906
|
+
ProductOrder == OnTheLeft) ||
|
|
907
|
+
(DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::ColsAtCompileTime == 1 && ProductOrder == OnTheRight)
|
|
819
908
|
};
|
|
820
|
-
|
|
821
|
-
diagonal_product_evaluator_base(const MatrixType
|
|
822
|
-
|
|
823
|
-
{
|
|
909
|
+
|
|
910
|
+
EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType& mat, const DiagonalType& diag)
|
|
911
|
+
: m_diagImpl(diag), m_matImpl(mat) {
|
|
824
912
|
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
|
|
825
913
|
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
|
826
914
|
}
|
|
827
|
-
|
|
828
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
|
|
829
|
-
|
|
830
|
-
if(AsScalarProduct)
|
|
915
|
+
|
|
916
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const {
|
|
917
|
+
if (AsScalarProduct)
|
|
831
918
|
return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
|
|
832
919
|
else
|
|
833
920
|
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
|
|
834
921
|
}
|
|
835
|
-
|
|
836
|
-
protected:
|
|
837
|
-
template<int LoadMode,typename PacketType>
|
|
838
|
-
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
|
|
839
|
-
|
|
840
|
-
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
|
922
|
+
|
|
923
|
+
protected:
|
|
924
|
+
template <int LoadMode, typename PacketType>
|
|
925
|
+
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const {
|
|
926
|
+
return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
|
|
841
927
|
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
|
|
842
928
|
}
|
|
843
|
-
|
|
844
|
-
template<int LoadMode,typename PacketType>
|
|
845
|
-
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
|
|
846
|
-
{
|
|
929
|
+
|
|
930
|
+
template <int LoadMode, typename PacketType>
|
|
931
|
+
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const {
|
|
847
932
|
enum {
|
|
848
933
|
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
|
|
849
|
-
DiagonalPacketLoadMode =
|
|
934
|
+
DiagonalPacketLoadMode = plain_enum_min(
|
|
935
|
+
LoadMode,
|
|
936
|
+
((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
|
|
850
937
|
};
|
|
851
|
-
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
|
852
|
-
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
|
|
938
|
+
return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
|
|
939
|
+
m_diagImpl.template packet<DiagonalPacketLoadMode, PacketType>(id));
|
|
853
940
|
}
|
|
854
|
-
|
|
941
|
+
|
|
942
|
+
template <int LoadMode, typename PacketType>
|
|
943
|
+
EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
|
|
944
|
+
internal::true_type) const {
|
|
945
|
+
return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
|
|
946
|
+
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
template <int LoadMode, typename PacketType>
|
|
950
|
+
EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
|
|
951
|
+
internal::false_type) const {
|
|
952
|
+
enum {
|
|
953
|
+
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
|
|
954
|
+
DiagonalPacketLoadMode = plain_enum_min(
|
|
955
|
+
LoadMode,
|
|
956
|
+
((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
|
|
957
|
+
};
|
|
958
|
+
return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
|
|
959
|
+
m_diagImpl.template packetSegment<DiagonalPacketLoadMode, PacketType>(id, begin, count));
|
|
960
|
+
}
|
|
961
|
+
|
|
855
962
|
evaluator<DiagonalType> m_diagImpl;
|
|
856
|
-
evaluator<MatrixType>
|
|
963
|
+
evaluator<MatrixType> m_matImpl;
|
|
857
964
|
};
|
|
858
965
|
|
|
859
966
|
// diagonal * dense
|
|
860
|
-
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
|
|
967
|
+
template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
|
|
861
968
|
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
|
|
862
|
-
|
|
863
|
-
{
|
|
864
|
-
typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
|
|
969
|
+
: diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
|
|
970
|
+
OnTheLeft> {
|
|
971
|
+
typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
|
|
972
|
+
OnTheLeft>
|
|
973
|
+
Base;
|
|
974
|
+
using Base::coeff;
|
|
865
975
|
using Base::m_diagImpl;
|
|
866
976
|
using Base::m_matImpl;
|
|
867
|
-
using Base::coeff;
|
|
868
977
|
typedef typename Base::Scalar Scalar;
|
|
869
|
-
|
|
978
|
+
|
|
870
979
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
|
871
980
|
typedef typename XprType::PlainObject PlainObject;
|
|
872
|
-
|
|
873
|
-
enum {
|
|
874
|
-
StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
|
|
875
|
-
};
|
|
981
|
+
typedef typename Lhs::DiagonalVectorType DiagonalType;
|
|
876
982
|
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
}
|
|
881
|
-
|
|
882
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
|
883
|
-
{
|
|
983
|
+
static constexpr int StorageOrder = Base::StorageOrder_;
|
|
984
|
+
using IsRowMajor_t = bool_constant<StorageOrder == RowMajor>;
|
|
985
|
+
|
|
986
|
+
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
|
|
987
|
+
|
|
988
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
|
|
884
989
|
return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
|
|
885
990
|
}
|
|
886
|
-
|
|
887
|
-
#ifndef
|
|
888
|
-
template<int LoadMode,typename PacketType>
|
|
889
|
-
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
|
890
|
-
{
|
|
991
|
+
|
|
992
|
+
#ifndef EIGEN_GPUCC
|
|
993
|
+
template <int LoadMode, typename PacketType>
|
|
994
|
+
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
|
891
995
|
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
|
|
892
996
|
// See also similar calls below.
|
|
893
|
-
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
|
|
894
|
-
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
|
|
997
|
+
return this->template packet_impl<LoadMode, PacketType>(row, col, row, IsRowMajor_t());
|
|
895
998
|
}
|
|
896
|
-
|
|
897
|
-
template<int LoadMode,typename PacketType>
|
|
898
|
-
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
|
899
|
-
|
|
900
|
-
|
|
999
|
+
|
|
1000
|
+
template <int LoadMode, typename PacketType>
|
|
1001
|
+
EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
|
|
1002
|
+
return packet<LoadMode, PacketType>(int(StorageOrder) == ColMajor ? idx : 0,
|
|
1003
|
+
int(StorageOrder) == ColMajor ? 0 : idx);
|
|
1004
|
+
}
|
|
1005
|
+
|
|
1006
|
+
template <int LoadMode, typename PacketType>
|
|
1007
|
+
EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
|
1008
|
+
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
|
|
1009
|
+
// See also similar calls below.
|
|
1010
|
+
return this->template packet_segment_impl<LoadMode, PacketType>(row, col, row, begin, count, IsRowMajor_t());
|
|
1011
|
+
}
|
|
1012
|
+
|
|
1013
|
+
template <int LoadMode, typename PacketType>
|
|
1014
|
+
EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
|
|
1015
|
+
return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
|
|
1016
|
+
begin, count);
|
|
901
1017
|
}
|
|
902
1018
|
#endif
|
|
903
1019
|
};
|
|
904
1020
|
|
|
905
1021
|
// dense * diagonal
|
|
906
|
-
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
|
|
1022
|
+
template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
|
|
907
1023
|
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
|
|
908
|
-
|
|
909
|
-
{
|
|
910
|
-
typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
|
|
1024
|
+
: diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
|
|
1025
|
+
OnTheRight> {
|
|
1026
|
+
typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
|
|
1027
|
+
OnTheRight>
|
|
1028
|
+
Base;
|
|
1029
|
+
using Base::coeff;
|
|
911
1030
|
using Base::m_diagImpl;
|
|
912
1031
|
using Base::m_matImpl;
|
|
913
|
-
using Base::coeff;
|
|
914
1032
|
typedef typename Base::Scalar Scalar;
|
|
915
|
-
|
|
1033
|
+
|
|
916
1034
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
|
917
1035
|
typedef typename XprType::PlainObject PlainObject;
|
|
918
|
-
|
|
919
|
-
enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
|
|
920
1036
|
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
}
|
|
925
|
-
|
|
926
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
|
927
|
-
{
|
|
1037
|
+
static constexpr int StorageOrder = Base::StorageOrder_;
|
|
1038
|
+
using IsColMajor_t = bool_constant<StorageOrder == ColMajor>;
|
|
1039
|
+
|
|
1040
|
+
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}
|
|
1041
|
+
|
|
1042
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
|
|
928
1043
|
return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
|
|
929
1044
|
}
|
|
930
|
-
|
|
931
|
-
#ifndef
|
|
932
|
-
template<int LoadMode,typename PacketType>
|
|
933
|
-
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
|
934
|
-
|
|
935
|
-
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
|
|
936
|
-
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
|
|
1045
|
+
|
|
1046
|
+
#ifndef EIGEN_GPUCC
|
|
1047
|
+
template <int LoadMode, typename PacketType>
|
|
1048
|
+
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
|
1049
|
+
return this->template packet_impl<LoadMode, PacketType>(row, col, col, IsColMajor_t());
|
|
937
1050
|
}
|
|
938
|
-
|
|
939
|
-
template<int LoadMode,typename PacketType>
|
|
940
|
-
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
|
941
|
-
|
|
942
|
-
|
|
1051
|
+
|
|
1052
|
+
template <int LoadMode, typename PacketType>
|
|
1053
|
+
EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
|
|
1054
|
+
return packet<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx);
|
|
1055
|
+
}
|
|
1056
|
+
|
|
1057
|
+
template <int LoadMode, typename PacketType>
|
|
1058
|
+
EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
|
1059
|
+
return this->template packet_segment_impl<LoadMode, PacketType>(row, col, col, begin, count, IsColMajor_t());
|
|
1060
|
+
}
|
|
1061
|
+
|
|
1062
|
+
template <int LoadMode, typename PacketType>
|
|
1063
|
+
EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
|
|
1064
|
+
return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
|
|
1065
|
+
begin, count);
|
|
943
1066
|
}
|
|
944
1067
|
#endif
|
|
945
1068
|
};
|
|
946
1069
|
|
|
947
1070
|
/***************************************************************************
|
|
948
|
-
* Products with permutation matrices
|
|
949
|
-
***************************************************************************/
|
|
1071
|
+
* Products with permutation matrices
|
|
1072
|
+
***************************************************************************/
|
|
950
1073
|
|
|
951
1074
|
/** \internal
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
|
|
1075
|
+
* \class permutation_matrix_product
|
|
1076
|
+
* Internal helper class implementing the product between a permutation matrix and a matrix.
|
|
1077
|
+
* This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h
|
|
1078
|
+
*/
|
|
1079
|
+
template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
|
|
957
1080
|
struct permutation_matrix_product;
|
|
958
1081
|
|
|
959
|
-
template<typename ExpressionType, int Side, bool Transposed>
|
|
960
|
-
struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
|
|
964
|
-
|
|
965
|
-
template<typename Dest, typename PermutationType>
|
|
966
|
-
static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
|
|
967
|
-
{
|
|
968
|
-
MatrixType mat(xpr);
|
|
969
|
-
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
|
|
970
|
-
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
|
|
971
|
-
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
|
|
972
|
-
//if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
|
|
973
|
-
if(is_same_dense(dst, mat))
|
|
974
|
-
{
|
|
975
|
-
// apply the permutation inplace
|
|
976
|
-
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
|
|
977
|
-
mask.fill(false);
|
|
978
|
-
Index r = 0;
|
|
979
|
-
while(r < perm.size())
|
|
980
|
-
{
|
|
981
|
-
// search for the next seed
|
|
982
|
-
while(r<perm.size() && mask[r]) r++;
|
|
983
|
-
if(r>=perm.size())
|
|
984
|
-
break;
|
|
985
|
-
// we got one, let's follow it until we are back to the seed
|
|
986
|
-
Index k0 = r++;
|
|
987
|
-
Index kPrev = k0;
|
|
988
|
-
mask.coeffRef(k0) = true;
|
|
989
|
-
for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
|
|
990
|
-
{
|
|
991
|
-
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
|
|
992
|
-
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
|
|
993
|
-
(dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
|
|
994
|
-
|
|
995
|
-
mask.coeffRef(k) = true;
|
|
996
|
-
kPrev = k;
|
|
997
|
-
}
|
|
998
|
-
}
|
|
999
|
-
}
|
|
1000
|
-
else
|
|
1001
|
-
{
|
|
1002
|
-
for(Index i = 0; i < n; ++i)
|
|
1003
|
-
{
|
|
1004
|
-
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
|
|
1005
|
-
(dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
|
|
1006
|
-
|
|
1007
|
-
=
|
|
1082
|
+
template <typename ExpressionType, int Side, bool Transposed>
|
|
1083
|
+
struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape> {
|
|
1084
|
+
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
|
|
1085
|
+
typedef remove_all_t<MatrixType> MatrixTypeCleaned;
|
|
1008
1086
|
|
|
1009
|
-
|
|
1010
|
-
|
|
1087
|
+
template <typename Dest, typename PermutationType>
|
|
1088
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm,
|
|
1089
|
+
const ExpressionType& xpr) {
|
|
1090
|
+
MatrixType mat(xpr);
|
|
1091
|
+
const Index n = Side == OnTheLeft ? mat.rows() : mat.cols();
|
|
1092
|
+
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
|
|
1093
|
+
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
|
|
1094
|
+
// if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
|
|
1095
|
+
if (is_same_dense(dst, mat)) {
|
|
1096
|
+
// apply the permutation inplace
|
|
1097
|
+
Matrix<bool, PermutationType::RowsAtCompileTime, 1, 0, PermutationType::MaxRowsAtCompileTime> mask(perm.size());
|
|
1098
|
+
mask.fill(false);
|
|
1099
|
+
Index r = 0;
|
|
1100
|
+
while (r < perm.size()) {
|
|
1101
|
+
// search for the next seed
|
|
1102
|
+
while (r < perm.size() && mask[r]) r++;
|
|
1103
|
+
if (r >= perm.size()) break;
|
|
1104
|
+
// we got one, let's follow it until we are back to the seed
|
|
1105
|
+
Index k0 = r++;
|
|
1106
|
+
Index kPrev = k0;
|
|
1107
|
+
mask.coeffRef(k0) = true;
|
|
1108
|
+
for (Index k = perm.indices().coeff(k0); k != k0; k = perm.indices().coeff(k)) {
|
|
1109
|
+
Block<Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime,
|
|
1110
|
+
Side == OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
|
|
1111
|
+
.swap(Block < Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime,
|
|
1112
|
+
Side == OnTheRight
|
|
1113
|
+
? 1
|
|
1114
|
+
: Dest::ColsAtCompileTime > (dst, ((Side == OnTheLeft) ^ Transposed) ? k0 : kPrev));
|
|
1115
|
+
|
|
1116
|
+
mask.coeffRef(k) = true;
|
|
1117
|
+
kPrev = k;
|
|
1011
1118
|
}
|
|
1012
1119
|
}
|
|
1120
|
+
} else {
|
|
1121
|
+
for (Index i = 0; i < n; ++i) {
|
|
1122
|
+
Block<Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side == OnTheRight ? 1 : Dest::ColsAtCompileTime>(
|
|
1123
|
+
dst, ((Side == OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
|
|
1124
|
+
|
|
1125
|
+
=
|
|
1126
|
+
|
|
1127
|
+
Block < const MatrixTypeCleaned,
|
|
1128
|
+
Side == OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,
|
|
1129
|
+
Side == OnTheRight ? 1
|
|
1130
|
+
: MatrixTypeCleaned::ColsAtCompileTime >
|
|
1131
|
+
(mat, ((Side == OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
|
|
1132
|
+
}
|
|
1013
1133
|
}
|
|
1134
|
+
}
|
|
1014
1135
|
};
|
|
1015
1136
|
|
|
1016
|
-
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1017
|
-
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1021
|
-
{
|
|
1137
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1138
|
+
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag> {
|
|
1139
|
+
template <typename Dest>
|
|
1140
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
1022
1141
|
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
|
1023
1142
|
}
|
|
1024
1143
|
};
|
|
1025
1144
|
|
|
1026
|
-
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1027
|
-
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1031
|
-
{
|
|
1145
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1146
|
+
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag> {
|
|
1147
|
+
template <typename Dest>
|
|
1148
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
1032
1149
|
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
|
1033
1150
|
}
|
|
1034
1151
|
};
|
|
1035
1152
|
|
|
1036
|
-
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1037
|
-
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
|
|
1041
|
-
{
|
|
1153
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1154
|
+
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag> {
|
|
1155
|
+
template <typename Dest>
|
|
1156
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs) {
|
|
1042
1157
|
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
|
1043
1158
|
}
|
|
1044
1159
|
};
|
|
1045
1160
|
|
|
1046
|
-
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1047
|
-
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
|
|
1051
|
-
{
|
|
1161
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1162
|
+
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag> {
|
|
1163
|
+
template <typename Dest>
|
|
1164
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs) {
|
|
1052
1165
|
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
|
1053
1166
|
}
|
|
1054
1167
|
};
|
|
1055
1168
|
|
|
1056
|
-
|
|
1057
1169
|
/***************************************************************************
|
|
1058
|
-
* Products with transpositions matrices
|
|
1059
|
-
***************************************************************************/
|
|
1170
|
+
* Products with transpositions matrices
|
|
1171
|
+
***************************************************************************/
|
|
1060
1172
|
|
|
1061
1173
|
// FIXME could we unify Transpositions and Permutation into a single "shape"??
|
|
1062
1174
|
|
|
1063
1175
|
/** \internal
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
|
|
1068
|
-
struct transposition_matrix_product
|
|
1069
|
-
{
|
|
1176
|
+
* \class transposition_matrix_product
|
|
1177
|
+
* Internal helper class implementing the product between a permutation matrix and a matrix.
|
|
1178
|
+
*/
|
|
1179
|
+
template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
|
|
1180
|
+
struct transposition_matrix_product {
|
|
1070
1181
|
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
|
|
1071
|
-
typedef
|
|
1072
|
-
|
|
1073
|
-
template<typename Dest, typename TranspositionType>
|
|
1074
|
-
static
|
|
1075
|
-
|
|
1182
|
+
typedef remove_all_t<MatrixType> MatrixTypeCleaned;
|
|
1183
|
+
|
|
1184
|
+
template <typename Dest, typename TranspositionType>
|
|
1185
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr,
|
|
1186
|
+
const ExpressionType& xpr) {
|
|
1076
1187
|
MatrixType mat(xpr);
|
|
1077
1188
|
typedef typename TranspositionType::StorageIndex StorageIndex;
|
|
1078
1189
|
const Index size = tr.size();
|
|
1079
1190
|
StorageIndex j = 0;
|
|
1080
1191
|
|
|
1081
|
-
if(!is_same_dense(dst,mat))
|
|
1082
|
-
dst = mat;
|
|
1192
|
+
if (!is_same_dense(dst, mat)) dst = mat;
|
|
1083
1193
|
|
|
1084
|
-
for(Index k=(Transposed?size-1:0)
|
|
1085
|
-
if(Index(j=tr.coeff(k))!=k)
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
else if(Side==OnTheRight)
|
|
1194
|
+
for (Index k = (Transposed ? size - 1 : 0); Transposed ? k >= 0 : k < size; Transposed ? --k : ++k)
|
|
1195
|
+
if (Index(j = tr.coeff(k)) != k) {
|
|
1196
|
+
if (Side == OnTheLeft)
|
|
1197
|
+
dst.row(k).swap(dst.row(j));
|
|
1198
|
+
else if (Side == OnTheRight)
|
|
1199
|
+
dst.col(k).swap(dst.col(j));
|
|
1089
1200
|
}
|
|
1090
1201
|
}
|
|
1091
1202
|
};
|
|
1092
1203
|
|
|
1093
|
-
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1094
|
-
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1098
|
-
{
|
|
1204
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1205
|
+
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag> {
|
|
1206
|
+
template <typename Dest>
|
|
1207
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
1099
1208
|
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
|
|
1100
1209
|
}
|
|
1101
1210
|
};
|
|
1102
1211
|
|
|
1103
|
-
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1104
|
-
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
|
|
1108
|
-
{
|
|
1212
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1213
|
+
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag> {
|
|
1214
|
+
template <typename Dest>
|
|
1215
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
1109
1216
|
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
|
|
1110
1217
|
}
|
|
1111
1218
|
};
|
|
1112
1219
|
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
{
|
|
1117
|
-
template<typename Dest>
|
|
1118
|
-
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
|
|
1119
|
-
{
|
|
1220
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1221
|
+
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag> {
|
|
1222
|
+
template <typename Dest>
|
|
1223
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) {
|
|
1120
1224
|
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
|
|
1121
1225
|
}
|
|
1122
1226
|
};
|
|
1123
1227
|
|
|
1124
|
-
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1125
|
-
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
|
|
1129
|
-
{
|
|
1228
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1229
|
+
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag> {
|
|
1230
|
+
template <typename Dest>
|
|
1231
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) {
|
|
1130
1232
|
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
|
|
1131
1233
|
}
|
|
1132
1234
|
};
|
|
1133
1235
|
|
|
1134
|
-
|
|
1236
|
+
/***************************************************************************
|
|
1237
|
+
* skew symmetric products
|
|
1238
|
+
* for now we just call the generic implementation
|
|
1239
|
+
***************************************************************************/
|
|
1240
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1241
|
+
struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, MatrixShape, ProductTag> {
|
|
1242
|
+
template <typename Dest>
|
|
1243
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
1244
|
+
generic_product_impl<typename Lhs::DenseMatrixType, Rhs, DenseShape, MatrixShape, ProductTag>::evalTo(dst, lhs,
|
|
1245
|
+
rhs);
|
|
1246
|
+
}
|
|
1247
|
+
};
|
|
1248
|
+
|
|
1249
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1250
|
+
struct generic_product_impl<Lhs, Rhs, MatrixShape, SkewSymmetricShape, ProductTag> {
|
|
1251
|
+
template <typename Dest>
|
|
1252
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
1253
|
+
generic_product_impl<Lhs, typename Rhs::DenseMatrixType, MatrixShape, DenseShape, ProductTag>::evalTo(dst, lhs,
|
|
1254
|
+
rhs);
|
|
1255
|
+
}
|
|
1256
|
+
};
|
|
1257
|
+
|
|
1258
|
+
template <typename Lhs, typename Rhs, int ProductTag>
|
|
1259
|
+
struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, SkewSymmetricShape, ProductTag> {
|
|
1260
|
+
template <typename Dest>
|
|
1261
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
|
|
1262
|
+
generic_product_impl<typename Lhs::DenseMatrixType, typename Rhs::DenseMatrixType, DenseShape, DenseShape,
|
|
1263
|
+
ProductTag>::evalTo(dst, lhs, rhs);
|
|
1264
|
+
}
|
|
1265
|
+
};
|
|
1266
|
+
|
|
1267
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1268
|
+
struct generic_product_impl<Lhs, Rhs, MatrixShape, HomogeneousShape, ProductTag>
|
|
1269
|
+
: generic_product_impl<Lhs, typename Rhs::PlainObject, MatrixShape, DenseShape, ProductTag> {};
|
|
1270
|
+
|
|
1271
|
+
template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
|
|
1272
|
+
struct generic_product_impl<Lhs, Rhs, HomogeneousShape, MatrixShape, ProductTag>
|
|
1273
|
+
: generic_product_impl<typename Lhs::PlainObject, Rhs, DenseShape, MatrixShape, ProductTag> {};
|
|
1274
|
+
|
|
1275
|
+
template <typename Lhs, typename Rhs, int ProductTag>
|
|
1276
|
+
struct generic_product_impl<Lhs, Rhs, PermutationShape, HomogeneousShape, ProductTag>
|
|
1277
|
+
: generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag> {};
|
|
1278
|
+
|
|
1279
|
+
template <typename Lhs, typename Rhs, int ProductTag>
|
|
1280
|
+
struct generic_product_impl<Lhs, Rhs, HomogeneousShape, PermutationShape, ProductTag>
|
|
1281
|
+
: generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag> {};
|
|
1282
|
+
|
|
1283
|
+
} // end namespace internal
|
|
1135
1284
|
|
|
1136
|
-
}
|
|
1285
|
+
} // end namespace Eigen
|
|
1137
1286
|
|
|
1138
|
-
#endif
|
|
1287
|
+
#endif // EIGEN_PRODUCT_EVALUATORS_H
|