@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -10,7 +10,10 @@
|
|
|
10
10
|
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
|
|
11
11
|
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../InternalHeaderCheck.h"
|
|
15
|
+
|
|
16
|
+
namespace Eigen {
|
|
14
17
|
|
|
15
18
|
namespace internal {
|
|
16
19
|
|
|
@@ -18,10 +21,10 @@ namespace internal {
|
|
|
18
21
|
// struct gemm_pack_lhs_triangular
|
|
19
22
|
// {
|
|
20
23
|
// Matrix<Scalar,mr,mr,
|
|
21
|
-
// void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar*
|
|
24
|
+
// void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* lhs_, int lhsStride, int depth, int rows)
|
|
22
25
|
// {
|
|
23
26
|
// conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
|
|
24
|
-
// const_blas_data_mapper<Scalar, StorageOrder> lhs(
|
|
27
|
+
// const_blas_data_mapper<Scalar, StorageOrder> lhs(lhs_,lhsStride);
|
|
25
28
|
// int count = 0;
|
|
26
29
|
// const int peeled_mc = (rows/mr)*mr;
|
|
27
30
|
// for(int i=0; i<peeled_mc; i+=mr)
|
|
@@ -41,432 +44,354 @@ namespace internal {
|
|
|
41
44
|
/* Optimized triangular matrix * matrix (_TRMM++) product built on top of
|
|
42
45
|
* the general matrix matrix product.
|
|
43
46
|
*/
|
|
44
|
-
template <typename Scalar, typename Index,
|
|
45
|
-
int
|
|
46
|
-
int LhsStorageOrder, bool ConjugateLhs,
|
|
47
|
-
int RhsStorageOrder, bool ConjugateRhs,
|
|
48
|
-
int ResStorageOrder, int ResInnerStride,
|
|
49
|
-
int Version = Specialized>
|
|
47
|
+
template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs,
|
|
48
|
+
int RhsStorageOrder, bool ConjugateRhs, int ResStorageOrder, int ResInnerStride, int Version = Specialized>
|
|
50
49
|
struct product_triangular_matrix_matrix;
|
|
51
50
|
|
|
52
|
-
template <typename Scalar, typename Index,
|
|
53
|
-
int
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
const Scalar* rhs, Index rhsStride,
|
|
65
|
-
Scalar* res, Index resIncr, Index resStride,
|
|
66
|
-
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
|
67
|
-
{
|
|
68
|
-
product_triangular_matrix_matrix<Scalar, Index,
|
|
69
|
-
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
|
|
70
|
-
(!LhsIsTriangular),
|
|
71
|
-
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
|
72
|
-
ConjugateRhs,
|
|
73
|
-
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
|
74
|
-
ConjugateLhs,
|
|
75
|
-
ColMajor, ResInnerStride>
|
|
76
|
-
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
|
51
|
+
template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs,
|
|
52
|
+
int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride, int Version>
|
|
53
|
+
struct product_triangular_matrix_matrix<Scalar, Index, Mode, LhsIsTriangular, LhsStorageOrder, ConjugateLhs,
|
|
54
|
+
RhsStorageOrder, ConjugateRhs, RowMajor, ResInnerStride, Version> {
|
|
55
|
+
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, Index depth, const Scalar* lhs, Index lhsStride,
|
|
56
|
+
const Scalar* rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride,
|
|
57
|
+
const Scalar& alpha, level3_blocking<Scalar, Scalar>& blocking) {
|
|
58
|
+
product_triangular_matrix_matrix<Scalar, Index, (Mode & (UnitDiag | ZeroDiag)) | ((Mode & Upper) ? Lower : Upper),
|
|
59
|
+
(!LhsIsTriangular), RhsStorageOrder == RowMajor ? ColMajor : RowMajor,
|
|
60
|
+
ConjugateRhs, LhsStorageOrder == RowMajor ? ColMajor : RowMajor, ConjugateLhs,
|
|
61
|
+
ColMajor, ResInnerStride>::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride,
|
|
62
|
+
res, resIncr, resStride, alpha, blocking);
|
|
77
63
|
}
|
|
78
64
|
};
|
|
79
65
|
|
|
80
66
|
// implements col-major += alpha * op(triangular) * op(general)
|
|
81
|
-
template <typename Scalar, typename Index, int Mode,
|
|
82
|
-
int
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
LhsStorageOrder,ConjugateLhs,
|
|
87
|
-
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
|
|
88
|
-
{
|
|
89
|
-
|
|
90
|
-
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
67
|
+
template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
68
|
+
bool ConjugateRhs, int ResInnerStride, int Version>
|
|
69
|
+
struct product_triangular_matrix_matrix<Scalar, Index, Mode, true, LhsStorageOrder, ConjugateLhs, RhsStorageOrder,
|
|
70
|
+
ConjugateRhs, ColMajor, ResInnerStride, Version> {
|
|
71
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
91
72
|
enum {
|
|
92
|
-
SmallPanelWidth
|
|
93
|
-
IsLower = (Mode&Lower) == Lower,
|
|
94
|
-
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
|
|
73
|
+
SmallPanelWidth = 2 * plain_enum_max(Traits::mr, Traits::nr),
|
|
74
|
+
IsLower = (Mode & Lower) == Lower,
|
|
75
|
+
SetDiag = (Mode & (ZeroDiag | UnitDiag)) ? 0 : 1
|
|
95
76
|
};
|
|
96
77
|
|
|
97
|
-
static EIGEN_DONT_INLINE void run(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
const Scalar* _rhs, Index rhsStride,
|
|
101
|
-
Scalar* res, Index resIncr, Index resStride,
|
|
102
|
-
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
78
|
+
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, Index _depth, const Scalar* lhs_, Index lhsStride,
|
|
79
|
+
const Scalar* rhs_, Index rhsStride, Scalar* res, Index resIncr, Index resStride,
|
|
80
|
+
const Scalar& alpha, level3_blocking<Scalar, Scalar>& blocking);
|
|
103
81
|
};
|
|
104
82
|
|
|
105
|
-
template <typename Scalar, typename Index, int Mode,
|
|
106
|
-
int
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
for(Index k2=IsLower ? depth : 0;
|
|
162
|
-
IsLower ? k2>0 : k2<depth;
|
|
163
|
-
IsLower ? k2-=kc : k2+=kc)
|
|
164
|
-
{
|
|
165
|
-
Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
|
|
166
|
-
Index actual_k2 = IsLower ? k2-actual_kc : k2;
|
|
167
|
-
|
|
168
|
-
// align blocks with the end of the triangular part for trapezoidal lhs
|
|
169
|
-
if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
|
|
170
|
-
{
|
|
171
|
-
actual_kc = rows-k2;
|
|
172
|
-
k2 = k2+actual_kc-kc;
|
|
173
|
-
}
|
|
83
|
+
template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
84
|
+
bool ConjugateRhs, int ResInnerStride, int Version>
|
|
85
|
+
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<
|
|
86
|
+
Scalar, Index, Mode, true, LhsStorageOrder, ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, ResInnerStride,
|
|
87
|
+
Version>::run(Index _rows, Index _cols, Index _depth, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
|
|
88
|
+
Index rhsStride, Scalar* res_, Index resIncr, Index resStride, const Scalar& alpha,
|
|
89
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
90
|
+
// strip zeros
|
|
91
|
+
Index diagSize = (std::min)(_rows, _depth);
|
|
92
|
+
Index rows = IsLower ? _rows : diagSize;
|
|
93
|
+
Index depth = IsLower ? diagSize : _depth;
|
|
94
|
+
Index cols = _cols;
|
|
95
|
+
|
|
96
|
+
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
97
|
+
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
|
98
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
99
|
+
LhsMapper lhs(lhs_, lhsStride);
|
|
100
|
+
RhsMapper rhs(rhs_, rhsStride);
|
|
101
|
+
ResMapper res(res_, resStride, resIncr);
|
|
102
|
+
|
|
103
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
104
|
+
Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
|
|
105
|
+
// The small panel size must not be larger than blocking size.
|
|
106
|
+
// Usually this should never be the case because SmallPanelWidth^2 is very small
|
|
107
|
+
// compared to L2 cache size, but let's be safe:
|
|
108
|
+
Index panelWidth = (std::min)(Index(SmallPanelWidth), (std::min)(kc, mc));
|
|
109
|
+
|
|
110
|
+
std::size_t sizeA = kc * mc;
|
|
111
|
+
std::size_t sizeB = kc * cols;
|
|
112
|
+
|
|
113
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
114
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
115
|
+
|
|
116
|
+
Matrix<Scalar, SmallPanelWidth, SmallPanelWidth, LhsStorageOrder> triangularBuffer;
|
|
117
|
+
triangularBuffer.setZero();
|
|
118
|
+
if ((Mode & ZeroDiag) == ZeroDiag)
|
|
119
|
+
triangularBuffer.diagonal().setZero();
|
|
120
|
+
else
|
|
121
|
+
triangularBuffer.diagonal().setOnes();
|
|
122
|
+
|
|
123
|
+
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
124
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
125
|
+
LhsStorageOrder>
|
|
126
|
+
pack_lhs;
|
|
127
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
128
|
+
|
|
129
|
+
for (Index k2 = IsLower ? depth : 0; IsLower ? k2 > 0 : k2 < depth; IsLower ? k2 -= kc : k2 += kc) {
|
|
130
|
+
Index actual_kc = (std::min)(IsLower ? k2 : depth - k2, kc);
|
|
131
|
+
Index actual_k2 = IsLower ? k2 - actual_kc : k2;
|
|
132
|
+
|
|
133
|
+
// align blocks with the end of the triangular part for trapezoidal lhs
|
|
134
|
+
if ((!IsLower) && (k2 < rows) && (k2 + actual_kc > rows)) {
|
|
135
|
+
actual_kc = rows - k2;
|
|
136
|
+
k2 = k2 + actual_kc - kc;
|
|
137
|
+
}
|
|
174
138
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
for (Index k
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
|
|
202
|
-
}
|
|
203
|
-
pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);
|
|
204
|
-
|
|
205
|
-
gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
|
|
206
|
-
actualPanelWidth, actualPanelWidth, cols, alpha,
|
|
207
|
-
actualPanelWidth, actual_kc, 0, blockBOffset);
|
|
139
|
+
pack_rhs(blockB, rhs.getSubMapper(actual_k2, 0), actual_kc, cols);
|
|
140
|
+
|
|
141
|
+
// the selected lhs's panel has to be split in three different parts:
|
|
142
|
+
// 1 - the part which is zero => skip it
|
|
143
|
+
// 2 - the diagonal block => special kernel
|
|
144
|
+
// 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
|
|
145
|
+
|
|
146
|
+
// the block diagonal, if any:
|
|
147
|
+
if (IsLower || actual_k2 < rows) {
|
|
148
|
+
// for each small vertical panels of lhs
|
|
149
|
+
for (Index k1 = 0; k1 < actual_kc; k1 += panelWidth) {
|
|
150
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - k1, panelWidth);
|
|
151
|
+
Index lengthTarget = IsLower ? actual_kc - k1 - actualPanelWidth : k1;
|
|
152
|
+
Index startBlock = actual_k2 + k1;
|
|
153
|
+
Index blockBOffset = k1;
|
|
154
|
+
|
|
155
|
+
// => GEBP with the micro triangular block
|
|
156
|
+
// The trick is to pack this micro block while filling the opposite triangular part with zeros.
|
|
157
|
+
// To this end we do an extra triangular copy to a small temporary buffer
|
|
158
|
+
for (Index k = 0; k < actualPanelWidth; ++k) {
|
|
159
|
+
if (SetDiag) triangularBuffer.coeffRef(k, k) = lhs(startBlock + k, startBlock + k);
|
|
160
|
+
for (Index i = IsLower ? k + 1 : 0; IsLower ? i < actualPanelWidth : i < k; ++i)
|
|
161
|
+
triangularBuffer.coeffRef(i, k) = lhs(startBlock + i, startBlock + k);
|
|
162
|
+
}
|
|
163
|
+
pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth,
|
|
164
|
+
actualPanelWidth);
|
|
208
165
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
{
|
|
212
|
-
Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
|
|
166
|
+
gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
|
|
167
|
+
actualPanelWidth, actual_kc, 0, blockBOffset);
|
|
213
168
|
|
|
214
|
-
|
|
169
|
+
// GEBP with remaining micro panel
|
|
170
|
+
if (lengthTarget > 0) {
|
|
171
|
+
Index startTarget = IsLower ? actual_k2 + k1 + actualPanelWidth : actual_k2;
|
|
215
172
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
173
|
+
pack_lhs(blockA, lhs.getSubMapper(startTarget, startBlock), actualPanelWidth, lengthTarget);
|
|
174
|
+
|
|
175
|
+
gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
|
|
176
|
+
actualPanelWidth, actual_kc, 0, blockBOffset);
|
|
220
177
|
}
|
|
221
178
|
}
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
actual_kc, cols, alpha, -1, -1, 0, 0);
|
|
234
|
-
}
|
|
179
|
+
}
|
|
180
|
+
// the part below (lower case) or above (upper case) the diagonal => GEPP
|
|
181
|
+
{
|
|
182
|
+
Index start = IsLower ? k2 : 0;
|
|
183
|
+
Index end = IsLower ? rows : (std::min)(actual_k2, rows);
|
|
184
|
+
for (Index i2 = start; i2 < end; i2 += mc) {
|
|
185
|
+
const Index actual_mc = (std::min)(i2 + mc, end) - i2;
|
|
186
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
187
|
+
LhsStorageOrder, false>()(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
|
|
188
|
+
|
|
189
|
+
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0);
|
|
235
190
|
}
|
|
236
191
|
}
|
|
237
192
|
}
|
|
193
|
+
}
|
|
238
194
|
|
|
239
195
|
// implements col-major += alpha * op(general) * op(triangular)
|
|
240
|
-
template <typename Scalar, typename Index, int Mode,
|
|
241
|
-
int
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
LhsStorageOrder,ConjugateLhs,
|
|
246
|
-
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
|
|
247
|
-
{
|
|
248
|
-
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
196
|
+
template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
197
|
+
bool ConjugateRhs, int ResInnerStride, int Version>
|
|
198
|
+
struct product_triangular_matrix_matrix<Scalar, Index, Mode, false, LhsStorageOrder, ConjugateLhs, RhsStorageOrder,
|
|
199
|
+
ConjugateRhs, ColMajor, ResInnerStride, Version> {
|
|
200
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
249
201
|
enum {
|
|
250
|
-
SmallPanelWidth
|
|
251
|
-
IsLower = (Mode&Lower) == Lower,
|
|
252
|
-
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
|
|
202
|
+
SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr),
|
|
203
|
+
IsLower = (Mode & Lower) == Lower,
|
|
204
|
+
SetDiag = (Mode & (ZeroDiag | UnitDiag)) ? 0 : 1
|
|
253
205
|
};
|
|
254
206
|
|
|
255
|
-
static EIGEN_DONT_INLINE void run(
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
const Scalar* _rhs, Index rhsStride,
|
|
259
|
-
Scalar* res, Index resIncr, Index resStride,
|
|
260
|
-
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
207
|
+
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, Index _depth, const Scalar* lhs_, Index lhsStride,
|
|
208
|
+
const Scalar* rhs_, Index rhsStride, Scalar* res, Index resIncr, Index resStride,
|
|
209
|
+
const Scalar& alpha, level3_blocking<Scalar, Scalar>& blocking);
|
|
261
210
|
};
|
|
262
211
|
|
|
263
|
-
template <typename Scalar, typename Index, int Mode,
|
|
264
|
-
int
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
|
|
317
|
-
Index actual_k2 = IsLower ? k2 : k2-actual_kc;
|
|
318
|
-
|
|
319
|
-
// align blocks with the end of the triangular part for trapezoidal rhs
|
|
320
|
-
if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
|
|
321
|
-
{
|
|
322
|
-
actual_kc = cols-k2;
|
|
323
|
-
k2 = actual_k2 + actual_kc - kc;
|
|
324
|
-
}
|
|
212
|
+
template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
213
|
+
bool ConjugateRhs, int ResInnerStride, int Version>
|
|
214
|
+
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<
|
|
215
|
+
Scalar, Index, Mode, false, LhsStorageOrder, ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, ResInnerStride,
|
|
216
|
+
Version>::run(Index _rows, Index _cols, Index _depth, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
|
|
217
|
+
Index rhsStride, Scalar* res_, Index resIncr, Index resStride, const Scalar& alpha,
|
|
218
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
219
|
+
const Index PacketBytes = packet_traits<Scalar>::size * sizeof(Scalar);
|
|
220
|
+
// strip zeros
|
|
221
|
+
Index diagSize = (std::min)(_cols, _depth);
|
|
222
|
+
Index rows = _rows;
|
|
223
|
+
Index depth = IsLower ? _depth : diagSize;
|
|
224
|
+
Index cols = IsLower ? diagSize : _cols;
|
|
225
|
+
|
|
226
|
+
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
227
|
+
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
|
228
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
229
|
+
LhsMapper lhs(lhs_, lhsStride);
|
|
230
|
+
RhsMapper rhs(rhs_, rhsStride);
|
|
231
|
+
ResMapper res(res_, resStride, resIncr);
|
|
232
|
+
|
|
233
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
234
|
+
Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
|
|
235
|
+
|
|
236
|
+
std::size_t sizeA = kc * mc;
|
|
237
|
+
std::size_t sizeB = kc * cols + EIGEN_MAX_ALIGN_BYTES / sizeof(Scalar);
|
|
238
|
+
|
|
239
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
240
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
241
|
+
|
|
242
|
+
Matrix<Scalar, SmallPanelWidth, SmallPanelWidth, RhsStorageOrder> triangularBuffer;
|
|
243
|
+
triangularBuffer.setZero();
|
|
244
|
+
if ((Mode & ZeroDiag) == ZeroDiag)
|
|
245
|
+
triangularBuffer.diagonal().setZero();
|
|
246
|
+
else
|
|
247
|
+
triangularBuffer.diagonal().setOnes();
|
|
248
|
+
|
|
249
|
+
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
250
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
251
|
+
LhsStorageOrder>
|
|
252
|
+
pack_lhs;
|
|
253
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
254
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder, false, true> pack_rhs_panel;
|
|
255
|
+
|
|
256
|
+
for (Index k2 = IsLower ? 0 : depth; IsLower ? k2 < depth : k2 > 0; IsLower ? k2 += kc : k2 -= kc) {
|
|
257
|
+
Index actual_kc = (std::min)(IsLower ? depth - k2 : k2, kc);
|
|
258
|
+
Index actual_k2 = IsLower ? k2 : k2 - actual_kc;
|
|
259
|
+
|
|
260
|
+
// align blocks with the end of the triangular part for trapezoidal rhs
|
|
261
|
+
if (IsLower && (k2 < cols) && (actual_k2 + actual_kc > cols)) {
|
|
262
|
+
actual_kc = cols - k2;
|
|
263
|
+
k2 = actual_k2 + actual_kc - kc;
|
|
264
|
+
}
|
|
325
265
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
{
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
for (Index j=0;j<actualPanelWidth;++j)
|
|
353
|
-
{
|
|
354
|
-
if (SetDiag)
|
|
355
|
-
triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
|
|
356
|
-
for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
|
|
357
|
-
triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
pack_rhs_panel(blockB+j2*actual_kc,
|
|
361
|
-
RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
|
|
362
|
-
actualPanelWidth, actualPanelWidth,
|
|
363
|
-
actual_kc, j2);
|
|
266
|
+
// remaining size
|
|
267
|
+
Index rs = IsLower ? (std::min)(cols, actual_k2) : cols - k2;
|
|
268
|
+
// size of the triangular part
|
|
269
|
+
Index ts = (IsLower && actual_k2 >= cols) ? 0 : actual_kc;
|
|
270
|
+
|
|
271
|
+
Scalar* geb = blockB + ts * ts;
|
|
272
|
+
geb = geb + internal::first_aligned<PacketBytes>(geb, PacketBytes / sizeof(Scalar));
|
|
273
|
+
|
|
274
|
+
pack_rhs(geb, rhs.getSubMapper(actual_k2, IsLower ? 0 : k2), actual_kc, rs);
|
|
275
|
+
|
|
276
|
+
// pack the triangular part of the rhs padding the unrolled blocks with zeros
|
|
277
|
+
if (ts > 0) {
|
|
278
|
+
for (Index j2 = 0; j2 < actual_kc; j2 += SmallPanelWidth) {
|
|
279
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
|
|
280
|
+
Index actual_j2 = actual_k2 + j2;
|
|
281
|
+
Index panelOffset = IsLower ? j2 + actualPanelWidth : 0;
|
|
282
|
+
Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
|
|
283
|
+
// general part
|
|
284
|
+
pack_rhs_panel(blockB + j2 * actual_kc, rhs.getSubMapper(actual_k2 + panelOffset, actual_j2), panelLength,
|
|
285
|
+
actualPanelWidth, actual_kc, panelOffset);
|
|
286
|
+
|
|
287
|
+
// append the triangular part via a temporary buffer
|
|
288
|
+
for (Index j = 0; j < actualPanelWidth; ++j) {
|
|
289
|
+
if (SetDiag) triangularBuffer.coeffRef(j, j) = rhs(actual_j2 + j, actual_j2 + j);
|
|
290
|
+
for (Index k = IsLower ? j + 1 : 0; IsLower ? k < actualPanelWidth : k < j; ++k)
|
|
291
|
+
triangularBuffer.coeffRef(k, j) = rhs(actual_j2 + k, actual_j2 + j);
|
|
364
292
|
}
|
|
293
|
+
|
|
294
|
+
pack_rhs_panel(blockB + j2 * actual_kc, RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
|
|
295
|
+
actualPanelWidth, actualPanelWidth, actual_kc, j2);
|
|
365
296
|
}
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
for (Index i2 = 0; i2 < rows; i2 += mc) {
|
|
300
|
+
const Index actual_mc = (std::min)(mc, rows - i2);
|
|
301
|
+
pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
|
|
302
|
+
|
|
303
|
+
// triangular kernel
|
|
304
|
+
if (ts > 0) {
|
|
305
|
+
for (Index j2 = 0; j2 < actual_kc; j2 += SmallPanelWidth) {
|
|
306
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
|
|
307
|
+
Index panelLength = IsLower ? actual_kc - j2 : j2 + actualPanelWidth;
|
|
308
|
+
Index blockOffset = IsLower ? j2 : 0;
|
|
366
309
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
|
|
371
|
-
|
|
372
|
-
// triangular kernel
|
|
373
|
-
if(ts>0)
|
|
374
|
-
{
|
|
375
|
-
for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
|
|
376
|
-
{
|
|
377
|
-
Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
|
|
378
|
-
Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
|
|
379
|
-
Index blockOffset = IsLower ? j2 : 0;
|
|
380
|
-
|
|
381
|
-
gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
|
|
382
|
-
blockA, blockB+j2*actual_kc,
|
|
383
|
-
actual_mc, panelLength, actualPanelWidth,
|
|
384
|
-
alpha,
|
|
385
|
-
actual_kc, actual_kc, // strides
|
|
386
|
-
blockOffset, blockOffset);// offsets
|
|
387
|
-
}
|
|
310
|
+
gebp_kernel(res.getSubMapper(i2, actual_k2 + j2), blockA, blockB + j2 * actual_kc, actual_mc, panelLength,
|
|
311
|
+
actualPanelWidth, alpha, actual_kc, actual_kc, // strides
|
|
312
|
+
blockOffset, blockOffset); // offsets
|
|
388
313
|
}
|
|
389
|
-
gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
|
|
390
|
-
blockA, geb, actual_mc, actual_kc, rs,
|
|
391
|
-
alpha,
|
|
392
|
-
-1, -1, 0, 0);
|
|
393
314
|
}
|
|
315
|
+
gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2), blockA, geb, actual_mc, actual_kc, rs, alpha, -1, -1, 0, 0);
|
|
394
316
|
}
|
|
395
317
|
}
|
|
318
|
+
}
|
|
396
319
|
|
|
397
320
|
/***************************************************************************
|
|
398
|
-
* Wrapper to product_triangular_matrix_matrix
|
|
399
|
-
***************************************************************************/
|
|
321
|
+
* Wrapper to product_triangular_matrix_matrix
|
|
322
|
+
***************************************************************************/
|
|
400
323
|
|
|
401
|
-
}
|
|
324
|
+
} // end namespace internal
|
|
402
325
|
|
|
403
326
|
namespace internal {
|
|
404
|
-
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
|
|
405
|
-
struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
typedef typename
|
|
410
|
-
typedef typename Rhs::Scalar RhsScalar;
|
|
327
|
+
template <int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
|
|
328
|
+
struct triangular_product_impl<Mode, LhsIsTriangular, Lhs, false, Rhs, false> {
|
|
329
|
+
template <typename Dest>
|
|
330
|
+
static void run(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const typename Dest::Scalar& alpha) {
|
|
331
|
+
typedef typename Lhs::Scalar LhsScalar;
|
|
332
|
+
typedef typename Rhs::Scalar RhsScalar;
|
|
411
333
|
typedef typename Dest::Scalar Scalar;
|
|
412
|
-
|
|
334
|
+
|
|
413
335
|
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
|
414
336
|
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
|
|
415
|
-
typedef
|
|
337
|
+
typedef internal::remove_all_t<ActualLhsType> ActualLhsTypeCleaned;
|
|
416
338
|
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
|
417
339
|
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
|
418
|
-
typedef
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
340
|
+
typedef internal::remove_all_t<ActualRhsType> ActualRhsTypeCleaned;
|
|
341
|
+
|
|
342
|
+
internal::add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
|
|
343
|
+
internal::add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
|
|
344
|
+
|
|
345
|
+
// Empty product, return early. Otherwise, we get `nullptr` use errors below when we try to access
|
|
346
|
+
// coeffRef(0,0).
|
|
347
|
+
if (lhs.size() == 0 || rhs.size() == 0) {
|
|
348
|
+
return;
|
|
349
|
+
}
|
|
422
350
|
|
|
423
351
|
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
|
|
424
352
|
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
|
|
425
353
|
Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
|
|
426
354
|
|
|
427
|
-
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
|
|
428
|
-
|
|
355
|
+
typedef internal::gemm_blocking_space<(Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, Scalar, Scalar,
|
|
356
|
+
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime,
|
|
357
|
+
Lhs::MaxColsAtCompileTime, 4>
|
|
358
|
+
BlockingType;
|
|
429
359
|
|
|
430
|
-
enum { IsLower = (Mode&Lower) == Lower };
|
|
431
|
-
Index stripedRows
|
|
432
|
-
Index stripedCols
|
|
433
|
-
Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
|
|
434
|
-
: ((IsLower)
|
|
360
|
+
enum { IsLower = (Mode & Lower) == Lower };
|
|
361
|
+
Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(), lhs.cols());
|
|
362
|
+
Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(), rhs.rows());
|
|
363
|
+
Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(), lhs.rows()))
|
|
364
|
+
: ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(), rhs.cols()));
|
|
435
365
|
|
|
436
366
|
BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1, false);
|
|
437
367
|
|
|
438
|
-
internal::product_triangular_matrix_matrix<
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
stripedRows, stripedCols, stripedDepth,
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
);
|
|
368
|
+
internal::product_triangular_matrix_matrix<
|
|
369
|
+
Scalar, Index, Mode, LhsIsTriangular,
|
|
370
|
+
(internal::traits<ActualLhsTypeCleaned>::Flags & RowMajorBit) ? RowMajor : ColMajor,
|
|
371
|
+
LhsBlasTraits::NeedToConjugate,
|
|
372
|
+
(internal::traits<ActualRhsTypeCleaned>::Flags & RowMajorBit) ? RowMajor : ColMajor,
|
|
373
|
+
RhsBlasTraits::NeedToConjugate, (internal::traits<Dest>::Flags & RowMajorBit) ? RowMajor : ColMajor,
|
|
374
|
+
Dest::InnerStrideAtCompileTime>::run(stripedRows, stripedCols, stripedDepth, // sizes
|
|
375
|
+
&lhs.coeffRef(0, 0), lhs.outerStride(), // lhs info
|
|
376
|
+
&rhs.coeffRef(0, 0), rhs.outerStride(), // rhs info
|
|
377
|
+
&dst.coeffRef(0, 0), dst.innerStride(), dst.outerStride(), // result info
|
|
378
|
+
actualAlpha, blocking);
|
|
450
379
|
|
|
451
380
|
// Apply correction if the diagonal is unit and a scalar factor was nested:
|
|
452
|
-
if ((Mode&UnitDiag)==UnitDiag)
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
|
|
460
|
-
{
|
|
461
|
-
Index diagSize = (std::min)(rhs.rows(),rhs.cols());
|
|
462
|
-
dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
|
|
381
|
+
if ((Mode & UnitDiag) == UnitDiag) {
|
|
382
|
+
if (LhsIsTriangular && !numext::is_exactly_one(lhs_alpha)) {
|
|
383
|
+
Index diagSize = (std::min)(lhs.rows(), lhs.cols());
|
|
384
|
+
dst.topRows(diagSize) -= ((lhs_alpha - LhsScalar(1)) * a_rhs).topRows(diagSize);
|
|
385
|
+
} else if ((!LhsIsTriangular) && !numext::is_exactly_one(rhs_alpha)) {
|
|
386
|
+
Index diagSize = (std::min)(rhs.rows(), rhs.cols());
|
|
387
|
+
dst.leftCols(diagSize) -= (rhs_alpha - RhsScalar(1)) * a_lhs.leftCols(diagSize);
|
|
463
388
|
}
|
|
464
389
|
}
|
|
465
390
|
}
|
|
466
391
|
};
|
|
467
392
|
|
|
468
|
-
}
|
|
393
|
+
} // end namespace internal
|
|
469
394
|
|
|
470
|
-
}
|
|
395
|
+
} // end namespace Eigen
|
|
471
396
|
|
|
472
|
-
#endif
|
|
397
|
+
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
|