@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
// for linear algebra.
|
|
3
3
|
//
|
|
4
4
|
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
5
|
+
// Modifications Copyright (C) 2022 Intel Corporation
|
|
5
6
|
//
|
|
6
7
|
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
8
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
@@ -10,328 +11,378 @@
|
|
|
10
11
|
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
|
|
11
12
|
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
// IWYU pragma: private
|
|
15
|
+
#include "../InternalHeaderCheck.h"
|
|
16
|
+
|
|
17
|
+
namespace Eigen {
|
|
14
18
|
|
|
15
19
|
namespace internal {
|
|
16
20
|
|
|
21
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
|
|
22
|
+
bool Specialized>
|
|
23
|
+
struct trsmKernelL {
|
|
24
|
+
// Generic Implementation of triangular solve for triangular matrix on left and multiple rhs.
|
|
25
|
+
// Handles non-packed matrices.
|
|
26
|
+
static void kernel(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherIncr,
|
|
27
|
+
Index otherStride);
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
|
|
31
|
+
bool Specialized>
|
|
32
|
+
struct trsmKernelR {
|
|
33
|
+
// Generic Implementation of triangular solve for triangular matrix on right and multiple lhs.
|
|
34
|
+
// Handles non-packed matrices.
|
|
35
|
+
static void kernel(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherIncr,
|
|
36
|
+
Index otherStride);
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
|
|
40
|
+
bool Specialized>
|
|
41
|
+
EIGEN_STRONG_INLINE void trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
|
|
42
|
+
Specialized>::kernel(Index size, Index otherSize, const Scalar* _tri,
|
|
43
|
+
Index triStride, Scalar* _other, Index otherIncr,
|
|
44
|
+
Index otherStride) {
|
|
45
|
+
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
|
46
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
|
|
47
|
+
TriMapper tri(_tri, triStride);
|
|
48
|
+
OtherMapper other(_other, otherStride, otherIncr);
|
|
49
|
+
|
|
50
|
+
enum { IsLower = (Mode & Lower) == Lower };
|
|
51
|
+
conj_if<Conjugate> conj;
|
|
52
|
+
|
|
53
|
+
// tr solve
|
|
54
|
+
for (Index k = 0; k < size; ++k) {
|
|
55
|
+
// TODO write a small kernel handling this (can be shared with trsv)
|
|
56
|
+
Index i = IsLower ? k : -k - 1;
|
|
57
|
+
Index rs = size - k - 1; // remaining size
|
|
58
|
+
Index s = TriStorageOrder == RowMajor ? (IsLower ? 0 : i + 1) : IsLower ? i + 1 : i - rs;
|
|
59
|
+
|
|
60
|
+
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1) / conj(tri(i, i)));
|
|
61
|
+
for (Index j = 0; j < otherSize; ++j) {
|
|
62
|
+
if (TriStorageOrder == RowMajor) {
|
|
63
|
+
Scalar b(0);
|
|
64
|
+
const Scalar* l = &tri(i, s);
|
|
65
|
+
typename OtherMapper::LinearMapper r = other.getLinearMapper(s, j);
|
|
66
|
+
for (Index i3 = 0; i3 < k; ++i3) b += conj(l[i3]) * r(i3);
|
|
67
|
+
|
|
68
|
+
other(i, j) = (other(i, j) - b) * a;
|
|
69
|
+
} else {
|
|
70
|
+
Scalar& otherij = other(i, j);
|
|
71
|
+
otherij *= a;
|
|
72
|
+
Scalar b = otherij;
|
|
73
|
+
typename OtherMapper::LinearMapper r = other.getLinearMapper(s, j);
|
|
74
|
+
typename TriMapper::LinearMapper l = tri.getLinearMapper(s, i);
|
|
75
|
+
for (Index i3 = 0; i3 < rs; ++i3) r(i3) -= b * conj(l(i3));
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
|
|
82
|
+
bool Specialized>
|
|
83
|
+
EIGEN_STRONG_INLINE void trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
|
|
84
|
+
Specialized>::kernel(Index size, Index otherSize, const Scalar* _tri,
|
|
85
|
+
Index triStride, Scalar* _other, Index otherIncr,
|
|
86
|
+
Index otherStride) {
|
|
87
|
+
typedef typename NumTraits<Scalar>::Real RealScalar;
|
|
88
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
|
|
89
|
+
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
|
90
|
+
LhsMapper lhs(_other, otherStride, otherIncr);
|
|
91
|
+
RhsMapper rhs(_tri, triStride);
|
|
92
|
+
|
|
93
|
+
enum { RhsStorageOrder = TriStorageOrder, IsLower = (Mode & Lower) == Lower };
|
|
94
|
+
conj_if<Conjugate> conj;
|
|
95
|
+
|
|
96
|
+
for (Index k = 0; k < size; ++k) {
|
|
97
|
+
Index j = IsLower ? size - k - 1 : k;
|
|
98
|
+
|
|
99
|
+
typename LhsMapper::LinearMapper r = lhs.getLinearMapper(0, j);
|
|
100
|
+
for (Index k3 = 0; k3 < k; ++k3) {
|
|
101
|
+
Scalar b = conj(rhs(IsLower ? j + 1 + k3 : k3, j));
|
|
102
|
+
typename LhsMapper::LinearMapper a = lhs.getLinearMapper(0, IsLower ? j + 1 + k3 : k3);
|
|
103
|
+
for (Index i = 0; i < otherSize; ++i) r(i) -= a(i) * b;
|
|
104
|
+
}
|
|
105
|
+
if ((Mode & UnitDiag) == 0) {
|
|
106
|
+
Scalar inv_rjj = RealScalar(1) / conj(rhs(j, j));
|
|
107
|
+
for (Index i = 0; i < otherSize; ++i) r(i) *= inv_rjj;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
17
112
|
// if the rhs is row major, let's transpose the product
|
|
18
|
-
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder,
|
|
19
|
-
|
|
20
|
-
{
|
|
21
|
-
static void run(
|
|
22
|
-
|
|
23
|
-
const Scalar* tri, Index triStride,
|
|
24
|
-
Scalar* _other, Index otherIncr, Index otherStride,
|
|
25
|
-
level3_blocking<Scalar,Scalar>& blocking)
|
|
26
|
-
{
|
|
113
|
+
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder,
|
|
114
|
+
int OtherInnerStride>
|
|
115
|
+
struct triangular_solve_matrix<Scalar, Index, Side, Mode, Conjugate, TriStorageOrder, RowMajor, OtherInnerStride> {
|
|
116
|
+
static void run(Index size, Index cols, const Scalar* tri, Index triStride, Scalar* _other, Index otherIncr,
|
|
117
|
+
Index otherStride, level3_blocking<Scalar, Scalar>& blocking) {
|
|
27
118
|
triangular_solve_matrix<
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
|
|
32
|
-
::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
|
|
119
|
+
Scalar, Index, Side == OnTheLeft ? OnTheRight : OnTheLeft, (Mode & UnitDiag) | ((Mode & Upper) ? Lower : Upper),
|
|
120
|
+
NumTraits<Scalar>::IsComplex && Conjugate, TriStorageOrder == RowMajor ? ColMajor : RowMajor, ColMajor,
|
|
121
|
+
OtherInnerStride>::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
|
|
33
122
|
}
|
|
34
123
|
};
|
|
35
124
|
|
|
36
125
|
/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
|
|
37
126
|
*/
|
|
38
|
-
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
|
|
39
|
-
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
|
|
40
|
-
{
|
|
41
|
-
static EIGEN_DONT_INLINE void run(
|
|
42
|
-
Index size, Index otherSize,
|
|
43
|
-
const Scalar* _tri, Index triStride,
|
|
44
|
-
Scalar* _other, Index otherIncr, Index otherStride,
|
|
45
|
-
level3_blocking<Scalar,Scalar>& blocking);
|
|
46
|
-
};
|
|
47
127
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
level3_blocking<Scalar,Scalar>& blocking)
|
|
53
|
-
{
|
|
54
|
-
Index cols = otherSize;
|
|
55
|
-
|
|
56
|
-
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
|
57
|
-
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
|
|
58
|
-
TriMapper tri(_tri, triStride);
|
|
59
|
-
OtherMapper other(_other, otherStride, otherIncr);
|
|
60
|
-
|
|
61
|
-
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
62
|
-
|
|
63
|
-
enum {
|
|
64
|
-
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
|
|
65
|
-
IsLower = (Mode&Lower) == Lower
|
|
66
|
-
};
|
|
67
|
-
|
|
68
|
-
Index kc = blocking.kc(); // cache block size along the K direction
|
|
69
|
-
Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction
|
|
70
|
-
|
|
71
|
-
std::size_t sizeA = kc*mc;
|
|
72
|
-
std::size_t sizeB = kc*cols;
|
|
73
|
-
|
|
74
|
-
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
75
|
-
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
76
|
-
|
|
77
|
-
conj_if<Conjugate> conj;
|
|
78
|
-
gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
|
|
79
|
-
gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, TriStorageOrder> pack_lhs;
|
|
80
|
-
gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
|
|
81
|
-
|
|
82
|
-
// the goal here is to subdivise the Rhs panels such that we keep some cache
|
|
83
|
-
// coherence when accessing the rhs elements
|
|
84
|
-
std::ptrdiff_t l1, l2, l3;
|
|
85
|
-
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
|
86
|
-
Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max<Index>(otherStride,size)) : 0;
|
|
87
|
-
subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
|
|
128
|
+
struct triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, Conjugate, TriStorageOrder, ColMajor, OtherInnerStride> {
|
|
129
|
+
static EIGEN_DONT_INLINE void run(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other,
|
|
130
|
+
Index otherIncr, Index otherStride, level3_blocking<Scalar, Scalar>& blocking);
|
|
131
|
+
};
|
|
88
132
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
133
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
134
|
+
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, Conjugate, TriStorageOrder, ColMajor,
|
|
135
|
+
OtherInnerStride>::run(Index size, Index otherSize, const Scalar* _tri,
|
|
136
|
+
Index triStride, Scalar* _other, Index otherIncr,
|
|
137
|
+
Index otherStride,
|
|
138
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
139
|
+
Index cols = otherSize;
|
|
140
|
+
|
|
141
|
+
std::ptrdiff_t l1, l2, l3;
|
|
142
|
+
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
|
143
|
+
|
|
144
|
+
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS
|
|
145
|
+
EIGEN_IF_CONSTEXPR(
|
|
146
|
+
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
|
147
|
+
// Very rough cutoffs to determine when to call trsm w/o packing
|
|
148
|
+
// For small problem sizes trsmKernel compiled with clang is generally faster.
|
|
149
|
+
// TODO: Investigate better heuristics for cutoffs.
|
|
150
|
+
double L2Cap = 0.5; // 50% of L2 size
|
|
151
|
+
if (size < avx512_trsm_cutoff<Scalar>(l2, cols, L2Cap)) {
|
|
152
|
+
trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, 1, /*Specialized=*/true>::kernel(
|
|
153
|
+
size, cols, _tri, triStride, _other, 1, otherStride);
|
|
154
|
+
return;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
#endif
|
|
158
|
+
|
|
159
|
+
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
|
160
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
|
|
161
|
+
TriMapper tri(_tri, triStride);
|
|
162
|
+
OtherMapper other(_other, otherStride, otherIncr);
|
|
163
|
+
|
|
164
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
165
|
+
|
|
166
|
+
enum { SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr), IsLower = (Mode & Lower) == Lower };
|
|
167
|
+
|
|
168
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
169
|
+
Index mc = (std::min)(size, blocking.mc()); // cache block size along the M direction
|
|
170
|
+
|
|
171
|
+
std::size_t sizeA = kc * mc;
|
|
172
|
+
std::size_t sizeB = kc * cols;
|
|
173
|
+
|
|
174
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
175
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
176
|
+
|
|
177
|
+
gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
|
|
178
|
+
gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
179
|
+
TriStorageOrder>
|
|
180
|
+
pack_lhs;
|
|
181
|
+
gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
|
|
182
|
+
|
|
183
|
+
// the goal here is to subdivise the Rhs panels such that we keep some cache
|
|
184
|
+
// coherence when accessing the rhs elements
|
|
185
|
+
Index subcols = cols > 0 ? l2 / (4 * sizeof(Scalar) * std::max<Index>(otherStride, size)) : 0;
|
|
186
|
+
subcols = std::max<Index>((subcols / Traits::nr) * Traits::nr, Traits::nr);
|
|
187
|
+
|
|
188
|
+
for (Index k2 = IsLower ? 0 : size; IsLower ? k2 < size : k2 > 0; IsLower ? k2 += kc : k2 -= kc) {
|
|
189
|
+
const Index actual_kc = (std::min)(IsLower ? size - k2 : k2, kc);
|
|
190
|
+
|
|
191
|
+
// We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
|
|
192
|
+
// and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
|
|
193
|
+
// A11 (the triangular part) and A21 the remaining rectangular part.
|
|
194
|
+
// Then the high level algorithm is:
|
|
195
|
+
// - B = R1 => general block copy (done during the next step)
|
|
196
|
+
// - R1 = A11^-1 B => tricky part
|
|
197
|
+
// - update B from the new R1 => actually this has to be performed continuously during the above step
|
|
198
|
+
// - R2 -= A21 * B => GEPP
|
|
199
|
+
|
|
200
|
+
// The tricky part: compute R1 = A11^-1 B while updating B from R1
|
|
201
|
+
// The idea is to split A11 into multiple small vertical panels.
|
|
202
|
+
// Each panel can be split into a small triangular part T1k which is processed without optimization,
|
|
203
|
+
// and the remaining small part T2k which is processed using gebp with appropriate block strides
|
|
204
|
+
for (Index j2 = 0; j2 < cols; j2 += subcols) {
|
|
205
|
+
Index actual_cols = (std::min)(cols - j2, subcols);
|
|
206
|
+
// for each small vertical panels [T1k^T, T2k^T]^T of lhs
|
|
207
|
+
for (Index k1 = 0; k1 < actual_kc; k1 += SmallPanelWidth) {
|
|
208
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - k1, SmallPanelWidth);
|
|
209
|
+
// tr solve
|
|
113
210
|
{
|
|
114
|
-
Index
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
Index i = IsLower ? k2+k1+k : k2-k1-k-1;
|
|
120
|
-
Index rs = actualPanelWidth - k - 1; // remaining size
|
|
121
|
-
Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
|
|
122
|
-
: IsLower ? i+1 : i-rs;
|
|
123
|
-
|
|
124
|
-
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
|
|
125
|
-
for (Index j=j2; j<j2+actual_cols; ++j)
|
|
126
|
-
{
|
|
127
|
-
if (TriStorageOrder==RowMajor)
|
|
128
|
-
{
|
|
129
|
-
Scalar b(0);
|
|
130
|
-
const Scalar* l = &tri(i,s);
|
|
131
|
-
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
|
|
132
|
-
for (Index i3=0; i3<k; ++i3)
|
|
133
|
-
b += conj(l[i3]) * r(i3);
|
|
134
|
-
|
|
135
|
-
other(i,j) = (other(i,j) - b)*a;
|
|
136
|
-
}
|
|
137
|
-
else
|
|
138
|
-
{
|
|
139
|
-
Scalar& otherij = other(i,j);
|
|
140
|
-
otherij *= a;
|
|
141
|
-
Scalar b = otherij;
|
|
142
|
-
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
|
|
143
|
-
typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
|
|
144
|
-
for (Index i3=0;i3<rs;++i3)
|
|
145
|
-
r(i3) -= b * conj(l(i3));
|
|
146
|
-
}
|
|
147
|
-
}
|
|
211
|
+
Index i = IsLower ? k2 + k1 : k2 - k1;
|
|
212
|
+
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS
|
|
213
|
+
EIGEN_IF_CONSTEXPR(
|
|
214
|
+
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
|
215
|
+
i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
|
|
148
216
|
}
|
|
217
|
+
#endif
|
|
218
|
+
trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::kernel(
|
|
219
|
+
actualPanelWidth, actual_cols, _tri + i + (i)*triStride, triStride,
|
|
220
|
+
_other + i * OtherInnerStride + j2 * otherStride, otherIncr, otherStride);
|
|
221
|
+
}
|
|
149
222
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
223
|
+
Index lengthTarget = actual_kc - k1 - actualPanelWidth;
|
|
224
|
+
Index startBlock = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
|
|
225
|
+
Index blockBOffset = IsLower ? k1 : lengthTarget;
|
|
153
226
|
|
|
154
|
-
|
|
155
|
-
|
|
227
|
+
// update the respective rows of B from other
|
|
228
|
+
pack_rhs(blockB + actual_kc * j2, other.getSubMapper(startBlock, j2), actualPanelWidth, actual_cols, actual_kc,
|
|
229
|
+
blockBOffset);
|
|
156
230
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;
|
|
231
|
+
// GEBP
|
|
232
|
+
if (lengthTarget > 0) {
|
|
233
|
+
Index startTarget = IsLower ? k2 + k1 + actualPanelWidth : k2 - actual_kc;
|
|
161
234
|
|
|
162
|
-
|
|
235
|
+
pack_lhs(blockA, tri.getSubMapper(startTarget, startBlock), actualPanelWidth, lengthTarget);
|
|
163
236
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
}
|
|
237
|
+
gebp_kernel(other.getSubMapper(startTarget, j2), blockA, blockB + actual_kc * j2, lengthTarget,
|
|
238
|
+
actualPanelWidth, actual_cols, Scalar(-1), actualPanelWidth, actual_kc, 0, blockBOffset);
|
|
167
239
|
}
|
|
168
240
|
}
|
|
169
|
-
|
|
170
|
-
// R2 -= A21 * B => GEPP
|
|
171
|
-
{
|
|
172
|
-
Index start = IsLower ? k2+kc : 0;
|
|
173
|
-
Index end = IsLower ? size : k2-kc;
|
|
174
|
-
for(Index i2=start; i2<end; i2+=mc)
|
|
175
|
-
{
|
|
176
|
-
const Index actual_mc = (std::min)(mc,end-i2);
|
|
177
|
-
if (actual_mc>0)
|
|
178
|
-
{
|
|
179
|
-
pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2-kc), actual_kc, actual_mc);
|
|
241
|
+
}
|
|
180
242
|
|
|
181
|
-
|
|
182
|
-
|
|
243
|
+
// R2 -= A21 * B => GEPP
|
|
244
|
+
{
|
|
245
|
+
Index start = IsLower ? k2 + kc : 0;
|
|
246
|
+
Index end = IsLower ? size : k2 - kc;
|
|
247
|
+
for (Index i2 = start; i2 < end; i2 += mc) {
|
|
248
|
+
const Index actual_mc = (std::min)(mc, end - i2);
|
|
249
|
+
if (actual_mc > 0) {
|
|
250
|
+
pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2 - kc), actual_kc, actual_mc);
|
|
251
|
+
|
|
252
|
+
gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
|
|
183
253
|
}
|
|
184
254
|
}
|
|
185
255
|
}
|
|
186
256
|
}
|
|
257
|
+
}
|
|
187
258
|
|
|
188
259
|
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
|
|
189
260
|
*/
|
|
190
261
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
191
|
-
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,
|
|
192
|
-
{
|
|
193
|
-
static EIGEN_DONT_INLINE void run(
|
|
194
|
-
|
|
195
|
-
const Scalar* _tri, Index triStride,
|
|
196
|
-
Scalar* _other, Index otherIncr, Index otherStride,
|
|
197
|
-
level3_blocking<Scalar,Scalar>& blocking);
|
|
262
|
+
struct triangular_solve_matrix<Scalar, Index, OnTheRight, Mode, Conjugate, TriStorageOrder, ColMajor,
|
|
263
|
+
OtherInnerStride> {
|
|
264
|
+
static EIGEN_DONT_INLINE void run(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other,
|
|
265
|
+
Index otherIncr, Index otherStride, level3_blocking<Scalar, Scalar>& blocking);
|
|
198
266
|
};
|
|
267
|
+
|
|
199
268
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
200
|
-
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
Index kc = blocking.kc(); // cache block size along the K direction
|
|
222
|
-
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
|
223
|
-
|
|
224
|
-
std::size_t sizeA = kc*mc;
|
|
225
|
-
std::size_t sizeB = kc*size;
|
|
226
|
-
|
|
227
|
-
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
228
|
-
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
229
|
-
|
|
230
|
-
conj_if<Conjugate> conj;
|
|
231
|
-
gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
|
|
232
|
-
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
233
|
-
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;
|
|
234
|
-
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor, false, true> pack_lhs_panel;
|
|
235
|
-
|
|
236
|
-
for(Index k2=IsLower ? size : 0;
|
|
237
|
-
IsLower ? k2>0 : k2<size;
|
|
238
|
-
IsLower ? k2-=kc : k2+=kc)
|
|
239
|
-
{
|
|
240
|
-
const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
|
|
241
|
-
Index actual_k2 = IsLower ? k2-actual_kc : k2 ;
|
|
269
|
+
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheRight, Mode, Conjugate, TriStorageOrder, ColMajor,
|
|
270
|
+
OtherInnerStride>::run(Index size, Index otherSize, const Scalar* _tri,
|
|
271
|
+
Index triStride, Scalar* _other, Index otherIncr,
|
|
272
|
+
Index otherStride,
|
|
273
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
274
|
+
Index rows = otherSize;
|
|
275
|
+
|
|
276
|
+
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_R_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS
|
|
277
|
+
EIGEN_IF_CONSTEXPR(
|
|
278
|
+
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
|
279
|
+
// TODO: Investigate better heuristics for cutoffs.
|
|
280
|
+
std::ptrdiff_t l1, l2, l3;
|
|
281
|
+
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
|
282
|
+
double L2Cap = 0.5; // 50% of L2 size
|
|
283
|
+
if (size < avx512_trsm_cutoff<Scalar>(l2, rows, L2Cap)) {
|
|
284
|
+
trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::kernel(
|
|
285
|
+
size, rows, _tri, triStride, _other, 1, otherStride);
|
|
286
|
+
return;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
#endif
|
|
242
290
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
291
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
|
|
292
|
+
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
|
293
|
+
LhsMapper lhs(_other, otherStride, otherIncr);
|
|
294
|
+
RhsMapper rhs(_tri, triStride);
|
|
246
295
|
|
|
247
|
-
|
|
296
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
297
|
+
enum {
|
|
298
|
+
RhsStorageOrder = TriStorageOrder,
|
|
299
|
+
SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr),
|
|
300
|
+
IsLower = (Mode & Lower) == Lower
|
|
301
|
+
};
|
|
248
302
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
303
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
304
|
+
Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
|
|
305
|
+
|
|
306
|
+
std::size_t sizeA = kc * mc;
|
|
307
|
+
std::size_t sizeB = kc * size;
|
|
308
|
+
|
|
309
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
310
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
311
|
+
|
|
312
|
+
gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
|
|
313
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
314
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder, false, true> pack_rhs_panel;
|
|
315
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor,
|
|
316
|
+
false, true>
|
|
317
|
+
pack_lhs_panel;
|
|
318
|
+
|
|
319
|
+
for (Index k2 = IsLower ? size : 0; IsLower ? k2 > 0 : k2 < size; IsLower ? k2 -= kc : k2 += kc) {
|
|
320
|
+
const Index actual_kc = (std::min)(IsLower ? k2 : size - k2, kc);
|
|
321
|
+
Index actual_k2 = IsLower ? k2 - actual_kc : k2;
|
|
322
|
+
|
|
323
|
+
Index startPanel = IsLower ? 0 : k2 + actual_kc;
|
|
324
|
+
Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
|
|
325
|
+
Scalar* geb = blockB + actual_kc * actual_kc;
|
|
326
|
+
|
|
327
|
+
if (rs > 0) pack_rhs(geb, rhs.getSubMapper(actual_k2, startPanel), actual_kc, rs);
|
|
328
|
+
|
|
329
|
+
// triangular packing (we only pack the panels off the diagonal,
|
|
330
|
+
// neglecting the blocks overlapping the diagonal
|
|
331
|
+
{
|
|
332
|
+
for (Index j2 = 0; j2 < actual_kc; j2 += SmallPanelWidth) {
|
|
333
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
|
|
334
|
+
Index actual_j2 = actual_k2 + j2;
|
|
335
|
+
Index panelOffset = IsLower ? j2 + actualPanelWidth : 0;
|
|
336
|
+
Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
|
|
337
|
+
|
|
338
|
+
if (panelLength > 0)
|
|
339
|
+
pack_rhs_panel(blockB + j2 * actual_kc, rhs.getSubMapper(actual_k2 + panelOffset, actual_j2), panelLength,
|
|
340
|
+
actualPanelWidth, actual_kc, panelOffset);
|
|
265
341
|
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
for (Index i2 = 0; i2 < rows; i2 += mc) {
|
|
345
|
+
const Index actual_mc = (std::min)(mc, rows - i2);
|
|
266
346
|
|
|
267
|
-
|
|
347
|
+
// triangular solver kernel
|
|
268
348
|
{
|
|
269
|
-
|
|
349
|
+
// for each small block of the diagonal (=> vertical panels of rhs)
|
|
350
|
+
for (Index j2 = IsLower ? (actual_kc - ((actual_kc % SmallPanelWidth) ? Index(actual_kc % SmallPanelWidth)
|
|
351
|
+
: Index(SmallPanelWidth)))
|
|
352
|
+
: 0;
|
|
353
|
+
IsLower ? j2 >= 0 : j2 < actual_kc; IsLower ? j2 -= SmallPanelWidth : j2 += SmallPanelWidth) {
|
|
354
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
|
|
355
|
+
Index absolute_j2 = actual_k2 + j2;
|
|
356
|
+
Index panelOffset = IsLower ? j2 + actualPanelWidth : 0;
|
|
357
|
+
Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
|
|
270
358
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
: 0;
|
|
278
|
-
IsLower ? j2>=0 : j2<actual_kc;
|
|
279
|
-
IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth)
|
|
280
|
-
{
|
|
281
|
-
Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
|
|
282
|
-
Index absolute_j2 = actual_k2 + j2;
|
|
283
|
-
Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
|
|
284
|
-
Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
|
|
285
|
-
|
|
286
|
-
// GEBP
|
|
287
|
-
if(panelLength>0)
|
|
288
|
-
{
|
|
289
|
-
gebp_kernel(lhs.getSubMapper(i2,absolute_j2),
|
|
290
|
-
blockA, blockB+j2*actual_kc,
|
|
291
|
-
actual_mc, panelLength, actualPanelWidth,
|
|
292
|
-
Scalar(-1),
|
|
293
|
-
actual_kc, actual_kc, // strides
|
|
294
|
-
panelOffset, panelOffset); // offsets
|
|
295
|
-
}
|
|
359
|
+
// GEBP
|
|
360
|
+
if (panelLength > 0) {
|
|
361
|
+
gebp_kernel(lhs.getSubMapper(i2, absolute_j2), blockA, blockB + j2 * actual_kc, actual_mc, panelLength,
|
|
362
|
+
actualPanelWidth, Scalar(-1), actual_kc, actual_kc, // strides
|
|
363
|
+
panelOffset, panelOffset); // offsets
|
|
364
|
+
}
|
|
296
365
|
|
|
366
|
+
{
|
|
297
367
|
// unblocked triangular solve
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
for (Index k3=0; k3<k; ++k3)
|
|
304
|
-
{
|
|
305
|
-
Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
|
|
306
|
-
typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
|
|
307
|
-
for (Index i=0; i<actual_mc; ++i)
|
|
308
|
-
r(i) -= a(i) * b;
|
|
309
|
-
}
|
|
310
|
-
if((Mode & UnitDiag)==0)
|
|
311
|
-
{
|
|
312
|
-
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
|
|
313
|
-
for (Index i=0; i<actual_mc; ++i)
|
|
314
|
-
r(i) *= inv_rjj;
|
|
315
|
-
}
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
// pack the just computed part of lhs to A
|
|
319
|
-
pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
|
|
320
|
-
actualPanelWidth, actual_mc,
|
|
321
|
-
actual_kc, j2);
|
|
368
|
+
trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
|
|
369
|
+
/*Specialized=*/true>::kernel(actualPanelWidth, actual_mc,
|
|
370
|
+
_tri + absolute_j2 + absolute_j2 * triStride, triStride,
|
|
371
|
+
_other + i2 * OtherInnerStride + absolute_j2 * otherStride,
|
|
372
|
+
otherIncr, otherStride);
|
|
322
373
|
}
|
|
374
|
+
// pack the just computed part of lhs to A
|
|
375
|
+
pack_lhs_panel(blockA, lhs.getSubMapper(i2, absolute_j2), actualPanelWidth, actual_mc, actual_kc, j2);
|
|
323
376
|
}
|
|
324
|
-
|
|
325
|
-
if (rs>0)
|
|
326
|
-
gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb,
|
|
327
|
-
actual_mc, actual_kc, rs, Scalar(-1),
|
|
328
|
-
-1, -1, 0, 0);
|
|
329
377
|
}
|
|
378
|
+
|
|
379
|
+
if (rs > 0)
|
|
380
|
+
gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb, actual_mc, actual_kc, rs, Scalar(-1), -1, -1, 0, 0);
|
|
330
381
|
}
|
|
331
382
|
}
|
|
383
|
+
}
|
|
384
|
+
} // end namespace internal
|
|
332
385
|
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
} // end namespace Eigen
|
|
386
|
+
} // end namespace Eigen
|
|
336
387
|
|
|
337
|
-
#endif
|
|
388
|
+
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
|