@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
// for linear algebra.
|
|
3
3
|
//
|
|
4
4
|
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
5
|
+
// Modifications Copyright (C) 2022 Intel Corporation
|
|
5
6
|
//
|
|
6
7
|
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
8
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
@@ -10,326 +11,378 @@
|
|
|
10
11
|
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
|
|
11
12
|
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
// IWYU pragma: private
|
|
15
|
+
#include "../InternalHeaderCheck.h"
|
|
16
|
+
|
|
17
|
+
namespace Eigen {
|
|
14
18
|
|
|
15
19
|
namespace internal {
|
|
16
20
|
|
|
21
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
|
|
22
|
+
bool Specialized>
|
|
23
|
+
struct trsmKernelL {
|
|
24
|
+
// Generic Implementation of triangular solve for triangular matrix on left and multiple rhs.
|
|
25
|
+
// Handles non-packed matrices.
|
|
26
|
+
static void kernel(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherIncr,
|
|
27
|
+
Index otherStride);
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
|
|
31
|
+
bool Specialized>
|
|
32
|
+
struct trsmKernelR {
|
|
33
|
+
// Generic Implementation of triangular solve for triangular matrix on right and multiple lhs.
|
|
34
|
+
// Handles non-packed matrices.
|
|
35
|
+
static void kernel(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherIncr,
|
|
36
|
+
Index otherStride);
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
|
|
40
|
+
bool Specialized>
|
|
41
|
+
EIGEN_STRONG_INLINE void trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
|
|
42
|
+
Specialized>::kernel(Index size, Index otherSize, const Scalar* _tri,
|
|
43
|
+
Index triStride, Scalar* _other, Index otherIncr,
|
|
44
|
+
Index otherStride) {
|
|
45
|
+
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
|
46
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
|
|
47
|
+
TriMapper tri(_tri, triStride);
|
|
48
|
+
OtherMapper other(_other, otherStride, otherIncr);
|
|
49
|
+
|
|
50
|
+
enum { IsLower = (Mode & Lower) == Lower };
|
|
51
|
+
conj_if<Conjugate> conj;
|
|
52
|
+
|
|
53
|
+
// tr solve
|
|
54
|
+
for (Index k = 0; k < size; ++k) {
|
|
55
|
+
// TODO write a small kernel handling this (can be shared with trsv)
|
|
56
|
+
Index i = IsLower ? k : -k - 1;
|
|
57
|
+
Index rs = size - k - 1; // remaining size
|
|
58
|
+
Index s = TriStorageOrder == RowMajor ? (IsLower ? 0 : i + 1) : IsLower ? i + 1 : i - rs;
|
|
59
|
+
|
|
60
|
+
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1) / conj(tri(i, i)));
|
|
61
|
+
for (Index j = 0; j < otherSize; ++j) {
|
|
62
|
+
if (TriStorageOrder == RowMajor) {
|
|
63
|
+
Scalar b(0);
|
|
64
|
+
const Scalar* l = &tri(i, s);
|
|
65
|
+
typename OtherMapper::LinearMapper r = other.getLinearMapper(s, j);
|
|
66
|
+
for (Index i3 = 0; i3 < k; ++i3) b += conj(l[i3]) * r(i3);
|
|
67
|
+
|
|
68
|
+
other(i, j) = (other(i, j) - b) * a;
|
|
69
|
+
} else {
|
|
70
|
+
Scalar& otherij = other(i, j);
|
|
71
|
+
otherij *= a;
|
|
72
|
+
Scalar b = otherij;
|
|
73
|
+
typename OtherMapper::LinearMapper r = other.getLinearMapper(s, j);
|
|
74
|
+
typename TriMapper::LinearMapper l = tri.getLinearMapper(s, i);
|
|
75
|
+
for (Index i3 = 0; i3 < rs; ++i3) r(i3) -= b * conj(l(i3));
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
|
|
82
|
+
bool Specialized>
|
|
83
|
+
EIGEN_STRONG_INLINE void trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
|
|
84
|
+
Specialized>::kernel(Index size, Index otherSize, const Scalar* _tri,
|
|
85
|
+
Index triStride, Scalar* _other, Index otherIncr,
|
|
86
|
+
Index otherStride) {
|
|
87
|
+
typedef typename NumTraits<Scalar>::Real RealScalar;
|
|
88
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
|
|
89
|
+
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
|
90
|
+
LhsMapper lhs(_other, otherStride, otherIncr);
|
|
91
|
+
RhsMapper rhs(_tri, triStride);
|
|
92
|
+
|
|
93
|
+
enum { RhsStorageOrder = TriStorageOrder, IsLower = (Mode & Lower) == Lower };
|
|
94
|
+
conj_if<Conjugate> conj;
|
|
95
|
+
|
|
96
|
+
for (Index k = 0; k < size; ++k) {
|
|
97
|
+
Index j = IsLower ? size - k - 1 : k;
|
|
98
|
+
|
|
99
|
+
typename LhsMapper::LinearMapper r = lhs.getLinearMapper(0, j);
|
|
100
|
+
for (Index k3 = 0; k3 < k; ++k3) {
|
|
101
|
+
Scalar b = conj(rhs(IsLower ? j + 1 + k3 : k3, j));
|
|
102
|
+
typename LhsMapper::LinearMapper a = lhs.getLinearMapper(0, IsLower ? j + 1 + k3 : k3);
|
|
103
|
+
for (Index i = 0; i < otherSize; ++i) r(i) -= a(i) * b;
|
|
104
|
+
}
|
|
105
|
+
if ((Mode & UnitDiag) == 0) {
|
|
106
|
+
Scalar inv_rjj = RealScalar(1) / conj(rhs(j, j));
|
|
107
|
+
for (Index i = 0; i < otherSize; ++i) r(i) *= inv_rjj;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
17
112
|
// if the rhs is row major, let's transpose the product
|
|
18
|
-
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder,
|
|
19
|
-
|
|
20
|
-
{
|
|
21
|
-
static void run(
|
|
22
|
-
|
|
23
|
-
const Scalar* tri, Index triStride,
|
|
24
|
-
Scalar* _other, Index otherIncr, Index otherStride,
|
|
25
|
-
level3_blocking<Scalar,Scalar>& blocking)
|
|
26
|
-
{
|
|
113
|
+
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder,
|
|
114
|
+
int OtherInnerStride>
|
|
115
|
+
struct triangular_solve_matrix<Scalar, Index, Side, Mode, Conjugate, TriStorageOrder, RowMajor, OtherInnerStride> {
|
|
116
|
+
static void run(Index size, Index cols, const Scalar* tri, Index triStride, Scalar* _other, Index otherIncr,
|
|
117
|
+
Index otherStride, level3_blocking<Scalar, Scalar>& blocking) {
|
|
27
118
|
triangular_solve_matrix<
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
|
|
32
|
-
::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
|
|
119
|
+
Scalar, Index, Side == OnTheLeft ? OnTheRight : OnTheLeft, (Mode & UnitDiag) | ((Mode & Upper) ? Lower : Upper),
|
|
120
|
+
NumTraits<Scalar>::IsComplex && Conjugate, TriStorageOrder == RowMajor ? ColMajor : RowMajor, ColMajor,
|
|
121
|
+
OtherInnerStride>::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
|
|
33
122
|
}
|
|
34
123
|
};
|
|
35
124
|
|
|
36
125
|
/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
|
|
37
126
|
*/
|
|
38
|
-
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
|
|
39
|
-
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
|
|
40
|
-
{
|
|
41
|
-
static EIGEN_DONT_INLINE void run(
|
|
42
|
-
Index size, Index otherSize,
|
|
43
|
-
const Scalar* _tri, Index triStride,
|
|
44
|
-
Scalar* _other, Index otherIncr, Index otherStride,
|
|
45
|
-
level3_blocking<Scalar,Scalar>& blocking);
|
|
46
|
-
};
|
|
47
127
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
level3_blocking<Scalar,Scalar>& blocking)
|
|
53
|
-
{
|
|
54
|
-
Index cols = otherSize;
|
|
55
|
-
|
|
56
|
-
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
|
57
|
-
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
|
|
58
|
-
TriMapper tri(_tri, triStride);
|
|
59
|
-
OtherMapper other(_other, otherStride, otherIncr);
|
|
60
|
-
|
|
61
|
-
typedef gebp_traits<Scalar,Scalar> Traits;
|
|
62
|
-
|
|
63
|
-
enum {
|
|
64
|
-
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
|
|
65
|
-
IsLower = (Mode&Lower) == Lower
|
|
66
|
-
};
|
|
67
|
-
|
|
68
|
-
Index kc = blocking.kc(); // cache block size along the K direction
|
|
69
|
-
Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction
|
|
70
|
-
|
|
71
|
-
std::size_t sizeA = kc*mc;
|
|
72
|
-
std::size_t sizeB = kc*cols;
|
|
73
|
-
|
|
74
|
-
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
75
|
-
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
76
|
-
|
|
77
|
-
conj_if<Conjugate> conj;
|
|
78
|
-
gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
|
|
79
|
-
gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
|
|
80
|
-
gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
|
|
81
|
-
|
|
82
|
-
// the goal here is to subdivise the Rhs panels such that we keep some cache
|
|
83
|
-
// coherence when accessing the rhs elements
|
|
84
|
-
std::ptrdiff_t l1, l2, l3;
|
|
85
|
-
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
|
86
|
-
Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max<Index>(otherStride,size)) : 0;
|
|
87
|
-
subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
|
|
128
|
+
struct triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, Conjugate, TriStorageOrder, ColMajor, OtherInnerStride> {
|
|
129
|
+
static EIGEN_DONT_INLINE void run(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other,
|
|
130
|
+
Index otherIncr, Index otherStride, level3_blocking<Scalar, Scalar>& blocking);
|
|
131
|
+
};
|
|
88
132
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
133
|
+
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
134
|
+
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, Conjugate, TriStorageOrder, ColMajor,
|
|
135
|
+
OtherInnerStride>::run(Index size, Index otherSize, const Scalar* _tri,
|
|
136
|
+
Index triStride, Scalar* _other, Index otherIncr,
|
|
137
|
+
Index otherStride,
|
|
138
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
139
|
+
Index cols = otherSize;
|
|
140
|
+
|
|
141
|
+
std::ptrdiff_t l1, l2, l3;
|
|
142
|
+
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
|
143
|
+
|
|
144
|
+
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS
|
|
145
|
+
EIGEN_IF_CONSTEXPR(
|
|
146
|
+
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
|
147
|
+
// Very rough cutoffs to determine when to call trsm w/o packing
|
|
148
|
+
// For small problem sizes trsmKernel compiled with clang is generally faster.
|
|
149
|
+
// TODO: Investigate better heuristics for cutoffs.
|
|
150
|
+
double L2Cap = 0.5; // 50% of L2 size
|
|
151
|
+
if (size < avx512_trsm_cutoff<Scalar>(l2, cols, L2Cap)) {
|
|
152
|
+
trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, 1, /*Specialized=*/true>::kernel(
|
|
153
|
+
size, cols, _tri, triStride, _other, 1, otherStride);
|
|
154
|
+
return;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
#endif
|
|
158
|
+
|
|
159
|
+
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
|
160
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
|
|
161
|
+
TriMapper tri(_tri, triStride);
|
|
162
|
+
OtherMapper other(_other, otherStride, otherIncr);
|
|
163
|
+
|
|
164
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
165
|
+
|
|
166
|
+
enum { SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr), IsLower = (Mode & Lower) == Lower };
|
|
167
|
+
|
|
168
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
169
|
+
Index mc = (std::min)(size, blocking.mc()); // cache block size along the M direction
|
|
170
|
+
|
|
171
|
+
std::size_t sizeA = kc * mc;
|
|
172
|
+
std::size_t sizeB = kc * cols;
|
|
173
|
+
|
|
174
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
175
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
176
|
+
|
|
177
|
+
gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
|
|
178
|
+
gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
179
|
+
TriStorageOrder>
|
|
180
|
+
pack_lhs;
|
|
181
|
+
gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
|
|
182
|
+
|
|
183
|
+
// the goal here is to subdivise the Rhs panels such that we keep some cache
|
|
184
|
+
// coherence when accessing the rhs elements
|
|
185
|
+
Index subcols = cols > 0 ? l2 / (4 * sizeof(Scalar) * std::max<Index>(otherStride, size)) : 0;
|
|
186
|
+
subcols = std::max<Index>((subcols / Traits::nr) * Traits::nr, Traits::nr);
|
|
187
|
+
|
|
188
|
+
for (Index k2 = IsLower ? 0 : size; IsLower ? k2 < size : k2 > 0; IsLower ? k2 += kc : k2 -= kc) {
|
|
189
|
+
const Index actual_kc = (std::min)(IsLower ? size - k2 : k2, kc);
|
|
190
|
+
|
|
191
|
+
// We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
|
|
192
|
+
// and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
|
|
193
|
+
// A11 (the triangular part) and A21 the remaining rectangular part.
|
|
194
|
+
// Then the high level algorithm is:
|
|
195
|
+
// - B = R1 => general block copy (done during the next step)
|
|
196
|
+
// - R1 = A11^-1 B => tricky part
|
|
197
|
+
// - update B from the new R1 => actually this has to be performed continuously during the above step
|
|
198
|
+
// - R2 -= A21 * B => GEPP
|
|
199
|
+
|
|
200
|
+
// The tricky part: compute R1 = A11^-1 B while updating B from R1
|
|
201
|
+
// The idea is to split A11 into multiple small vertical panels.
|
|
202
|
+
// Each panel can be split into a small triangular part T1k which is processed without optimization,
|
|
203
|
+
// and the remaining small part T2k which is processed using gebp with appropriate block strides
|
|
204
|
+
for (Index j2 = 0; j2 < cols; j2 += subcols) {
|
|
205
|
+
Index actual_cols = (std::min)(cols - j2, subcols);
|
|
206
|
+
// for each small vertical panels [T1k^T, T2k^T]^T of lhs
|
|
207
|
+
for (Index k1 = 0; k1 < actual_kc; k1 += SmallPanelWidth) {
|
|
208
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - k1, SmallPanelWidth);
|
|
209
|
+
// tr solve
|
|
113
210
|
{
|
|
114
|
-
Index
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
Index i = IsLower ? k2+k1+k : k2-k1-k-1;
|
|
120
|
-
Index rs = actualPanelWidth - k - 1; // remaining size
|
|
121
|
-
Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
|
|
122
|
-
: IsLower ? i+1 : i-rs;
|
|
123
|
-
|
|
124
|
-
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
|
|
125
|
-
for (Index j=j2; j<j2+actual_cols; ++j)
|
|
126
|
-
{
|
|
127
|
-
if (TriStorageOrder==RowMajor)
|
|
128
|
-
{
|
|
129
|
-
Scalar b(0);
|
|
130
|
-
const Scalar* l = &tri(i,s);
|
|
131
|
-
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
|
|
132
|
-
for (Index i3=0; i3<k; ++i3)
|
|
133
|
-
b += conj(l[i3]) * r(i3);
|
|
134
|
-
|
|
135
|
-
other(i,j) = (other(i,j) - b)*a;
|
|
136
|
-
}
|
|
137
|
-
else
|
|
138
|
-
{
|
|
139
|
-
Scalar b = (other(i,j) *= a);
|
|
140
|
-
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
|
|
141
|
-
typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
|
|
142
|
-
for (Index i3=0;i3<rs;++i3)
|
|
143
|
-
r(i3) -= b * conj(l(i3));
|
|
144
|
-
}
|
|
145
|
-
}
|
|
211
|
+
Index i = IsLower ? k2 + k1 : k2 - k1;
|
|
212
|
+
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS
|
|
213
|
+
EIGEN_IF_CONSTEXPR(
|
|
214
|
+
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
|
215
|
+
i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
|
|
146
216
|
}
|
|
217
|
+
#endif
|
|
218
|
+
trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::kernel(
|
|
219
|
+
actualPanelWidth, actual_cols, _tri + i + (i)*triStride, triStride,
|
|
220
|
+
_other + i * OtherInnerStride + j2 * otherStride, otherIncr, otherStride);
|
|
221
|
+
}
|
|
147
222
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
223
|
+
Index lengthTarget = actual_kc - k1 - actualPanelWidth;
|
|
224
|
+
Index startBlock = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
|
|
225
|
+
Index blockBOffset = IsLower ? k1 : lengthTarget;
|
|
151
226
|
|
|
152
|
-
|
|
153
|
-
|
|
227
|
+
// update the respective rows of B from other
|
|
228
|
+
pack_rhs(blockB + actual_kc * j2, other.getSubMapper(startBlock, j2), actualPanelWidth, actual_cols, actual_kc,
|
|
229
|
+
blockBOffset);
|
|
154
230
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;
|
|
231
|
+
// GEBP
|
|
232
|
+
if (lengthTarget > 0) {
|
|
233
|
+
Index startTarget = IsLower ? k2 + k1 + actualPanelWidth : k2 - actual_kc;
|
|
159
234
|
|
|
160
|
-
|
|
235
|
+
pack_lhs(blockA, tri.getSubMapper(startTarget, startBlock), actualPanelWidth, lengthTarget);
|
|
161
236
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
}
|
|
237
|
+
gebp_kernel(other.getSubMapper(startTarget, j2), blockA, blockB + actual_kc * j2, lengthTarget,
|
|
238
|
+
actualPanelWidth, actual_cols, Scalar(-1), actualPanelWidth, actual_kc, 0, blockBOffset);
|
|
165
239
|
}
|
|
166
240
|
}
|
|
167
|
-
|
|
168
|
-
// R2 -= A21 * B => GEPP
|
|
169
|
-
{
|
|
170
|
-
Index start = IsLower ? k2+kc : 0;
|
|
171
|
-
Index end = IsLower ? size : k2-kc;
|
|
172
|
-
for(Index i2=start; i2<end; i2+=mc)
|
|
173
|
-
{
|
|
174
|
-
const Index actual_mc = (std::min)(mc,end-i2);
|
|
175
|
-
if (actual_mc>0)
|
|
176
|
-
{
|
|
177
|
-
pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2-kc), actual_kc, actual_mc);
|
|
241
|
+
}
|
|
178
242
|
|
|
179
|
-
|
|
180
|
-
|
|
243
|
+
// R2 -= A21 * B => GEPP
|
|
244
|
+
{
|
|
245
|
+
Index start = IsLower ? k2 + kc : 0;
|
|
246
|
+
Index end = IsLower ? size : k2 - kc;
|
|
247
|
+
for (Index i2 = start; i2 < end; i2 += mc) {
|
|
248
|
+
const Index actual_mc = (std::min)(mc, end - i2);
|
|
249
|
+
if (actual_mc > 0) {
|
|
250
|
+
pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2 - kc), actual_kc, actual_mc);
|
|
251
|
+
|
|
252
|
+
gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
|
|
181
253
|
}
|
|
182
254
|
}
|
|
183
255
|
}
|
|
184
256
|
}
|
|
257
|
+
}
|
|
185
258
|
|
|
186
259
|
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
|
|
187
260
|
*/
|
|
188
261
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
189
|
-
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,
|
|
190
|
-
{
|
|
191
|
-
static EIGEN_DONT_INLINE void run(
|
|
192
|
-
|
|
193
|
-
const Scalar* _tri, Index triStride,
|
|
194
|
-
Scalar* _other, Index otherIncr, Index otherStride,
|
|
195
|
-
level3_blocking<Scalar,Scalar>& blocking);
|
|
262
|
+
struct triangular_solve_matrix<Scalar, Index, OnTheRight, Mode, Conjugate, TriStorageOrder, ColMajor,
|
|
263
|
+
OtherInnerStride> {
|
|
264
|
+
static EIGEN_DONT_INLINE void run(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other,
|
|
265
|
+
Index otherIncr, Index otherStride, level3_blocking<Scalar, Scalar>& blocking);
|
|
196
266
|
};
|
|
267
|
+
|
|
197
268
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
|
198
|
-
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
Index kc = blocking.kc(); // cache block size along the K direction
|
|
220
|
-
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
|
221
|
-
|
|
222
|
-
std::size_t sizeA = kc*mc;
|
|
223
|
-
std::size_t sizeB = kc*size;
|
|
224
|
-
|
|
225
|
-
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
226
|
-
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
227
|
-
|
|
228
|
-
conj_if<Conjugate> conj;
|
|
229
|
-
gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
|
|
230
|
-
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
231
|
-
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;
|
|
232
|
-
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
|
|
233
|
-
|
|
234
|
-
for(Index k2=IsLower ? size : 0;
|
|
235
|
-
IsLower ? k2>0 : k2<size;
|
|
236
|
-
IsLower ? k2-=kc : k2+=kc)
|
|
237
|
-
{
|
|
238
|
-
const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
|
|
239
|
-
Index actual_k2 = IsLower ? k2-actual_kc : k2 ;
|
|
269
|
+
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheRight, Mode, Conjugate, TriStorageOrder, ColMajor,
|
|
270
|
+
OtherInnerStride>::run(Index size, Index otherSize, const Scalar* _tri,
|
|
271
|
+
Index triStride, Scalar* _other, Index otherIncr,
|
|
272
|
+
Index otherStride,
|
|
273
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
274
|
+
Index rows = otherSize;
|
|
275
|
+
|
|
276
|
+
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_R_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS
|
|
277
|
+
EIGEN_IF_CONSTEXPR(
|
|
278
|
+
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
|
279
|
+
// TODO: Investigate better heuristics for cutoffs.
|
|
280
|
+
std::ptrdiff_t l1, l2, l3;
|
|
281
|
+
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
|
282
|
+
double L2Cap = 0.5; // 50% of L2 size
|
|
283
|
+
if (size < avx512_trsm_cutoff<Scalar>(l2, rows, L2Cap)) {
|
|
284
|
+
trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::kernel(
|
|
285
|
+
size, rows, _tri, triStride, _other, 1, otherStride);
|
|
286
|
+
return;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
#endif
|
|
240
290
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
291
|
+
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
|
|
292
|
+
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
|
293
|
+
LhsMapper lhs(_other, otherStride, otherIncr);
|
|
294
|
+
RhsMapper rhs(_tri, triStride);
|
|
244
295
|
|
|
245
|
-
|
|
296
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
297
|
+
enum {
|
|
298
|
+
RhsStorageOrder = TriStorageOrder,
|
|
299
|
+
SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr),
|
|
300
|
+
IsLower = (Mode & Lower) == Lower
|
|
301
|
+
};
|
|
246
302
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
303
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
304
|
+
Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
|
|
305
|
+
|
|
306
|
+
std::size_t sizeA = kc * mc;
|
|
307
|
+
std::size_t sizeB = kc * size;
|
|
308
|
+
|
|
309
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
310
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
311
|
+
|
|
312
|
+
gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
|
|
313
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
314
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder, false, true> pack_rhs_panel;
|
|
315
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor,
|
|
316
|
+
false, true>
|
|
317
|
+
pack_lhs_panel;
|
|
318
|
+
|
|
319
|
+
for (Index k2 = IsLower ? size : 0; IsLower ? k2 > 0 : k2 < size; IsLower ? k2 -= kc : k2 += kc) {
|
|
320
|
+
const Index actual_kc = (std::min)(IsLower ? k2 : size - k2, kc);
|
|
321
|
+
Index actual_k2 = IsLower ? k2 - actual_kc : k2;
|
|
322
|
+
|
|
323
|
+
Index startPanel = IsLower ? 0 : k2 + actual_kc;
|
|
324
|
+
Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
|
|
325
|
+
Scalar* geb = blockB + actual_kc * actual_kc;
|
|
326
|
+
|
|
327
|
+
if (rs > 0) pack_rhs(geb, rhs.getSubMapper(actual_k2, startPanel), actual_kc, rs);
|
|
328
|
+
|
|
329
|
+
// triangular packing (we only pack the panels off the diagonal,
|
|
330
|
+
// neglecting the blocks overlapping the diagonal
|
|
331
|
+
{
|
|
332
|
+
for (Index j2 = 0; j2 < actual_kc; j2 += SmallPanelWidth) {
|
|
333
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
|
|
334
|
+
Index actual_j2 = actual_k2 + j2;
|
|
335
|
+
Index panelOffset = IsLower ? j2 + actualPanelWidth : 0;
|
|
336
|
+
Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
|
|
337
|
+
|
|
338
|
+
if (panelLength > 0)
|
|
339
|
+
pack_rhs_panel(blockB + j2 * actual_kc, rhs.getSubMapper(actual_k2 + panelOffset, actual_j2), panelLength,
|
|
340
|
+
actualPanelWidth, actual_kc, panelOffset);
|
|
263
341
|
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
for (Index i2 = 0; i2 < rows; i2 += mc) {
|
|
345
|
+
const Index actual_mc = (std::min)(mc, rows - i2);
|
|
264
346
|
|
|
265
|
-
|
|
347
|
+
// triangular solver kernel
|
|
266
348
|
{
|
|
267
|
-
|
|
349
|
+
// for each small block of the diagonal (=> vertical panels of rhs)
|
|
350
|
+
for (Index j2 = IsLower ? (actual_kc - ((actual_kc % SmallPanelWidth) ? Index(actual_kc % SmallPanelWidth)
|
|
351
|
+
: Index(SmallPanelWidth)))
|
|
352
|
+
: 0;
|
|
353
|
+
IsLower ? j2 >= 0 : j2 < actual_kc; IsLower ? j2 -= SmallPanelWidth : j2 += SmallPanelWidth) {
|
|
354
|
+
Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
|
|
355
|
+
Index absolute_j2 = actual_k2 + j2;
|
|
356
|
+
Index panelOffset = IsLower ? j2 + actualPanelWidth : 0;
|
|
357
|
+
Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
|
|
268
358
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
: 0;
|
|
276
|
-
IsLower ? j2>=0 : j2<actual_kc;
|
|
277
|
-
IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth)
|
|
278
|
-
{
|
|
279
|
-
Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
|
|
280
|
-
Index absolute_j2 = actual_k2 + j2;
|
|
281
|
-
Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
|
|
282
|
-
Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
|
|
283
|
-
|
|
284
|
-
// GEBP
|
|
285
|
-
if(panelLength>0)
|
|
286
|
-
{
|
|
287
|
-
gebp_kernel(lhs.getSubMapper(i2,absolute_j2),
|
|
288
|
-
blockA, blockB+j2*actual_kc,
|
|
289
|
-
actual_mc, panelLength, actualPanelWidth,
|
|
290
|
-
Scalar(-1),
|
|
291
|
-
actual_kc, actual_kc, // strides
|
|
292
|
-
panelOffset, panelOffset); // offsets
|
|
293
|
-
}
|
|
359
|
+
// GEBP
|
|
360
|
+
if (panelLength > 0) {
|
|
361
|
+
gebp_kernel(lhs.getSubMapper(i2, absolute_j2), blockA, blockB + j2 * actual_kc, actual_mc, panelLength,
|
|
362
|
+
actualPanelWidth, Scalar(-1), actual_kc, actual_kc, // strides
|
|
363
|
+
panelOffset, panelOffset); // offsets
|
|
364
|
+
}
|
|
294
365
|
|
|
366
|
+
{
|
|
295
367
|
// unblocked triangular solve
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
for (Index k3=0; k3<k; ++k3)
|
|
302
|
-
{
|
|
303
|
-
Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
|
|
304
|
-
typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
|
|
305
|
-
for (Index i=0; i<actual_mc; ++i)
|
|
306
|
-
r(i) -= a(i) * b;
|
|
307
|
-
}
|
|
308
|
-
if((Mode & UnitDiag)==0)
|
|
309
|
-
{
|
|
310
|
-
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
|
|
311
|
-
for (Index i=0; i<actual_mc; ++i)
|
|
312
|
-
r(i) *= inv_rjj;
|
|
313
|
-
}
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
// pack the just computed part of lhs to A
|
|
317
|
-
pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
|
|
318
|
-
actualPanelWidth, actual_mc,
|
|
319
|
-
actual_kc, j2);
|
|
368
|
+
trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
|
|
369
|
+
/*Specialized=*/true>::kernel(actualPanelWidth, actual_mc,
|
|
370
|
+
_tri + absolute_j2 + absolute_j2 * triStride, triStride,
|
|
371
|
+
_other + i2 * OtherInnerStride + absolute_j2 * otherStride,
|
|
372
|
+
otherIncr, otherStride);
|
|
320
373
|
}
|
|
374
|
+
// pack the just computed part of lhs to A
|
|
375
|
+
pack_lhs_panel(blockA, lhs.getSubMapper(i2, absolute_j2), actualPanelWidth, actual_mc, actual_kc, j2);
|
|
321
376
|
}
|
|
322
|
-
|
|
323
|
-
if (rs>0)
|
|
324
|
-
gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb,
|
|
325
|
-
actual_mc, actual_kc, rs, Scalar(-1),
|
|
326
|
-
-1, -1, 0, 0);
|
|
327
377
|
}
|
|
378
|
+
|
|
379
|
+
if (rs > 0)
|
|
380
|
+
gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb, actual_mc, actual_kc, rs, Scalar(-1), -1, -1, 0, 0);
|
|
328
381
|
}
|
|
329
382
|
}
|
|
383
|
+
}
|
|
384
|
+
} // end namespace internal
|
|
330
385
|
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
} // end namespace Eigen
|
|
386
|
+
} // end namespace Eigen
|
|
334
387
|
|
|
335
|
-
#endif
|
|
388
|
+
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
|