@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -10,278 +10,246 @@
|
|
|
10
10
|
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
|
|
11
11
|
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../InternalHeaderCheck.h"
|
|
15
|
+
|
|
16
|
+
namespace Eigen {
|
|
14
17
|
|
|
15
18
|
namespace internal {
|
|
16
19
|
|
|
17
20
|
// pack a selfadjoint block diagonal for use with the gebp_kernel
|
|
18
|
-
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
|
|
19
|
-
struct symm_pack_lhs
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
{
|
|
21
|
+
template <typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
|
|
22
|
+
struct symm_pack_lhs {
|
|
23
|
+
template <int BlockRows>
|
|
24
|
+
inline void pack(Scalar* blockA, const const_blas_data_mapper<Scalar, Index, StorageOrder>& lhs, Index cols, Index i,
|
|
25
|
+
Index& count) {
|
|
24
26
|
// normal copy
|
|
25
|
-
for(Index k=0; k<i; k++)
|
|
26
|
-
for(Index w=0; w<BlockRows; w++)
|
|
27
|
-
blockA[count++] = lhs(i+w,k); // normal
|
|
27
|
+
for (Index k = 0; k < i; k++)
|
|
28
|
+
for (Index w = 0; w < BlockRows; w++) blockA[count++] = lhs(i + w, k); // normal
|
|
28
29
|
// symmetric copy
|
|
29
30
|
Index h = 0;
|
|
30
|
-
for(Index k=i; k<i+BlockRows; k++)
|
|
31
|
-
|
|
32
|
-
for(Index w=0; w<h; w++)
|
|
33
|
-
blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
|
|
31
|
+
for (Index k = i; k < i + BlockRows; k++) {
|
|
32
|
+
for (Index w = 0; w < h; w++) blockA[count++] = numext::conj(lhs(k, i + w)); // transposed
|
|
34
33
|
|
|
35
|
-
blockA[count++] = numext::real(lhs(k,k));
|
|
34
|
+
blockA[count++] = numext::real(lhs(k, k)); // real (diagonal)
|
|
36
35
|
|
|
37
|
-
for(Index w=h+1; w<BlockRows; w++)
|
|
38
|
-
blockA[count++] = lhs(i+w, k); // normal
|
|
36
|
+
for (Index w = h + 1; w < BlockRows; w++) blockA[count++] = lhs(i + w, k); // normal
|
|
39
37
|
++h;
|
|
40
38
|
}
|
|
41
39
|
// transposed copy
|
|
42
|
-
for(Index k=i+BlockRows; k<cols; k++)
|
|
43
|
-
for(Index w=0; w<BlockRows; w++)
|
|
44
|
-
blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
|
|
40
|
+
for (Index k = i + BlockRows; k < cols; k++)
|
|
41
|
+
for (Index w = 0; w < BlockRows; w++) blockA[count++] = numext::conj(lhs(k, i + w)); // transposed
|
|
45
42
|
}
|
|
46
|
-
void operator()(Scalar* blockA, const Scalar*
|
|
47
|
-
{
|
|
43
|
+
void operator()(Scalar* blockA, const Scalar* lhs_, Index lhsStride, Index cols, Index rows) {
|
|
48
44
|
typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
|
|
49
|
-
typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
45
|
+
typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half
|
|
46
|
+
QuarterPacket;
|
|
47
|
+
enum {
|
|
48
|
+
PacketSize = packet_traits<Scalar>::size,
|
|
49
|
+
HalfPacketSize = unpacket_traits<HalfPacket>::size,
|
|
50
|
+
QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
|
|
51
|
+
HasHalf = (int)HalfPacketSize < (int)PacketSize,
|
|
52
|
+
HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(lhs_, lhsStride);
|
|
57
56
|
Index count = 0;
|
|
58
|
-
//Index peeled_mc3 = (rows/Pack1)*Pack1;
|
|
59
|
-
|
|
60
|
-
const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
|
|
61
|
-
const Index peeled_mc2 =
|
|
62
|
-
|
|
63
|
-
const Index
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if(
|
|
79
|
-
for(Index i=
|
|
57
|
+
// Index peeled_mc3 = (rows/Pack1)*Pack1;
|
|
58
|
+
|
|
59
|
+
const Index peeled_mc3 = Pack1 >= 3 * PacketSize ? (rows / (3 * PacketSize)) * (3 * PacketSize) : 0;
|
|
60
|
+
const Index peeled_mc2 =
|
|
61
|
+
Pack1 >= 2 * PacketSize ? peeled_mc3 + ((rows - peeled_mc3) / (2 * PacketSize)) * (2 * PacketSize) : 0;
|
|
62
|
+
const Index peeled_mc1 =
|
|
63
|
+
Pack1 >= 1 * PacketSize ? peeled_mc2 + ((rows - peeled_mc2) / (1 * PacketSize)) * (1 * PacketSize) : 0;
|
|
64
|
+
const Index peeled_mc_half =
|
|
65
|
+
Pack1 >= HalfPacketSize ? peeled_mc1 + ((rows - peeled_mc1) / (HalfPacketSize)) * (HalfPacketSize) : 0;
|
|
66
|
+
const Index peeled_mc_quarter =
|
|
67
|
+
Pack1 >= QuarterPacketSize
|
|
68
|
+
? peeled_mc_half + ((rows - peeled_mc_half) / (QuarterPacketSize)) * (QuarterPacketSize)
|
|
69
|
+
: 0;
|
|
70
|
+
|
|
71
|
+
if (Pack1 >= 3 * PacketSize)
|
|
72
|
+
for (Index i = 0; i < peeled_mc3; i += 3 * PacketSize) pack<3 * PacketSize>(blockA, lhs, cols, i, count);
|
|
73
|
+
|
|
74
|
+
if (Pack1 >= 2 * PacketSize)
|
|
75
|
+
for (Index i = peeled_mc3; i < peeled_mc2; i += 2 * PacketSize) pack<2 * PacketSize>(blockA, lhs, cols, i, count);
|
|
76
|
+
|
|
77
|
+
if (Pack1 >= 1 * PacketSize)
|
|
78
|
+
for (Index i = peeled_mc2; i < peeled_mc1; i += 1 * PacketSize) pack<1 * PacketSize>(blockA, lhs, cols, i, count);
|
|
79
|
+
|
|
80
|
+
if (HasHalf && Pack1 >= HalfPacketSize)
|
|
81
|
+
for (Index i = peeled_mc1; i < peeled_mc_half; i += HalfPacketSize)
|
|
80
82
|
pack<HalfPacketSize>(blockA, lhs, cols, i, count);
|
|
81
83
|
|
|
82
|
-
if(HasQuarter && Pack1>=QuarterPacketSize)
|
|
83
|
-
for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)
|
|
84
|
+
if (HasQuarter && Pack1 >= QuarterPacketSize)
|
|
85
|
+
for (Index i = peeled_mc_half; i < peeled_mc_quarter; i += QuarterPacketSize)
|
|
84
86
|
pack<QuarterPacketSize>(blockA, lhs, cols, i, count);
|
|
85
87
|
|
|
86
88
|
// do the same with mr==1
|
|
87
|
-
for(Index i=peeled_mc_quarter; i<rows; i++)
|
|
88
|
-
|
|
89
|
-
for(Index k=0; k<i; k++)
|
|
90
|
-
blockA[count++] = lhs(i, k); // normal
|
|
89
|
+
for (Index i = peeled_mc_quarter; i < rows; i++) {
|
|
90
|
+
for (Index k = 0; k < i; k++) blockA[count++] = lhs(i, k); // normal
|
|
91
91
|
|
|
92
|
-
blockA[count++] = numext::real(lhs(i, i));
|
|
92
|
+
blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
|
|
93
93
|
|
|
94
|
-
for(Index k=i+1; k<cols; k++)
|
|
95
|
-
blockA[count++] = numext::conj(lhs(k, i)); // transposed
|
|
94
|
+
for (Index k = i + 1; k < cols; k++) blockA[count++] = numext::conj(lhs(k, i)); // transposed
|
|
96
95
|
}
|
|
97
96
|
}
|
|
98
97
|
};
|
|
99
98
|
|
|
100
|
-
template<typename Scalar, typename Index, int nr, int StorageOrder>
|
|
101
|
-
struct symm_pack_rhs
|
|
102
|
-
{
|
|
99
|
+
template <typename Scalar, typename Index, int nr, int StorageOrder>
|
|
100
|
+
struct symm_pack_rhs {
|
|
103
101
|
enum { PacketSize = packet_traits<Scalar>::size };
|
|
104
|
-
void operator()(Scalar* blockB, const Scalar*
|
|
105
|
-
{
|
|
102
|
+
void operator()(Scalar* blockB, const Scalar* rhs_, Index rhsStride, Index rows, Index cols, Index k2) {
|
|
106
103
|
Index end_k = k2 + rows;
|
|
107
104
|
Index count = 0;
|
|
108
|
-
const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(
|
|
109
|
-
Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
|
|
110
|
-
Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
|
|
105
|
+
const_blas_data_mapper<Scalar, Index, StorageOrder> rhs(rhs_, rhsStride);
|
|
106
|
+
Index packet_cols8 = nr >= 8 ? (cols / 8) * 8 : 0;
|
|
107
|
+
Index packet_cols4 = nr >= 4 ? (cols / 4) * 4 : 0;
|
|
111
108
|
|
|
112
109
|
// first part: normal case
|
|
113
|
-
for(Index j2=0; j2<k2; j2+=nr)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
{
|
|
121
|
-
blockB[count+2] = rhs(k,j2+2);
|
|
122
|
-
blockB[count+3] = rhs(k,j2+3);
|
|
110
|
+
for (Index j2 = 0; j2 < k2; j2 += nr) {
|
|
111
|
+
for (Index k = k2; k < end_k; k++) {
|
|
112
|
+
blockB[count + 0] = rhs(k, j2 + 0);
|
|
113
|
+
blockB[count + 1] = rhs(k, j2 + 1);
|
|
114
|
+
if (nr >= 4) {
|
|
115
|
+
blockB[count + 2] = rhs(k, j2 + 2);
|
|
116
|
+
blockB[count + 3] = rhs(k, j2 + 3);
|
|
123
117
|
}
|
|
124
|
-
if (nr>=8)
|
|
125
|
-
|
|
126
|
-
blockB[count+
|
|
127
|
-
blockB[count+
|
|
128
|
-
blockB[count+
|
|
129
|
-
blockB[count+7] = rhs(k,j2+7);
|
|
118
|
+
if (nr >= 8) {
|
|
119
|
+
blockB[count + 4] = rhs(k, j2 + 4);
|
|
120
|
+
blockB[count + 5] = rhs(k, j2 + 5);
|
|
121
|
+
blockB[count + 6] = rhs(k, j2 + 6);
|
|
122
|
+
blockB[count + 7] = rhs(k, j2 + 7);
|
|
130
123
|
}
|
|
131
124
|
count += nr;
|
|
132
125
|
}
|
|
133
126
|
}
|
|
134
127
|
|
|
135
128
|
// second part: diagonal block
|
|
136
|
-
Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
|
|
137
|
-
if(nr>=8)
|
|
138
|
-
|
|
139
|
-
for(Index j2=k2; j2<end8; j2+=8)
|
|
140
|
-
{
|
|
129
|
+
Index end8 = nr >= 8 ? (std::min)(k2 + rows, packet_cols8) : k2;
|
|
130
|
+
if (nr >= 8) {
|
|
131
|
+
for (Index j2 = k2; j2 < end8; j2 += 8) {
|
|
141
132
|
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
|
142
133
|
// transpose
|
|
143
|
-
for(Index k=k2; k<j2; k++)
|
|
144
|
-
|
|
145
|
-
blockB[count+
|
|
146
|
-
blockB[count+
|
|
147
|
-
blockB[count+
|
|
148
|
-
blockB[count+
|
|
149
|
-
blockB[count+
|
|
150
|
-
blockB[count+
|
|
151
|
-
blockB[count+
|
|
152
|
-
blockB[count+7] = numext::conj(rhs(j2+7,k));
|
|
134
|
+
for (Index k = k2; k < j2; k++) {
|
|
135
|
+
blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
|
|
136
|
+
blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
|
|
137
|
+
blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
|
|
138
|
+
blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
|
|
139
|
+
blockB[count + 4] = numext::conj(rhs(j2 + 4, k));
|
|
140
|
+
blockB[count + 5] = numext::conj(rhs(j2 + 5, k));
|
|
141
|
+
blockB[count + 6] = numext::conj(rhs(j2 + 6, k));
|
|
142
|
+
blockB[count + 7] = numext::conj(rhs(j2 + 7, k));
|
|
153
143
|
count += 8;
|
|
154
144
|
}
|
|
155
145
|
// symmetric
|
|
156
146
|
Index h = 0;
|
|
157
|
-
for(Index k=j2; k<j2+8; k++)
|
|
158
|
-
{
|
|
147
|
+
for (Index k = j2; k < j2 + 8; k++) {
|
|
159
148
|
// normal
|
|
160
|
-
for (Index w=0
|
|
161
|
-
blockB[count+w] = rhs(k,j2+w);
|
|
149
|
+
for (Index w = 0; w < h; ++w) blockB[count + w] = rhs(k, j2 + w);
|
|
162
150
|
|
|
163
|
-
blockB[count+h] = numext::real(rhs(k,k));
|
|
151
|
+
blockB[count + h] = numext::real(rhs(k, k));
|
|
164
152
|
|
|
165
153
|
// transpose
|
|
166
|
-
for (Index w=h+1
|
|
167
|
-
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
|
154
|
+
for (Index w = h + 1; w < 8; ++w) blockB[count + w] = numext::conj(rhs(j2 + w, k));
|
|
168
155
|
count += 8;
|
|
169
156
|
++h;
|
|
170
157
|
}
|
|
171
158
|
// normal
|
|
172
|
-
for(Index k=j2+8; k<end_k; k++)
|
|
173
|
-
|
|
174
|
-
blockB[count+
|
|
175
|
-
blockB[count+
|
|
176
|
-
blockB[count+
|
|
177
|
-
blockB[count+
|
|
178
|
-
blockB[count+
|
|
179
|
-
blockB[count+
|
|
180
|
-
blockB[count+
|
|
181
|
-
blockB[count+7] = rhs(k,j2+7);
|
|
159
|
+
for (Index k = j2 + 8; k < end_k; k++) {
|
|
160
|
+
blockB[count + 0] = rhs(k, j2 + 0);
|
|
161
|
+
blockB[count + 1] = rhs(k, j2 + 1);
|
|
162
|
+
blockB[count + 2] = rhs(k, j2 + 2);
|
|
163
|
+
blockB[count + 3] = rhs(k, j2 + 3);
|
|
164
|
+
blockB[count + 4] = rhs(k, j2 + 4);
|
|
165
|
+
blockB[count + 5] = rhs(k, j2 + 5);
|
|
166
|
+
blockB[count + 6] = rhs(k, j2 + 6);
|
|
167
|
+
blockB[count + 7] = rhs(k, j2 + 7);
|
|
182
168
|
count += 8;
|
|
183
169
|
}
|
|
184
170
|
}
|
|
185
171
|
}
|
|
186
|
-
if(nr>=4)
|
|
187
|
-
|
|
188
|
-
for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
|
|
189
|
-
{
|
|
172
|
+
if (nr >= 4) {
|
|
173
|
+
for (Index j2 = end8; j2 < (std::min)(k2 + rows, packet_cols4); j2 += 4) {
|
|
190
174
|
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
|
191
175
|
// transpose
|
|
192
|
-
for(Index k=k2; k<j2; k++)
|
|
193
|
-
|
|
194
|
-
blockB[count+
|
|
195
|
-
blockB[count+
|
|
196
|
-
blockB[count+
|
|
197
|
-
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
|
176
|
+
for (Index k = k2; k < j2; k++) {
|
|
177
|
+
blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
|
|
178
|
+
blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
|
|
179
|
+
blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
|
|
180
|
+
blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
|
|
198
181
|
count += 4;
|
|
199
182
|
}
|
|
200
183
|
// symmetric
|
|
201
184
|
Index h = 0;
|
|
202
|
-
for(Index k=j2; k<j2+4; k++)
|
|
203
|
-
{
|
|
185
|
+
for (Index k = j2; k < j2 + 4; k++) {
|
|
204
186
|
// normal
|
|
205
|
-
for (Index w=0
|
|
206
|
-
blockB[count+w] = rhs(k,j2+w);
|
|
187
|
+
for (Index w = 0; w < h; ++w) blockB[count + w] = rhs(k, j2 + w);
|
|
207
188
|
|
|
208
|
-
blockB[count+h] = numext::real(rhs(k,k));
|
|
189
|
+
blockB[count + h] = numext::real(rhs(k, k));
|
|
209
190
|
|
|
210
191
|
// transpose
|
|
211
|
-
for (Index w=h+1
|
|
212
|
-
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
|
192
|
+
for (Index w = h + 1; w < 4; ++w) blockB[count + w] = numext::conj(rhs(j2 + w, k));
|
|
213
193
|
count += 4;
|
|
214
194
|
++h;
|
|
215
195
|
}
|
|
216
196
|
// normal
|
|
217
|
-
for(Index k=j2+4; k<end_k; k++)
|
|
218
|
-
|
|
219
|
-
blockB[count+
|
|
220
|
-
blockB[count+
|
|
221
|
-
blockB[count+
|
|
222
|
-
blockB[count+3] = rhs(k,j2+3);
|
|
197
|
+
for (Index k = j2 + 4; k < end_k; k++) {
|
|
198
|
+
blockB[count + 0] = rhs(k, j2 + 0);
|
|
199
|
+
blockB[count + 1] = rhs(k, j2 + 1);
|
|
200
|
+
blockB[count + 2] = rhs(k, j2 + 2);
|
|
201
|
+
blockB[count + 3] = rhs(k, j2 + 3);
|
|
223
202
|
count += 4;
|
|
224
203
|
}
|
|
225
204
|
}
|
|
226
205
|
}
|
|
227
206
|
|
|
228
207
|
// third part: transposed
|
|
229
|
-
if(nr>=8)
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
blockB[count+
|
|
236
|
-
blockB[count+
|
|
237
|
-
blockB[count+
|
|
238
|
-
blockB[count+
|
|
239
|
-
blockB[count+
|
|
240
|
-
blockB[count+5] = numext::conj(rhs(j2+5,k));
|
|
241
|
-
blockB[count+6] = numext::conj(rhs(j2+6,k));
|
|
242
|
-
blockB[count+7] = numext::conj(rhs(j2+7,k));
|
|
208
|
+
if (nr >= 8) {
|
|
209
|
+
for (Index j2 = k2 + rows; j2 < packet_cols8; j2 += 8) {
|
|
210
|
+
for (Index k = k2; k < end_k; k++) {
|
|
211
|
+
blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
|
|
212
|
+
blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
|
|
213
|
+
blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
|
|
214
|
+
blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
|
|
215
|
+
blockB[count + 4] = numext::conj(rhs(j2 + 4, k));
|
|
216
|
+
blockB[count + 5] = numext::conj(rhs(j2 + 5, k));
|
|
217
|
+
blockB[count + 6] = numext::conj(rhs(j2 + 6, k));
|
|
218
|
+
blockB[count + 7] = numext::conj(rhs(j2 + 7, k));
|
|
243
219
|
count += 8;
|
|
244
220
|
}
|
|
245
221
|
}
|
|
246
222
|
}
|
|
247
|
-
if(nr>=4)
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
blockB[count+
|
|
254
|
-
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
|
255
|
-
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
|
256
|
-
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
|
223
|
+
if (nr >= 4) {
|
|
224
|
+
for (Index j2 = (std::max)(packet_cols8, k2 + rows); j2 < packet_cols4; j2 += 4) {
|
|
225
|
+
for (Index k = k2; k < end_k; k++) {
|
|
226
|
+
blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
|
|
227
|
+
blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
|
|
228
|
+
blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
|
|
229
|
+
blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
|
|
257
230
|
count += 4;
|
|
258
231
|
}
|
|
259
232
|
}
|
|
260
233
|
}
|
|
261
234
|
|
|
262
235
|
// copy the remaining columns one at a time (=> the same with nr==1)
|
|
263
|
-
for(Index j2=packet_cols4; j2<cols; ++j2)
|
|
264
|
-
{
|
|
236
|
+
for (Index j2 = packet_cols4; j2 < cols; ++j2) {
|
|
265
237
|
// transpose
|
|
266
|
-
Index half = (std::min)(end_k,j2);
|
|
267
|
-
for(Index k=k2; k<half; k++)
|
|
268
|
-
|
|
269
|
-
blockB[count] = numext::conj(rhs(j2,k));
|
|
238
|
+
Index half = (std::min)(end_k, j2);
|
|
239
|
+
for (Index k = k2; k < half; k++) {
|
|
240
|
+
blockB[count] = numext::conj(rhs(j2, k));
|
|
270
241
|
count += 1;
|
|
271
242
|
}
|
|
272
243
|
|
|
273
|
-
if(half==j2 && half<k2+rows)
|
|
274
|
-
|
|
275
|
-
blockB[count] = numext::real(rhs(j2,j2));
|
|
244
|
+
if (half == j2 && half < k2 + rows) {
|
|
245
|
+
blockB[count] = numext::real(rhs(j2, j2));
|
|
276
246
|
count += 1;
|
|
277
|
-
}
|
|
278
|
-
else
|
|
247
|
+
} else
|
|
279
248
|
half--;
|
|
280
249
|
|
|
281
250
|
// normal
|
|
282
|
-
for(Index k=half+1; k<k2+rows; k++)
|
|
283
|
-
|
|
284
|
-
blockB[count] = rhs(k,j2);
|
|
251
|
+
for (Index k = half + 1; k < k2 + rows; k++) {
|
|
252
|
+
blockB[count] = rhs(k, j2);
|
|
285
253
|
count += 1;
|
|
286
254
|
}
|
|
287
255
|
}
|
|
@@ -291,254 +259,225 @@ struct symm_pack_rhs
|
|
|
291
259
|
/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
|
|
292
260
|
* the general matrix matrix product.
|
|
293
261
|
*/
|
|
294
|
-
template <typename Scalar, typename Index,
|
|
295
|
-
int
|
|
296
|
-
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
|
|
297
|
-
int ResStorageOrder, int ResInnerStride>
|
|
262
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
|
263
|
+
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResStorageOrder, int ResInnerStride>
|
|
298
264
|
struct product_selfadjoint_matrix;
|
|
299
265
|
|
|
300
|
-
template <typename Scalar, typename Index,
|
|
301
|
-
int
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
{
|
|
314
|
-
product_selfadjoint_matrix<Scalar, Index,
|
|
315
|
-
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
|
|
316
|
-
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
|
|
317
|
-
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
|
|
318
|
-
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
|
|
319
|
-
ColMajor,ResInnerStride>
|
|
320
|
-
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
|
266
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
|
267
|
+
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResInnerStride>
|
|
268
|
+
struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, LhsSelfAdjoint, ConjugateLhs, RhsStorageOrder,
|
|
269
|
+
RhsSelfAdjoint, ConjugateRhs, RowMajor, ResInnerStride> {
|
|
270
|
+
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs,
|
|
271
|
+
Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
|
|
272
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
273
|
+
product_selfadjoint_matrix<
|
|
274
|
+
Scalar, Index, logical_xor(RhsSelfAdjoint, RhsStorageOrder == RowMajor) ? ColMajor : RowMajor, RhsSelfAdjoint,
|
|
275
|
+
NumTraits<Scalar>::IsComplex && logical_xor(RhsSelfAdjoint, ConjugateRhs),
|
|
276
|
+
logical_xor(LhsSelfAdjoint, LhsStorageOrder == RowMajor) ? ColMajor : RowMajor, LhsSelfAdjoint,
|
|
277
|
+
NumTraits<Scalar>::IsComplex && logical_xor(LhsSelfAdjoint, ConjugateLhs), ColMajor,
|
|
278
|
+
ResInnerStride>::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
|
321
279
|
}
|
|
322
280
|
};
|
|
323
281
|
|
|
324
|
-
template <typename Scalar, typename Index,
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
static EIGEN_DONT_INLINE void run(
|
|
332
|
-
Index rows, Index cols,
|
|
333
|
-
const Scalar* _lhs, Index lhsStride,
|
|
334
|
-
const Scalar* _rhs, Index rhsStride,
|
|
335
|
-
Scalar* res, Index resIncr, Index resStride,
|
|
336
|
-
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
282
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
283
|
+
bool ConjugateRhs, int ResInnerStride>
|
|
284
|
+
struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, true, ConjugateLhs, RhsStorageOrder, false,
|
|
285
|
+
ConjugateRhs, ColMajor, ResInnerStride> {
|
|
286
|
+
static EIGEN_DONT_INLINE void run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
|
|
287
|
+
Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
|
|
288
|
+
level3_blocking<Scalar, Scalar>& blocking);
|
|
337
289
|
};
|
|
338
290
|
|
|
339
|
-
template <typename Scalar, typename Index,
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
291
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
292
|
+
bool ConjugateRhs, int ResInnerStride>
|
|
293
|
+
EIGEN_DONT_INLINE void
|
|
294
|
+
product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, true, ConjugateLhs, RhsStorageOrder, false, ConjugateRhs,
|
|
295
|
+
ColMajor, ResInnerStride>::run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride,
|
|
296
|
+
const Scalar* rhs_, Index rhsStride, Scalar* res_,
|
|
297
|
+
Index resIncr, Index resStride, const Scalar& alpha,
|
|
298
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
299
|
+
Index size = rows;
|
|
300
|
+
|
|
301
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
302
|
+
|
|
303
|
+
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
304
|
+
typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
|
|
305
|
+
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
|
306
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
307
|
+
LhsMapper lhs(lhs_, lhsStride);
|
|
308
|
+
LhsTransposeMapper lhs_transpose(lhs_, lhsStride);
|
|
309
|
+
RhsMapper rhs(rhs_, rhsStride);
|
|
310
|
+
ResMapper res(res_, resStride, resIncr);
|
|
311
|
+
|
|
312
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
313
|
+
Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
|
|
314
|
+
// kc must be smaller than mc
|
|
315
|
+
kc = (std::min)(kc, mc);
|
|
316
|
+
std::size_t sizeA = kc * mc;
|
|
317
|
+
std::size_t sizeB = kc * cols;
|
|
318
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
319
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
320
|
+
|
|
321
|
+
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
322
|
+
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
|
323
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
324
|
+
gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
325
|
+
LhsStorageOrder == RowMajor ? ColMajor : RowMajor, true>
|
|
326
|
+
pack_lhs_transposed;
|
|
327
|
+
|
|
328
|
+
for (Index k2 = 0; k2 < size; k2 += kc) {
|
|
329
|
+
const Index actual_kc = (std::min)(k2 + kc, size) - k2;
|
|
330
|
+
|
|
331
|
+
// we have selected one row panel of rhs and one column panel of lhs
|
|
332
|
+
// pack rhs's panel into a sequential chunk of memory
|
|
333
|
+
// and expand each coeff to a constant packet for further reuse
|
|
334
|
+
pack_rhs(blockB, rhs.getSubMapper(k2, 0), actual_kc, cols);
|
|
335
|
+
|
|
336
|
+
// the select lhs's panel has to be split in three different parts:
|
|
337
|
+
// 1 - the transposed panel above the diagonal block => transposed packed copy
|
|
338
|
+
// 2 - the diagonal block => special packed copy
|
|
339
|
+
// 3 - the panel below the diagonal block => generic packed copy
|
|
340
|
+
for (Index i2 = 0; i2 < k2; i2 += mc) {
|
|
341
|
+
const Index actual_mc = (std::min)(i2 + mc, k2) - i2;
|
|
342
|
+
// transposed packed copy
|
|
343
|
+
pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
344
|
+
|
|
345
|
+
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
346
|
+
}
|
|
347
|
+
// the block diagonal
|
|
378
348
|
{
|
|
379
|
-
const Index
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
// pack rhs's panel into a sequential chunk of memory
|
|
383
|
-
// and expand each coeff to a constant packet for further reuse
|
|
384
|
-
pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
|
|
385
|
-
|
|
386
|
-
// the select lhs's panel has to be split in three different parts:
|
|
387
|
-
// 1 - the transposed panel above the diagonal block => transposed packed copy
|
|
388
|
-
// 2 - the diagonal block => special packed copy
|
|
389
|
-
// 3 - the panel below the diagonal block => generic packed copy
|
|
390
|
-
for(Index i2=0; i2<k2; i2+=mc)
|
|
391
|
-
{
|
|
392
|
-
const Index actual_mc = (std::min)(i2+mc,k2)-i2;
|
|
393
|
-
// transposed packed copy
|
|
394
|
-
pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
395
|
-
|
|
396
|
-
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
397
|
-
}
|
|
398
|
-
// the block diagonal
|
|
399
|
-
{
|
|
400
|
-
const Index actual_mc = (std::min)(k2+kc,size)-k2;
|
|
401
|
-
// symmetric packed copy
|
|
402
|
-
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
|
|
349
|
+
const Index actual_mc = (std::min)(k2 + kc, size) - k2;
|
|
350
|
+
// symmetric packed copy
|
|
351
|
+
pack_lhs(blockA, &lhs(k2, k2), lhsStride, actual_kc, actual_mc);
|
|
403
352
|
|
|
404
|
-
|
|
405
|
-
|
|
353
|
+
gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
354
|
+
}
|
|
406
355
|
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
356
|
+
for (Index i2 = k2 + kc; i2 < size; i2 += mc) {
|
|
357
|
+
const Index actual_mc = (std::min)(i2 + mc, size) - i2;
|
|
358
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
359
|
+
LhsStorageOrder, false>()(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
412
360
|
|
|
413
|
-
|
|
414
|
-
}
|
|
361
|
+
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
415
362
|
}
|
|
416
363
|
}
|
|
364
|
+
}
|
|
417
365
|
|
|
418
366
|
// matrix * selfadjoint product
|
|
419
|
-
template <typename Scalar, typename Index,
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
static EIGEN_DONT_INLINE void run(
|
|
427
|
-
Index rows, Index cols,
|
|
428
|
-
const Scalar* _lhs, Index lhsStride,
|
|
429
|
-
const Scalar* _rhs, Index rhsStride,
|
|
430
|
-
Scalar* res, Index resIncr, Index resStride,
|
|
431
|
-
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
367
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
368
|
+
bool ConjugateRhs, int ResInnerStride>
|
|
369
|
+
struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, false, ConjugateLhs, RhsStorageOrder, true,
|
|
370
|
+
ConjugateRhs, ColMajor, ResInnerStride> {
|
|
371
|
+
static EIGEN_DONT_INLINE void run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
|
|
372
|
+
Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
|
|
373
|
+
level3_blocking<Scalar, Scalar>& blocking);
|
|
432
374
|
};
|
|
433
375
|
|
|
434
|
-
template <typename Scalar, typename Index,
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
476
|
-
|
|
477
|
-
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
478
|
-
}
|
|
376
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
377
|
+
bool ConjugateRhs, int ResInnerStride>
|
|
378
|
+
EIGEN_DONT_INLINE void
|
|
379
|
+
product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, false, ConjugateLhs, RhsStorageOrder, true, ConjugateRhs,
|
|
380
|
+
ColMajor, ResInnerStride>::run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride,
|
|
381
|
+
const Scalar* rhs_, Index rhsStride, Scalar* res_,
|
|
382
|
+
Index resIncr, Index resStride, const Scalar& alpha,
|
|
383
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
384
|
+
Index size = cols;
|
|
385
|
+
|
|
386
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
387
|
+
|
|
388
|
+
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
389
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
390
|
+
LhsMapper lhs(lhs_, lhsStride);
|
|
391
|
+
ResMapper res(res_, resStride, resIncr);
|
|
392
|
+
|
|
393
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
394
|
+
Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
|
|
395
|
+
std::size_t sizeA = kc * mc;
|
|
396
|
+
std::size_t sizeB = kc * cols;
|
|
397
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
398
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
399
|
+
|
|
400
|
+
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
401
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
402
|
+
LhsStorageOrder>
|
|
403
|
+
pack_lhs;
|
|
404
|
+
symm_pack_rhs<Scalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
405
|
+
|
|
406
|
+
for (Index k2 = 0; k2 < size; k2 += kc) {
|
|
407
|
+
const Index actual_kc = (std::min)(k2 + kc, size) - k2;
|
|
408
|
+
|
|
409
|
+
pack_rhs(blockB, rhs_, rhsStride, actual_kc, cols, k2);
|
|
410
|
+
|
|
411
|
+
// => GEPP
|
|
412
|
+
for (Index i2 = 0; i2 < rows; i2 += mc) {
|
|
413
|
+
const Index actual_mc = (std::min)(i2 + mc, rows) - i2;
|
|
414
|
+
pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
415
|
+
|
|
416
|
+
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
479
417
|
}
|
|
480
418
|
}
|
|
419
|
+
}
|
|
481
420
|
|
|
482
|
-
}
|
|
421
|
+
} // end namespace internal
|
|
483
422
|
|
|
484
423
|
/***************************************************************************
|
|
485
|
-
* Wrapper to product_selfadjoint_matrix
|
|
486
|
-
***************************************************************************/
|
|
424
|
+
* Wrapper to product_selfadjoint_matrix
|
|
425
|
+
***************************************************************************/
|
|
487
426
|
|
|
488
427
|
namespace internal {
|
|
489
|
-
|
|
490
|
-
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
|
|
491
|
-
struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
428
|
+
|
|
429
|
+
template <typename Lhs, int LhsMode, typename Rhs, int RhsMode>
|
|
430
|
+
struct selfadjoint_product_impl<Lhs, LhsMode, false, Rhs, RhsMode, false> {
|
|
431
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
432
|
+
|
|
495
433
|
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
|
496
434
|
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
|
|
497
435
|
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
|
498
436
|
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
|
499
|
-
|
|
437
|
+
|
|
500
438
|
enum {
|
|
501
|
-
LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
|
|
502
|
-
LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
|
|
503
|
-
RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
|
|
504
|
-
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
|
|
439
|
+
LhsIsUpper = (LhsMode & (Upper | Lower)) == Upper,
|
|
440
|
+
LhsIsSelfAdjoint = (LhsMode & SelfAdjoint) == SelfAdjoint,
|
|
441
|
+
RhsIsUpper = (RhsMode & (Upper | Lower)) == Upper,
|
|
442
|
+
RhsIsSelfAdjoint = (RhsMode & SelfAdjoint) == SelfAdjoint
|
|
505
443
|
};
|
|
506
|
-
|
|
507
|
-
template<typename Dest>
|
|
508
|
-
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
|
|
509
|
-
{
|
|
510
|
-
eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
|
|
511
444
|
|
|
512
|
-
|
|
513
|
-
|
|
445
|
+
template <typename Dest>
|
|
446
|
+
static void run(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {
|
|
447
|
+
eigen_assert(dst.rows() == a_lhs.rows() && dst.cols() == a_rhs.cols());
|
|
448
|
+
|
|
449
|
+
add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
|
|
450
|
+
add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
|
|
514
451
|
|
|
515
|
-
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
|
|
516
|
-
* RhsBlasTraits::extractScalarFactor(a_rhs);
|
|
452
|
+
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs);
|
|
517
453
|
|
|
518
|
-
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
|
|
519
|
-
|
|
454
|
+
typedef internal::gemm_blocking_space<(Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, Scalar, Scalar,
|
|
455
|
+
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime,
|
|
456
|
+
Lhs::MaxColsAtCompileTime, 1>
|
|
457
|
+
BlockingType;
|
|
520
458
|
|
|
521
459
|
BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
|
|
522
460
|
|
|
523
|
-
internal::product_selfadjoint_matrix<
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
461
|
+
internal::product_selfadjoint_matrix<
|
|
462
|
+
Scalar, Index,
|
|
463
|
+
internal::logical_xor(LhsIsUpper, internal::traits<Lhs>::Flags & RowMajorBit) ? RowMajor : ColMajor,
|
|
464
|
+
LhsIsSelfAdjoint,
|
|
465
|
+
NumTraits<Scalar>::IsComplex && internal::logical_xor(LhsIsUpper, bool(LhsBlasTraits::NeedToConjugate)),
|
|
466
|
+
internal::logical_xor(RhsIsUpper, internal::traits<Rhs>::Flags & RowMajorBit) ? RowMajor : ColMajor,
|
|
467
|
+
RhsIsSelfAdjoint,
|
|
468
|
+
NumTraits<Scalar>::IsComplex && internal::logical_xor(RhsIsUpper, bool(RhsBlasTraits::NeedToConjugate)),
|
|
469
|
+
internal::traits<Dest>::Flags & RowMajorBit ? RowMajor : ColMajor,
|
|
470
|
+
Dest::InnerStrideAtCompileTime>::run(lhs.rows(), rhs.cols(), // sizes
|
|
471
|
+
&lhs.coeffRef(0, 0), lhs.outerStride(), // lhs info
|
|
472
|
+
&rhs.coeffRef(0, 0), rhs.outerStride(), // rhs info
|
|
473
|
+
&dst.coeffRef(0, 0), dst.innerStride(), dst.outerStride(), // result info
|
|
474
|
+
actualAlpha, blocking // alpha
|
|
475
|
+
);
|
|
537
476
|
}
|
|
538
477
|
};
|
|
539
478
|
|
|
540
|
-
}
|
|
479
|
+
} // end namespace internal
|
|
541
480
|
|
|
542
|
-
}
|
|
481
|
+
} // end namespace Eigen
|
|
543
482
|
|
|
544
|
-
#endif
|
|
483
|
+
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
|