@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -10,261 +10,246 @@
|
|
|
10
10
|
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
|
|
11
11
|
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../InternalHeaderCheck.h"
|
|
15
|
+
|
|
16
|
+
namespace Eigen {
|
|
14
17
|
|
|
15
18
|
namespace internal {
|
|
16
19
|
|
|
17
20
|
// pack a selfadjoint block diagonal for use with the gebp_kernel
|
|
18
|
-
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
|
|
19
|
-
struct symm_pack_lhs
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
{
|
|
21
|
+
template <typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
|
|
22
|
+
struct symm_pack_lhs {
|
|
23
|
+
template <int BlockRows>
|
|
24
|
+
inline void pack(Scalar* blockA, const const_blas_data_mapper<Scalar, Index, StorageOrder>& lhs, Index cols, Index i,
|
|
25
|
+
Index& count) {
|
|
24
26
|
// normal copy
|
|
25
|
-
for(Index k=0; k<i; k++)
|
|
26
|
-
for(Index w=0; w<BlockRows; w++)
|
|
27
|
-
blockA[count++] = lhs(i+w,k); // normal
|
|
27
|
+
for (Index k = 0; k < i; k++)
|
|
28
|
+
for (Index w = 0; w < BlockRows; w++) blockA[count++] = lhs(i + w, k); // normal
|
|
28
29
|
// symmetric copy
|
|
29
30
|
Index h = 0;
|
|
30
|
-
for(Index k=i; k<i+BlockRows; k++)
|
|
31
|
-
|
|
32
|
-
for(Index w=0; w<h; w++)
|
|
33
|
-
blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
|
|
31
|
+
for (Index k = i; k < i + BlockRows; k++) {
|
|
32
|
+
for (Index w = 0; w < h; w++) blockA[count++] = numext::conj(lhs(k, i + w)); // transposed
|
|
34
33
|
|
|
35
|
-
blockA[count++] = numext::real(lhs(k,k));
|
|
34
|
+
blockA[count++] = numext::real(lhs(k, k)); // real (diagonal)
|
|
36
35
|
|
|
37
|
-
for(Index w=h+1; w<BlockRows; w++)
|
|
38
|
-
blockA[count++] = lhs(i+w, k); // normal
|
|
36
|
+
for (Index w = h + 1; w < BlockRows; w++) blockA[count++] = lhs(i + w, k); // normal
|
|
39
37
|
++h;
|
|
40
38
|
}
|
|
41
39
|
// transposed copy
|
|
42
|
-
for(Index k=i+BlockRows; k<cols; k++)
|
|
43
|
-
for(Index w=0; w<BlockRows; w++)
|
|
44
|
-
blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
|
|
40
|
+
for (Index k = i + BlockRows; k < cols; k++)
|
|
41
|
+
for (Index w = 0; w < BlockRows; w++) blockA[count++] = numext::conj(lhs(k, i + w)); // transposed
|
|
45
42
|
}
|
|
46
|
-
void operator()(Scalar* blockA, const Scalar*
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
43
|
+
void operator()(Scalar* blockA, const Scalar* lhs_, Index lhsStride, Index cols, Index rows) {
|
|
44
|
+
typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
|
|
45
|
+
typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half
|
|
46
|
+
QuarterPacket;
|
|
47
|
+
enum {
|
|
48
|
+
PacketSize = packet_traits<Scalar>::size,
|
|
49
|
+
HalfPacketSize = unpacket_traits<HalfPacket>::size,
|
|
50
|
+
QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
|
|
51
|
+
HasHalf = (int)HalfPacketSize < (int)PacketSize,
|
|
52
|
+
HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(lhs_, lhsStride);
|
|
50
56
|
Index count = 0;
|
|
51
|
-
//Index peeled_mc3 = (rows/Pack1)*Pack1;
|
|
52
|
-
|
|
53
|
-
const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
|
|
54
|
-
const Index peeled_mc2 =
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
if(Pack1>=
|
|
66
|
-
for(Index i=
|
|
67
|
-
|
|
57
|
+
// Index peeled_mc3 = (rows/Pack1)*Pack1;
|
|
58
|
+
|
|
59
|
+
const Index peeled_mc3 = Pack1 >= 3 * PacketSize ? (rows / (3 * PacketSize)) * (3 * PacketSize) : 0;
|
|
60
|
+
const Index peeled_mc2 =
|
|
61
|
+
Pack1 >= 2 * PacketSize ? peeled_mc3 + ((rows - peeled_mc3) / (2 * PacketSize)) * (2 * PacketSize) : 0;
|
|
62
|
+
const Index peeled_mc1 =
|
|
63
|
+
Pack1 >= 1 * PacketSize ? peeled_mc2 + ((rows - peeled_mc2) / (1 * PacketSize)) * (1 * PacketSize) : 0;
|
|
64
|
+
const Index peeled_mc_half =
|
|
65
|
+
Pack1 >= HalfPacketSize ? peeled_mc1 + ((rows - peeled_mc1) / (HalfPacketSize)) * (HalfPacketSize) : 0;
|
|
66
|
+
const Index peeled_mc_quarter =
|
|
67
|
+
Pack1 >= QuarterPacketSize
|
|
68
|
+
? peeled_mc_half + ((rows - peeled_mc_half) / (QuarterPacketSize)) * (QuarterPacketSize)
|
|
69
|
+
: 0;
|
|
70
|
+
|
|
71
|
+
if (Pack1 >= 3 * PacketSize)
|
|
72
|
+
for (Index i = 0; i < peeled_mc3; i += 3 * PacketSize) pack<3 * PacketSize>(blockA, lhs, cols, i, count);
|
|
73
|
+
|
|
74
|
+
if (Pack1 >= 2 * PacketSize)
|
|
75
|
+
for (Index i = peeled_mc3; i < peeled_mc2; i += 2 * PacketSize) pack<2 * PacketSize>(blockA, lhs, cols, i, count);
|
|
76
|
+
|
|
77
|
+
if (Pack1 >= 1 * PacketSize)
|
|
78
|
+
for (Index i = peeled_mc2; i < peeled_mc1; i += 1 * PacketSize) pack<1 * PacketSize>(blockA, lhs, cols, i, count);
|
|
79
|
+
|
|
80
|
+
if (HasHalf && Pack1 >= HalfPacketSize)
|
|
81
|
+
for (Index i = peeled_mc1; i < peeled_mc_half; i += HalfPacketSize)
|
|
82
|
+
pack<HalfPacketSize>(blockA, lhs, cols, i, count);
|
|
83
|
+
|
|
84
|
+
if (HasQuarter && Pack1 >= QuarterPacketSize)
|
|
85
|
+
for (Index i = peeled_mc_half; i < peeled_mc_quarter; i += QuarterPacketSize)
|
|
86
|
+
pack<QuarterPacketSize>(blockA, lhs, cols, i, count);
|
|
68
87
|
|
|
69
88
|
// do the same with mr==1
|
|
70
|
-
for(Index i=
|
|
71
|
-
|
|
72
|
-
for(Index k=0; k<i; k++)
|
|
73
|
-
blockA[count++] = lhs(i, k); // normal
|
|
89
|
+
for (Index i = peeled_mc_quarter; i < rows; i++) {
|
|
90
|
+
for (Index k = 0; k < i; k++) blockA[count++] = lhs(i, k); // normal
|
|
74
91
|
|
|
75
|
-
blockA[count++] = numext::real(lhs(i, i));
|
|
92
|
+
blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
|
|
76
93
|
|
|
77
|
-
for(Index k=i+1; k<cols; k++)
|
|
78
|
-
blockA[count++] = numext::conj(lhs(k, i)); // transposed
|
|
94
|
+
for (Index k = i + 1; k < cols; k++) blockA[count++] = numext::conj(lhs(k, i)); // transposed
|
|
79
95
|
}
|
|
80
96
|
}
|
|
81
97
|
};
|
|
82
98
|
|
|
83
|
-
template<typename Scalar, typename Index, int nr, int StorageOrder>
|
|
84
|
-
struct symm_pack_rhs
|
|
85
|
-
{
|
|
99
|
+
template <typename Scalar, typename Index, int nr, int StorageOrder>
|
|
100
|
+
struct symm_pack_rhs {
|
|
86
101
|
enum { PacketSize = packet_traits<Scalar>::size };
|
|
87
|
-
void operator()(Scalar* blockB, const Scalar*
|
|
88
|
-
{
|
|
102
|
+
void operator()(Scalar* blockB, const Scalar* rhs_, Index rhsStride, Index rows, Index cols, Index k2) {
|
|
89
103
|
Index end_k = k2 + rows;
|
|
90
104
|
Index count = 0;
|
|
91
|
-
const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(
|
|
92
|
-
Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
|
|
93
|
-
Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
|
|
105
|
+
const_blas_data_mapper<Scalar, Index, StorageOrder> rhs(rhs_, rhsStride);
|
|
106
|
+
Index packet_cols8 = nr >= 8 ? (cols / 8) * 8 : 0;
|
|
107
|
+
Index packet_cols4 = nr >= 4 ? (cols / 4) * 4 : 0;
|
|
94
108
|
|
|
95
109
|
// first part: normal case
|
|
96
|
-
for(Index j2=0; j2<k2; j2+=nr)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
{
|
|
104
|
-
blockB[count+2] = rhs(k,j2+2);
|
|
105
|
-
blockB[count+3] = rhs(k,j2+3);
|
|
110
|
+
for (Index j2 = 0; j2 < k2; j2 += nr) {
|
|
111
|
+
for (Index k = k2; k < end_k; k++) {
|
|
112
|
+
blockB[count + 0] = rhs(k, j2 + 0);
|
|
113
|
+
blockB[count + 1] = rhs(k, j2 + 1);
|
|
114
|
+
if (nr >= 4) {
|
|
115
|
+
blockB[count + 2] = rhs(k, j2 + 2);
|
|
116
|
+
blockB[count + 3] = rhs(k, j2 + 3);
|
|
106
117
|
}
|
|
107
|
-
if (nr>=8)
|
|
108
|
-
|
|
109
|
-
blockB[count+
|
|
110
|
-
blockB[count+
|
|
111
|
-
blockB[count+
|
|
112
|
-
blockB[count+7] = rhs(k,j2+7);
|
|
118
|
+
if (nr >= 8) {
|
|
119
|
+
blockB[count + 4] = rhs(k, j2 + 4);
|
|
120
|
+
blockB[count + 5] = rhs(k, j2 + 5);
|
|
121
|
+
blockB[count + 6] = rhs(k, j2 + 6);
|
|
122
|
+
blockB[count + 7] = rhs(k, j2 + 7);
|
|
113
123
|
}
|
|
114
124
|
count += nr;
|
|
115
125
|
}
|
|
116
126
|
}
|
|
117
127
|
|
|
118
128
|
// second part: diagonal block
|
|
119
|
-
Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
|
|
120
|
-
if(nr>=8)
|
|
121
|
-
|
|
122
|
-
for(Index j2=k2; j2<end8; j2+=8)
|
|
123
|
-
{
|
|
129
|
+
Index end8 = nr >= 8 ? (std::min)(k2 + rows, packet_cols8) : k2;
|
|
130
|
+
if (nr >= 8) {
|
|
131
|
+
for (Index j2 = k2; j2 < end8; j2 += 8) {
|
|
124
132
|
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
|
125
133
|
// transpose
|
|
126
|
-
for(Index k=k2; k<j2; k++)
|
|
127
|
-
|
|
128
|
-
blockB[count+
|
|
129
|
-
blockB[count+
|
|
130
|
-
blockB[count+
|
|
131
|
-
blockB[count+
|
|
132
|
-
blockB[count+
|
|
133
|
-
blockB[count+
|
|
134
|
-
blockB[count+
|
|
135
|
-
blockB[count+7] = numext::conj(rhs(j2+7,k));
|
|
134
|
+
for (Index k = k2; k < j2; k++) {
|
|
135
|
+
blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
|
|
136
|
+
blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
|
|
137
|
+
blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
|
|
138
|
+
blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
|
|
139
|
+
blockB[count + 4] = numext::conj(rhs(j2 + 4, k));
|
|
140
|
+
blockB[count + 5] = numext::conj(rhs(j2 + 5, k));
|
|
141
|
+
blockB[count + 6] = numext::conj(rhs(j2 + 6, k));
|
|
142
|
+
blockB[count + 7] = numext::conj(rhs(j2 + 7, k));
|
|
136
143
|
count += 8;
|
|
137
144
|
}
|
|
138
145
|
// symmetric
|
|
139
146
|
Index h = 0;
|
|
140
|
-
for(Index k=j2; k<j2+8; k++)
|
|
141
|
-
{
|
|
147
|
+
for (Index k = j2; k < j2 + 8; k++) {
|
|
142
148
|
// normal
|
|
143
|
-
for (Index w=0
|
|
144
|
-
blockB[count+w] = rhs(k,j2+w);
|
|
149
|
+
for (Index w = 0; w < h; ++w) blockB[count + w] = rhs(k, j2 + w);
|
|
145
150
|
|
|
146
|
-
blockB[count+h] = numext::real(rhs(k,k));
|
|
151
|
+
blockB[count + h] = numext::real(rhs(k, k));
|
|
147
152
|
|
|
148
153
|
// transpose
|
|
149
|
-
for (Index w=h+1
|
|
150
|
-
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
|
154
|
+
for (Index w = h + 1; w < 8; ++w) blockB[count + w] = numext::conj(rhs(j2 + w, k));
|
|
151
155
|
count += 8;
|
|
152
156
|
++h;
|
|
153
157
|
}
|
|
154
158
|
// normal
|
|
155
|
-
for(Index k=j2+8; k<end_k; k++)
|
|
156
|
-
|
|
157
|
-
blockB[count+
|
|
158
|
-
blockB[count+
|
|
159
|
-
blockB[count+
|
|
160
|
-
blockB[count+
|
|
161
|
-
blockB[count+
|
|
162
|
-
blockB[count+
|
|
163
|
-
blockB[count+
|
|
164
|
-
blockB[count+7] = rhs(k,j2+7);
|
|
159
|
+
for (Index k = j2 + 8; k < end_k; k++) {
|
|
160
|
+
blockB[count + 0] = rhs(k, j2 + 0);
|
|
161
|
+
blockB[count + 1] = rhs(k, j2 + 1);
|
|
162
|
+
blockB[count + 2] = rhs(k, j2 + 2);
|
|
163
|
+
blockB[count + 3] = rhs(k, j2 + 3);
|
|
164
|
+
blockB[count + 4] = rhs(k, j2 + 4);
|
|
165
|
+
blockB[count + 5] = rhs(k, j2 + 5);
|
|
166
|
+
blockB[count + 6] = rhs(k, j2 + 6);
|
|
167
|
+
blockB[count + 7] = rhs(k, j2 + 7);
|
|
165
168
|
count += 8;
|
|
166
169
|
}
|
|
167
170
|
}
|
|
168
171
|
}
|
|
169
|
-
if(nr>=4)
|
|
170
|
-
|
|
171
|
-
for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
|
|
172
|
-
{
|
|
172
|
+
if (nr >= 4) {
|
|
173
|
+
for (Index j2 = end8; j2 < (std::min)(k2 + rows, packet_cols4); j2 += 4) {
|
|
173
174
|
// again we can split vertically in three different parts (transpose, symmetric, normal)
|
|
174
175
|
// transpose
|
|
175
|
-
for(Index k=k2; k<j2; k++)
|
|
176
|
-
|
|
177
|
-
blockB[count+
|
|
178
|
-
blockB[count+
|
|
179
|
-
blockB[count+
|
|
180
|
-
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
|
176
|
+
for (Index k = k2; k < j2; k++) {
|
|
177
|
+
blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
|
|
178
|
+
blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
|
|
179
|
+
blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
|
|
180
|
+
blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
|
|
181
181
|
count += 4;
|
|
182
182
|
}
|
|
183
183
|
// symmetric
|
|
184
184
|
Index h = 0;
|
|
185
|
-
for(Index k=j2; k<j2+4; k++)
|
|
186
|
-
{
|
|
185
|
+
for (Index k = j2; k < j2 + 4; k++) {
|
|
187
186
|
// normal
|
|
188
|
-
for (Index w=0
|
|
189
|
-
blockB[count+w] = rhs(k,j2+w);
|
|
187
|
+
for (Index w = 0; w < h; ++w) blockB[count + w] = rhs(k, j2 + w);
|
|
190
188
|
|
|
191
|
-
blockB[count+h] = numext::real(rhs(k,k));
|
|
189
|
+
blockB[count + h] = numext::real(rhs(k, k));
|
|
192
190
|
|
|
193
191
|
// transpose
|
|
194
|
-
for (Index w=h+1
|
|
195
|
-
blockB[count+w] = numext::conj(rhs(j2+w,k));
|
|
192
|
+
for (Index w = h + 1; w < 4; ++w) blockB[count + w] = numext::conj(rhs(j2 + w, k));
|
|
196
193
|
count += 4;
|
|
197
194
|
++h;
|
|
198
195
|
}
|
|
199
196
|
// normal
|
|
200
|
-
for(Index k=j2+4; k<end_k; k++)
|
|
201
|
-
|
|
202
|
-
blockB[count+
|
|
203
|
-
blockB[count+
|
|
204
|
-
blockB[count+
|
|
205
|
-
blockB[count+3] = rhs(k,j2+3);
|
|
197
|
+
for (Index k = j2 + 4; k < end_k; k++) {
|
|
198
|
+
blockB[count + 0] = rhs(k, j2 + 0);
|
|
199
|
+
blockB[count + 1] = rhs(k, j2 + 1);
|
|
200
|
+
blockB[count + 2] = rhs(k, j2 + 2);
|
|
201
|
+
blockB[count + 3] = rhs(k, j2 + 3);
|
|
206
202
|
count += 4;
|
|
207
203
|
}
|
|
208
204
|
}
|
|
209
205
|
}
|
|
210
206
|
|
|
211
207
|
// third part: transposed
|
|
212
|
-
if(nr>=8)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
blockB[count+
|
|
219
|
-
blockB[count+
|
|
220
|
-
blockB[count+
|
|
221
|
-
blockB[count+
|
|
222
|
-
blockB[count+
|
|
223
|
-
blockB[count+5] = numext::conj(rhs(j2+5,k));
|
|
224
|
-
blockB[count+6] = numext::conj(rhs(j2+6,k));
|
|
225
|
-
blockB[count+7] = numext::conj(rhs(j2+7,k));
|
|
208
|
+
if (nr >= 8) {
|
|
209
|
+
for (Index j2 = k2 + rows; j2 < packet_cols8; j2 += 8) {
|
|
210
|
+
for (Index k = k2; k < end_k; k++) {
|
|
211
|
+
blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
|
|
212
|
+
blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
|
|
213
|
+
blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
|
|
214
|
+
blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
|
|
215
|
+
blockB[count + 4] = numext::conj(rhs(j2 + 4, k));
|
|
216
|
+
blockB[count + 5] = numext::conj(rhs(j2 + 5, k));
|
|
217
|
+
blockB[count + 6] = numext::conj(rhs(j2 + 6, k));
|
|
218
|
+
blockB[count + 7] = numext::conj(rhs(j2 + 7, k));
|
|
226
219
|
count += 8;
|
|
227
220
|
}
|
|
228
221
|
}
|
|
229
222
|
}
|
|
230
|
-
if(nr>=4)
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
blockB[count+
|
|
237
|
-
blockB[count+1] = numext::conj(rhs(j2+1,k));
|
|
238
|
-
blockB[count+2] = numext::conj(rhs(j2+2,k));
|
|
239
|
-
blockB[count+3] = numext::conj(rhs(j2+3,k));
|
|
223
|
+
if (nr >= 4) {
|
|
224
|
+
for (Index j2 = (std::max)(packet_cols8, k2 + rows); j2 < packet_cols4; j2 += 4) {
|
|
225
|
+
for (Index k = k2; k < end_k; k++) {
|
|
226
|
+
blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
|
|
227
|
+
blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
|
|
228
|
+
blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
|
|
229
|
+
blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
|
|
240
230
|
count += 4;
|
|
241
231
|
}
|
|
242
232
|
}
|
|
243
233
|
}
|
|
244
234
|
|
|
245
235
|
// copy the remaining columns one at a time (=> the same with nr==1)
|
|
246
|
-
for(Index j2=packet_cols4; j2<cols; ++j2)
|
|
247
|
-
{
|
|
236
|
+
for (Index j2 = packet_cols4; j2 < cols; ++j2) {
|
|
248
237
|
// transpose
|
|
249
|
-
Index half = (std::min)(end_k,j2);
|
|
250
|
-
for(Index k=k2; k<half; k++)
|
|
251
|
-
|
|
252
|
-
blockB[count] = numext::conj(rhs(j2,k));
|
|
238
|
+
Index half = (std::min)(end_k, j2);
|
|
239
|
+
for (Index k = k2; k < half; k++) {
|
|
240
|
+
blockB[count] = numext::conj(rhs(j2, k));
|
|
253
241
|
count += 1;
|
|
254
242
|
}
|
|
255
243
|
|
|
256
|
-
if(half==j2 && half<k2+rows)
|
|
257
|
-
|
|
258
|
-
blockB[count] = numext::real(rhs(j2,j2));
|
|
244
|
+
if (half == j2 && half < k2 + rows) {
|
|
245
|
+
blockB[count] = numext::real(rhs(j2, j2));
|
|
259
246
|
count += 1;
|
|
260
|
-
}
|
|
261
|
-
else
|
|
247
|
+
} else
|
|
262
248
|
half--;
|
|
263
249
|
|
|
264
250
|
// normal
|
|
265
|
-
for(Index k=half+1; k<k2+rows; k++)
|
|
266
|
-
|
|
267
|
-
blockB[count] = rhs(k,j2);
|
|
251
|
+
for (Index k = half + 1; k < k2 + rows; k++) {
|
|
252
|
+
blockB[count] = rhs(k, j2);
|
|
268
253
|
count += 1;
|
|
269
254
|
}
|
|
270
255
|
}
|
|
@@ -274,254 +259,225 @@ struct symm_pack_rhs
|
|
|
274
259
|
/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
|
|
275
260
|
* the general matrix matrix product.
|
|
276
261
|
*/
|
|
277
|
-
template <typename Scalar, typename Index,
|
|
278
|
-
int
|
|
279
|
-
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
|
|
280
|
-
int ResStorageOrder, int ResInnerStride>
|
|
262
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
|
263
|
+
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResStorageOrder, int ResInnerStride>
|
|
281
264
|
struct product_selfadjoint_matrix;
|
|
282
265
|
|
|
283
|
-
template <typename Scalar, typename Index,
|
|
284
|
-
int
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
{
|
|
297
|
-
product_selfadjoint_matrix<Scalar, Index,
|
|
298
|
-
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
|
|
299
|
-
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
|
|
300
|
-
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
|
|
301
|
-
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
|
|
302
|
-
ColMajor,ResInnerStride>
|
|
303
|
-
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
|
266
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
|
267
|
+
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResInnerStride>
|
|
268
|
+
struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, LhsSelfAdjoint, ConjugateLhs, RhsStorageOrder,
|
|
269
|
+
RhsSelfAdjoint, ConjugateRhs, RowMajor, ResInnerStride> {
|
|
270
|
+
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs,
|
|
271
|
+
Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
|
|
272
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
273
|
+
product_selfadjoint_matrix<
|
|
274
|
+
Scalar, Index, logical_xor(RhsSelfAdjoint, RhsStorageOrder == RowMajor) ? ColMajor : RowMajor, RhsSelfAdjoint,
|
|
275
|
+
NumTraits<Scalar>::IsComplex && logical_xor(RhsSelfAdjoint, ConjugateRhs),
|
|
276
|
+
logical_xor(LhsSelfAdjoint, LhsStorageOrder == RowMajor) ? ColMajor : RowMajor, LhsSelfAdjoint,
|
|
277
|
+
NumTraits<Scalar>::IsComplex && logical_xor(LhsSelfAdjoint, ConjugateLhs), ColMajor,
|
|
278
|
+
ResInnerStride>::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
|
304
279
|
}
|
|
305
280
|
};
|
|
306
281
|
|
|
307
|
-
template <typename Scalar, typename Index,
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
static EIGEN_DONT_INLINE void run(
|
|
315
|
-
Index rows, Index cols,
|
|
316
|
-
const Scalar* _lhs, Index lhsStride,
|
|
317
|
-
const Scalar* _rhs, Index rhsStride,
|
|
318
|
-
Scalar* res, Index resIncr, Index resStride,
|
|
319
|
-
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
282
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
283
|
+
bool ConjugateRhs, int ResInnerStride>
|
|
284
|
+
struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, true, ConjugateLhs, RhsStorageOrder, false,
|
|
285
|
+
ConjugateRhs, ColMajor, ResInnerStride> {
|
|
286
|
+
static EIGEN_DONT_INLINE void run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
|
|
287
|
+
Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
|
|
288
|
+
level3_blocking<Scalar, Scalar>& blocking);
|
|
320
289
|
};
|
|
321
290
|
|
|
322
|
-
template <typename Scalar, typename Index,
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
291
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
292
|
+
bool ConjugateRhs, int ResInnerStride>
|
|
293
|
+
EIGEN_DONT_INLINE void
|
|
294
|
+
product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, true, ConjugateLhs, RhsStorageOrder, false, ConjugateRhs,
|
|
295
|
+
ColMajor, ResInnerStride>::run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride,
|
|
296
|
+
const Scalar* rhs_, Index rhsStride, Scalar* res_,
|
|
297
|
+
Index resIncr, Index resStride, const Scalar& alpha,
|
|
298
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
299
|
+
Index size = rows;
|
|
300
|
+
|
|
301
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
302
|
+
|
|
303
|
+
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
304
|
+
typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
|
|
305
|
+
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
|
306
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
307
|
+
LhsMapper lhs(lhs_, lhsStride);
|
|
308
|
+
LhsTransposeMapper lhs_transpose(lhs_, lhsStride);
|
|
309
|
+
RhsMapper rhs(rhs_, rhsStride);
|
|
310
|
+
ResMapper res(res_, resStride, resIncr);
|
|
311
|
+
|
|
312
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
313
|
+
Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
|
|
314
|
+
// kc must be smaller than mc
|
|
315
|
+
kc = (std::min)(kc, mc);
|
|
316
|
+
std::size_t sizeA = kc * mc;
|
|
317
|
+
std::size_t sizeB = kc * cols;
|
|
318
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
319
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
320
|
+
|
|
321
|
+
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
322
|
+
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
|
323
|
+
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
324
|
+
gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
325
|
+
LhsStorageOrder == RowMajor ? ColMajor : RowMajor, true>
|
|
326
|
+
pack_lhs_transposed;
|
|
327
|
+
|
|
328
|
+
for (Index k2 = 0; k2 < size; k2 += kc) {
|
|
329
|
+
const Index actual_kc = (std::min)(k2 + kc, size) - k2;
|
|
330
|
+
|
|
331
|
+
// we have selected one row panel of rhs and one column panel of lhs
|
|
332
|
+
// pack rhs's panel into a sequential chunk of memory
|
|
333
|
+
// and expand each coeff to a constant packet for further reuse
|
|
334
|
+
pack_rhs(blockB, rhs.getSubMapper(k2, 0), actual_kc, cols);
|
|
335
|
+
|
|
336
|
+
// the select lhs's panel has to be split in three different parts:
|
|
337
|
+
// 1 - the transposed panel above the diagonal block => transposed packed copy
|
|
338
|
+
// 2 - the diagonal block => special packed copy
|
|
339
|
+
// 3 - the panel below the diagonal block => generic packed copy
|
|
340
|
+
for (Index i2 = 0; i2 < k2; i2 += mc) {
|
|
341
|
+
const Index actual_mc = (std::min)(i2 + mc, k2) - i2;
|
|
342
|
+
// transposed packed copy
|
|
343
|
+
pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
344
|
+
|
|
345
|
+
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
346
|
+
}
|
|
347
|
+
// the block diagonal
|
|
361
348
|
{
|
|
362
|
-
const Index
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
// pack rhs's panel into a sequential chunk of memory
|
|
366
|
-
// and expand each coeff to a constant packet for further reuse
|
|
367
|
-
pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
|
|
368
|
-
|
|
369
|
-
// the select lhs's panel has to be split in three different parts:
|
|
370
|
-
// 1 - the transposed panel above the diagonal block => transposed packed copy
|
|
371
|
-
// 2 - the diagonal block => special packed copy
|
|
372
|
-
// 3 - the panel below the diagonal block => generic packed copy
|
|
373
|
-
for(Index i2=0; i2<k2; i2+=mc)
|
|
374
|
-
{
|
|
375
|
-
const Index actual_mc = (std::min)(i2+mc,k2)-i2;
|
|
376
|
-
// transposed packed copy
|
|
377
|
-
pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
378
|
-
|
|
379
|
-
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
380
|
-
}
|
|
381
|
-
// the block diagonal
|
|
382
|
-
{
|
|
383
|
-
const Index actual_mc = (std::min)(k2+kc,size)-k2;
|
|
384
|
-
// symmetric packed copy
|
|
385
|
-
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
|
|
349
|
+
const Index actual_mc = (std::min)(k2 + kc, size) - k2;
|
|
350
|
+
// symmetric packed copy
|
|
351
|
+
pack_lhs(blockA, &lhs(k2, k2), lhsStride, actual_kc, actual_mc);
|
|
386
352
|
|
|
387
|
-
|
|
388
|
-
|
|
353
|
+
gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
354
|
+
}
|
|
389
355
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
356
|
+
for (Index i2 = k2 + kc; i2 < size; i2 += mc) {
|
|
357
|
+
const Index actual_mc = (std::min)(i2 + mc, size) - i2;
|
|
358
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
359
|
+
LhsStorageOrder, false>()(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
395
360
|
|
|
396
|
-
|
|
397
|
-
}
|
|
361
|
+
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
398
362
|
}
|
|
399
363
|
}
|
|
364
|
+
}
|
|
400
365
|
|
|
401
366
|
// matrix * selfadjoint product
|
|
402
|
-
template <typename Scalar, typename Index,
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
static EIGEN_DONT_INLINE void run(
|
|
410
|
-
Index rows, Index cols,
|
|
411
|
-
const Scalar* _lhs, Index lhsStride,
|
|
412
|
-
const Scalar* _rhs, Index rhsStride,
|
|
413
|
-
Scalar* res, Index resIncr, Index resStride,
|
|
414
|
-
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
|
367
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
368
|
+
bool ConjugateRhs, int ResInnerStride>
|
|
369
|
+
struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, false, ConjugateLhs, RhsStorageOrder, true,
|
|
370
|
+
ConjugateRhs, ColMajor, ResInnerStride> {
|
|
371
|
+
static EIGEN_DONT_INLINE void run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
|
|
372
|
+
Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
|
|
373
|
+
level3_blocking<Scalar, Scalar>& blocking);
|
|
415
374
|
};
|
|
416
375
|
|
|
417
|
-
template <typename Scalar, typename Index,
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
459
|
-
|
|
460
|
-
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
461
|
-
}
|
|
376
|
+
template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
|
|
377
|
+
bool ConjugateRhs, int ResInnerStride>
|
|
378
|
+
EIGEN_DONT_INLINE void
|
|
379
|
+
product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, false, ConjugateLhs, RhsStorageOrder, true, ConjugateRhs,
|
|
380
|
+
ColMajor, ResInnerStride>::run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride,
|
|
381
|
+
const Scalar* rhs_, Index rhsStride, Scalar* res_,
|
|
382
|
+
Index resIncr, Index resStride, const Scalar& alpha,
|
|
383
|
+
level3_blocking<Scalar, Scalar>& blocking) {
|
|
384
|
+
Index size = cols;
|
|
385
|
+
|
|
386
|
+
typedef gebp_traits<Scalar, Scalar> Traits;
|
|
387
|
+
|
|
388
|
+
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
|
389
|
+
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
|
390
|
+
LhsMapper lhs(lhs_, lhsStride);
|
|
391
|
+
ResMapper res(res_, resStride, resIncr);
|
|
392
|
+
|
|
393
|
+
Index kc = blocking.kc(); // cache block size along the K direction
|
|
394
|
+
Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
|
|
395
|
+
std::size_t sizeA = kc * mc;
|
|
396
|
+
std::size_t sizeB = kc * cols;
|
|
397
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
|
398
|
+
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
|
399
|
+
|
|
400
|
+
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
|
|
401
|
+
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
|
|
402
|
+
LhsStorageOrder>
|
|
403
|
+
pack_lhs;
|
|
404
|
+
symm_pack_rhs<Scalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
|
|
405
|
+
|
|
406
|
+
for (Index k2 = 0; k2 < size; k2 += kc) {
|
|
407
|
+
const Index actual_kc = (std::min)(k2 + kc, size) - k2;
|
|
408
|
+
|
|
409
|
+
pack_rhs(blockB, rhs_, rhsStride, actual_kc, cols, k2);
|
|
410
|
+
|
|
411
|
+
// => GEPP
|
|
412
|
+
for (Index i2 = 0; i2 < rows; i2 += mc) {
|
|
413
|
+
const Index actual_mc = (std::min)(i2 + mc, rows) - i2;
|
|
414
|
+
pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
|
|
415
|
+
|
|
416
|
+
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
|
462
417
|
}
|
|
463
418
|
}
|
|
419
|
+
}
|
|
464
420
|
|
|
465
|
-
}
|
|
421
|
+
} // end namespace internal
|
|
466
422
|
|
|
467
423
|
/***************************************************************************
|
|
468
|
-
* Wrapper to product_selfadjoint_matrix
|
|
469
|
-
***************************************************************************/
|
|
424
|
+
* Wrapper to product_selfadjoint_matrix
|
|
425
|
+
***************************************************************************/
|
|
470
426
|
|
|
471
427
|
namespace internal {
|
|
472
|
-
|
|
473
|
-
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
|
|
474
|
-
struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
428
|
+
|
|
429
|
+
template <typename Lhs, int LhsMode, typename Rhs, int RhsMode>
|
|
430
|
+
struct selfadjoint_product_impl<Lhs, LhsMode, false, Rhs, RhsMode, false> {
|
|
431
|
+
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
432
|
+
|
|
478
433
|
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
|
479
434
|
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
|
|
480
435
|
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
|
481
436
|
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
|
482
|
-
|
|
437
|
+
|
|
483
438
|
enum {
|
|
484
|
-
LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
|
|
485
|
-
LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
|
|
486
|
-
RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
|
|
487
|
-
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
|
|
439
|
+
LhsIsUpper = (LhsMode & (Upper | Lower)) == Upper,
|
|
440
|
+
LhsIsSelfAdjoint = (LhsMode & SelfAdjoint) == SelfAdjoint,
|
|
441
|
+
RhsIsUpper = (RhsMode & (Upper | Lower)) == Upper,
|
|
442
|
+
RhsIsSelfAdjoint = (RhsMode & SelfAdjoint) == SelfAdjoint
|
|
488
443
|
};
|
|
489
|
-
|
|
490
|
-
template<typename Dest>
|
|
491
|
-
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
|
|
492
|
-
{
|
|
493
|
-
eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
|
|
494
444
|
|
|
495
|
-
|
|
496
|
-
|
|
445
|
+
template <typename Dest>
|
|
446
|
+
static void run(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {
|
|
447
|
+
eigen_assert(dst.rows() == a_lhs.rows() && dst.cols() == a_rhs.cols());
|
|
448
|
+
|
|
449
|
+
add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
|
|
450
|
+
add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
|
|
497
451
|
|
|
498
|
-
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
|
|
499
|
-
* RhsBlasTraits::extractScalarFactor(a_rhs);
|
|
452
|
+
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs);
|
|
500
453
|
|
|
501
|
-
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
|
|
502
|
-
|
|
454
|
+
typedef internal::gemm_blocking_space<(Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, Scalar, Scalar,
|
|
455
|
+
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime,
|
|
456
|
+
Lhs::MaxColsAtCompileTime, 1>
|
|
457
|
+
BlockingType;
|
|
503
458
|
|
|
504
459
|
BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
|
|
505
460
|
|
|
506
|
-
internal::product_selfadjoint_matrix<
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
461
|
+
internal::product_selfadjoint_matrix<
|
|
462
|
+
Scalar, Index,
|
|
463
|
+
internal::logical_xor(LhsIsUpper, internal::traits<Lhs>::Flags & RowMajorBit) ? RowMajor : ColMajor,
|
|
464
|
+
LhsIsSelfAdjoint,
|
|
465
|
+
NumTraits<Scalar>::IsComplex && internal::logical_xor(LhsIsUpper, bool(LhsBlasTraits::NeedToConjugate)),
|
|
466
|
+
internal::logical_xor(RhsIsUpper, internal::traits<Rhs>::Flags & RowMajorBit) ? RowMajor : ColMajor,
|
|
467
|
+
RhsIsSelfAdjoint,
|
|
468
|
+
NumTraits<Scalar>::IsComplex && internal::logical_xor(RhsIsUpper, bool(RhsBlasTraits::NeedToConjugate)),
|
|
469
|
+
internal::traits<Dest>::Flags & RowMajorBit ? RowMajor : ColMajor,
|
|
470
|
+
Dest::InnerStrideAtCompileTime>::run(lhs.rows(), rhs.cols(), // sizes
|
|
471
|
+
&lhs.coeffRef(0, 0), lhs.outerStride(), // lhs info
|
|
472
|
+
&rhs.coeffRef(0, 0), rhs.outerStride(), // rhs info
|
|
473
|
+
&dst.coeffRef(0, 0), dst.innerStride(), dst.outerStride(), // result info
|
|
474
|
+
actualAlpha, blocking // alpha
|
|
475
|
+
);
|
|
520
476
|
}
|
|
521
477
|
};
|
|
522
478
|
|
|
523
|
-
}
|
|
479
|
+
} // end namespace internal
|
|
524
480
|
|
|
525
|
-
}
|
|
481
|
+
} // end namespace Eigen
|
|
526
482
|
|
|
527
|
-
#endif
|
|
483
|
+
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
|