@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -33,104 +33,107 @@
|
|
|
33
33
|
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
|
|
34
34
|
#define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
// IWYU pragma: private
|
|
37
|
+
#include "../InternalHeaderCheck.h"
|
|
38
|
+
|
|
39
|
+
namespace Eigen {
|
|
37
40
|
|
|
38
41
|
namespace internal {
|
|
39
42
|
|
|
40
43
|
/**********************************************************************
|
|
41
|
-
* This file implements general matrix-vector multiplication using BLAS
|
|
42
|
-
* gemv function via partial specialization of
|
|
43
|
-
* general_matrix_vector_product::run(..) method for float, double,
|
|
44
|
-
* std::complex<float> and std::complex<double> types
|
|
45
|
-
**********************************************************************/
|
|
44
|
+
* This file implements general matrix-vector multiplication using BLAS
|
|
45
|
+
* gemv function via partial specialization of
|
|
46
|
+
* general_matrix_vector_product::run(..) method for float, double,
|
|
47
|
+
* std::complex<float> and std::complex<double> types
|
|
48
|
+
**********************************************************************/
|
|
46
49
|
|
|
47
50
|
// gemv specialization
|
|
48
51
|
|
|
49
|
-
template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar,
|
|
52
|
+
template <typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar,
|
|
53
|
+
bool ConjugateRhs>
|
|
50
54
|
struct general_matrix_vector_product_gemv;
|
|
51
55
|
|
|
52
|
-
#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar)
|
|
53
|
-
template<typename Index, bool ConjugateLhs, bool ConjugateRhs>
|
|
54
|
-
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
{
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
}
|
|
69
|
-
};
|
|
70
|
-
template<typename Index, bool ConjugateLhs, bool ConjugateRhs>
|
|
71
|
-
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
}
|
|
81
|
-
}; \
|
|
56
|
+
#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
|
|
57
|
+
template <typename Index, bool ConjugateLhs, bool ConjugateRhs> \
|
|
58
|
+
struct general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, ColMajor, \
|
|
59
|
+
ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, \
|
|
60
|
+
ConjugateRhs, Specialized> { \
|
|
61
|
+
static void run(Index rows, Index cols, const const_blas_data_mapper<Scalar, Index, ColMajor>& lhs, \
|
|
62
|
+
const const_blas_data_mapper<Scalar, Index, RowMajor>& rhs, Scalar* res, Index resIncr, \
|
|
63
|
+
Scalar alpha) { \
|
|
64
|
+
if (ConjugateLhs) { \
|
|
65
|
+
general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, ColMajor, \
|
|
66
|
+
ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, \
|
|
67
|
+
ConjugateRhs, BuiltIn>::run(rows, cols, lhs, rhs, res, resIncr, alpha); \
|
|
68
|
+
} else { \
|
|
69
|
+
general_matrix_vector_product_gemv<Index, Scalar, ColMajor, ConjugateLhs, Scalar, ConjugateRhs>::run( \
|
|
70
|
+
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
|
|
71
|
+
} \
|
|
72
|
+
} \
|
|
73
|
+
}; \
|
|
74
|
+
template <typename Index, bool ConjugateLhs, bool ConjugateRhs> \
|
|
75
|
+
struct general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, RowMajor, \
|
|
76
|
+
ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, \
|
|
77
|
+
ConjugateRhs, Specialized> { \
|
|
78
|
+
static void run(Index rows, Index cols, const const_blas_data_mapper<Scalar, Index, RowMajor>& lhs, \
|
|
79
|
+
const const_blas_data_mapper<Scalar, Index, ColMajor>& rhs, Scalar* res, Index resIncr, \
|
|
80
|
+
Scalar alpha) { \
|
|
81
|
+
general_matrix_vector_product_gemv<Index, Scalar, RowMajor, ConjugateLhs, Scalar, ConjugateRhs>::run( \
|
|
82
|
+
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
|
|
83
|
+
} \
|
|
84
|
+
};
|
|
82
85
|
|
|
83
86
|
EIGEN_BLAS_GEMV_SPECIALIZE(double)
|
|
84
87
|
EIGEN_BLAS_GEMV_SPECIALIZE(float)
|
|
85
88
|
EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
|
|
86
89
|
EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
|
|
87
90
|
|
|
88
|
-
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC)
|
|
89
|
-
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs>
|
|
90
|
-
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
\
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
}\
|
|
118
|
-
};
|
|
91
|
+
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE, BLASTYPE, BLASFUNC) \
|
|
92
|
+
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
|
|
93
|
+
struct general_matrix_vector_product_gemv<Index, EIGTYPE, LhsStorageOrder, ConjugateLhs, EIGTYPE, ConjugateRhs> { \
|
|
94
|
+
typedef Matrix<EIGTYPE, Dynamic, 1, ColMajor> GEMVVector; \
|
|
95
|
+
\
|
|
96
|
+
static void run(Index rows, Index cols, const EIGTYPE* lhs, Index lhsStride, const EIGTYPE* rhs, Index rhsIncr, \
|
|
97
|
+
EIGTYPE* res, Index resIncr, EIGTYPE alpha) { \
|
|
98
|
+
if (rows == 0 || cols == 0) return; \
|
|
99
|
+
BlasIndex m = convert_index<BlasIndex>(rows), n = convert_index<BlasIndex>(cols), \
|
|
100
|
+
lda = convert_index<BlasIndex>(lhsStride), incx = convert_index<BlasIndex>(rhsIncr), \
|
|
101
|
+
incy = convert_index<BlasIndex>(resIncr); \
|
|
102
|
+
const EIGTYPE beta(1); \
|
|
103
|
+
const EIGTYPE* x_ptr; \
|
|
104
|
+
char trans = (LhsStorageOrder == ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
|
|
105
|
+
if (LhsStorageOrder == RowMajor) { \
|
|
106
|
+
m = convert_index<BlasIndex>(cols); \
|
|
107
|
+
n = convert_index<BlasIndex>(rows); \
|
|
108
|
+
} \
|
|
109
|
+
GEMVVector x_tmp; \
|
|
110
|
+
if (ConjugateRhs) { \
|
|
111
|
+
Map<const GEMVVector, 0, InnerStride<> > map_x(rhs, cols, 1, InnerStride<>(incx)); \
|
|
112
|
+
x_tmp = map_x.conjugate(); \
|
|
113
|
+
x_ptr = x_tmp.data(); \
|
|
114
|
+
incx = 1; \
|
|
115
|
+
} else { \
|
|
116
|
+
x_ptr = rhs; \
|
|
117
|
+
} \
|
|
118
|
+
BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, \
|
|
119
|
+
(const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
|
|
120
|
+
} \
|
|
121
|
+
};
|
|
119
122
|
|
|
120
123
|
#ifdef EIGEN_USE_MKL
|
|
121
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(double,
|
|
122
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(float,
|
|
124
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
|
|
125
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
|
|
123
126
|
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
|
|
124
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8
|
|
127
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, cgemv)
|
|
125
128
|
#else
|
|
126
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(double,
|
|
127
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(float,
|
|
129
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
|
|
130
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
|
|
128
131
|
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
|
|
129
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,
|
|
132
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
|
|
130
133
|
#endif
|
|
131
134
|
|
|
132
|
-
}
|
|
135
|
+
} // namespace internal
|
|
133
136
|
|
|
134
|
-
}
|
|
137
|
+
} // end namespace Eigen
|
|
135
138
|
|
|
136
|
-
#endif
|
|
139
|
+
#endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
|
|
@@ -10,171 +10,273 @@
|
|
|
10
10
|
#ifndef EIGEN_PARALLELIZER_H
|
|
11
11
|
#define EIGEN_PARALLELIZER_H
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
#include
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../InternalHeaderCheck.h"
|
|
15
|
+
|
|
16
|
+
// Note that in the following, there are 3 different uses of the concept
|
|
17
|
+
// "number of threads":
|
|
18
|
+
// 1. Max number of threads used by OpenMP or ThreadPool.
|
|
19
|
+
// * For OpenMP this is typically the value set by the OMP_NUM_THREADS
|
|
20
|
+
// environment variable, or by a call to omp_set_num_threads() prior to
|
|
21
|
+
// calling Eigen.
|
|
22
|
+
// * For ThreadPool, this is the number of threads in the ThreadPool.
|
|
23
|
+
// 2. Max number of threads currently allowed to be used by parallel Eigen
|
|
24
|
+
// operations. This is set by setNbThreads(), and cannot exceed the value
|
|
25
|
+
// in 1.
|
|
26
|
+
// 3. The actual number of threads used for a given parallel Eigen operation.
|
|
27
|
+
// This is typically computed on the fly using a cost model and cannot exceed
|
|
28
|
+
// the value in 2.
|
|
29
|
+
// * For OpenMP, this is typically the number of threads specified in individual
|
|
30
|
+
// "omp parallel" pragmas associated with an Eigen operation.
|
|
31
|
+
// * For ThreadPool, it is the number of concurrent tasks scheduled in the
|
|
32
|
+
// threadpool for a given Eigen operation. Notice that since the threadpool
|
|
33
|
+
// uses task stealing, there is no way to limit the number of concurrently
|
|
34
|
+
// executing tasks to below the number in 1. except by limiting the total
|
|
35
|
+
// number of tasks in flight.
|
|
36
|
+
|
|
37
|
+
#if defined(EIGEN_HAS_OPENMP) && defined(EIGEN_GEMM_THREADPOOL)
|
|
38
|
+
#error "EIGEN_HAS_OPENMP and EIGEN_GEMM_THREADPOOL may not both be defined."
|
|
15
39
|
#endif
|
|
16
40
|
|
|
17
41
|
namespace Eigen {
|
|
18
42
|
|
|
19
43
|
namespace internal {
|
|
20
|
-
|
|
21
|
-
/** \internal */
|
|
22
|
-
inline void manage_multi_threading(Action action, int* v)
|
|
23
|
-
{
|
|
24
|
-
static int m_maxThreads = -1;
|
|
25
|
-
EIGEN_UNUSED_VARIABLE(m_maxThreads)
|
|
26
|
-
|
|
27
|
-
if(action==SetAction)
|
|
28
|
-
{
|
|
29
|
-
eigen_internal_assert(v!=0);
|
|
30
|
-
m_maxThreads = *v;
|
|
31
|
-
}
|
|
32
|
-
else if(action==GetAction)
|
|
33
|
-
{
|
|
34
|
-
eigen_internal_assert(v!=0);
|
|
35
|
-
#ifdef EIGEN_HAS_OPENMP
|
|
36
|
-
if(m_maxThreads>0)
|
|
37
|
-
*v = m_maxThreads;
|
|
38
|
-
else
|
|
39
|
-
*v = omp_get_max_threads();
|
|
40
|
-
#else
|
|
41
|
-
*v = 1;
|
|
42
|
-
#endif
|
|
43
|
-
}
|
|
44
|
-
else
|
|
45
|
-
{
|
|
46
|
-
eigen_internal_assert(false);
|
|
47
|
-
}
|
|
44
|
+
inline void manage_multi_threading(Action action, int* v);
|
|
48
45
|
}
|
|
49
46
|
|
|
50
|
-
|
|
47
|
+
// Public APIs.
|
|
51
48
|
|
|
52
49
|
/** Must be called first when calling Eigen from multiple threads */
|
|
53
|
-
inline void initParallel()
|
|
54
|
-
{
|
|
55
|
-
int nbt;
|
|
56
|
-
internal::manage_multi_threading(GetAction, &nbt);
|
|
57
|
-
std::ptrdiff_t l1, l2, l3;
|
|
58
|
-
internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
|
59
|
-
}
|
|
50
|
+
EIGEN_DEPRECATED_WITH_REASON("Initialization is no longer needed.") inline void initParallel() {}
|
|
60
51
|
|
|
61
52
|
/** \returns the max number of threads reserved for Eigen
|
|
62
|
-
|
|
63
|
-
inline int nbThreads()
|
|
64
|
-
{
|
|
53
|
+
* \sa setNbThreads */
|
|
54
|
+
inline int nbThreads() {
|
|
65
55
|
int ret;
|
|
66
56
|
internal::manage_multi_threading(GetAction, &ret);
|
|
67
57
|
return ret;
|
|
68
58
|
}
|
|
69
59
|
|
|
70
60
|
/** Sets the max number of threads reserved for Eigen
|
|
71
|
-
|
|
72
|
-
inline void setNbThreads(int v)
|
|
73
|
-
|
|
74
|
-
|
|
61
|
+
* \sa nbThreads */
|
|
62
|
+
inline void setNbThreads(int v) { internal::manage_multi_threading(SetAction, &v); }
|
|
63
|
+
|
|
64
|
+
#ifdef EIGEN_GEMM_THREADPOOL
|
|
65
|
+
// Sets the ThreadPool used by Eigen parallel Gemm.
|
|
66
|
+
//
|
|
67
|
+
// NOTICE: This function has a known race condition with
|
|
68
|
+
// parallelize_gemm below, and should not be called while
|
|
69
|
+
// an instance of that function is running.
|
|
70
|
+
//
|
|
71
|
+
// TODO(rmlarsen): Make the device API available instead of
|
|
72
|
+
// storing a local static pointer variable to avoid this issue.
|
|
73
|
+
inline ThreadPool* setGemmThreadPool(ThreadPool* new_pool) {
|
|
74
|
+
static ThreadPool* pool = nullptr;
|
|
75
|
+
if (new_pool != nullptr) {
|
|
76
|
+
// Only the stored pointer is replaced; this function neither waits for work on
|
|
77
|
+
// the previous pool to finish nor destroys it.
|
|
78
|
+
pool = new_pool;
|
|
79
|
+
// Reset the number of threads to the number of threads on the new pool.
|
|
80
|
+
setNbThreads(pool->NumThreads());
|
|
81
|
+
}
|
|
82
|
+
return pool;
|
|
75
83
|
}
|
|
76
84
|
|
|
85
|
+
// Gets the ThreadPool used by Eigen parallel Gemm.
|
|
86
|
+
inline ThreadPool* getGemmThreadPool() { return setGemmThreadPool(nullptr); }
|
|
87
|
+
#endif
|
|
88
|
+
|
|
77
89
|
namespace internal {
|
|
78
90
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
91
|
+
// Implementation.
|
|
92
|
+
|
|
93
|
+
#if defined(EIGEN_USE_BLAS) || (!defined(EIGEN_HAS_OPENMP) && !defined(EIGEN_GEMM_THREADPOOL))
|
|
94
|
+
|
|
95
|
+
inline void manage_multi_threading(Action action, int* v) {
|
|
96
|
+
if (action == SetAction) {
|
|
97
|
+
eigen_internal_assert(v != nullptr);
|
|
98
|
+
} else if (action == GetAction) {
|
|
99
|
+
eigen_internal_assert(v != nullptr);
|
|
100
|
+
*v = 1;
|
|
101
|
+
} else {
|
|
102
|
+
eigen_internal_assert(false);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
template <typename Index>
|
|
106
|
+
struct GemmParallelInfo {};
|
|
107
|
+
template <bool Condition, typename Functor, typename Index>
|
|
108
|
+
EIGEN_STRONG_INLINE void parallelize_gemm(const Functor& func, Index rows, Index cols, Index /*unused*/,
|
|
109
|
+
bool /*unused*/) {
|
|
110
|
+
func(0, rows, 0, cols);
|
|
111
|
+
}
|
|
82
112
|
|
|
83
|
-
// volatile is not enough on all architectures (see bug 1572)
|
|
84
|
-
// to guarantee that when thread A says to thread B that it is
|
|
85
|
-
// done with packing a block, then all writes have been really
|
|
86
|
-
// carried out... C++11 memory model+atomic guarantees this.
|
|
87
|
-
#if EIGEN_HAS_CXX11_ATOMIC
|
|
88
|
-
std::atomic<Index> sync;
|
|
89
|
-
std::atomic<int> users;
|
|
90
113
|
#else
|
|
91
|
-
Index volatile sync;
|
|
92
|
-
int volatile users;
|
|
93
|
-
#endif
|
|
94
114
|
|
|
115
|
+
template <typename Index>
|
|
116
|
+
struct GemmParallelTaskInfo {
|
|
117
|
+
GemmParallelTaskInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
|
118
|
+
std::atomic<Index> sync;
|
|
119
|
+
std::atomic<int> users;
|
|
95
120
|
Index lhs_start;
|
|
96
121
|
Index lhs_length;
|
|
97
122
|
};
|
|
98
123
|
|
|
99
|
-
template<
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
124
|
+
template <typename Index>
|
|
125
|
+
struct GemmParallelInfo {
|
|
126
|
+
const int logical_thread_id;
|
|
127
|
+
const int num_threads;
|
|
128
|
+
GemmParallelTaskInfo<Index>* task_info;
|
|
129
|
+
|
|
130
|
+
GemmParallelInfo(int logical_thread_id_, int num_threads_, GemmParallelTaskInfo<Index>* task_info_)
|
|
131
|
+
: logical_thread_id(logical_thread_id_), num_threads(num_threads_), task_info(task_info_) {}
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
inline void manage_multi_threading(Action action, int* v) {
|
|
135
|
+
static int m_maxThreads = -1;
|
|
136
|
+
if (action == SetAction) {
|
|
137
|
+
eigen_internal_assert(v != nullptr);
|
|
138
|
+
#if defined(EIGEN_HAS_OPENMP)
|
|
139
|
+
// Calling action == SetAction and *v = 0 means
|
|
140
|
+
// restoring m_maxThreads to the maximum number of threads specified
|
|
141
|
+
// for OpenMP.
|
|
142
|
+
eigen_internal_assert(*v >= 0);
|
|
143
|
+
int omp_threads = omp_get_max_threads();
|
|
144
|
+
m_maxThreads = (*v == 0 ? omp_threads : std::min(*v, omp_threads));
|
|
145
|
+
#elif defined(EIGEN_GEMM_THREADPOOL)
|
|
146
|
+
// Calling action == SetAction and *v = 0 means
|
|
147
|
+
// restoring m_maxThreads to the number of threads in the ThreadPool,
|
|
148
|
+
// which defaults to 1 if no pool was provided.
|
|
149
|
+
eigen_internal_assert(*v >= 0);
|
|
150
|
+
ThreadPool* pool = getGemmThreadPool();
|
|
151
|
+
int pool_threads = pool != nullptr ? pool->NumThreads() : 1;
|
|
152
|
+
m_maxThreads = (*v == 0 ? pool_threads : numext::mini(pool_threads, *v));
|
|
153
|
+
#endif
|
|
154
|
+
} else if (action == GetAction) {
|
|
155
|
+
eigen_internal_assert(v != nullptr);
|
|
156
|
+
#if defined(EIGEN_HAS_OPENMP)
|
|
157
|
+
if (m_maxThreads > 0)
|
|
158
|
+
*v = m_maxThreads;
|
|
159
|
+
else
|
|
160
|
+
*v = omp_get_max_threads();
|
|
115
161
|
#else
|
|
162
|
+
*v = m_maxThreads;
|
|
163
|
+
#endif
|
|
164
|
+
} else {
|
|
165
|
+
eigen_internal_assert(false);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
116
168
|
|
|
117
|
-
|
|
169
|
+
template <bool Condition, typename Functor, typename Index>
|
|
170
|
+
EIGEN_STRONG_INLINE void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose) {
|
|
171
|
+
// Dynamically check whether we should even try to execute in parallel.
|
|
118
172
|
// The conditions are:
|
|
119
173
|
// - the max number of threads we can create is greater than 1
|
|
120
174
|
// - we are not already in a parallel code
|
|
121
175
|
// - the sizes are large enough
|
|
122
176
|
|
|
123
177
|
// compute the maximal number of threads from the size of the product:
|
|
124
|
-
// This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at
|
|
178
|
+
// This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at
|
|
179
|
+
// once.
|
|
125
180
|
Index size = transpose ? rows : cols;
|
|
126
|
-
Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
|
|
181
|
+
Index pb_max_threads = std::max<Index>(1, size / Functor::Traits::nr);
|
|
127
182
|
|
|
128
183
|
// compute the maximal number of threads from the total amount of work:
|
|
129
|
-
double work = static_cast<double>(rows) * static_cast<double>(cols) *
|
|
130
|
-
static_cast<double>(depth);
|
|
184
|
+
double work = static_cast<double>(rows) * static_cast<double>(cols) * static_cast<double>(depth);
|
|
131
185
|
double kMinTaskSize = 50000; // FIXME improve this heuristic.
|
|
132
|
-
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>(
|
|
186
|
+
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>(work / kMinTaskSize)));
|
|
133
187
|
|
|
134
188
|
// compute the number of threads we are going to use
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
// if multi-threading is explicitly disabled, not useful, or if we already are
|
|
138
|
-
// then abort multi-threading
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
189
|
+
int threads = std::min<int>(nbThreads(), static_cast<int>(pb_max_threads));
|
|
190
|
+
|
|
191
|
+
// if multi-threading is explicitly disabled, not useful, or if we already are
|
|
192
|
+
// inside a parallel session, then abort multi-threading
|
|
193
|
+
bool dont_parallelize = (!Condition) || (threads <= 1);
|
|
194
|
+
#if defined(EIGEN_HAS_OPENMP)
|
|
195
|
+
// don't parallelize if we are executing in a parallel context already.
|
|
196
|
+
dont_parallelize |= omp_get_num_threads() > 1;
|
|
197
|
+
#elif defined(EIGEN_GEMM_THREADPOOL)
|
|
198
|
+
// don't parallelize if no threadpool has been set or the current thread id
|
|
199
|
+
// is != -1, indicating that we are already executing on a thread inside the pool.
|
|
200
|
+
// In other words, we do not allow nested parallelism, since this would lead to
|
|
201
|
+
// deadlocks due to the workstealing nature of the threadpool.
|
|
202
|
+
ThreadPool* pool = getGemmThreadPool();
|
|
203
|
+
dont_parallelize |= (pool == nullptr || pool->CurrentThreadId() != -1);
|
|
204
|
+
#endif
|
|
205
|
+
if (dont_parallelize) return func(0, rows, 0, cols);
|
|
142
206
|
|
|
143
|
-
Eigen::initParallel();
|
|
144
207
|
func.initParallelSession(threads);
|
|
145
208
|
|
|
146
|
-
if(transpose)
|
|
147
|
-
std::swap(rows,cols);
|
|
209
|
+
if (transpose) std::swap(rows, cols);
|
|
148
210
|
|
|
149
|
-
ei_declare_aligned_stack_constructed_variable(
|
|
211
|
+
ei_declare_aligned_stack_constructed_variable(GemmParallelTaskInfo<Index>, task_info, threads, 0);
|
|
150
212
|
|
|
151
|
-
|
|
213
|
+
#if defined(EIGEN_HAS_OPENMP)
|
|
214
|
+
#pragma omp parallel num_threads(threads)
|
|
152
215
|
{
|
|
153
216
|
Index i = omp_get_thread_num();
|
|
154
|
-
// Note that the actual number of threads might be lower than the number of
|
|
217
|
+
// Note that the actual number of threads might be lower than the number of
|
|
218
|
+
// requested ones
|
|
155
219
|
Index actual_threads = omp_get_num_threads();
|
|
220
|
+
GemmParallelInfo<Index> info(static_cast<int>(i), static_cast<int>(actual_threads), task_info);
|
|
221
|
+
|
|
222
|
+
Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
|
223
|
+
Index blockRows = (rows / actual_threads);
|
|
224
|
+
blockRows = (blockRows / Functor::Traits::mr) * Functor::Traits::mr;
|
|
225
|
+
|
|
226
|
+
Index r0 = i * blockRows;
|
|
227
|
+
Index actualBlockRows = (i + 1 == actual_threads) ? rows - r0 : blockRows;
|
|
156
228
|
|
|
229
|
+
Index c0 = i * blockCols;
|
|
230
|
+
Index actualBlockCols = (i + 1 == actual_threads) ? cols - c0 : blockCols;
|
|
231
|
+
|
|
232
|
+
info.task_info[i].lhs_start = r0;
|
|
233
|
+
info.task_info[i].lhs_length = actualBlockRows;
|
|
234
|
+
|
|
235
|
+
if (transpose)
|
|
236
|
+
func(c0, actualBlockCols, 0, rows, &info);
|
|
237
|
+
else
|
|
238
|
+
func(0, rows, c0, actualBlockCols, &info);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
#elif defined(EIGEN_GEMM_THREADPOOL)
|
|
242
|
+
Barrier barrier(threads);
|
|
243
|
+
auto task = [=, &func, &barrier, &task_info](int i) {
|
|
244
|
+
Index actual_threads = threads;
|
|
245
|
+
GemmParallelInfo<Index> info(i, static_cast<int>(actual_threads), task_info);
|
|
157
246
|
Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
|
158
247
|
Index blockRows = (rows / actual_threads);
|
|
159
|
-
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
|
248
|
+
blockRows = (blockRows / Functor::Traits::mr) * Functor::Traits::mr;
|
|
160
249
|
|
|
161
|
-
Index r0 = i*blockRows;
|
|
162
|
-
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
|
|
250
|
+
Index r0 = i * blockRows;
|
|
251
|
+
Index actualBlockRows = (i + 1 == actual_threads) ? rows - r0 : blockRows;
|
|
163
252
|
|
|
164
|
-
Index c0 = i*blockCols;
|
|
165
|
-
Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
|
|
253
|
+
Index c0 = i * blockCols;
|
|
254
|
+
Index actualBlockCols = (i + 1 == actual_threads) ? cols - c0 : blockCols;
|
|
166
255
|
|
|
167
|
-
info[i].lhs_start = r0;
|
|
168
|
-
info[i].lhs_length = actualBlockRows;
|
|
256
|
+
info.task_info[i].lhs_start = r0;
|
|
257
|
+
info.task_info[i].lhs_length = actualBlockRows;
|
|
169
258
|
|
|
170
|
-
if(transpose)
|
|
171
|
-
|
|
259
|
+
if (transpose)
|
|
260
|
+
func(c0, actualBlockCols, 0, rows, &info);
|
|
261
|
+
else
|
|
262
|
+
func(0, rows, c0, actualBlockCols, &info);
|
|
263
|
+
|
|
264
|
+
barrier.Notify();
|
|
265
|
+
};
|
|
266
|
+
// Notice that we do not schedule more than "threads" tasks, which allows us to
|
|
267
|
+
// limit number of running threads, even if the threadpool itself was constructed
|
|
268
|
+
// with a larger number of threads.
|
|
269
|
+
for (int i = 0; i < threads - 1; ++i) {
|
|
270
|
+
pool->Schedule([=, task = std::move(task)] { task(i); });
|
|
172
271
|
}
|
|
272
|
+
task(threads - 1);
|
|
273
|
+
barrier.Wait();
|
|
173
274
|
#endif
|
|
174
275
|
}
|
|
175
276
|
|
|
176
|
-
|
|
277
|
+
#endif
|
|
177
278
|
|
|
178
|
-
}
|
|
279
|
+
} // end namespace internal
|
|
280
|
+
} // end namespace Eigen
|
|
179
281
|
|
|
180
|
-
#endif
|
|
282
|
+
#endif // EIGEN_PARALLELIZER_H
|