@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -33,104 +33,107 @@
|
|
|
33
33
|
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
|
|
34
34
|
#define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
// IWYU pragma: private
|
|
37
|
+
#include "../InternalHeaderCheck.h"
|
|
38
|
+
|
|
39
|
+
namespace Eigen {
|
|
37
40
|
|
|
38
41
|
namespace internal {
|
|
39
42
|
|
|
40
43
|
/**********************************************************************
|
|
41
|
-
* This file implements general matrix-vector multiplication using BLAS
|
|
42
|
-
* gemv function via partial specialization of
|
|
43
|
-
* general_matrix_vector_product::run(..) method for float, double,
|
|
44
|
-
* std::complex<float> and std::complex<double> types
|
|
45
|
-
**********************************************************************/
|
|
44
|
+
* This file implements general matrix-vector multiplication using BLAS
|
|
45
|
+
* gemv function via partial specialization of
|
|
46
|
+
* general_matrix_vector_product::run(..) method for float, double,
|
|
47
|
+
* std::complex<float> and std::complex<double> types
|
|
48
|
+
**********************************************************************/
|
|
46
49
|
|
|
47
50
|
// gemv specialization
|
|
48
51
|
|
|
49
|
-
template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar,
|
|
52
|
+
template <typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar,
|
|
53
|
+
bool ConjugateRhs>
|
|
50
54
|
struct general_matrix_vector_product_gemv;
|
|
51
55
|
|
|
52
|
-
#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar)
|
|
53
|
-
template<typename Index, bool ConjugateLhs, bool ConjugateRhs>
|
|
54
|
-
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
{
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
}
|
|
69
|
-
};
|
|
70
|
-
template<typename Index, bool ConjugateLhs, bool ConjugateRhs>
|
|
71
|
-
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
}
|
|
81
|
-
}; \
|
|
56
|
+
#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
|
|
57
|
+
template <typename Index, bool ConjugateLhs, bool ConjugateRhs> \
|
|
58
|
+
struct general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, ColMajor, \
|
|
59
|
+
ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, \
|
|
60
|
+
ConjugateRhs, Specialized> { \
|
|
61
|
+
static void run(Index rows, Index cols, const const_blas_data_mapper<Scalar, Index, ColMajor>& lhs, \
|
|
62
|
+
const const_blas_data_mapper<Scalar, Index, RowMajor>& rhs, Scalar* res, Index resIncr, \
|
|
63
|
+
Scalar alpha) { \
|
|
64
|
+
if (ConjugateLhs) { \
|
|
65
|
+
general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, ColMajor, \
|
|
66
|
+
ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, \
|
|
67
|
+
ConjugateRhs, BuiltIn>::run(rows, cols, lhs, rhs, res, resIncr, alpha); \
|
|
68
|
+
} else { \
|
|
69
|
+
general_matrix_vector_product_gemv<Index, Scalar, ColMajor, ConjugateLhs, Scalar, ConjugateRhs>::run( \
|
|
70
|
+
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
|
|
71
|
+
} \
|
|
72
|
+
} \
|
|
73
|
+
}; \
|
|
74
|
+
template <typename Index, bool ConjugateLhs, bool ConjugateRhs> \
|
|
75
|
+
struct general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, RowMajor, \
|
|
76
|
+
ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, \
|
|
77
|
+
ConjugateRhs, Specialized> { \
|
|
78
|
+
static void run(Index rows, Index cols, const const_blas_data_mapper<Scalar, Index, RowMajor>& lhs, \
|
|
79
|
+
const const_blas_data_mapper<Scalar, Index, ColMajor>& rhs, Scalar* res, Index resIncr, \
|
|
80
|
+
Scalar alpha) { \
|
|
81
|
+
general_matrix_vector_product_gemv<Index, Scalar, RowMajor, ConjugateLhs, Scalar, ConjugateRhs>::run( \
|
|
82
|
+
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
|
|
83
|
+
} \
|
|
84
|
+
};
|
|
82
85
|
|
|
83
86
|
EIGEN_BLAS_GEMV_SPECIALIZE(double)
|
|
84
87
|
EIGEN_BLAS_GEMV_SPECIALIZE(float)
|
|
85
88
|
EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
|
|
86
89
|
EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
|
|
87
90
|
|
|
88
|
-
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC)
|
|
89
|
-
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs>
|
|
90
|
-
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
\
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
}\
|
|
118
|
-
};
|
|
91
|
+
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE, BLASTYPE, BLASFUNC) \
|
|
92
|
+
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
|
|
93
|
+
struct general_matrix_vector_product_gemv<Index, EIGTYPE, LhsStorageOrder, ConjugateLhs, EIGTYPE, ConjugateRhs> { \
|
|
94
|
+
typedef Matrix<EIGTYPE, Dynamic, 1, ColMajor> GEMVVector; \
|
|
95
|
+
\
|
|
96
|
+
static void run(Index rows, Index cols, const EIGTYPE* lhs, Index lhsStride, const EIGTYPE* rhs, Index rhsIncr, \
|
|
97
|
+
EIGTYPE* res, Index resIncr, EIGTYPE alpha) { \
|
|
98
|
+
if (rows == 0 || cols == 0) return; \
|
|
99
|
+
BlasIndex m = convert_index<BlasIndex>(rows), n = convert_index<BlasIndex>(cols), \
|
|
100
|
+
lda = convert_index<BlasIndex>(lhsStride), incx = convert_index<BlasIndex>(rhsIncr), \
|
|
101
|
+
incy = convert_index<BlasIndex>(resIncr); \
|
|
102
|
+
const EIGTYPE beta(1); \
|
|
103
|
+
const EIGTYPE* x_ptr; \
|
|
104
|
+
char trans = (LhsStorageOrder == ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
|
|
105
|
+
if (LhsStorageOrder == RowMajor) { \
|
|
106
|
+
m = convert_index<BlasIndex>(cols); \
|
|
107
|
+
n = convert_index<BlasIndex>(rows); \
|
|
108
|
+
} \
|
|
109
|
+
GEMVVector x_tmp; \
|
|
110
|
+
if (ConjugateRhs) { \
|
|
111
|
+
Map<const GEMVVector, 0, InnerStride<> > map_x(rhs, cols, 1, InnerStride<>(incx)); \
|
|
112
|
+
x_tmp = map_x.conjugate(); \
|
|
113
|
+
x_ptr = x_tmp.data(); \
|
|
114
|
+
incx = 1; \
|
|
115
|
+
} else { \
|
|
116
|
+
x_ptr = rhs; \
|
|
117
|
+
} \
|
|
118
|
+
BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, \
|
|
119
|
+
(const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
|
|
120
|
+
} \
|
|
121
|
+
};
|
|
119
122
|
|
|
120
123
|
#ifdef EIGEN_USE_MKL
|
|
121
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(double,
|
|
122
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(float,
|
|
124
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
|
|
125
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
|
|
123
126
|
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
|
|
124
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8
|
|
127
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, cgemv)
|
|
125
128
|
#else
|
|
126
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(double,
|
|
127
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(float,
|
|
129
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
|
|
130
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
|
|
128
131
|
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
|
|
129
|
-
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,
|
|
132
|
+
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
|
|
130
133
|
#endif
|
|
131
134
|
|
|
132
|
-
}
|
|
135
|
+
} // namespace internal
|
|
133
136
|
|
|
134
|
-
}
|
|
137
|
+
} // end namespace Eigen
|
|
135
138
|
|
|
136
|
-
#endif
|
|
139
|
+
#endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
|
|
@@ -10,157 +10,273 @@
|
|
|
10
10
|
#ifndef EIGEN_PARALLELIZER_H
|
|
11
11
|
#define EIGEN_PARALLELIZER_H
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../InternalHeaderCheck.h"
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
// Note that in the following, there are 3 different uses of the concept
|
|
17
|
+
// "number of threads":
|
|
18
|
+
// 1. Max number of threads used by OpenMP or ThreadPool.
|
|
19
|
+
// * For OpenMP this is typically the value set by the OMP_NUM_THREADS
|
|
20
|
+
// environment variable, or by a call to omp_set_num_threads() prior to
|
|
21
|
+
// calling Eigen.
|
|
22
|
+
// * For ThreadPool, this is the number of threads in the ThreadPool.
|
|
23
|
+
// 2. Max number of threads currently allowed to be used by parallel Eigen
|
|
24
|
+
// operations. This is set by setNbThreads(), and cannot exceed the value
|
|
25
|
+
// in 1.
|
|
26
|
+
// 3. The actual number of threads used for a given parallel Eigen operation.
|
|
27
|
+
// This is typically computed on the fly using a cost model and cannot exceed
|
|
28
|
+
// the value in 2.
|
|
29
|
+
// * For OpenMP, this is typically the number of threads specified in individual
|
|
30
|
+
// "omp parallel" pragmas associated with an Eigen operation.
|
|
31
|
+
// * For ThreadPool, it is the number of concurrent tasks scheduled in the
|
|
32
|
+
// threadpool for a given Eigen operation. Notice that since the threadpool
|
|
33
|
+
// uses task stealing, there is no way to limit the number of concurrently
|
|
34
|
+
// executing tasks to below the number in 1. except by limiting the total
|
|
35
|
+
// number of tasks in flight.
|
|
16
36
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
static int m_maxThreads = -1;
|
|
21
|
-
EIGEN_UNUSED_VARIABLE(m_maxThreads);
|
|
37
|
+
#if defined(EIGEN_HAS_OPENMP) && defined(EIGEN_GEMM_THREADPOOL)
|
|
38
|
+
#error "EIGEN_HAS_OPENMP and EIGEN_GEMM_THREADPOOL may not both be defined."
|
|
39
|
+
#endif
|
|
22
40
|
|
|
23
|
-
|
|
24
|
-
{
|
|
25
|
-
eigen_internal_assert(v!=0);
|
|
26
|
-
m_maxThreads = *v;
|
|
27
|
-
}
|
|
28
|
-
else if(action==GetAction)
|
|
29
|
-
{
|
|
30
|
-
eigen_internal_assert(v!=0);
|
|
31
|
-
#ifdef EIGEN_HAS_OPENMP
|
|
32
|
-
if(m_maxThreads>0)
|
|
33
|
-
*v = m_maxThreads;
|
|
34
|
-
else
|
|
35
|
-
*v = omp_get_max_threads();
|
|
36
|
-
#else
|
|
37
|
-
*v = 1;
|
|
38
|
-
#endif
|
|
39
|
-
}
|
|
40
|
-
else
|
|
41
|
-
{
|
|
42
|
-
eigen_internal_assert(false);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
41
|
+
namespace Eigen {
|
|
45
42
|
|
|
43
|
+
namespace internal {
|
|
44
|
+
inline void manage_multi_threading(Action action, int* v);
|
|
46
45
|
}
|
|
47
46
|
|
|
47
|
+
// Public APIs.
|
|
48
|
+
|
|
48
49
|
/** Historically had to be called first when calling Eigen from multiple threads; initialization is no longer needed. */
|
|
49
|
-
inline void initParallel()
|
|
50
|
-
{
|
|
51
|
-
int nbt;
|
|
52
|
-
internal::manage_multi_threading(GetAction, &nbt);
|
|
53
|
-
std::ptrdiff_t l1, l2, l3;
|
|
54
|
-
internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
|
55
|
-
}
|
|
50
|
+
EIGEN_DEPRECATED_WITH_REASON("Initialization is no longer needed.") inline void initParallel() {}
|
|
56
51
|
|
|
57
52
|
/** \returns the max number of threads reserved for Eigen
|
|
58
|
-
|
|
59
|
-
inline int nbThreads()
|
|
60
|
-
{
|
|
53
|
+
* \sa setNbThreads */
|
|
54
|
+
inline int nbThreads() {
|
|
61
55
|
int ret;
|
|
62
56
|
internal::manage_multi_threading(GetAction, &ret);
|
|
63
57
|
return ret;
|
|
64
58
|
}
|
|
65
59
|
|
|
66
60
|
/** Sets the max number of threads reserved for Eigen
|
|
67
|
-
|
|
68
|
-
inline void setNbThreads(int v)
|
|
69
|
-
|
|
70
|
-
|
|
61
|
+
* \sa nbThreads */
|
|
62
|
+
inline void setNbThreads(int v) { internal::manage_multi_threading(SetAction, &v); }
|
|
63
|
+
|
|
64
|
+
#ifdef EIGEN_GEMM_THREADPOOL
|
|
65
|
+
// Sets the ThreadPool used by Eigen parallel Gemm.
|
|
66
|
+
//
|
|
67
|
+
// NOTICE: This function has a known race condition with
|
|
68
|
+
// parallelize_gemm below, and should not be called while
|
|
69
|
+
// an instance of that function is running.
|
|
70
|
+
//
|
|
71
|
+
// TODO(rmlarsen): Make the device API available instead of
|
|
72
|
+
// storing a local static pointer variable to avoid this issue.
|
|
73
|
+
inline ThreadPool* setGemmThreadPool(ThreadPool* new_pool) {
|
|
74
|
+
static ThreadPool* pool = nullptr;
|
|
75
|
+
if (new_pool != nullptr) {
|
|
76
|
+
// This only overwrites the stored raw pointer; the previous pool is neither
|
|
77
|
+
// waited on nor destroyed here.
|
|
78
|
+
pool = new_pool;
|
|
79
|
+
// Reset the number of threads to the number of threads on the new pool.
|
|
80
|
+
setNbThreads(pool->NumThreads());
|
|
81
|
+
}
|
|
82
|
+
return pool;
|
|
71
83
|
}
|
|
72
84
|
|
|
85
|
+
// Gets the ThreadPool used by Eigen parallel Gemm.
|
|
86
|
+
inline ThreadPool* getGemmThreadPool() { return setGemmThreadPool(nullptr); }
|
|
87
|
+
#endif
|
|
88
|
+
|
|
73
89
|
namespace internal {
|
|
74
90
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
91
|
+
// Implementation.
|
|
92
|
+
|
|
93
|
+
#if defined(EIGEN_USE_BLAS) || (!defined(EIGEN_HAS_OPENMP) && !defined(EIGEN_GEMM_THREADPOOL))
|
|
94
|
+
|
|
95
|
+
inline void manage_multi_threading(Action action, int* v) {
|
|
96
|
+
if (action == SetAction) {
|
|
97
|
+
eigen_internal_assert(v != nullptr);
|
|
98
|
+
} else if (action == GetAction) {
|
|
99
|
+
eigen_internal_assert(v != nullptr);
|
|
100
|
+
*v = 1;
|
|
101
|
+
} else {
|
|
102
|
+
eigen_internal_assert(false);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
template <typename Index>
|
|
106
|
+
struct GemmParallelInfo {};
|
|
107
|
+
template <bool Condition, typename Functor, typename Index>
|
|
108
|
+
EIGEN_STRONG_INLINE void parallelize_gemm(const Functor& func, Index rows, Index cols, Index /*unused*/,
|
|
109
|
+
bool /*unused*/) {
|
|
110
|
+
func(0, rows, 0, cols);
|
|
111
|
+
}
|
|
78
112
|
|
|
79
|
-
|
|
80
|
-
int volatile users;
|
|
113
|
+
#else
|
|
81
114
|
|
|
115
|
+
template <typename Index>
|
|
116
|
+
struct GemmParallelTaskInfo {
|
|
117
|
+
GemmParallelTaskInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
|
118
|
+
std::atomic<Index> sync;
|
|
119
|
+
std::atomic<int> users;
|
|
82
120
|
Index lhs_start;
|
|
83
121
|
Index lhs_length;
|
|
84
122
|
};
|
|
85
123
|
|
|
86
|
-
template<
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
124
|
+
template <typename Index>
|
|
125
|
+
struct GemmParallelInfo {
|
|
126
|
+
const int logical_thread_id;
|
|
127
|
+
const int num_threads;
|
|
128
|
+
GemmParallelTaskInfo<Index>* task_info;
|
|
129
|
+
|
|
130
|
+
GemmParallelInfo(int logical_thread_id_, int num_threads_, GemmParallelTaskInfo<Index>* task_info_)
|
|
131
|
+
: logical_thread_id(logical_thread_id_), num_threads(num_threads_), task_info(task_info_) {}
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
inline void manage_multi_threading(Action action, int* v) {
|
|
135
|
+
static int m_maxThreads = -1;
|
|
136
|
+
if (action == SetAction) {
|
|
137
|
+
eigen_internal_assert(v != nullptr);
|
|
138
|
+
#if defined(EIGEN_HAS_OPENMP)
|
|
139
|
+
// Calling action == SetAction and *v = 0 means
|
|
140
|
+
// restoring m_maxThreads to the maximum number of threads specified
|
|
141
|
+
// for OpenMP.
|
|
142
|
+
eigen_internal_assert(*v >= 0);
|
|
143
|
+
int omp_threads = omp_get_max_threads();
|
|
144
|
+
m_maxThreads = (*v == 0 ? omp_threads : std::min(*v, omp_threads));
|
|
145
|
+
#elif defined(EIGEN_GEMM_THREADPOOL)
|
|
146
|
+
// Calling action == SetAction and *v = 0 means
|
|
147
|
+
// restoring m_maxThreads to the number of threads in the ThreadPool,
|
|
148
|
+
// which defaults to 1 if no pool was provided.
|
|
149
|
+
eigen_internal_assert(*v >= 0);
|
|
150
|
+
ThreadPool* pool = getGemmThreadPool();
|
|
151
|
+
int pool_threads = pool != nullptr ? pool->NumThreads() : 1;
|
|
152
|
+
m_maxThreads = (*v == 0 ? pool_threads : numext::mini(pool_threads, *v));
|
|
153
|
+
#endif
|
|
154
|
+
} else if (action == GetAction) {
|
|
155
|
+
eigen_internal_assert(v != nullptr);
|
|
156
|
+
#if defined(EIGEN_HAS_OPENMP)
|
|
157
|
+
if (m_maxThreads > 0)
|
|
158
|
+
*v = m_maxThreads;
|
|
159
|
+
else
|
|
160
|
+
*v = omp_get_max_threads();
|
|
99
161
|
#else
|
|
162
|
+
*v = m_maxThreads;
|
|
163
|
+
#endif
|
|
164
|
+
} else {
|
|
165
|
+
eigen_internal_assert(false);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
100
168
|
|
|
101
|
-
|
|
169
|
+
template <bool Condition, typename Functor, typename Index>
|
|
170
|
+
EIGEN_STRONG_INLINE void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose) {
|
|
171
|
+
// Dynamically check whether we should even try to execute in parallel.
|
|
102
172
|
// The conditions are:
|
|
103
173
|
// - the max number of threads we can create is greater than 1
|
|
104
174
|
// - we are not already executing parallel code
|
|
105
175
|
// - the sizes are large enough
|
|
106
176
|
|
|
107
177
|
// compute the maximal number of threads from the size of the product:
|
|
108
|
-
// This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at
|
|
178
|
+
// This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at
|
|
179
|
+
// once.
|
|
109
180
|
Index size = transpose ? rows : cols;
|
|
110
|
-
Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
|
|
181
|
+
Index pb_max_threads = std::max<Index>(1, size / Functor::Traits::nr);
|
|
111
182
|
|
|
112
183
|
// compute the maximal number of threads from the total amount of work:
|
|
113
|
-
double work = static_cast<double>(rows) * static_cast<double>(cols) *
|
|
114
|
-
static_cast<double>(depth);
|
|
184
|
+
double work = static_cast<double>(rows) * static_cast<double>(cols) * static_cast<double>(depth);
|
|
115
185
|
double kMinTaskSize = 50000; // FIXME improve this heuristic.
|
|
116
|
-
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
|
|
186
|
+
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>(work / kMinTaskSize)));
|
|
117
187
|
|
|
118
188
|
// compute the number of threads we are going to use
|
|
119
|
-
|
|
189
|
+
int threads = std::min<int>(nbThreads(), static_cast<int>(pb_max_threads));
|
|
120
190
|
|
|
121
|
-
// if multi-threading is
|
|
122
|
-
// then abort multi-threading
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
191
|
+
// if multi-threading is explicitly disabled, not useful, or if we already are
|
|
192
|
+
// inside a parallel session, then abort multi-threading
|
|
193
|
+
bool dont_parallelize = (!Condition) || (threads <= 1);
|
|
194
|
+
#if defined(EIGEN_HAS_OPENMP)
|
|
195
|
+
// don't parallelize if we are executing in a parallel context already.
|
|
196
|
+
dont_parallelize |= omp_get_num_threads() > 1;
|
|
197
|
+
#elif defined(EIGEN_GEMM_THREADPOOL)
|
|
198
|
+
// don't parallelize if we have a trivial threadpool or the current thread id
|
|
199
|
+
// is != -1, indicating that we are already executing on a thread inside the pool.
|
|
200
|
+
// In other words, we do not allow nested parallelism, since this would lead to
|
|
201
|
+
// deadlocks due to the workstealing nature of the threadpool.
|
|
202
|
+
ThreadPool* pool = getGemmThreadPool();
|
|
203
|
+
dont_parallelize |= (pool == nullptr || pool->CurrentThreadId() != -1);
|
|
204
|
+
#endif
|
|
205
|
+
if (dont_parallelize) return func(0, rows, 0, cols);
|
|
126
206
|
|
|
127
|
-
Eigen::initParallel();
|
|
128
207
|
func.initParallelSession(threads);
|
|
129
208
|
|
|
130
|
-
if(transpose)
|
|
131
|
-
std::swap(rows,cols);
|
|
209
|
+
if (transpose) std::swap(rows, cols);
|
|
132
210
|
|
|
133
|
-
ei_declare_aligned_stack_constructed_variable(
|
|
211
|
+
ei_declare_aligned_stack_constructed_variable(GemmParallelTaskInfo<Index>, task_info, threads, 0);
|
|
134
212
|
|
|
135
|
-
|
|
213
|
+
#if defined(EIGEN_HAS_OPENMP)
|
|
214
|
+
#pragma omp parallel num_threads(threads)
|
|
136
215
|
{
|
|
137
216
|
Index i = omp_get_thread_num();
|
|
138
|
-
// Note that the actual number of threads might be lower than the number of
|
|
217
|
+
// Note that the actual number of threads might be lower than the number of
|
|
218
|
+
// requested ones
|
|
139
219
|
Index actual_threads = omp_get_num_threads();
|
|
220
|
+
GemmParallelInfo<Index> info(static_cast<int>(i), static_cast<int>(actual_threads), task_info);
|
|
221
|
+
|
|
222
|
+
Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
|
223
|
+
Index blockRows = (rows / actual_threads);
|
|
224
|
+
blockRows = (blockRows / Functor::Traits::mr) * Functor::Traits::mr;
|
|
225
|
+
|
|
226
|
+
Index r0 = i * blockRows;
|
|
227
|
+
Index actualBlockRows = (i + 1 == actual_threads) ? rows - r0 : blockRows;
|
|
140
228
|
|
|
229
|
+
Index c0 = i * blockCols;
|
|
230
|
+
Index actualBlockCols = (i + 1 == actual_threads) ? cols - c0 : blockCols;
|
|
231
|
+
|
|
232
|
+
info.task_info[i].lhs_start = r0;
|
|
233
|
+
info.task_info[i].lhs_length = actualBlockRows;
|
|
234
|
+
|
|
235
|
+
if (transpose)
|
|
236
|
+
func(c0, actualBlockCols, 0, rows, &info);
|
|
237
|
+
else
|
|
238
|
+
func(0, rows, c0, actualBlockCols, &info);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
#elif defined(EIGEN_GEMM_THREADPOOL)
|
|
242
|
+
Barrier barrier(threads);
|
|
243
|
+
auto task = [=, &func, &barrier, &task_info](int i) {
|
|
244
|
+
Index actual_threads = threads;
|
|
245
|
+
GemmParallelInfo<Index> info(i, static_cast<int>(actual_threads), task_info);
|
|
141
246
|
Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
|
142
247
|
Index blockRows = (rows / actual_threads);
|
|
143
|
-
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
|
248
|
+
blockRows = (blockRows / Functor::Traits::mr) * Functor::Traits::mr;
|
|
144
249
|
|
|
145
|
-
Index r0 = i*blockRows;
|
|
146
|
-
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
|
|
250
|
+
Index r0 = i * blockRows;
|
|
251
|
+
Index actualBlockRows = (i + 1 == actual_threads) ? rows - r0 : blockRows;
|
|
147
252
|
|
|
148
|
-
Index c0 = i*blockCols;
|
|
149
|
-
Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
|
|
253
|
+
Index c0 = i * blockCols;
|
|
254
|
+
Index actualBlockCols = (i + 1 == actual_threads) ? cols - c0 : blockCols;
|
|
150
255
|
|
|
151
|
-
info[i].lhs_start = r0;
|
|
152
|
-
info[i].lhs_length = actualBlockRows;
|
|
256
|
+
info.task_info[i].lhs_start = r0;
|
|
257
|
+
info.task_info[i].lhs_length = actualBlockRows;
|
|
153
258
|
|
|
154
|
-
if(transpose)
|
|
155
|
-
func(c0, actualBlockCols, 0, rows, info);
|
|
259
|
+
if (transpose)
|
|
260
|
+
func(c0, actualBlockCols, 0, rows, &info);
|
|
156
261
|
else
|
|
157
|
-
func(0, rows, c0, actualBlockCols, info);
|
|
262
|
+
func(0, rows, c0, actualBlockCols, &info);
|
|
263
|
+
|
|
264
|
+
barrier.Notify();
|
|
265
|
+
};
|
|
266
|
+
// Notice that we do not schedule more than "threads" tasks, which allows us to
|
|
267
|
+
// limit the number of running threads, even if the threadpool itself was constructed
|
|
268
|
+
// with a larger number of threads.
|
|
269
|
+
for (int i = 0; i < threads - 1; ++i) {
|
|
270
|
+
pool->Schedule([=, task = std::move(task)] { task(i); });
|
|
158
271
|
}
|
|
272
|
+
task(threads - 1);
|
|
273
|
+
barrier.Wait();
|
|
159
274
|
#endif
|
|
160
275
|
}
|
|
161
276
|
|
|
162
|
-
|
|
277
|
+
#endif
|
|
163
278
|
|
|
164
|
-
}
|
|
279
|
+
} // end namespace internal
|
|
280
|
+
} // end namespace Eigen
|
|
165
281
|
|
|
166
|
-
#endif
|
|
282
|
+
#endif // EIGEN_PARALLELIZER_H
|