@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -10,123 +10,14 @@
|
|
|
10
10
|
#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
|
11
11
|
#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
|
12
12
|
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../../InternalHeaderCheck.h"
|
|
15
|
+
|
|
13
16
|
namespace Eigen {
|
|
14
17
|
|
|
15
18
|
namespace internal {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923
|
|
19
|
-
|
|
20
|
-
#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
|
21
|
-
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
|
22
|
-
|
|
23
|
-
#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
|
|
24
|
-
const Packet16f p16f_##NAME = preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X))
|
|
25
|
-
|
|
26
|
-
#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
|
|
27
|
-
const Packet8d p8d_##NAME = pset1<Packet8d>(X)
|
|
28
|
-
|
|
29
|
-
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
|
|
30
|
-
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
|
|
31
|
-
|
|
32
|
-
#define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
|
|
33
|
-
const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)
|
|
34
|
-
|
|
35
|
-
#define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
|
|
36
|
-
const Packet16bf p16bf_##NAME = preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))
|
|
37
|
-
|
|
38
|
-
template <>
|
|
39
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
|
40
|
-
plog<Packet16f>(const Packet16f& _x) {
|
|
41
|
-
return plog_float(_x);
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
template <>
|
|
45
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
|
46
|
-
plog<Packet8d>(const Packet8d& _x) {
|
|
47
|
-
return plog_double(_x);
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
|
|
51
|
-
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
|
|
52
|
-
|
|
53
|
-
template <>
|
|
54
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
|
55
|
-
plog2<Packet16f>(const Packet16f& _x) {
|
|
56
|
-
return plog2_float(_x);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
template <>
|
|
60
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
|
61
|
-
plog2<Packet8d>(const Packet8d& _x) {
|
|
62
|
-
return plog2_double(_x);
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
|
|
66
|
-
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
|
|
67
|
-
|
|
68
|
-
// Exponential function. Works by writing "x = m*log(2) + r" where
|
|
69
|
-
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
|
70
|
-
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
|
71
|
-
template <>
|
|
72
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
|
73
|
-
pexp<Packet16f>(const Packet16f& _x) {
|
|
74
|
-
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
|
75
|
-
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
|
76
|
-
_EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
|
|
77
|
-
|
|
78
|
-
_EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
|
|
79
|
-
_EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
|
|
80
|
-
|
|
81
|
-
_EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
|
|
82
|
-
|
|
83
|
-
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
|
|
84
|
-
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
|
|
85
|
-
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
|
|
86
|
-
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
|
|
87
|
-
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
|
|
88
|
-
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
|
|
89
|
-
|
|
90
|
-
// Clamp x.
|
|
91
|
-
Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
|
|
92
|
-
|
|
93
|
-
// Express exp(x) as exp(m*ln(2) + r), start by extracting
|
|
94
|
-
// m = floor(x/ln(2) + 0.5).
|
|
95
|
-
Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
|
|
96
|
-
|
|
97
|
-
// Get r = x - m*ln(2). Note that we can do this without losing more than one
|
|
98
|
-
// ulp precision due to the FMA instruction.
|
|
99
|
-
_EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
|
|
100
|
-
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
|
|
101
|
-
Packet16f r2 = pmul(r, r);
|
|
102
|
-
Packet16f r3 = pmul(r2, r);
|
|
103
|
-
|
|
104
|
-
// Evaluate the polynomial approximant,improved by instruction-level parallelism.
|
|
105
|
-
Packet16f y, y1, y2;
|
|
106
|
-
y = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1);
|
|
107
|
-
y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4);
|
|
108
|
-
y2 = padd(r, p16f_1);
|
|
109
|
-
y = pmadd(y, r, p16f_cephes_exp_p2);
|
|
110
|
-
y1 = pmadd(y1, r, p16f_cephes_exp_p5);
|
|
111
|
-
y = pmadd(y, r3, y1);
|
|
112
|
-
y = pmadd(y, r2, y2);
|
|
113
|
-
|
|
114
|
-
// Build emm0 = 2^m.
|
|
115
|
-
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
|
|
116
|
-
emm0 = _mm512_slli_epi32(emm0, 23);
|
|
117
|
-
|
|
118
|
-
// Return 2^m * exp(r).
|
|
119
|
-
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
template <>
|
|
123
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
|
124
|
-
pexp<Packet8d>(const Packet8d& _x) {
|
|
125
|
-
return pexp_double(_x);
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
|
|
129
|
-
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
|
|
19
|
+
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet16f)
|
|
20
|
+
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet8d)
|
|
130
21
|
|
|
131
22
|
template <>
|
|
132
23
|
EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {
|
|
@@ -154,49 +45,19 @@ EIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exp
|
|
|
154
45
|
return F32ToBf16(pldexp<Packet16f>(Bf16ToF32(a), Bf16ToF32(exponent)));
|
|
155
46
|
}
|
|
156
47
|
|
|
157
|
-
// Functions for sqrt.
|
|
158
|
-
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
|
159
|
-
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
|
160
|
-
// exact solution. The main advantage of this approach is not just speed, but
|
|
161
|
-
// also the fact that it can be inlined and pipelined with other computations,
|
|
162
|
-
// further reducing its effective latency.
|
|
163
48
|
#if EIGEN_FAST_MATH
|
|
164
49
|
template <>
|
|
165
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
166
|
-
|
|
167
|
-
Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));
|
|
168
|
-
__mmask16 denormal_mask = _mm512_kand(
|
|
169
|
-
_mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),
|
|
170
|
-
_CMP_LT_OQ),
|
|
171
|
-
_mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
|
|
172
|
-
|
|
173
|
-
Packet16f x = _mm512_rsqrt14_ps(_x);
|
|
174
|
-
|
|
175
|
-
// Do a single step of Newton's iteration.
|
|
176
|
-
x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));
|
|
177
|
-
|
|
178
|
-
// Flush results for denormals to zero.
|
|
179
|
-
return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());
|
|
50
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f psqrt<Packet16f>(const Packet16f& x) {
|
|
51
|
+
return generic_sqrt_newton_step<Packet16f>::run(x, _mm512_rsqrt14_ps(x));
|
|
180
52
|
}
|
|
181
53
|
|
|
182
54
|
template <>
|
|
183
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
184
|
-
|
|
185
|
-
Packet8d
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
_mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
|
|
190
|
-
|
|
191
|
-
Packet8d x = _mm512_rsqrt14_pd(_x);
|
|
192
|
-
|
|
193
|
-
// Do a single step of Newton's iteration.
|
|
194
|
-
x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
|
|
195
|
-
|
|
196
|
-
// Do a second step of Newton's iteration.
|
|
197
|
-
x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
|
|
198
|
-
|
|
199
|
-
return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());
|
|
55
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d psqrt<Packet8d>(const Packet8d& x) {
|
|
56
|
+
#ifdef EIGEN_VECTORIZE_AVX512ER
|
|
57
|
+
return generic_sqrt_newton_step<Packet8d, /*Steps=*/1>::run(x, _mm512_rsqrt28_pd(x));
|
|
58
|
+
#else
|
|
59
|
+
return generic_sqrt_newton_step<Packet8d, /*Steps=*/2>::run(x, _mm512_rsqrt14_pd(x));
|
|
60
|
+
#endif
|
|
200
61
|
}
|
|
201
62
|
#else
|
|
202
63
|
template <>
|
|
@@ -210,12 +71,8 @@ EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
|
|
|
210
71
|
}
|
|
211
72
|
#endif
|
|
212
73
|
|
|
213
|
-
F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
|
|
214
|
-
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
|
|
215
|
-
|
|
216
74
|
// prsqrt for float.
|
|
217
75
|
#if defined(EIGEN_VECTORIZE_AVX512ER)
|
|
218
|
-
|
|
219
76
|
template <>
|
|
220
77
|
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
|
221
78
|
return _mm512_rsqrt28_ps(x);
|
|
@@ -223,137 +80,59 @@ EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
|
|
223
80
|
#elif EIGEN_FAST_MATH
|
|
224
81
|
|
|
225
82
|
template <>
|
|
226
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
227
|
-
|
|
228
|
-
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
|
|
229
|
-
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
|
|
230
|
-
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
|
|
231
|
-
|
|
232
|
-
Packet16f neg_half = pmul(_x, p16f_minus_half);
|
|
233
|
-
|
|
234
|
-
// Identity infinite, negative and denormal arguments.
|
|
235
|
-
__mmask16 inf_mask = _mm512_cmp_ps_mask(_x, p16f_inf, _CMP_EQ_OQ);
|
|
236
|
-
__mmask16 not_pos_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LE_OQ);
|
|
237
|
-
__mmask16 not_finite_pos_mask = not_pos_mask | inf_mask;
|
|
238
|
-
|
|
239
|
-
// Compute an approximate result using the rsqrt intrinsic, forcing +inf
|
|
240
|
-
// for denormals for consistency with AVX and SSE implementations.
|
|
241
|
-
Packet16f y_approx = _mm512_rsqrt14_ps(_x);
|
|
242
|
-
|
|
243
|
-
// Do a single step of Newton-Raphson iteration to improve the approximation.
|
|
244
|
-
// This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).
|
|
245
|
-
// It is essential to evaluate the inner term like this because forming
|
|
246
|
-
// y_n^2 may over- or underflow.
|
|
247
|
-
Packet16f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p16f_one_point_five));
|
|
248
|
-
|
|
249
|
-
// Select the result of the Newton-Raphson step for positive finite arguments.
|
|
250
|
-
// For other arguments, choose the output of the intrinsic. This will
|
|
251
|
-
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
|
|
252
|
-
return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
|
|
253
|
-
}
|
|
254
|
-
#else
|
|
255
|
-
|
|
256
|
-
template <>
|
|
257
|
-
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
|
258
|
-
_EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
|
|
259
|
-
return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
|
|
83
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
|
84
|
+
return generic_rsqrt_newton_step<Packet16f, /*Steps=*/1>::run(x, _mm512_rsqrt14_ps(x));
|
|
260
85
|
}
|
|
261
86
|
#endif
|
|
262
87
|
|
|
263
|
-
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
|
|
264
|
-
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
|
|
265
|
-
|
|
266
88
|
// prsqrt for double.
|
|
267
89
|
#if EIGEN_FAST_MATH
|
|
268
90
|
template <>
|
|
269
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
|
|
273
|
-
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
|
|
274
|
-
|
|
275
|
-
Packet8d neg_half = pmul(_x, p8d_minus_half);
|
|
276
|
-
|
|
277
|
-
// Identity infinite, negative and denormal arguments.
|
|
278
|
-
__mmask8 inf_mask = _mm512_cmp_pd_mask(_x, p8d_inf, _CMP_EQ_OQ);
|
|
279
|
-
__mmask8 not_pos_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LE_OQ);
|
|
280
|
-
__mmask8 not_finite_pos_mask = not_pos_mask | inf_mask;
|
|
281
|
-
|
|
282
|
-
// Compute an approximate result using the rsqrt intrinsic, forcing +inf
|
|
283
|
-
// for denormals for consistency with AVX and SSE implementations.
|
|
284
|
-
#if defined(EIGEN_VECTORIZE_AVX512ER)
|
|
285
|
-
Packet8d y_approx = _mm512_rsqrt28_pd(_x);
|
|
91
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d prsqrt<Packet8d>(const Packet8d& x) {
|
|
92
|
+
#ifdef EIGEN_VECTORIZE_AVX512ER
|
|
93
|
+
return generic_rsqrt_newton_step<Packet8d, /*Steps=*/1>::run(x, _mm512_rsqrt28_pd(x));
|
|
286
94
|
#else
|
|
287
|
-
Packet8d
|
|
288
|
-
#endif
|
|
289
|
-
// Do one or two steps of Newton-Raphson's to improve the approximation, depending on the
|
|
290
|
-
// starting accuracy (either 2^-14 or 2^-28, depending on whether AVX512ER is available).
|
|
291
|
-
// The Newton-Raphson algorithm has quadratic convergence and roughly doubles the number
|
|
292
|
-
// of correct digits for each step.
|
|
293
|
-
// This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).
|
|
294
|
-
// It is essential to evaluate the inner term like this because forming
|
|
295
|
-
// y_n^2 may over- or underflow.
|
|
296
|
-
Packet8d y_newton = pmul(y_approx, pmadd(neg_half, pmul(y_approx, y_approx), p8d_one_point_five));
|
|
297
|
-
#if !defined(EIGEN_VECTORIZE_AVX512ER)
|
|
298
|
-
y_newton = pmul(y_newton, pmadd(y_newton, pmul(neg_half, y_newton), p8d_one_point_five));
|
|
95
|
+
return generic_rsqrt_newton_step<Packet8d, /*Steps=*/2>::run(x, _mm512_rsqrt14_pd(x));
|
|
299
96
|
#endif
|
|
300
|
-
// Select the result of the Newton-Raphson step for positive finite arguments.
|
|
301
|
-
// For other arguments, choose the output of the intrinsic. This will
|
|
302
|
-
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
|
|
303
|
-
return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
|
|
304
97
|
}
|
|
305
|
-
|
|
98
|
+
|
|
306
99
|
template <>
|
|
307
|
-
EIGEN_STRONG_INLINE
|
|
308
|
-
|
|
309
|
-
return
|
|
310
|
-
|
|
100
|
+
EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f& a) {
|
|
101
|
+
#ifdef EIGEN_VECTORIZE_AVX512ER
|
|
102
|
+
return _mm512_rcp28_ps(a);
|
|
103
|
+
#else
|
|
104
|
+
return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
|
|
311
105
|
#endif
|
|
312
|
-
|
|
313
|
-
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
314
|
-
Packet16f plog1p<Packet16f>(const Packet16f& _x) {
|
|
315
|
-
return generic_plog1p(_x);
|
|
316
106
|
}
|
|
107
|
+
#endif
|
|
317
108
|
|
|
318
|
-
|
|
109
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos)
|
|
110
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
|
|
111
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp2)
|
|
112
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
|
|
113
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
|
|
319
114
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
|
|
115
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
|
|
116
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
|
|
117
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
|
|
118
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
|
|
119
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
|
|
120
|
+
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh)
|
|
320
121
|
|
|
321
|
-
|
|
322
|
-
Packet16f
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
122
|
+
#ifndef EIGEN_VECTORIZE_AVX512FP16
|
|
123
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)
|
|
124
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
|
|
125
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp2)
|
|
326
126
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
template <>
|
|
333
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
|
334
|
-
psin<Packet16f>(const Packet16f& _x) {
|
|
335
|
-
return psin_float(_x);
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
template <>
|
|
339
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
|
340
|
-
pcos<Packet16f>(const Packet16f& _x) {
|
|
341
|
-
return pcos_float(_x);
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
template <>
|
|
345
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
|
346
|
-
ptanh<Packet16f>(const Packet16f& _x) {
|
|
347
|
-
return internal::generic_fast_tanh_float(_x);
|
|
348
|
-
}
|
|
349
|
-
|
|
127
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
|
|
128
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
|
|
129
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
|
|
130
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
|
|
131
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
|
|
350
132
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, psin)
|
|
351
|
-
F16_PACKET_FUNCTION(Packet16f, Packet16h,
|
|
133
|
+
F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
|
|
352
134
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)
|
|
353
|
-
|
|
354
|
-
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
|
|
355
|
-
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos)
|
|
356
|
-
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh)
|
|
135
|
+
#endif // EIGEN_VECTORIZE_AVX512FP16
|
|
357
136
|
|
|
358
137
|
} // end namespace internal
|
|
359
138
|
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2025 The Eigen Authors.
|
|
5
|
+
//
|
|
6
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
|
+
|
|
10
|
+
#ifndef EIGEN_MATH_FUNCTIONS_FP16_AVX512_H
|
|
11
|
+
#define EIGEN_MATH_FUNCTIONS_FP16_AVX512_H
|
|
12
|
+
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../../InternalHeaderCheck.h"
|
|
15
|
+
|
|
16
|
+
namespace Eigen {
|
|
17
|
+
namespace internal {
|
|
18
|
+
|
|
19
|
+
EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
|
|
20
|
+
__m512i result = _mm512_castsi256_si512(_mm256_castph_si256(a));
|
|
21
|
+
result = _mm512_inserti64x4(result, _mm256_castph_si256(b), 1);
|
|
22
|
+
return _mm512_castsi512_ph(result);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
|
|
26
|
+
a = _mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_castph_si512(x)));
|
|
27
|
+
b = _mm256_castsi256_ph(_mm512_extracti64x4_epi64(_mm512_castph_si512(x), 1));
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
#define _EIGEN_GENERATE_FP16_MATH_FUNCTION(func) \
|
|
31
|
+
template <> \
|
|
32
|
+
EIGEN_STRONG_INLINE Packet8h func<Packet8h>(const Packet8h& a) { \
|
|
33
|
+
return float2half(func(half2float(a))); \
|
|
34
|
+
} \
|
|
35
|
+
\
|
|
36
|
+
template <> \
|
|
37
|
+
EIGEN_STRONG_INLINE Packet16h func<Packet16h>(const Packet16h& a) { \
|
|
38
|
+
return float2half(func(half2float(a))); \
|
|
39
|
+
} \
|
|
40
|
+
\
|
|
41
|
+
template <> \
|
|
42
|
+
EIGEN_STRONG_INLINE Packet32h func<Packet32h>(const Packet32h& a) { \
|
|
43
|
+
Packet16h low; \
|
|
44
|
+
Packet16h high; \
|
|
45
|
+
extract2Packet16h(a, low, high); \
|
|
46
|
+
return combine2Packet16h(func(low), func(high)); \
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(psin)
|
|
50
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(pcos)
|
|
51
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(plog)
|
|
52
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(plog2)
|
|
53
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(plog1p)
|
|
54
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(pexp)
|
|
55
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(pexpm1)
|
|
56
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(pexp2)
|
|
57
|
+
_EIGEN_GENERATE_FP16_MATH_FUNCTION(ptanh)
|
|
58
|
+
#undef _EIGEN_GENERATE_FP16_MATH_FUNCTION
|
|
59
|
+
|
|
60
|
+
// pfrexp
|
|
61
|
+
template <>
|
|
62
|
+
EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
|
|
63
|
+
return pfrexp_generic(a, exponent);
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// pldexp
|
|
67
|
+
template <>
|
|
68
|
+
EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
|
|
69
|
+
return pldexp_generic(a, exponent);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
} // end namespace internal
|
|
73
|
+
} // end namespace Eigen
|
|
74
|
+
|
|
75
|
+
#endif // EIGEN_MATH_FUNCTIONS_FP16_AVX512_H
|