npm - @smake/eigen - Versions diffs - 1.0.2 → 1.1.1 - Mend

@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (435) hide show

package/README.md +1 -1
package/eigen/Eigen/AccelerateSupport +52 -0
package/eigen/Eigen/Cholesky +18 -21
package/eigen/Eigen/CholmodSupport +28 -28
package/eigen/Eigen/Core +235 -326
package/eigen/Eigen/Eigenvalues +16 -14
package/eigen/Eigen/Geometry +21 -24
package/eigen/Eigen/Householder +9 -8
package/eigen/Eigen/IterativeLinearSolvers +8 -4
package/eigen/Eigen/Jacobi +14 -14
package/eigen/Eigen/KLUSupport +43 -0
package/eigen/Eigen/LU +16 -20
package/eigen/Eigen/MetisSupport +12 -12
package/eigen/Eigen/OrderingMethods +54 -54
package/eigen/Eigen/PaStiXSupport +23 -20
package/eigen/Eigen/PardisoSupport +17 -14
package/eigen/Eigen/QR +18 -21
package/eigen/Eigen/QtAlignedMalloc +5 -13
package/eigen/Eigen/SPQRSupport +21 -14
package/eigen/Eigen/SVD +23 -18
package/eigen/Eigen/Sparse +1 -4
package/eigen/Eigen/SparseCholesky +18 -23
package/eigen/Eigen/SparseCore +18 -17
package/eigen/Eigen/SparseLU +12 -8
package/eigen/Eigen/SparseQR +16 -14
package/eigen/Eigen/StdDeque +5 -2
package/eigen/Eigen/StdList +5 -2
package/eigen/Eigen/StdVector +5 -2
package/eigen/Eigen/SuperLUSupport +30 -24
package/eigen/Eigen/ThreadPool +80 -0
package/eigen/Eigen/UmfPackSupport +19 -17
package/eigen/Eigen/Version +14 -0
package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
package/eigen/Eigen/src/Core/Array.h +341 -294
package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
package/eigen/Eigen/src/Core/Assign.h +30 -40
package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
package/eigen/Eigen/src/Core/Block.h +375 -398
package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
package/eigen/Eigen/src/Core/DenseBase.h +632 -571
package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
package/eigen/Eigen/src/Core/Diagonal.h +169 -210
package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
package/eigen/Eigen/src/Core/Dot.h +172 -222
package/eigen/Eigen/src/Core/EigenBase.h +75 -85
package/eigen/Eigen/src/Core/Fill.h +138 -0
package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
package/eigen/Eigen/src/Core/IO.h +147 -139
package/eigen/Eigen/src/Core/IndexedView.h +321 -0
package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Core/Inverse.h +56 -66
package/eigen/Eigen/src/Core/Map.h +124 -142
package/eigen/Eigen/src/Core/MapBase.h +256 -281
package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
package/eigen/Eigen/src/Core/Matrix.h +491 -416
package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
package/eigen/Eigen/src/Core/NestByValue.h +66 -85
package/eigen/Eigen/src/Core/NoAlias.h +79 -85
package/eigen/Eigen/src/Core/NumTraits.h +235 -148
package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
package/eigen/Eigen/src/Core/Product.h +260 -139
package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
package/eigen/Eigen/src/Core/Random.h +161 -136
package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
package/eigen/Eigen/src/Core/RealView.h +250 -0
package/eigen/Eigen/src/Core/Redux.h +366 -336
package/eigen/Eigen/src/Core/Ref.h +308 -209
package/eigen/Eigen/src/Core/Replicate.h +94 -106
package/eigen/Eigen/src/Core/Reshaped.h +398 -0
package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
package/eigen/Eigen/src/Core/Reverse.h +136 -145
package/eigen/Eigen/src/Core/Select.h +70 -140
package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
package/eigen/Eigen/src/Core/Solve.h +97 -111
package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
package/eigen/Eigen/src/Core/SolverBase.h +138 -101
package/eigen/Eigen/src/Core/StableNorm.h +156 -160
package/eigen/Eigen/src/Core/StlIterators.h +619 -0
package/eigen/Eigen/src/Core/Stride.h +91 -88
package/eigen/Eigen/src/Core/Swap.h +70 -38
package/eigen/Eigen/src/Core/Transpose.h +295 -273
package/eigen/Eigen/src/Core/Transpositions.h +272 -317
package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
package/eigen/Eigen/src/Core/Visitor.h +480 -216
package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
package/eigen/Eigen/src/Core/util/Assert.h +158 -0
package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
package/eigen/Eigen/src/Core/util/Constants.h +314 -263
package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
package/eigen/Eigen/src/Core/util/Macros.h +939 -646
package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
package/eigen/Eigen/src/Core/util/Meta.h +618 -426
package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
package/eigen/Eigen/src/Geometry/Transform.h +896 -953
package/eigen/Eigen/src/Geometry/Translation.h +100 -98
package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
package/eigen/Eigen/src/Householder/Householder.h +104 -122
package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
package/eigen/Eigen/src/LU/Determinant.h +60 -63
package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
package/eigen/Eigen/src/StlSupport/details.h +48 -50
package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
package/eigen/Eigen/src/misc/Image.h +41 -43
package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/misc/Kernel.h +39 -41
package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
package/eigen/Eigen/src/misc/blas.h +83 -426
package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
package/lib/LibEigen.d.ts +4 -0
package/lib/LibEigen.js +14 -0
package/lib/index.d.ts +1 -1
package/lib/index.js +7 -3
package/package.json +2 -10
package/eigen/Eigen/CMakeLists.txt +0 -19
package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
package/eigen/Eigen/src/misc/lapack.h +0 -152
package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
package/lib/eigen.d.ts +0 -2
package/lib/eigen.js +0 -15

package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h CHANGED Viewed

@@ -10,378 +10,130 @@
 #ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
 #define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
+// IWYU pragma: private
+#include "../../InternalHeaderCheck.h"
 namespace Eigen {
 namespace internal {
+EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet16f)
+EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet8d)
-// Disable the code for older versions of gcc that don't support many of the required avx512 instrinsics.
-#if EIGEN_GNUC_AT_LEAST(5, 3)
-#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
-  const Packet16f p16f_##NAME = pset1<Packet16f>(X)
-#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
-  const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
-#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
-  const Packet8d p8d_##NAME = pset1<Packet8d>(X)
-#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
-  const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
-// Natural logarithm
-// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
-// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
-// be easily approximated by a polynomial centered on m=1 for stability.
-#if defined(EIGEN_VECTORIZE_AVX512DQ)
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
-plog<Packet16f>(const Packet16f& _x) {
-  Packet16f x = _x;
-  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
-  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
-  _EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
-  // The smallest non denormalized float number.
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(pos_inf, 0x7f800000);
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
-  // Polynomial coefficients.
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
-  // invalid_mask is set to true when x is NaN
-  __mmask16 invalid_mask =  _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
-  __mmask16 iszero_mask  =  _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_OQ);
-  // Truncate input values to the minimum positive normal.
-  x = pmax(x, p16f_min_norm_pos);
-  // Extract the shifted exponents.
-  Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
-  Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
-  // Set the exponents to -1, i.e. x are in the range [0.5,1).
-  x = _mm512_and_ps(x, p16f_inv_mant_mask);
-  x = _mm512_or_ps(x, p16f_half);
-  // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
-  // and shift by -1. The values are then centered around 0, which improves
-  // the stability of the polynomial evaluation.
-  //   if( x < SQRTHF ) {
-  //     e -= 1;
-  //     x = x + x - 1.0;
-  //   } else { x = x - 1.0; }
-  __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
-  Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
-  x = psub(x, p16f_1);
-  e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
-  x = padd(x, tmp);
-  Packet16f x2 = pmul(x, x);
-  Packet16f x3 = pmul(x2, x);
-  // Evaluate the polynomial approximant of degree 8 in three parts, probably
-  // to improve instruction-level parallelism.
-  Packet16f y, y1, y2;
-  y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
-  y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
-  y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
-  y = pmadd(y, x, p16f_cephes_log_p2);
-  y1 = pmadd(y1, x, p16f_cephes_log_p5);
-  y2 = pmadd(y2, x, p16f_cephes_log_p8);
-  y = pmadd(y, x3, y1);
-  y = pmadd(y, x3, y2);
-  y = pmul(y, x3);
-  // Add the logarithm of the exponent back to the result of the interpolation.
-  y1 = pmul(e, p16f_cephes_log_q1);
-  tmp = pmul(x2, p16f_half);
-  y = padd(y, y1);
-  x = psub(x, tmp);
-  y2 = pmul(e, p16f_cephes_log_q2);
-  x = padd(x, y);
-  x = padd(x, y2);
-  __mmask16 pos_inf_mask = _mm512_cmp_ps_mask(_x,p16f_pos_inf,_CMP_EQ_OQ);
-  // Filter out invalid inputs, i.e.:
-  //  - negative arg will be NAN,
-  //  - 0 will be -INF.
-  //  - +INF will be +INF
-  return _mm512_mask_blend_ps(iszero_mask,
-            _mm512_mask_blend_ps(invalid_mask,
-              _mm512_mask_blend_ps(pos_inf_mask,x,p16f_pos_inf),
-              p16f_nan),
-            p16f_minus_inf);
+EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {
+  Packet16f fexponent;
+  const Packet16h out = float2half(pfrexp<Packet16f>(half2float(a), fexponent));
+  exponent = float2half(fexponent);
+  return out;
 }
-#endif
-// Exponential function. Works by writing "x = m*log(2) + r" where
-// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
-// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
-pexp<Packet16f>(const Packet16f& _x) {
-  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
-  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
-  _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
-  _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
-  _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
-  // Clamp x.
-  Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
-  // Express exp(x) as exp(m*ln(2) + r), start by extracting
-  // m = floor(x/ln(2) + 0.5).
-  Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
-  // Get r = x - m*ln(2). Note that we can do this without losing more than one
-  // ulp precision due to the FMA instruction.
-  _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
-  Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
-  Packet16f r2 = pmul(r, r);
-  // TODO(gonnet): Split into odd/even polynomials and try to exploit
-  //               instruction-level parallelism.
-  Packet16f y = p16f_cephes_exp_p0;
-  y = pmadd(y, r, p16f_cephes_exp_p1);
-  y = pmadd(y, r, p16f_cephes_exp_p2);
-  y = pmadd(y, r, p16f_cephes_exp_p3);
-  y = pmadd(y, r, p16f_cephes_exp_p4);
-  y = pmadd(y, r, p16f_cephes_exp_p5);
-  y = pmadd(y, r2, r);
-  y = padd(y, p16f_1);
-  // Build emm0 = 2^m.
-  Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
-  emm0 = _mm512_slli_epi32(emm0, 23);
-  // Return 2^m * exp(r).
-  return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
+EIGEN_STRONG_INLINE Packet16h pldexp(const Packet16h& a, const Packet16h& exponent) {
+  return float2half(pldexp<Packet16f>(half2float(a), half2float(exponent)));
 }
-/*template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
-pexp<Packet8d>(const Packet8d& _x) {
-  Packet8d x = _x;
-  _EIGEN_DECLARE_CONST_Packet8d(1, 1.0);
-  _EIGEN_DECLARE_CONST_Packet8d(2, 2.0);
-  _EIGEN_DECLARE_CONST_Packet8d(exp_hi, 709.437);
-  _EIGEN_DECLARE_CONST_Packet8d(exp_lo, -709.436139303);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_LOG2EF, 1.4426950408889634073599);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p0, 1.26177193074810590878e-4);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p1, 3.02994407707441961300e-2);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p2, 9.99999999999999999910e-1);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q0, 3.00198505138664455042e-6);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q1, 2.52448340349684104192e-3);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q2, 2.27265548208155028766e-1);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q3, 2.00000000000000000009e0);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C1, 0.693145751953125);
-  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C2, 1.42860682030941723212e-6);
-  // clamp x
-  x = pmax(pmin(x, p8d_exp_hi), p8d_exp_lo);
-  // Express exp(x) as exp(g + n*log(2)).
-  const Packet8d n =
-      _mm512_mul_round_pd(p8d_cephes_LOG2EF, x, _MM_FROUND_TO_NEAREST_INT);
-  // Get the remainder modulo log(2), i.e. the "g" described above. Subtract
-  // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
-  // digits right.
-  const Packet8d nC1 = pmul(n, p8d_cephes_exp_C1);
-  const Packet8d nC2 = pmul(n, p8d_cephes_exp_C2);
-  x = psub(x, nC1);
-  x = psub(x, nC2);
-  const Packet8d x2 = pmul(x, x);
-  // Evaluate the numerator polynomial of the rational interpolant.
-  Packet8d px = p8d_cephes_exp_p0;
-  px = pmadd(px, x2, p8d_cephes_exp_p1);
-  px = pmadd(px, x2, p8d_cephes_exp_p2);
-  px = pmul(px, x);
-  // Evaluate the denominator polynomial of the rational interpolant.
-  Packet8d qx = p8d_cephes_exp_q0;
-  qx = pmadd(qx, x2, p8d_cephes_exp_q1);
-  qx = pmadd(qx, x2, p8d_cephes_exp_q2);
-  qx = pmadd(qx, x2, p8d_cephes_exp_q3);
-  // I don't really get this bit, copied from the SSE2 routines, so...
-  // TODO(gonnet): Figure out what is going on here, perhaps find a better
-  // rational interpolant?
-  x = _mm512_div_pd(px, psub(qx, px));
-  x = pmadd(p8d_2, x, p8d_1);
-  // Build e=2^n.
-  const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
-      _mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
+template <>
+EIGEN_STRONG_INLINE Packet16bf pfrexp(const Packet16bf& a, Packet16bf& exponent) {
+  Packet16f fexponent;
+  const Packet16bf out = F32ToBf16(pfrexp<Packet16f>(Bf16ToF32(a), fexponent));
+  exponent = F32ToBf16(fexponent);
+  return out;
+}
-  // Construct the result 2^n * exp(g) = e * x. The max is used to catch
-  // non-finite values in the input.
-  return pmax(pmul(x, e), _x);
-  }*/
+template <>
+EIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exponent) {
+  return F32ToBf16(pldexp<Packet16f>(Bf16ToF32(a), Bf16ToF32(exponent)));
+}
-// Functions for sqrt.
-// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
-// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
-// exact solution. The main advantage of this approach is not just speed, but
-// also the fact that it can be inlined and pipelined with other computations,
-// further reducing its effective latency.
 #if EIGEN_FAST_MATH
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
-psqrt<Packet16f>(const Packet16f& _x) {
-  Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));
-  __mmask16 denormal_mask = _mm512_kand(
-      _mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),
-                        _CMP_LT_OQ),
-      _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
-  Packet16f x = _mm512_rsqrt14_ps(_x);
-  // Do a single step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));
-  // Flush results for denormals to zero.
-  return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f psqrt<Packet16f>(const Packet16f& x) {
+  return generic_sqrt_newton_step<Packet16f>::run(x, _mm512_rsqrt14_ps(x));
 }
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
-psqrt<Packet8d>(const Packet8d& _x) {
-  Packet8d neg_half = pmul(_x, pset1<Packet8d>(-.5));
-  __mmask16 denormal_mask = _mm512_kand(
-      _mm512_cmp_pd_mask(_x, pset1<Packet8d>((std::numeric_limits<double>::min)()),
-                        _CMP_LT_OQ),
-      _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
-  Packet8d x = _mm512_rsqrt14_pd(_x);
-  // Do a single step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
-  // Do a second step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
-  return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d psqrt<Packet8d>(const Packet8d& x) {
+#ifdef EIGEN_VECTORIZE_AVX512ER
+  return generic_sqrt_newton_step<Packet8d, /*Steps=*/1>::run(x, _mm512_rsqrt28_pd(x));
+#else
+  return generic_sqrt_newton_step<Packet8d, /*Steps=*/2>::run(x, _mm512_rsqrt14_pd(x));
+#endif
 }
 #else
 template <>
 EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
   return _mm512_sqrt_ps(x);
 }
 template <>
 EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
   return _mm512_sqrt_pd(x);
 }
 #endif
-// Functions for rsqrt.
-// Almost identical to the sqrt routine, just leave out the last multiplication
-// and fill in NaN/Inf where needed. Note that this function only exists as an
-// iterative version for doubles since there is no instruction for diretly
-// computing the reciprocal square root in AVX-512.
-#ifdef EIGEN_FAST_MATH
+// prsqrt for float.
+#if defined(EIGEN_VECTORIZE_AVX512ER)
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
-prsqrt<Packet16f>(const Packet16f& _x) {
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
-  _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
-  _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
-  Packet16f neg_half = pmul(_x, p16f_minus_half);
-  // select only the inverse sqrt of positive normal inputs (denormals are
-  // flushed to zero and cause infs as well).
-  __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
-  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
-  // Fill in NaNs and Infs for the negative/zero entries.
-  __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
-  Packet16f infs_and_nans = _mm512_mask_blend_ps(
-      neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
-  // Do a single step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
-  // Insert NaNs and Infs in all the right places.
-  return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
+EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
+  return _mm512_rsqrt28_ps(x);
 }
+#elif EIGEN_FAST_MATH
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
-prsqrt<Packet8d>(const Packet8d& _x) {
-  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
-  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
-  _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
-  _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
-  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
-  Packet8d neg_half = pmul(_x, p8d_minus_half);
-  // select only the inverse sqrt of positive normal inputs (denormals are
-  // flushed to zero and cause infs as well).
-  __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
-  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
-  // Fill in NaNs and Infs for the negative/zero entries.
-  __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
-  Packet8d infs_and_nans = _mm512_mask_blend_pd(
-      neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
-  // Do a first step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
-  // Do a second step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
-  // Insert NaNs and Infs in all the right places.
-  return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f prsqrt<Packet16f>(const Packet16f& x) {
+  return generic_rsqrt_newton_step<Packet16f, /*Steps=*/1>::run(x, _mm512_rsqrt14_ps(x));
 }
-#elif defined(EIGEN_VECTORIZE_AVX512ER)
+#endif
+// prsqrt for double.
+#if EIGEN_FAST_MATH
 template <>
-EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
-  return _mm512_rsqrt28_ps(x);
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d prsqrt<Packet8d>(const Packet8d& x) {
+#ifdef EIGEN_VECTORIZE_AVX512ER
+  return generic_rsqrt_newton_step<Packet8d, /*Steps=*/1>::run(x, _mm512_rsqrt28_pd(x));
+#else
+  return generic_rsqrt_newton_step<Packet8d, /*Steps=*/2>::run(x, _mm512_rsqrt14_pd(x));
+#endif
 }
+template <>
+EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f& a) {
+#ifdef EIGEN_VECTORIZE_AVX512ER
+  return _mm512_rcp28_ps(a);
+#else
+  return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
 #endif
+}
 #endif
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp2)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh)
+#ifndef EIGEN_VECTORIZE_AVX512FP16
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp2)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, psin)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
+F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)
+#endif  // EIGEN_VECTORIZE_AVX512FP16
 }  // end namespace internal
 }  // end namespace Eigen

package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h ADDED Viewed

@@ -0,0 +1,75 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2025 The Eigen Authors.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_MATH_FUNCTIONS_FP16_AVX512_H
+#define EIGEN_MATH_FUNCTIONS_FP16_AVX512_H
+// IWYU pragma: private
+#include "../../InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
+  __m512i result = _mm512_castsi256_si512(_mm256_castph_si256(a));
+  result = _mm512_inserti64x4(result, _mm256_castph_si256(b), 1);
+  return _mm512_castsi512_ph(result);
+}
+EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
+  a = _mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_castph_si512(x)));
+  b = _mm256_castsi256_ph(_mm512_extracti64x4_epi64(_mm512_castph_si512(x), 1));
+}
+#define _EIGEN_GENERATE_FP16_MATH_FUNCTION(func)                      \
+  template <>                                                         \
+  EIGEN_STRONG_INLINE Packet8h func<Packet8h>(const Packet8h& a) {    \
+    return float2half(func(half2float(a)));                           \
+  }                                                                   \
+                                                                      \
+  template <>                                                         \
+  EIGEN_STRONG_INLINE Packet16h func<Packet16h>(const Packet16h& a) { \
+    return float2half(func(half2float(a)));                           \
+  }                                                                   \
+                                                                      \
+  template <>                                                         \
+  EIGEN_STRONG_INLINE Packet32h func<Packet32h>(const Packet32h& a) { \
+    Packet16h low;                                                    \
+    Packet16h high;                                                   \
+    extract2Packet16h(a, low, high);                                  \
+    return combine2Packet16h(func(low), func(high));                  \
+  }
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(psin)
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(pcos)
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(plog)
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(plog2)
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(plog1p)
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(pexp)
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(pexpm1)
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(pexp2)
+_EIGEN_GENERATE_FP16_MATH_FUNCTION(ptanh)
+#undef _EIGEN_GENERATE_FP16_MATH_FUNCTION
+// pfrexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
+  return pfrexp_generic(a, exponent);
+}
+// pldexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
+  return pldexp_generic(a, exponent);
+}
+}  // end namespace internal
+}  // end namespace Eigen
+#endif  // EIGEN_MATH_FUNCTIONS_FP16_AVX512_H