@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -10,428 +10,119 @@
|
|
|
10
10
|
#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
|
|
11
11
|
#define EIGEN_MATH_FUNCTIONS_AVX_H
|
|
12
12
|
|
|
13
|
-
/* The sin
|
|
13
|
+
/* The sin and cos functions of this file are loosely derived from
|
|
14
14
|
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
+
// IWYU pragma: private
|
|
18
|
+
#include "../../InternalHeaderCheck.h"
|
|
19
|
+
|
|
17
20
|
namespace Eigen {
|
|
18
21
|
|
|
19
22
|
namespace internal {
|
|
20
23
|
|
|
21
|
-
|
|
22
|
-
{
|
|
23
|
-
#ifdef EIGEN_VECTORIZE_AVX2
|
|
24
|
-
return _mm256_slli_epi32(v, n);
|
|
25
|
-
#else
|
|
26
|
-
__m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
|
|
27
|
-
__m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
|
|
28
|
-
return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
|
|
29
|
-
#endif
|
|
30
|
-
}
|
|
24
|
+
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f)
|
|
31
25
|
|
|
32
|
-
|
|
33
|
-
|
|
26
|
+
EIGEN_DOUBLE_PACKET_FUNCTION(atanh, Packet4d)
|
|
27
|
+
EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet4d)
|
|
28
|
+
EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet4d)
|
|
29
|
+
EIGEN_DOUBLE_PACKET_FUNCTION(exp, Packet4d)
|
|
30
|
+
EIGEN_DOUBLE_PACKET_FUNCTION(tanh, Packet4d)
|
|
31
|
+
EIGEN_DOUBLE_PACKET_FUNCTION(cbrt, Packet4d)
|
|
34
32
|
#ifdef EIGEN_VECTORIZE_AVX2
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
__m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
|
|
38
|
-
__m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
|
|
39
|
-
return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
|
|
33
|
+
EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d)
|
|
34
|
+
EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d)
|
|
40
35
|
#endif
|
|
41
|
-
|
|
36
|
+
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4d)
|
|
37
|
+
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4d)
|
|
42
38
|
|
|
43
|
-
//
|
|
44
|
-
//
|
|
45
|
-
//
|
|
46
|
-
//
|
|
39
|
+
// Notice that for newer processors, it is counterproductive to use Newton
|
|
40
|
+
// iteration for square root. In particular, Skylake and Zen2 processors
|
|
41
|
+
// have approximately doubled throughput of the _mm_sqrt_ps instruction
|
|
42
|
+
// compared to their predecessors.
|
|
47
43
|
template <>
|
|
48
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
49
|
-
|
|
50
|
-
Packet8f x = _x;
|
|
51
|
-
|
|
52
|
-
// Some useful values.
|
|
53
|
-
_EIGEN_DECLARE_CONST_Packet8i(one, 1);
|
|
54
|
-
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
|
|
55
|
-
_EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
|
|
56
|
-
_EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
|
|
57
|
-
_EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
|
|
58
|
-
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
|
|
59
|
-
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
|
|
60
|
-
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
|
|
61
|
-
_EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);
|
|
62
|
-
|
|
63
|
-
// Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period.
|
|
64
|
-
Packet8f z = pmul(x, p8f_one_over_pi);
|
|
65
|
-
Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four));
|
|
66
|
-
x = pmadd(shift, p8f_neg_pi_first, x);
|
|
67
|
-
x = pmadd(shift, p8f_neg_pi_second, x);
|
|
68
|
-
x = pmadd(shift, p8f_neg_pi_third, x);
|
|
69
|
-
z = pmul(x, p8f_four_over_pi);
|
|
70
|
-
|
|
71
|
-
// Make a mask for the entries that need flipping, i.e. wherever the shift
|
|
72
|
-
// is odd.
|
|
73
|
-
Packet8i shift_ints = _mm256_cvtps_epi32(shift);
|
|
74
|
-
Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
|
|
75
|
-
Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
|
|
76
|
-
|
|
77
|
-
// Create a mask for which interpolant to use, i.e. if z > 1, then the mask
|
|
78
|
-
// is set to ones for that entry.
|
|
79
|
-
Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
|
|
80
|
-
|
|
81
|
-
// Evaluate the polynomial for the interval [1,3] in z.
|
|
82
|
-
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
|
|
83
|
-
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
|
|
84
|
-
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
|
|
85
|
-
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
|
|
86
|
-
Packet8f z_minus_two = psub(z, p8f_two);
|
|
87
|
-
Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
|
|
88
|
-
Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
|
|
89
|
-
right = pmadd(right, z_minus_two2, p8f_coeff_right_2);
|
|
90
|
-
right = pmadd(right, z_minus_two2, p8f_coeff_right_0);
|
|
91
|
-
|
|
92
|
-
// Evaluate the polynomial for the interval [-1,1] in z.
|
|
93
|
-
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
|
|
94
|
-
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
|
|
95
|
-
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
|
|
96
|
-
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
|
|
97
|
-
Packet8f z2 = pmul(z, z);
|
|
98
|
-
Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
|
|
99
|
-
left = pmadd(left, z2, p8f_coeff_left_3);
|
|
100
|
-
left = pmadd(left, z2, p8f_coeff_left_1);
|
|
101
|
-
left = pmul(left, z);
|
|
102
|
-
|
|
103
|
-
// Assemble the results, i.e. select the left and right polynomials.
|
|
104
|
-
left = _mm256_andnot_ps(ival_mask, left);
|
|
105
|
-
right = _mm256_and_ps(ival_mask, right);
|
|
106
|
-
Packet8f res = _mm256_or_ps(left, right);
|
|
107
|
-
|
|
108
|
-
// Flip the sign on the odd intervals and return the result.
|
|
109
|
-
res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
|
|
110
|
-
return res;
|
|
44
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f psqrt<Packet8f>(const Packet8f& _x) {
|
|
45
|
+
return _mm256_sqrt_ps(_x);
|
|
111
46
|
}
|
|
112
|
-
|
|
113
|
-
// Natural logarithm
|
|
114
|
-
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
|
115
|
-
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
|
|
116
|
-
// be easily approximated by a polynomial centered on m=1 for stability.
|
|
117
|
-
// TODO(gonnet): Further reduce the interval allowing for lower-degree
|
|
118
|
-
// polynomial interpolants -> ... -> profit!
|
|
119
47
|
template <>
|
|
120
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
121
|
-
|
|
122
|
-
Packet8f x = _x;
|
|
123
|
-
_EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
|
|
124
|
-
_EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
|
|
125
|
-
_EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);
|
|
126
|
-
|
|
127
|
-
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
|
128
|
-
|
|
129
|
-
// The smallest non denormalized float number.
|
|
130
|
-
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
|
|
131
|
-
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);
|
|
132
|
-
|
|
133
|
-
// Polynomial coefficients.
|
|
134
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
|
|
135
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
|
|
136
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
|
|
137
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
|
|
138
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
|
|
139
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
|
|
140
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
|
|
141
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
|
|
142
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
|
|
143
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
|
|
144
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
|
|
145
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);
|
|
146
|
-
|
|
147
|
-
Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ); // not greater equal is true if x is NaN
|
|
148
|
-
Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
|
|
149
|
-
|
|
150
|
-
// Truncate input values to the minimum positive normal.
|
|
151
|
-
x = pmax(x, p8f_min_norm_pos);
|
|
152
|
-
|
|
153
|
-
Packet8f emm0 = pshiftright(x,23);
|
|
154
|
-
Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
|
|
155
|
-
|
|
156
|
-
// Set the exponents to -1, i.e. x are in the range [0.5,1).
|
|
157
|
-
x = _mm256_and_ps(x, p8f_inv_mant_mask);
|
|
158
|
-
x = _mm256_or_ps(x, p8f_half);
|
|
159
|
-
|
|
160
|
-
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
|
|
161
|
-
// and shift by -1. The values are then centered around 0, which improves
|
|
162
|
-
// the stability of the polynomial evaluation.
|
|
163
|
-
// if( x < SQRTHF ) {
|
|
164
|
-
// e -= 1;
|
|
165
|
-
// x = x + x - 1.0;
|
|
166
|
-
// } else { x = x - 1.0; }
|
|
167
|
-
Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
|
|
168
|
-
Packet8f tmp = _mm256_and_ps(x, mask);
|
|
169
|
-
x = psub(x, p8f_1);
|
|
170
|
-
e = psub(e, _mm256_and_ps(p8f_1, mask));
|
|
171
|
-
x = padd(x, tmp);
|
|
172
|
-
|
|
173
|
-
Packet8f x2 = pmul(x, x);
|
|
174
|
-
Packet8f x3 = pmul(x2, x);
|
|
175
|
-
|
|
176
|
-
// Evaluate the polynomial approximant of degree 8 in three parts, probably
|
|
177
|
-
// to improve instruction-level parallelism.
|
|
178
|
-
Packet8f y, y1, y2;
|
|
179
|
-
y = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
|
|
180
|
-
y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
|
|
181
|
-
y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
|
|
182
|
-
y = pmadd(y, x, p8f_cephes_log_p2);
|
|
183
|
-
y1 = pmadd(y1, x, p8f_cephes_log_p5);
|
|
184
|
-
y2 = pmadd(y2, x, p8f_cephes_log_p8);
|
|
185
|
-
y = pmadd(y, x3, y1);
|
|
186
|
-
y = pmadd(y, x3, y2);
|
|
187
|
-
y = pmul(y, x3);
|
|
188
|
-
|
|
189
|
-
// Add the logarithm of the exponent back to the result of the interpolation.
|
|
190
|
-
y1 = pmul(e, p8f_cephes_log_q1);
|
|
191
|
-
tmp = pmul(x2, p8f_half);
|
|
192
|
-
y = padd(y, y1);
|
|
193
|
-
x = psub(x, tmp);
|
|
194
|
-
y2 = pmul(e, p8f_cephes_log_q2);
|
|
195
|
-
x = padd(x, y);
|
|
196
|
-
x = padd(x, y2);
|
|
197
|
-
|
|
198
|
-
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
|
|
199
|
-
return _mm256_or_ps(
|
|
200
|
-
_mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
|
|
201
|
-
_mm256_and_ps(iszero_mask, p8f_minus_inf));
|
|
48
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d psqrt<Packet4d>(const Packet4d& _x) {
|
|
49
|
+
return _mm256_sqrt_pd(_x);
|
|
202
50
|
}
|
|
203
51
|
|
|
204
|
-
//
|
|
205
|
-
|
|
206
|
-
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
|
52
|
+
// Even on Skylake, using Newton iteration is a win for reciprocal square root.
|
|
53
|
+
#if EIGEN_FAST_MATH
|
|
207
54
|
template <>
|
|
208
|
-
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);
|
|
218
|
-
|
|
219
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
|
|
220
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
|
|
221
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
|
|
222
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
|
|
223
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
|
|
224
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);
|
|
225
|
-
|
|
226
|
-
// Clamp x.
|
|
227
|
-
Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);
|
|
55
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f prsqrt<Packet8f>(const Packet8f& a) {
|
|
56
|
+
// _mm256_rsqrt_ps returns -inf for negative denormals.
|
|
57
|
+
// _mm512_rsqrt**_ps returns -NaN for negative denormals. We may want
|
|
58
|
+
// consistency here.
|
|
59
|
+
// const Packet8f rsqrt = pselect(pcmp_lt(a, pzero(a)),
|
|
60
|
+
// pset1<Packet8f>(-NumTraits<float>::quiet_NaN()),
|
|
61
|
+
// _mm256_rsqrt_ps(a));
|
|
62
|
+
return generic_rsqrt_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rsqrt_ps(a));
|
|
63
|
+
}
|
|
228
64
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
Packet8f
|
|
65
|
+
template <>
|
|
66
|
+
EIGEN_STRONG_INLINE Packet8f preciprocal<Packet8f>(const Packet8f& a) {
|
|
67
|
+
return generic_reciprocal_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rcp_ps(a));
|
|
68
|
+
}
|
|
232
69
|
|
|
233
|
-
// Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is
|
|
234
|
-
// subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating
|
|
235
|
-
// truncation errors. Note that we don't use the "pmadd" function here to
|
|
236
|
-
// ensure that a precision-preserving FMA instruction is used.
|
|
237
|
-
#ifdef EIGEN_VECTORIZE_FMA
|
|
238
|
-
_EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
|
|
239
|
-
Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
|
|
240
|
-
#else
|
|
241
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
|
|
242
|
-
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
|
|
243
|
-
Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
|
|
244
|
-
r = psub(r, pmul(m, p8f_cephes_exp_C2));
|
|
245
70
|
#endif
|
|
246
71
|
|
|
247
|
-
Packet8f r2 = pmul(r, r);
|
|
248
|
-
|
|
249
|
-
// TODO(gonnet): Split into odd/even polynomials and try to exploit
|
|
250
|
-
// instruction-level parallelism.
|
|
251
|
-
Packet8f y = p8f_cephes_exp_p0;
|
|
252
|
-
y = pmadd(y, r, p8f_cephes_exp_p1);
|
|
253
|
-
y = pmadd(y, r, p8f_cephes_exp_p2);
|
|
254
|
-
y = pmadd(y, r, p8f_cephes_exp_p3);
|
|
255
|
-
y = pmadd(y, r, p8f_cephes_exp_p4);
|
|
256
|
-
y = pmadd(y, r, p8f_cephes_exp_p5);
|
|
257
|
-
y = pmadd(y, r2, r);
|
|
258
|
-
y = padd(y, p8f_1);
|
|
259
|
-
|
|
260
|
-
// Build emm0 = 2^m.
|
|
261
|
-
Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
|
|
262
|
-
emm0 = pshiftleft(emm0, 23);
|
|
263
|
-
|
|
264
|
-
// Return 2^m * exp(r).
|
|
265
|
-
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
// Hyperbolic Tangent function.
|
|
269
72
|
template <>
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
73
|
+
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
|
|
74
|
+
Packet8f fexponent;
|
|
75
|
+
const Packet8h out = float2half(pfrexp<Packet8f>(half2float(a), fexponent));
|
|
76
|
+
exponent = float2half(fexponent);
|
|
77
|
+
return out;
|
|
273
78
|
}
|
|
274
79
|
|
|
275
80
|
template <>
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
Packet4d x = _x;
|
|
279
|
-
|
|
280
|
-
_EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
|
|
281
|
-
_EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
|
|
282
|
-
_EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
|
|
283
|
-
|
|
284
|
-
_EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
|
|
285
|
-
_EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
|
|
286
|
-
|
|
287
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
|
|
288
|
-
|
|
289
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
|
|
290
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
|
|
291
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
|
|
292
|
-
|
|
293
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
|
|
294
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
|
|
295
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
|
|
296
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
|
|
297
|
-
|
|
298
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
|
|
299
|
-
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
|
|
300
|
-
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
|
|
301
|
-
|
|
302
|
-
Packet4d tmp, fx;
|
|
303
|
-
|
|
304
|
-
// clamp x
|
|
305
|
-
x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
|
|
306
|
-
// Express exp(x) as exp(g + n*log(2)).
|
|
307
|
-
fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
|
|
308
|
-
|
|
309
|
-
// Get the integer modulus of log(2), i.e. the "n" described above.
|
|
310
|
-
fx = _mm256_floor_pd(fx);
|
|
311
|
-
|
|
312
|
-
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
|
|
313
|
-
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
|
|
314
|
-
// digits right.
|
|
315
|
-
tmp = pmul(fx, p4d_cephes_exp_C1);
|
|
316
|
-
Packet4d z = pmul(fx, p4d_cephes_exp_C2);
|
|
317
|
-
x = psub(x, tmp);
|
|
318
|
-
x = psub(x, z);
|
|
319
|
-
|
|
320
|
-
Packet4d x2 = pmul(x, x);
|
|
321
|
-
|
|
322
|
-
// Evaluate the numerator polynomial of the rational interpolant.
|
|
323
|
-
Packet4d px = p4d_cephes_exp_p0;
|
|
324
|
-
px = pmadd(px, x2, p4d_cephes_exp_p1);
|
|
325
|
-
px = pmadd(px, x2, p4d_cephes_exp_p2);
|
|
326
|
-
px = pmul(px, x);
|
|
327
|
-
|
|
328
|
-
// Evaluate the denominator polynomial of the rational interpolant.
|
|
329
|
-
Packet4d qx = p4d_cephes_exp_q0;
|
|
330
|
-
qx = pmadd(qx, x2, p4d_cephes_exp_q1);
|
|
331
|
-
qx = pmadd(qx, x2, p4d_cephes_exp_q2);
|
|
332
|
-
qx = pmadd(qx, x2, p4d_cephes_exp_q3);
|
|
333
|
-
|
|
334
|
-
// I don't really get this bit, copied from the SSE2 routines, so...
|
|
335
|
-
// TODO(gonnet): Figure out what is going on here, perhaps find a better
|
|
336
|
-
// rational interpolant?
|
|
337
|
-
x = _mm256_div_pd(px, psub(qx, px));
|
|
338
|
-
x = pmadd(p4d_2, x, p4d_1);
|
|
339
|
-
|
|
340
|
-
// Build e=2^n by constructing the exponents in a 128-bit vector and
|
|
341
|
-
// shifting them to where they belong in double-precision values.
|
|
342
|
-
__m128i emm0 = _mm256_cvtpd_epi32(fx);
|
|
343
|
-
emm0 = _mm_add_epi32(emm0, p4i_1023);
|
|
344
|
-
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
|
|
345
|
-
__m128i lo = _mm_slli_epi64(emm0, 52);
|
|
346
|
-
__m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
|
|
347
|
-
__m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
|
|
348
|
-
e = _mm256_insertf128_si256(e, hi, 1);
|
|
349
|
-
|
|
350
|
-
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
|
351
|
-
// non-finite values in the input.
|
|
352
|
-
return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
|
|
81
|
+
EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent) {
|
|
82
|
+
return float2half(pldexp<Packet8f>(half2float(a), half2float(exponent)));
|
|
353
83
|
}
|
|
354
84
|
|
|
355
|
-
// Functions for sqrt.
|
|
356
|
-
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
|
357
|
-
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
|
358
|
-
// exact solution. It does not handle +inf, or denormalized numbers correctly.
|
|
359
|
-
// The main advantage of this approach is not just speed, but also the fact that
|
|
360
|
-
// it can be inlined and pipelined with other computations, further reducing its
|
|
361
|
-
// effective latency. This is similar to Quake3's fast inverse square root.
|
|
362
|
-
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
|
363
|
-
#if EIGEN_FAST_MATH
|
|
364
85
|
template <>
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
_CMP_LT_OQ),
|
|
371
|
-
_mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
|
|
372
|
-
|
|
373
|
-
// Compute approximate reciprocal sqrt.
|
|
374
|
-
Packet8f x = _mm256_rsqrt_ps(_x);
|
|
375
|
-
// Do a single step of Newton's iteration.
|
|
376
|
-
x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
|
|
377
|
-
// Flush results for denormals to zero.
|
|
378
|
-
return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
|
|
86
|
+
EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
|
|
87
|
+
Packet8f fexponent;
|
|
88
|
+
const Packet8bf out = F32ToBf16(pfrexp<Packet8f>(Bf16ToF32(a), fexponent));
|
|
89
|
+
exponent = F32ToBf16(fexponent);
|
|
90
|
+
return out;
|
|
379
91
|
}
|
|
380
|
-
#else
|
|
381
|
-
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
382
|
-
Packet8f psqrt<Packet8f>(const Packet8f& x) {
|
|
383
|
-
return _mm256_sqrt_ps(x);
|
|
384
|
-
}
|
|
385
|
-
#endif
|
|
386
|
-
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
387
|
-
Packet4d psqrt<Packet4d>(const Packet4d& x) {
|
|
388
|
-
return _mm256_sqrt_pd(x);
|
|
389
|
-
}
|
|
390
|
-
#if EIGEN_FAST_MATH
|
|
391
|
-
|
|
392
|
-
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
393
|
-
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
|
394
|
-
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
|
|
395
|
-
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
|
|
396
|
-
_EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
|
|
397
|
-
_EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
|
|
398
|
-
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
|
|
399
|
-
|
|
400
|
-
Packet8f neg_half = pmul(_x, p8f_minus_half);
|
|
401
92
|
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
|
|
406
|
-
|
|
407
|
-
// Fill in NaNs and Infs for the negative/zero entries.
|
|
408
|
-
Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
|
|
409
|
-
Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
|
|
410
|
-
Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
|
|
411
|
-
_mm256_and_ps(zero_mask, p8f_inf));
|
|
412
|
-
|
|
413
|
-
// Do a single step of Newton's iteration.
|
|
414
|
-
x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
|
|
415
|
-
|
|
416
|
-
// Insert NaNs and Infs in all the right places.
|
|
417
|
-
return _mm256_or_ps(x, infs_and_nans);
|
|
93
|
+
template <>
|
|
94
|
+
EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& exponent) {
|
|
95
|
+
return F32ToBf16(pldexp<Packet8f>(Bf16ToF32(a), Bf16ToF32(exponent)));
|
|
418
96
|
}
|
|
419
97
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
Packet8f
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
98
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)
|
|
99
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
|
|
100
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp2)
|
|
101
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)
|
|
102
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)
|
|
103
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)
|
|
104
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2)
|
|
105
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal)
|
|
106
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
|
|
107
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin)
|
|
108
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
|
|
109
|
+
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
|
|
110
|
+
|
|
111
|
+
#ifndef EIGEN_VECTORIZE_AVX512FP16
|
|
112
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
|
|
113
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
|
|
114
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp2)
|
|
115
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)
|
|
116
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog)
|
|
117
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)
|
|
118
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2)
|
|
119
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal)
|
|
120
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
|
|
121
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
|
|
122
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
|
|
123
|
+
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
|
|
426
124
|
#endif
|
|
427
125
|
|
|
428
|
-
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
429
|
-
Packet4d prsqrt<Packet4d>(const Packet4d& x) {
|
|
430
|
-
_EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
|
|
431
|
-
return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
|
|
435
126
|
} // end namespace internal
|
|
436
127
|
|
|
437
128
|
} // end namespace Eigen
|