@smake/eigen 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +26 -0
- package/eigen/COPYING.GPL +674 -0
- package/eigen/COPYING.LGPL +502 -0
- package/eigen/COPYING.MINPACK +51 -0
- package/eigen/COPYING.MPL2 +373 -0
- package/eigen/COPYING.README +18 -0
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +5 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -9,10 +9,6 @@
|
|
|
9
9
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
10
10
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
11
11
|
|
|
12
|
-
/* The sin, cos, exp, and log functions of this file come from
|
|
13
|
-
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
12
|
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
|
17
13
|
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
|
18
14
|
|
|
@@ -20,180 +16,28 @@ namespace Eigen {
|
|
|
20
16
|
|
|
21
17
|
namespace internal {
|
|
22
18
|
|
|
23
|
-
static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
|
24
|
-
static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
|
25
|
-
static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
|
26
|
-
static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
|
|
27
|
-
|
|
28
|
-
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
|
29
|
-
|
|
30
|
-
/* the smallest non denormalized float number */
|
|
31
|
-
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
|
|
32
|
-
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
|
|
33
|
-
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
|
|
34
|
-
|
|
35
|
-
/* natural logarithm computed for 4 simultaneous float
|
|
36
|
-
return NaN for x <= 0
|
|
37
|
-
*/
|
|
38
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
|
|
39
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
|
|
40
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
|
|
41
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
|
|
42
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
|
|
43
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
|
|
44
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
|
|
45
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
|
|
46
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
|
|
47
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
|
|
48
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
|
|
49
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
|
|
50
|
-
|
|
51
|
-
static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
|
|
52
|
-
static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
|
|
53
|
-
|
|
54
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
|
|
55
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
|
|
56
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
|
|
57
|
-
|
|
58
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
|
|
59
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
|
|
60
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
|
|
61
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
|
|
62
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
|
|
63
|
-
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
|
|
64
|
-
|
|
65
|
-
#ifdef __VSX__
|
|
66
|
-
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
|
67
|
-
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
|
68
|
-
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
|
69
|
-
|
|
70
|
-
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
|
71
|
-
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
|
72
|
-
|
|
73
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
|
74
|
-
|
|
75
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
|
76
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
|
77
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
|
78
|
-
|
|
79
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
|
80
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
|
81
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
|
82
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
|
83
|
-
|
|
84
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
|
85
|
-
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
|
86
|
-
|
|
87
|
-
#ifdef __POWER8_VECTOR__
|
|
88
|
-
static Packet2l p2l_1023 = { 1023, 1023 };
|
|
89
|
-
static Packet2ul p2ul_52 = { 52, 52 };
|
|
90
|
-
#endif
|
|
91
|
-
|
|
92
|
-
#endif
|
|
93
|
-
|
|
94
19
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
95
20
|
Packet4f plog<Packet4f>(const Packet4f& _x)
|
|
96
21
|
{
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
Packet4i emm0;
|
|
100
|
-
|
|
101
|
-
/* isvalid_mask is 0 if x < 0 or x is NaN. */
|
|
102
|
-
Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
|
|
103
|
-
Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
|
|
104
|
-
|
|
105
|
-
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
|
|
106
|
-
emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
|
|
107
|
-
reinterpret_cast<Packet4ui>(p4i_23));
|
|
108
|
-
|
|
109
|
-
/* keep only the fractional part */
|
|
110
|
-
x = pand(x, p4f_inv_mant_mask);
|
|
111
|
-
x = por(x, p4f_half);
|
|
112
|
-
|
|
113
|
-
emm0 = psub(emm0, p4i_0x7f);
|
|
114
|
-
Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
|
|
115
|
-
|
|
116
|
-
/* part2:
|
|
117
|
-
if( x < SQRTHF ) {
|
|
118
|
-
e -= 1;
|
|
119
|
-
x = x + x - 1.0;
|
|
120
|
-
} else { x = x - 1.0; }
|
|
121
|
-
*/
|
|
122
|
-
Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
|
|
123
|
-
Packet4f tmp = pand(x, mask);
|
|
124
|
-
x = psub(x, p4f_1);
|
|
125
|
-
e = psub(e, pand(p4f_1, mask));
|
|
126
|
-
x = padd(x, tmp);
|
|
127
|
-
|
|
128
|
-
Packet4f x2 = pmul(x,x);
|
|
129
|
-
Packet4f x3 = pmul(x2,x);
|
|
130
|
-
|
|
131
|
-
Packet4f y, y1, y2;
|
|
132
|
-
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
|
|
133
|
-
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
|
|
134
|
-
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
|
|
135
|
-
y = pmadd(y , x, p4f_cephes_log_p2);
|
|
136
|
-
y1 = pmadd(y1, x, p4f_cephes_log_p5);
|
|
137
|
-
y2 = pmadd(y2, x, p4f_cephes_log_p8);
|
|
138
|
-
y = pmadd(y, x3, y1);
|
|
139
|
-
y = pmadd(y, x3, y2);
|
|
140
|
-
y = pmul(y, x3);
|
|
141
|
-
|
|
142
|
-
y1 = pmul(e, p4f_cephes_log_q1);
|
|
143
|
-
tmp = pmul(x2, p4f_half);
|
|
144
|
-
y = padd(y, y1);
|
|
145
|
-
x = psub(x, tmp);
|
|
146
|
-
y2 = pmul(e, p4f_cephes_log_q2);
|
|
147
|
-
x = padd(x, y);
|
|
148
|
-
x = padd(x, y2);
|
|
149
|
-
// negative arg will be NAN, 0 will be -INF
|
|
150
|
-
x = vec_sel(x, p4f_minus_inf, iszero_mask);
|
|
151
|
-
x = vec_sel(p4f_minus_nan, x, isvalid_mask);
|
|
152
|
-
return x;
|
|
22
|
+
return plog_float(_x);
|
|
153
23
|
}
|
|
154
24
|
|
|
155
25
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
156
26
|
Packet4f pexp<Packet4f>(const Packet4f& _x)
|
|
157
27
|
{
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
Packet4f tmp, fx;
|
|
161
|
-
Packet4i emm0;
|
|
162
|
-
|
|
163
|
-
// clamp x
|
|
164
|
-
x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
|
|
165
|
-
|
|
166
|
-
// express exp(x) as exp(g + n*log(2))
|
|
167
|
-
fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
|
|
168
|
-
|
|
169
|
-
fx = pfloor(fx);
|
|
170
|
-
|
|
171
|
-
tmp = pmul(fx, p4f_cephes_exp_C1);
|
|
172
|
-
Packet4f z = pmul(fx, p4f_cephes_exp_C2);
|
|
173
|
-
x = psub(x, tmp);
|
|
174
|
-
x = psub(x, z);
|
|
175
|
-
|
|
176
|
-
z = pmul(x,x);
|
|
177
|
-
|
|
178
|
-
Packet4f y = p4f_cephes_exp_p0;
|
|
179
|
-
y = pmadd(y, x, p4f_cephes_exp_p1);
|
|
180
|
-
y = pmadd(y, x, p4f_cephes_exp_p2);
|
|
181
|
-
y = pmadd(y, x, p4f_cephes_exp_p3);
|
|
182
|
-
y = pmadd(y, x, p4f_cephes_exp_p4);
|
|
183
|
-
y = pmadd(y, x, p4f_cephes_exp_p5);
|
|
184
|
-
y = pmadd(y, z, x);
|
|
185
|
-
y = padd(y, p4f_1);
|
|
28
|
+
return pexp_float(_x);
|
|
29
|
+
}
|
|
186
30
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
31
|
+
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
32
|
+
Packet4f psin<Packet4f>(const Packet4f& _x)
|
|
33
|
+
{
|
|
34
|
+
return psin_float(_x);
|
|
35
|
+
}
|
|
191
36
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
return
|
|
196
|
-
isnumber_mask);
|
|
37
|
+
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
38
|
+
Packet4f pcos<Packet4f>(const Packet4f& _x)
|
|
39
|
+
{
|
|
40
|
+
return pcos_float(_x);
|
|
197
41
|
}
|
|
198
42
|
|
|
199
43
|
#ifndef EIGEN_COMP_CLANG
|
|
@@ -225,95 +69,19 @@ Packet2d psqrt<Packet2d>(const Packet2d& x)
|
|
|
225
69
|
return vec_sqrt(x);
|
|
226
70
|
}
|
|
227
71
|
|
|
228
|
-
// VSX support varies between different compilers and even different
|
|
229
|
-
// versions of the same compiler. For gcc version >= 4.9.3, we can use
|
|
230
|
-
// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
|
|
231
|
-
// a slow version that works with older compilers.
|
|
232
|
-
// Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles
|
|
233
|
-
// are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963
|
|
234
|
-
static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
|
|
235
|
-
#if EIGEN_GNUC_AT_LEAST(5, 4) || \
|
|
236
|
-
(EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)
|
|
237
|
-
return vec_cts(x, 0); // TODO: check clang version.
|
|
238
|
-
#else
|
|
239
|
-
double tmp[2];
|
|
240
|
-
memcpy(tmp, &x, sizeof(tmp));
|
|
241
|
-
Packet2l l = { static_cast<long long>(tmp[0]),
|
|
242
|
-
static_cast<long long>(tmp[1]) };
|
|
243
|
-
return l;
|
|
244
|
-
#endif
|
|
245
|
-
}
|
|
246
|
-
|
|
247
72
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
248
73
|
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
|
249
74
|
{
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
Packet2d tmp, fx;
|
|
253
|
-
Packet2l emm0;
|
|
254
|
-
|
|
255
|
-
// clamp x
|
|
256
|
-
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
|
|
257
|
-
|
|
258
|
-
/* express exp(x) as exp(g + n*log(2)) */
|
|
259
|
-
fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half);
|
|
260
|
-
|
|
261
|
-
fx = pfloor(fx);
|
|
262
|
-
|
|
263
|
-
tmp = pmul(fx, p2d_cephes_exp_C1);
|
|
264
|
-
Packet2d z = pmul(fx, p2d_cephes_exp_C2);
|
|
265
|
-
x = psub(x, tmp);
|
|
266
|
-
x = psub(x, z);
|
|
267
|
-
|
|
268
|
-
Packet2d x2 = pmul(x,x);
|
|
269
|
-
|
|
270
|
-
Packet2d px = p2d_cephes_exp_p0;
|
|
271
|
-
px = pmadd(px, x2, p2d_cephes_exp_p1);
|
|
272
|
-
px = pmadd(px, x2, p2d_cephes_exp_p2);
|
|
273
|
-
px = pmul (px, x);
|
|
274
|
-
|
|
275
|
-
Packet2d qx = p2d_cephes_exp_q0;
|
|
276
|
-
qx = pmadd(qx, x2, p2d_cephes_exp_q1);
|
|
277
|
-
qx = pmadd(qx, x2, p2d_cephes_exp_q2);
|
|
278
|
-
qx = pmadd(qx, x2, p2d_cephes_exp_q3);
|
|
279
|
-
|
|
280
|
-
x = pdiv(px,psub(qx,px));
|
|
281
|
-
x = pmadd(p2d_2,x,p2d_1);
|
|
282
|
-
|
|
283
|
-
// build 2^n
|
|
284
|
-
emm0 = ConvertToPacket2l(fx);
|
|
285
|
-
|
|
286
|
-
#ifdef __POWER8_VECTOR__
|
|
287
|
-
emm0 = vec_add(emm0, p2l_1023);
|
|
288
|
-
emm0 = vec_sl(emm0, p2ul_52);
|
|
289
|
-
#else
|
|
290
|
-
// Code is a bit complex for POWER7. There is actually a
|
|
291
|
-
// vec_xxsldi intrinsic but it is not supported by some gcc versions.
|
|
292
|
-
// So we shift (52-32) bits and do a word swap with zeros.
|
|
293
|
-
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
|
|
294
|
-
_EIGEN_DECLARE_CONST_Packet4i(20, 20); // 52 - 32
|
|
295
|
-
|
|
296
|
-
Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
|
|
297
|
-
emm04i = vec_add(emm04i, p4i_1023);
|
|
298
|
-
emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
|
|
299
|
-
static const Packet16uc perm = {
|
|
300
|
-
0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
|
|
301
|
-
0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
|
|
302
|
-
#ifdef _BIG_ENDIAN
|
|
303
|
-
emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
|
|
304
|
-
#else
|
|
305
|
-
emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
|
|
306
|
-
#endif
|
|
307
|
-
|
|
75
|
+
return pexp_double(_x);
|
|
76
|
+
}
|
|
308
77
|
#endif
|
|
309
78
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
79
|
+
// Hyperbolic Tangent function.
|
|
80
|
+
template <>
|
|
81
|
+
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
|
82
|
+
ptanh<Packet4f>(const Packet4f& x) {
|
|
83
|
+
return internal::generic_fast_tanh_float(x);
|
|
315
84
|
}
|
|
316
|
-
#endif
|
|
317
85
|
|
|
318
86
|
} // end namespace internal
|
|
319
87
|
|