@smake/eigen 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +1 -1
- package/eigen/COPYING.MINPACK +51 -52
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +2 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -44,23 +44,29 @@ struct default_packet_traits
|
|
|
44
44
|
enum {
|
|
45
45
|
HasHalfPacket = 0,
|
|
46
46
|
|
|
47
|
-
HasAdd
|
|
48
|
-
HasSub
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
47
|
+
HasAdd = 1,
|
|
48
|
+
HasSub = 1,
|
|
49
|
+
HasShift = 1,
|
|
50
|
+
HasMul = 1,
|
|
51
|
+
HasNegate = 1,
|
|
52
|
+
HasAbs = 1,
|
|
53
|
+
HasArg = 0,
|
|
54
|
+
HasAbs2 = 1,
|
|
55
|
+
HasAbsDiff = 0,
|
|
56
|
+
HasMin = 1,
|
|
57
|
+
HasMax = 1,
|
|
58
|
+
HasConj = 1,
|
|
57
59
|
HasSetLinear = 1,
|
|
58
|
-
HasBlend
|
|
60
|
+
HasBlend = 0,
|
|
61
|
+
// This flag is used to indicate whether packet comparison is supported.
|
|
62
|
+
// pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
|
|
63
|
+
HasCmp = 0,
|
|
59
64
|
|
|
60
65
|
HasDiv = 0,
|
|
61
66
|
HasSqrt = 0,
|
|
62
67
|
HasRsqrt = 0,
|
|
63
68
|
HasExp = 0,
|
|
69
|
+
HasExpm1 = 0,
|
|
64
70
|
HasLog = 0,
|
|
65
71
|
HasLog1p = 0,
|
|
66
72
|
HasLog10 = 0,
|
|
@@ -81,14 +87,18 @@ struct default_packet_traits
|
|
|
81
87
|
HasPolygamma = 0,
|
|
82
88
|
HasErf = 0,
|
|
83
89
|
HasErfc = 0,
|
|
90
|
+
HasNdtri = 0,
|
|
91
|
+
HasBessel = 0,
|
|
84
92
|
HasIGamma = 0,
|
|
93
|
+
HasIGammaDerA = 0,
|
|
94
|
+
HasGammaSampleDerAlpha = 0,
|
|
85
95
|
HasIGammac = 0,
|
|
86
96
|
HasBetaInc = 0,
|
|
87
97
|
|
|
88
98
|
HasRound = 0,
|
|
99
|
+
HasRint = 0,
|
|
89
100
|
HasFloor = 0,
|
|
90
101
|
HasCeil = 0,
|
|
91
|
-
|
|
92
102
|
HasSign = 0
|
|
93
103
|
};
|
|
94
104
|
};
|
|
@@ -119,6 +129,22 @@ template<typename T> struct packet_traits : default_packet_traits
|
|
|
119
129
|
|
|
120
130
|
template<typename T> struct packet_traits<const T> : packet_traits<T> { };
|
|
121
131
|
|
|
132
|
+
template<typename T> struct unpacket_traits
|
|
133
|
+
{
|
|
134
|
+
typedef T type;
|
|
135
|
+
typedef T half;
|
|
136
|
+
enum
|
|
137
|
+
{
|
|
138
|
+
size = 1,
|
|
139
|
+
alignment = 1,
|
|
140
|
+
vectorizable = false,
|
|
141
|
+
masked_load_available=false,
|
|
142
|
+
masked_store_available=false
|
|
143
|
+
};
|
|
144
|
+
};
|
|
145
|
+
|
|
146
|
+
template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
|
|
147
|
+
|
|
122
148
|
template <typename Src, typename Tgt> struct type_casting_traits {
|
|
123
149
|
enum {
|
|
124
150
|
VectorizedCast = 0,
|
|
@@ -127,6 +153,34 @@ template <typename Src, typename Tgt> struct type_casting_traits {
|
|
|
127
153
|
};
|
|
128
154
|
};
|
|
129
155
|
|
|
156
|
+
/** \internal Wrapper to ensure that multiple packet types can map to the same
|
|
157
|
+
underlying vector type. */
|
|
158
|
+
template<typename T, int unique_id = 0>
|
|
159
|
+
struct eigen_packet_wrapper
|
|
160
|
+
{
|
|
161
|
+
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
|
|
162
|
+
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
|
|
163
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
|
|
164
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
|
|
165
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
|
|
166
|
+
m_val = v;
|
|
167
|
+
return *this;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
T m_val;
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
/** \internal A convenience utility for determining if the type is a scalar.
|
|
175
|
+
* This is used to enable some generic packet implementations.
|
|
176
|
+
*/
|
|
177
|
+
template<typename Packet>
|
|
178
|
+
struct is_scalar {
|
|
179
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
180
|
+
enum {
|
|
181
|
+
value = internal::is_same<Packet, Scalar>::value
|
|
182
|
+
};
|
|
183
|
+
};
|
|
130
184
|
|
|
131
185
|
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
|
|
132
186
|
template <typename SrcPacket, typename TgtPacket>
|
|
@@ -139,75 +193,406 @@ EIGEN_DEVICE_FUNC inline TgtPacket
|
|
|
139
193
|
pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
|
|
140
194
|
return static_cast<TgtPacket>(a);
|
|
141
195
|
}
|
|
142
|
-
|
|
143
196
|
template <typename SrcPacket, typename TgtPacket>
|
|
144
197
|
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
145
198
|
pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
|
|
146
199
|
return static_cast<TgtPacket>(a);
|
|
147
200
|
}
|
|
201
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
202
|
+
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
203
|
+
pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
|
|
204
|
+
const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
|
|
205
|
+
return static_cast<TgtPacket>(a);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/** \internal \returns reinterpret_cast<Target>(a) */
|
|
209
|
+
template <typename Target, typename Packet>
|
|
210
|
+
EIGEN_DEVICE_FUNC inline Target
|
|
211
|
+
preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
|
|
148
212
|
|
|
149
213
|
/** \internal \returns a + b (coeff-wise) */
|
|
150
214
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
151
|
-
padd(const Packet& a,
|
|
152
|
-
|
|
215
|
+
padd(const Packet& a, const Packet& b) { return a+b; }
|
|
216
|
+
// Avoid compiler warning for boolean algebra.
|
|
217
|
+
template<> EIGEN_DEVICE_FUNC inline bool
|
|
218
|
+
padd(const bool& a, const bool& b) { return a || b; }
|
|
153
219
|
|
|
154
220
|
/** \internal \returns a - b (coeff-wise) */
|
|
155
221
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
156
|
-
psub(const Packet& a,
|
|
157
|
-
const Packet& b) { return a-b; }
|
|
222
|
+
psub(const Packet& a, const Packet& b) { return a-b; }
|
|
158
223
|
|
|
159
224
|
/** \internal \returns -a (coeff-wise) */
|
|
160
225
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
161
226
|
pnegate(const Packet& a) { return -a; }
|
|
162
227
|
|
|
163
|
-
|
|
228
|
+
template<> EIGEN_DEVICE_FUNC inline bool
|
|
229
|
+
pnegate(const bool& a) { return !a; }
|
|
164
230
|
|
|
231
|
+
/** \internal \returns conj(a) (coeff-wise) */
|
|
165
232
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
166
233
|
pconj(const Packet& a) { return numext::conj(a); }
|
|
167
234
|
|
|
168
235
|
/** \internal \returns a * b (coeff-wise) */
|
|
169
236
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
170
|
-
pmul(const Packet& a,
|
|
171
|
-
|
|
237
|
+
pmul(const Packet& a, const Packet& b) { return a*b; }
|
|
238
|
+
// Avoid compiler warning for boolean algebra.
|
|
239
|
+
template<> EIGEN_DEVICE_FUNC inline bool
|
|
240
|
+
pmul(const bool& a, const bool& b) { return a && b; }
|
|
172
241
|
|
|
173
242
|
/** \internal \returns a / b (coeff-wise) */
|
|
174
243
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
175
|
-
pdiv(const Packet& a,
|
|
176
|
-
|
|
244
|
+
pdiv(const Packet& a, const Packet& b) { return a/b; }
|
|
245
|
+
|
|
246
|
+
// In the generic case, memset to all one bits.
|
|
247
|
+
template<typename Packet, typename EnableIf = void>
|
|
248
|
+
struct ptrue_impl {
|
|
249
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
|
|
250
|
+
Packet b;
|
|
251
|
+
memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
|
|
252
|
+
return b;
|
|
253
|
+
}
|
|
254
|
+
};
|
|
177
255
|
|
|
178
|
-
|
|
256
|
+
// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
|
|
257
|
+
// Although this is technically not a valid bitmask, the scalar path for pselect
|
|
258
|
+
// uses a comparison to zero, so this should still work in most cases. We don't
|
|
259
|
+
// have another option, since the scalar type requires initialization.
|
|
260
|
+
template<typename T>
|
|
261
|
+
struct ptrue_impl<T,
|
|
262
|
+
typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
|
|
263
|
+
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
|
|
264
|
+
return T(1);
|
|
265
|
+
}
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
/** \internal \returns a packet with all bits set to one (or Scalar(1) for scalar types that require initialization). */
|
|
179
269
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
180
|
-
|
|
181
|
-
|
|
270
|
+
ptrue(const Packet& a) {
|
|
271
|
+
return ptrue_impl<Packet>::run(a);
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
// In the general case, memset to zero.
|
|
275
|
+
template<typename Packet, typename EnableIf = void>
|
|
276
|
+
struct pzero_impl {
|
|
277
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
|
|
278
|
+
Packet b;
|
|
279
|
+
memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
|
|
280
|
+
return b;
|
|
281
|
+
}
|
|
282
|
+
};
|
|
283
|
+
|
|
284
|
+
// For scalars, explicitly set to Scalar(0), since the underlying representation
|
|
285
|
+
// for zero may not consist of all-zero bits.
|
|
286
|
+
template<typename T>
|
|
287
|
+
struct pzero_impl<T,
|
|
288
|
+
typename internal::enable_if<is_scalar<T>::value>::type> {
|
|
289
|
+
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
|
|
290
|
+
return T(0);
|
|
291
|
+
}
|
|
292
|
+
};
|
|
182
293
|
|
|
183
|
-
/** \internal \returns
|
|
294
|
+
/** \internal \returns packet of zeros */
|
|
184
295
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
185
|
-
|
|
186
|
-
|
|
296
|
+
pzero(const Packet& a) {
|
|
297
|
+
return pzero_impl<Packet>::run(a);
|
|
298
|
+
}
|
|
187
299
|
|
|
188
|
-
/** \internal \returns
|
|
300
|
+
/** \internal \returns a <= b as a bit mask */
|
|
189
301
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
190
|
-
|
|
302
|
+
pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
|
|
191
303
|
|
|
192
|
-
/** \internal \returns
|
|
304
|
+
/** \internal \returns a < b as a bit mask */
|
|
193
305
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
194
|
-
|
|
306
|
+
pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
|
|
307
|
+
|
|
308
|
+
/** \internal \returns a == b as a bit mask */
|
|
309
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
310
|
+
pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
|
|
311
|
+
|
|
312
|
+
/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
|
|
313
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
314
|
+
pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
|
|
315
|
+
|
|
316
|
+
template<typename T>
|
|
317
|
+
struct bit_and {
|
|
318
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
|
|
319
|
+
return a & b;
|
|
320
|
+
}
|
|
321
|
+
};
|
|
322
|
+
|
|
323
|
+
template<typename T>
|
|
324
|
+
struct bit_or {
|
|
325
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
|
|
326
|
+
return a | b;
|
|
327
|
+
}
|
|
328
|
+
};
|
|
329
|
+
|
|
330
|
+
template<typename T>
|
|
331
|
+
struct bit_xor {
|
|
332
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
|
|
333
|
+
return a ^ b;
|
|
334
|
+
}
|
|
335
|
+
};
|
|
336
|
+
|
|
337
|
+
template<typename T>
|
|
338
|
+
struct bit_not {
|
|
339
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
|
|
340
|
+
return ~a;
|
|
341
|
+
}
|
|
342
|
+
};
|
|
343
|
+
|
|
344
|
+
// Use operators &, |, ^, ~.
|
|
345
|
+
template<typename T>
|
|
346
|
+
struct operator_bitwise_helper {
|
|
347
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
|
|
348
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
|
|
349
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
|
|
350
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
|
|
351
|
+
};
|
|
352
|
+
|
|
353
|
+
// Apply bitwise operations (and, or, xor, not) byte-by-byte
|
|
354
|
+
template<typename T>
|
|
355
|
+
struct bytewise_bitwise_helper {
|
|
356
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
|
|
357
|
+
return binary(a, b, bit_and<unsigned char>());
|
|
358
|
+
}
|
|
359
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
|
|
360
|
+
return binary(a, b, bit_or<unsigned char>());
|
|
361
|
+
}
|
|
362
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
|
|
363
|
+
return binary(a, b, bit_xor<unsigned char>());
|
|
364
|
+
}
|
|
365
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
|
|
366
|
+
return unary(a,bit_not<unsigned char>());
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
private:
|
|
370
|
+
template<typename Op>
|
|
371
|
+
EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
|
|
372
|
+
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
|
373
|
+
T c;
|
|
374
|
+
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
|
|
375
|
+
for (size_t i = 0; i < sizeof(T); ++i) {
|
|
376
|
+
*c_ptr++ = op(*a_ptr++);
|
|
377
|
+
}
|
|
378
|
+
return c;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
template<typename Op>
|
|
382
|
+
EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
|
|
383
|
+
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
|
384
|
+
const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
|
|
385
|
+
T c;
|
|
386
|
+
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
|
|
387
|
+
for (size_t i = 0; i < sizeof(T); ++i) {
|
|
388
|
+
*c_ptr++ = op(*a_ptr++, *b_ptr++);
|
|
389
|
+
}
|
|
390
|
+
return c;
|
|
391
|
+
}
|
|
392
|
+
};
|
|
393
|
+
|
|
394
|
+
// In the general case, use byte-by-byte manipulation.
|
|
395
|
+
template<typename T, typename EnableIf = void>
|
|
396
|
+
struct bitwise_helper : public bytewise_bitwise_helper<T> {};
|
|
397
|
+
|
|
398
|
+
// For integers or non-trivial scalars, use the built-in bitwise operators (&, |, ^, ~).
|
|
399
|
+
template<typename T>
|
|
400
|
+
struct bitwise_helper<T,
|
|
401
|
+
typename internal::enable_if<
|
|
402
|
+
is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
|
|
403
|
+
> : public operator_bitwise_helper<T> {};
|
|
195
404
|
|
|
196
405
|
/** \internal \returns the bitwise and of \a a and \a b */
|
|
197
406
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
198
|
-
pand(const Packet& a, const Packet& b) {
|
|
407
|
+
pand(const Packet& a, const Packet& b) {
|
|
408
|
+
return bitwise_helper<Packet>::bitwise_and(a, b);
|
|
409
|
+
}
|
|
199
410
|
|
|
200
411
|
/** \internal \returns the bitwise or of \a a and \a b */
|
|
201
412
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
202
|
-
por(const Packet& a, const Packet& b) {
|
|
413
|
+
por(const Packet& a, const Packet& b) {
|
|
414
|
+
return bitwise_helper<Packet>::bitwise_or(a, b);
|
|
415
|
+
}
|
|
203
416
|
|
|
204
417
|
/** \internal \returns the bitwise xor of \a a and \a b */
|
|
205
418
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
206
|
-
pxor(const Packet& a, const Packet& b) {
|
|
419
|
+
pxor(const Packet& a, const Packet& b) {
|
|
420
|
+
return bitwise_helper<Packet>::bitwise_xor(a, b);
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
/** \internal \returns the bitwise not of \a a */
|
|
424
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
425
|
+
pnot(const Packet& a) {
|
|
426
|
+
return bitwise_helper<Packet>::bitwise_not(a);
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
/** \internal \returns the bitwise and of \a a and not \a b */
|
|
430
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
431
|
+
pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
|
|
432
|
+
|
|
433
|
+
// In the general case, use bitwise select.
|
|
434
|
+
template<typename Packet, typename EnableIf = void>
|
|
435
|
+
struct pselect_impl {
|
|
436
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
|
|
437
|
+
return por(pand(a,mask),pandnot(b,mask));
|
|
438
|
+
}
|
|
439
|
+
};
|
|
440
|
+
|
|
441
|
+
// For scalars, use ternary select.
|
|
442
|
+
template<typename Packet>
|
|
443
|
+
struct pselect_impl<Packet,
|
|
444
|
+
typename internal::enable_if<is_scalar<Packet>::value>::type > {
|
|
445
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
|
|
446
|
+
return numext::equal_strict(mask, Packet(0)) ? b : a;
|
|
447
|
+
}
|
|
448
|
+
};
|
|
449
|
+
|
|
450
|
+
/** \internal \returns \a a or \a b for each field in packet according to \a mask */
|
|
451
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
452
|
+
pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
|
453
|
+
return pselect_impl<Packet>::run(mask, a, b);
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
|
|
457
|
+
const bool& cond, const bool& a, const bool& b) {
|
|
458
|
+
return cond ? a : b;
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
/** \internal \returns the min or max of \a a and \a b (coeff-wise)
|
|
462
|
+
If either \a a or \a b are NaN, the result is implementation defined. */
|
|
463
|
+
template<int NaNPropagation>
|
|
464
|
+
struct pminmax_impl {
|
|
465
|
+
template <typename Packet, typename Op>
|
|
466
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
467
|
+
return op(a,b);
|
|
468
|
+
}
|
|
469
|
+
};
|
|
470
|
+
|
|
471
|
+
/** \internal \returns the min or max of \a a and \a b (coeff-wise)
|
|
472
|
+
If either \a a or \a b are NaN, NaN is returned. */
|
|
473
|
+
template<>
|
|
474
|
+
struct pminmax_impl<PropagateNaN> {
|
|
475
|
+
template <typename Packet, typename Op>
|
|
476
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
477
|
+
Packet not_nan_mask_a = pcmp_eq(a, a);
|
|
478
|
+
Packet not_nan_mask_b = pcmp_eq(b, b);
|
|
479
|
+
return pselect(not_nan_mask_a,
|
|
480
|
+
pselect(not_nan_mask_b, op(a, b), b),
|
|
481
|
+
a);
|
|
482
|
+
}
|
|
483
|
+
};
|
|
484
|
+
|
|
485
|
+
/** \internal \returns the min or max of \a a and \a b (coeff-wise)
|
|
486
|
+
If both \a a and \a b are NaN, NaN is returned.
|
|
487
|
+
Equivalent to std::fmin(a, b) for min and std::fmax(a, b) for max. */
|
|
488
|
+
template<>
|
|
489
|
+
struct pminmax_impl<PropagateNumbers> {
|
|
490
|
+
template <typename Packet, typename Op>
|
|
491
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
492
|
+
Packet not_nan_mask_a = pcmp_eq(a, a);
|
|
493
|
+
Packet not_nan_mask_b = pcmp_eq(b, b);
|
|
494
|
+
return pselect(not_nan_mask_a,
|
|
495
|
+
pselect(not_nan_mask_b, op(a, b), a),
|
|
496
|
+
b);
|
|
497
|
+
}
|
|
498
|
+
};
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
#ifndef SYCL_DEVICE_ONLY
|
|
502
|
+
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
|
|
503
|
+
#else
|
|
504
|
+
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
|
|
505
|
+
[](const Type& a, const Type& b) { \
|
|
506
|
+
return Func(a, b);}
|
|
507
|
+
#endif
|
|
508
|
+
|
|
509
|
+
/** \internal \returns the min of \a a and \a b (coeff-wise).
|
|
510
|
+
If \a a or \a b is NaN, the return value is implementation defined. */
|
|
511
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
512
|
+
pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
|
|
513
|
+
|
|
514
|
+
/** \internal \returns the min of \a a and \a b (coeff-wise).
|
|
515
|
+
NaNPropagation determines the NaN propagation semantics. */
|
|
516
|
+
template <int NaNPropagation, typename Packet>
|
|
517
|
+
EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
|
|
518
|
+
return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
/** \internal \returns the max of \a a and \a b (coeff-wise)
|
|
522
|
+
If \a a or \a b is NaN, the return value is implementation defined. */
|
|
523
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
524
|
+
pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
|
|
525
|
+
|
|
526
|
+
/** \internal \returns the max of \a a and \a b (coeff-wise).
|
|
527
|
+
NaNPropagation determines the NaN propagation semantics. */
|
|
528
|
+
template <int NaNPropagation, typename Packet>
|
|
529
|
+
EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
|
|
530
|
+
return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
/** \internal \returns the absolute value of \a a */
|
|
534
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
535
|
+
pabs(const Packet& a) { return numext::abs(a); }
|
|
536
|
+
template<> EIGEN_DEVICE_FUNC inline unsigned int
|
|
537
|
+
pabs(const unsigned int& a) { return a; }
|
|
538
|
+
template<> EIGEN_DEVICE_FUNC inline unsigned long
|
|
539
|
+
pabs(const unsigned long& a) { return a; }
|
|
540
|
+
template<> EIGEN_DEVICE_FUNC inline unsigned long long
|
|
541
|
+
pabs(const unsigned long long& a) { return a; }
|
|
542
|
+
|
|
543
|
+
/** \internal \returns the addsub value of \a a,b */
|
|
544
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
545
|
+
paddsub(const Packet& a, const Packet& b) {
|
|
546
|
+
return pselect(peven_mask(a), padd(a, b), psub(a, b));
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
/** \internal \returns the phase angle of \a a */
|
|
550
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
551
|
+
parg(const Packet& a) { using numext::arg; return arg(a); }
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
/** \internal \returns \a a arithmetically shifted by N bits to the right */
|
|
555
|
+
template<int N> EIGEN_DEVICE_FUNC inline int
|
|
556
|
+
parithmetic_shift_right(const int& a) { return a >> N; }
|
|
557
|
+
template<int N> EIGEN_DEVICE_FUNC inline long int
|
|
558
|
+
parithmetic_shift_right(const long int& a) { return a >> N; }
|
|
559
|
+
|
|
560
|
+
/** \internal \returns \a a logically shifted by N bits to the right */
|
|
561
|
+
template<int N> EIGEN_DEVICE_FUNC inline int
|
|
562
|
+
plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
|
|
563
|
+
template<int N> EIGEN_DEVICE_FUNC inline long int
|
|
564
|
+
plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
|
|
207
565
|
|
|
208
|
-
/** \internal \returns
|
|
566
|
+
/** \internal \returns \a a shifted by N bits to the left */
|
|
567
|
+
template<int N> EIGEN_DEVICE_FUNC inline int
|
|
568
|
+
plogical_shift_left(const int& a) { return a << N; }
|
|
569
|
+
template<int N> EIGEN_DEVICE_FUNC inline long int
|
|
570
|
+
plogical_shift_left(const long int& a) { return a << N; }
|
|
571
|
+
|
|
572
|
+
/** \internal \returns the significand and exponent of the underlying floating point numbers
|
|
573
|
+
* See https://en.cppreference.com/w/cpp/numeric/math/frexp
|
|
574
|
+
*/
|
|
575
|
+
template <typename Packet>
|
|
576
|
+
EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
|
|
577
|
+
int exp;
|
|
578
|
+
EIGEN_USING_STD(frexp);
|
|
579
|
+
Packet result = static_cast<Packet>(frexp(a, &exp));
|
|
580
|
+
exponent = static_cast<Packet>(exp);
|
|
581
|
+
return result;
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
/** \internal \returns a * 2^((int)exponent)
|
|
585
|
+
* See https://en.cppreference.com/w/cpp/numeric/math/ldexp
|
|
586
|
+
*/
|
|
587
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
588
|
+
pldexp(const Packet &a, const Packet &exponent) {
|
|
589
|
+
EIGEN_USING_STD(ldexp)
|
|
590
|
+
return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
/** \internal \returns the absolute difference of \a a and \a b (coeff-wise) */
|
|
209
594
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
210
|
-
|
|
595
|
+
pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
|
|
211
596
|
|
|
212
597
|
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
|
|
213
598
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
@@ -217,10 +602,22 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
|
|
217
602
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
218
603
|
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
|
219
604
|
|
|
605
|
+
/** \internal \returns a packet version of \a *from, (un-aligned masked load)
|
|
606
|
+
* There is no generic implementation. We only have implementations for specialized
|
|
607
|
+
* cases. Generic case should not be called.
|
|
608
|
+
*/
|
|
609
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
|
610
|
+
typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
|
|
611
|
+
ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
|
|
612
|
+
|
|
220
613
|
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
|
221
614
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
222
615
|
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
|
|
223
616
|
|
|
617
|
+
/** \internal \returns a packet with constant coefficients set from bits */
|
|
618
|
+
template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
|
|
619
|
+
pset1frombits(BitsType a);
|
|
620
|
+
|
|
224
621
|
/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
|
|
225
622
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
226
623
|
pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
|
|
@@ -237,7 +634,7 @@ ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
|
|
237
634
|
* For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
|
|
238
635
|
* replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
|
|
239
636
|
* Currently, this function is only used in matrix products.
|
|
240
|
-
* For packet-size smaller or equal to 4, this function is equivalent to pload1
|
|
637
|
+
* For packet-size smaller or equal to 4, this function is equivalent to pload1
|
|
241
638
|
*/
|
|
242
639
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
243
640
|
ploadquad(const typename unpacket_traits<Packet>::type* from)
|
|
@@ -281,6 +678,20 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
|
|
|
281
678
|
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
|
|
282
679
|
plset(const typename unpacket_traits<Packet>::type& a) { return a; }
|
|
283
680
|
|
|
681
|
+
/** \internal \returns a packet whose even-indexed coefficients are x and odd-indexed coefficients are 0, e.g.: (x, 0, x, 0),
|
|
682
|
+
where x is the value of all 1-bits. */
|
|
683
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
684
|
+
peven_mask(const Packet& /*a*/) {
|
|
685
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
686
|
+
const size_t n = unpacket_traits<Packet>::size;
|
|
687
|
+
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
|
688
|
+
for(size_t i = 0; i < n; ++i) {
|
|
689
|
+
memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
|
|
690
|
+
}
|
|
691
|
+
return ploadu<Packet>(elements);
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
|
|
284
695
|
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
|
|
285
696
|
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
|
|
286
697
|
{ (*to) = from; }
|
|
@@ -289,6 +700,15 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(
|
|
|
289
700
|
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
|
|
290
701
|
{ (*to) = from; }
|
|
291
702
|
|
|
703
|
+
/** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
|
|
704
|
+
* There is no generic implementation. We only have implementations for specialized
|
|
705
|
+
* cases. Generic case should not be called.
|
|
706
|
+
*/
|
|
707
|
+
template<typename Scalar, typename Packet>
|
|
708
|
+
EIGEN_DEVICE_FUNC inline
|
|
709
|
+
typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
|
|
710
|
+
pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
|
|
711
|
+
|
|
292
712
|
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
|
|
293
713
|
{ return ploadu<Packet>(from); }
|
|
294
714
|
|
|
@@ -298,8 +718,10 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
|
|
|
298
718
|
/** \internal tries to do cache prefetching of \a addr */
|
|
299
719
|
template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
|
|
300
720
|
{
|
|
301
|
-
#
|
|
302
|
-
|
|
721
|
+
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
|
722
|
+
// do nothing
|
|
723
|
+
#elif defined(EIGEN_CUDA_ARCH)
|
|
724
|
+
#if defined(__LP64__) || EIGEN_OS_WIN64
|
|
303
725
|
// 64-bit pointer operand constraint for inlined asm
|
|
304
726
|
asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
|
|
305
727
|
#else
|
|
@@ -311,39 +733,6 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
|
|
|
311
733
|
#endif
|
|
312
734
|
}
|
|
313
735
|
|
|
314
|
-
/** \internal \returns the first element of a packet */
|
|
315
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
|
|
316
|
-
{ return a; }
|
|
317
|
-
|
|
318
|
-
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
|
|
319
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
320
|
-
preduxp(const Packet* vecs) { return vecs[0]; }
|
|
321
|
-
|
|
322
|
-
/** \internal \returns the sum of the elements of \a a*/
|
|
323
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
|
324
|
-
{ return a; }
|
|
325
|
-
|
|
326
|
-
/** \internal \returns the sum of the elements of \a a by block of 4 elements.
|
|
327
|
-
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
|
328
|
-
* For packet-size smaller or equal to 4, this boils down to a noop.
|
|
329
|
-
*/
|
|
330
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
|
331
|
-
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
|
332
|
-
predux_downto4(const Packet& a)
|
|
333
|
-
{ return a; }
|
|
334
|
-
|
|
335
|
-
/** \internal \returns the product of the elements of \a a*/
|
|
336
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
|
|
337
|
-
{ return a; }
|
|
338
|
-
|
|
339
|
-
/** \internal \returns the min of the elements of \a a*/
|
|
340
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
|
|
341
|
-
{ return a; }
|
|
342
|
-
|
|
343
|
-
/** \internal \returns the max of the elements of \a a*/
|
|
344
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
|
|
345
|
-
{ return a; }
|
|
346
|
-
|
|
347
736
|
/** \internal \returns the reversed elements of \a a*/
|
|
348
737
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
|
|
349
738
|
{ return a; }
|
|
@@ -351,7 +740,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet&
|
|
|
351
740
|
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
|
352
741
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
|
|
353
742
|
{
|
|
354
|
-
return Packet(
|
|
743
|
+
return Packet(numext::imag(a),numext::real(a));
|
|
355
744
|
}
|
|
356
745
|
|
|
357
746
|
/**************************
|
|
@@ -360,47 +749,51 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet
|
|
|
360
749
|
|
|
361
750
|
/** \internal \returns the sine of \a a (coeff-wise) */
|
|
362
751
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
363
|
-
Packet psin(const Packet& a) {
|
|
752
|
+
Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
|
|
364
753
|
|
|
365
754
|
/** \internal \returns the cosine of \a a (coeff-wise) */
|
|
366
755
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
367
|
-
Packet pcos(const Packet& a) {
|
|
756
|
+
Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
|
|
368
757
|
|
|
369
758
|
/** \internal \returns the tan of \a a (coeff-wise) */
|
|
370
759
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
371
|
-
Packet ptan(const Packet& a) {
|
|
760
|
+
Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
|
|
372
761
|
|
|
373
762
|
/** \internal \returns the arc sine of \a a (coeff-wise) */
|
|
374
763
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
375
|
-
Packet pasin(const Packet& a) {
|
|
764
|
+
Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
|
|
376
765
|
|
|
377
766
|
/** \internal \returns the arc cosine of \a a (coeff-wise) */
|
|
378
767
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
379
|
-
Packet pacos(const Packet& a) {
|
|
768
|
+
Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
|
|
380
769
|
|
|
381
770
|
/** \internal \returns the arc tangent of \a a (coeff-wise) */
|
|
382
771
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
383
|
-
Packet patan(const Packet& a) {
|
|
772
|
+
Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
|
|
384
773
|
|
|
385
774
|
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
|
386
775
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
387
|
-
Packet psinh(const Packet& a) {
|
|
776
|
+
Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
|
|
388
777
|
|
|
389
778
|
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
|
390
779
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
391
|
-
Packet pcosh(const Packet& a) {
|
|
780
|
+
Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
|
|
392
781
|
|
|
393
782
|
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
|
394
783
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
395
|
-
Packet ptanh(const Packet& a) {
|
|
784
|
+
Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
|
|
396
785
|
|
|
397
786
|
/** \internal \returns the exp of \a a (coeff-wise) */
|
|
398
787
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
399
|
-
Packet pexp(const Packet& a) {
|
|
788
|
+
Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
|
|
789
|
+
|
|
790
|
+
/** \internal \returns the expm1 of \a a (coeff-wise) */
|
|
791
|
+
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
792
|
+
Packet pexpm1(const Packet& a) { return numext::expm1(a); }
|
|
400
793
|
|
|
401
794
|
/** \internal \returns the log of \a a (coeff-wise) */
|
|
402
795
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
403
|
-
Packet plog(const Packet& a) {
|
|
796
|
+
Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
|
|
404
797
|
|
|
405
798
|
/** \internal \returns the log1p of \a a (coeff-wise) */
|
|
406
799
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
@@ -408,16 +801,24 @@ Packet plog1p(const Packet& a) { return numext::log1p(a); }
|
|
|
408
801
|
|
|
409
802
|
/** \internal \returns the log10 of \a a (coeff-wise) */
|
|
410
803
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
411
|
-
Packet plog10(const Packet& a) {
|
|
804
|
+
Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
|
|
805
|
+
|
|
806
|
+
/** \internal \returns the log2 of \a a (coeff-wise) */
|
|
807
|
+
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
808
|
+
Packet plog2(const Packet& a) {
|
|
809
|
+
typedef typename internal::unpacket_traits<Packet>::type Scalar;
|
|
810
|
+
return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
|
|
811
|
+
}
|
|
412
812
|
|
|
413
813
|
/** \internal \returns the square-root of \a a (coeff-wise) */
|
|
414
814
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
415
|
-
Packet psqrt(const Packet& a) {
|
|
815
|
+
Packet psqrt(const Packet& a) { return numext::sqrt(a); }
|
|
416
816
|
|
|
417
817
|
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
|
|
418
818
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
419
819
|
Packet prsqrt(const Packet& a) {
|
|
420
|
-
|
|
820
|
+
typedef typename internal::unpacket_traits<Packet>::type Scalar;
|
|
821
|
+
return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
|
|
421
822
|
}
|
|
422
823
|
|
|
423
824
|
/** \internal \returns the rounded value of \a a (coeff-wise) */
|
|
@@ -428,15 +829,121 @@ Packet pround(const Packet& a) { using numext::round; return round(a); }
|
|
|
428
829
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
429
830
|
Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
|
|
430
831
|
|
|
832
|
+
/** \internal \returns the rounded value of \a a (coeff-wise) with current
|
|
833
|
+
* rounding mode */
|
|
834
|
+
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
835
|
+
Packet print(const Packet& a) { using numext::rint; return rint(a); }
|
|
836
|
+
|
|
431
837
|
/** \internal \returns the ceil of \a a (coeff-wise) */
|
|
432
838
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
433
839
|
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
|
|
434
840
|
|
|
841
|
+
/** \internal \returns the first element of a packet */
|
|
842
|
+
template<typename Packet>
|
|
843
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
|
|
844
|
+
pfirst(const Packet& a)
|
|
845
|
+
{ return a; }
|
|
846
|
+
|
|
847
|
+
/** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
|
|
848
|
+
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
|
849
|
+
* For packet-size smaller or equal to 4, this boils down to a noop.
|
|
850
|
+
*/
|
|
851
|
+
template<typename Packet>
|
|
852
|
+
EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
|
853
|
+
predux_half_dowto4(const Packet& a)
|
|
854
|
+
{ return a; }
|
|
855
|
+
|
|
856
|
+
// Slow generic implementation of Packet reduction.
|
|
857
|
+
template <typename Packet, typename Op>
|
|
858
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
|
|
859
|
+
predux_helper(const Packet& a, Op op) {
|
|
860
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
861
|
+
const size_t n = unpacket_traits<Packet>::size;
|
|
862
|
+
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
|
863
|
+
pstoreu<Scalar>(elements, a);
|
|
864
|
+
for(size_t k = n / 2; k > 0; k /= 2) {
|
|
865
|
+
for(size_t i = 0; i < k; ++i) {
|
|
866
|
+
elements[i] = op(elements[i], elements[i + k]);
|
|
867
|
+
}
|
|
868
|
+
}
|
|
869
|
+
return elements[0];
|
|
870
|
+
}
|
|
871
|
+
|
|
872
|
+
/** \internal \returns the sum of the elements of \a a*/
|
|
873
|
+
template<typename Packet>
|
|
874
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
|
|
875
|
+
predux(const Packet& a)
|
|
876
|
+
{
|
|
877
|
+
return a;
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
/** \internal \returns the product of the elements of \a a */
|
|
881
|
+
template <typename Packet>
|
|
882
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
|
|
883
|
+
const Packet& a) {
|
|
884
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
885
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
/** \internal \returns the min of the elements of \a a */
|
|
889
|
+
template <typename Packet>
|
|
890
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
|
|
891
|
+
const Packet &a) {
|
|
892
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
893
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
template <int NaNPropagation, typename Packet>
|
|
897
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
|
|
898
|
+
const Packet& a) {
|
|
899
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
900
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
|
|
901
|
+
}
|
|
902
|
+
|
|
903
|
+
/** \internal \returns the max of the elements of \a a */
|
|
904
|
+
template <typename Packet>
|
|
905
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
|
|
906
|
+
const Packet &a) {
|
|
907
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
908
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
|
|
909
|
+
}
|
|
910
|
+
|
|
911
|
+
template <int NaNPropagation, typename Packet>
|
|
912
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
|
|
913
|
+
const Packet& a) {
|
|
914
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
915
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
#undef EIGEN_BINARY_OP_NAN_PROPAGATION
|
|
919
|
+
|
|
920
|
+
/** \internal \returns true if all coeffs of \a a mean "true"
|
|
921
|
+
* It is supposed to be called on values returned by pcmp_*.
|
|
922
|
+
*/
|
|
923
|
+
// not needed yet
|
|
924
|
+
// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
|
|
925
|
+
// { return bool(a); }
|
|
926
|
+
|
|
927
|
+
/** \internal \returns true if any coeff of \a a means "true"
|
|
928
|
+
* It is supposed to be called on values returned by pcmp_*.
|
|
929
|
+
*/
|
|
930
|
+
template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
|
|
931
|
+
{
|
|
932
|
+
// Dirty but generic implementation where "true" is assumed to be non-zero and the same for all coefficients.
|
|
933
|
+
// It is expected that "true" is either:
|
|
934
|
+
// - Scalar(1)
|
|
935
|
+
// - bits full of ones (NaN for floats),
|
|
936
|
+
// - or first bit equals to 1 (1 for ints, smallest denormal for floats).
|
|
937
|
+
// For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
|
|
938
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
939
|
+
return numext::not_equal_strict(predux(a), Scalar(0));
|
|
940
|
+
}
|
|
941
|
+
|
|
435
942
|
/***************************************************************************
|
|
436
943
|
* The following functions might not have to be overwritten for vectorized types
|
|
437
944
|
***************************************************************************/
|
|
438
945
|
|
|
439
|
-
/** \internal copy a packet with constant
|
|
946
|
+
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
|
|
440
947
|
// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
|
|
441
948
|
template<typename Packet>
|
|
442
949
|
inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
|
|
@@ -484,41 +991,12 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_t
|
|
|
484
991
|
return ploadt<Packet, LoadMode>(from);
|
|
485
992
|
}
|
|
486
993
|
|
|
487
|
-
/** \internal default implementation of palign() allowing partial specialization */
|
|
488
|
-
template<int Offset,typename PacketType>
|
|
489
|
-
struct palign_impl
|
|
490
|
-
{
|
|
491
|
-
// by default data are aligned, so there is nothing to be done :)
|
|
492
|
-
static inline void run(PacketType&, const PacketType&) {}
|
|
493
|
-
};
|
|
494
|
-
|
|
495
|
-
/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
|
|
496
|
-
* of \a first and \a Offset first elements of \a second.
|
|
497
|
-
*
|
|
498
|
-
* This function is currently only used to optimize matrix-vector products on unligned matrices.
|
|
499
|
-
* It takes 2 packets that represent a contiguous memory array, and returns a packet starting
|
|
500
|
-
* at the position \a Offset. For instance, for packets of 4 elements, we have:
|
|
501
|
-
* Input:
|
|
502
|
-
* - first = {f0,f1,f2,f3}
|
|
503
|
-
* - second = {s0,s1,s2,s3}
|
|
504
|
-
* Output:
|
|
505
|
-
* - if Offset==0 then {f0,f1,f2,f3}
|
|
506
|
-
* - if Offset==1 then {f1,f2,f3,s0}
|
|
507
|
-
* - if Offset==2 then {f2,f3,s0,s1}
|
|
508
|
-
* - if Offset==3 then {f3,s0,s1,s3}
|
|
509
|
-
*/
|
|
510
|
-
template<int Offset,typename PacketType>
|
|
511
|
-
inline void palign(PacketType& first, const PacketType& second)
|
|
512
|
-
{
|
|
513
|
-
palign_impl<Offset,PacketType>::run(first,second);
|
|
514
|
-
}
|
|
515
|
-
|
|
516
994
|
/***************************************************************************
|
|
517
995
|
* Fast complex products (GCC generates a function call which is very slow)
|
|
518
996
|
***************************************************************************/
|
|
519
997
|
|
|
520
998
|
// Eigen+CUDA does not support complexes.
|
|
521
|
-
#
|
|
999
|
+
#if !defined(EIGEN_GPUCC)
|
|
522
1000
|
|
|
523
1001
|
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
|
|
524
1002
|
{ return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
|
|
@@ -555,34 +1033,6 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
|
|
|
555
1033
|
return ifPacket.select[0] ? thenPacket : elsePacket;
|
|
556
1034
|
}
|
|
557
1035
|
|
|
558
|
-
/** \internal \returns \a a with the first coefficient replaced by the scalar b */
|
|
559
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
560
|
-
pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
|
|
561
|
-
{
|
|
562
|
-
// Default implementation based on pblend.
|
|
563
|
-
// It must be specialized for higher performance.
|
|
564
|
-
Selector<unpacket_traits<Packet>::size> mask;
|
|
565
|
-
mask.select[0] = true;
|
|
566
|
-
// This for loop should be optimized away by the compiler.
|
|
567
|
-
for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
|
|
568
|
-
mask.select[i] = false;
|
|
569
|
-
return pblend(mask, pset1<Packet>(b), a);
|
|
570
|
-
}
|
|
571
|
-
|
|
572
|
-
/** \internal \returns \a a with the last coefficient replaced by the scalar b */
|
|
573
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
574
|
-
pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
|
575
|
-
{
|
|
576
|
-
// Default implementation based on pblend.
|
|
577
|
-
// It must be specialized for higher performance.
|
|
578
|
-
Selector<unpacket_traits<Packet>::size> mask;
|
|
579
|
-
// This for loop should be optimized away by the compiler.
|
|
580
|
-
for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
|
|
581
|
-
mask.select[i] = false;
|
|
582
|
-
mask.select[unpacket_traits<Packet>::size-1] = true;
|
|
583
|
-
return pblend(mask, pset1<Packet>(b), a);
|
|
584
|
-
}
|
|
585
|
-
|
|
586
1036
|
} // end namespace internal
|
|
587
1037
|
|
|
588
1038
|
} // end namespace Eigen
|