@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -11,17 +11,20 @@
|
|
|
11
11
|
#ifndef EIGEN_GENERIC_PACKET_MATH_H
|
|
12
12
|
#define EIGEN_GENERIC_PACKET_MATH_H
|
|
13
13
|
|
|
14
|
+
// IWYU pragma: private
|
|
15
|
+
#include "./InternalHeaderCheck.h"
|
|
16
|
+
|
|
14
17
|
namespace Eigen {
|
|
15
18
|
|
|
16
19
|
namespace internal {
|
|
17
20
|
|
|
18
21
|
/** \internal
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
* \file GenericPacketMath.h
|
|
23
|
+
*
|
|
24
|
+
* Default implementation for types not supported by the vectorization.
|
|
25
|
+
* In practice these functions are provided to make easier the writing
|
|
26
|
+
* of generic vectorized code.
|
|
27
|
+
*/
|
|
25
28
|
|
|
26
29
|
#ifndef EIGEN_DEBUG_ALIGNED_LOAD
|
|
27
30
|
#define EIGEN_DEBUG_ALIGNED_LOAD
|
|
@@ -39,267 +42,950 @@ namespace internal {
|
|
|
39
42
|
#define EIGEN_DEBUG_UNALIGNED_STORE
|
|
40
43
|
#endif
|
|
41
44
|
|
|
42
|
-
struct default_packet_traits
|
|
43
|
-
{
|
|
45
|
+
struct default_packet_traits {
|
|
44
46
|
enum {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
HasMul
|
|
47
|
+
// Ops that are implemented for most types.
|
|
48
|
+
HasAdd = 1,
|
|
49
|
+
HasSub = 1,
|
|
50
|
+
HasShift = 1,
|
|
51
|
+
HasMul = 1,
|
|
50
52
|
HasNegate = 1,
|
|
51
|
-
HasAbs
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
HasConj = 1,
|
|
53
|
+
HasAbs = 1,
|
|
54
|
+
HasAbs2 = 1,
|
|
55
|
+
HasMin = 1,
|
|
56
|
+
HasMax = 1,
|
|
57
|
+
HasConj = 1,
|
|
57
58
|
HasSetLinear = 1,
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
59
|
+
HasSign = 1,
|
|
60
|
+
// By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
|
|
61
|
+
// types
|
|
62
|
+
HasRound = 1,
|
|
63
|
+
|
|
64
|
+
HasArg = 0,
|
|
65
|
+
HasAbsDiff = 0,
|
|
66
|
+
HasBlend = 0,
|
|
67
|
+
// This flag is used to indicate whether packet comparison is supported.
|
|
68
|
+
// pcmp_eq and pcmp_lt should be defined for it to be true.
|
|
69
|
+
HasCmp = 0,
|
|
70
|
+
|
|
71
|
+
HasDiv = 0,
|
|
72
|
+
HasReciprocal = 0,
|
|
73
|
+
HasSqrt = 0,
|
|
74
|
+
HasRsqrt = 0,
|
|
75
|
+
HasCbrt = 0,
|
|
76
|
+
HasExp = 0,
|
|
77
|
+
HasExpm1 = 0,
|
|
78
|
+
HasLog = 0,
|
|
79
|
+
HasLog1p = 0,
|
|
80
|
+
HasLog10 = 0,
|
|
81
|
+
HasPow = 0,
|
|
82
|
+
HasSin = 0,
|
|
83
|
+
HasCos = 0,
|
|
84
|
+
HasTan = 0,
|
|
85
|
+
HasASin = 0,
|
|
86
|
+
HasACos = 0,
|
|
87
|
+
HasATan = 0,
|
|
88
|
+
HasATanh = 0,
|
|
89
|
+
HasSinh = 0,
|
|
90
|
+
HasCosh = 0,
|
|
91
|
+
HasTanh = 0,
|
|
78
92
|
HasLGamma = 0,
|
|
79
93
|
HasDiGamma = 0,
|
|
80
94
|
HasZeta = 0,
|
|
81
95
|
HasPolygamma = 0,
|
|
82
96
|
HasErf = 0,
|
|
83
97
|
HasErfc = 0,
|
|
98
|
+
HasNdtri = 0,
|
|
99
|
+
HasBessel = 0,
|
|
84
100
|
HasIGamma = 0,
|
|
101
|
+
HasIGammaDerA = 0,
|
|
102
|
+
HasGammaSampleDerAlpha = 0,
|
|
85
103
|
HasIGammac = 0,
|
|
86
|
-
HasBetaInc = 0
|
|
87
|
-
|
|
88
|
-
HasRound = 0,
|
|
89
|
-
HasFloor = 0,
|
|
90
|
-
HasCeil = 0,
|
|
91
|
-
|
|
92
|
-
HasSign = 0
|
|
104
|
+
HasBetaInc = 0
|
|
93
105
|
};
|
|
94
106
|
};
|
|
95
107
|
|
|
96
|
-
template<typename T>
|
|
97
|
-
{
|
|
108
|
+
template <typename T>
|
|
109
|
+
struct packet_traits : default_packet_traits {
|
|
98
110
|
typedef T type;
|
|
99
111
|
typedef T half;
|
|
100
112
|
enum {
|
|
101
113
|
Vectorizable = 0,
|
|
102
114
|
size = 1,
|
|
103
115
|
AlignedOnScalar = 0,
|
|
104
|
-
HasHalfPacket = 0
|
|
105
116
|
};
|
|
106
117
|
enum {
|
|
107
|
-
HasAdd
|
|
108
|
-
HasSub
|
|
109
|
-
HasMul
|
|
118
|
+
HasAdd = 0,
|
|
119
|
+
HasSub = 0,
|
|
120
|
+
HasMul = 0,
|
|
110
121
|
HasNegate = 0,
|
|
111
|
-
HasAbs
|
|
112
|
-
HasAbs2
|
|
113
|
-
HasMin
|
|
114
|
-
HasMax
|
|
115
|
-
HasConj
|
|
122
|
+
HasAbs = 0,
|
|
123
|
+
HasAbs2 = 0,
|
|
124
|
+
HasMin = 0,
|
|
125
|
+
HasMax = 0,
|
|
126
|
+
HasConj = 0,
|
|
116
127
|
HasSetLinear = 0
|
|
117
128
|
};
|
|
118
129
|
};
|
|
119
130
|
|
|
120
|
-
template<typename T>
|
|
131
|
+
template <typename T>
|
|
132
|
+
struct packet_traits<const T> : packet_traits<T> {};
|
|
121
133
|
|
|
122
|
-
template <typename
|
|
134
|
+
template <typename T>
|
|
135
|
+
struct unpacket_traits {
|
|
136
|
+
typedef T type;
|
|
137
|
+
typedef T half;
|
|
138
|
+
typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
|
|
123
139
|
enum {
|
|
124
|
-
|
|
140
|
+
size = 1,
|
|
141
|
+
alignment = alignof(T),
|
|
142
|
+
vectorizable = false,
|
|
143
|
+
masked_load_available = false,
|
|
144
|
+
masked_store_available = false
|
|
145
|
+
};
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
template <typename T>
|
|
149
|
+
struct unpacket_traits<const T> : unpacket_traits<T> {};
|
|
150
|
+
|
|
151
|
+
/** \internal A convenience utility for determining if the type is a scalar.
|
|
152
|
+
* This is used to enable some generic packet implementations.
|
|
153
|
+
*/
|
|
154
|
+
template <typename Packet>
|
|
155
|
+
struct is_scalar {
|
|
156
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
157
|
+
enum { value = internal::is_same<Packet, Scalar>::value };
|
|
158
|
+
};
|
|
159
|
+
|
|
160
|
+
// automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
|
|
161
|
+
// 1) the packets are the same type, or
|
|
162
|
+
// 2) the packets differ only in sign.
|
|
163
|
+
// In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
|
|
164
|
+
template <typename SrcPacket, typename TgtPacket,
|
|
165
|
+
bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
|
|
166
|
+
struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
|
|
167
|
+
template <>
|
|
168
|
+
struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
|
|
169
|
+
template <>
|
|
170
|
+
struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
|
|
171
|
+
template <>
|
|
172
|
+
struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
|
|
173
|
+
template <>
|
|
174
|
+
struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
|
|
175
|
+
|
|
176
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
177
|
+
struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
|
|
178
|
+
using SrcScalar = typename unpacket_traits<SrcPacket>::type;
|
|
179
|
+
static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
|
|
180
|
+
using TgtScalar = typename unpacket_traits<TgtPacket>::type;
|
|
181
|
+
static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
|
|
182
|
+
static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
|
|
183
|
+
};
|
|
184
|
+
|
|
185
|
+
// is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
|
|
186
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
187
|
+
struct is_degenerate {
|
|
188
|
+
static constexpr bool value =
|
|
189
|
+
is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
template <typename Packet>
|
|
193
|
+
struct is_half {
|
|
194
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
195
|
+
static constexpr int Size = unpacket_traits<Packet>::size;
|
|
196
|
+
using DefaultPacket = typename packet_traits<Scalar>::type;
|
|
197
|
+
static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
|
|
198
|
+
static constexpr bool value = Size != 1 && Size < DefaultSize;
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
template <typename Src, typename Tgt>
|
|
202
|
+
struct type_casting_traits {
|
|
203
|
+
enum {
|
|
204
|
+
VectorizedCast =
|
|
205
|
+
is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
|
|
125
206
|
SrcCoeffRatio = 1,
|
|
126
207
|
TgtCoeffRatio = 1
|
|
127
208
|
};
|
|
128
209
|
};
|
|
129
210
|
|
|
211
|
+
// provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types
|
|
212
|
+
template <typename Src, typename Tgt>
|
|
213
|
+
struct vectorized_type_casting_traits {
|
|
214
|
+
enum : int {
|
|
215
|
+
DefaultSrcPacketSize = packet_traits<Src>::size,
|
|
216
|
+
DefaultTgtPacketSize = packet_traits<Tgt>::size,
|
|
217
|
+
VectorizedCast = 1,
|
|
218
|
+
SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),
|
|
219
|
+
TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)
|
|
220
|
+
};
|
|
221
|
+
};
|
|
222
|
+
|
|
223
|
+
/** \internal Wrapper to ensure that multiple packet types can map to the same
|
|
224
|
+
underlying vector type. */
|
|
225
|
+
template <typename T, int unique_id = 0>
|
|
226
|
+
struct eigen_packet_wrapper {
|
|
227
|
+
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
|
|
228
|
+
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
|
|
229
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
|
|
230
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}
|
|
231
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
|
|
232
|
+
m_val = v;
|
|
233
|
+
return *this;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
T m_val;
|
|
237
|
+
};
|
|
238
|
+
|
|
239
|
+
template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
|
|
240
|
+
struct preinterpret_generic;
|
|
241
|
+
|
|
242
|
+
template <typename Target, typename Packet>
|
|
243
|
+
struct preinterpret_generic<Target, Packet, false> {
|
|
244
|
+
// the packets are not the same, attempt scalar bit_cast
|
|
245
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
|
|
246
|
+
return numext::bit_cast<Target, Packet>(a);
|
|
247
|
+
}
|
|
248
|
+
};
|
|
249
|
+
|
|
250
|
+
template <typename Packet>
|
|
251
|
+
struct preinterpret_generic<Packet, Packet, true> {
|
|
252
|
+
// the packets are the same type: do nothing
|
|
253
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
|
|
254
|
+
};
|
|
255
|
+
|
|
256
|
+
template <typename ComplexPacket>
|
|
257
|
+
struct preinterpret_generic<typename unpacket_traits<ComplexPacket>::as_real, ComplexPacket, false> {
|
|
258
|
+
using RealPacket = typename unpacket_traits<ComplexPacket>::as_real;
|
|
259
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; }
|
|
260
|
+
};
|
|
261
|
+
|
|
262
|
+
/** \internal \returns reinterpret_cast<Target>(a) */
|
|
263
|
+
template <typename Target, typename Packet>
|
|
264
|
+
EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
|
|
265
|
+
return preinterpret_generic<Target, Packet>::run(a);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
|
|
269
|
+
bool TgtIsHalf = is_half<TgtPacket>::value>
|
|
270
|
+
struct pcast_generic;
|
|
271
|
+
|
|
272
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
273
|
+
struct pcast_generic<SrcPacket, TgtPacket, false, false> {
|
|
274
|
+
// the packets are not degenerate: attempt scalar static_cast
|
|
275
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
|
|
276
|
+
return cast_impl<SrcPacket, TgtPacket>::run(a);
|
|
277
|
+
}
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
template <typename Packet>
|
|
281
|
+
struct pcast_generic<Packet, Packet, true, false> {
|
|
282
|
+
// the packets are the same: do nothing
|
|
283
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
|
|
284
|
+
};
|
|
285
|
+
|
|
286
|
+
template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
|
|
287
|
+
struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
|
|
288
|
+
// the packets are degenerate: preinterpret is equivalent to pcast
|
|
289
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
|
|
290
|
+
};
|
|
130
291
|
|
|
131
292
|
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
|
|
132
293
|
template <typename SrcPacket, typename TgtPacket>
|
|
133
|
-
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
134
|
-
|
|
135
|
-
return static_cast<TgtPacket>(a);
|
|
294
|
+
EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
|
|
295
|
+
return pcast_generic<SrcPacket, TgtPacket>::run(a);
|
|
136
296
|
}
|
|
137
297
|
template <typename SrcPacket, typename TgtPacket>
|
|
138
|
-
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
139
|
-
|
|
140
|
-
|
|
298
|
+
EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
|
|
299
|
+
return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
|
|
300
|
+
}
|
|
301
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
302
|
+
EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
|
|
303
|
+
const SrcPacket& d) {
|
|
304
|
+
return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
|
|
141
305
|
}
|
|
142
|
-
|
|
143
306
|
template <typename SrcPacket, typename TgtPacket>
|
|
144
|
-
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
145
|
-
|
|
146
|
-
|
|
307
|
+
EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
|
|
308
|
+
const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
|
|
309
|
+
const SrcPacket& h) {
|
|
310
|
+
return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
|
|
147
311
|
}
|
|
148
312
|
|
|
313
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
314
|
+
struct pcast_generic<SrcPacket, TgtPacket, false, true> {
|
|
315
|
+
// TgtPacket is a half packet of some other type
|
|
316
|
+
// perform cast and truncate result
|
|
317
|
+
using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
|
|
318
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
|
|
319
|
+
return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
|
|
320
|
+
}
|
|
321
|
+
};
|
|
322
|
+
|
|
149
323
|
/** \internal \returns a + b (coeff-wise) */
|
|
150
|
-
template<typename Packet>
|
|
151
|
-
padd(const Packet& a,
|
|
152
|
-
|
|
324
|
+
template <typename Packet>
|
|
325
|
+
EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
|
|
326
|
+
return a + b;
|
|
327
|
+
}
|
|
328
|
+
// Avoid compiler warning for boolean algebra.
|
|
329
|
+
template <>
|
|
330
|
+
EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
|
|
331
|
+
return a || b;
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
/** \internal \returns the masked sum of \a a and \a b, as selected by \a umask (masked add).
|
|
335
|
+
* There is no generic implementation. We only have implementations for specialized
|
|
336
|
+
* cases. Generic case should not be called.
|
|
337
|
+
*/
|
|
338
|
+
template <typename Packet>
|
|
339
|
+
EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
|
|
340
|
+
const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
|
|
153
341
|
|
|
154
342
|
/** \internal \returns a - b (coeff-wise) */
|
|
155
|
-
template<typename Packet>
|
|
156
|
-
psub(const Packet& a,
|
|
157
|
-
|
|
343
|
+
template <typename Packet>
|
|
344
|
+
EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
|
|
345
|
+
return a - b;
|
|
346
|
+
}
|
|
158
347
|
|
|
159
348
|
/** \internal \returns -a (coeff-wise) */
|
|
160
|
-
template<typename Packet>
|
|
161
|
-
pnegate(const Packet& a) {
|
|
349
|
+
template <typename Packet>
|
|
350
|
+
EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
|
|
351
|
+
EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
|
|
352
|
+
NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
|
|
353
|
+
return numext::negate(a);
|
|
354
|
+
}
|
|
162
355
|
|
|
163
356
|
/** \internal \returns conj(a) (coeff-wise) */
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
357
|
+
template <typename Packet>
|
|
358
|
+
EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
|
|
359
|
+
return numext::conj(a);
|
|
360
|
+
}
|
|
167
361
|
|
|
168
362
|
/** \internal \returns a * b (coeff-wise) */
|
|
169
|
-
template<typename Packet>
|
|
170
|
-
pmul(const Packet& a,
|
|
171
|
-
|
|
363
|
+
template <typename Packet>
|
|
364
|
+
EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
|
|
365
|
+
return a * b;
|
|
366
|
+
}
|
|
367
|
+
// Avoid compiler warning for boolean algebra.
|
|
368
|
+
template <>
|
|
369
|
+
EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
|
|
370
|
+
return a && b;
|
|
371
|
+
}
|
|
172
372
|
|
|
173
373
|
/** \internal \returns a / b (coeff-wise) */
|
|
174
|
-
template<typename Packet>
|
|
175
|
-
pdiv(const Packet& a,
|
|
176
|
-
|
|
374
|
+
template <typename Packet>
|
|
375
|
+
EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
|
|
376
|
+
return a / b;
|
|
377
|
+
}
|
|
378
|
+
// Avoid compiler warning for boolean algebra.
|
|
379
|
+
template <>
|
|
380
|
+
EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
|
|
381
|
+
return a && b;
|
|
382
|
+
}
|
|
177
383
|
|
|
178
|
-
|
|
179
|
-
template<typename Packet
|
|
180
|
-
|
|
181
|
-
|
|
384
|
+
// In the generic packet case, memset to all one bits.
|
|
385
|
+
template <typename Packet, typename EnableIf = void>
|
|
386
|
+
struct ptrue_impl {
|
|
387
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
|
|
388
|
+
Packet b;
|
|
389
|
+
memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
|
|
390
|
+
return b;
|
|
391
|
+
}
|
|
392
|
+
};
|
|
182
393
|
|
|
183
|
-
|
|
184
|
-
template<typename
|
|
185
|
-
|
|
186
|
-
|
|
394
|
+
// Use a value of one for scalars.
|
|
395
|
+
template <typename Scalar>
|
|
396
|
+
struct ptrue_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
|
|
397
|
+
static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); }
|
|
398
|
+
};
|
|
187
399
|
|
|
188
|
-
|
|
189
|
-
template
|
|
190
|
-
|
|
400
|
+
// For booleans, we can only directly set a valid `bool` value to avoid UB.
|
|
401
|
+
template <>
|
|
402
|
+
struct ptrue_impl<bool, void> {
|
|
403
|
+
static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; }
|
|
404
|
+
};
|
|
191
405
|
|
|
192
|
-
/** \internal \returns
|
|
193
|
-
template<typename Packet>
|
|
194
|
-
|
|
406
|
+
/** \internal \returns a packet with all bits set to one (for scalars, the value one; for bool, true). */
|
|
407
|
+
template <typename Packet>
|
|
408
|
+
EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
|
|
409
|
+
return ptrue_impl<Packet>::run(a);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// In the general packet case, memset to zero.
|
|
413
|
+
template <typename Packet, typename EnableIf = void>
|
|
414
|
+
struct pzero_impl {
|
|
415
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
|
|
416
|
+
Packet b;
|
|
417
|
+
memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
|
|
418
|
+
return b;
|
|
419
|
+
}
|
|
420
|
+
};
|
|
421
|
+
|
|
422
|
+
// For scalars, explicitly set to Scalar(0), since the underlying representation
|
|
423
|
+
// for zero may not consist of all-zero bits.
|
|
424
|
+
template <typename T>
|
|
425
|
+
struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
|
|
426
|
+
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
|
|
427
|
+
};
|
|
428
|
+
|
|
429
|
+
/** \internal \returns packet of zeros */
|
|
430
|
+
template <typename Packet>
|
|
431
|
+
EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
|
|
432
|
+
return pzero_impl<Packet>::run(a);
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
template <typename T>
|
|
436
|
+
struct bit_and {
|
|
437
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
|
|
438
|
+
};
|
|
439
|
+
|
|
440
|
+
template <typename T>
|
|
441
|
+
struct bit_or {
|
|
442
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
|
|
443
|
+
};
|
|
444
|
+
|
|
445
|
+
template <typename T>
|
|
446
|
+
struct bit_xor {
|
|
447
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
|
|
448
|
+
};
|
|
449
|
+
|
|
450
|
+
template <typename T>
|
|
451
|
+
struct bit_not {
|
|
452
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
template <>
|
|
456
|
+
struct bit_and<bool> {
|
|
457
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; }
|
|
458
|
+
};
|
|
459
|
+
|
|
460
|
+
template <>
|
|
461
|
+
struct bit_or<bool> {
|
|
462
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; }
|
|
463
|
+
};
|
|
464
|
+
|
|
465
|
+
template <>
|
|
466
|
+
struct bit_xor<bool> {
|
|
467
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; }
|
|
468
|
+
};
|
|
469
|
+
|
|
470
|
+
template <>
|
|
471
|
+
struct bit_not<bool> {
|
|
472
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
|
|
473
|
+
};
|
|
474
|
+
|
|
475
|
+
// Use operators &, |, ^, ~.
|
|
476
|
+
template <typename T>
|
|
477
|
+
struct operator_bitwise_helper {
|
|
478
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
|
|
479
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
|
|
480
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
|
|
481
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
|
|
482
|
+
};
|
|
483
|
+
|
|
484
|
+
// Apply binary operations byte-by-byte
|
|
485
|
+
template <typename T>
|
|
486
|
+
struct bytewise_bitwise_helper {
|
|
487
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
|
|
488
|
+
return binary(a, b, bit_and<unsigned char>());
|
|
489
|
+
}
|
|
490
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or<unsigned char>()); }
|
|
491
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
|
|
492
|
+
return binary(a, b, bit_xor<unsigned char>());
|
|
493
|
+
}
|
|
494
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not<unsigned char>()); }
|
|
495
|
+
|
|
496
|
+
private:
|
|
497
|
+
template <typename Op>
|
|
498
|
+
EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
|
|
499
|
+
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
|
500
|
+
T c;
|
|
501
|
+
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
|
|
502
|
+
for (size_t i = 0; i < sizeof(T); ++i) {
|
|
503
|
+
*c_ptr++ = op(*a_ptr++);
|
|
504
|
+
}
|
|
505
|
+
return c;
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
template <typename Op>
|
|
509
|
+
EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
|
|
510
|
+
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
|
511
|
+
const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
|
|
512
|
+
T c;
|
|
513
|
+
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
|
|
514
|
+
for (size_t i = 0; i < sizeof(T); ++i) {
|
|
515
|
+
*c_ptr++ = op(*a_ptr++, *b_ptr++);
|
|
516
|
+
}
|
|
517
|
+
return c;
|
|
518
|
+
}
|
|
519
|
+
};
|
|
520
|
+
|
|
521
|
+
// In the general case, use byte-by-byte manipulation.
|
|
522
|
+
template <typename T, typename EnableIf = void>
|
|
523
|
+
struct bitwise_helper : public bytewise_bitwise_helper<T> {};
|
|
524
|
+
|
|
525
|
+
// For integers or non-trivial scalars, use binary operators.
|
|
526
|
+
template <typename T>
|
|
527
|
+
struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
|
|
528
|
+
(NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
|
|
529
|
+
: public operator_bitwise_helper<T> {};
|
|
195
530
|
|
|
196
531
|
/** \internal \returns the bitwise and of \a a and \a b */
|
|
197
|
-
template<typename Packet>
|
|
198
|
-
pand(const Packet& a, const Packet& b) {
|
|
532
|
+
template <typename Packet>
|
|
533
|
+
EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
|
|
534
|
+
return bitwise_helper<Packet>::bitwise_and(a, b);
|
|
535
|
+
}
|
|
199
536
|
|
|
200
537
|
/** \internal \returns the bitwise or of \a a and \a b */
|
|
201
|
-
template<typename Packet>
|
|
202
|
-
por(const Packet& a, const Packet& b) {
|
|
538
|
+
template <typename Packet>
|
|
539
|
+
EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
|
|
540
|
+
return bitwise_helper<Packet>::bitwise_or(a, b);
|
|
541
|
+
}
|
|
203
542
|
|
|
204
543
|
/** \internal \returns the bitwise xor of \a a and \a b */
|
|
205
|
-
template<typename Packet>
|
|
206
|
-
pxor(const Packet& a, const Packet& b) {
|
|
544
|
+
template <typename Packet>
|
|
545
|
+
EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
|
|
546
|
+
return bitwise_helper<Packet>::bitwise_xor(a, b);
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
/** \internal \returns the bitwise not of \a a */
|
|
550
|
+
template <typename Packet>
|
|
551
|
+
EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
|
|
552
|
+
return bitwise_helper<Packet>::bitwise_not(a);
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
/** \internal \returns the bitwise and of \a a and not \a b */
|
|
556
|
+
template <typename Packet>
|
|
557
|
+
EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
|
|
558
|
+
return pand(a, pnot(b));
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
/** \internal \returns a < b as a bit mask */
|
|
562
|
+
template <typename Packet>
|
|
563
|
+
EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
|
|
564
|
+
return a < b ? ptrue(a) : pzero(a);
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
/** \internal \returns a == b as a bit mask */
|
|
568
|
+
template <typename Packet>
|
|
569
|
+
EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
|
|
570
|
+
return a == b ? ptrue(a) : pzero(a);
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
/** \internal \returns a <= b as a bit mask */
|
|
574
|
+
template <typename Packet>
|
|
575
|
+
EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
|
|
576
|
+
return por(pcmp_eq(a, b), pcmp_lt(a, b));
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
|
|
580
|
+
template <typename Packet>
|
|
581
|
+
EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
|
|
582
|
+
return a >= b ? pzero(a) : ptrue(a);
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
// In the general case, use bitwise select.
|
|
586
|
+
template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
|
|
587
|
+
struct pselect_impl {
|
|
588
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
|
|
589
|
+
return por(pand(a, mask), pandnot(b, mask));
|
|
590
|
+
}
|
|
591
|
+
};
|
|
592
|
+
|
|
593
|
+
// For scalars, use ternary select.
|
|
594
|
+
template <typename Packet>
|
|
595
|
+
struct pselect_impl<Packet, true> {
|
|
596
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
|
|
597
|
+
return numext::select(mask, a, b);
|
|
598
|
+
}
|
|
599
|
+
};
|
|
600
|
+
|
|
601
|
+
/** \internal \returns \a a or \a b for each field in the packet according to \a mask */
|
|
602
|
+
template <typename Packet>
|
|
603
|
+
EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
|
604
|
+
return pselect_impl<Packet>::run(mask, a, b);
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
template <>
|
|
608
|
+
EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
|
|
609
|
+
return cond ? a : b;
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
/** \internal \returns the min or max of \a a and \a b (coeff-wise)
|
|
613
|
+
If either \a a or \a b are NaN, the result is implementation defined. */
|
|
614
|
+
template <int NaNPropagation, bool IsInteger>
|
|
615
|
+
struct pminmax_impl {
|
|
616
|
+
template <typename Packet, typename Op>
|
|
617
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
618
|
+
return op(a, b);
|
|
619
|
+
}
|
|
620
|
+
};
|
|
207
621
|
|
|
208
|
-
/** \internal \returns the
|
|
209
|
-
|
|
210
|
-
|
|
622
|
+
/** \internal \returns the min or max of \a a and \a b (coeff-wise)
|
|
623
|
+
If either \a a or \a b are NaN, NaN is returned. */
|
|
624
|
+
template <>
|
|
625
|
+
struct pminmax_impl<PropagateNaN, false> {
|
|
626
|
+
template <typename Packet, typename Op>
|
|
627
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
628
|
+
Packet not_nan_mask_a = pcmp_eq(a, a);
|
|
629
|
+
Packet not_nan_mask_b = pcmp_eq(b, b);
|
|
630
|
+
return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a);
|
|
631
|
+
}
|
|
632
|
+
};
|
|
633
|
+
|
|
634
|
+
/** \internal \returns the min or max of \a a and \a b (coeff-wise)
|
|
635
|
+
If both \a a and \a b are NaN, NaN is returned.
|
|
636
|
+
Equivalent to std::fmin(a, b) or std::fmax(a, b), respectively. */
|
|
637
|
+
template <>
|
|
638
|
+
struct pminmax_impl<PropagateNumbers, false> {
|
|
639
|
+
template <typename Packet, typename Op>
|
|
640
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
641
|
+
Packet not_nan_mask_a = pcmp_eq(a, a);
|
|
642
|
+
Packet not_nan_mask_b = pcmp_eq(b, b);
|
|
643
|
+
return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b);
|
|
644
|
+
}
|
|
645
|
+
};
|
|
646
|
+
|
|
647
|
+
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); }
|
|
648
|
+
|
|
649
|
+
/** \internal \returns the min of \a a and \a b (coeff-wise).
|
|
650
|
+
If \a a or \a b is NaN, the return value is implementation defined. */
|
|
651
|
+
template <typename Packet>
|
|
652
|
+
EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
|
|
653
|
+
return numext::mini(a, b);
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
/** \internal \returns the min of \a a and \a b (coeff-wise).
|
|
657
|
+
NaNPropagation determines the NaN propagation semantics. */
|
|
658
|
+
template <int NaNPropagation, typename Packet>
|
|
659
|
+
EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
|
|
660
|
+
constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
|
|
661
|
+
return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
/** \internal \returns the max of \a a and \a b (coeff-wise)
|
|
665
|
+
If \a a or \a b is NaN, the return value is implementation defined. */
|
|
666
|
+
template <typename Packet>
|
|
667
|
+
EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
|
|
668
|
+
return numext::maxi(a, b);
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
/** \internal \returns the max of \a a and \a b (coeff-wise).
|
|
672
|
+
NaNPropagation determines the NaN propagation semantics. */
|
|
673
|
+
template <int NaNPropagation, typename Packet>
|
|
674
|
+
EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
|
|
675
|
+
constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
|
|
676
|
+
return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
/** \internal \returns the absolute value of \a a */
|
|
680
|
+
template <typename Packet>
|
|
681
|
+
EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
|
|
682
|
+
return numext::abs(a);
|
|
683
|
+
}
|
|
684
|
+
template <>
|
|
685
|
+
EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
|
|
686
|
+
return a;
|
|
687
|
+
}
|
|
688
|
+
template <>
|
|
689
|
+
EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
|
|
690
|
+
return a;
|
|
691
|
+
}
|
|
692
|
+
template <>
|
|
693
|
+
EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
|
|
694
|
+
return a;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
/** \internal \returns padd(a, b) in the lanes selected by peven_mask(a) and psub(a, b) in the remaining lanes */
|
|
698
|
+
template <typename Packet>
|
|
699
|
+
EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
|
|
700
|
+
return pselect(peven_mask(a), padd(a, b), psub(a, b));
|
|
701
|
+
}
|
|
211
702
|
|
|
212
|
-
/** \internal \returns
|
|
213
|
-
template<typename Packet>
|
|
214
|
-
|
|
703
|
+
/** \internal \returns the phase angle of \a a */
|
|
704
|
+
template <typename Packet>
|
|
705
|
+
EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
|
|
706
|
+
using numext::arg;
|
|
707
|
+
return arg(a);
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
/** \internal \returns \a a arithmetically shifted by N bits to the right */
|
|
711
|
+
template <int N, typename T>
|
|
712
|
+
EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
|
|
713
|
+
return numext::arithmetic_shift_right(a, N);
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
/** \internal \returns \a a logically shifted by N bits to the right */
|
|
717
|
+
template <int N, typename T>
|
|
718
|
+
EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
|
|
719
|
+
return numext::logical_shift_right(a, N);
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
/** \internal \returns \a a shifted by N bits to the left */
|
|
723
|
+
template <int N, typename T>
|
|
724
|
+
EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
|
|
725
|
+
return numext::logical_shift_left(a, N);
|
|
726
|
+
}
|
|
727
|
+
|
|
728
|
+
/** \internal \returns the significand and exponent of the underlying floating point numbers
|
|
729
|
+
* See https://en.cppreference.com/w/cpp/numeric/math/frexp
|
|
730
|
+
*/
|
|
731
|
+
template <typename Packet>
|
|
732
|
+
EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
|
|
733
|
+
int exp;
|
|
734
|
+
EIGEN_USING_STD(frexp);
|
|
735
|
+
Packet result = static_cast<Packet>(frexp(a, &exp));
|
|
736
|
+
exponent = static_cast<Packet>(exp);
|
|
737
|
+
return result;
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
/** \internal \returns a * 2^((int)exponent)
|
|
741
|
+
* See https://en.cppreference.com/w/cpp/numeric/math/ldexp
|
|
742
|
+
*/
|
|
743
|
+
template <typename Packet>
|
|
744
|
+
EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
|
|
745
|
+
EIGEN_USING_STD(ldexp)
|
|
746
|
+
return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
/** \internal \returns the absolute difference |a - b| of \a a and \a b (coeff-wise) */
|
|
750
|
+
template <typename Packet>
|
|
751
|
+
EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
|
|
752
|
+
return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
/** \internal \returns a packet version of \a *from, from must be properly aligned */
|
|
756
|
+
template <typename Packet>
|
|
757
|
+
EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
|
|
758
|
+
return *from;
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
/** \internal \returns n elements of a packet version of \a *from, from must be properly aligned
|
|
762
|
+
* offset indicates the starting element in which to load and
|
|
763
|
+
* offset + n <= unpacket_traits::size
|
|
764
|
+
* All elements before offset and after the last element loaded will be initialized with zero */
|
|
765
|
+
template <typename Packet>
|
|
766
|
+
EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
|
|
767
|
+
const Index offset = 0) {
|
|
768
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
769
|
+
eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
|
|
770
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
771
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
|
|
772
|
+
for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
|
|
773
|
+
elements[i] = from[i - offset];
|
|
774
|
+
}
|
|
775
|
+
return pload<Packet>(elements);
|
|
776
|
+
}
|
|
215
777
|
|
|
216
778
|
/** \internal \returns a packet version of \a *from, (un-aligned load) */
|
|
217
|
-
template<typename Packet>
|
|
218
|
-
ploadu(const typename unpacket_traits<Packet>::type* from) {
|
|
779
|
+
template <typename Packet>
|
|
780
|
+
EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
|
|
781
|
+
return *from;
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
/** \internal \returns n elements of a packet version of \a *from, (un-aligned load)
|
|
785
|
+
* All elements before offset and after the last element loaded will be initialized with zero */
|
|
786
|
+
template <typename Packet>
|
|
787
|
+
EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
|
|
788
|
+
const Index offset = 0) {
|
|
789
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
790
|
+
eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
|
|
791
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
792
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
|
|
793
|
+
for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
|
|
794
|
+
elements[i] = from[i - offset];
|
|
795
|
+
}
|
|
796
|
+
return pload<Packet>(elements);
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
/** \internal \returns a packet version of \a *from, (un-aligned masked load)
|
|
800
|
+
* There is no generic implementation. We only have implementations for specialized
|
|
801
|
+
* cases. Generic case should not be called.
|
|
802
|
+
*/
|
|
803
|
+
template <typename Packet>
|
|
804
|
+
EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
|
|
805
|
+
const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
|
|
219
806
|
|
|
220
807
|
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
|
221
|
-
template<typename Packet>
|
|
222
|
-
pset1(const typename unpacket_traits<Packet>::type& a) {
|
|
808
|
+
template <typename Packet>
|
|
809
|
+
EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
|
|
810
|
+
return a;
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
/** \internal \returns a packet with constant coefficients set from bits */
|
|
814
|
+
template <typename Packet, typename BitsType>
|
|
815
|
+
EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
|
|
223
816
|
|
|
224
817
|
/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
|
|
225
|
-
template<typename Packet>
|
|
226
|
-
pload1(const typename unpacket_traits<Packet>::type
|
|
818
|
+
template <typename Packet>
|
|
819
|
+
EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
|
|
820
|
+
return pset1<Packet>(*a);
|
|
821
|
+
}
|
|
227
822
|
|
|
228
823
|
/** \internal \returns a packet with elements of \a *from duplicated.
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
template<typename Packet>
|
|
234
|
-
ploaddup(const typename unpacket_traits<Packet>::type* from) {
|
|
824
|
+
* For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
|
|
825
|
+
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
|
|
826
|
+
* Currently, this function is only used for scalar * complex products.
|
|
827
|
+
*/
|
|
828
|
+
template <typename Packet>
|
|
829
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
|
|
830
|
+
return *from;
|
|
831
|
+
}
|
|
235
832
|
|
|
236
833
|
/** \internal \returns a packet with elements of \a *from quadrupled.
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
template<typename Packet>
|
|
243
|
-
ploadquad(const typename unpacket_traits<Packet>::type* from)
|
|
244
|
-
|
|
834
|
+
* For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
|
|
835
|
+
* replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
|
|
836
|
+
* Currently, this function is only used in matrix products.
|
|
837
|
+
* For packet-size smaller or equal to 4, this function is equivalent to pload1
|
|
838
|
+
*/
|
|
839
|
+
template <typename Packet>
|
|
840
|
+
EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
|
|
841
|
+
return pload1<Packet>(from);
|
|
842
|
+
}
|
|
245
843
|
|
|
246
844
|
/** \internal equivalent to
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
template<typename Packet>
|
|
256
|
-
inline void pbroadcast4(const typename unpacket_traits<Packet>::type
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
a3 = pload1<Packet>(a+3);
|
|
845
|
+
* \code
|
|
846
|
+
* a0 = pload1(a+0);
|
|
847
|
+
* a1 = pload1(a+1);
|
|
848
|
+
* a2 = pload1(a+2);
|
|
849
|
+
* a3 = pload1(a+3);
|
|
850
|
+
* \endcode
|
|
851
|
+
* \sa pset1, pload1, ploaddup, pbroadcast2
|
|
852
|
+
*/
|
|
853
|
+
template <typename Packet>
|
|
854
|
+
EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
|
|
855
|
+
Packet& a2, Packet& a3) {
|
|
856
|
+
a0 = pload1<Packet>(a + 0);
|
|
857
|
+
a1 = pload1<Packet>(a + 1);
|
|
858
|
+
a2 = pload1<Packet>(a + 2);
|
|
859
|
+
a3 = pload1<Packet>(a + 3);
|
|
263
860
|
}
|
|
264
861
|
|
|
265
862
|
/** \internal equivalent to
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
template<typename Packet>
|
|
273
|
-
inline void pbroadcast2(const typename unpacket_traits<Packet>::type
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
a0 = pload1<Packet>(a+0);
|
|
277
|
-
a1 = pload1<Packet>(a+1);
|
|
863
|
+
* \code
|
|
864
|
+
* a0 = pload1(a+0);
|
|
865
|
+
* a1 = pload1(a+1);
|
|
866
|
+
* \endcode
|
|
867
|
+
* \sa pset1, pload1, ploaddup, pbroadcast4
|
|
868
|
+
*/
|
|
869
|
+
template <typename Packet>
|
|
870
|
+
EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
|
|
871
|
+
a0 = pload1<Packet>(a + 0);
|
|
872
|
+
a1 = pload1<Packet>(a + 1);
|
|
278
873
|
}
|
|
279
874
|
|
|
280
875
|
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
|
281
|
-
template<typename Packet>
|
|
282
|
-
plset(const typename unpacket_traits<Packet>::type& a) {
|
|
876
|
+
template <typename Packet>
|
|
877
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
|
|
878
|
+
return a;
|
|
879
|
+
}
|
|
283
880
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
881
|
+
template <typename Packet, typename EnableIf = void>
|
|
882
|
+
struct peven_mask_impl {
|
|
883
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) {
|
|
884
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
885
|
+
const size_t n = unpacket_traits<Packet>::size;
|
|
886
|
+
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
|
887
|
+
for (size_t i = 0; i < n; ++i) {
|
|
888
|
+
memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
|
|
889
|
+
}
|
|
890
|
+
return ploadu<Packet>(elements);
|
|
891
|
+
}
|
|
892
|
+
};
|
|
893
|
+
|
|
894
|
+
template <typename Scalar>
|
|
895
|
+
struct peven_mask_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
|
|
896
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); }
|
|
897
|
+
};
|
|
898
|
+
|
|
899
|
+
/** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0),
|
|
900
|
+
where x is the value of all 1-bits. */
|
|
901
|
+
template <typename Packet>
|
|
902
|
+
EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) {
|
|
903
|
+
return peven_mask_impl<Packet>::run(a);
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
/** \internal copy the packet \a from to \a *to, \a to must be properly aligned */
|
|
907
|
+
template <typename Scalar, typename Packet>
|
|
908
|
+
EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
|
|
909
|
+
(*to) = from;
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
/** \internal copy n elements of the packet \a from to \a *to, \a to must be properly aligned
|
|
913
|
+
* offset indicates the starting element in which to store and
|
|
914
|
+
* offset + n <= unpacket_traits::size */
|
|
915
|
+
template <typename Scalar, typename Packet>
|
|
916
|
+
EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
|
|
917
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
918
|
+
eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
|
|
919
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size];
|
|
920
|
+
pstore<Scalar>(elements, from);
|
|
921
|
+
for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
|
|
922
|
+
to[i] = elements[i + offset];
|
|
923
|
+
}
|
|
924
|
+
}
|
|
287
925
|
|
|
288
926
|
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
|
|
289
|
-
template<typename Scalar, typename Packet>
|
|
290
|
-
|
|
927
|
+
template <typename Scalar, typename Packet>
|
|
928
|
+
EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
|
|
929
|
+
(*to) = from;
|
|
930
|
+
}
|
|
291
931
|
|
|
292
|
-
|
|
293
|
-
|
|
932
|
+
/** \internal copy n elements of the packet \a from to \a *to, (un-aligned store) */
|
|
933
|
+
template <typename Scalar, typename Packet>
|
|
934
|
+
EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
|
|
935
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
936
|
+
eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
|
|
937
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size];
|
|
938
|
+
pstore<Scalar>(elements, from);
|
|
939
|
+
for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
|
|
940
|
+
to[i] = elements[i + offset];
|
|
941
|
+
}
|
|
942
|
+
}
|
|
294
943
|
|
|
295
|
-
|
|
296
|
-
|
|
944
|
+
/** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
|
|
945
|
+
* There is no generic implementation. We only have implementations for specialized
|
|
946
|
+
* cases. Generic case should not be called.
|
|
947
|
+
*/
|
|
948
|
+
template <typename Scalar, typename Packet>
|
|
949
|
+
EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
|
|
950
|
+
Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
|
|
951
|
+
|
|
952
|
+
template <typename Scalar, typename Packet>
|
|
953
|
+
EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
|
|
954
|
+
return ploadu<Packet>(from);
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
template <typename Scalar, typename Packet>
|
|
958
|
+
EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
|
|
959
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
960
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
|
|
961
|
+
for (Index i = 0; i < numext::mini(n, packet_size); i++) {
|
|
962
|
+
elements[i] = from[i * stride];
|
|
963
|
+
}
|
|
964
|
+
return pload<Packet>(elements);
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
template <typename Scalar, typename Packet>
|
|
968
|
+
EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
|
|
969
|
+
pstore(to, from);
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
template <typename Scalar, typename Packet>
|
|
973
|
+
EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
|
|
974
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
975
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size];
|
|
976
|
+
pstore<Scalar>(elements, from);
|
|
977
|
+
for (Index i = 0; i < numext::mini(n, packet_size); i++) {
|
|
978
|
+
to[i * stride] = elements[i];
|
|
979
|
+
}
|
|
980
|
+
}
|
|
297
981
|
|
|
298
982
|
/** \internal tries to do cache prefetching of \a addr */
|
|
299
|
-
template<typename Scalar>
|
|
300
|
-
{
|
|
301
|
-
#
|
|
302
|
-
|
|
983
|
+
template <typename Scalar>
|
|
984
|
+
EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
|
|
985
|
+
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
|
986
|
+
// do nothing
|
|
987
|
+
#elif defined(EIGEN_CUDA_ARCH)
|
|
988
|
+
#if defined(__LP64__) || EIGEN_OS_WIN64
|
|
303
989
|
// 64-bit pointer operand constraint for inlined asm
|
|
304
990
|
asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
|
|
305
991
|
#else
|
|
@@ -311,280 +997,706 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
|
|
|
311
997
|
#endif
|
|
312
998
|
}
|
|
313
999
|
|
|
314
|
-
/** \internal \returns the first element of a packet */
|
|
315
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
|
|
316
|
-
{ return a; }
|
|
317
|
-
|
|
318
|
-
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
|
|
319
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
320
|
-
preduxp(const Packet* vecs) { return vecs[0]; }
|
|
321
|
-
|
|
322
|
-
/** \internal \returns the sum of the elements of \a a*/
|
|
323
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
|
324
|
-
{ return a; }
|
|
325
|
-
|
|
326
|
-
/** \internal \returns the sum of the elements of \a a by block of 4 elements.
|
|
327
|
-
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
|
328
|
-
* For packet-size smaller or equal to 4, this boils down to a noop.
|
|
329
|
-
*/
|
|
330
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
|
331
|
-
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
|
332
|
-
predux_downto4(const Packet& a)
|
|
333
|
-
{ return a; }
|
|
334
|
-
|
|
335
|
-
/** \internal \returns the product of the elements of \a a*/
|
|
336
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
|
|
337
|
-
{ return a; }
|
|
338
|
-
|
|
339
|
-
/** \internal \returns the min of the elements of \a a*/
|
|
340
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
|
|
341
|
-
{ return a; }
|
|
342
|
-
|
|
343
|
-
/** \internal \returns the max of the elements of \a a*/
|
|
344
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
|
|
345
|
-
{ return a; }
|
|
346
|
-
|
|
347
1000
|
/** \internal \returns the reversed elements of \a a*/
|
|
348
|
-
template<typename Packet>
|
|
349
|
-
|
|
1001
|
+
template <typename Packet>
|
|
1002
|
+
EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
|
|
1003
|
+
return a;
|
|
1004
|
+
}
|
|
350
1005
|
|
|
351
1006
|
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
|
352
|
-
template<typename Packet>
|
|
353
|
-
{
|
|
354
|
-
return Packet(
|
|
1007
|
+
template <typename Packet>
|
|
1008
|
+
EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
|
|
1009
|
+
return Packet(numext::imag(a), numext::real(a));
|
|
355
1010
|
}
|
|
356
1011
|
|
|
357
1012
|
/**************************
|
|
358
|
-
* Special math functions
|
|
359
|
-
***************************/
|
|
1013
|
+
* Special math functions
|
|
1014
|
+
***************************/
|
|
1015
|
+
|
|
1016
|
+
/** \internal \returns isnan(a) */
|
|
1017
|
+
template <typename Packet>
|
|
1018
|
+
EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
|
|
1019
|
+
return pandnot(ptrue(a), pcmp_eq(a, a));
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
/** \internal \returns isinf(a) */
|
|
1023
|
+
template <typename Packet>
|
|
1024
|
+
EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
|
|
1025
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1026
|
+
constexpr Scalar inf = NumTraits<Scalar>::infinity();
|
|
1027
|
+
return pcmp_eq(pabs(a), pset1<Packet>(inf));
|
|
1028
|
+
}
|
|
360
1029
|
|
|
361
1030
|
/** \internal \returns the sine of \a a (coeff-wise) */
|
|
362
|
-
template<typename Packet>
|
|
363
|
-
Packet psin(const Packet& a) {
|
|
1031
|
+
template <typename Packet>
|
|
1032
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
|
|
1033
|
+
EIGEN_USING_STD(sin);
|
|
1034
|
+
return sin(a);
|
|
1035
|
+
}
|
|
364
1036
|
|
|
365
1037
|
/** \internal \returns the cosine of \a a (coeff-wise) */
|
|
366
|
-
template<typename Packet>
|
|
367
|
-
Packet pcos(const Packet& a) {
|
|
1038
|
+
template <typename Packet>
|
|
1039
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
|
|
1040
|
+
EIGEN_USING_STD(cos);
|
|
1041
|
+
return cos(a);
|
|
1042
|
+
}
|
|
368
1043
|
|
|
369
1044
|
/** \internal \returns the tan of \a a (coeff-wise) */
|
|
370
|
-
template<typename Packet>
|
|
371
|
-
Packet ptan(const Packet& a) {
|
|
1045
|
+
template <typename Packet>
|
|
1046
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
|
|
1047
|
+
EIGEN_USING_STD(tan);
|
|
1048
|
+
return tan(a);
|
|
1049
|
+
}
|
|
372
1050
|
|
|
373
1051
|
/** \internal \returns the arc sine of \a a (coeff-wise) */
|
|
374
|
-
template<typename Packet>
|
|
375
|
-
Packet pasin(const Packet& a) {
|
|
1052
|
+
template <typename Packet>
|
|
1053
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
|
|
1054
|
+
EIGEN_USING_STD(asin);
|
|
1055
|
+
return asin(a);
|
|
1056
|
+
}
|
|
376
1057
|
|
|
377
1058
|
/** \internal \returns the arc cosine of \a a (coeff-wise) */
|
|
378
|
-
template<typename Packet>
|
|
379
|
-
Packet pacos(const Packet& a) {
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
Packet patan(const Packet& a) { using std::atan; return atan(a); }
|
|
1059
|
+
template <typename Packet>
|
|
1060
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
|
|
1061
|
+
EIGEN_USING_STD(acos);
|
|
1062
|
+
return acos(a);
|
|
1063
|
+
}
|
|
384
1064
|
|
|
385
1065
|
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
|
386
|
-
template<typename Packet>
|
|
387
|
-
Packet psinh(const Packet& a) {
|
|
1066
|
+
template <typename Packet>
|
|
1067
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
|
|
1068
|
+
EIGEN_USING_STD(sinh);
|
|
1069
|
+
return sinh(a);
|
|
1070
|
+
}
|
|
388
1071
|
|
|
389
1072
|
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
|
390
|
-
template<typename Packet>
|
|
391
|
-
Packet pcosh(const Packet& a) {
|
|
1073
|
+
template <typename Packet>
|
|
1074
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
|
|
1075
|
+
EIGEN_USING_STD(cosh);
|
|
1076
|
+
return cosh(a);
|
|
1077
|
+
}
|
|
1078
|
+
|
|
1079
|
+
/** \internal \returns the arc tangent of \a a (coeff-wise) */
|
|
1080
|
+
template <typename Packet>
|
|
1081
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
|
|
1082
|
+
EIGEN_USING_STD(atan);
|
|
1083
|
+
return atan(a);
|
|
1084
|
+
}
|
|
392
1085
|
|
|
393
1086
|
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
|
394
|
-
template<typename Packet>
|
|
395
|
-
Packet ptanh(const Packet& a) {
|
|
1087
|
+
template <typename Packet>
|
|
1088
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
|
|
1089
|
+
EIGEN_USING_STD(tanh);
|
|
1090
|
+
return tanh(a);
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
/** \internal \returns the inverse hyperbolic tangent of \a a (coeff-wise) */
|
|
1094
|
+
template <typename Packet>
|
|
1095
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
|
|
1096
|
+
EIGEN_USING_STD(atanh);
|
|
1097
|
+
return atanh(a);
|
|
1098
|
+
}
|
|
396
1099
|
|
|
397
1100
|
/** \internal \returns the exp of \a a (coeff-wise) */
|
|
398
|
-
template<typename Packet>
|
|
399
|
-
Packet pexp(const Packet& a) {
|
|
1101
|
+
template <typename Packet>
|
|
1102
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
|
|
1103
|
+
return numext::exp(a);
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
/** \internal \returns the exp2 of \a a (coeff-wise) */
|
|
1107
|
+
template <typename Packet>
|
|
1108
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
|
|
1109
|
+
return numext::exp2(a);
|
|
1110
|
+
}
|
|
1111
|
+
|
|
1112
|
+
/** \internal \returns the expm1 of \a a (coeff-wise) */
|
|
1113
|
+
template <typename Packet>
|
|
1114
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
|
|
1115
|
+
return numext::expm1(a);
|
|
1116
|
+
}
|
|
400
1117
|
|
|
401
1118
|
/** \internal \returns the log of \a a (coeff-wise) */
|
|
402
|
-
template<typename Packet>
|
|
403
|
-
Packet plog(const Packet& a) {
|
|
1119
|
+
template <typename Packet>
|
|
1120
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
|
|
1121
|
+
EIGEN_USING_STD(log);
|
|
1122
|
+
return log(a);
|
|
1123
|
+
}
|
|
404
1124
|
|
|
405
1125
|
/** \internal \returns the log1p of \a a (coeff-wise) */
|
|
406
|
-
template<typename Packet>
|
|
407
|
-
Packet plog1p(const Packet& a) {
|
|
1126
|
+
template <typename Packet>
|
|
1127
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
|
|
1128
|
+
return numext::log1p(a);
|
|
1129
|
+
}
|
|
408
1130
|
|
|
409
1131
|
/** \internal \returns the log10 of \a a (coeff-wise) */
|
|
410
|
-
template<typename Packet>
|
|
411
|
-
Packet plog10(const Packet& a) {
|
|
1132
|
+
template <typename Packet>
|
|
1133
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
|
|
1134
|
+
EIGEN_USING_STD(log10);
|
|
1135
|
+
return log10(a);
|
|
1136
|
+
}
|
|
1137
|
+
|
|
1138
|
+
/** \internal \returns the log2 of \a a (coeff-wise) */
|
|
1139
|
+
template <typename Packet>
|
|
1140
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
|
|
1141
|
+
using Scalar = typename internal::unpacket_traits<Packet>::type;
|
|
1142
|
+
using RealScalar = typename NumTraits<Scalar>::Real;
|
|
1143
|
+
return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
|
|
1144
|
+
}
|
|
412
1145
|
|
|
413
1146
|
/** \internal \returns the square-root of \a a (coeff-wise) */
|
|
414
|
-
template<typename Packet>
|
|
415
|
-
Packet psqrt(const Packet& a) {
|
|
1147
|
+
template <typename Packet>
|
|
1148
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
|
|
1149
|
+
return numext::sqrt(a);
|
|
1150
|
+
}
|
|
416
1151
|
|
|
417
|
-
/** \internal \returns the
|
|
418
|
-
template<typename Packet>
|
|
419
|
-
Packet
|
|
420
|
-
return
|
|
1152
|
+
/** \internal \returns the cube-root of \a a (coeff-wise) */
|
|
1153
|
+
template <typename Packet>
|
|
1154
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
|
|
1155
|
+
return numext::cbrt(a);
|
|
421
1156
|
}
|
|
422
1157
|
|
|
1158
|
+
template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
|
|
1159
|
+
bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
|
|
1160
|
+
struct nearest_integer_packetop_impl {
|
|
1161
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
|
|
1162
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
|
|
1163
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
|
|
1164
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
|
|
1165
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
|
|
1166
|
+
};
|
|
1167
|
+
|
|
423
1168
|
/** \internal \returns the rounded value of \a a (coeff-wise) */
|
|
424
|
-
template<typename Packet>
|
|
425
|
-
Packet pround(const Packet& a) {
|
|
1169
|
+
template <typename Packet>
|
|
1170
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
|
|
1171
|
+
return nearest_integer_packetop_impl<Packet>::run_round(a);
|
|
1172
|
+
}
|
|
426
1173
|
|
|
427
1174
|
/** \internal \returns the floor of \a a (coeff-wise) */
|
|
428
|
-
template<typename Packet>
|
|
429
|
-
Packet pfloor(const Packet& a) {
|
|
1175
|
+
template <typename Packet>
|
|
1176
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
|
|
1177
|
+
return nearest_integer_packetop_impl<Packet>::run_floor(a);
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1180
|
+
/** \internal \returns the rounded value of \a a (coeff-wise) with current
|
|
1181
|
+
* rounding mode */
|
|
1182
|
+
template <typename Packet>
|
|
1183
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
|
|
1184
|
+
return nearest_integer_packetop_impl<Packet>::run_rint(a);
|
|
1185
|
+
}
|
|
430
1186
|
|
|
431
1187
|
/** \internal \returns the ceil of \a a (coeff-wise) */
|
|
432
|
-
template<typename Packet>
|
|
433
|
-
Packet pceil(const Packet& a) {
|
|
1188
|
+
template <typename Packet>
|
|
1189
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
|
|
1190
|
+
return nearest_integer_packetop_impl<Packet>::run_ceil(a);
|
|
1191
|
+
}
|
|
434
1192
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
template<typename Packet>
|
|
442
|
-
|
|
443
|
-
{
|
|
444
|
-
|
|
1193
|
+
/** \internal \returns the truncation of \a a (coeff-wise) */
|
|
1194
|
+
template <typename Packet>
|
|
1195
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
|
|
1196
|
+
return nearest_integer_packetop_impl<Packet>::run_trunc(a);
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
template <typename Packet, typename EnableIf = void>
|
|
1200
|
+
struct psign_impl {
|
|
1201
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
|
|
1202
|
+
};
|
|
1203
|
+
|
|
1204
|
+
/** \internal \returns the sign of \a a (coeff-wise) */
|
|
1205
|
+
template <typename Packet>
|
|
1206
|
+
EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
|
|
1207
|
+
return psign_impl<Packet>::run(a);
|
|
445
1208
|
}
|
|
446
1209
|
|
|
1210
|
+
template <>
|
|
1211
|
+
EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
|
|
1212
|
+
return a;
|
|
1213
|
+
}
|
|
1214
|
+
|
|
1215
|
+
/** \internal \returns the first element of a packet */
|
|
1216
|
+
template <typename Packet>
|
|
1217
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
|
|
1218
|
+
return a;
|
|
1219
|
+
}
|
|
1220
|
+
|
|
1221
|
+
/** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
|
|
1222
|
+
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
|
1223
|
+
* For packet-size smaller or equal to 4, this boils down to a noop.
|
|
1224
|
+
*/
|
|
1225
|
+
template <typename Packet>
|
|
1226
|
+
EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
|
|
1227
|
+
typename unpacket_traits<Packet>::half, Packet>
|
|
1228
|
+
predux_half_dowto4(const Packet& a) {
|
|
1229
|
+
return a;
|
|
1230
|
+
}
|
|
1231
|
+
|
|
1232
|
+
// Slow generic implementation of Packet reduction.
|
|
1233
|
+
template <typename Packet, typename Op>
|
|
1234
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
|
|
1235
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1236
|
+
const size_t n = unpacket_traits<Packet>::size;
|
|
1237
|
+
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
|
1238
|
+
pstoreu<Scalar>(elements, a);
|
|
1239
|
+
for (size_t k = n / 2; k > 0; k /= 2) {
|
|
1240
|
+
for (size_t i = 0; i < k; ++i) {
|
|
1241
|
+
elements[i] = op(elements[i], elements[i + k]);
|
|
1242
|
+
}
|
|
1243
|
+
}
|
|
1244
|
+
return elements[0];
|
|
1245
|
+
}
|
|
1246
|
+
|
|
1247
|
+
/** \internal \returns the sum of the elements of \a a*/
|
|
1248
|
+
template <typename Packet>
|
|
1249
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
|
|
1250
|
+
return a;
|
|
1251
|
+
}
|
|
1252
|
+
|
|
1253
|
+
/** \internal \returns the product of the elements of \a a */
|
|
1254
|
+
template <typename Packet>
|
|
1255
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
|
|
1256
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1257
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
|
|
1258
|
+
}
|
|
1259
|
+
|
|
1260
|
+
/** \internal \returns the min of the elements of \a a */
|
|
1261
|
+
template <typename Packet>
|
|
1262
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
|
|
1263
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1264
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<Scalar>)));
|
|
1265
|
+
}
|
|
1266
|
+
|
|
1267
|
+
/** \internal \returns the max of the elements of \a a */
|
|
1268
|
+
template <typename Packet>
|
|
1269
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
|
|
1270
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1271
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<Scalar>)));
|
|
1272
|
+
}
|
|
1273
|
+
|
|
1274
|
+
template <int NaNPropagation, typename Packet>
|
|
1275
|
+
struct predux_min_max_helper_impl {
|
|
1276
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1277
|
+
static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits<Scalar>::IsInteger;
|
|
1278
|
+
template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
|
|
1279
|
+
static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
|
|
1280
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
|
|
1281
|
+
}
|
|
1282
|
+
template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
|
|
1283
|
+
static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
|
|
1284
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
|
|
1285
|
+
}
|
|
1286
|
+
template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
|
|
1287
|
+
static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
|
|
1288
|
+
return predux_min(a);
|
|
1289
|
+
}
|
|
1290
|
+
template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
|
|
1291
|
+
static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
|
|
1292
|
+
return predux_max(a);
|
|
1293
|
+
}
|
|
1294
|
+
};
|
|
1295
|
+
|
|
1296
|
+
template <int NaNPropagation, typename Packet>
|
|
1297
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
|
|
1298
|
+
return predux_min_max_helper_impl<NaNPropagation, Packet>::run_min(a);
|
|
1299
|
+
}
|
|
1300
|
+
|
|
1301
|
+
template <int NaNPropagation, typename Packet>
|
|
1302
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
|
|
1303
|
+
return predux_min_max_helper_impl<NaNPropagation, Packet>::run_max(a);
|
|
1304
|
+
}
|
|
1305
|
+
|
|
1306
|
+
#undef EIGEN_BINARY_OP_NAN_PROPAGATION
|
|
1307
|
+
|
|
1308
|
+
/** \internal \returns true if all coeffs of \a a mean "true"
|
|
1309
|
+
* It is supposed to be called on values returned by pcmp_*.
|
|
1310
|
+
*/
|
|
1311
|
+
// not needed yet
|
|
1312
|
+
// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
|
|
1313
|
+
// { return bool(a); }
|
|
1314
|
+
|
|
1315
|
+
/** \internal \returns true if any coeff of \a a means "true"
|
|
1316
|
+
* It is supposed to be called on values returned by pcmp_*.
|
|
1317
|
+
*/
|
|
1318
|
+
template <typename Packet>
|
|
1319
|
+
EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
|
|
1320
|
+
// Dirty but generic implementation where "true" is assumed to be non-zero and identical for every coefficient.
|
|
1321
|
+
// It is expected that "true" is either:
|
|
1322
|
+
// - Scalar(1)
|
|
1323
|
+
// - bits full of ones (NaN for floats),
|
|
1324
|
+
// - or first bit equals to 1 (1 for ints, smallest denormal for floats).
|
|
1325
|
+
// For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
|
|
1326
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1327
|
+
return numext::not_equal_strict(predux(a), Scalar(0));
|
|
1328
|
+
}
|
|
1329
|
+
|
|
1330
|
+
/***************************************************************************
|
|
1331
|
+
* The following functions might not have to be overwritten for vectorized types
|
|
1332
|
+
***************************************************************************/
|
|
1333
|
+
|
|
1334
|
+
template <typename Packet, typename EnableIf = void>
|
|
1335
|
+
struct pmadd_impl {
|
|
1336
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
|
|
1337
|
+
return padd(pmul(a, b), c);
|
|
1338
|
+
}
|
|
1339
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
|
|
1340
|
+
return psub(pmul(a, b), c);
|
|
1341
|
+
}
|
|
1342
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
|
|
1343
|
+
return psub(c, pmul(a, b));
|
|
1344
|
+
}
|
|
1345
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
|
|
1346
|
+
return pnegate(pmadd(a, b, c));
|
|
1347
|
+
}
|
|
1348
|
+
};
|
|
1349
|
+
|
|
1350
|
+
template <typename Scalar>
|
|
1351
|
+
struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
|
|
1352
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
1353
|
+
return numext::madd<Scalar>(a, b, c);
|
|
1354
|
+
}
|
|
1355
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
1356
|
+
return numext::madd<Scalar>(a, b, Scalar(-c));
|
|
1357
|
+
}
|
|
1358
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
1359
|
+
return numext::madd<Scalar>(Scalar(-a), b, c);
|
|
1360
|
+
}
|
|
1361
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
1362
|
+
return -Scalar(numext::madd<Scalar>(a, b, c));
|
|
1363
|
+
}
|
|
1364
|
+
};
|
|
1365
|
+
|
|
1366
|
+
// Multiply-add instructions.
|
|
447
1367
|
/** \internal \returns a * b + c (coeff-wise) */
|
|
448
|
-
template<typename Packet>
|
|
449
|
-
pmadd(const Packet&
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
1368
|
+
template <typename Packet>
|
|
1369
|
+
EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
|
|
1370
|
+
return pmadd_impl<Packet>::pmadd(a, b, c);
|
|
1371
|
+
}
|
|
1372
|
+
|
|
1373
|
+
/** \internal \returns a * b - c (coeff-wise) */
|
|
1374
|
+
template <typename Packet>
|
|
1375
|
+
EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
|
|
1376
|
+
return pmadd_impl<Packet>::pmsub(a, b, c);
|
|
1377
|
+
}
|
|
1378
|
+
|
|
1379
|
+
/** \internal \returns -(a * b) + c (coeff-wise) */
|
|
1380
|
+
template <typename Packet>
|
|
1381
|
+
EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
|
|
1382
|
+
return pmadd_impl<Packet>::pnmadd(a, b, c);
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
/** \internal \returns -(a * b + c) (coeff-wise) */
|
|
1386
|
+
template <typename Packet>
|
|
1387
|
+
EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
|
|
1388
|
+
return pmadd_impl<Packet>::pnmsub(a, b, c);
|
|
1389
|
+
}
|
|
1390
|
+
|
|
1391
|
+
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned
|
|
1392
|
+
*/
|
|
1393
|
+
// NOTE: this function must really be templated on the packet type (think about different packet types for the same
|
|
1394
|
+
// scalar type)
|
|
1395
|
+
template <typename Packet>
|
|
1396
|
+
inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
|
|
1397
|
+
pstore(to, pset1<Packet>(a));
|
|
1398
|
+
}
|
|
453
1399
|
|
|
454
1400
|
/** \internal \returns a packet version of \a *from.
|
|
455
|
-
|
|
456
|
-
template<typename Packet, int Alignment>
|
|
457
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
|
|
458
|
-
|
|
459
|
-
if(Alignment >= unpacket_traits<Packet>::alignment)
|
|
1401
|
+
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
|
1402
|
+
template <typename Packet, int Alignment>
|
|
1403
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
|
|
1404
|
+
if (Alignment >= unpacket_traits<Packet>::alignment)
|
|
460
1405
|
return pload<Packet>(from);
|
|
461
1406
|
else
|
|
462
1407
|
return ploadu<Packet>(from);
|
|
463
1408
|
}
|
|
464
1409
|
|
|
1410
|
+
/** \internal \returns n elements of a packet version of \a *from.
|
|
1411
|
+
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
|
1412
|
+
template <typename Packet, int Alignment>
|
|
1413
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
|
|
1414
|
+
const Index n, const Index offset = 0) {
|
|
1415
|
+
if (Alignment >= unpacket_traits<Packet>::alignment)
|
|
1416
|
+
return pload_partial<Packet>(from, n, offset);
|
|
1417
|
+
else
|
|
1418
|
+
return ploadu_partial<Packet>(from, n, offset);
|
|
1419
|
+
}
|
|
1420
|
+
|
|
465
1421
|
/** \internal copy the packet \a from to \a *to.
|
|
466
|
-
|
|
467
|
-
template<typename Scalar, typename Packet, int Alignment>
|
|
468
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
|
|
469
|
-
|
|
470
|
-
if(Alignment >= unpacket_traits<Packet>::alignment)
|
|
1422
|
+
* The pointer \a to must be aligned on a \a Alignment bytes boundary. */
|
|
1423
|
+
template <typename Scalar, typename Packet, int Alignment>
|
|
1424
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
|
|
1425
|
+
if (Alignment >= unpacket_traits<Packet>::alignment)
|
|
471
1426
|
pstore(to, from);
|
|
472
1427
|
else
|
|
473
1428
|
pstoreu(to, from);
|
|
474
1429
|
}
|
|
475
1430
|
|
|
476
|
-
/** \internal
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
1431
|
+
/** \internal copy n elements of the packet \a from to \a *to.
|
|
1432
|
+
* The pointer \a to must be aligned on a \a Alignment bytes boundary. */
|
|
1433
|
+
template <typename Scalar, typename Packet, int Alignment>
|
|
1434
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
|
|
1435
|
+
const Index offset = 0) {
|
|
1436
|
+
if (Alignment >= unpacket_traits<Packet>::alignment)
|
|
1437
|
+
pstore_partial(to, from, n, offset);
|
|
1438
|
+
else
|
|
1439
|
+
pstoreu_partial(to, from, n, offset);
|
|
485
1440
|
}
|
|
486
1441
|
|
|
487
|
-
/** \internal
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
|
|
496
|
-
* of \a first and \a Offset first elements of \a second.
|
|
497
|
-
*
|
|
498
|
-
* This function is currently only used to optimize matrix-vector products on unligned matrices.
|
|
499
|
-
* It takes 2 packets that represent a contiguous memory array, and returns a packet starting
|
|
500
|
-
* at the position \a Offset. For instance, for packets of 4 elements, we have:
|
|
501
|
-
* Input:
|
|
502
|
-
* - first = {f0,f1,f2,f3}
|
|
503
|
-
* - second = {s0,s1,s2,s3}
|
|
504
|
-
* Output:
|
|
505
|
-
* - if Offset==0 then {f0,f1,f2,f3}
|
|
506
|
-
* - if Offset==1 then {f1,f2,f3,s0}
|
|
507
|
-
* - if Offset==2 then {f2,f3,s0,s1}
|
|
508
|
-
* - if Offset==3 then {f3,s0,s1,s3}
|
|
509
|
-
*/
|
|
510
|
-
template<int Offset,typename PacketType>
|
|
511
|
-
inline void palign(PacketType& first, const PacketType& second)
|
|
512
|
-
{
|
|
513
|
-
palign_impl<Offset,PacketType>::run(first,second);
|
|
1442
|
+
/** \internal \returns a packet version of \a *from.
|
|
1443
|
+
* Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
|
|
1444
|
+
* hardware if available to speedup the loading of data that won't be modified
|
|
1445
|
+
* by the current computation.
|
|
1446
|
+
*/
|
|
1447
|
+
template <typename Packet, int LoadMode>
|
|
1448
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
|
|
1449
|
+
return ploadt<Packet, LoadMode>(from);
|
|
514
1450
|
}
|
|
515
1451
|
|
|
516
1452
|
/***************************************************************************
|
|
517
|
-
* Fast complex products (GCC generates a function call which is very slow)
|
|
518
|
-
***************************************************************************/
|
|
1453
|
+
* Fast complex products (GCC generates a function call which is very slow)
|
|
1454
|
+
***************************************************************************/
|
|
519
1455
|
|
|
520
1456
|
// Eigen+CUDA does not support complexes.
|
|
521
|
-
#
|
|
1457
|
+
#if !defined(EIGEN_GPUCC)
|
|
522
1458
|
|
|
523
|
-
template<>
|
|
524
|
-
|
|
1459
|
+
template <>
|
|
1460
|
+
inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
|
|
1461
|
+
return std::complex<float>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
|
|
1462
|
+
}
|
|
525
1463
|
|
|
526
|
-
template<>
|
|
527
|
-
|
|
1464
|
+
template <>
|
|
1465
|
+
inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
|
|
1466
|
+
return std::complex<double>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
|
|
1467
|
+
}
|
|
528
1468
|
|
|
529
1469
|
#endif
|
|
530
1470
|
|
|
531
|
-
|
|
532
1471
|
/***************************************************************************
|
|
533
1472
|
* PacketBlock, that is a collection of N packets where the number of words
|
|
534
1473
|
* in the packet is a multiple of N.
|
|
535
|
-
***************************************************************************/
|
|
536
|
-
template <typename Packet,int N=unpacket_traits<Packet>::size>
|
|
1474
|
+
***************************************************************************/
|
|
1475
|
+
template <typename Packet, int N = unpacket_traits<Packet>::size>
|
|
1476
|
+
struct PacketBlock {
|
|
537
1477
|
Packet packet[N];
|
|
538
1478
|
};
|
|
539
1479
|
|
|
540
|
-
template<typename Packet>
|
|
541
|
-
ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
|
|
1480
|
+
template <typename Packet>
|
|
1481
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
|
|
542
1482
|
// Nothing to do in the scalar case, i.e. a 1x1 matrix.
|
|
543
1483
|
}
|
|
544
1484
|
|
|
545
1485
|
/***************************************************************************
|
|
546
1486
|
* Selector, i.e. vector of N boolean values used to select (i.e. blend)
|
|
547
1487
|
* words from 2 packets.
|
|
548
|
-
***************************************************************************/
|
|
549
|
-
template <size_t N>
|
|
1488
|
+
***************************************************************************/
|
|
1489
|
+
template <size_t N>
|
|
1490
|
+
struct Selector {
|
|
550
1491
|
bool select[N];
|
|
551
1492
|
};
|
|
552
1493
|
|
|
553
|
-
template<typename Packet>
|
|
554
|
-
pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
|
|
1494
|
+
template <typename Packet>
|
|
1495
|
+
EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
|
|
1496
|
+
const Packet& thenPacket, const Packet& elsePacket) {
|
|
555
1497
|
return ifPacket.select[0] ? thenPacket : elsePacket;
|
|
556
1498
|
}
|
|
557
1499
|
|
|
558
|
-
/** \internal \returns
|
|
559
|
-
template<typename Packet>
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
1500
|
+
/** \internal \returns 1 / a (coeff-wise) */
|
|
1501
|
+
template <typename Packet>
|
|
1502
|
+
EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
|
|
1503
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1504
|
+
return pdiv(pset1<Packet>(Scalar(1)), a);
|
|
1505
|
+
}
|
|
1506
|
+
|
|
1507
|
+
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
|
|
1508
|
+
template <typename Packet>
|
|
1509
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
|
|
1510
|
+
return preciprocal<Packet>(psqrt(a));
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
|
|
1514
|
+
bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
|
|
1515
|
+
struct psignbit_impl;
|
|
1516
|
+
template <typename Packet, bool IsInteger>
|
|
1517
|
+
struct psignbit_impl<Packet, true, IsInteger> {
|
|
1518
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
|
|
1519
|
+
};
|
|
1520
|
+
template <typename Packet>
|
|
1521
|
+
struct psignbit_impl<Packet, false, false> {
|
|
1522
|
+
// generic implementation if not specialized in PacketMath.h
|
|
1523
|
+
// slower than arithmetic shift
|
|
1524
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1525
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
|
|
1526
|
+
const Packet cst_pos_one = pset1<Packet>(Scalar(1));
|
|
1527
|
+
const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
|
|
1528
|
+
return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
|
|
1529
|
+
}
|
|
1530
|
+
};
|
|
1531
|
+
template <typename Packet>
|
|
1532
|
+
struct psignbit_impl<Packet, false, true> {
|
|
1533
|
+
// generic implementation for integer packets
|
|
1534
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
|
|
1535
|
+
};
|
|
1536
|
+
/** \internal \returns the sign bit of \a a as a bitmask*/
|
|
1537
|
+
template <typename Packet>
|
|
1538
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
|
|
1539
|
+
return psignbit_impl<Packet>::run(a);
|
|
1540
|
+
}
|
|
1541
|
+
|
|
1542
|
+
/** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */
|
|
1543
|
+
template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
|
|
1544
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
|
|
1545
|
+
return numext::atan2(y, x);
|
|
1546
|
+
}
|
|
1547
|
+
|
|
1548
|
+
/** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */
|
|
1549
|
+
template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
|
|
1550
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
|
|
1551
|
+
typedef typename internal::unpacket_traits<Packet>::type Scalar;
|
|
1552
|
+
|
|
1553
|
+
// See https://en.cppreference.com/w/cpp/numeric/math/atan2
|
|
1554
|
+
// for how corner cases are supposed to be handled according to the
|
|
1555
|
+
// IEEE floating-point standard (IEC 60559).
|
|
1556
|
+
const Packet kSignMask = pset1<Packet>(-Scalar(0));
|
|
1557
|
+
const Packet kZero = pzero(x);
|
|
1558
|
+
const Packet kOne = pset1<Packet>(Scalar(1));
|
|
1559
|
+
const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
|
|
1560
|
+
|
|
1561
|
+
const Packet x_has_signbit = psignbit(x);
|
|
1562
|
+
const Packet y_signmask = pand(y, kSignMask);
|
|
1563
|
+
const Packet x_signmask = pand(x, kSignMask);
|
|
1564
|
+
const Packet result_signmask = pxor(y_signmask, x_signmask);
|
|
1565
|
+
const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
|
|
1566
|
+
|
|
1567
|
+
const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
|
|
1568
|
+
const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
|
|
1569
|
+
|
|
1570
|
+
Packet arg = pdiv(y, x);
|
|
1571
|
+
arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
|
|
1572
|
+
arg = pselect(x_and_y_are_zero, result_signmask, arg);
|
|
1573
|
+
|
|
1574
|
+
Packet result = patan(arg);
|
|
1575
|
+
result = padd(result, shift);
|
|
1576
|
+
return result;
|
|
1577
|
+
}
|
|
1578
|
+
|
|
1579
|
+
/** \internal \returns the argument of \a a as a complex number */
|
|
1580
|
+
template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
|
|
1581
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
|
|
1582
|
+
return Packet(numext::arg(a));
|
|
1583
|
+
}
|
|
1584
|
+
|
|
1585
|
+
/** \internal \returns the argument of \a a as a complex number */
|
|
1586
|
+
template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
|
|
1587
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
|
|
1588
|
+
EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
|
|
1589
|
+
THIS METHOD IS FOR COMPLEX TYPES ONLY)
|
|
1590
|
+
using RealPacket = typename unpacket_traits<Packet>::as_real;
|
|
1591
|
+
// a // r i r i ...
|
|
1592
|
+
RealPacket aflip = pcplxflip(a).v; // i r i r ...
|
|
1593
|
+
RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
|
|
1594
|
+
return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
|
|
1595
|
+
}
|
|
1596
|
+
|
|
1597
|
+
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
|
1598
|
+
* outside this range are not defined. \a *from does not need to be aligned, and can be null if \a count is zero.*/
|
|
1599
|
+
template <typename Packet>
|
|
1600
|
+
EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
|
1601
|
+
Index count) {
|
|
1602
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1603
|
+
constexpr Index PacketSize = unpacket_traits<Packet>::size;
|
|
1604
|
+
eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
|
|
1605
|
+
Scalar aux[PacketSize] = {};
|
|
1606
|
+
for (Index k = begin; k < begin + count; k++) {
|
|
1607
|
+
aux[k] = from[k];
|
|
1608
|
+
}
|
|
1609
|
+
return ploadu<Packet>(aux);
|
|
1610
|
+
}
|
|
1611
|
+
|
|
1612
|
+
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
|
1613
|
+
* outside this range are not defined. \a *from must be aligned, and cannot be null.*/
|
|
1614
|
+
template <typename Packet>
|
|
1615
|
+
EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
|
1616
|
+
Index count) {
|
|
1617
|
+
return ploaduSegment<Packet>(from, begin, count);
|
|
1618
|
+
}
|
|
1619
|
+
|
|
1620
|
+
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
|
1621
|
+
Elements outside of the range [begin, begin + count) are not defined. \a *to does not need to be aligned, and can be
|
|
1622
|
+
null if \a count is zero.*/
|
|
1623
|
+
template <typename Scalar, typename Packet>
|
|
1624
|
+
EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
|
1625
|
+
constexpr Index PacketSize = unpacket_traits<Packet>::size;
|
|
1626
|
+
eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
|
|
1627
|
+
Scalar aux[PacketSize];
|
|
1628
|
+
pstoreu<Scalar, Packet>(aux, from);
|
|
1629
|
+
for (Index k = begin; k < begin + count; k++) {
|
|
1630
|
+
to[k] = aux[k];
|
|
1631
|
+
}
|
|
1632
|
+
}
|
|
1633
|
+
|
|
1634
|
+
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
|
1635
|
+
Elements outside of the range [begin, begin + count) are not defined. \a *to must be aligned, and cannot be
|
|
1636
|
+
null.*/
|
|
1637
|
+
template <typename Scalar, typename Packet>
|
|
1638
|
+
EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
|
1639
|
+
return pstoreuSegment(to, from, begin, count);
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
|
1643
|
+
* outside this range are not defined.*/
|
|
1644
|
+
template <typename Packet, int Alignment>
|
|
1645
|
+
EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
|
1646
|
+
Index count) {
|
|
1647
|
+
constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
|
|
1648
|
+
if (Alignment >= RequiredAlignment) {
|
|
1649
|
+
return ploadSegment<Packet>(from, begin, count);
|
|
1650
|
+
} else {
|
|
1651
|
+
return ploaduSegment<Packet>(from, begin, count);
|
|
1652
|
+
}
|
|
570
1653
|
}
|
|
571
1654
|
|
|
572
|
-
/** \internal
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
{
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
mask.select[unpacket_traits<Packet>::size-1] = true;
|
|
583
|
-
return pblend(mask, pset1<Packet>(b), a);
|
|
1655
|
+
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
|
1656
|
+
Elements outside of the range [begin, begin + count) are not defined.*/
|
|
1657
|
+
template <typename Scalar, typename Packet, int Alignment>
|
|
1658
|
+
EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
|
1659
|
+
constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
|
|
1660
|
+
if (Alignment >= RequiredAlignment) {
|
|
1661
|
+
pstoreSegment<Scalar, Packet>(to, from, begin, count);
|
|
1662
|
+
} else {
|
|
1663
|
+
pstoreuSegment<Scalar, Packet>(to, from, begin, count);
|
|
1664
|
+
}
|
|
584
1665
|
}
|
|
585
1666
|
|
|
586
|
-
|
|
1667
|
+
#ifndef EIGEN_NO_IO
|
|
1668
|
+
|
|
1669
|
+
template <typename Packet>
|
|
1670
|
+
class StreamablePacket {
|
|
1671
|
+
public:
|
|
1672
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1673
|
+
StreamablePacket(const Packet& packet) { pstoreu(v_, packet); }
|
|
1674
|
+
|
|
1675
|
+
friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) {
|
|
1676
|
+
os << "{" << packet.v_[0];
|
|
1677
|
+
for (int i = 1; i < unpacket_traits<Packet>::size; ++i) {
|
|
1678
|
+
os << "," << packet.v_[i];
|
|
1679
|
+
}
|
|
1680
|
+
os << "}";
|
|
1681
|
+
return os;
|
|
1682
|
+
}
|
|
1683
|
+
|
|
1684
|
+
private:
|
|
1685
|
+
Scalar v_[unpacket_traits<Packet>::size];
|
|
1686
|
+
};
|
|
1687
|
+
|
|
1688
|
+
/**
|
|
1689
|
+
* \internal \returns an intermediary that can be used to ostream packets, e.g. for debugging.
|
|
1690
|
+
*/
|
|
1691
|
+
template <typename Packet>
|
|
1692
|
+
StreamablePacket<Packet> postream(const Packet& packet) {
|
|
1693
|
+
return StreamablePacket<Packet>(packet);
|
|
1694
|
+
}
|
|
1695
|
+
|
|
1696
|
+
#endif // EIGEN_NO_IO
|
|
1697
|
+
|
|
1698
|
+
} // end namespace internal
|
|
587
1699
|
|
|
588
|
-
}
|
|
1700
|
+
} // end namespace Eigen
|
|
589
1701
|
|
|
590
|
-
#endif
|
|
1702
|
+
#endif // EIGEN_GENERIC_PACKET_MATH_H
|