@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -11,17 +11,20 @@
|
|
|
11
11
|
#ifndef EIGEN_GENERIC_PACKET_MATH_H
|
|
12
12
|
#define EIGEN_GENERIC_PACKET_MATH_H
|
|
13
13
|
|
|
14
|
+
// IWYU pragma: private
|
|
15
|
+
#include "./InternalHeaderCheck.h"
|
|
16
|
+
|
|
14
17
|
namespace Eigen {
|
|
15
18
|
|
|
16
19
|
namespace internal {
|
|
17
20
|
|
|
18
21
|
/** \internal
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
* \file GenericPacketMath.h
|
|
23
|
+
*
|
|
24
|
+
* Default implementation for types not supported by the vectorization.
|
|
25
|
+
* In practice these functions are provided to make easier the writing
|
|
26
|
+
* of generic vectorized code.
|
|
27
|
+
*/
|
|
25
28
|
|
|
26
29
|
#ifndef EIGEN_DEBUG_ALIGNED_LOAD
|
|
27
30
|
#define EIGEN_DEBUG_ALIGNED_LOAD
|
|
@@ -39,48 +42,53 @@ namespace internal {
|
|
|
39
42
|
#define EIGEN_DEBUG_UNALIGNED_STORE
|
|
40
43
|
#endif
|
|
41
44
|
|
|
42
|
-
struct default_packet_traits
|
|
43
|
-
{
|
|
45
|
+
struct default_packet_traits {
|
|
44
46
|
enum {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
HasMin = 1,
|
|
57
|
-
HasMax = 1,
|
|
58
|
-
HasConj = 1,
|
|
47
|
+
// Ops that are implemented for most types.
|
|
48
|
+
HasAdd = 1,
|
|
49
|
+
HasSub = 1,
|
|
50
|
+
HasShift = 1,
|
|
51
|
+
HasMul = 1,
|
|
52
|
+
HasNegate = 1,
|
|
53
|
+
HasAbs = 1,
|
|
54
|
+
HasAbs2 = 1,
|
|
55
|
+
HasMin = 1,
|
|
56
|
+
HasMax = 1,
|
|
57
|
+
HasConj = 1,
|
|
59
58
|
HasSetLinear = 1,
|
|
60
|
-
|
|
59
|
+
HasSign = 1,
|
|
60
|
+
// By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
|
|
61
|
+
// types
|
|
62
|
+
HasRound = 1,
|
|
63
|
+
|
|
64
|
+
HasArg = 0,
|
|
65
|
+
HasAbsDiff = 0,
|
|
66
|
+
HasBlend = 0,
|
|
61
67
|
// This flag is used to indicate whether packet comparison is supported.
|
|
62
|
-
// pcmp_eq
|
|
63
|
-
HasCmp
|
|
64
|
-
|
|
65
|
-
HasDiv
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
68
|
+
// pcmp_eq and pcmp_lt should be defined for it to be true.
|
|
69
|
+
HasCmp = 0,
|
|
70
|
+
|
|
71
|
+
HasDiv = 0,
|
|
72
|
+
HasReciprocal = 0,
|
|
73
|
+
HasSqrt = 0,
|
|
74
|
+
HasRsqrt = 0,
|
|
75
|
+
HasCbrt = 0,
|
|
76
|
+
HasExp = 0,
|
|
77
|
+
HasExpm1 = 0,
|
|
78
|
+
HasLog = 0,
|
|
79
|
+
HasLog1p = 0,
|
|
80
|
+
HasLog10 = 0,
|
|
81
|
+
HasPow = 0,
|
|
82
|
+
HasSin = 0,
|
|
83
|
+
HasCos = 0,
|
|
84
|
+
HasTan = 0,
|
|
85
|
+
HasASin = 0,
|
|
86
|
+
HasACos = 0,
|
|
87
|
+
HasATan = 0,
|
|
88
|
+
HasATanh = 0,
|
|
89
|
+
HasSinh = 0,
|
|
90
|
+
HasCosh = 0,
|
|
91
|
+
HasTanh = 0,
|
|
84
92
|
HasLGamma = 0,
|
|
85
93
|
HasDiGamma = 0,
|
|
86
94
|
HasZeta = 0,
|
|
@@ -93,76 +101,134 @@ struct default_packet_traits
|
|
|
93
101
|
HasIGammaDerA = 0,
|
|
94
102
|
HasGammaSampleDerAlpha = 0,
|
|
95
103
|
HasIGammac = 0,
|
|
96
|
-
HasBetaInc = 0
|
|
97
|
-
|
|
98
|
-
HasRound = 0,
|
|
99
|
-
HasRint = 0,
|
|
100
|
-
HasFloor = 0,
|
|
101
|
-
HasCeil = 0,
|
|
102
|
-
HasSign = 0
|
|
104
|
+
HasBetaInc = 0
|
|
103
105
|
};
|
|
104
106
|
};
|
|
105
107
|
|
|
106
|
-
template<typename T>
|
|
107
|
-
{
|
|
108
|
+
template <typename T>
|
|
109
|
+
struct packet_traits : default_packet_traits {
|
|
108
110
|
typedef T type;
|
|
109
111
|
typedef T half;
|
|
110
112
|
enum {
|
|
111
113
|
Vectorizable = 0,
|
|
112
114
|
size = 1,
|
|
113
115
|
AlignedOnScalar = 0,
|
|
114
|
-
HasHalfPacket = 0
|
|
115
116
|
};
|
|
116
117
|
enum {
|
|
117
|
-
HasAdd
|
|
118
|
-
HasSub
|
|
119
|
-
HasMul
|
|
118
|
+
HasAdd = 0,
|
|
119
|
+
HasSub = 0,
|
|
120
|
+
HasMul = 0,
|
|
120
121
|
HasNegate = 0,
|
|
121
|
-
HasAbs
|
|
122
|
-
HasAbs2
|
|
123
|
-
HasMin
|
|
124
|
-
HasMax
|
|
125
|
-
HasConj
|
|
122
|
+
HasAbs = 0,
|
|
123
|
+
HasAbs2 = 0,
|
|
124
|
+
HasMin = 0,
|
|
125
|
+
HasMax = 0,
|
|
126
|
+
HasConj = 0,
|
|
126
127
|
HasSetLinear = 0
|
|
127
128
|
};
|
|
128
129
|
};
|
|
129
130
|
|
|
130
|
-
template<typename T>
|
|
131
|
+
template <typename T>
|
|
132
|
+
struct packet_traits<const T> : packet_traits<T> {};
|
|
131
133
|
|
|
132
|
-
template<typename T>
|
|
133
|
-
{
|
|
134
|
+
template <typename T>
|
|
135
|
+
struct unpacket_traits {
|
|
134
136
|
typedef T type;
|
|
135
137
|
typedef T half;
|
|
136
|
-
|
|
137
|
-
{
|
|
138
|
+
typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
|
|
139
|
+
enum {
|
|
138
140
|
size = 1,
|
|
139
|
-
alignment =
|
|
141
|
+
alignment = alignof(T),
|
|
140
142
|
vectorizable = false,
|
|
141
|
-
masked_load_available=false,
|
|
142
|
-
masked_store_available=false
|
|
143
|
+
masked_load_available = false,
|
|
144
|
+
masked_store_available = false
|
|
143
145
|
};
|
|
144
146
|
};
|
|
145
147
|
|
|
146
|
-
template<typename T>
|
|
148
|
+
template <typename T>
|
|
149
|
+
struct unpacket_traits<const T> : unpacket_traits<T> {};
|
|
150
|
+
|
|
151
|
+
/** \internal A convenience utility for determining if the type is a scalar.
|
|
152
|
+
* This is used to enable some generic packet implementations.
|
|
153
|
+
*/
|
|
154
|
+
template <typename Packet>
|
|
155
|
+
struct is_scalar {
|
|
156
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
157
|
+
enum { value = internal::is_same<Packet, Scalar>::value };
|
|
158
|
+
};
|
|
159
|
+
|
|
160
|
+
// automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
|
|
161
|
+
// 1) the packets are the same type, or
|
|
162
|
+
// 2) the packets differ only in sign.
|
|
163
|
+
// In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
|
|
164
|
+
template <typename SrcPacket, typename TgtPacket,
|
|
165
|
+
bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
|
|
166
|
+
struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
|
|
167
|
+
template <>
|
|
168
|
+
struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
|
|
169
|
+
template <>
|
|
170
|
+
struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
|
|
171
|
+
template <>
|
|
172
|
+
struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
|
|
173
|
+
template <>
|
|
174
|
+
struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
|
|
147
175
|
|
|
148
|
-
template <typename
|
|
176
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
177
|
+
struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
|
|
178
|
+
using SrcScalar = typename unpacket_traits<SrcPacket>::type;
|
|
179
|
+
static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
|
|
180
|
+
using TgtScalar = typename unpacket_traits<TgtPacket>::type;
|
|
181
|
+
static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
|
|
182
|
+
static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
|
|
183
|
+
};
|
|
184
|
+
|
|
185
|
+
// is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
|
|
186
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
187
|
+
struct is_degenerate {
|
|
188
|
+
static constexpr bool value =
|
|
189
|
+
is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
template <typename Packet>
|
|
193
|
+
struct is_half {
|
|
194
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
195
|
+
static constexpr int Size = unpacket_traits<Packet>::size;
|
|
196
|
+
using DefaultPacket = typename packet_traits<Scalar>::type;
|
|
197
|
+
static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
|
|
198
|
+
static constexpr bool value = Size != 1 && Size < DefaultSize;
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
template <typename Src, typename Tgt>
|
|
202
|
+
struct type_casting_traits {
|
|
149
203
|
enum {
|
|
150
|
-
VectorizedCast =
|
|
204
|
+
VectorizedCast =
|
|
205
|
+
is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
|
|
151
206
|
SrcCoeffRatio = 1,
|
|
152
207
|
TgtCoeffRatio = 1
|
|
153
208
|
};
|
|
154
209
|
};
|
|
155
210
|
|
|
211
|
+
// provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types
|
|
212
|
+
template <typename Src, typename Tgt>
|
|
213
|
+
struct vectorized_type_casting_traits {
|
|
214
|
+
enum : int {
|
|
215
|
+
DefaultSrcPacketSize = packet_traits<Src>::size,
|
|
216
|
+
DefaultTgtPacketSize = packet_traits<Tgt>::size,
|
|
217
|
+
VectorizedCast = 1,
|
|
218
|
+
SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),
|
|
219
|
+
TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)
|
|
220
|
+
};
|
|
221
|
+
};
|
|
222
|
+
|
|
156
223
|
/** \internal Wrapper to ensure that multiple packet types can map to the same
|
|
157
224
|
same underlying vector type. */
|
|
158
|
-
template<typename T, int unique_id = 0>
|
|
159
|
-
struct eigen_packet_wrapper
|
|
160
|
-
{
|
|
225
|
+
template <typename T, int unique_id = 0>
|
|
226
|
+
struct eigen_packet_wrapper {
|
|
161
227
|
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
|
|
162
228
|
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
|
|
163
|
-
EIGEN_ALWAYS_INLINE eigen_packet_wrapper()
|
|
164
|
-
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T
|
|
165
|
-
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T
|
|
229
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
|
|
230
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}
|
|
231
|
+
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
|
|
166
232
|
m_val = v;
|
|
167
233
|
return *this;
|
|
168
234
|
}
|
|
@@ -170,109 +236,181 @@ struct eigen_packet_wrapper
|
|
|
170
236
|
T m_val;
|
|
171
237
|
};
|
|
172
238
|
|
|
239
|
+
template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
|
|
240
|
+
struct preinterpret_generic;
|
|
173
241
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
242
|
+
template <typename Target, typename Packet>
|
|
243
|
+
struct preinterpret_generic<Target, Packet, false> {
|
|
244
|
+
// the packets are not the same, attempt scalar bit_cast
|
|
245
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
|
|
246
|
+
return numext::bit_cast<Target, Packet>(a);
|
|
247
|
+
}
|
|
248
|
+
};
|
|
249
|
+
|
|
250
|
+
template <typename Packet>
|
|
251
|
+
struct preinterpret_generic<Packet, Packet, true> {
|
|
252
|
+
// the packets are the same type: do nothing
|
|
253
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
|
|
254
|
+
};
|
|
255
|
+
|
|
256
|
+
template <typename ComplexPacket>
|
|
257
|
+
struct preinterpret_generic<typename unpacket_traits<ComplexPacket>::as_real, ComplexPacket, false> {
|
|
258
|
+
using RealPacket = typename unpacket_traits<ComplexPacket>::as_real;
|
|
259
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; }
|
|
260
|
+
};
|
|
261
|
+
|
|
262
|
+
/** \internal \returns reinterpret_cast<Target>(a) */
|
|
263
|
+
template <typename Target, typename Packet>
|
|
264
|
+
EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
|
|
265
|
+
return preinterpret_generic<Target, Packet>::run(a);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
|
|
269
|
+
bool TgtIsHalf = is_half<TgtPacket>::value>
|
|
270
|
+
struct pcast_generic;
|
|
271
|
+
|
|
272
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
273
|
+
struct pcast_generic<SrcPacket, TgtPacket, false, false> {
|
|
274
|
+
// the packets are not degenerate: attempt scalar static_cast
|
|
275
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
|
|
276
|
+
return cast_impl<SrcPacket, TgtPacket>::run(a);
|
|
277
|
+
}
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
template <typename Packet>
|
|
281
|
+
struct pcast_generic<Packet, Packet, true, false> {
|
|
282
|
+
// the packets are the same: do nothing
|
|
283
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
|
|
284
|
+
};
|
|
285
|
+
|
|
286
|
+
template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
|
|
287
|
+
struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
|
|
288
|
+
// the packets are degenerate: preinterpret is equivalent to pcast
|
|
289
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
|
|
183
290
|
};
|
|
184
291
|
|
|
185
292
|
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
|
|
186
293
|
template <typename SrcPacket, typename TgtPacket>
|
|
187
|
-
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
188
|
-
|
|
189
|
-
return static_cast<TgtPacket>(a);
|
|
294
|
+
EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
|
|
295
|
+
return pcast_generic<SrcPacket, TgtPacket>::run(a);
|
|
190
296
|
}
|
|
191
297
|
template <typename SrcPacket, typename TgtPacket>
|
|
192
|
-
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
193
|
-
|
|
194
|
-
return static_cast<TgtPacket>(a);
|
|
298
|
+
EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
|
|
299
|
+
return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
|
|
195
300
|
}
|
|
196
301
|
template <typename SrcPacket, typename TgtPacket>
|
|
197
|
-
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
198
|
-
|
|
199
|
-
return
|
|
302
|
+
EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
|
|
303
|
+
const SrcPacket& d) {
|
|
304
|
+
return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
|
|
200
305
|
}
|
|
201
306
|
template <typename SrcPacket, typename TgtPacket>
|
|
202
|
-
EIGEN_DEVICE_FUNC inline TgtPacket
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
return
|
|
307
|
+
EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
|
|
308
|
+
const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
|
|
309
|
+
const SrcPacket& h) {
|
|
310
|
+
return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
|
|
206
311
|
}
|
|
207
312
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
313
|
+
template <typename SrcPacket, typename TgtPacket>
|
|
314
|
+
struct pcast_generic<SrcPacket, TgtPacket, false, true> {
|
|
315
|
+
// TgtPacket is a half packet of some other type
|
|
316
|
+
// perform cast and truncate result
|
|
317
|
+
using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
|
|
318
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
|
|
319
|
+
return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
|
|
320
|
+
}
|
|
321
|
+
};
|
|
212
322
|
|
|
213
323
|
/** \internal \returns a + b (coeff-wise) */
|
|
214
|
-
template<typename Packet>
|
|
215
|
-
padd(const Packet& a, const Packet& b) {
|
|
324
|
+
template <typename Packet>
|
|
325
|
+
EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
|
|
326
|
+
return a + b;
|
|
327
|
+
}
|
|
216
328
|
// Avoid compiler warning for boolean algebra.
|
|
217
|
-
template<>
|
|
218
|
-
padd(const bool& a, const bool& b) {
|
|
329
|
+
template <>
|
|
330
|
+
EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
|
|
331
|
+
return a || b;
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
/** \internal \returns a packet version of \a *from, (un-aligned masked add)
|
|
335
|
+
* There is no generic implementation. We only have implementations for specialized
|
|
336
|
+
* cases. Generic case should not be called.
|
|
337
|
+
*/
|
|
338
|
+
template <typename Packet>
|
|
339
|
+
EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
|
|
340
|
+
const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
|
|
219
341
|
|
|
220
342
|
/** \internal \returns a - b (coeff-wise) */
|
|
221
|
-
template<typename Packet>
|
|
222
|
-
psub(const Packet& a, const Packet& b) {
|
|
343
|
+
template <typename Packet>
|
|
344
|
+
EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
|
|
345
|
+
return a - b;
|
|
346
|
+
}
|
|
223
347
|
|
|
224
348
|
/** \internal \returns -a (coeff-wise) */
|
|
225
|
-
template<typename Packet>
|
|
226
|
-
pnegate(const Packet& a) {
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
349
|
+
template <typename Packet>
|
|
350
|
+
EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
|
|
351
|
+
EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
|
|
352
|
+
NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
|
|
353
|
+
return numext::negate(a);
|
|
354
|
+
}
|
|
230
355
|
|
|
231
356
|
/** \internal \returns conj(a) (coeff-wise) */
|
|
232
|
-
template<typename Packet>
|
|
233
|
-
pconj(const Packet& a) {
|
|
357
|
+
template <typename Packet>
|
|
358
|
+
EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
|
|
359
|
+
return numext::conj(a);
|
|
360
|
+
}
|
|
234
361
|
|
|
235
362
|
/** \internal \returns a * b (coeff-wise) */
|
|
236
|
-
template<typename Packet>
|
|
237
|
-
pmul(const Packet& a, const Packet& b) {
|
|
363
|
+
template <typename Packet>
|
|
364
|
+
EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
|
|
365
|
+
return a * b;
|
|
366
|
+
}
|
|
238
367
|
// Avoid compiler warning for boolean algebra.
|
|
239
|
-
template<>
|
|
240
|
-
pmul(const bool& a, const bool& b) {
|
|
368
|
+
template <>
|
|
369
|
+
EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
|
|
370
|
+
return a && b;
|
|
371
|
+
}
|
|
241
372
|
|
|
242
373
|
/** \internal \returns a / b (coeff-wise) */
|
|
243
|
-
template<typename Packet>
|
|
244
|
-
pdiv(const Packet& a, const Packet& b) {
|
|
374
|
+
template <typename Packet>
|
|
375
|
+
EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
|
|
376
|
+
return a / b;
|
|
377
|
+
}
|
|
378
|
+
// Avoid compiler warning for boolean algebra.
|
|
379
|
+
template <>
|
|
380
|
+
EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
|
|
381
|
+
return a && b;
|
|
382
|
+
}
|
|
245
383
|
|
|
246
|
-
// In the generic case, memset to all one bits.
|
|
247
|
-
template<typename Packet, typename EnableIf = void>
|
|
384
|
+
// In the generic packet case, memset to all one bits.
|
|
385
|
+
template <typename Packet, typename EnableIf = void>
|
|
248
386
|
struct ptrue_impl {
|
|
249
|
-
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
|
|
387
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
|
|
250
388
|
Packet b;
|
|
251
389
|
memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
|
|
252
390
|
return b;
|
|
253
391
|
}
|
|
254
392
|
};
|
|
255
393
|
|
|
256
|
-
//
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
}
|
|
394
|
+
// Use a value of one for scalars.
|
|
395
|
+
template <typename Scalar>
|
|
396
|
+
struct ptrue_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
|
|
397
|
+
static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); }
|
|
398
|
+
};
|
|
399
|
+
|
|
400
|
+
// For booleans, we can only directly set a valid `bool` value to avoid UB.
|
|
401
|
+
template <>
|
|
402
|
+
struct ptrue_impl<bool, void> {
|
|
403
|
+
static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; }
|
|
266
404
|
};
|
|
267
405
|
|
|
268
406
|
/** \internal \returns one bits. */
|
|
269
|
-
template<typename Packet>
|
|
270
|
-
ptrue(const Packet& a) {
|
|
407
|
+
template <typename Packet>
|
|
408
|
+
EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
|
|
271
409
|
return ptrue_impl<Packet>::run(a);
|
|
272
410
|
}
|
|
273
411
|
|
|
274
|
-
// In the general case, memset to zero.
|
|
275
|
-
template<typename Packet, typename EnableIf = void>
|
|
412
|
+
// In the general packet case, memset to zero.
|
|
413
|
+
template <typename Packet, typename EnableIf = void>
|
|
276
414
|
struct pzero_impl {
|
|
277
415
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
|
|
278
416
|
Packet b;
|
|
@@ -283,66 +421,59 @@ struct pzero_impl {
|
|
|
283
421
|
|
|
284
422
|
// For scalars, explicitly set to Scalar(0), since the underlying representation
|
|
285
423
|
// for zero may not consist of all-zero bits.
|
|
286
|
-
template<typename T>
|
|
287
|
-
struct pzero_impl<T,
|
|
288
|
-
|
|
289
|
-
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
|
|
290
|
-
return T(0);
|
|
291
|
-
}
|
|
424
|
+
template <typename T>
|
|
425
|
+
struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
|
|
426
|
+
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
|
|
292
427
|
};
|
|
293
428
|
|
|
294
429
|
/** \internal \returns packet of zeros */
|
|
295
|
-
template<typename Packet>
|
|
296
|
-
pzero(const Packet& a) {
|
|
430
|
+
template <typename Packet>
|
|
431
|
+
EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
|
|
297
432
|
return pzero_impl<Packet>::run(a);
|
|
298
433
|
}
|
|
299
434
|
|
|
300
|
-
|
|
301
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
302
|
-
pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
|
|
303
|
-
|
|
304
|
-
/** \internal \returns a < b as a bit mask */
|
|
305
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
306
|
-
pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
|
|
307
|
-
|
|
308
|
-
/** \internal \returns a == b as a bit mask */
|
|
309
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
310
|
-
pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
|
|
311
|
-
|
|
312
|
-
/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
|
|
313
|
-
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
314
|
-
pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
|
|
315
|
-
|
|
316
|
-
template<typename T>
|
|
435
|
+
template <typename T>
|
|
317
436
|
struct bit_and {
|
|
318
|
-
EIGEN_DEVICE_FUNC
|
|
319
|
-
return a & b;
|
|
320
|
-
}
|
|
437
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
|
|
321
438
|
};
|
|
322
439
|
|
|
323
|
-
template<typename T>
|
|
440
|
+
template <typename T>
|
|
324
441
|
struct bit_or {
|
|
325
|
-
EIGEN_DEVICE_FUNC
|
|
326
|
-
return a | b;
|
|
327
|
-
}
|
|
442
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
|
|
328
443
|
};
|
|
329
444
|
|
|
330
|
-
template<typename T>
|
|
445
|
+
template <typename T>
|
|
331
446
|
struct bit_xor {
|
|
332
|
-
EIGEN_DEVICE_FUNC
|
|
333
|
-
return a ^ b;
|
|
334
|
-
}
|
|
447
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
|
|
335
448
|
};
|
|
336
449
|
|
|
337
|
-
template<typename T>
|
|
450
|
+
template <typename T>
|
|
338
451
|
struct bit_not {
|
|
339
|
-
EIGEN_DEVICE_FUNC
|
|
340
|
-
|
|
341
|
-
|
|
452
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
template <>
|
|
456
|
+
struct bit_and<bool> {
|
|
457
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; }
|
|
458
|
+
};
|
|
459
|
+
|
|
460
|
+
template <>
|
|
461
|
+
struct bit_or<bool> {
|
|
462
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; }
|
|
463
|
+
};
|
|
464
|
+
|
|
465
|
+
template <>
|
|
466
|
+
struct bit_xor<bool> {
|
|
467
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; }
|
|
468
|
+
};
|
|
469
|
+
|
|
470
|
+
template <>
|
|
471
|
+
struct bit_not<bool> {
|
|
472
|
+
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
|
|
342
473
|
};
|
|
343
474
|
|
|
344
475
|
// Use operators &, |, ^, ~.
|
|
345
|
-
template<typename T>
|
|
476
|
+
template <typename T>
|
|
346
477
|
struct operator_bitwise_helper {
|
|
347
478
|
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
|
|
348
479
|
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
|
|
@@ -351,23 +482,19 @@ struct operator_bitwise_helper {
|
|
|
351
482
|
};
|
|
352
483
|
|
|
353
484
|
// Apply binary operations byte-by-byte
|
|
354
|
-
template<typename T>
|
|
485
|
+
template <typename T>
|
|
355
486
|
struct bytewise_bitwise_helper {
|
|
356
487
|
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
|
|
357
488
|
return binary(a, b, bit_and<unsigned char>());
|
|
358
489
|
}
|
|
359
|
-
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
|
|
360
|
-
return binary(a, b, bit_or<unsigned char>());
|
|
361
|
-
}
|
|
490
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or<unsigned char>()); }
|
|
362
491
|
EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
|
|
363
492
|
return binary(a, b, bit_xor<unsigned char>());
|
|
364
493
|
}
|
|
365
|
-
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
|
|
366
|
-
|
|
367
|
-
}
|
|
368
|
-
|
|
494
|
+
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not<unsigned char>()); }
|
|
495
|
+
|
|
369
496
|
private:
|
|
370
|
-
template<typename Op>
|
|
497
|
+
template <typename Op>
|
|
371
498
|
EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
|
|
372
499
|
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
|
373
500
|
T c;
|
|
@@ -378,7 +505,7 @@ struct bytewise_bitwise_helper {
|
|
|
378
505
|
return c;
|
|
379
506
|
}
|
|
380
507
|
|
|
381
|
-
template<typename Op>
|
|
508
|
+
template <typename Op>
|
|
382
509
|
EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
|
|
383
510
|
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
|
384
511
|
const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
|
|
@@ -392,186 +519,215 @@ struct bytewise_bitwise_helper {
|
|
|
392
519
|
};
|
|
393
520
|
|
|
394
521
|
// In the general case, use byte-by-byte manipulation.
|
|
395
|
-
template<typename T, typename EnableIf = void>
|
|
522
|
+
template <typename T, typename EnableIf = void>
|
|
396
523
|
struct bitwise_helper : public bytewise_bitwise_helper<T> {};
|
|
397
524
|
|
|
398
525
|
// For integers or non-trivial scalars, use binary operators.
|
|
399
|
-
template<typename T>
|
|
400
|
-
struct bitwise_helper<T,
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
> : public operator_bitwise_helper<T> {};
|
|
526
|
+
template <typename T>
|
|
527
|
+
struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
|
|
528
|
+
(NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
|
|
529
|
+
: public operator_bitwise_helper<T> {};
|
|
404
530
|
|
|
405
531
|
/** \internal \returns the bitwise and of \a a and \a b */
|
|
406
|
-
template<typename Packet>
|
|
407
|
-
pand(const Packet& a, const Packet& b) {
|
|
532
|
+
template <typename Packet>
|
|
533
|
+
EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
|
|
408
534
|
return bitwise_helper<Packet>::bitwise_and(a, b);
|
|
409
535
|
}
|
|
410
536
|
|
|
411
537
|
/** \internal \returns the bitwise or of \a a and \a b */
|
|
412
|
-
template<typename Packet>
|
|
413
|
-
por(const Packet& a, const Packet& b) {
|
|
538
|
+
template <typename Packet>
|
|
539
|
+
EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
|
|
414
540
|
return bitwise_helper<Packet>::bitwise_or(a, b);
|
|
415
541
|
}
|
|
416
542
|
|
|
417
543
|
/** \internal \returns the bitwise xor of \a a and \a b */
|
|
418
|
-
template<typename Packet>
|
|
419
|
-
pxor(const Packet& a, const Packet& b) {
|
|
544
|
+
template <typename Packet>
|
|
545
|
+
EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
|
|
420
546
|
return bitwise_helper<Packet>::bitwise_xor(a, b);
|
|
421
547
|
}
|
|
422
548
|
|
|
423
549
|
/** \internal \returns the bitwise not of \a a */
|
|
424
|
-
template<typename Packet>
|
|
425
|
-
pnot(const Packet& a) {
|
|
550
|
+
template <typename Packet>
|
|
551
|
+
EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
|
|
426
552
|
return bitwise_helper<Packet>::bitwise_not(a);
|
|
427
553
|
}
|
|
428
554
|
|
|
429
555
|
/** \internal \returns the bitwise and of \a a and not \a b */
|
|
430
|
-
template<typename Packet>
|
|
431
|
-
pandnot(const Packet& a, const Packet& b) {
|
|
556
|
+
template <typename Packet>
|
|
557
|
+
EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
|
|
558
|
+
return pand(a, pnot(b));
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
/** \internal \returns a < b as a bit mask */
|
|
562
|
+
template <typename Packet>
|
|
563
|
+
EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
|
|
564
|
+
return a < b ? ptrue(a) : pzero(a);
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
/** \internal \returns a == b as a bit mask */
|
|
568
|
+
template <typename Packet>
|
|
569
|
+
EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
|
|
570
|
+
return a == b ? ptrue(a) : pzero(a);
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
/** \internal \returns a <= b as a bit mask */
|
|
574
|
+
template <typename Packet>
|
|
575
|
+
EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
|
|
576
|
+
return por(pcmp_eq(a, b), pcmp_lt(a, b));
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
|
|
580
|
+
template <typename Packet>
|
|
581
|
+
EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
|
|
582
|
+
return a >= b ? pzero(a) : ptrue(a);
|
|
583
|
+
}
|
|
432
584
|
|
|
433
585
|
// In the general case, use bitwise select.
|
|
434
|
-
template<typename Packet,
|
|
586
|
+
template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
|
|
435
587
|
struct pselect_impl {
|
|
436
588
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
|
|
437
|
-
return por(pand(a,mask),pandnot(b,mask));
|
|
589
|
+
return por(pand(a, mask), pandnot(b, mask));
|
|
438
590
|
}
|
|
439
591
|
};
|
|
440
592
|
|
|
441
593
|
// For scalars, use ternary select.
|
|
442
|
-
template<typename Packet>
|
|
443
|
-
struct pselect_impl<Packet,
|
|
444
|
-
typename internal::enable_if<is_scalar<Packet>::value>::type > {
|
|
594
|
+
template <typename Packet>
|
|
595
|
+
struct pselect_impl<Packet, true> {
|
|
445
596
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
|
|
446
|
-
return numext::
|
|
597
|
+
return numext::select(mask, a, b);
|
|
447
598
|
}
|
|
448
599
|
};
|
|
449
600
|
|
|
450
601
|
/** \internal \returns \a or \b for each field in packet according to \mask */
|
|
451
|
-
template<typename Packet>
|
|
452
|
-
pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
|
602
|
+
template <typename Packet>
|
|
603
|
+
EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
|
453
604
|
return pselect_impl<Packet>::run(mask, a, b);
|
|
454
605
|
}
|
|
455
606
|
|
|
456
|
-
template<>
|
|
457
|
-
|
|
607
|
+
template <>
|
|
608
|
+
EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
|
|
458
609
|
return cond ? a : b;
|
|
459
610
|
}
|
|
460
611
|
|
|
461
612
|
/** \internal \returns the min or of \a a and \a b (coeff-wise)
|
|
462
613
|
If either \a a or \a b are NaN, the result is implementation defined. */
|
|
463
|
-
template<int NaNPropagation>
|
|
614
|
+
template <int NaNPropagation, bool IsInteger>
|
|
464
615
|
struct pminmax_impl {
|
|
465
616
|
template <typename Packet, typename Op>
|
|
466
617
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
467
|
-
return op(a,b);
|
|
618
|
+
return op(a, b);
|
|
468
619
|
}
|
|
469
620
|
};
|
|
470
621
|
|
|
471
622
|
/** \internal \returns the min or max of \a a and \a b (coeff-wise)
|
|
472
623
|
If either \a a or \a b are NaN, NaN is returned. */
|
|
473
|
-
template<>
|
|
474
|
-
struct pminmax_impl<PropagateNaN> {
|
|
624
|
+
template <>
|
|
625
|
+
struct pminmax_impl<PropagateNaN, false> {
|
|
475
626
|
template <typename Packet, typename Op>
|
|
476
627
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
pselect(not_nan_mask_b, op(a, b), b),
|
|
481
|
-
a);
|
|
628
|
+
Packet not_nan_mask_a = pcmp_eq(a, a);
|
|
629
|
+
Packet not_nan_mask_b = pcmp_eq(b, b);
|
|
630
|
+
return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a);
|
|
482
631
|
}
|
|
483
632
|
};
|
|
484
633
|
|
|
485
634
|
/** \internal \returns the min or max of \a a and \a b (coeff-wise)
|
|
486
635
|
If both \a a and \a b are NaN, NaN is returned.
|
|
487
636
|
Equivalent to std::fmin(a, b). */
|
|
488
|
-
template<>
|
|
489
|
-
struct pminmax_impl<PropagateNumbers> {
|
|
637
|
+
template <>
|
|
638
|
+
struct pminmax_impl<PropagateNumbers, false> {
|
|
490
639
|
template <typename Packet, typename Op>
|
|
491
640
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
pselect(not_nan_mask_b, op(a, b), a),
|
|
496
|
-
b);
|
|
641
|
+
Packet not_nan_mask_a = pcmp_eq(a, a);
|
|
642
|
+
Packet not_nan_mask_b = pcmp_eq(b, b);
|
|
643
|
+
return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b);
|
|
497
644
|
}
|
|
498
645
|
};
|
|
499
646
|
|
|
500
|
-
|
|
501
|
-
#ifndef SYCL_DEVICE_ONLY
|
|
502
|
-
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
|
|
503
|
-
#else
|
|
504
|
-
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
|
|
505
|
-
[](const Type& a, const Type& b) { \
|
|
506
|
-
return Func(a, b);}
|
|
507
|
-
#endif
|
|
647
|
+
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); }
|
|
508
648
|
|
|
509
649
|
/** \internal \returns the min of \a a and \a b (coeff-wise).
|
|
510
650
|
If \a a or \b b is NaN, the return value is implementation defined. */
|
|
511
|
-
template<typename Packet>
|
|
512
|
-
pmin(const Packet& a, const Packet& b) {
|
|
651
|
+
template <typename Packet>
|
|
652
|
+
EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
|
|
653
|
+
return numext::mini(a, b);
|
|
654
|
+
}
|
|
513
655
|
|
|
514
656
|
/** \internal \returns the min of \a a and \a b (coeff-wise).
|
|
515
657
|
NaNPropagation determines the NaN propagation semantics. */
|
|
516
658
|
template <int NaNPropagation, typename Packet>
|
|
517
659
|
EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
|
|
518
|
-
|
|
660
|
+
constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
|
|
661
|
+
return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
|
|
519
662
|
}
|
|
520
663
|
|
|
521
664
|
/** \internal \returns the max of \a a and \a b (coeff-wise)
|
|
522
665
|
If \a a or \b b is NaN, the return value is implementation defined. */
|
|
523
|
-
template<typename Packet>
|
|
524
|
-
pmax(const Packet& a, const Packet& b) {
|
|
666
|
+
template <typename Packet>
|
|
667
|
+
EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
|
|
668
|
+
return numext::maxi(a, b);
|
|
669
|
+
}
|
|
525
670
|
|
|
526
671
|
/** \internal \returns the max of \a a and \a b (coeff-wise).
|
|
527
672
|
NaNPropagation determines the NaN propagation semantics. */
|
|
528
673
|
template <int NaNPropagation, typename Packet>
|
|
529
674
|
EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
|
|
530
|
-
|
|
675
|
+
constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
|
|
676
|
+
return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
|
|
531
677
|
}
|
|
532
678
|
|
|
533
679
|
/** \internal \returns the absolute value of \a a */
|
|
534
|
-
template<typename Packet>
|
|
535
|
-
pabs(const Packet& a) {
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
template<>
|
|
539
|
-
pabs(const unsigned
|
|
540
|
-
|
|
541
|
-
|
|
680
|
+
template <typename Packet>
|
|
681
|
+
EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
|
|
682
|
+
return numext::abs(a);
|
|
683
|
+
}
|
|
684
|
+
template <>
|
|
685
|
+
EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
|
|
686
|
+
return a;
|
|
687
|
+
}
|
|
688
|
+
template <>
|
|
689
|
+
EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
|
|
690
|
+
return a;
|
|
691
|
+
}
|
|
692
|
+
template <>
|
|
693
|
+
EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
|
|
694
|
+
return a;
|
|
695
|
+
}
|
|
542
696
|
|
|
543
697
|
/** \internal \returns the addsub value of \a a,b */
|
|
544
|
-
template<typename Packet>
|
|
545
|
-
paddsub(const Packet& a, const Packet& b) {
|
|
698
|
+
template <typename Packet>
|
|
699
|
+
EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
|
|
546
700
|
return pselect(peven_mask(a), padd(a, b), psub(a, b));
|
|
547
|
-
|
|
701
|
+
}
|
|
548
702
|
|
|
549
703
|
/** \internal \returns the phase angle of \a a */
|
|
550
|
-
template<typename Packet>
|
|
551
|
-
parg(const Packet& a) {
|
|
704
|
+
template <typename Packet>
|
|
705
|
+
EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
|
|
706
|
+
using numext::arg;
|
|
707
|
+
return arg(a);
|
|
708
|
+
}
|
|
552
709
|
|
|
710
|
+
/** \internal \returns \a a arithmetically shifted by N bits to the right */
|
|
711
|
+
template <int N, typename T>
|
|
712
|
+
EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
|
|
713
|
+
return numext::arithmetic_shift_right(a, N);
|
|
714
|
+
}
|
|
553
715
|
|
|
554
716
|
/** \internal \returns \a a logically shifted by N bits to the right */
|
|
555
|
-
template<int N
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
/** \internal \returns \a a arithmetically shifted by N bits to the right */
|
|
561
|
-
template<int N> EIGEN_DEVICE_FUNC inline int
|
|
562
|
-
plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
|
|
563
|
-
template<int N> EIGEN_DEVICE_FUNC inline long int
|
|
564
|
-
plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
|
|
717
|
+
template <int N, typename T>
|
|
718
|
+
EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
|
|
719
|
+
return numext::logical_shift_right(a, N);
|
|
720
|
+
}
|
|
565
721
|
|
|
566
722
|
/** \internal \returns \a a shifted by N bits to the left */
|
|
567
|
-
template<int N
|
|
568
|
-
plogical_shift_left(const
|
|
569
|
-
|
|
570
|
-
|
|
723
|
+
template <int N, typename T>
|
|
724
|
+
EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
|
|
725
|
+
return numext::logical_shift_left(a, N);
|
|
726
|
+
}
|
|
571
727
|
|
|
572
728
|
/** \internal \returns the significand and exponent of the underlying floating point numbers
|
|
573
|
-
|
|
574
|
-
|
|
729
|
+
* See https://en.cppreference.com/w/cpp/numeric/math/frexp
|
|
730
|
+
*/
|
|
575
731
|
template <typename Packet>
|
|
576
732
|
EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
|
|
577
733
|
int exp;
|
|
@@ -582,142 +738,250 @@ EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
|
|
|
582
738
|
}
|
|
583
739
|
|
|
584
740
|
/** \internal \returns a * 2^((int)exponent)
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
template<typename Packet>
|
|
588
|
-
pldexp(const Packet
|
|
741
|
+
* See https://en.cppreference.com/w/cpp/numeric/math/ldexp
|
|
742
|
+
*/
|
|
743
|
+
template <typename Packet>
|
|
744
|
+
EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
|
|
589
745
|
EIGEN_USING_STD(ldexp)
|
|
590
746
|
return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
|
|
591
747
|
}
|
|
592
748
|
|
|
593
749
|
/** \internal \returns the absolute difference of \a a and \a b (coeff-wise) */
|
|
594
|
-
template<typename Packet>
|
|
595
|
-
pabsdiff(const Packet& a, const Packet& b) {
|
|
750
|
+
template <typename Packet>
|
|
751
|
+
EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
|
|
752
|
+
return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
|
|
753
|
+
}
|
|
596
754
|
|
|
597
|
-
/** \internal \returns a packet version of \a *from, from must be
|
|
598
|
-
template<typename Packet>
|
|
599
|
-
pload(const typename unpacket_traits<Packet>::type* from) {
|
|
755
|
+
/** \internal \returns a packet version of \a *from, from must be properly aligned */
|
|
756
|
+
template <typename Packet>
|
|
757
|
+
EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
|
|
758
|
+
return *from;
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
/** \internal \returns n elements of a packet version of \a *from, from must be properly aligned
|
|
762
|
+
* offset indicates the starting element in which to load and
|
|
763
|
+
* offset + n <= unpacket_traits::size
|
|
764
|
+
* All elements before offset and after the last element loaded will be initialized with zero */
|
|
765
|
+
template <typename Packet>
|
|
766
|
+
EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
|
|
767
|
+
const Index offset = 0) {
|
|
768
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
769
|
+
eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
|
|
770
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
771
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
|
|
772
|
+
for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
|
|
773
|
+
elements[i] = from[i - offset];
|
|
774
|
+
}
|
|
775
|
+
return pload<Packet>(elements);
|
|
776
|
+
}
|
|
600
777
|
|
|
601
778
|
/** \internal \returns a packet version of \a *from, (un-aligned load) */
|
|
602
|
-
template<typename Packet>
|
|
603
|
-
ploadu(const typename unpacket_traits<Packet>::type* from) {
|
|
779
|
+
template <typename Packet>
|
|
780
|
+
EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
|
|
781
|
+
return *from;
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
/** \internal \returns n elements of a packet version of \a *from, (un-aligned load)
|
|
785
|
+
* All elements after the last element loaded will be initialized with zero */
|
|
786
|
+
template <typename Packet>
|
|
787
|
+
EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
|
|
788
|
+
const Index offset = 0) {
|
|
789
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
790
|
+
eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
|
|
791
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
792
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
|
|
793
|
+
for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
|
|
794
|
+
elements[i] = from[i - offset];
|
|
795
|
+
}
|
|
796
|
+
return pload<Packet>(elements);
|
|
797
|
+
}
|
|
604
798
|
|
|
605
799
|
/** \internal \returns a packet version of \a *from, (un-aligned masked load)
|
|
606
800
|
* There is no generic implementation. We only have implementations for specialized
|
|
607
801
|
* cases. Generic case should not be called.
|
|
608
802
|
*/
|
|
609
|
-
template<typename Packet>
|
|
610
|
-
|
|
611
|
-
|
|
803
|
+
template <typename Packet>
|
|
804
|
+
EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
|
|
805
|
+
const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
|
|
612
806
|
|
|
613
807
|
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
|
614
|
-
template<typename Packet>
|
|
615
|
-
pset1(const typename unpacket_traits<Packet>::type& a) {
|
|
808
|
+
template <typename Packet>
|
|
809
|
+
EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
|
|
810
|
+
return a;
|
|
811
|
+
}
|
|
616
812
|
|
|
617
813
|
/** \internal \returns a packet with constant coefficients set from bits */
|
|
618
|
-
template<typename Packet,typename BitsType>
|
|
619
|
-
pset1frombits(BitsType a);
|
|
814
|
+
template <typename Packet, typename BitsType>
|
|
815
|
+
EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
|
|
620
816
|
|
|
621
817
|
/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
|
|
622
|
-
template<typename Packet>
|
|
623
|
-
pload1(const typename unpacket_traits<Packet>::type
|
|
818
|
+
template <typename Packet>
|
|
819
|
+
EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
|
|
820
|
+
return pset1<Packet>(*a);
|
|
821
|
+
}
|
|
624
822
|
|
|
625
823
|
/** \internal \returns a packet with elements of \a *from duplicated.
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
template<typename Packet>
|
|
631
|
-
ploaddup(const typename unpacket_traits<Packet>::type* from) {
|
|
824
|
+
* For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
|
|
825
|
+
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
|
|
826
|
+
* Currently, this function is only used for scalar * complex products.
|
|
827
|
+
*/
|
|
828
|
+
template <typename Packet>
|
|
829
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
|
|
830
|
+
return *from;
|
|
831
|
+
}
|
|
632
832
|
|
|
633
833
|
/** \internal \returns a packet with elements of \a *from quadrupled.
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
template<typename Packet>
|
|
640
|
-
ploadquad(const typename unpacket_traits<Packet>::type* from)
|
|
641
|
-
|
|
834
|
+
* For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
|
|
835
|
+
* replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
|
|
836
|
+
* Currently, this function is only used in matrix products.
|
|
837
|
+
* For packet-size smaller than or equal to 4, this function is equivalent to pload1
|
|
838
|
+
*/
|
|
839
|
+
template <typename Packet>
|
|
840
|
+
EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
|
|
841
|
+
return pload1<Packet>(from);
|
|
842
|
+
}
|
|
642
843
|
|
|
643
844
|
/** \internal equivalent to
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
template<typename Packet>
|
|
653
|
-
inline void pbroadcast4(const typename unpacket_traits<Packet>::type
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
a3 = pload1<Packet>(a+3);
|
|
845
|
+
* \code
|
|
846
|
+
* a0 = pload1(a+0);
|
|
847
|
+
* a1 = pload1(a+1);
|
|
848
|
+
* a2 = pload1(a+2);
|
|
849
|
+
* a3 = pload1(a+3);
|
|
850
|
+
* \endcode
|
|
851
|
+
* \sa pset1, pload1, ploaddup, pbroadcast2
|
|
852
|
+
*/
|
|
853
|
+
template <typename Packet>
|
|
854
|
+
EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
|
|
855
|
+
Packet& a2, Packet& a3) {
|
|
856
|
+
a0 = pload1<Packet>(a + 0);
|
|
857
|
+
a1 = pload1<Packet>(a + 1);
|
|
858
|
+
a2 = pload1<Packet>(a + 2);
|
|
859
|
+
a3 = pload1<Packet>(a + 3);
|
|
660
860
|
}
|
|
661
861
|
|
|
662
862
|
/** \internal equivalent to
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
template<typename Packet>
|
|
670
|
-
inline void pbroadcast2(const typename unpacket_traits<Packet>::type
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
a0 = pload1<Packet>(a+0);
|
|
674
|
-
a1 = pload1<Packet>(a+1);
|
|
863
|
+
* \code
|
|
864
|
+
* a0 = pload1(a+0);
|
|
865
|
+
* a1 = pload1(a+1);
|
|
866
|
+
* \endcode
|
|
867
|
+
* \sa pset1, pload1, ploaddup, pbroadcast4
|
|
868
|
+
*/
|
|
869
|
+
template <typename Packet>
|
|
870
|
+
EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
|
|
871
|
+
a0 = pload1<Packet>(a + 0);
|
|
872
|
+
a1 = pload1<Packet>(a + 1);
|
|
675
873
|
}
|
|
676
874
|
|
|
677
875
|
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
|
678
|
-
template<typename Packet>
|
|
679
|
-
plset(const typename unpacket_traits<Packet>::type& a) {
|
|
876
|
+
template <typename Packet>
|
|
877
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
|
|
878
|
+
return a;
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
template <typename Packet, typename EnableIf = void>
|
|
882
|
+
struct peven_mask_impl {
|
|
883
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) {
|
|
884
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
885
|
+
const size_t n = unpacket_traits<Packet>::size;
|
|
886
|
+
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
|
887
|
+
for (size_t i = 0; i < n; ++i) {
|
|
888
|
+
memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
|
|
889
|
+
}
|
|
890
|
+
return ploadu<Packet>(elements);
|
|
891
|
+
}
|
|
892
|
+
};
|
|
893
|
+
|
|
894
|
+
template <typename Scalar>
|
|
895
|
+
struct peven_mask_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
|
|
896
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); }
|
|
897
|
+
};
|
|
680
898
|
|
|
681
899
|
/** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0),
|
|
682
900
|
where x is the value of all 1-bits. */
|
|
683
|
-
template<typename Packet>
|
|
684
|
-
peven_mask(const Packet&
|
|
685
|
-
|
|
686
|
-
const size_t n = unpacket_traits<Packet>::size;
|
|
687
|
-
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
|
688
|
-
for(size_t i = 0; i < n; ++i) {
|
|
689
|
-
memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
|
|
690
|
-
}
|
|
691
|
-
return ploadu<Packet>(elements);
|
|
901
|
+
template <typename Packet>
|
|
902
|
+
EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) {
|
|
903
|
+
return peven_mask_impl<Packet>::run(a);
|
|
692
904
|
}
|
|
693
905
|
|
|
906
|
+
/** \internal copy the packet \a from to \a *to, \a to must be properly aligned */
|
|
907
|
+
template <typename Scalar, typename Packet>
|
|
908
|
+
EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
|
|
909
|
+
(*to) = from;
|
|
910
|
+
}
|
|
694
911
|
|
|
695
|
-
/** \internal copy the packet \a from to \a *to, \a to must be
|
|
696
|
-
|
|
697
|
-
|
|
912
|
+
/** \internal copy n elements of the packet \a from to \a *to, \a to must be properly aligned
|
|
913
|
+
* offset indicates the starting element in which to store and
|
|
914
|
+
* offset + n <= unpacket_traits::size */
|
|
915
|
+
template <typename Scalar, typename Packet>
|
|
916
|
+
EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
|
|
917
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
918
|
+
eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
|
|
919
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size];
|
|
920
|
+
pstore<Scalar>(elements, from);
|
|
921
|
+
for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
|
|
922
|
+
to[i] = elements[i + offset];
|
|
923
|
+
}
|
|
924
|
+
}
|
|
698
925
|
|
|
699
926
|
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
|
|
700
|
-
template<typename Scalar, typename Packet>
|
|
701
|
-
|
|
927
|
+
template <typename Scalar, typename Packet>
|
|
928
|
+
EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
|
|
929
|
+
(*to) = from;
|
|
930
|
+
}
|
|
931
|
+
|
|
932
|
+
/** \internal copy n elements of the packet \a from to \a *to, (un-aligned store) */
|
|
933
|
+
template <typename Scalar, typename Packet>
|
|
934
|
+
EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
|
|
935
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
936
|
+
eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
|
|
937
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size];
|
|
938
|
+
pstore<Scalar>(elements, from);
|
|
939
|
+
for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
|
|
940
|
+
to[i] = elements[i + offset];
|
|
941
|
+
}
|
|
942
|
+
}
|
|
702
943
|
|
|
703
944
|
/** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
|
|
704
945
|
* There is no generic implementation. We only have implementations for specialized
|
|
705
946
|
* cases. Generic case should not be called.
|
|
706
947
|
*/
|
|
707
|
-
template<typename Scalar, typename Packet>
|
|
708
|
-
EIGEN_DEVICE_FUNC inline
|
|
709
|
-
typename
|
|
710
|
-
pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
|
|
948
|
+
template <typename Scalar, typename Packet>
|
|
949
|
+
EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
|
|
950
|
+
Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
|
|
711
951
|
|
|
712
|
-
|
|
713
|
-
|
|
952
|
+
template <typename Scalar, typename Packet>
|
|
953
|
+
EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
|
|
954
|
+
return ploadu<Packet>(from);
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
template <typename Scalar, typename Packet>
|
|
958
|
+
EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
|
|
959
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
960
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
|
|
961
|
+
for (Index i = 0; i < numext::mini(n, packet_size); i++) {
|
|
962
|
+
elements[i] = from[i * stride];
|
|
963
|
+
}
|
|
964
|
+
return pload<Packet>(elements);
|
|
965
|
+
}
|
|
714
966
|
|
|
715
|
-
|
|
716
|
-
|
|
967
|
+
template <typename Scalar, typename Packet>
|
|
968
|
+
EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
|
|
969
|
+
pstore(to, from);
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
template <typename Scalar, typename Packet>
|
|
973
|
+
EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
|
|
974
|
+
const Index packet_size = unpacket_traits<Packet>::size;
|
|
975
|
+
EIGEN_ALIGN_MAX Scalar elements[packet_size];
|
|
976
|
+
pstore<Scalar>(elements, from);
|
|
977
|
+
for (Index i = 0; i < numext::mini(n, packet_size); i++) {
|
|
978
|
+
to[i * stride] = elements[i];
|
|
979
|
+
}
|
|
980
|
+
}
|
|
717
981
|
|
|
718
982
|
/** \internal tries to do cache prefetching of \a addr */
|
|
719
|
-
template<typename Scalar>
|
|
720
|
-
{
|
|
983
|
+
template <typename Scalar>
|
|
984
|
+
EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
|
|
721
985
|
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
|
722
986
|
// do nothing
|
|
723
987
|
#elif defined(EIGEN_CUDA_ARCH)
|
|
@@ -734,135 +998,246 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
|
|
|
734
998
|
}
|
|
735
999
|
|
|
736
1000
|
/** \internal \returns the reversed elements of \a a*/
|
|
737
|
-
template<typename Packet>
|
|
738
|
-
|
|
1001
|
+
template <typename Packet>
|
|
1002
|
+
EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
|
|
1003
|
+
return a;
|
|
1004
|
+
}
|
|
739
1005
|
|
|
740
1006
|
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
|
741
|
-
template<typename Packet>
|
|
742
|
-
{
|
|
743
|
-
return Packet(numext::imag(a),numext::real(a));
|
|
1007
|
+
template <typename Packet>
|
|
1008
|
+
EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
|
|
1009
|
+
return Packet(numext::imag(a), numext::real(a));
|
|
744
1010
|
}
|
|
745
1011
|
|
|
746
1012
|
/**************************
|
|
747
|
-
* Special math functions
|
|
748
|
-
***************************/
|
|
1013
|
+
* Special math functions
|
|
1014
|
+
***************************/
|
|
1015
|
+
|
|
1016
|
+
/** \internal \returns isnan(a) */
|
|
1017
|
+
template <typename Packet>
|
|
1018
|
+
EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
|
|
1019
|
+
return pandnot(ptrue(a), pcmp_eq(a, a));
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
/** \internal \returns isinf(a) */
|
|
1023
|
+
template <typename Packet>
|
|
1024
|
+
EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
|
|
1025
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1026
|
+
constexpr Scalar inf = NumTraits<Scalar>::infinity();
|
|
1027
|
+
return pcmp_eq(pabs(a), pset1<Packet>(inf));
|
|
1028
|
+
}
|
|
749
1029
|
|
|
750
1030
|
/** \internal \returns the sine of \a a (coeff-wise) */
|
|
751
|
-
template<typename Packet>
|
|
752
|
-
Packet psin(const Packet& a) {
|
|
1031
|
+
template <typename Packet>
|
|
1032
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
|
|
1033
|
+
EIGEN_USING_STD(sin);
|
|
1034
|
+
return sin(a);
|
|
1035
|
+
}
|
|
753
1036
|
|
|
754
1037
|
/** \internal \returns the cosine of \a a (coeff-wise) */
|
|
755
|
-
template<typename Packet>
|
|
756
|
-
Packet pcos(const Packet& a) {
|
|
1038
|
+
template <typename Packet>
|
|
1039
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
|
|
1040
|
+
EIGEN_USING_STD(cos);
|
|
1041
|
+
return cos(a);
|
|
1042
|
+
}
|
|
757
1043
|
|
|
758
1044
|
/** \internal \returns the tan of \a a (coeff-wise) */
|
|
759
|
-
template<typename Packet>
|
|
760
|
-
Packet ptan(const Packet& a) {
|
|
1045
|
+
template <typename Packet>
|
|
1046
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
|
|
1047
|
+
EIGEN_USING_STD(tan);
|
|
1048
|
+
return tan(a);
|
|
1049
|
+
}
|
|
761
1050
|
|
|
762
1051
|
/** \internal \returns the arc sine of \a a (coeff-wise) */
|
|
763
|
-
template<typename Packet>
|
|
764
|
-
Packet pasin(const Packet& a) {
|
|
1052
|
+
template <typename Packet>
|
|
1053
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
|
|
1054
|
+
EIGEN_USING_STD(asin);
|
|
1055
|
+
return asin(a);
|
|
1056
|
+
}
|
|
765
1057
|
|
|
766
1058
|
/** \internal \returns the arc cosine of \a a (coeff-wise) */
|
|
767
|
-
template<typename Packet>
|
|
768
|
-
Packet pacos(const Packet& a) {
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
|
|
1059
|
+
template <typename Packet>
|
|
1060
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
|
|
1061
|
+
EIGEN_USING_STD(acos);
|
|
1062
|
+
return acos(a);
|
|
1063
|
+
}
|
|
773
1064
|
|
|
774
1065
|
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
|
775
|
-
template<typename Packet>
|
|
776
|
-
Packet psinh(const Packet& a) {
|
|
1066
|
+
template <typename Packet>
|
|
1067
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
|
|
1068
|
+
EIGEN_USING_STD(sinh);
|
|
1069
|
+
return sinh(a);
|
|
1070
|
+
}
|
|
777
1071
|
|
|
778
1072
|
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
|
779
|
-
template<typename Packet>
|
|
780
|
-
Packet pcosh(const Packet& a) {
|
|
1073
|
+
template <typename Packet>
|
|
1074
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
|
|
1075
|
+
EIGEN_USING_STD(cosh);
|
|
1076
|
+
return cosh(a);
|
|
1077
|
+
}
|
|
1078
|
+
|
|
1079
|
+
/** \internal \returns the arc tangent of \a a (coeff-wise) */
|
|
1080
|
+
template <typename Packet>
|
|
1081
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
|
|
1082
|
+
EIGEN_USING_STD(atan);
|
|
1083
|
+
return atan(a);
|
|
1084
|
+
}
|
|
781
1085
|
|
|
782
1086
|
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
|
783
|
-
template<typename Packet>
|
|
784
|
-
Packet ptanh(const Packet& a) {
|
|
1087
|
+
template <typename Packet>
|
|
1088
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
|
|
1089
|
+
EIGEN_USING_STD(tanh);
|
|
1090
|
+
return tanh(a);
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
/** \internal \returns the inverse hyperbolic tangent of \a a (coeff-wise) */
|
|
1094
|
+
template <typename Packet>
|
|
1095
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
|
|
1096
|
+
EIGEN_USING_STD(atanh);
|
|
1097
|
+
return atanh(a);
|
|
1098
|
+
}
|
|
785
1099
|
|
|
786
1100
|
/** \internal \returns the exp of \a a (coeff-wise) */
|
|
787
|
-
template<typename Packet>
|
|
788
|
-
Packet pexp(const Packet& a) {
|
|
1101
|
+
template <typename Packet>
|
|
1102
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
|
|
1103
|
+
return numext::exp(a);
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
/** \internal \returns the exp2 of \a a (coeff-wise) */
|
|
1107
|
+
template <typename Packet>
|
|
1108
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
|
|
1109
|
+
return numext::exp2(a);
|
|
1110
|
+
}
|
|
789
1111
|
|
|
790
1112
|
/** \internal \returns the expm1 of \a a (coeff-wise) */
|
|
791
|
-
template<typename Packet>
|
|
792
|
-
Packet pexpm1(const Packet& a) {
|
|
1113
|
+
template <typename Packet>
|
|
1114
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
|
|
1115
|
+
return numext::expm1(a);
|
|
1116
|
+
}
|
|
793
1117
|
|
|
794
1118
|
/** \internal \returns the log of \a a (coeff-wise) */
|
|
795
|
-
template<typename Packet>
|
|
796
|
-
Packet plog(const Packet& a) {
|
|
1119
|
+
template <typename Packet>
|
|
1120
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
|
|
1121
|
+
EIGEN_USING_STD(log);
|
|
1122
|
+
return log(a);
|
|
1123
|
+
}
|
|
797
1124
|
|
|
798
1125
|
/** \internal \returns the log1p of \a a (coeff-wise) */
|
|
799
|
-
template<typename Packet>
|
|
800
|
-
Packet plog1p(const Packet& a) {
|
|
1126
|
+
template <typename Packet>
|
|
1127
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
|
|
1128
|
+
return numext::log1p(a);
|
|
1129
|
+
}
|
|
801
1130
|
|
|
802
1131
|
/** \internal \returns the log10 of \a a (coeff-wise) */
|
|
803
|
-
template<typename Packet>
|
|
804
|
-
Packet plog10(const Packet& a) {
|
|
1132
|
+
template <typename Packet>
|
|
1133
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
|
|
1134
|
+
EIGEN_USING_STD(log10);
|
|
1135
|
+
return log10(a);
|
|
1136
|
+
}
|
|
805
1137
|
|
|
806
|
-
/** \internal \returns the
|
|
807
|
-
template<typename Packet>
|
|
808
|
-
Packet plog2(const Packet& a) {
|
|
809
|
-
|
|
810
|
-
|
|
1138
|
+
/** \internal \returns the log2 of \a a (coeff-wise) */
|
|
1139
|
+
template <typename Packet>
|
|
1140
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
|
|
1141
|
+
using Scalar = typename internal::unpacket_traits<Packet>::type;
|
|
1142
|
+
using RealScalar = typename NumTraits<Scalar>::Real;
|
|
1143
|
+
return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
|
|
811
1144
|
}
|
|
812
1145
|
|
|
813
1146
|
/** \internal \returns the square-root of \a a (coeff-wise) */
|
|
814
|
-
template<typename Packet>
|
|
815
|
-
Packet psqrt(const Packet& a) {
|
|
1147
|
+
template <typename Packet>
|
|
1148
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
|
|
1149
|
+
return numext::sqrt(a);
|
|
1150
|
+
}
|
|
816
1151
|
|
|
817
|
-
/** \internal \returns the
|
|
818
|
-
template<typename Packet>
|
|
819
|
-
Packet
|
|
820
|
-
|
|
821
|
-
return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
|
|
1152
|
+
/** \internal \returns the cube-root of \a a (coeff-wise) */
|
|
1153
|
+
template <typename Packet>
|
|
1154
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
|
|
1155
|
+
return numext::cbrt(a);
|
|
822
1156
|
}
|
|
823
1157
|
|
|
1158
|
+
template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
|
|
1159
|
+
bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
|
|
1160
|
+
struct nearest_integer_packetop_impl {
|
|
1161
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
|
|
1162
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
|
|
1163
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
|
|
1164
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
|
|
1165
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
|
|
1166
|
+
};
|
|
1167
|
+
|
|
824
1168
|
/** \internal \returns the rounded value of \a a (coeff-wise) */
|
|
825
|
-
template<typename Packet>
|
|
826
|
-
Packet pround(const Packet& a) {
|
|
1169
|
+
template <typename Packet>
|
|
1170
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
|
|
1171
|
+
return nearest_integer_packetop_impl<Packet>::run_round(a);
|
|
1172
|
+
}
|
|
827
1173
|
|
|
828
1174
|
/** \internal \returns the floor of \a a (coeff-wise) */
|
|
829
|
-
template<typename Packet>
|
|
830
|
-
Packet pfloor(const Packet& a) {
|
|
1175
|
+
template <typename Packet>
|
|
1176
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
|
|
1177
|
+
return nearest_integer_packetop_impl<Packet>::run_floor(a);
|
|
1178
|
+
}
|
|
831
1179
|
|
|
832
1180
|
/** \internal \returns the rounded value of \a a (coeff-wise) with current
|
|
833
1181
|
* rounding mode */
|
|
834
|
-
template<typename Packet>
|
|
835
|
-
Packet print(const Packet& a) {
|
|
1182
|
+
template <typename Packet>
|
|
1183
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
|
|
1184
|
+
return nearest_integer_packetop_impl<Packet>::run_rint(a);
|
|
1185
|
+
}
|
|
836
1186
|
|
|
837
1187
|
/** \internal \returns the ceil of \a a (coeff-wise) */
|
|
838
|
-
template<typename Packet>
|
|
839
|
-
Packet pceil(const Packet& a) {
|
|
1188
|
+
template <typename Packet>
|
|
1189
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
|
|
1190
|
+
return nearest_integer_packetop_impl<Packet>::run_ceil(a);
|
|
1191
|
+
}
|
|
1192
|
+
|
|
1193
|
+
/** \internal \returns the truncation of \a a (coeff-wise) */
|
|
1194
|
+
template <typename Packet>
|
|
1195
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
|
|
1196
|
+
return nearest_integer_packetop_impl<Packet>::run_trunc(a);
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
template <typename Packet, typename EnableIf = void>
|
|
1200
|
+
struct psign_impl {
|
|
1201
|
+
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
|
|
1202
|
+
};
|
|
1203
|
+
|
|
1204
|
+
/** \internal \returns the sign of \a a (coeff-wise) */
|
|
1205
|
+
template <typename Packet>
|
|
1206
|
+
EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
|
|
1207
|
+
return psign_impl<Packet>::run(a);
|
|
1208
|
+
}
|
|
1209
|
+
|
|
1210
|
+
template <>
|
|
1211
|
+
EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
|
|
1212
|
+
return a;
|
|
1213
|
+
}
|
|
840
1214
|
|
|
841
1215
|
/** \internal \returns the first element of a packet */
|
|
842
|
-
template<typename Packet>
|
|
843
|
-
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
|
|
844
|
-
|
|
845
|
-
|
|
1216
|
+
template <typename Packet>
|
|
1217
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
|
|
1218
|
+
return a;
|
|
1219
|
+
}
|
|
846
1220
|
|
|
847
1221
|
/** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
template<typename Packet>
|
|
852
|
-
EIGEN_DEVICE_FUNC inline
|
|
853
|
-
|
|
854
|
-
|
|
1222
|
+
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
|
1223
|
+
* For packet-size smaller than or equal to 4, this boils down to a noop.
|
|
1224
|
+
*/
|
|
1225
|
+
template <typename Packet>
|
|
1226
|
+
EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
|
|
1227
|
+
typename unpacket_traits<Packet>::half, Packet>
|
|
1228
|
+
predux_half_dowto4(const Packet& a) {
|
|
1229
|
+
return a;
|
|
1230
|
+
}
|
|
855
1231
|
|
|
856
1232
|
// Slow generic implementation of Packet reduction.
|
|
857
1233
|
template <typename Packet, typename Op>
|
|
858
|
-
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
|
|
859
|
-
predux_helper(const Packet& a, Op op) {
|
|
1234
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
|
|
860
1235
|
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
861
1236
|
const size_t n = unpacket_traits<Packet>::size;
|
|
862
1237
|
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
|
|
863
1238
|
pstoreu<Scalar>(elements, a);
|
|
864
|
-
for(size_t k = n / 2; k > 0; k /= 2)
|
|
865
|
-
for(size_t i = 0; i < k; ++i) {
|
|
1239
|
+
for (size_t k = n / 2; k > 0; k /= 2) {
|
|
1240
|
+
for (size_t i = 0; i < k; ++i) {
|
|
866
1241
|
elements[i] = op(elements[i], elements[i + k]);
|
|
867
1242
|
}
|
|
868
1243
|
}
|
|
@@ -870,65 +1245,78 @@ predux_helper(const Packet& a, Op op) {
|
|
|
870
1245
|
}
|
|
871
1246
|
|
|
872
1247
|
/** \internal \returns the sum of the elements of \a a*/
|
|
873
|
-
template<typename Packet>
|
|
874
|
-
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
|
|
875
|
-
predux(const Packet& a)
|
|
876
|
-
{
|
|
1248
|
+
template <typename Packet>
|
|
1249
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
|
|
877
1250
|
return a;
|
|
878
1251
|
}
|
|
879
1252
|
|
|
880
1253
|
/** \internal \returns the product of the elements of \a a */
|
|
881
1254
|
template <typename Packet>
|
|
882
|
-
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
|
|
883
|
-
|
|
884
|
-
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1255
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
|
|
1256
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
885
1257
|
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
|
|
886
1258
|
}
|
|
887
1259
|
|
|
888
1260
|
/** \internal \returns the min of the elements of \a a */
|
|
889
1261
|
template <typename Packet>
|
|
890
|
-
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
|
|
1262
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
|
|
1263
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1264
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<Scalar>)));
|
|
894
1265
|
}
|
|
895
1266
|
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
900
|
-
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (
|
|
1267
|
+
/** \internal \returns the max of the elements of \a a */
|
|
1268
|
+
template <typename Packet>
|
|
1269
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
|
|
1270
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1271
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<Scalar>)));
|
|
901
1272
|
}
|
|
902
1273
|
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
1274
|
+
template <int NaNPropagation, typename Packet>
|
|
1275
|
+
struct predux_min_max_helper_impl {
|
|
1276
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1277
|
+
static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits<Scalar>::IsInteger;
|
|
1278
|
+
template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
|
|
1279
|
+
static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
|
|
1280
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
|
|
1281
|
+
}
|
|
1282
|
+
template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
|
|
1283
|
+
static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
|
|
1284
|
+
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
|
|
1285
|
+
}
|
|
1286
|
+
template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
|
|
1287
|
+
static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
|
|
1288
|
+
return predux_min(a);
|
|
1289
|
+
}
|
|
1290
|
+
template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
|
|
1291
|
+
static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
|
|
1292
|
+
return predux_max(a);
|
|
1293
|
+
}
|
|
1294
|
+
};
|
|
1295
|
+
|
|
1296
|
+
template <int NaNPropagation, typename Packet>
|
|
1297
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
|
|
1298
|
+
return predux_min_max_helper_impl<NaNPropagation, Packet>::run_min(a);
|
|
909
1299
|
}
|
|
910
1300
|
|
|
911
1301
|
template <int NaNPropagation, typename Packet>
|
|
912
|
-
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
|
|
913
|
-
|
|
914
|
-
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
915
|
-
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
|
|
1302
|
+
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
|
|
1303
|
+
return predux_min_max_helper_impl<NaNPropagation, Packet>::run_max(a);
|
|
916
1304
|
}
|
|
917
1305
|
|
|
918
1306
|
#undef EIGEN_BINARY_OP_NAN_PROPAGATION
|
|
919
1307
|
|
|
920
1308
|
/** \internal \returns true if all coeffs of \a a mean "true"
|
|
921
|
-
|
|
922
|
-
|
|
1309
|
+
* It is supposed to be called on values returned by pcmp_*.
|
|
1310
|
+
*/
|
|
923
1311
|
// not needed yet
|
|
924
1312
|
// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
|
|
925
1313
|
// { return bool(a); }
|
|
926
1314
|
|
|
927
1315
|
/** \internal \returns true if any coeff of \a a means "true"
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
template<typename Packet>
|
|
931
|
-
{
|
|
1316
|
+
* It is supposed to be called on values returned by pcmp_*.
|
|
1317
|
+
*/
|
|
1318
|
+
template <typename Packet>
|
|
1319
|
+
EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
|
|
932
1320
|
// Dirty but generic implementation where "true" is assumed to be non 0 and all the same.
|
|
933
1321
|
// It is expected that "true" is either:
|
|
934
1322
|
// - Scalar(1)
|
|
@@ -940,101 +1328,375 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet&
|
|
|
940
1328
|
}
|
|
941
1329
|
|
|
942
1330
|
/***************************************************************************
|
|
943
|
-
* The following functions might not have to be overwritten for vectorized types
|
|
944
|
-
***************************************************************************/
|
|
945
|
-
|
|
946
|
-
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
|
|
947
|
-
// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
|
|
948
|
-
template<typename Packet>
|
|
949
|
-
inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
|
|
950
|
-
{
|
|
951
|
-
pstore(to, pset1<Packet>(a));
|
|
952
|
-
}
|
|
1331
|
+
* The following functions might not have to be overwritten for vectorized types
|
|
1332
|
+
***************************************************************************/
|
|
953
1333
|
|
|
1334
|
+
template <typename Packet, typename EnableIf = void>
|
|
1335
|
+
struct pmadd_impl {
|
|
1336
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
|
|
1337
|
+
return padd(pmul(a, b), c);
|
|
1338
|
+
}
|
|
1339
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
|
|
1340
|
+
return psub(pmul(a, b), c);
|
|
1341
|
+
}
|
|
1342
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
|
|
1343
|
+
return psub(c, pmul(a, b));
|
|
1344
|
+
}
|
|
1345
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
|
|
1346
|
+
return pnegate(pmadd(a, b, c));
|
|
1347
|
+
}
|
|
1348
|
+
};
|
|
1349
|
+
|
|
1350
|
+
template <typename Scalar>
|
|
1351
|
+
struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
|
|
1352
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
1353
|
+
return numext::madd<Scalar>(a, b, c);
|
|
1354
|
+
}
|
|
1355
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
1356
|
+
return numext::madd<Scalar>(a, b, Scalar(-c));
|
|
1357
|
+
}
|
|
1358
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
1359
|
+
return numext::madd<Scalar>(Scalar(-a), b, c);
|
|
1360
|
+
}
|
|
1361
|
+
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
1362
|
+
return -Scalar(numext::madd<Scalar>(a, b, c));
|
|
1363
|
+
}
|
|
1364
|
+
};
|
|
1365
|
+
|
|
1366
|
+
// Multiply-add instructions.
|
|
954
1367
|
/** \internal \returns a * b + c (coeff-wise) */
|
|
955
|
-
template<typename Packet>
|
|
956
|
-
pmadd(const Packet&
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
1368
|
+
template <typename Packet>
|
|
1369
|
+
EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
|
|
1370
|
+
return pmadd_impl<Packet>::pmadd(a, b, c);
|
|
1371
|
+
}
|
|
1372
|
+
|
|
1373
|
+
/** \internal \returns a * b - c (coeff-wise) */
|
|
1374
|
+
template <typename Packet>
|
|
1375
|
+
EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
|
|
1376
|
+
return pmadd_impl<Packet>::pmsub(a, b, c);
|
|
1377
|
+
}
|
|
1378
|
+
|
|
1379
|
+
/** \internal \returns -(a * b) + c (coeff-wise) */
|
|
1380
|
+
template <typename Packet>
|
|
1381
|
+
EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
|
|
1382
|
+
return pmadd_impl<Packet>::pnmadd(a, b, c);
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
/** \internal \returns -(a * b + c) (coeff-wise) */
|
|
1386
|
+
template <typename Packet>
|
|
1387
|
+
EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
|
|
1388
|
+
return pmadd_impl<Packet>::pnmsub(a, b, c);
|
|
1389
|
+
}
|
|
1390
|
+
|
|
1391
|
+
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned
|
|
1392
|
+
*/
|
|
1393
|
+
// NOTE: this function must really be templated on the packet type (think about different packet types for the same
|
|
1394
|
+
// scalar type)
|
|
1395
|
+
template <typename Packet>
|
|
1396
|
+
inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
|
|
1397
|
+
pstore(to, pset1<Packet>(a));
|
|
1398
|
+
}
|
|
960
1399
|
|
|
961
1400
|
/** \internal \returns a packet version of \a *from.
|
|
962
|
-
|
|
963
|
-
template<typename Packet, int Alignment>
|
|
964
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
|
|
965
|
-
|
|
966
|
-
if(Alignment >= unpacket_traits<Packet>::alignment)
|
|
1401
|
+
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
|
1402
|
+
template <typename Packet, int Alignment>
|
|
1403
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
|
|
1404
|
+
if (Alignment >= unpacket_traits<Packet>::alignment)
|
|
967
1405
|
return pload<Packet>(from);
|
|
968
1406
|
else
|
|
969
1407
|
return ploadu<Packet>(from);
|
|
970
1408
|
}
|
|
971
1409
|
|
|
1410
|
+
/** \internal \returns n elements of a packet version of \a *from.
|
|
1411
|
+
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
|
1412
|
+
template <typename Packet, int Alignment>
|
|
1413
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
|
|
1414
|
+
const Index n, const Index offset = 0) {
|
|
1415
|
+
if (Alignment >= unpacket_traits<Packet>::alignment)
|
|
1416
|
+
return pload_partial<Packet>(from, n, offset);
|
|
1417
|
+
else
|
|
1418
|
+
return ploadu_partial<Packet>(from, n, offset);
|
|
1419
|
+
}
|
|
1420
|
+
|
|
972
1421
|
/** \internal copy the packet \a from to \a *to.
|
|
973
|
-
|
|
974
|
-
template<typename Scalar, typename Packet, int Alignment>
|
|
975
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
|
|
976
|
-
|
|
977
|
-
if(Alignment >= unpacket_traits<Packet>::alignment)
|
|
1422
|
+
* The pointer \a to must be aligned on a \a Alignment bytes boundary. */
|
|
1423
|
+
template <typename Scalar, typename Packet, int Alignment>
|
|
1424
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
|
|
1425
|
+
if (Alignment >= unpacket_traits<Packet>::alignment)
|
|
978
1426
|
pstore(to, from);
|
|
979
1427
|
else
|
|
980
1428
|
pstoreu(to, from);
|
|
981
1429
|
}
|
|
982
1430
|
|
|
1431
|
+
/** \internal copy n elements of the packet \a from to \a *to.
|
|
1432
|
+
* The pointer \a to must be aligned on a \a Alignment bytes boundary. */
|
|
1433
|
+
template <typename Scalar, typename Packet, int Alignment>
|
|
1434
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
|
|
1435
|
+
const Index offset = 0) {
|
|
1436
|
+
if (Alignment >= unpacket_traits<Packet>::alignment)
|
|
1437
|
+
pstore_partial(to, from, n, offset);
|
|
1438
|
+
else
|
|
1439
|
+
pstoreu_partial(to, from, n, offset);
|
|
1440
|
+
}
|
|
1441
|
+
|
|
983
1442
|
/** \internal \returns a packet version of \a *from.
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
template<typename Packet, int LoadMode>
|
|
989
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
|
|
990
|
-
{
|
|
1443
|
+
* Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
|
|
1444
|
+
* hardware if available to speedup the loading of data that won't be modified
|
|
1445
|
+
* by the current computation.
|
|
1446
|
+
*/
|
|
1447
|
+
template <typename Packet, int LoadMode>
|
|
1448
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
|
|
991
1449
|
return ploadt<Packet, LoadMode>(from);
|
|
992
1450
|
}
|
|
993
1451
|
|
|
994
1452
|
/***************************************************************************
|
|
995
|
-
* Fast complex products (GCC generates a function call which is very slow)
|
|
996
|
-
***************************************************************************/
|
|
1453
|
+
* Fast complex products (GCC generates a function call which is very slow)
|
|
1454
|
+
***************************************************************************/
|
|
997
1455
|
|
|
998
1456
|
// Eigen+CUDA does not support complexes.
|
|
999
1457
|
#if !defined(EIGEN_GPUCC)
|
|
1000
1458
|
|
|
1001
|
-
template<>
|
|
1002
|
-
|
|
1459
|
+
template <>
|
|
1460
|
+
inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
|
|
1461
|
+
return std::complex<float>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
|
|
1462
|
+
}
|
|
1003
1463
|
|
|
1004
|
-
template<>
|
|
1005
|
-
|
|
1464
|
+
template <>
|
|
1465
|
+
inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
|
|
1466
|
+
return std::complex<double>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
|
|
1467
|
+
}
|
|
1006
1468
|
|
|
1007
1469
|
#endif
|
|
1008
1470
|
|
|
1009
|
-
|
|
1010
1471
|
/***************************************************************************
|
|
1011
1472
|
* PacketBlock, that is a collection of N packets where the number of words
|
|
1012
1473
|
* in the packet is a multiple of N.
|
|
1013
|
-
***************************************************************************/
|
|
1014
|
-
template <typename Packet,int N=unpacket_traits<Packet>::size>
|
|
1474
|
+
***************************************************************************/
|
|
1475
|
+
template <typename Packet, int N = unpacket_traits<Packet>::size>
|
|
1476
|
+
struct PacketBlock {
|
|
1015
1477
|
Packet packet[N];
|
|
1016
1478
|
};
|
|
1017
1479
|
|
|
1018
|
-
template<typename Packet>
|
|
1019
|
-
ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
|
|
1480
|
+
template <typename Packet>
|
|
1481
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
|
|
1020
1482
|
// Nothing to do in the scalar case, i.e. a 1x1 matrix.
|
|
1021
1483
|
}
|
|
1022
1484
|
|
|
1023
1485
|
/***************************************************************************
|
|
1024
1486
|
* Selector, i.e. vector of N boolean values used to select (i.e. blend)
|
|
1025
1487
|
* words from 2 packets.
|
|
1026
|
-
***************************************************************************/
|
|
1027
|
-
template <size_t N>
|
|
1488
|
+
***************************************************************************/
|
|
1489
|
+
template <size_t N>
|
|
1490
|
+
struct Selector {
|
|
1028
1491
|
bool select[N];
|
|
1029
1492
|
};
|
|
1030
1493
|
|
|
1031
|
-
template<typename Packet>
|
|
1032
|
-
pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
|
|
1494
|
+
template <typename Packet>
|
|
1495
|
+
EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
|
|
1496
|
+
const Packet& thenPacket, const Packet& elsePacket) {
|
|
1033
1497
|
return ifPacket.select[0] ? thenPacket : elsePacket;
|
|
1034
1498
|
}
|
|
1035
1499
|
|
|
1036
|
-
|
|
1500
|
+
/** \internal \returns 1 / a (coeff-wise) */
|
|
1501
|
+
template <typename Packet>
|
|
1502
|
+
EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
|
|
1503
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1504
|
+
return pdiv(pset1<Packet>(Scalar(1)), a);
|
|
1505
|
+
}
|
|
1506
|
+
|
|
1507
|
+
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
|
|
1508
|
+
template <typename Packet>
|
|
1509
|
+
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
|
|
1510
|
+
return preciprocal<Packet>(psqrt(a));
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
|
|
1514
|
+
bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
|
|
1515
|
+
struct psignbit_impl;
|
|
1516
|
+
template <typename Packet, bool IsInteger>
|
|
1517
|
+
struct psignbit_impl<Packet, true, IsInteger> {
|
|
1518
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
|
|
1519
|
+
};
|
|
1520
|
+
template <typename Packet>
|
|
1521
|
+
struct psignbit_impl<Packet, false, false> {
|
|
1522
|
+
// generic implementation if not specialized in PacketMath.h
|
|
1523
|
+
// slower than arithmetic shift
|
|
1524
|
+
typedef typename unpacket_traits<Packet>::type Scalar;
|
|
1525
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
|
|
1526
|
+
const Packet cst_pos_one = pset1<Packet>(Scalar(1));
|
|
1527
|
+
const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
|
|
1528
|
+
return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
|
|
1529
|
+
}
|
|
1530
|
+
};
|
|
1531
|
+
template <typename Packet>
|
|
1532
|
+
struct psignbit_impl<Packet, false, true> {
|
|
1533
|
+
// generic implementation for integer packets
|
|
1534
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
|
|
1535
|
+
};
|
|
1536
|
+
/** \internal \returns the sign bit of \a a as a bitmask*/
|
|
1537
|
+
template <typename Packet>
|
|
1538
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
|
|
1539
|
+
return psignbit_impl<Packet>::run(a);
|
|
1540
|
+
}
|
|
1541
|
+
|
|
1542
|
+
/** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */
|
|
1543
|
+
template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
|
|
1544
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
|
|
1545
|
+
return numext::atan2(y, x);
|
|
1546
|
+
}
|
|
1547
|
+
|
|
1548
|
+
/** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */
|
|
1549
|
+
template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
|
|
1550
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
|
|
1551
|
+
typedef typename internal::unpacket_traits<Packet>::type Scalar;
|
|
1552
|
+
|
|
1553
|
+
// See https://en.cppreference.com/w/cpp/numeric/math/atan2
|
|
1554
|
+
// for how corner cases are supposed to be handled according to the
|
|
1555
|
+
// IEEE floating-point standard (IEC 60559).
|
|
1556
|
+
const Packet kSignMask = pset1<Packet>(-Scalar(0));
|
|
1557
|
+
const Packet kZero = pzero(x);
|
|
1558
|
+
const Packet kOne = pset1<Packet>(Scalar(1));
|
|
1559
|
+
const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
|
|
1560
|
+
|
|
1561
|
+
const Packet x_has_signbit = psignbit(x);
|
|
1562
|
+
const Packet y_signmask = pand(y, kSignMask);
|
|
1563
|
+
const Packet x_signmask = pand(x, kSignMask);
|
|
1564
|
+
const Packet result_signmask = pxor(y_signmask, x_signmask);
|
|
1565
|
+
const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
|
|
1566
|
+
|
|
1567
|
+
const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
|
|
1568
|
+
const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
|
|
1569
|
+
|
|
1570
|
+
Packet arg = pdiv(y, x);
|
|
1571
|
+
arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
|
|
1572
|
+
arg = pselect(x_and_y_are_zero, result_signmask, arg);
|
|
1573
|
+
|
|
1574
|
+
Packet result = patan(arg);
|
|
1575
|
+
result = padd(result, shift);
|
|
1576
|
+
return result;
|
|
1577
|
+
}
|
|
1578
|
+
|
|
1579
|
+
/** \internal \returns the argument of \a a as a complex number */
|
|
1580
|
+
template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
|
|
1581
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
|
|
1582
|
+
return Packet(numext::arg(a));
|
|
1583
|
+
}
|
|
1584
|
+
|
|
1585
|
+
/** \internal \returns the argument of \a a as a complex number */
|
|
1586
|
+
template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
|
|
1587
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
|
|
1588
|
+
EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
|
|
1589
|
+
THIS METHOD IS FOR COMPLEX TYPES ONLY)
|
|
1590
|
+
using RealPacket = typename unpacket_traits<Packet>::as_real;
|
|
1591
|
+
// a // r i r i ...
|
|
1592
|
+
RealPacket aflip = pcplxflip(a).v; // i r i r ...
|
|
1593
|
+
RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
|
|
1594
|
+
return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
|
|
1595
|
+
}
|
|
1596
|
+
|
|
1597
|
+
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
|
1598
|
+
* outside this range are not defined. \a *from does not need to be aligned, and can be null if \a count is zero.*/
|
|
1599
|
+
template <typename Packet>
|
|
1600
|
+
EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
|
1601
|
+
Index count) {
|
|
1602
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1603
|
+
constexpr Index PacketSize = unpacket_traits<Packet>::size;
|
|
1604
|
+
eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
|
|
1605
|
+
Scalar aux[PacketSize] = {};
|
|
1606
|
+
for (Index k = begin; k < begin + count; k++) {
|
|
1607
|
+
aux[k] = from[k];
|
|
1608
|
+
}
|
|
1609
|
+
return ploadu<Packet>(aux);
|
|
1610
|
+
}
|
|
1611
|
+
|
|
1612
|
+
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
|
1613
|
+
* outside this range are not defined. \a *from must be aligned, and cannot be null.*/
|
|
1614
|
+
template <typename Packet>
|
|
1615
|
+
EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
|
1616
|
+
Index count) {
|
|
1617
|
+
return ploaduSegment<Packet>(from, begin, count);
|
|
1618
|
+
}
|
|
1619
|
+
|
|
1620
|
+
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
|
1621
|
+
Elements outside of the range [begin, begin + count) are not defined. \a *to does not need to be aligned, and can be
|
|
1622
|
+
null if \a count is zero.*/
|
|
1623
|
+
template <typename Scalar, typename Packet>
|
|
1624
|
+
EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
|
1625
|
+
constexpr Index PacketSize = unpacket_traits<Packet>::size;
|
|
1626
|
+
eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
|
|
1627
|
+
Scalar aux[PacketSize];
|
|
1628
|
+
pstoreu<Scalar, Packet>(aux, from);
|
|
1629
|
+
for (Index k = begin; k < begin + count; k++) {
|
|
1630
|
+
to[k] = aux[k];
|
|
1631
|
+
}
|
|
1632
|
+
}
|
|
1633
|
+
|
|
1634
|
+
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
|
1635
|
+
Elements outside of the range [begin, begin + count) are not defined. \a *to must be aligned, and cannot be
|
|
1636
|
+
null.*/
|
|
1637
|
+
template <typename Scalar, typename Packet>
|
|
1638
|
+
EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
|
1639
|
+
return pstoreuSegment(to, from, begin, count);
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
|
1643
|
+
* outside this range are not defined.*/
|
|
1644
|
+
template <typename Packet, int Alignment>
|
|
1645
|
+
EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
|
1646
|
+
Index count) {
|
|
1647
|
+
constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
|
|
1648
|
+
if (Alignment >= RequiredAlignment) {
|
|
1649
|
+
return ploadSegment<Packet>(from, begin, count);
|
|
1650
|
+
} else {
|
|
1651
|
+
return ploaduSegment<Packet>(from, begin, count);
|
|
1652
|
+
}
|
|
1653
|
+
}
|
|
1654
|
+
|
|
1655
|
+
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
|
1656
|
+
Elements outside of the range [begin, begin + count) are not defined.*/
|
|
1657
|
+
template <typename Scalar, typename Packet, int Alignment>
|
|
1658
|
+
EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
|
1659
|
+
constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
|
|
1660
|
+
if (Alignment >= RequiredAlignment) {
|
|
1661
|
+
pstoreSegment<Scalar, Packet>(to, from, begin, count);
|
|
1662
|
+
} else {
|
|
1663
|
+
pstoreuSegment<Scalar, Packet>(to, from, begin, count);
|
|
1664
|
+
}
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
#ifndef EIGEN_NO_IO
|
|
1668
|
+
|
|
1669
|
+
template <typename Packet>
|
|
1670
|
+
class StreamablePacket {
|
|
1671
|
+
public:
|
|
1672
|
+
using Scalar = typename unpacket_traits<Packet>::type;
|
|
1673
|
+
StreamablePacket(const Packet& packet) { pstoreu(v_, packet); }
|
|
1674
|
+
|
|
1675
|
+
friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) {
|
|
1676
|
+
os << "{" << packet.v_[0];
|
|
1677
|
+
for (int i = 1; i < unpacket_traits<Packet>::size; ++i) {
|
|
1678
|
+
os << "," << packet.v_[i];
|
|
1679
|
+
}
|
|
1680
|
+
os << "}";
|
|
1681
|
+
return os;
|
|
1682
|
+
}
|
|
1683
|
+
|
|
1684
|
+
private:
|
|
1685
|
+
Scalar v_[unpacket_traits<Packet>::size];
|
|
1686
|
+
};
|
|
1687
|
+
|
|
1688
|
+
/**
|
|
1689
|
+
* \internal \returns an intermediary that can be used to ostream packets, e.g. for debugging.
|
|
1690
|
+
*/
|
|
1691
|
+
template <typename Packet>
|
|
1692
|
+
StreamablePacket<Packet> postream(const Packet& packet) {
|
|
1693
|
+
return StreamablePacket<Packet>(packet);
|
|
1694
|
+
}
|
|
1695
|
+
|
|
1696
|
+
#endif // EIGEN_NO_IO
|
|
1697
|
+
|
|
1698
|
+
} // end namespace internal
|
|
1037
1699
|
|
|
1038
|
-
}
|
|
1700
|
+
} // end namespace Eigen
|
|
1039
1701
|
|
|
1040
|
-
#endif
|
|
1702
|
+
#endif // EIGEN_GENERIC_PACKET_MATH_H
|