@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -10,10 +10,11 @@
|
|
|
10
10
|
#ifndef EIGEN_PACKET_MATH_SVE_H
|
|
11
11
|
#define EIGEN_PACKET_MATH_SVE_H
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
{
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../../InternalHeaderCheck.h"
|
|
15
|
+
|
|
16
|
+
namespace Eigen {
|
|
17
|
+
namespace internal {
|
|
17
18
|
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
|
18
19
|
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
|
19
20
|
#endif
|
|
@@ -40,7 +41,6 @@ struct packet_traits<numext::int32_t> : default_packet_traits {
|
|
|
40
41
|
Vectorizable = 1,
|
|
41
42
|
AlignedOnScalar = 1,
|
|
42
43
|
size = sve_packet_size_selector<numext::int32_t, EIGEN_ARM64_SVE_VL>::size,
|
|
43
|
-
HasHalfPacket = 0,
|
|
44
44
|
|
|
45
45
|
HasAdd = 1,
|
|
46
46
|
HasSub = 1,
|
|
@@ -73,174 +73,146 @@ struct unpacket_traits<PacketXi> {
|
|
|
73
73
|
};
|
|
74
74
|
|
|
75
75
|
template <>
|
|
76
|
-
EIGEN_STRONG_INLINE void prefetch<numext::int32_t>(const numext::int32_t* addr)
|
|
77
|
-
{
|
|
76
|
+
EIGEN_STRONG_INLINE void prefetch<numext::int32_t>(const numext::int32_t* addr) {
|
|
78
77
|
svprfw(svptrue_b32(), addr, SV_PLDL1KEEP);
|
|
79
78
|
}
|
|
80
79
|
|
|
81
80
|
template <>
|
|
82
|
-
EIGEN_STRONG_INLINE PacketXi pset1<PacketXi>(const numext::int32_t& from)
|
|
83
|
-
{
|
|
81
|
+
EIGEN_STRONG_INLINE PacketXi pset1<PacketXi>(const numext::int32_t& from) {
|
|
84
82
|
return svdup_n_s32(from);
|
|
85
83
|
}
|
|
86
84
|
|
|
87
85
|
template <>
|
|
88
|
-
EIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const numext::int32_t& a)
|
|
89
|
-
{
|
|
86
|
+
EIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const numext::int32_t& a) {
|
|
90
87
|
numext::int32_t c[packet_traits<numext::int32_t>::size];
|
|
91
88
|
for (int i = 0; i < packet_traits<numext::int32_t>::size; i++) c[i] = i;
|
|
92
|
-
return
|
|
89
|
+
return svadd_s32_x(svptrue_b32(), pset1<PacketXi>(a), svld1_s32(svptrue_b32(), c));
|
|
93
90
|
}
|
|
94
91
|
|
|
95
92
|
template <>
|
|
96
|
-
EIGEN_STRONG_INLINE PacketXi padd<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
97
|
-
|
|
98
|
-
return svadd_s32_z(svptrue_b32(), a, b);
|
|
93
|
+
EIGEN_STRONG_INLINE PacketXi padd<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
94
|
+
return svadd_s32_x(svptrue_b32(), a, b);
|
|
99
95
|
}
|
|
100
96
|
|
|
101
97
|
template <>
|
|
102
|
-
EIGEN_STRONG_INLINE PacketXi psub<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
103
|
-
|
|
104
|
-
return svsub_s32_z(svptrue_b32(), a, b);
|
|
98
|
+
EIGEN_STRONG_INLINE PacketXi psub<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
99
|
+
return svsub_s32_x(svptrue_b32(), a, b);
|
|
105
100
|
}
|
|
106
101
|
|
|
107
102
|
template <>
|
|
108
|
-
EIGEN_STRONG_INLINE PacketXi pnegate(const PacketXi& a)
|
|
109
|
-
|
|
110
|
-
return svneg_s32_z(svptrue_b32(), a);
|
|
103
|
+
EIGEN_STRONG_INLINE PacketXi pnegate(const PacketXi& a) {
|
|
104
|
+
return svneg_s32_x(svptrue_b32(), a);
|
|
111
105
|
}
|
|
112
106
|
|
|
113
107
|
template <>
|
|
114
|
-
EIGEN_STRONG_INLINE PacketXi pconj(const PacketXi& a)
|
|
115
|
-
{
|
|
108
|
+
EIGEN_STRONG_INLINE PacketXi pconj(const PacketXi& a) {
|
|
116
109
|
return a;
|
|
117
110
|
}
|
|
118
111
|
|
|
119
112
|
template <>
|
|
120
|
-
EIGEN_STRONG_INLINE PacketXi pmul<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
121
|
-
|
|
122
|
-
return svmul_s32_z(svptrue_b32(), a, b);
|
|
113
|
+
EIGEN_STRONG_INLINE PacketXi pmul<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
114
|
+
return svmul_s32_x(svptrue_b32(), a, b);
|
|
123
115
|
}
|
|
124
116
|
|
|
125
117
|
template <>
|
|
126
|
-
EIGEN_STRONG_INLINE PacketXi pdiv<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
127
|
-
|
|
128
|
-
return svdiv_s32_z(svptrue_b32(), a, b);
|
|
118
|
+
EIGEN_STRONG_INLINE PacketXi pdiv<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
119
|
+
return svdiv_s32_x(svptrue_b32(), a, b);
|
|
129
120
|
}
|
|
130
121
|
|
|
131
122
|
template <>
|
|
132
|
-
EIGEN_STRONG_INLINE PacketXi pmadd(const PacketXi& a, const PacketXi& b, const PacketXi& c)
|
|
133
|
-
|
|
134
|
-
return svmla_s32_z(svptrue_b32(), c, a, b);
|
|
123
|
+
EIGEN_STRONG_INLINE PacketXi pmadd(const PacketXi& a, const PacketXi& b, const PacketXi& c) {
|
|
124
|
+
return svmla_s32_x(svptrue_b32(), c, a, b);
|
|
135
125
|
}
|
|
136
126
|
|
|
137
127
|
template <>
|
|
138
|
-
EIGEN_STRONG_INLINE PacketXi pmin<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
139
|
-
|
|
140
|
-
return svmin_s32_z(svptrue_b32(), a, b);
|
|
128
|
+
EIGEN_STRONG_INLINE PacketXi pmin<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
129
|
+
return svmin_s32_x(svptrue_b32(), a, b);
|
|
141
130
|
}
|
|
142
131
|
|
|
143
132
|
template <>
|
|
144
|
-
EIGEN_STRONG_INLINE PacketXi pmax<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
145
|
-
|
|
146
|
-
return svmax_s32_z(svptrue_b32(), a, b);
|
|
133
|
+
EIGEN_STRONG_INLINE PacketXi pmax<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
134
|
+
return svmax_s32_x(svptrue_b32(), a, b);
|
|
147
135
|
}
|
|
148
136
|
|
|
149
137
|
template <>
|
|
150
|
-
EIGEN_STRONG_INLINE PacketXi pcmp_le<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
151
|
-
|
|
152
|
-
return svdup_n_s32_z(svcmplt_s32(svptrue_b32(), a, b), 0xffffffffu);
|
|
138
|
+
EIGEN_STRONG_INLINE PacketXi pcmp_le<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
139
|
+
return svdup_n_s32_z(svcmple_s32(svptrue_b32(), a, b), 0xffffffffu);
|
|
153
140
|
}
|
|
154
141
|
|
|
155
142
|
template <>
|
|
156
|
-
EIGEN_STRONG_INLINE PacketXi pcmp_lt<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
157
|
-
{
|
|
143
|
+
EIGEN_STRONG_INLINE PacketXi pcmp_lt<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
158
144
|
return svdup_n_s32_z(svcmplt_s32(svptrue_b32(), a, b), 0xffffffffu);
|
|
159
145
|
}
|
|
160
146
|
|
|
161
147
|
template <>
|
|
162
|
-
EIGEN_STRONG_INLINE PacketXi pcmp_eq<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
163
|
-
{
|
|
148
|
+
EIGEN_STRONG_INLINE PacketXi pcmp_eq<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
164
149
|
return svdup_n_s32_z(svcmpeq_s32(svptrue_b32(), a, b), 0xffffffffu);
|
|
165
150
|
}
|
|
166
151
|
|
|
167
152
|
template <>
|
|
168
|
-
EIGEN_STRONG_INLINE PacketXi ptrue<PacketXi>(const PacketXi& /*a*/)
|
|
169
|
-
|
|
170
|
-
return svdup_n_s32_z(svptrue_b32(), 0xffffffffu);
|
|
153
|
+
EIGEN_STRONG_INLINE PacketXi ptrue<PacketXi>(const PacketXi& /*a*/) {
|
|
154
|
+
return svdup_n_s32_x(svptrue_b32(), 0xffffffffu);
|
|
171
155
|
}
|
|
172
156
|
|
|
173
157
|
template <>
|
|
174
|
-
EIGEN_STRONG_INLINE PacketXi pzero<PacketXi>(const PacketXi& /*a*/)
|
|
175
|
-
|
|
176
|
-
return svdup_n_s32_z(svptrue_b32(), 0);
|
|
158
|
+
EIGEN_STRONG_INLINE PacketXi pzero<PacketXi>(const PacketXi& /*a*/) {
|
|
159
|
+
return svdup_n_s32_x(svptrue_b32(), 0);
|
|
177
160
|
}
|
|
178
161
|
|
|
179
162
|
template <>
|
|
180
|
-
EIGEN_STRONG_INLINE PacketXi pand<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
181
|
-
|
|
182
|
-
return svand_s32_z(svptrue_b32(), a, b);
|
|
163
|
+
EIGEN_STRONG_INLINE PacketXi pand<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
164
|
+
return svand_s32_x(svptrue_b32(), a, b);
|
|
183
165
|
}
|
|
184
166
|
|
|
185
167
|
template <>
|
|
186
|
-
EIGEN_STRONG_INLINE PacketXi por<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
187
|
-
|
|
188
|
-
return svorr_s32_z(svptrue_b32(), a, b);
|
|
168
|
+
EIGEN_STRONG_INLINE PacketXi por<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
169
|
+
return svorr_s32_x(svptrue_b32(), a, b);
|
|
189
170
|
}
|
|
190
171
|
|
|
191
172
|
template <>
|
|
192
|
-
EIGEN_STRONG_INLINE PacketXi pxor<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
193
|
-
|
|
194
|
-
return sveor_s32_z(svptrue_b32(), a, b);
|
|
173
|
+
EIGEN_STRONG_INLINE PacketXi pxor<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
174
|
+
return sveor_s32_x(svptrue_b32(), a, b);
|
|
195
175
|
}
|
|
196
176
|
|
|
197
177
|
template <>
|
|
198
|
-
EIGEN_STRONG_INLINE PacketXi pandnot<PacketXi>(const PacketXi& a, const PacketXi& b)
|
|
199
|
-
|
|
200
|
-
return svbic_s32_z(svptrue_b32(), a, b);
|
|
178
|
+
EIGEN_STRONG_INLINE PacketXi pandnot<PacketXi>(const PacketXi& a, const PacketXi& b) {
|
|
179
|
+
return svbic_s32_x(svptrue_b32(), a, b);
|
|
201
180
|
}
|
|
202
181
|
|
|
203
182
|
template <int N>
|
|
204
|
-
EIGEN_STRONG_INLINE PacketXi parithmetic_shift_right(PacketXi a)
|
|
205
|
-
|
|
206
|
-
return svasrd_n_s32_z(svptrue_b32(), a, N);
|
|
183
|
+
EIGEN_STRONG_INLINE PacketXi parithmetic_shift_right(PacketXi a) {
|
|
184
|
+
return svasrd_n_s32_x(svptrue_b32(), a, N);
|
|
207
185
|
}
|
|
208
186
|
|
|
209
187
|
template <int N>
|
|
210
|
-
EIGEN_STRONG_INLINE PacketXi plogical_shift_right(PacketXi a)
|
|
211
|
-
|
|
212
|
-
return svreinterpret_s32_u32(svlsr_u32_z(svptrue_b32(), svreinterpret_u32_s32(a), svdup_n_u32_z(svptrue_b32(), N)));
|
|
188
|
+
EIGEN_STRONG_INLINE PacketXi plogical_shift_right(PacketXi a) {
|
|
189
|
+
return svreinterpret_s32_u32(svlsr_n_u32_x(svptrue_b32(), svreinterpret_u32_s32(a), N));
|
|
213
190
|
}
|
|
214
191
|
|
|
215
192
|
template <int N>
|
|
216
|
-
EIGEN_STRONG_INLINE PacketXi plogical_shift_left(PacketXi a)
|
|
217
|
-
|
|
218
|
-
return svlsl_s32_z(svptrue_b32(), a, svdup_n_u32_z(svptrue_b32(), N));
|
|
193
|
+
EIGEN_STRONG_INLINE PacketXi plogical_shift_left(PacketXi a) {
|
|
194
|
+
return svlsl_n_s32_x(svptrue_b32(), a, N);
|
|
219
195
|
}
|
|
220
196
|
|
|
221
197
|
template <>
|
|
222
|
-
EIGEN_STRONG_INLINE PacketXi pload<PacketXi>(const numext::int32_t* from)
|
|
223
|
-
{
|
|
198
|
+
EIGEN_STRONG_INLINE PacketXi pload<PacketXi>(const numext::int32_t* from) {
|
|
224
199
|
EIGEN_DEBUG_ALIGNED_LOAD return svld1_s32(svptrue_b32(), from);
|
|
225
200
|
}
|
|
226
201
|
|
|
227
202
|
template <>
|
|
228
|
-
EIGEN_STRONG_INLINE PacketXi ploadu<PacketXi>(const numext::int32_t* from)
|
|
229
|
-
{
|
|
203
|
+
EIGEN_STRONG_INLINE PacketXi ploadu<PacketXi>(const numext::int32_t* from) {
|
|
230
204
|
EIGEN_DEBUG_UNALIGNED_LOAD return svld1_s32(svptrue_b32(), from);
|
|
231
205
|
}
|
|
232
206
|
|
|
233
207
|
template <>
|
|
234
|
-
EIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const numext::int32_t* from)
|
|
235
|
-
{
|
|
208
|
+
EIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const numext::int32_t* from) {
|
|
236
209
|
svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
|
|
237
210
|
indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
|
|
238
211
|
return svld1_gather_u32index_s32(svptrue_b32(), from, indices);
|
|
239
212
|
}
|
|
240
213
|
|
|
241
214
|
template <>
|
|
242
|
-
EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const numext::int32_t* from)
|
|
243
|
-
{
|
|
215
|
+
EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const numext::int32_t* from) {
|
|
244
216
|
svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
|
|
245
217
|
indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
|
|
246
218
|
indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a0, a0, a1, a1, a1, a1, ...}
|
|
@@ -248,102 +220,91 @@ EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const numext::int32_t* from)
|
|
|
248
220
|
}
|
|
249
221
|
|
|
250
222
|
template <>
|
|
251
|
-
EIGEN_STRONG_INLINE void pstore<numext::int32_t>(numext::int32_t* to, const PacketXi& from)
|
|
252
|
-
{
|
|
223
|
+
EIGEN_STRONG_INLINE void pstore<numext::int32_t>(numext::int32_t* to, const PacketXi& from) {
|
|
253
224
|
EIGEN_DEBUG_ALIGNED_STORE svst1_s32(svptrue_b32(), to, from);
|
|
254
225
|
}
|
|
255
226
|
|
|
256
227
|
template <>
|
|
257
|
-
EIGEN_STRONG_INLINE void pstoreu<numext::int32_t>(numext::int32_t* to, const PacketXi& from)
|
|
258
|
-
{
|
|
228
|
+
EIGEN_STRONG_INLINE void pstoreu<numext::int32_t>(numext::int32_t* to, const PacketXi& from) {
|
|
259
229
|
EIGEN_DEBUG_UNALIGNED_STORE svst1_s32(svptrue_b32(), to, from);
|
|
260
230
|
}
|
|
261
231
|
|
|
262
232
|
template <>
|
|
263
|
-
EIGEN_DEVICE_FUNC inline PacketXi pgather<numext::int32_t, PacketXi>(const numext::int32_t* from, Index stride)
|
|
264
|
-
{
|
|
233
|
+
EIGEN_DEVICE_FUNC inline PacketXi pgather<numext::int32_t, PacketXi>(const numext::int32_t* from, Index stride) {
|
|
265
234
|
// Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
|
|
266
235
|
svint32_t indices = svindex_s32(0, stride);
|
|
267
236
|
return svld1_gather_s32index_s32(svptrue_b32(), from, indices);
|
|
268
237
|
}
|
|
269
238
|
|
|
270
239
|
template <>
|
|
271
|
-
EIGEN_DEVICE_FUNC inline void pscatter<numext::int32_t, PacketXi>(numext::int32_t* to, const PacketXi& from,
|
|
272
|
-
{
|
|
240
|
+
EIGEN_DEVICE_FUNC inline void pscatter<numext::int32_t, PacketXi>(numext::int32_t* to, const PacketXi& from,
|
|
241
|
+
Index stride) {
|
|
273
242
|
// Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
|
|
274
243
|
svint32_t indices = svindex_s32(0, stride);
|
|
275
244
|
svst1_scatter_s32index_s32(svptrue_b32(), to, indices, from);
|
|
276
245
|
}
|
|
277
246
|
|
|
278
247
|
template <>
|
|
279
|
-
EIGEN_STRONG_INLINE numext::int32_t pfirst<PacketXi>(const PacketXi& a)
|
|
280
|
-
{
|
|
248
|
+
EIGEN_STRONG_INLINE numext::int32_t pfirst<PacketXi>(const PacketXi& a) {
|
|
281
249
|
// svlasta returns the first element if all predicate bits are 0
|
|
282
250
|
return svlasta_s32(svpfalse_b(), a);
|
|
283
251
|
}
|
|
284
252
|
|
|
285
253
|
template <>
|
|
286
|
-
EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a)
|
|
287
|
-
{
|
|
254
|
+
EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) {
|
|
288
255
|
return svrev_s32(a);
|
|
289
256
|
}
|
|
290
257
|
|
|
291
258
|
template <>
|
|
292
|
-
EIGEN_STRONG_INLINE PacketXi pabs(const PacketXi& a)
|
|
293
|
-
|
|
294
|
-
return svabs_s32_z(svptrue_b32(), a);
|
|
259
|
+
EIGEN_STRONG_INLINE PacketXi pabs(const PacketXi& a) {
|
|
260
|
+
return svabs_s32_x(svptrue_b32(), a);
|
|
295
261
|
}
|
|
296
262
|
|
|
297
263
|
template <>
|
|
298
|
-
EIGEN_STRONG_INLINE numext::int32_t predux<PacketXi>(const PacketXi& a)
|
|
299
|
-
{
|
|
264
|
+
EIGEN_STRONG_INLINE numext::int32_t predux<PacketXi>(const PacketXi& a) {
|
|
300
265
|
return static_cast<numext::int32_t>(svaddv_s32(svptrue_b32(), a));
|
|
301
266
|
}
|
|
302
267
|
|
|
303
268
|
template <>
|
|
304
|
-
EIGEN_STRONG_INLINE numext::int32_t predux_mul<PacketXi>(const PacketXi& a)
|
|
305
|
-
|
|
306
|
-
EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0),
|
|
307
|
-
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
|
|
269
|
+
EIGEN_STRONG_INLINE numext::int32_t predux_mul<PacketXi>(const PacketXi& a) {
|
|
270
|
+
EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0), EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
|
|
308
271
|
|
|
309
272
|
// Multiply the vector by its reverse
|
|
310
|
-
svint32_t prod =
|
|
273
|
+
svint32_t prod = svmul_s32_x(svptrue_b32(), a, svrev_s32(a));
|
|
311
274
|
svint32_t half_prod;
|
|
312
275
|
|
|
313
276
|
// Extract the high half of the vector. Depending on the VL more reductions need to be done
|
|
314
277
|
if (EIGEN_ARM64_SVE_VL >= 2048) {
|
|
315
278
|
half_prod = svtbl_s32(prod, svindex_u32(32, 1));
|
|
316
|
-
prod =
|
|
279
|
+
prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
|
|
317
280
|
}
|
|
318
281
|
if (EIGEN_ARM64_SVE_VL >= 1024) {
|
|
319
282
|
half_prod = svtbl_s32(prod, svindex_u32(16, 1));
|
|
320
|
-
prod =
|
|
283
|
+
prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
|
|
321
284
|
}
|
|
322
285
|
if (EIGEN_ARM64_SVE_VL >= 512) {
|
|
323
286
|
half_prod = svtbl_s32(prod, svindex_u32(8, 1));
|
|
324
|
-
prod =
|
|
287
|
+
prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
|
|
325
288
|
}
|
|
326
289
|
if (EIGEN_ARM64_SVE_VL >= 256) {
|
|
327
290
|
half_prod = svtbl_s32(prod, svindex_u32(4, 1));
|
|
328
|
-
prod =
|
|
291
|
+
prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
|
|
329
292
|
}
|
|
330
293
|
// Last reduction
|
|
331
294
|
half_prod = svtbl_s32(prod, svindex_u32(2, 1));
|
|
332
|
-
prod =
|
|
295
|
+
prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
|
|
333
296
|
|
|
334
297
|
// The reduction is done to the first element.
|
|
335
298
|
return pfirst<PacketXi>(prod);
|
|
336
299
|
}
|
|
337
300
|
|
|
338
301
|
template <>
|
|
339
|
-
EIGEN_STRONG_INLINE numext::int32_t predux_min<PacketXi>(const PacketXi& a)
|
|
340
|
-
{
|
|
302
|
+
EIGEN_STRONG_INLINE numext::int32_t predux_min<PacketXi>(const PacketXi& a) {
|
|
341
303
|
return svminv_s32(svptrue_b32(), a);
|
|
342
304
|
}
|
|
343
305
|
|
|
344
306
|
template <>
|
|
345
|
-
EIGEN_STRONG_INLINE numext::int32_t predux_max<PacketXi>(const PacketXi& a)
|
|
346
|
-
{
|
|
307
|
+
EIGEN_STRONG_INLINE numext::int32_t predux_max<PacketXi>(const PacketXi& a) {
|
|
347
308
|
return svmaxv_s32(svptrue_b32(), a);
|
|
348
309
|
}
|
|
349
310
|
|
|
@@ -375,7 +336,6 @@ struct packet_traits<float> : default_packet_traits {
|
|
|
375
336
|
Vectorizable = 1,
|
|
376
337
|
AlignedOnScalar = 1,
|
|
377
338
|
size = sve_packet_size_selector<float, EIGEN_ARM64_SVE_VL>::size,
|
|
378
|
-
HasHalfPacket = 0,
|
|
379
339
|
|
|
380
340
|
HasAdd = 1,
|
|
381
341
|
HasSub = 1,
|
|
@@ -393,15 +353,17 @@ struct packet_traits<float> : default_packet_traits {
|
|
|
393
353
|
HasReduxp = 0, // Not implemented in SVE
|
|
394
354
|
|
|
395
355
|
HasDiv = 1,
|
|
396
|
-
HasFloor = 1,
|
|
397
356
|
|
|
357
|
+
HasCmp = 1,
|
|
398
358
|
HasSin = EIGEN_FAST_MATH,
|
|
399
359
|
HasCos = EIGEN_FAST_MATH,
|
|
400
360
|
HasLog = 1,
|
|
401
361
|
HasExp = 1,
|
|
402
|
-
|
|
362
|
+
HasPow = 1,
|
|
363
|
+
HasSqrt = 1,
|
|
403
364
|
HasTanh = EIGEN_FAST_MATH,
|
|
404
|
-
HasErf = EIGEN_FAST_MATH
|
|
365
|
+
HasErf = EIGEN_FAST_MATH,
|
|
366
|
+
HasErfc = EIGEN_FAST_MATH
|
|
405
367
|
};
|
|
406
368
|
};
|
|
407
369
|
|
|
@@ -421,120 +383,101 @@ struct unpacket_traits<PacketXf> {
|
|
|
421
383
|
};
|
|
422
384
|
|
|
423
385
|
template <>
|
|
424
|
-
EIGEN_STRONG_INLINE PacketXf pset1<PacketXf>(const float& from)
|
|
425
|
-
{
|
|
386
|
+
EIGEN_STRONG_INLINE PacketXf pset1<PacketXf>(const float& from) {
|
|
426
387
|
return svdup_n_f32(from);
|
|
427
388
|
}
|
|
428
389
|
|
|
429
390
|
template <>
|
|
430
|
-
EIGEN_STRONG_INLINE PacketXf pset1frombits<PacketXf>(numext::uint32_t from)
|
|
431
|
-
|
|
432
|
-
return svreinterpret_f32_u32(svdup_n_u32_z(svptrue_b32(), from));
|
|
391
|
+
EIGEN_STRONG_INLINE PacketXf pset1frombits<PacketXf>(numext::uint32_t from) {
|
|
392
|
+
return svreinterpret_f32_u32(svdup_n_u32_x(svptrue_b32(), from));
|
|
433
393
|
}
|
|
434
394
|
|
|
435
395
|
template <>
|
|
436
|
-
EIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a)
|
|
437
|
-
{
|
|
396
|
+
EIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a) {
|
|
438
397
|
float c[packet_traits<float>::size];
|
|
439
398
|
for (int i = 0; i < packet_traits<float>::size; i++) c[i] = i;
|
|
440
|
-
return
|
|
399
|
+
return svadd_f32_x(svptrue_b32(), pset1<PacketXf>(a), svld1_f32(svptrue_b32(), c));
|
|
441
400
|
}
|
|
442
401
|
|
|
443
402
|
template <>
|
|
444
|
-
EIGEN_STRONG_INLINE PacketXf padd<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
445
|
-
|
|
446
|
-
return svadd_f32_z(svptrue_b32(), a, b);
|
|
403
|
+
EIGEN_STRONG_INLINE PacketXf padd<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
404
|
+
return svadd_f32_x(svptrue_b32(), a, b);
|
|
447
405
|
}
|
|
448
406
|
|
|
449
407
|
template <>
|
|
450
|
-
EIGEN_STRONG_INLINE PacketXf psub<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
451
|
-
|
|
452
|
-
return svsub_f32_z(svptrue_b32(), a, b);
|
|
408
|
+
EIGEN_STRONG_INLINE PacketXf psub<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
409
|
+
return svsub_f32_x(svptrue_b32(), a, b);
|
|
453
410
|
}
|
|
454
411
|
|
|
455
412
|
template <>
|
|
456
|
-
EIGEN_STRONG_INLINE PacketXf pnegate(const PacketXf& a)
|
|
457
|
-
|
|
458
|
-
return svneg_f32_z(svptrue_b32(), a);
|
|
413
|
+
EIGEN_STRONG_INLINE PacketXf pnegate(const PacketXf& a) {
|
|
414
|
+
return svneg_f32_x(svptrue_b32(), a);
|
|
459
415
|
}
|
|
460
416
|
|
|
461
417
|
template <>
|
|
462
|
-
EIGEN_STRONG_INLINE PacketXf pconj(const PacketXf& a)
|
|
463
|
-
{
|
|
418
|
+
EIGEN_STRONG_INLINE PacketXf pconj(const PacketXf& a) {
|
|
464
419
|
return a;
|
|
465
420
|
}
|
|
466
421
|
|
|
467
422
|
template <>
|
|
468
|
-
EIGEN_STRONG_INLINE PacketXf pmul<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
469
|
-
|
|
470
|
-
return svmul_f32_z(svptrue_b32(), a, b);
|
|
423
|
+
EIGEN_STRONG_INLINE PacketXf pmul<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
424
|
+
return svmul_f32_x(svptrue_b32(), a, b);
|
|
471
425
|
}
|
|
472
426
|
|
|
473
427
|
template <>
|
|
474
|
-
EIGEN_STRONG_INLINE PacketXf pdiv<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
475
|
-
|
|
476
|
-
return svdiv_f32_z(svptrue_b32(), a, b);
|
|
428
|
+
EIGEN_STRONG_INLINE PacketXf pdiv<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
429
|
+
return svdiv_f32_x(svptrue_b32(), a, b);
|
|
477
430
|
}
|
|
478
431
|
|
|
479
432
|
template <>
|
|
480
|
-
EIGEN_STRONG_INLINE PacketXf pmadd(const PacketXf& a, const PacketXf& b, const PacketXf& c)
|
|
481
|
-
|
|
482
|
-
return svmla_f32_z(svptrue_b32(), c, a, b);
|
|
433
|
+
EIGEN_STRONG_INLINE PacketXf pmadd(const PacketXf& a, const PacketXf& b, const PacketXf& c) {
|
|
434
|
+
return svmla_f32_x(svptrue_b32(), c, a, b);
|
|
483
435
|
}
|
|
484
436
|
|
|
485
437
|
template <>
|
|
486
|
-
EIGEN_STRONG_INLINE PacketXf pmin<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
487
|
-
|
|
488
|
-
return svmin_f32_z(svptrue_b32(), a, b);
|
|
438
|
+
EIGEN_STRONG_INLINE PacketXf pmin<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
439
|
+
return svmin_f32_x(svptrue_b32(), a, b);
|
|
489
440
|
}
|
|
490
441
|
|
|
491
442
|
template <>
|
|
492
|
-
EIGEN_STRONG_INLINE PacketXf pmin<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
493
|
-
{
|
|
443
|
+
EIGEN_STRONG_INLINE PacketXf pmin<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
494
444
|
return pmin<PacketXf>(a, b);
|
|
495
445
|
}
|
|
496
446
|
|
|
497
447
|
template <>
|
|
498
|
-
EIGEN_STRONG_INLINE PacketXf pmin<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
499
|
-
|
|
500
|
-
return svminnm_f32_z(svptrue_b32(), a, b);
|
|
448
|
+
EIGEN_STRONG_INLINE PacketXf pmin<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
449
|
+
return svminnm_f32_x(svptrue_b32(), a, b);
|
|
501
450
|
}
|
|
502
451
|
|
|
503
452
|
template <>
|
|
504
|
-
EIGEN_STRONG_INLINE PacketXf pmax<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
505
|
-
|
|
506
|
-
return svmax_f32_z(svptrue_b32(), a, b);
|
|
453
|
+
EIGEN_STRONG_INLINE PacketXf pmax<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
454
|
+
return svmax_f32_x(svptrue_b32(), a, b);
|
|
507
455
|
}
|
|
508
456
|
|
|
509
457
|
template <>
|
|
510
|
-
EIGEN_STRONG_INLINE PacketXf pmax<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
511
|
-
{
|
|
458
|
+
EIGEN_STRONG_INLINE PacketXf pmax<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
512
459
|
return pmax<PacketXf>(a, b);
|
|
513
460
|
}
|
|
514
461
|
|
|
515
462
|
template <>
|
|
516
|
-
EIGEN_STRONG_INLINE PacketXf pmax<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
517
|
-
|
|
518
|
-
return svmaxnm_f32_z(svptrue_b32(), a, b);
|
|
463
|
+
EIGEN_STRONG_INLINE PacketXf pmax<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
464
|
+
return svmaxnm_f32_x(svptrue_b32(), a, b);
|
|
519
465
|
}
|
|
520
466
|
|
|
521
467
|
// Float comparisons in SVE return svbool (predicate). Use svdup to set active
|
|
522
468
|
// lanes to 1 (0xffffffffu) and inactive lanes to 0.
|
|
523
469
|
template <>
|
|
524
|
-
EIGEN_STRONG_INLINE PacketXf pcmp_le<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
525
|
-
|
|
526
|
-
return svreinterpret_f32_u32(svdup_n_u32_z(svcmplt_f32(svptrue_b32(), a, b), 0xffffffffu));
|
|
470
|
+
EIGEN_STRONG_INLINE PacketXf pcmp_le<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
471
|
+
return svreinterpret_f32_u32(svdup_n_u32_z(svcmple_f32(svptrue_b32(), a, b), 0xffffffffu));
|
|
527
472
|
}
|
|
528
473
|
|
|
529
474
|
template <>
|
|
530
|
-
EIGEN_STRONG_INLINE PacketXf pcmp_lt<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
531
|
-
{
|
|
475
|
+
EIGEN_STRONG_INLINE PacketXf pcmp_lt<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
532
476
|
return svreinterpret_f32_u32(svdup_n_u32_z(svcmplt_f32(svptrue_b32(), a, b), 0xffffffffu));
|
|
533
477
|
}
|
|
534
478
|
|
|
535
479
|
template <>
|
|
536
|
-
EIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
537
|
-
{
|
|
480
|
+
EIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
538
481
|
return svreinterpret_f32_u32(svdup_n_u32_z(svcmpeq_f32(svptrue_b32(), a, b), 0xffffffffu));
|
|
539
482
|
}
|
|
540
483
|
|
|
@@ -542,71 +485,60 @@ EIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf
|
|
|
542
485
|
// greater/equal comparison (svcmpge_f32). Then fill a float vector with the
|
|
543
486
|
// active elements.
|
|
544
487
|
template <>
|
|
545
|
-
EIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
546
|
-
{
|
|
488
|
+
EIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
547
489
|
return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_z(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
|
|
548
490
|
}
|
|
549
491
|
|
|
550
492
|
template <>
|
|
551
|
-
EIGEN_STRONG_INLINE PacketXf pfloor<PacketXf>(const PacketXf& a)
|
|
552
|
-
|
|
553
|
-
return svrintm_f32_z(svptrue_b32(), a);
|
|
493
|
+
EIGEN_STRONG_INLINE PacketXf pfloor<PacketXf>(const PacketXf& a) {
|
|
494
|
+
return svrintm_f32_x(svptrue_b32(), a);
|
|
554
495
|
}
|
|
555
496
|
|
|
556
497
|
template <>
|
|
557
|
-
EIGEN_STRONG_INLINE PacketXf ptrue<PacketXf>(const PacketXf& /*a*/)
|
|
558
|
-
|
|
559
|
-
return svreinterpret_f32_u32(svdup_n_u32_z(svptrue_b32(), 0xffffffffu));
|
|
498
|
+
EIGEN_STRONG_INLINE PacketXf ptrue<PacketXf>(const PacketXf& /*a*/) {
|
|
499
|
+
return svreinterpret_f32_u32(svdup_n_u32_x(svptrue_b32(), 0xffffffffu));
|
|
560
500
|
}
|
|
561
501
|
|
|
562
502
|
// Logical Operations are not supported for float, so reinterpret casts
|
|
563
503
|
template <>
|
|
564
|
-
EIGEN_STRONG_INLINE PacketXf pand<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
565
|
-
|
|
566
|
-
return svreinterpret_f32_u32(svand_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
|
|
504
|
+
EIGEN_STRONG_INLINE PacketXf pand<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
505
|
+
return svreinterpret_f32_u32(svand_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
|
|
567
506
|
}
|
|
568
507
|
|
|
569
508
|
template <>
|
|
570
|
-
EIGEN_STRONG_INLINE PacketXf por<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
571
|
-
|
|
572
|
-
return svreinterpret_f32_u32(svorr_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
|
|
509
|
+
EIGEN_STRONG_INLINE PacketXf por<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
510
|
+
return svreinterpret_f32_u32(svorr_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
|
|
573
511
|
}
|
|
574
512
|
|
|
575
513
|
template <>
|
|
576
|
-
EIGEN_STRONG_INLINE PacketXf pxor<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
577
|
-
|
|
578
|
-
return svreinterpret_f32_u32(sveor_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
|
|
514
|
+
EIGEN_STRONG_INLINE PacketXf pxor<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
515
|
+
return svreinterpret_f32_u32(sveor_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
|
|
579
516
|
}
|
|
580
517
|
|
|
581
518
|
template <>
|
|
582
|
-
EIGEN_STRONG_INLINE PacketXf pandnot<PacketXf>(const PacketXf& a, const PacketXf& b)
|
|
583
|
-
|
|
584
|
-
return svreinterpret_f32_u32(svbic_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
|
|
519
|
+
EIGEN_STRONG_INLINE PacketXf pandnot<PacketXf>(const PacketXf& a, const PacketXf& b) {
|
|
520
|
+
return svreinterpret_f32_u32(svbic_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
|
|
585
521
|
}
|
|
586
522
|
|
|
587
523
|
template <>
|
|
588
|
-
EIGEN_STRONG_INLINE PacketXf pload<PacketXf>(const float* from)
|
|
589
|
-
{
|
|
524
|
+
EIGEN_STRONG_INLINE PacketXf pload<PacketXf>(const float* from) {
|
|
590
525
|
EIGEN_DEBUG_ALIGNED_LOAD return svld1_f32(svptrue_b32(), from);
|
|
591
526
|
}
|
|
592
527
|
|
|
593
528
|
template <>
|
|
594
|
-
EIGEN_STRONG_INLINE PacketXf ploadu<PacketXf>(const float* from)
|
|
595
|
-
{
|
|
529
|
+
EIGEN_STRONG_INLINE PacketXf ploadu<PacketXf>(const float* from) {
|
|
596
530
|
EIGEN_DEBUG_UNALIGNED_LOAD return svld1_f32(svptrue_b32(), from);
|
|
597
531
|
}
|
|
598
532
|
|
|
599
533
|
template <>
|
|
600
|
-
EIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from)
|
|
601
|
-
{
|
|
534
|
+
EIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from) {
|
|
602
535
|
svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
|
|
603
536
|
indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
|
|
604
537
|
return svld1_gather_u32index_f32(svptrue_b32(), from, indices);
|
|
605
538
|
}
|
|
606
539
|
|
|
607
540
|
template <>
|
|
608
|
-
EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from)
|
|
609
|
-
{
|
|
541
|
+
EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from) {
|
|
610
542
|
svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
|
|
611
543
|
indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
|
|
612
544
|
indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a0, a0, a1, a1, a1, a1, ...}
|
|
@@ -614,63 +546,54 @@ EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from)
|
|
|
614
546
|
}
|
|
615
547
|
|
|
616
548
|
template <>
|
|
617
|
-
EIGEN_STRONG_INLINE void pstore<float>(float* to, const PacketXf& from)
|
|
618
|
-
{
|
|
549
|
+
EIGEN_STRONG_INLINE void pstore<float>(float* to, const PacketXf& from) {
|
|
619
550
|
EIGEN_DEBUG_ALIGNED_STORE svst1_f32(svptrue_b32(), to, from);
|
|
620
551
|
}
|
|
621
552
|
|
|
622
553
|
template <>
|
|
623
|
-
EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const PacketXf& from)
|
|
624
|
-
{
|
|
554
|
+
EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const PacketXf& from) {
|
|
625
555
|
EIGEN_DEBUG_UNALIGNED_STORE svst1_f32(svptrue_b32(), to, from);
|
|
626
556
|
}
|
|
627
557
|
|
|
628
558
|
template <>
|
|
629
|
-
EIGEN_DEVICE_FUNC inline PacketXf pgather<float, PacketXf>(const float* from, Index stride)
|
|
630
|
-
{
|
|
559
|
+
EIGEN_DEVICE_FUNC inline PacketXf pgather<float, PacketXf>(const float* from, Index stride) {
|
|
631
560
|
// Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
|
|
632
561
|
svint32_t indices = svindex_s32(0, stride);
|
|
633
562
|
return svld1_gather_s32index_f32(svptrue_b32(), from, indices);
|
|
634
563
|
}
|
|
635
564
|
|
|
636
565
|
template <>
|
|
637
|
-
EIGEN_DEVICE_FUNC inline void pscatter<float, PacketXf>(float* to, const PacketXf& from, Index stride)
|
|
638
|
-
{
|
|
566
|
+
EIGEN_DEVICE_FUNC inline void pscatter<float, PacketXf>(float* to, const PacketXf& from, Index stride) {
|
|
639
567
|
// Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
|
|
640
568
|
svint32_t indices = svindex_s32(0, stride);
|
|
641
569
|
svst1_scatter_s32index_f32(svptrue_b32(), to, indices, from);
|
|
642
570
|
}
|
|
643
571
|
|
|
644
572
|
template <>
|
|
645
|
-
EIGEN_STRONG_INLINE float pfirst<PacketXf>(const PacketXf& a)
|
|
646
|
-
{
|
|
573
|
+
EIGEN_STRONG_INLINE float pfirst<PacketXf>(const PacketXf& a) {
|
|
647
574
|
// svlasta returns the first element if all predicate bits are 0
|
|
648
575
|
return svlasta_f32(svpfalse_b(), a);
|
|
649
576
|
}
|
|
650
577
|
|
|
651
578
|
template <>
|
|
652
|
-
EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a)
|
|
653
|
-
{
|
|
579
|
+
EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) {
|
|
654
580
|
return svrev_f32(a);
|
|
655
581
|
}
|
|
656
582
|
|
|
657
583
|
template <>
|
|
658
|
-
EIGEN_STRONG_INLINE PacketXf pabs(const PacketXf& a)
|
|
659
|
-
|
|
660
|
-
return svabs_f32_z(svptrue_b32(), a);
|
|
584
|
+
EIGEN_STRONG_INLINE PacketXf pabs(const PacketXf& a) {
|
|
585
|
+
return svabs_f32_x(svptrue_b32(), a);
|
|
661
586
|
}
|
|
662
587
|
|
|
663
|
-
// TODO(tellenbach): Should this go into MathFunctions.h? If so, change for
|
|
588
|
+
// TODO(tellenbach): Should this go into MathFunctions.h? If so, change for
|
|
664
589
|
// all vector extensions and the generic version.
|
|
665
590
|
template <>
|
|
666
|
-
EIGEN_STRONG_INLINE PacketXf pfrexp<PacketXf>(const PacketXf& a, PacketXf& exponent)
|
|
667
|
-
{
|
|
591
|
+
EIGEN_STRONG_INLINE PacketXf pfrexp<PacketXf>(const PacketXf& a, PacketXf& exponent) {
|
|
668
592
|
return pfrexp_generic(a, exponent);
|
|
669
593
|
}
|
|
670
594
|
|
|
671
595
|
template <>
|
|
672
|
-
EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a)
|
|
673
|
-
{
|
|
596
|
+
EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a) {
|
|
674
597
|
return svaddv_f32(svptrue_b32(), a);
|
|
675
598
|
}
|
|
676
599
|
|
|
@@ -678,54 +601,49 @@ EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a)
|
|
|
678
601
|
// mul
|
|
679
602
|
// Only works for SVE Vls multiple of 128
|
|
680
603
|
template <>
|
|
681
|
-
EIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a)
|
|
682
|
-
|
|
683
|
-
EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0),
|
|
684
|
-
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
|
|
604
|
+
EIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a) {
|
|
605
|
+
EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0), EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
|
|
685
606
|
// Multiply the vector by its reverse
|
|
686
|
-
svfloat32_t prod =
|
|
607
|
+
svfloat32_t prod = svmul_f32_x(svptrue_b32(), a, svrev_f32(a));
|
|
687
608
|
svfloat32_t half_prod;
|
|
688
609
|
|
|
689
610
|
// Extract the high half of the vector. Depending on the VL more reductions need to be done
|
|
690
611
|
if (EIGEN_ARM64_SVE_VL >= 2048) {
|
|
691
612
|
half_prod = svtbl_f32(prod, svindex_u32(32, 1));
|
|
692
|
-
prod =
|
|
613
|
+
prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
|
|
693
614
|
}
|
|
694
615
|
if (EIGEN_ARM64_SVE_VL >= 1024) {
|
|
695
616
|
half_prod = svtbl_f32(prod, svindex_u32(16, 1));
|
|
696
|
-
prod =
|
|
617
|
+
prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
|
|
697
618
|
}
|
|
698
619
|
if (EIGEN_ARM64_SVE_VL >= 512) {
|
|
699
620
|
half_prod = svtbl_f32(prod, svindex_u32(8, 1));
|
|
700
|
-
prod =
|
|
621
|
+
prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
|
|
701
622
|
}
|
|
702
623
|
if (EIGEN_ARM64_SVE_VL >= 256) {
|
|
703
624
|
half_prod = svtbl_f32(prod, svindex_u32(4, 1));
|
|
704
|
-
prod =
|
|
625
|
+
prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
|
|
705
626
|
}
|
|
706
627
|
// Last reduction
|
|
707
628
|
half_prod = svtbl_f32(prod, svindex_u32(2, 1));
|
|
708
|
-
prod =
|
|
629
|
+
prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
|
|
709
630
|
|
|
710
631
|
// The reduction is done to the first element.
|
|
711
632
|
return pfirst<PacketXf>(prod);
|
|
712
633
|
}
|
|
713
634
|
|
|
714
635
|
template <>
|
|
715
|
-
EIGEN_STRONG_INLINE float predux_min<PacketXf>(const PacketXf& a)
|
|
716
|
-
{
|
|
636
|
+
EIGEN_STRONG_INLINE float predux_min<PacketXf>(const PacketXf& a) {
|
|
717
637
|
return svminv_f32(svptrue_b32(), a);
|
|
718
638
|
}
|
|
719
639
|
|
|
720
640
|
template <>
|
|
721
|
-
EIGEN_STRONG_INLINE float predux_max<PacketXf>(const PacketXf& a)
|
|
722
|
-
{
|
|
641
|
+
EIGEN_STRONG_INLINE float predux_max<PacketXf>(const PacketXf& a) {
|
|
723
642
|
return svmaxv_f32(svptrue_b32(), a);
|
|
724
643
|
}
|
|
725
644
|
|
|
726
|
-
template<int N>
|
|
727
|
-
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXf, N>& kernel)
|
|
728
|
-
{
|
|
645
|
+
template <int N>
|
|
646
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXf, N>& kernel) {
|
|
729
647
|
float buffer[packet_traits<float>::size * N] = {0};
|
|
730
648
|
int i = 0;
|
|
731
649
|
|
|
@@ -740,12 +658,16 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXf, N>& kernel)
|
|
|
740
658
|
}
|
|
741
659
|
}
|
|
742
660
|
|
|
743
|
-
template<>
|
|
744
|
-
EIGEN_STRONG_INLINE PacketXf pldexp<PacketXf>(const PacketXf& a, const PacketXf& exponent)
|
|
745
|
-
{
|
|
661
|
+
template <>
|
|
662
|
+
EIGEN_STRONG_INLINE PacketXf pldexp<PacketXf>(const PacketXf& a, const PacketXf& exponent) {
|
|
746
663
|
return pldexp_generic(a, exponent);
|
|
747
664
|
}
|
|
748
665
|
|
|
666
|
+
template <>
|
|
667
|
+
EIGEN_STRONG_INLINE PacketXf psqrt<PacketXf>(const PacketXf& a) {
|
|
668
|
+
return svsqrt_f32_x(svptrue_b32(), a);
|
|
669
|
+
}
|
|
670
|
+
|
|
749
671
|
} // namespace internal
|
|
750
672
|
} // namespace Eigen
|
|
751
673
|
|