@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431):
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -10,10 +10,11 @@
10
10
  #ifndef EIGEN_PACKET_MATH_SVE_H
11
11
  #define EIGEN_PACKET_MATH_SVE_H
12
12
 
13
- namespace Eigen
14
- {
15
- namespace internal
16
- {
13
+ // IWYU pragma: private
14
+ #include "../../InternalHeaderCheck.h"
15
+
16
+ namespace Eigen {
17
+ namespace internal {
17
18
  #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
18
19
  #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
19
20
  #endif
@@ -40,7 +41,6 @@ struct packet_traits<numext::int32_t> : default_packet_traits {
40
41
  Vectorizable = 1,
41
42
  AlignedOnScalar = 1,
42
43
  size = sve_packet_size_selector<numext::int32_t, EIGEN_ARM64_SVE_VL>::size,
43
- HasHalfPacket = 0,
44
44
 
45
45
  HasAdd = 1,
46
46
  HasSub = 1,
@@ -73,174 +73,146 @@ struct unpacket_traits<PacketXi> {
73
73
  };
74
74
 
75
75
  template <>
76
- EIGEN_STRONG_INLINE void prefetch<numext::int32_t>(const numext::int32_t* addr)
77
- {
76
+ EIGEN_STRONG_INLINE void prefetch<numext::int32_t>(const numext::int32_t* addr) {
78
77
  svprfw(svptrue_b32(), addr, SV_PLDL1KEEP);
79
78
  }
80
79
 
81
80
  template <>
82
- EIGEN_STRONG_INLINE PacketXi pset1<PacketXi>(const numext::int32_t& from)
83
- {
81
+ EIGEN_STRONG_INLINE PacketXi pset1<PacketXi>(const numext::int32_t& from) {
84
82
  return svdup_n_s32(from);
85
83
  }
86
84
 
87
85
  template <>
88
- EIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const numext::int32_t& a)
89
- {
86
+ EIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const numext::int32_t& a) {
90
87
  numext::int32_t c[packet_traits<numext::int32_t>::size];
91
88
  for (int i = 0; i < packet_traits<numext::int32_t>::size; i++) c[i] = i;
92
- return svadd_s32_z(svptrue_b32(), pset1<PacketXi>(a), svld1_s32(svptrue_b32(), c));
89
+ return svadd_s32_x(svptrue_b32(), pset1<PacketXi>(a), svld1_s32(svptrue_b32(), c));
93
90
  }
94
91
 
95
92
  template <>
96
- EIGEN_STRONG_INLINE PacketXi padd<PacketXi>(const PacketXi& a, const PacketXi& b)
97
- {
98
- return svadd_s32_z(svptrue_b32(), a, b);
93
+ EIGEN_STRONG_INLINE PacketXi padd<PacketXi>(const PacketXi& a, const PacketXi& b) {
94
+ return svadd_s32_x(svptrue_b32(), a, b);
99
95
  }
100
96
 
101
97
  template <>
102
- EIGEN_STRONG_INLINE PacketXi psub<PacketXi>(const PacketXi& a, const PacketXi& b)
103
- {
104
- return svsub_s32_z(svptrue_b32(), a, b);
98
+ EIGEN_STRONG_INLINE PacketXi psub<PacketXi>(const PacketXi& a, const PacketXi& b) {
99
+ return svsub_s32_x(svptrue_b32(), a, b);
105
100
  }
106
101
 
107
102
  template <>
108
- EIGEN_STRONG_INLINE PacketXi pnegate(const PacketXi& a)
109
- {
110
- return svneg_s32_z(svptrue_b32(), a);
103
+ EIGEN_STRONG_INLINE PacketXi pnegate(const PacketXi& a) {
104
+ return svneg_s32_x(svptrue_b32(), a);
111
105
  }
112
106
 
113
107
  template <>
114
- EIGEN_STRONG_INLINE PacketXi pconj(const PacketXi& a)
115
- {
108
+ EIGEN_STRONG_INLINE PacketXi pconj(const PacketXi& a) {
116
109
  return a;
117
110
  }
118
111
 
119
112
  template <>
120
- EIGEN_STRONG_INLINE PacketXi pmul<PacketXi>(const PacketXi& a, const PacketXi& b)
121
- {
122
- return svmul_s32_z(svptrue_b32(), a, b);
113
+ EIGEN_STRONG_INLINE PacketXi pmul<PacketXi>(const PacketXi& a, const PacketXi& b) {
114
+ return svmul_s32_x(svptrue_b32(), a, b);
123
115
  }
124
116
 
125
117
  template <>
126
- EIGEN_STRONG_INLINE PacketXi pdiv<PacketXi>(const PacketXi& a, const PacketXi& b)
127
- {
128
- return svdiv_s32_z(svptrue_b32(), a, b);
118
+ EIGEN_STRONG_INLINE PacketXi pdiv<PacketXi>(const PacketXi& a, const PacketXi& b) {
119
+ return svdiv_s32_x(svptrue_b32(), a, b);
129
120
  }
130
121
 
131
122
  template <>
132
- EIGEN_STRONG_INLINE PacketXi pmadd(const PacketXi& a, const PacketXi& b, const PacketXi& c)
133
- {
134
- return svmla_s32_z(svptrue_b32(), c, a, b);
123
+ EIGEN_STRONG_INLINE PacketXi pmadd(const PacketXi& a, const PacketXi& b, const PacketXi& c) {
124
+ return svmla_s32_x(svptrue_b32(), c, a, b);
135
125
  }
136
126
 
137
127
  template <>
138
- EIGEN_STRONG_INLINE PacketXi pmin<PacketXi>(const PacketXi& a, const PacketXi& b)
139
- {
140
- return svmin_s32_z(svptrue_b32(), a, b);
128
+ EIGEN_STRONG_INLINE PacketXi pmin<PacketXi>(const PacketXi& a, const PacketXi& b) {
129
+ return svmin_s32_x(svptrue_b32(), a, b);
141
130
  }
142
131
 
143
132
  template <>
144
- EIGEN_STRONG_INLINE PacketXi pmax<PacketXi>(const PacketXi& a, const PacketXi& b)
145
- {
146
- return svmax_s32_z(svptrue_b32(), a, b);
133
+ EIGEN_STRONG_INLINE PacketXi pmax<PacketXi>(const PacketXi& a, const PacketXi& b) {
134
+ return svmax_s32_x(svptrue_b32(), a, b);
147
135
  }
148
136
 
149
137
  template <>
150
- EIGEN_STRONG_INLINE PacketXi pcmp_le<PacketXi>(const PacketXi& a, const PacketXi& b)
151
- {
152
- return svdup_n_s32_z(svcmplt_s32(svptrue_b32(), a, b), 0xffffffffu);
138
+ EIGEN_STRONG_INLINE PacketXi pcmp_le<PacketXi>(const PacketXi& a, const PacketXi& b) {
139
+ return svdup_n_s32_z(svcmple_s32(svptrue_b32(), a, b), 0xffffffffu);
153
140
  }
154
141
 
155
142
  template <>
156
- EIGEN_STRONG_INLINE PacketXi pcmp_lt<PacketXi>(const PacketXi& a, const PacketXi& b)
157
- {
143
+ EIGEN_STRONG_INLINE PacketXi pcmp_lt<PacketXi>(const PacketXi& a, const PacketXi& b) {
158
144
  return svdup_n_s32_z(svcmplt_s32(svptrue_b32(), a, b), 0xffffffffu);
159
145
  }
160
146
 
161
147
  template <>
162
- EIGEN_STRONG_INLINE PacketXi pcmp_eq<PacketXi>(const PacketXi& a, const PacketXi& b)
163
- {
148
+ EIGEN_STRONG_INLINE PacketXi pcmp_eq<PacketXi>(const PacketXi& a, const PacketXi& b) {
164
149
  return svdup_n_s32_z(svcmpeq_s32(svptrue_b32(), a, b), 0xffffffffu);
165
150
  }
166
151
 
167
152
  template <>
168
- EIGEN_STRONG_INLINE PacketXi ptrue<PacketXi>(const PacketXi& /*a*/)
169
- {
170
- return svdup_n_s32_z(svptrue_b32(), 0xffffffffu);
153
+ EIGEN_STRONG_INLINE PacketXi ptrue<PacketXi>(const PacketXi& /*a*/) {
154
+ return svdup_n_s32_x(svptrue_b32(), 0xffffffffu);
171
155
  }
172
156
 
173
157
  template <>
174
- EIGEN_STRONG_INLINE PacketXi pzero<PacketXi>(const PacketXi& /*a*/)
175
- {
176
- return svdup_n_s32_z(svptrue_b32(), 0);
158
+ EIGEN_STRONG_INLINE PacketXi pzero<PacketXi>(const PacketXi& /*a*/) {
159
+ return svdup_n_s32_x(svptrue_b32(), 0);
177
160
  }
178
161
 
179
162
  template <>
180
- EIGEN_STRONG_INLINE PacketXi pand<PacketXi>(const PacketXi& a, const PacketXi& b)
181
- {
182
- return svand_s32_z(svptrue_b32(), a, b);
163
+ EIGEN_STRONG_INLINE PacketXi pand<PacketXi>(const PacketXi& a, const PacketXi& b) {
164
+ return svand_s32_x(svptrue_b32(), a, b);
183
165
  }
184
166
 
185
167
  template <>
186
- EIGEN_STRONG_INLINE PacketXi por<PacketXi>(const PacketXi& a, const PacketXi& b)
187
- {
188
- return svorr_s32_z(svptrue_b32(), a, b);
168
+ EIGEN_STRONG_INLINE PacketXi por<PacketXi>(const PacketXi& a, const PacketXi& b) {
169
+ return svorr_s32_x(svptrue_b32(), a, b);
189
170
  }
190
171
 
191
172
  template <>
192
- EIGEN_STRONG_INLINE PacketXi pxor<PacketXi>(const PacketXi& a, const PacketXi& b)
193
- {
194
- return sveor_s32_z(svptrue_b32(), a, b);
173
+ EIGEN_STRONG_INLINE PacketXi pxor<PacketXi>(const PacketXi& a, const PacketXi& b) {
174
+ return sveor_s32_x(svptrue_b32(), a, b);
195
175
  }
196
176
 
197
177
  template <>
198
- EIGEN_STRONG_INLINE PacketXi pandnot<PacketXi>(const PacketXi& a, const PacketXi& b)
199
- {
200
- return svbic_s32_z(svptrue_b32(), a, b);
178
+ EIGEN_STRONG_INLINE PacketXi pandnot<PacketXi>(const PacketXi& a, const PacketXi& b) {
179
+ return svbic_s32_x(svptrue_b32(), a, b);
201
180
  }
202
181
 
203
182
  template <int N>
204
- EIGEN_STRONG_INLINE PacketXi parithmetic_shift_right(PacketXi a)
205
- {
206
- return svasrd_n_s32_z(svptrue_b32(), a, N);
183
+ EIGEN_STRONG_INLINE PacketXi parithmetic_shift_right(PacketXi a) {
184
+ return svasrd_n_s32_x(svptrue_b32(), a, N);
207
185
  }
208
186
 
209
187
  template <int N>
210
- EIGEN_STRONG_INLINE PacketXi plogical_shift_right(PacketXi a)
211
- {
212
- return svreinterpret_s32_u32(svlsr_u32_z(svptrue_b32(), svreinterpret_u32_s32(a), svdup_n_u32_z(svptrue_b32(), N)));
188
+ EIGEN_STRONG_INLINE PacketXi plogical_shift_right(PacketXi a) {
189
+ return svreinterpret_s32_u32(svlsr_n_u32_x(svptrue_b32(), svreinterpret_u32_s32(a), N));
213
190
  }
214
191
 
215
192
  template <int N>
216
- EIGEN_STRONG_INLINE PacketXi plogical_shift_left(PacketXi a)
217
- {
218
- return svlsl_s32_z(svptrue_b32(), a, svdup_n_u32_z(svptrue_b32(), N));
193
+ EIGEN_STRONG_INLINE PacketXi plogical_shift_left(PacketXi a) {
194
+ return svlsl_n_s32_x(svptrue_b32(), a, N);
219
195
  }
220
196
 
221
197
  template <>
222
- EIGEN_STRONG_INLINE PacketXi pload<PacketXi>(const numext::int32_t* from)
223
- {
198
+ EIGEN_STRONG_INLINE PacketXi pload<PacketXi>(const numext::int32_t* from) {
224
199
  EIGEN_DEBUG_ALIGNED_LOAD return svld1_s32(svptrue_b32(), from);
225
200
  }
226
201
 
227
202
  template <>
228
- EIGEN_STRONG_INLINE PacketXi ploadu<PacketXi>(const numext::int32_t* from)
229
- {
203
+ EIGEN_STRONG_INLINE PacketXi ploadu<PacketXi>(const numext::int32_t* from) {
230
204
  EIGEN_DEBUG_UNALIGNED_LOAD return svld1_s32(svptrue_b32(), from);
231
205
  }
232
206
 
233
207
  template <>
234
- EIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const numext::int32_t* from)
235
- {
208
+ EIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const numext::int32_t* from) {
236
209
  svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
237
210
  indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
238
211
  return svld1_gather_u32index_s32(svptrue_b32(), from, indices);
239
212
  }
240
213
 
241
214
  template <>
242
- EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const numext::int32_t* from)
243
- {
215
+ EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const numext::int32_t* from) {
244
216
  svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
245
217
  indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
246
218
  indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a0, a0, a1, a1, a1, a1, ...}
@@ -248,102 +220,91 @@ EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const numext::int32_t* from)
248
220
  }
249
221
 
250
222
  template <>
251
- EIGEN_STRONG_INLINE void pstore<numext::int32_t>(numext::int32_t* to, const PacketXi& from)
252
- {
223
+ EIGEN_STRONG_INLINE void pstore<numext::int32_t>(numext::int32_t* to, const PacketXi& from) {
253
224
  EIGEN_DEBUG_ALIGNED_STORE svst1_s32(svptrue_b32(), to, from);
254
225
  }
255
226
 
256
227
  template <>
257
- EIGEN_STRONG_INLINE void pstoreu<numext::int32_t>(numext::int32_t* to, const PacketXi& from)
258
- {
228
+ EIGEN_STRONG_INLINE void pstoreu<numext::int32_t>(numext::int32_t* to, const PacketXi& from) {
259
229
  EIGEN_DEBUG_UNALIGNED_STORE svst1_s32(svptrue_b32(), to, from);
260
230
  }
261
231
 
262
232
  template <>
263
- EIGEN_DEVICE_FUNC inline PacketXi pgather<numext::int32_t, PacketXi>(const numext::int32_t* from, Index stride)
264
- {
233
+ EIGEN_DEVICE_FUNC inline PacketXi pgather<numext::int32_t, PacketXi>(const numext::int32_t* from, Index stride) {
265
234
  // Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
266
235
  svint32_t indices = svindex_s32(0, stride);
267
236
  return svld1_gather_s32index_s32(svptrue_b32(), from, indices);
268
237
  }
269
238
 
270
239
  template <>
271
- EIGEN_DEVICE_FUNC inline void pscatter<numext::int32_t, PacketXi>(numext::int32_t* to, const PacketXi& from, Index stride)
272
- {
240
+ EIGEN_DEVICE_FUNC inline void pscatter<numext::int32_t, PacketXi>(numext::int32_t* to, const PacketXi& from,
241
+ Index stride) {
273
242
  // Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
274
243
  svint32_t indices = svindex_s32(0, stride);
275
244
  svst1_scatter_s32index_s32(svptrue_b32(), to, indices, from);
276
245
  }
277
246
 
278
247
  template <>
279
- EIGEN_STRONG_INLINE numext::int32_t pfirst<PacketXi>(const PacketXi& a)
280
- {
248
+ EIGEN_STRONG_INLINE numext::int32_t pfirst<PacketXi>(const PacketXi& a) {
281
249
  // svlasta returns the first element if all predicate bits are 0
282
250
  return svlasta_s32(svpfalse_b(), a);
283
251
  }
284
252
 
285
253
  template <>
286
- EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a)
287
- {
254
+ EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) {
288
255
  return svrev_s32(a);
289
256
  }
290
257
 
291
258
  template <>
292
- EIGEN_STRONG_INLINE PacketXi pabs(const PacketXi& a)
293
- {
294
- return svabs_s32_z(svptrue_b32(), a);
259
+ EIGEN_STRONG_INLINE PacketXi pabs(const PacketXi& a) {
260
+ return svabs_s32_x(svptrue_b32(), a);
295
261
  }
296
262
 
297
263
  template <>
298
- EIGEN_STRONG_INLINE numext::int32_t predux<PacketXi>(const PacketXi& a)
299
- {
264
+ EIGEN_STRONG_INLINE numext::int32_t predux<PacketXi>(const PacketXi& a) {
300
265
  return static_cast<numext::int32_t>(svaddv_s32(svptrue_b32(), a));
301
266
  }
302
267
 
303
268
  template <>
304
- EIGEN_STRONG_INLINE numext::int32_t predux_mul<PacketXi>(const PacketXi& a)
305
- {
306
- EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0),
307
- EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
269
+ EIGEN_STRONG_INLINE numext::int32_t predux_mul<PacketXi>(const PacketXi& a) {
270
+ EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0), EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
308
271
 
309
272
  // Multiply the vector by its reverse
310
- svint32_t prod = svmul_s32_z(svptrue_b32(), a, svrev_s32(a));
273
+ svint32_t prod = svmul_s32_x(svptrue_b32(), a, svrev_s32(a));
311
274
  svint32_t half_prod;
312
275
 
313
276
  // Extract the high half of the vector. Depending on the VL more reductions need to be done
314
277
  if (EIGEN_ARM64_SVE_VL >= 2048) {
315
278
  half_prod = svtbl_s32(prod, svindex_u32(32, 1));
316
- prod = svmul_s32_z(svptrue_b32(), prod, half_prod);
279
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
317
280
  }
318
281
  if (EIGEN_ARM64_SVE_VL >= 1024) {
319
282
  half_prod = svtbl_s32(prod, svindex_u32(16, 1));
320
- prod = svmul_s32_z(svptrue_b32(), prod, half_prod);
283
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
321
284
  }
322
285
  if (EIGEN_ARM64_SVE_VL >= 512) {
323
286
  half_prod = svtbl_s32(prod, svindex_u32(8, 1));
324
- prod = svmul_s32_z(svptrue_b32(), prod, half_prod);
287
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
325
288
  }
326
289
  if (EIGEN_ARM64_SVE_VL >= 256) {
327
290
  half_prod = svtbl_s32(prod, svindex_u32(4, 1));
328
- prod = svmul_s32_z(svptrue_b32(), prod, half_prod);
291
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
329
292
  }
330
293
  // Last reduction
331
294
  half_prod = svtbl_s32(prod, svindex_u32(2, 1));
332
- prod = svmul_s32_z(svptrue_b32(), prod, half_prod);
295
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
333
296
 
334
297
  // The reduction is done to the first element.
335
298
  return pfirst<PacketXi>(prod);
336
299
  }
337
300
 
338
301
  template <>
339
- EIGEN_STRONG_INLINE numext::int32_t predux_min<PacketXi>(const PacketXi& a)
340
- {
302
+ EIGEN_STRONG_INLINE numext::int32_t predux_min<PacketXi>(const PacketXi& a) {
341
303
  return svminv_s32(svptrue_b32(), a);
342
304
  }
343
305
 
344
306
  template <>
345
- EIGEN_STRONG_INLINE numext::int32_t predux_max<PacketXi>(const PacketXi& a)
346
- {
307
+ EIGEN_STRONG_INLINE numext::int32_t predux_max<PacketXi>(const PacketXi& a) {
347
308
  return svmaxv_s32(svptrue_b32(), a);
348
309
  }
349
310
 
@@ -375,7 +336,6 @@ struct packet_traits<float> : default_packet_traits {
375
336
  Vectorizable = 1,
376
337
  AlignedOnScalar = 1,
377
338
  size = sve_packet_size_selector<float, EIGEN_ARM64_SVE_VL>::size,
378
- HasHalfPacket = 0,
379
339
 
380
340
  HasAdd = 1,
381
341
  HasSub = 1,
@@ -393,15 +353,17 @@ struct packet_traits<float> : default_packet_traits {
393
353
  HasReduxp = 0, // Not implemented in SVE
394
354
 
395
355
  HasDiv = 1,
396
- HasFloor = 1,
397
356
 
357
+ HasCmp = 1,
398
358
  HasSin = EIGEN_FAST_MATH,
399
359
  HasCos = EIGEN_FAST_MATH,
400
360
  HasLog = 1,
401
361
  HasExp = 1,
402
- HasSqrt = 0,
362
+ HasPow = 1,
363
+ HasSqrt = 1,
403
364
  HasTanh = EIGEN_FAST_MATH,
404
- HasErf = EIGEN_FAST_MATH
365
+ HasErf = EIGEN_FAST_MATH,
366
+ HasErfc = EIGEN_FAST_MATH
405
367
  };
406
368
  };
407
369
 
@@ -421,120 +383,101 @@ struct unpacket_traits<PacketXf> {
421
383
  };
422
384
 
423
385
  template <>
424
- EIGEN_STRONG_INLINE PacketXf pset1<PacketXf>(const float& from)
425
- {
386
+ EIGEN_STRONG_INLINE PacketXf pset1<PacketXf>(const float& from) {
426
387
  return svdup_n_f32(from);
427
388
  }
428
389
 
429
390
  template <>
430
- EIGEN_STRONG_INLINE PacketXf pset1frombits<PacketXf>(numext::uint32_t from)
431
- {
432
- return svreinterpret_f32_u32(svdup_n_u32_z(svptrue_b32(), from));
391
+ EIGEN_STRONG_INLINE PacketXf pset1frombits<PacketXf>(numext::uint32_t from) {
392
+ return svreinterpret_f32_u32(svdup_n_u32_x(svptrue_b32(), from));
433
393
  }
434
394
 
435
395
  template <>
436
- EIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a)
437
- {
396
+ EIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a) {
438
397
  float c[packet_traits<float>::size];
439
398
  for (int i = 0; i < packet_traits<float>::size; i++) c[i] = i;
440
- return svadd_f32_z(svptrue_b32(), pset1<PacketXf>(a), svld1_f32(svptrue_b32(), c));
399
+ return svadd_f32_x(svptrue_b32(), pset1<PacketXf>(a), svld1_f32(svptrue_b32(), c));
441
400
  }
442
401
 
443
402
  template <>
444
- EIGEN_STRONG_INLINE PacketXf padd<PacketXf>(const PacketXf& a, const PacketXf& b)
445
- {
446
- return svadd_f32_z(svptrue_b32(), a, b);
403
+ EIGEN_STRONG_INLINE PacketXf padd<PacketXf>(const PacketXf& a, const PacketXf& b) {
404
+ return svadd_f32_x(svptrue_b32(), a, b);
447
405
  }
448
406
 
449
407
  template <>
450
- EIGEN_STRONG_INLINE PacketXf psub<PacketXf>(const PacketXf& a, const PacketXf& b)
451
- {
452
- return svsub_f32_z(svptrue_b32(), a, b);
408
+ EIGEN_STRONG_INLINE PacketXf psub<PacketXf>(const PacketXf& a, const PacketXf& b) {
409
+ return svsub_f32_x(svptrue_b32(), a, b);
453
410
  }
454
411
 
455
412
  template <>
456
- EIGEN_STRONG_INLINE PacketXf pnegate(const PacketXf& a)
457
- {
458
- return svneg_f32_z(svptrue_b32(), a);
413
+ EIGEN_STRONG_INLINE PacketXf pnegate(const PacketXf& a) {
414
+ return svneg_f32_x(svptrue_b32(), a);
459
415
  }
460
416
 
461
417
  template <>
462
- EIGEN_STRONG_INLINE PacketXf pconj(const PacketXf& a)
463
- {
418
+ EIGEN_STRONG_INLINE PacketXf pconj(const PacketXf& a) {
464
419
  return a;
465
420
  }
466
421
 
467
422
  template <>
468
- EIGEN_STRONG_INLINE PacketXf pmul<PacketXf>(const PacketXf& a, const PacketXf& b)
469
- {
470
- return svmul_f32_z(svptrue_b32(), a, b);
423
+ EIGEN_STRONG_INLINE PacketXf pmul<PacketXf>(const PacketXf& a, const PacketXf& b) {
424
+ return svmul_f32_x(svptrue_b32(), a, b);
471
425
  }
472
426
 
473
427
  template <>
474
- EIGEN_STRONG_INLINE PacketXf pdiv<PacketXf>(const PacketXf& a, const PacketXf& b)
475
- {
476
- return svdiv_f32_z(svptrue_b32(), a, b);
428
+ EIGEN_STRONG_INLINE PacketXf pdiv<PacketXf>(const PacketXf& a, const PacketXf& b) {
429
+ return svdiv_f32_x(svptrue_b32(), a, b);
477
430
  }
478
431
 
479
432
  template <>
480
- EIGEN_STRONG_INLINE PacketXf pmadd(const PacketXf& a, const PacketXf& b, const PacketXf& c)
481
- {
482
- return svmla_f32_z(svptrue_b32(), c, a, b);
433
+ EIGEN_STRONG_INLINE PacketXf pmadd(const PacketXf& a, const PacketXf& b, const PacketXf& c) {
434
+ return svmla_f32_x(svptrue_b32(), c, a, b);
483
435
  }
484
436
 
485
437
  template <>
486
- EIGEN_STRONG_INLINE PacketXf pmin<PacketXf>(const PacketXf& a, const PacketXf& b)
487
- {
488
- return svmin_f32_z(svptrue_b32(), a, b);
438
+ EIGEN_STRONG_INLINE PacketXf pmin<PacketXf>(const PacketXf& a, const PacketXf& b) {
439
+ return svmin_f32_x(svptrue_b32(), a, b);
489
440
  }
490
441
 
491
442
  template <>
492
- EIGEN_STRONG_INLINE PacketXf pmin<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b)
493
- {
443
+ EIGEN_STRONG_INLINE PacketXf pmin<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b) {
494
444
  return pmin<PacketXf>(a, b);
495
445
  }
496
446
 
497
447
  template <>
498
- EIGEN_STRONG_INLINE PacketXf pmin<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b)
499
- {
500
- return svminnm_f32_z(svptrue_b32(), a, b);
448
+ EIGEN_STRONG_INLINE PacketXf pmin<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b) {
449
+ return svminnm_f32_x(svptrue_b32(), a, b);
501
450
  }
502
451
 
503
452
  template <>
504
- EIGEN_STRONG_INLINE PacketXf pmax<PacketXf>(const PacketXf& a, const PacketXf& b)
505
- {
506
- return svmax_f32_z(svptrue_b32(), a, b);
453
+ EIGEN_STRONG_INLINE PacketXf pmax<PacketXf>(const PacketXf& a, const PacketXf& b) {
454
+ return svmax_f32_x(svptrue_b32(), a, b);
507
455
  }
508
456
 
509
457
  template <>
510
- EIGEN_STRONG_INLINE PacketXf pmax<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b)
511
- {
458
+ EIGEN_STRONG_INLINE PacketXf pmax<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b) {
512
459
  return pmax<PacketXf>(a, b);
513
460
  }
514
461
 
515
462
  template <>
516
- EIGEN_STRONG_INLINE PacketXf pmax<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b)
517
- {
518
- return svmaxnm_f32_z(svptrue_b32(), a, b);
463
+ EIGEN_STRONG_INLINE PacketXf pmax<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b) {
464
+ return svmaxnm_f32_x(svptrue_b32(), a, b);
519
465
  }
520
466
 
521
467
  // Float comparisons in SVE return svbool (predicate). Use svdup to set active
522
468
  // lanes to 1 (0xffffffffu) and inactive lanes to 0.
523
469
  template <>
524
- EIGEN_STRONG_INLINE PacketXf pcmp_le<PacketXf>(const PacketXf& a, const PacketXf& b)
525
- {
526
- return svreinterpret_f32_u32(svdup_n_u32_z(svcmplt_f32(svptrue_b32(), a, b), 0xffffffffu));
470
+ EIGEN_STRONG_INLINE PacketXf pcmp_le<PacketXf>(const PacketXf& a, const PacketXf& b) {
471
+ return svreinterpret_f32_u32(svdup_n_u32_z(svcmple_f32(svptrue_b32(), a, b), 0xffffffffu));
527
472
  }
528
473
 
529
474
  template <>
530
- EIGEN_STRONG_INLINE PacketXf pcmp_lt<PacketXf>(const PacketXf& a, const PacketXf& b)
531
- {
475
+ EIGEN_STRONG_INLINE PacketXf pcmp_lt<PacketXf>(const PacketXf& a, const PacketXf& b) {
532
476
  return svreinterpret_f32_u32(svdup_n_u32_z(svcmplt_f32(svptrue_b32(), a, b), 0xffffffffu));
533
477
  }
534
478
 
535
479
  template <>
536
- EIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf& b)
537
- {
480
+ EIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf& b) {
538
481
  return svreinterpret_f32_u32(svdup_n_u32_z(svcmpeq_f32(svptrue_b32(), a, b), 0xffffffffu));
539
482
  }
540
483
 
@@ -542,71 +485,60 @@ EIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf
542
485
  // greater/equal comparison (svcmpge_f32). Then fill a float vector with the
543
486
  // active elements.
544
487
  template <>
545
- EIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b)
546
- {
488
+ EIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b) {
547
489
  return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_z(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
548
490
  }
549
491
 
550
492
  template <>
551
- EIGEN_STRONG_INLINE PacketXf pfloor<PacketXf>(const PacketXf& a)
552
- {
553
- return svrintm_f32_z(svptrue_b32(), a);
493
+ EIGEN_STRONG_INLINE PacketXf pfloor<PacketXf>(const PacketXf& a) {
494
+ return svrintm_f32_x(svptrue_b32(), a);
554
495
  }
555
496
 
556
497
  template <>
557
- EIGEN_STRONG_INLINE PacketXf ptrue<PacketXf>(const PacketXf& /*a*/)
558
- {
559
- return svreinterpret_f32_u32(svdup_n_u32_z(svptrue_b32(), 0xffffffffu));
498
+ EIGEN_STRONG_INLINE PacketXf ptrue<PacketXf>(const PacketXf& /*a*/) {
499
+ return svreinterpret_f32_u32(svdup_n_u32_x(svptrue_b32(), 0xffffffffu));
560
500
  }
561
501
 
562
502
  // Logical Operations are not supported for float, so reinterpret casts
563
503
  template <>
564
- EIGEN_STRONG_INLINE PacketXf pand<PacketXf>(const PacketXf& a, const PacketXf& b)
565
- {
566
- return svreinterpret_f32_u32(svand_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
504
+ EIGEN_STRONG_INLINE PacketXf pand<PacketXf>(const PacketXf& a, const PacketXf& b) {
505
+ return svreinterpret_f32_u32(svand_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
567
506
  }
568
507
 
569
508
  template <>
570
- EIGEN_STRONG_INLINE PacketXf por<PacketXf>(const PacketXf& a, const PacketXf& b)
571
- {
572
- return svreinterpret_f32_u32(svorr_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
509
+ EIGEN_STRONG_INLINE PacketXf por<PacketXf>(const PacketXf& a, const PacketXf& b) {
510
+ return svreinterpret_f32_u32(svorr_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
573
511
  }
574
512
 
575
513
  template <>
576
- EIGEN_STRONG_INLINE PacketXf pxor<PacketXf>(const PacketXf& a, const PacketXf& b)
577
- {
578
- return svreinterpret_f32_u32(sveor_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
514
+ EIGEN_STRONG_INLINE PacketXf pxor<PacketXf>(const PacketXf& a, const PacketXf& b) {
515
+ return svreinterpret_f32_u32(sveor_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
579
516
  }
580
517
 
581
518
  template <>
582
- EIGEN_STRONG_INLINE PacketXf pandnot<PacketXf>(const PacketXf& a, const PacketXf& b)
583
- {
584
- return svreinterpret_f32_u32(svbic_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
519
+ EIGEN_STRONG_INLINE PacketXf pandnot<PacketXf>(const PacketXf& a, const PacketXf& b) {
520
+ return svreinterpret_f32_u32(svbic_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
585
521
  }
586
522
 
587
523
  template <>
588
- EIGEN_STRONG_INLINE PacketXf pload<PacketXf>(const float* from)
589
- {
524
+ EIGEN_STRONG_INLINE PacketXf pload<PacketXf>(const float* from) {
590
525
  EIGEN_DEBUG_ALIGNED_LOAD return svld1_f32(svptrue_b32(), from);
591
526
  }
592
527
 
593
528
  template <>
594
- EIGEN_STRONG_INLINE PacketXf ploadu<PacketXf>(const float* from)
595
- {
529
+ EIGEN_STRONG_INLINE PacketXf ploadu<PacketXf>(const float* from) {
596
530
  EIGEN_DEBUG_UNALIGNED_LOAD return svld1_f32(svptrue_b32(), from);
597
531
  }
598
532
 
599
533
  template <>
600
- EIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from)
601
- {
534
+ EIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from) {
602
535
  svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
603
536
  indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
604
537
  return svld1_gather_u32index_f32(svptrue_b32(), from, indices);
605
538
  }
606
539
 
607
540
  template <>
608
- EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from)
609
- {
541
+ EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from) {
610
542
  svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
611
543
  indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
612
544
  indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a0, a0, a1, a1, a1, a1, ...}
@@ -614,63 +546,54 @@ EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from)
614
546
  }
615
547
 
616
548
  template <>
617
- EIGEN_STRONG_INLINE void pstore<float>(float* to, const PacketXf& from)
618
- {
549
+ EIGEN_STRONG_INLINE void pstore<float>(float* to, const PacketXf& from) {
619
550
  EIGEN_DEBUG_ALIGNED_STORE svst1_f32(svptrue_b32(), to, from);
620
551
  }
621
552
 
622
553
  template <>
623
- EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const PacketXf& from)
624
- {
554
+ EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const PacketXf& from) {
625
555
  EIGEN_DEBUG_UNALIGNED_STORE svst1_f32(svptrue_b32(), to, from);
626
556
  }
627
557
 
628
558
  template <>
629
- EIGEN_DEVICE_FUNC inline PacketXf pgather<float, PacketXf>(const float* from, Index stride)
630
- {
559
+ EIGEN_DEVICE_FUNC inline PacketXf pgather<float, PacketXf>(const float* from, Index stride) {
631
560
  // Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
632
561
  svint32_t indices = svindex_s32(0, stride);
633
562
  return svld1_gather_s32index_f32(svptrue_b32(), from, indices);
634
563
  }
635
564
 
636
565
  template <>
637
- EIGEN_DEVICE_FUNC inline void pscatter<float, PacketXf>(float* to, const PacketXf& from, Index stride)
638
- {
566
+ EIGEN_DEVICE_FUNC inline void pscatter<float, PacketXf>(float* to, const PacketXf& from, Index stride) {
639
567
  // Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
640
568
  svint32_t indices = svindex_s32(0, stride);
641
569
  svst1_scatter_s32index_f32(svptrue_b32(), to, indices, from);
642
570
  }
643
571
 
644
572
  template <>
645
- EIGEN_STRONG_INLINE float pfirst<PacketXf>(const PacketXf& a)
646
- {
573
+ EIGEN_STRONG_INLINE float pfirst<PacketXf>(const PacketXf& a) {
647
574
  // svlasta returns the first element if all predicate bits are 0
648
575
  return svlasta_f32(svpfalse_b(), a);
649
576
  }
650
577
 
651
578
  template <>
652
- EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a)
653
- {
579
+ EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) {
654
580
  return svrev_f32(a);
655
581
  }
656
582
 
657
583
  template <>
658
- EIGEN_STRONG_INLINE PacketXf pabs(const PacketXf& a)
659
- {
660
- return svabs_f32_z(svptrue_b32(), a);
584
+ EIGEN_STRONG_INLINE PacketXf pabs(const PacketXf& a) {
585
+ return svabs_f32_x(svptrue_b32(), a);
661
586
  }
662
587
 
663
- // TODO(tellenbach): Should this go into MathFunctions.h? If so, change for
588
+ // TODO(tellenbach): Should this go into MathFunctions.h? If so, change for
664
589
  // all vector extensions and the generic version.
665
590
  template <>
666
- EIGEN_STRONG_INLINE PacketXf pfrexp<PacketXf>(const PacketXf& a, PacketXf& exponent)
667
- {
591
+ EIGEN_STRONG_INLINE PacketXf pfrexp<PacketXf>(const PacketXf& a, PacketXf& exponent) {
668
592
  return pfrexp_generic(a, exponent);
669
593
  }
670
594
 
671
595
  template <>
672
- EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a)
673
- {
596
+ EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a) {
674
597
  return svaddv_f32(svptrue_b32(), a);
675
598
  }
676
599
 
@@ -678,54 +601,49 @@ EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a)
678
601
  // mul
679
602
  // Only works for SVE Vls multiple of 128
680
603
  template <>
681
- EIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a)
682
- {
683
- EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0),
684
- EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
604
+ EIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a) {
605
+ EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0), EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
685
606
  // Multiply the vector by its reverse
686
- svfloat32_t prod = svmul_f32_z(svptrue_b32(), a, svrev_f32(a));
607
+ svfloat32_t prod = svmul_f32_x(svptrue_b32(), a, svrev_f32(a));
687
608
  svfloat32_t half_prod;
688
609
 
689
610
  // Extract the high half of the vector. Depending on the VL more reductions need to be done
690
611
  if (EIGEN_ARM64_SVE_VL >= 2048) {
691
612
  half_prod = svtbl_f32(prod, svindex_u32(32, 1));
692
- prod = svmul_f32_z(svptrue_b32(), prod, half_prod);
613
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
693
614
  }
694
615
  if (EIGEN_ARM64_SVE_VL >= 1024) {
695
616
  half_prod = svtbl_f32(prod, svindex_u32(16, 1));
696
- prod = svmul_f32_z(svptrue_b32(), prod, half_prod);
617
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
697
618
  }
698
619
  if (EIGEN_ARM64_SVE_VL >= 512) {
699
620
  half_prod = svtbl_f32(prod, svindex_u32(8, 1));
700
- prod = svmul_f32_z(svptrue_b32(), prod, half_prod);
621
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
701
622
  }
702
623
  if (EIGEN_ARM64_SVE_VL >= 256) {
703
624
  half_prod = svtbl_f32(prod, svindex_u32(4, 1));
704
- prod = svmul_f32_z(svptrue_b32(), prod, half_prod);
625
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
705
626
  }
706
627
  // Last reduction
707
628
  half_prod = svtbl_f32(prod, svindex_u32(2, 1));
708
- prod = svmul_f32_z(svptrue_b32(), prod, half_prod);
629
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
709
630
 
710
631
  // The reduction is done to the first element.
711
632
  return pfirst<PacketXf>(prod);
712
633
  }
713
634
 
714
635
  template <>
715
- EIGEN_STRONG_INLINE float predux_min<PacketXf>(const PacketXf& a)
716
- {
636
+ EIGEN_STRONG_INLINE float predux_min<PacketXf>(const PacketXf& a) {
717
637
  return svminv_f32(svptrue_b32(), a);
718
638
  }
719
639
 
720
640
  template <>
721
- EIGEN_STRONG_INLINE float predux_max<PacketXf>(const PacketXf& a)
722
- {
641
+ EIGEN_STRONG_INLINE float predux_max<PacketXf>(const PacketXf& a) {
723
642
  return svmaxv_f32(svptrue_b32(), a);
724
643
  }
725
644
 
726
- template<int N>
727
- EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXf, N>& kernel)
728
- {
645
+ template <int N>
646
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXf, N>& kernel) {
729
647
  float buffer[packet_traits<float>::size * N] = {0};
730
648
  int i = 0;
731
649
 
@@ -740,12 +658,16 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXf, N>& kernel)
740
658
  }
741
659
  }
742
660
 
743
- template<>
744
- EIGEN_STRONG_INLINE PacketXf pldexp<PacketXf>(const PacketXf& a, const PacketXf& exponent)
745
- {
661
+ template <>
662
+ EIGEN_STRONG_INLINE PacketXf pldexp<PacketXf>(const PacketXf& a, const PacketXf& exponent) {
746
663
  return pldexp_generic(a, exponent);
747
664
  }
748
665
 
666
+ template <>
667
+ EIGEN_STRONG_INLINE PacketXf psqrt<PacketXf>(const PacketXf& a) {
668
+ return svsqrt_f32_x(svptrue_b32(), a);
669
+ }
670
+
749
671
  } // namespace internal
750
672
  } // namespace Eigen
751
673