@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -0,0 +1,674 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2020, Arm Limited and Contributors
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_PACKET_MATH_SVE_H
11
+ #define EIGEN_PACKET_MATH_SVE_H
12
+
13
+ // IWYU pragma: private
14
+ #include "../../InternalHeaderCheck.h"
15
+
16
+ namespace Eigen {
17
+ namespace internal {
18
+ #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
19
+ #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
20
+ #endif
21
+
22
+ #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
23
+ #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
24
+ #endif
25
+
26
+ #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
27
+
28
+ template <typename Scalar, int SVEVectorLength>
29
+ struct sve_packet_size_selector {
30
+ enum { size = SVEVectorLength / (sizeof(Scalar) * CHAR_BIT) };
31
+ };
32
+
33
+ /********************************* int32 **************************************/
34
+ typedef svint32_t PacketXi __attribute__((arm_sve_vector_bits(EIGEN_ARM64_SVE_VL)));
35
+
36
+ template <>
37
+ struct packet_traits<numext::int32_t> : default_packet_traits {
38
+ typedef PacketXi type;
39
+ typedef PacketXi half; // Half not implemented yet
40
+ enum {
41
+ Vectorizable = 1,
42
+ AlignedOnScalar = 1,
43
+ size = sve_packet_size_selector<numext::int32_t, EIGEN_ARM64_SVE_VL>::size,
44
+
45
+ HasAdd = 1,
46
+ HasSub = 1,
47
+ HasShift = 1,
48
+ HasMul = 1,
49
+ HasNegate = 1,
50
+ HasAbs = 1,
51
+ HasArg = 0,
52
+ HasAbs2 = 1,
53
+ HasMin = 1,
54
+ HasMax = 1,
55
+ HasConj = 1,
56
+ HasSetLinear = 0,
57
+ HasBlend = 0,
58
+ HasReduxp = 0 // Not implemented in SVE
59
+ };
60
+ };
61
+
62
+ template <>
63
+ struct unpacket_traits<PacketXi> {
64
+ typedef numext::int32_t type;
65
+ typedef PacketXi half; // Half not yet implemented
66
+ enum {
67
+ size = sve_packet_size_selector<numext::int32_t, EIGEN_ARM64_SVE_VL>::size,
68
+ alignment = Aligned64,
69
+ vectorizable = true,
70
+ masked_load_available = false,
71
+ masked_store_available = false
72
+ };
73
+ };
74
+
75
+ template <>
76
+ EIGEN_STRONG_INLINE void prefetch<numext::int32_t>(const numext::int32_t* addr) {
77
+ svprfw(svptrue_b32(), addr, SV_PLDL1KEEP);
78
+ }
79
+
80
+ template <>
81
+ EIGEN_STRONG_INLINE PacketXi pset1<PacketXi>(const numext::int32_t& from) {
82
+ return svdup_n_s32(from);
83
+ }
84
+
85
+ template <>
86
+ EIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const numext::int32_t& a) {
87
+ numext::int32_t c[packet_traits<numext::int32_t>::size];
88
+ for (int i = 0; i < packet_traits<numext::int32_t>::size; i++) c[i] = i;
89
+ return svadd_s32_x(svptrue_b32(), pset1<PacketXi>(a), svld1_s32(svptrue_b32(), c));
90
+ }
91
+
92
+ template <>
93
+ EIGEN_STRONG_INLINE PacketXi padd<PacketXi>(const PacketXi& a, const PacketXi& b) {
94
+ return svadd_s32_x(svptrue_b32(), a, b);
95
+ }
96
+
97
+ template <>
98
+ EIGEN_STRONG_INLINE PacketXi psub<PacketXi>(const PacketXi& a, const PacketXi& b) {
99
+ return svsub_s32_x(svptrue_b32(), a, b);
100
+ }
101
+
102
+ template <>
103
+ EIGEN_STRONG_INLINE PacketXi pnegate(const PacketXi& a) {
104
+ return svneg_s32_x(svptrue_b32(), a);
105
+ }
106
+
107
+ template <>
108
+ EIGEN_STRONG_INLINE PacketXi pconj(const PacketXi& a) {
109
+ return a;
110
+ }
111
+
112
+ template <>
113
+ EIGEN_STRONG_INLINE PacketXi pmul<PacketXi>(const PacketXi& a, const PacketXi& b) {
114
+ return svmul_s32_x(svptrue_b32(), a, b);
115
+ }
116
+
117
+ template <>
118
+ EIGEN_STRONG_INLINE PacketXi pdiv<PacketXi>(const PacketXi& a, const PacketXi& b) {
119
+ return svdiv_s32_x(svptrue_b32(), a, b);
120
+ }
121
+
122
+ template <>
123
+ EIGEN_STRONG_INLINE PacketXi pmadd(const PacketXi& a, const PacketXi& b, const PacketXi& c) {
124
+ return svmla_s32_x(svptrue_b32(), c, a, b);
125
+ }
126
+
127
+ template <>
128
+ EIGEN_STRONG_INLINE PacketXi pmin<PacketXi>(const PacketXi& a, const PacketXi& b) {
129
+ return svmin_s32_x(svptrue_b32(), a, b);
130
+ }
131
+
132
+ template <>
133
+ EIGEN_STRONG_INLINE PacketXi pmax<PacketXi>(const PacketXi& a, const PacketXi& b) {
134
+ return svmax_s32_x(svptrue_b32(), a, b);
135
+ }
136
+
137
+ template <>
138
+ EIGEN_STRONG_INLINE PacketXi pcmp_le<PacketXi>(const PacketXi& a, const PacketXi& b) {
139
+ return svdup_n_s32_z(svcmple_s32(svptrue_b32(), a, b), 0xffffffffu);
140
+ }
141
+
142
+ template <>
143
+ EIGEN_STRONG_INLINE PacketXi pcmp_lt<PacketXi>(const PacketXi& a, const PacketXi& b) {
144
+ return svdup_n_s32_z(svcmplt_s32(svptrue_b32(), a, b), 0xffffffffu);
145
+ }
146
+
147
+ template <>
148
+ EIGEN_STRONG_INLINE PacketXi pcmp_eq<PacketXi>(const PacketXi& a, const PacketXi& b) {
149
+ return svdup_n_s32_z(svcmpeq_s32(svptrue_b32(), a, b), 0xffffffffu);
150
+ }
151
+
152
+ template <>
153
+ EIGEN_STRONG_INLINE PacketXi ptrue<PacketXi>(const PacketXi& /*a*/) {
154
+ return svdup_n_s32_x(svptrue_b32(), 0xffffffffu);
155
+ }
156
+
157
+ template <>
158
+ EIGEN_STRONG_INLINE PacketXi pzero<PacketXi>(const PacketXi& /*a*/) {
159
+ return svdup_n_s32_x(svptrue_b32(), 0);
160
+ }
161
+
162
+ template <>
163
+ EIGEN_STRONG_INLINE PacketXi pand<PacketXi>(const PacketXi& a, const PacketXi& b) {
164
+ return svand_s32_x(svptrue_b32(), a, b);
165
+ }
166
+
167
+ template <>
168
+ EIGEN_STRONG_INLINE PacketXi por<PacketXi>(const PacketXi& a, const PacketXi& b) {
169
+ return svorr_s32_x(svptrue_b32(), a, b);
170
+ }
171
+
172
+ template <>
173
+ EIGEN_STRONG_INLINE PacketXi pxor<PacketXi>(const PacketXi& a, const PacketXi& b) {
174
+ return sveor_s32_x(svptrue_b32(), a, b);
175
+ }
176
+
177
+ template <>
178
+ EIGEN_STRONG_INLINE PacketXi pandnot<PacketXi>(const PacketXi& a, const PacketXi& b) {
179
+ return svbic_s32_x(svptrue_b32(), a, b);
180
+ }
181
+
182
+ template <int N>
183
+ EIGEN_STRONG_INLINE PacketXi parithmetic_shift_right(PacketXi a) {
184
+ return svasrd_n_s32_x(svptrue_b32(), a, N);
185
+ }
186
+
187
+ template <int N>
188
+ EIGEN_STRONG_INLINE PacketXi plogical_shift_right(PacketXi a) {
189
+ return svreinterpret_s32_u32(svlsr_n_u32_x(svptrue_b32(), svreinterpret_u32_s32(a), N));
190
+ }
191
+
192
+ template <int N>
193
+ EIGEN_STRONG_INLINE PacketXi plogical_shift_left(PacketXi a) {
194
+ return svlsl_n_s32_x(svptrue_b32(), a, N);
195
+ }
196
+
197
+ template <>
198
+ EIGEN_STRONG_INLINE PacketXi pload<PacketXi>(const numext::int32_t* from) {
199
+ EIGEN_DEBUG_ALIGNED_LOAD return svld1_s32(svptrue_b32(), from);
200
+ }
201
+
202
+ template <>
203
+ EIGEN_STRONG_INLINE PacketXi ploadu<PacketXi>(const numext::int32_t* from) {
204
+ EIGEN_DEBUG_UNALIGNED_LOAD return svld1_s32(svptrue_b32(), from);
205
+ }
206
+
207
+ template <>
208
+ EIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const numext::int32_t* from) {
209
+ svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
210
+ indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
211
+ return svld1_gather_u32index_s32(svptrue_b32(), from, indices);
212
+ }
213
+
214
+ template <>
215
+ EIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const numext::int32_t* from) {
216
+ svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
217
+ indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
218
+ indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a0, a0, a1, a1, a1, a1, ...}
219
+ return svld1_gather_u32index_s32(svptrue_b32(), from, indices);
220
+ }
221
+
222
+ template <>
223
+ EIGEN_STRONG_INLINE void pstore<numext::int32_t>(numext::int32_t* to, const PacketXi& from) {
224
+ EIGEN_DEBUG_ALIGNED_STORE svst1_s32(svptrue_b32(), to, from);
225
+ }
226
+
227
+ template <>
228
+ EIGEN_STRONG_INLINE void pstoreu<numext::int32_t>(numext::int32_t* to, const PacketXi& from) {
229
+ EIGEN_DEBUG_UNALIGNED_STORE svst1_s32(svptrue_b32(), to, from);
230
+ }
231
+
232
+ template <>
233
+ EIGEN_DEVICE_FUNC inline PacketXi pgather<numext::int32_t, PacketXi>(const numext::int32_t* from, Index stride) {
234
+ // Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
235
+ svint32_t indices = svindex_s32(0, stride);
236
+ return svld1_gather_s32index_s32(svptrue_b32(), from, indices);
237
+ }
238
+
239
+ template <>
240
+ EIGEN_DEVICE_FUNC inline void pscatter<numext::int32_t, PacketXi>(numext::int32_t* to, const PacketXi& from,
241
+ Index stride) {
242
+ // Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
243
+ svint32_t indices = svindex_s32(0, stride);
244
+ svst1_scatter_s32index_s32(svptrue_b32(), to, indices, from);
245
+ }
246
+
247
+ template <>
248
+ EIGEN_STRONG_INLINE numext::int32_t pfirst<PacketXi>(const PacketXi& a) {
249
+ // svlasta returns the first element if all predicate bits are 0
250
+ return svlasta_s32(svpfalse_b(), a);
251
+ }
252
+
253
+ template <>
254
+ EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) {
255
+ return svrev_s32(a);
256
+ }
257
+
258
+ template <>
259
+ EIGEN_STRONG_INLINE PacketXi pabs(const PacketXi& a) {
260
+ return svabs_s32_x(svptrue_b32(), a);
261
+ }
262
+
263
+ template <>
264
+ EIGEN_STRONG_INLINE numext::int32_t predux<PacketXi>(const PacketXi& a) {
265
+ return static_cast<numext::int32_t>(svaddv_s32(svptrue_b32(), a));
266
+ }
267
+
268
+ template <>
269
+ EIGEN_STRONG_INLINE numext::int32_t predux_mul<PacketXi>(const PacketXi& a) {
270
+ EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0), EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
271
+
272
+ // Multiply the vector by its reverse
273
+ svint32_t prod = svmul_s32_x(svptrue_b32(), a, svrev_s32(a));
274
+ svint32_t half_prod;
275
+
276
+ // Extract the high half of the vector. Depending on the VL more reductions need to be done
277
+ if (EIGEN_ARM64_SVE_VL >= 2048) {
278
+ half_prod = svtbl_s32(prod, svindex_u32(32, 1));
279
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
280
+ }
281
+ if (EIGEN_ARM64_SVE_VL >= 1024) {
282
+ half_prod = svtbl_s32(prod, svindex_u32(16, 1));
283
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
284
+ }
285
+ if (EIGEN_ARM64_SVE_VL >= 512) {
286
+ half_prod = svtbl_s32(prod, svindex_u32(8, 1));
287
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
288
+ }
289
+ if (EIGEN_ARM64_SVE_VL >= 256) {
290
+ half_prod = svtbl_s32(prod, svindex_u32(4, 1));
291
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
292
+ }
293
+ // Last reduction
294
+ half_prod = svtbl_s32(prod, svindex_u32(2, 1));
295
+ prod = svmul_s32_x(svptrue_b32(), prod, half_prod);
296
+
297
+ // The reduction is done to the first element.
298
+ return pfirst<PacketXi>(prod);
299
+ }
300
+
301
+ template <>
302
+ EIGEN_STRONG_INLINE numext::int32_t predux_min<PacketXi>(const PacketXi& a) {
303
+ return svminv_s32(svptrue_b32(), a);
304
+ }
305
+
306
+ template <>
307
+ EIGEN_STRONG_INLINE numext::int32_t predux_max<PacketXi>(const PacketXi& a) {
308
+ return svmaxv_s32(svptrue_b32(), a);
309
+ }
310
+
311
+ template <int N>
312
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXi, N>& kernel) {
313
+ int buffer[packet_traits<numext::int32_t>::size * N] = {0};
314
+ int i = 0;
315
+
316
+ PacketXi stride_index = svindex_s32(0, N);
317
+
318
+ for (i = 0; i < N; i++) {
319
+ svst1_scatter_s32index_s32(svptrue_b32(), buffer + i, stride_index, kernel.packet[i]);
320
+ }
321
+ for (i = 0; i < N; i++) {
322
+ kernel.packet[i] = svld1_s32(svptrue_b32(), buffer + i * packet_traits<numext::int32_t>::size);
323
+ }
324
+ }
325
+
326
+ /********************************* float32 ************************************/
327
+
328
+ typedef svfloat32_t PacketXf __attribute__((arm_sve_vector_bits(EIGEN_ARM64_SVE_VL)));
329
+
330
+ template <>
331
+ struct packet_traits<float> : default_packet_traits {
332
+ typedef PacketXf type;
333
+ typedef PacketXf half;
334
+
335
+ enum {
336
+ Vectorizable = 1,
337
+ AlignedOnScalar = 1,
338
+ size = sve_packet_size_selector<float, EIGEN_ARM64_SVE_VL>::size,
339
+
340
+ HasAdd = 1,
341
+ HasSub = 1,
342
+ HasShift = 1,
343
+ HasMul = 1,
344
+ HasNegate = 1,
345
+ HasAbs = 1,
346
+ HasArg = 0,
347
+ HasAbs2 = 1,
348
+ HasMin = 1,
349
+ HasMax = 1,
350
+ HasConj = 1,
351
+ HasSetLinear = 0,
352
+ HasBlend = 0,
353
+ HasReduxp = 0, // Not implemented in SVE
354
+
355
+ HasDiv = 1,
356
+
357
+ HasCmp = 1,
358
+ HasSin = EIGEN_FAST_MATH,
359
+ HasCos = EIGEN_FAST_MATH,
360
+ HasLog = 1,
361
+ HasExp = 1,
362
+ HasPow = 1,
363
+ HasSqrt = 1,
364
+ HasTanh = EIGEN_FAST_MATH,
365
+ HasErf = EIGEN_FAST_MATH,
366
+ HasErfc = EIGEN_FAST_MATH
367
+ };
368
+ };
369
+
370
+ template <>
371
+ struct unpacket_traits<PacketXf> {
372
+ typedef float type;
373
+ typedef PacketXf half; // Half not yet implemented
374
+ typedef PacketXi integer_packet;
375
+
376
+ enum {
377
+ size = sve_packet_size_selector<float, EIGEN_ARM64_SVE_VL>::size,
378
+ alignment = Aligned64,
379
+ vectorizable = true,
380
+ masked_load_available = false,
381
+ masked_store_available = false
382
+ };
383
+ };
384
+
385
+ template <>
386
+ EIGEN_STRONG_INLINE PacketXf pset1<PacketXf>(const float& from) {
387
+ return svdup_n_f32(from);
388
+ }
389
+
390
+ template <>
391
+ EIGEN_STRONG_INLINE PacketXf pset1frombits<PacketXf>(numext::uint32_t from) {
392
+ return svreinterpret_f32_u32(svdup_n_u32_x(svptrue_b32(), from));
393
+ }
394
+
395
+ template <>
396
+ EIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a) {
397
+ float c[packet_traits<float>::size];
398
+ for (int i = 0; i < packet_traits<float>::size; i++) c[i] = i;
399
+ return svadd_f32_x(svptrue_b32(), pset1<PacketXf>(a), svld1_f32(svptrue_b32(), c));
400
+ }
401
+
402
+ template <>
403
+ EIGEN_STRONG_INLINE PacketXf padd<PacketXf>(const PacketXf& a, const PacketXf& b) {
404
+ return svadd_f32_x(svptrue_b32(), a, b);
405
+ }
406
+
407
+ template <>
408
+ EIGEN_STRONG_INLINE PacketXf psub<PacketXf>(const PacketXf& a, const PacketXf& b) {
409
+ return svsub_f32_x(svptrue_b32(), a, b);
410
+ }
411
+
412
+ template <>
413
+ EIGEN_STRONG_INLINE PacketXf pnegate(const PacketXf& a) {
414
+ return svneg_f32_x(svptrue_b32(), a);
415
+ }
416
+
417
+ template <>
418
+ EIGEN_STRONG_INLINE PacketXf pconj(const PacketXf& a) {
419
+ return a;
420
+ }
421
+
422
+ template <>
423
+ EIGEN_STRONG_INLINE PacketXf pmul<PacketXf>(const PacketXf& a, const PacketXf& b) {
424
+ return svmul_f32_x(svptrue_b32(), a, b);
425
+ }
426
+
427
+ template <>
428
+ EIGEN_STRONG_INLINE PacketXf pdiv<PacketXf>(const PacketXf& a, const PacketXf& b) {
429
+ return svdiv_f32_x(svptrue_b32(), a, b);
430
+ }
431
+
432
+ template <>
433
+ EIGEN_STRONG_INLINE PacketXf pmadd(const PacketXf& a, const PacketXf& b, const PacketXf& c) {
434
+ return svmla_f32_x(svptrue_b32(), c, a, b);
435
+ }
436
+
437
+ template <>
438
+ EIGEN_STRONG_INLINE PacketXf pmin<PacketXf>(const PacketXf& a, const PacketXf& b) {
439
+ return svmin_f32_x(svptrue_b32(), a, b);
440
+ }
441
+
442
+ template <>
443
+ EIGEN_STRONG_INLINE PacketXf pmin<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b) {
444
+ return pmin<PacketXf>(a, b);
445
+ }
446
+
447
+ template <>
448
+ EIGEN_STRONG_INLINE PacketXf pmin<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b) {
449
+ return svminnm_f32_x(svptrue_b32(), a, b);
450
+ }
451
+
452
+ template <>
453
+ EIGEN_STRONG_INLINE PacketXf pmax<PacketXf>(const PacketXf& a, const PacketXf& b) {
454
+ return svmax_f32_x(svptrue_b32(), a, b);
455
+ }
456
+
457
+ template <>
458
+ EIGEN_STRONG_INLINE PacketXf pmax<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b) {
459
+ return pmax<PacketXf>(a, b);
460
+ }
461
+
462
+ template <>
463
+ EIGEN_STRONG_INLINE PacketXf pmax<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b) {
464
+ return svmaxnm_f32_x(svptrue_b32(), a, b);
465
+ }
466
+
467
+ // Float comparisons in SVE return svbool (predicate). Use svdup to set active
468
+ // lanes to 1 (0xffffffffu) and inactive lanes to 0.
469
+ template <>
470
+ EIGEN_STRONG_INLINE PacketXf pcmp_le<PacketXf>(const PacketXf& a, const PacketXf& b) {
471
+ return svreinterpret_f32_u32(svdup_n_u32_z(svcmple_f32(svptrue_b32(), a, b), 0xffffffffu));
472
+ }
473
+
474
+ template <>
475
+ EIGEN_STRONG_INLINE PacketXf pcmp_lt<PacketXf>(const PacketXf& a, const PacketXf& b) {
476
+ return svreinterpret_f32_u32(svdup_n_u32_z(svcmplt_f32(svptrue_b32(), a, b), 0xffffffffu));
477
+ }
478
+
479
+ template <>
480
+ EIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf& b) {
481
+ return svreinterpret_f32_u32(svdup_n_u32_z(svcmpeq_f32(svptrue_b32(), a, b), 0xffffffffu));
482
+ }
483
+
484
+ // Do a predicate inverse (svnot_b_z) on the predicate resulted from the
485
+ // greater/equal comparison (svcmpge_f32). Then fill a float vector with the
486
+ // active elements.
487
+ template <>
488
+ EIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b) {
489
+ return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_z(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
490
+ }
491
+
492
+ template <>
493
+ EIGEN_STRONG_INLINE PacketXf pfloor<PacketXf>(const PacketXf& a) {
494
+ return svrintm_f32_x(svptrue_b32(), a);
495
+ }
496
+
497
+ template <>
498
+ EIGEN_STRONG_INLINE PacketXf ptrue<PacketXf>(const PacketXf& /*a*/) {
499
+ return svreinterpret_f32_u32(svdup_n_u32_x(svptrue_b32(), 0xffffffffu));
500
+ }
501
+
502
+ // Logical Operations are not supported for float, so reinterpret casts
503
+ template <>
504
+ EIGEN_STRONG_INLINE PacketXf pand<PacketXf>(const PacketXf& a, const PacketXf& b) {
505
+ return svreinterpret_f32_u32(svand_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
506
+ }
507
+
508
+ template <>
509
+ EIGEN_STRONG_INLINE PacketXf por<PacketXf>(const PacketXf& a, const PacketXf& b) {
510
+ return svreinterpret_f32_u32(svorr_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
511
+ }
512
+
513
+ template <>
514
+ EIGEN_STRONG_INLINE PacketXf pxor<PacketXf>(const PacketXf& a, const PacketXf& b) {
515
+ return svreinterpret_f32_u32(sveor_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
516
+ }
517
+
518
+ template <>
519
+ EIGEN_STRONG_INLINE PacketXf pandnot<PacketXf>(const PacketXf& a, const PacketXf& b) {
520
+ return svreinterpret_f32_u32(svbic_u32_x(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));
521
+ }
522
+
523
+ template <>
524
+ EIGEN_STRONG_INLINE PacketXf pload<PacketXf>(const float* from) {
525
+ EIGEN_DEBUG_ALIGNED_LOAD return svld1_f32(svptrue_b32(), from);
526
+ }
527
+
528
+ template <>
529
+ EIGEN_STRONG_INLINE PacketXf ploadu<PacketXf>(const float* from) {
530
+ EIGEN_DEBUG_UNALIGNED_LOAD return svld1_f32(svptrue_b32(), from);
531
+ }
532
+
533
+ template <>
534
+ EIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from) {
535
+ svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
536
+ indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
537
+ return svld1_gather_u32index_f32(svptrue_b32(), from, indices);
538
+ }
539
+
540
+ template <>
541
+ EIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from) {
542
+ svuint32_t indices = svindex_u32(0, 1); // index {base=0, base+step=1, base+step*2, ...}
543
+ indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a1, a1, a2, a2, ...}
544
+ indices = svzip1_u32(indices, indices); // index in the format {a0, a0, a0, a0, a1, a1, a1, a1, ...}
545
+ return svld1_gather_u32index_f32(svptrue_b32(), from, indices);
546
+ }
547
+
548
+ template <>
549
+ EIGEN_STRONG_INLINE void pstore<float>(float* to, const PacketXf& from) {
550
+ EIGEN_DEBUG_ALIGNED_STORE svst1_f32(svptrue_b32(), to, from);
551
+ }
552
+
553
+ template <>
554
+ EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const PacketXf& from) {
555
+ EIGEN_DEBUG_UNALIGNED_STORE svst1_f32(svptrue_b32(), to, from);
556
+ }
557
+
558
+ template <>
559
+ EIGEN_DEVICE_FUNC inline PacketXf pgather<float, PacketXf>(const float* from, Index stride) {
560
+ // Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
561
+ svint32_t indices = svindex_s32(0, stride);
562
+ return svld1_gather_s32index_f32(svptrue_b32(), from, indices);
563
+ }
564
+
565
+ template <>
566
+ EIGEN_DEVICE_FUNC inline void pscatter<float, PacketXf>(float* to, const PacketXf& from, Index stride) {
567
+ // Indice format: {base=0, base+stride, base+stride*2, base+stride*3, ...}
568
+ svint32_t indices = svindex_s32(0, stride);
569
+ svst1_scatter_s32index_f32(svptrue_b32(), to, indices, from);
570
+ }
571
+
572
+ template <>
573
+ EIGEN_STRONG_INLINE float pfirst<PacketXf>(const PacketXf& a) {
574
+ // svlasta returns the first element if all predicate bits are 0
575
+ return svlasta_f32(svpfalse_b(), a);
576
+ }
577
+
578
+ template <>
579
+ EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) {
580
+ return svrev_f32(a);
581
+ }
582
+
583
+ template <>
584
+ EIGEN_STRONG_INLINE PacketXf pabs(const PacketXf& a) {
585
+ return svabs_f32_x(svptrue_b32(), a);
586
+ }
587
+
588
+ // TODO(tellenbach): Should this go into MathFunctions.h? If so, change for
589
+ // all vector extensions and the generic version.
590
+ template <>
591
+ EIGEN_STRONG_INLINE PacketXf pfrexp<PacketXf>(const PacketXf& a, PacketXf& exponent) {
592
+ return pfrexp_generic(a, exponent);
593
+ }
594
+
595
+ template <>
596
+ EIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a) {
597
+ return svaddv_f32(svptrue_b32(), a);
598
+ }
599
+
600
+ // Other reduction functions:
601
+ // mul
602
+ // Only works for SVE Vls multiple of 128
603
+ template <>
604
+ EIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a) {
605
+ EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0), EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
606
+ // Multiply the vector by its reverse
607
+ svfloat32_t prod = svmul_f32_x(svptrue_b32(), a, svrev_f32(a));
608
+ svfloat32_t half_prod;
609
+
610
+ // Extract the high half of the vector. Depending on the VL more reductions need to be done
611
+ if (EIGEN_ARM64_SVE_VL >= 2048) {
612
+ half_prod = svtbl_f32(prod, svindex_u32(32, 1));
613
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
614
+ }
615
+ if (EIGEN_ARM64_SVE_VL >= 1024) {
616
+ half_prod = svtbl_f32(prod, svindex_u32(16, 1));
617
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
618
+ }
619
+ if (EIGEN_ARM64_SVE_VL >= 512) {
620
+ half_prod = svtbl_f32(prod, svindex_u32(8, 1));
621
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
622
+ }
623
+ if (EIGEN_ARM64_SVE_VL >= 256) {
624
+ half_prod = svtbl_f32(prod, svindex_u32(4, 1));
625
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
626
+ }
627
+ // Last reduction
628
+ half_prod = svtbl_f32(prod, svindex_u32(2, 1));
629
+ prod = svmul_f32_x(svptrue_b32(), prod, half_prod);
630
+
631
+ // The reduction is done to the first element.
632
+ return pfirst<PacketXf>(prod);
633
+ }
634
+
635
+ template <>
636
+ EIGEN_STRONG_INLINE float predux_min<PacketXf>(const PacketXf& a) {
637
+ return svminv_f32(svptrue_b32(), a);
638
+ }
639
+
640
+ template <>
641
+ EIGEN_STRONG_INLINE float predux_max<PacketXf>(const PacketXf& a) {
642
+ return svmaxv_f32(svptrue_b32(), a);
643
+ }
644
+
645
+ template <int N>
646
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXf, N>& kernel) {
647
+ float buffer[packet_traits<float>::size * N] = {0};
648
+ int i = 0;
649
+
650
+ PacketXi stride_index = svindex_s32(0, N);
651
+
652
+ for (i = 0; i < N; i++) {
653
+ svst1_scatter_s32index_f32(svptrue_b32(), buffer + i, stride_index, kernel.packet[i]);
654
+ }
655
+
656
+ for (i = 0; i < N; i++) {
657
+ kernel.packet[i] = svld1_f32(svptrue_b32(), buffer + i * packet_traits<float>::size);
658
+ }
659
+ }
660
+
661
+ template <>
662
+ EIGEN_STRONG_INLINE PacketXf pldexp<PacketXf>(const PacketXf& a, const PacketXf& exponent) {
663
+ return pldexp_generic(a, exponent);
664
+ }
665
+
666
+ template <>
667
+ EIGEN_STRONG_INLINE PacketXf psqrt<PacketXf>(const PacketXf& a) {
668
+ return svsqrt_f32_x(svptrue_b32(), a);
669
+ }
670
+
671
+ } // namespace internal
672
+ } // namespace Eigen
673
+
674
+ #endif // EIGEN_PACKET_MATH_SVE_H