@smake/eigen 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. package/README.md +1 -1
  2. package/eigen/COPYING.APACHE +203 -0
  3. package/eigen/COPYING.BSD +1 -1
  4. package/eigen/COPYING.MINPACK +51 -52
  5. package/eigen/Eigen/Cholesky +0 -1
  6. package/eigen/Eigen/Core +108 -266
  7. package/eigen/Eigen/Eigenvalues +0 -1
  8. package/eigen/Eigen/Geometry +3 -6
  9. package/eigen/Eigen/Householder +0 -1
  10. package/eigen/Eigen/Jacobi +0 -1
  11. package/eigen/Eigen/KLUSupport +41 -0
  12. package/eigen/Eigen/LU +2 -5
  13. package/eigen/Eigen/OrderingMethods +0 -3
  14. package/eigen/Eigen/PaStiXSupport +1 -0
  15. package/eigen/Eigen/PardisoSupport +0 -0
  16. package/eigen/Eigen/QR +0 -1
  17. package/eigen/Eigen/QtAlignedMalloc +0 -1
  18. package/eigen/Eigen/SVD +0 -1
  19. package/eigen/Eigen/Sparse +0 -2
  20. package/eigen/Eigen/SparseCholesky +0 -8
  21. package/eigen/Eigen/SparseLU +4 -0
  22. package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  23. package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  24. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  25. package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  26. package/eigen/Eigen/src/Core/Array.h +99 -11
  27. package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
  28. package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  29. package/eigen/Eigen/src/Core/Assign.h +1 -1
  30. package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  31. package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  32. package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  33. package/eigen/Eigen/src/Core/Block.h +56 -60
  34. package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  35. package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  36. package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  37. package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  38. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  39. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  40. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  41. package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
  42. package/eigen/Eigen/src/Core/DenseBase.h +128 -39
  43. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  44. package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
  45. package/eigen/Eigen/src/Core/Diagonal.h +21 -23
  46. package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  47. package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  48. package/eigen/Eigen/src/Core/Dot.h +10 -10
  49. package/eigen/Eigen/src/Core/EigenBase.h +10 -9
  50. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  51. package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  52. package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  53. package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
  54. package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  55. package/eigen/Eigen/src/Core/IO.h +40 -7
  56. package/eigen/Eigen/src/Core/IndexedView.h +237 -0
  57. package/eigen/Eigen/src/Core/Inverse.h +9 -10
  58. package/eigen/Eigen/src/Core/Map.h +7 -7
  59. package/eigen/Eigen/src/Core/MapBase.h +5 -3
  60. package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
  61. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  62. package/eigen/Eigen/src/Core/Matrix.h +131 -25
  63. package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
  64. package/eigen/Eigen/src/Core/NestByValue.h +25 -50
  65. package/eigen/Eigen/src/Core/NoAlias.h +4 -3
  66. package/eigen/Eigen/src/Core/NumTraits.h +107 -20
  67. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  68. package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
  69. package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
  70. package/eigen/Eigen/src/Core/Product.h +30 -25
  71. package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
  72. package/eigen/Eigen/src/Core/Random.h +37 -1
  73. package/eigen/Eigen/src/Core/Redux.h +180 -170
  74. package/eigen/Eigen/src/Core/Ref.h +118 -21
  75. package/eigen/Eigen/src/Core/Replicate.h +8 -8
  76. package/eigen/Eigen/src/Core/Reshaped.h +454 -0
  77. package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  78. package/eigen/Eigen/src/Core/Reverse.h +18 -12
  79. package/eigen/Eigen/src/Core/Select.h +8 -6
  80. package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  81. package/eigen/Eigen/src/Core/Solve.h +14 -14
  82. package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
  83. package/eigen/Eigen/src/Core/SolverBase.h +41 -3
  84. package/eigen/Eigen/src/Core/StableNorm.h +100 -70
  85. package/eigen/Eigen/src/Core/StlIterators.h +463 -0
  86. package/eigen/Eigen/src/Core/Stride.h +9 -4
  87. package/eigen/Eigen/src/Core/Swap.h +5 -4
  88. package/eigen/Eigen/src/Core/Transpose.h +86 -27
  89. package/eigen/Eigen/src/Core/Transpositions.h +26 -8
  90. package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
  91. package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  92. package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  93. package/eigen/Eigen/src/Core/Visitor.h +137 -29
  94. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  95. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  96. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  97. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  98. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  99. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
  100. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
  101. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  102. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  103. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  104. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  105. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  106. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  107. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  108. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  109. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  110. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  111. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  112. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  113. package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  114. package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  115. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  116. package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  117. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  118. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  119. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  120. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  121. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  122. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  123. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  124. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  125. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  126. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  127. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  128. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  129. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  130. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  131. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  132. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  133. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  134. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  135. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  136. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  137. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  138. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  139. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  140. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  141. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  142. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  143. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  144. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  145. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  146. package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  147. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
  148. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
  149. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
  150. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
  151. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
  152. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  153. package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
  154. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
  155. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  156. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
  157. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  158. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
  159. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
  160. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  161. package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
  162. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  163. package/eigen/Eigen/src/Core/util/Constants.h +25 -9
  164. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
  165. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
  166. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  167. package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  168. package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  169. package/eigen/Eigen/src/Core/util/Macros.h +661 -250
  170. package/eigen/Eigen/src/Core/util/Memory.h +222 -52
  171. package/eigen/Eigen/src/Core/util/Meta.h +349 -105
  172. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  173. package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  174. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  175. package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
  176. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  177. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
  178. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  179. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  180. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  181. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  182. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  183. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
  184. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
  185. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  186. package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  187. package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  188. package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  189. package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  190. package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  191. package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  192. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  193. package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
  194. package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  195. package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
  196. package/eigen/Eigen/src/Geometry/Transform.h +86 -65
  197. package/eigen/Eigen/src/Geometry/Translation.h +6 -6
  198. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  199. package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  200. package/eigen/Eigen/src/Householder/Householder.h +8 -4
  201. package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  202. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  203. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  204. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  205. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  206. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  207. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  208. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  209. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  210. package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  211. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  212. package/eigen/Eigen/src/LU/Determinant.h +35 -19
  213. package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  214. package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  215. package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
  216. package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  217. package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  218. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  219. package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  220. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  221. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
  222. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  223. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  224. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  225. package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  226. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  227. package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
  228. package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  229. package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
  230. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  231. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
  232. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
  233. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  234. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  235. package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  236. package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  237. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  238. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  239. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
  240. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  241. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
  242. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  243. package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  244. package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  245. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
  246. package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  247. package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  248. package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
  249. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  250. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  251. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  252. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  253. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  254. package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  255. package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
  256. package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  257. package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  258. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  259. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  260. package/eigen/Eigen/src/misc/lapacke.h +5 -4
  261. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
  262. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  263. package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  264. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  265. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  266. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  267. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  268. package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  269. package/eigen/README.md +2 -0
  270. package/lib/LibEigen.d.ts +4 -0
  271. package/lib/LibEigen.js +14 -0
  272. package/lib/index.d.ts +1 -1
  273. package/lib/index.js +7 -3
  274. package/package.json +2 -10
  275. package/eigen/Eigen/CMakeLists.txt +0 -19
  276. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  277. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  278. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  279. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  280. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  281. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  282. package/lib/eigen.d.ts +0 -2
  283. package/lib/eigen.js +0 -15
@@ -0,0 +1,183 @@
1
+ namespace Eigen {
2
+ namespace internal {
3
+
4
+ #if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
5
+
6
+ // Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm.
7
+ // Here we specialize gebp_traits to eliminate these register spills.
8
+ // See #2138.
9
+ template<>
10
+ struct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>
11
+ : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>
12
+ {
13
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
14
+ {
15
+ // This volatile inline ASM both acts as a barrier to prevent reordering,
16
+ // as well as enforces strict register use.
17
+ asm volatile(
18
+ "vmla.f32 %q[r], %q[c], %q[alpha]"
19
+ : [r] "+w" (r)
20
+ : [c] "w" (c),
21
+ [alpha] "w" (alpha)
22
+ : );
23
+ }
24
+
25
+ template <typename LaneIdType>
26
+ EIGEN_STRONG_INLINE void madd(const Packet4f& a, const Packet4f& b,
27
+ Packet4f& c, Packet4f& tmp,
28
+ const LaneIdType&) const {
29
+ acc(a, b, c);
30
+ }
31
+
32
+ template <typename LaneIdType>
33
+ EIGEN_STRONG_INLINE void madd(const Packet4f& a, const QuadPacket<Packet4f>& b,
34
+ Packet4f& c, Packet4f& tmp,
35
+ const LaneIdType& lane) const {
36
+ madd(a, b.get(lane), c, tmp, lane);
37
+ }
38
+ };
39
+
40
+ #endif // EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
41
+
42
+ #if EIGEN_ARCH_ARM64
43
+
44
+ template<>
45
+ struct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>
46
+ : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>
47
+ {
48
+ typedef float RhsPacket;
49
+ typedef float32x4_t RhsPacketx4;
50
+
51
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
52
+ {
53
+ dest = *b;
54
+ }
55
+
56
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const
57
+ {
58
+ dest = vld1q_f32(b);
59
+ }
60
+
61
+ EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const
62
+ {
63
+ dest = *b;
64
+ }
65
+
66
+ EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const
67
+ {}
68
+
69
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
70
+ {
71
+ loadRhs(b,dest);
72
+ }
73
+
74
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
75
+ {
76
+ c = vfmaq_n_f32(c, a, b);
77
+ }
78
+
79
+ // NOTE: Template parameter inference failed when compiled with Android NDK:
80
+ // "candidate template ignored: could not match 'FixedInt<N>' against 'Eigen::internal::FixedInt<0>".
81
+
82
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
83
+ { madd_helper<0>(a, b, c); }
84
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<1>&) const
85
+ { madd_helper<1>(a, b, c); }
86
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<2>&) const
87
+ { madd_helper<2>(a, b, c); }
88
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<3>&) const
89
+ { madd_helper<3>(a, b, c); }
90
+
91
+ private:
92
+ template<int LaneID>
93
+ EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
94
+ {
95
+ #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0))
96
+ // workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101
97
+ // vfmaq_laneq_f32 is implemented through a costly dup
98
+ if(LaneID==0) asm("fmla %0.4s, %1.4s, %2.s[0]\n" : "+w" (c) : "w" (a), "w" (b) : );
99
+ else if(LaneID==1) asm("fmla %0.4s, %1.4s, %2.s[1]\n" : "+w" (c) : "w" (a), "w" (b) : );
100
+ else if(LaneID==2) asm("fmla %0.4s, %1.4s, %2.s[2]\n" : "+w" (c) : "w" (a), "w" (b) : );
101
+ else if(LaneID==3) asm("fmla %0.4s, %1.4s, %2.s[3]\n" : "+w" (c) : "w" (a), "w" (b) : );
102
+ #else
103
+ c = vfmaq_laneq_f32(c, a, b, LaneID);
104
+ #endif
105
+ }
106
+ };
107
+
108
+
109
+ template<>
110
+ struct gebp_traits <double,double,false,false,Architecture::NEON>
111
+ : gebp_traits<double,double,false,false,Architecture::Generic>
112
+ {
113
+ typedef double RhsPacket;
114
+
115
+ struct RhsPacketx4 {
116
+ float64x2_t B_0, B_1;
117
+ };
118
+
119
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
120
+ {
121
+ dest = *b;
122
+ }
123
+
124
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const
125
+ {
126
+ dest.B_0 = vld1q_f64(b);
127
+ dest.B_1 = vld1q_f64(b+2);
128
+ }
129
+
130
+ EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const
131
+ {
132
+ loadRhs(b,dest);
133
+ }
134
+
135
+ EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const
136
+ {}
137
+
138
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
139
+ {
140
+ loadRhs(b,dest);
141
+ }
142
+
143
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
144
+ {
145
+ c = vfmaq_n_f64(c, a, b);
146
+ }
147
+
148
+ // NOTE: Template parameter inference failed when compiled with Android NDK:
149
+ // "candidate template ignored: could not match 'FixedInt<N>' against 'Eigen::internal::FixedInt<0>".
150
+
151
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
152
+ { madd_helper<0>(a, b, c); }
153
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<1>&) const
154
+ { madd_helper<1>(a, b, c); }
155
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<2>&) const
156
+ { madd_helper<2>(a, b, c); }
157
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<3>&) const
158
+ { madd_helper<3>(a, b, c); }
159
+
160
+ private:
161
+ template <int LaneID>
162
+ EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
163
+ {
164
+ #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0))
165
+ // workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101
166
+ // vfmaq_laneq_f64 is implemented through a costly dup
167
+ if(LaneID==0) asm("fmla %0.2d, %1.2d, %2.d[0]\n" : "+w" (c) : "w" (a), "w" (b.B_0) : );
168
+ else if(LaneID==1) asm("fmla %0.2d, %1.2d, %2.d[1]\n" : "+w" (c) : "w" (a), "w" (b.B_0) : );
169
+ else if(LaneID==2) asm("fmla %0.2d, %1.2d, %2.d[0]\n" : "+w" (c) : "w" (a), "w" (b.B_1) : );
170
+ else if(LaneID==3) asm("fmla %0.2d, %1.2d, %2.d[1]\n" : "+w" (c) : "w" (a), "w" (b.B_1) : );
171
+ #else
172
+ if(LaneID==0) c = vfmaq_laneq_f64(c, a, b.B_0, 0);
173
+ else if(LaneID==1) c = vfmaq_laneq_f64(c, a, b.B_0, 1);
174
+ else if(LaneID==2) c = vfmaq_laneq_f64(c, a, b.B_1, 0);
175
+ else if(LaneID==3) c = vfmaq_laneq_f64(c, a, b.B_1, 1);
176
+ #endif
177
+ }
178
+ };
179
+
180
+ #endif // EIGEN_ARCH_ARM64
181
+
182
+ } // namespace internal
183
+ } // namespace Eigen
@@ -5,10 +5,6 @@
5
5
  // Public License v. 2.0. If a copy of the MPL was not distributed
6
6
  // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
7
7
 
8
- /* The sin, cos, exp, and log functions of this file come from
9
- * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
10
- */
11
-
12
8
  #ifndef EIGEN_MATH_FUNCTIONS_NEON_H
13
9
  #define EIGEN_MATH_FUNCTIONS_NEON_H
14
10
 
@@ -16,74 +12,62 @@ namespace Eigen {
16
12
 
17
13
  namespace internal {
18
14
 
19
- template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
20
- Packet4f pexp<Packet4f>(const Packet4f& _x)
21
- {
22
- Packet4f x = _x;
23
- Packet4f tmp, fx;
24
-
25
- _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
26
- _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
27
- _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
28
- _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
29
- _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
30
- _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
31
- _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
32
- _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
33
- _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
34
- _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
35
- _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
36
- _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
37
- _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
38
- _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
39
-
40
- x = vminq_f32(x, p4f_exp_hi);
41
- x = vmaxq_f32(x, p4f_exp_lo);
42
-
43
- /* express exp(x) as exp(g + n*log(2)) */
44
- fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF);
45
-
46
- /* perform a floorf */
47
- tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
48
-
49
- /* if greater, substract 1 */
50
- Packet4ui mask = vcgtq_f32(tmp, fx);
51
- mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1));
52
-
53
- fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
54
-
55
- tmp = vmulq_f32(fx, p4f_cephes_exp_C1);
56
- Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2);
57
- x = vsubq_f32(x, tmp);
58
- x = vsubq_f32(x, z);
59
-
60
- Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x);
61
- z = vmulq_f32(x, x);
62
- y = vaddq_f32(y, p4f_cephes_exp_p1);
63
- y = vmulq_f32(y, x);
64
- y = vaddq_f32(y, p4f_cephes_exp_p2);
65
- y = vmulq_f32(y, x);
66
- y = vaddq_f32(y, p4f_cephes_exp_p3);
67
- y = vmulq_f32(y, x);
68
- y = vaddq_f32(y, p4f_cephes_exp_p4);
69
- y = vmulq_f32(y, x);
70
- y = vaddq_f32(y, p4f_cephes_exp_p5);
71
-
72
- y = vmulq_f32(y, z);
73
- y = vaddq_f32(y, x);
74
- y = vaddq_f32(y, p4f_1);
75
-
76
- /* build 2^n */
77
- int32x4_t mm;
78
- mm = vcvtq_s32_f32(fx);
79
- mm = vaddq_s32(mm, p4i_0x7f);
80
- mm = vshlq_n_s32(mm, 23);
81
- Packet4f pow2n = vreinterpretq_f32_s32(mm);
82
-
83
- y = vmulq_f32(y, pow2n);
84
- return y;
15
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f pexp<Packet2f>(const Packet2f& x)
16
+ { return pexp_float(x); }
17
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pexp<Packet4f>(const Packet4f& x)
18
+ { return pexp_float(x); }
19
+
20
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f plog<Packet2f>(const Packet2f& x)
21
+ { return plog_float(x); }
22
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f plog<Packet4f>(const Packet4f& x)
23
+ { return plog_float(x); }
24
+
25
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f psin<Packet2f>(const Packet2f& x)
26
+ { return psin_float(x); }
27
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psin<Packet4f>(const Packet4f& x)
28
+ { return psin_float(x); }
29
+
30
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f pcos<Packet2f>(const Packet2f& x)
31
+ { return pcos_float(x); }
32
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pcos<Packet4f>(const Packet4f& x)
33
+ { return pcos_float(x); }
34
+
35
+ // Hyperbolic Tangent function.
36
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f ptanh<Packet2f>(const Packet2f& x)
37
+ { return internal::generic_fast_tanh_float(x); }
38
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f ptanh<Packet4f>(const Packet4f& x)
39
+ { return internal::generic_fast_tanh_float(x); }
40
+
41
+ BF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin)
42
+ BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos)
43
+ BF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog)
44
+ BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pexp)
45
+ BF16_PACKET_FUNCTION(Packet4f, Packet4bf, ptanh)
46
+
47
+ template <>
48
+ EIGEN_STRONG_INLINE Packet4bf pfrexp(const Packet4bf& a, Packet4bf& exponent) {
49
+ Packet4f fexponent;
50
+ const Packet4bf out = F32ToBf16(pfrexp<Packet4f>(Bf16ToF32(a), fexponent));
51
+ exponent = F32ToBf16(fexponent);
52
+ return out;
53
+ }
54
+
55
+ template <>
56
+ EIGEN_STRONG_INLINE Packet4bf pldexp(const Packet4bf& a, const Packet4bf& exponent) {
57
+ return F32ToBf16(pldexp<Packet4f>(Bf16ToF32(a), Bf16ToF32(exponent)));
85
58
  }
86
59
 
60
+ //---------- double ----------
61
+
62
+ #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
63
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp<Packet2d>(const Packet2d& x)
64
+ { return pexp_double(x); }
65
+
66
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d plog<Packet2d>(const Packet2d& x)
67
+ { return plog_double(x); }
68
+
69
+ #endif
70
+
87
71
  } // end namespace internal
88
72
 
89
73
  } // end namespace Eigen