@smake/eigen 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. package/README.md +1 -1
  2. package/eigen/COPYING.APACHE +203 -0
  3. package/eigen/COPYING.BSD +1 -1
  4. package/eigen/COPYING.MINPACK +51 -52
  5. package/eigen/Eigen/Cholesky +0 -1
  6. package/eigen/Eigen/Core +108 -266
  7. package/eigen/Eigen/Eigenvalues +0 -1
  8. package/eigen/Eigen/Geometry +3 -6
  9. package/eigen/Eigen/Householder +0 -1
  10. package/eigen/Eigen/Jacobi +0 -1
  11. package/eigen/Eigen/KLUSupport +41 -0
  12. package/eigen/Eigen/LU +2 -5
  13. package/eigen/Eigen/OrderingMethods +0 -3
  14. package/eigen/Eigen/PaStiXSupport +1 -0
  15. package/eigen/Eigen/PardisoSupport +0 -0
  16. package/eigen/Eigen/QR +0 -1
  17. package/eigen/Eigen/QtAlignedMalloc +0 -1
  18. package/eigen/Eigen/SVD +0 -1
  19. package/eigen/Eigen/Sparse +0 -2
  20. package/eigen/Eigen/SparseCholesky +0 -8
  21. package/eigen/Eigen/SparseLU +4 -0
  22. package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  23. package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  24. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  25. package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  26. package/eigen/Eigen/src/Core/Array.h +99 -11
  27. package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
  28. package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  29. package/eigen/Eigen/src/Core/Assign.h +1 -1
  30. package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  31. package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  32. package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  33. package/eigen/Eigen/src/Core/Block.h +56 -60
  34. package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  35. package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  36. package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  37. package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  38. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  39. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  40. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  41. package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
  42. package/eigen/Eigen/src/Core/DenseBase.h +128 -39
  43. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  44. package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
  45. package/eigen/Eigen/src/Core/Diagonal.h +21 -23
  46. package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  47. package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  48. package/eigen/Eigen/src/Core/Dot.h +10 -10
  49. package/eigen/Eigen/src/Core/EigenBase.h +10 -9
  50. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  51. package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  52. package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  53. package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
  54. package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  55. package/eigen/Eigen/src/Core/IO.h +40 -7
  56. package/eigen/Eigen/src/Core/IndexedView.h +237 -0
  57. package/eigen/Eigen/src/Core/Inverse.h +9 -10
  58. package/eigen/Eigen/src/Core/Map.h +7 -7
  59. package/eigen/Eigen/src/Core/MapBase.h +5 -3
  60. package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
  61. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  62. package/eigen/Eigen/src/Core/Matrix.h +131 -25
  63. package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
  64. package/eigen/Eigen/src/Core/NestByValue.h +25 -50
  65. package/eigen/Eigen/src/Core/NoAlias.h +4 -3
  66. package/eigen/Eigen/src/Core/NumTraits.h +107 -20
  67. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  68. package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
  69. package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
  70. package/eigen/Eigen/src/Core/Product.h +30 -25
  71. package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
  72. package/eigen/Eigen/src/Core/Random.h +37 -1
  73. package/eigen/Eigen/src/Core/Redux.h +180 -170
  74. package/eigen/Eigen/src/Core/Ref.h +118 -21
  75. package/eigen/Eigen/src/Core/Replicate.h +8 -8
  76. package/eigen/Eigen/src/Core/Reshaped.h +454 -0
  77. package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  78. package/eigen/Eigen/src/Core/Reverse.h +18 -12
  79. package/eigen/Eigen/src/Core/Select.h +8 -6
  80. package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  81. package/eigen/Eigen/src/Core/Solve.h +14 -14
  82. package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
  83. package/eigen/Eigen/src/Core/SolverBase.h +41 -3
  84. package/eigen/Eigen/src/Core/StableNorm.h +100 -70
  85. package/eigen/Eigen/src/Core/StlIterators.h +463 -0
  86. package/eigen/Eigen/src/Core/Stride.h +9 -4
  87. package/eigen/Eigen/src/Core/Swap.h +5 -4
  88. package/eigen/Eigen/src/Core/Transpose.h +86 -27
  89. package/eigen/Eigen/src/Core/Transpositions.h +26 -8
  90. package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
  91. package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  92. package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  93. package/eigen/Eigen/src/Core/Visitor.h +137 -29
  94. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  95. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  96. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  97. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  98. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  99. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
  100. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
  101. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  102. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  103. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  104. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  105. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  106. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  107. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  108. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  109. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  110. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  111. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  112. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  113. package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  114. package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  115. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  116. package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  117. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  118. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  119. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  120. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  121. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  122. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  123. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  124. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  125. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  126. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  127. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  128. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  129. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  130. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  131. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  132. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  133. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  134. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  135. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  136. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  137. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  138. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  139. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  140. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  141. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  142. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  143. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  144. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  145. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  146. package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  147. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
  148. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
  149. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
  150. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
  151. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
  152. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  153. package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
  154. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
  155. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  156. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
  157. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  158. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
  159. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
  160. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  161. package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
  162. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  163. package/eigen/Eigen/src/Core/util/Constants.h +25 -9
  164. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
  165. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
  166. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  167. package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  168. package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  169. package/eigen/Eigen/src/Core/util/Macros.h +661 -250
  170. package/eigen/Eigen/src/Core/util/Memory.h +222 -52
  171. package/eigen/Eigen/src/Core/util/Meta.h +349 -105
  172. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  173. package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  174. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  175. package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
  176. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  177. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
  178. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  179. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  180. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  181. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  182. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  183. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
  184. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
  185. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  186. package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  187. package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  188. package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  189. package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  190. package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  191. package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  192. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  193. package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
  194. package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  195. package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
  196. package/eigen/Eigen/src/Geometry/Transform.h +86 -65
  197. package/eigen/Eigen/src/Geometry/Translation.h +6 -6
  198. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  199. package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  200. package/eigen/Eigen/src/Householder/Householder.h +8 -4
  201. package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  202. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  203. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  204. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  205. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  206. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  207. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  208. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  209. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  210. package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  211. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  212. package/eigen/Eigen/src/LU/Determinant.h +35 -19
  213. package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  214. package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  215. package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
  216. package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  217. package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  218. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  219. package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  220. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  221. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
  222. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  223. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  224. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  225. package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  226. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  227. package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
  228. package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  229. package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
  230. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  231. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
  232. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
  233. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  234. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  235. package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  236. package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  237. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  238. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  239. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
  240. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  241. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
  242. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  243. package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  244. package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  245. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
  246. package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  247. package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  248. package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
  249. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  250. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  251. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  252. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  253. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  254. package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  255. package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
  256. package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  257. package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  258. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  259. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  260. package/eigen/Eigen/src/misc/lapacke.h +5 -4
  261. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
  262. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  263. package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  264. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  265. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  266. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  267. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  268. package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  269. package/eigen/README.md +2 -0
  270. package/lib/LibEigen.d.ts +4 -0
  271. package/lib/LibEigen.js +14 -0
  272. package/lib/index.d.ts +1 -1
  273. package/lib/index.js +7 -3
  274. package/package.json +2 -10
  275. package/eigen/Eigen/CMakeLists.txt +0 -19
  276. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  277. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  278. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  279. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  280. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  281. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  282. package/lib/eigen.d.ts +0 -2
  283. package/lib/eigen.js +0 -15
@@ -1,333 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_PACKET_MATH_CUDA_H
11
- #define EIGEN_PACKET_MATH_CUDA_H
12
-
13
- namespace Eigen {
14
-
15
- namespace internal {
16
-
17
- // Make sure this is only available when targeting a GPU: we don't want to
18
- // introduce conflicts between these packet_traits definitions and the ones
19
- // we'll use on the host side (SSE, AVX, ...)
20
- #if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
21
- template<> struct is_arithmetic<float4> { enum { value = true }; };
22
- template<> struct is_arithmetic<double2> { enum { value = true }; };
23
-
24
- template<> struct packet_traits<float> : default_packet_traits
25
- {
26
- typedef float4 type;
27
- typedef float4 half;
28
- enum {
29
- Vectorizable = 1,
30
- AlignedOnScalar = 1,
31
- size=4,
32
- HasHalfPacket = 0,
33
-
34
- HasDiv = 1,
35
- HasSin = 0,
36
- HasCos = 0,
37
- HasLog = 1,
38
- HasExp = 1,
39
- HasSqrt = 1,
40
- HasRsqrt = 1,
41
- HasLGamma = 1,
42
- HasDiGamma = 1,
43
- HasZeta = 1,
44
- HasPolygamma = 1,
45
- HasErf = 1,
46
- HasErfc = 1,
47
- HasIGamma = 1,
48
- HasIGammac = 1,
49
- HasBetaInc = 1,
50
-
51
- HasBlend = 0,
52
- };
53
- };
54
-
55
- template<> struct packet_traits<double> : default_packet_traits
56
- {
57
- typedef double2 type;
58
- typedef double2 half;
59
- enum {
60
- Vectorizable = 1,
61
- AlignedOnScalar = 1,
62
- size=2,
63
- HasHalfPacket = 0,
64
-
65
- HasDiv = 1,
66
- HasLog = 1,
67
- HasExp = 1,
68
- HasSqrt = 1,
69
- HasRsqrt = 1,
70
- HasLGamma = 1,
71
- HasDiGamma = 1,
72
- HasZeta = 1,
73
- HasPolygamma = 1,
74
- HasErf = 1,
75
- HasErfc = 1,
76
- HasIGamma = 1,
77
- HasIGammac = 1,
78
- HasBetaInc = 1,
79
-
80
- HasBlend = 0,
81
- };
82
- };
83
-
84
-
85
- template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
86
- template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
87
-
88
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
89
- return make_float4(from, from, from, from);
90
- }
91
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
92
- return make_double2(from, from);
93
- }
94
-
95
-
96
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
97
- return make_float4(a, a+1, a+2, a+3);
98
- }
99
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
100
- return make_double2(a, a+1);
101
- }
102
-
103
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
104
- return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
105
- }
106
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
107
- return make_double2(a.x+b.x, a.y+b.y);
108
- }
109
-
110
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
111
- return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
112
- }
113
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
114
- return make_double2(a.x-b.x, a.y-b.y);
115
- }
116
-
117
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
118
- return make_float4(-a.x, -a.y, -a.z, -a.w);
119
- }
120
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
121
- return make_double2(-a.x, -a.y);
122
- }
123
-
124
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
125
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
126
-
127
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
128
- return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
129
- }
130
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
131
- return make_double2(a.x*b.x, a.y*b.y);
132
- }
133
-
134
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
135
- return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
136
- }
137
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
138
- return make_double2(a.x/b.x, a.y/b.y);
139
- }
140
-
141
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
142
- return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
143
- }
144
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
145
- return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
146
- }
147
-
148
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
149
- return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
150
- }
151
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
152
- return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
153
- }
154
-
155
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
156
- return *reinterpret_cast<const float4*>(from);
157
- }
158
-
159
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
160
- return *reinterpret_cast<const double2*>(from);
161
- }
162
-
163
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
164
- return make_float4(from[0], from[1], from[2], from[3]);
165
- }
166
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
167
- return make_double2(from[0], from[1]);
168
- }
169
-
170
- template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
171
- return make_float4(from[0], from[0], from[1], from[1]);
172
- }
173
- template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
174
- return make_double2(from[0], from[0]);
175
- }
176
-
177
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
178
- *reinterpret_cast<float4*>(to) = from;
179
- }
180
-
181
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
182
- *reinterpret_cast<double2*>(to) = from;
183
- }
184
-
185
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
186
- to[0] = from.x;
187
- to[1] = from.y;
188
- to[2] = from.z;
189
- to[3] = from.w;
190
- }
191
-
192
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
193
- to[0] = from.x;
194
- to[1] = from.y;
195
- }
196
-
197
- template<>
198
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
199
- #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
200
- return __ldg((const float4*)from);
201
- #else
202
- return make_float4(from[0], from[1], from[2], from[3]);
203
- #endif
204
- }
205
- template<>
206
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
207
- #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
208
- return __ldg((const double2*)from);
209
- #else
210
- return make_double2(from[0], from[1]);
211
- #endif
212
- }
213
-
214
- template<>
215
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
216
- #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
217
- return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
218
- #else
219
- return make_float4(from[0], from[1], from[2], from[3]);
220
- #endif
221
- }
222
- template<>
223
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
224
- #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
225
- return make_double2(__ldg(from+0), __ldg(from+1));
226
- #else
227
- return make_double2(from[0], from[1]);
228
- #endif
229
- }
230
-
231
- template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
232
- return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
233
- }
234
-
235
- template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
236
- return make_double2(from[0*stride], from[1*stride]);
237
- }
238
-
239
- template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
240
- to[stride*0] = from.x;
241
- to[stride*1] = from.y;
242
- to[stride*2] = from.z;
243
- to[stride*3] = from.w;
244
- }
245
- template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
246
- to[stride*0] = from.x;
247
- to[stride*1] = from.y;
248
- }
249
-
250
- template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
251
- return a.x;
252
- }
253
- template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
254
- return a.x;
255
- }
256
-
257
- template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
258
- return a.x + a.y + a.z + a.w;
259
- }
260
- template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
261
- return a.x + a.y;
262
- }
263
-
264
- template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
265
- return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
266
- }
267
- template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
268
- return fmax(a.x, a.y);
269
- }
270
-
271
- template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
272
- return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
273
- }
274
- template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
275
- return fmin(a.x, a.y);
276
- }
277
-
278
- template<> EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
279
- return a.x * a.y * a.z * a.w;
280
- }
281
- template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
282
- return a.x * a.y;
283
- }
284
-
285
- template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
286
- return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
287
- }
288
- template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
289
- return make_double2(fabs(a.x), fabs(a.y));
290
- }
291
-
292
- EIGEN_DEVICE_FUNC inline void
293
- ptranspose(PacketBlock<float4,4>& kernel) {
294
- float tmp = kernel.packet[0].y;
295
- kernel.packet[0].y = kernel.packet[1].x;
296
- kernel.packet[1].x = tmp;
297
-
298
- tmp = kernel.packet[0].z;
299
- kernel.packet[0].z = kernel.packet[2].x;
300
- kernel.packet[2].x = tmp;
301
-
302
- tmp = kernel.packet[0].w;
303
- kernel.packet[0].w = kernel.packet[3].x;
304
- kernel.packet[3].x = tmp;
305
-
306
- tmp = kernel.packet[1].z;
307
- kernel.packet[1].z = kernel.packet[2].y;
308
- kernel.packet[2].y = tmp;
309
-
310
- tmp = kernel.packet[1].w;
311
- kernel.packet[1].w = kernel.packet[3].y;
312
- kernel.packet[3].y = tmp;
313
-
314
- tmp = kernel.packet[2].w;
315
- kernel.packet[2].w = kernel.packet[3].z;
316
- kernel.packet[3].z = tmp;
317
- }
318
-
319
- EIGEN_DEVICE_FUNC inline void
320
- ptranspose(PacketBlock<double2,2>& kernel) {
321
- double tmp = kernel.packet[0].y;
322
- kernel.packet[0].y = kernel.packet[1].x;
323
- kernel.packet[1].x = tmp;
324
- }
325
-
326
- #endif
327
-
328
- } // end namespace internal
329
-
330
- } // end namespace Eigen
331
-
332
-
333
- #endif // EIGEN_PACKET_MATH_CUDA_H