@smake/eigen 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. package/README.md +1 -1
  2. package/eigen/COPYING.APACHE +203 -0
  3. package/eigen/COPYING.BSD +1 -1
  4. package/eigen/COPYING.MINPACK +51 -52
  5. package/eigen/Eigen/Cholesky +0 -1
  6. package/eigen/Eigen/Core +108 -266
  7. package/eigen/Eigen/Eigenvalues +0 -1
  8. package/eigen/Eigen/Geometry +3 -6
  9. package/eigen/Eigen/Householder +0 -1
  10. package/eigen/Eigen/Jacobi +0 -1
  11. package/eigen/Eigen/KLUSupport +41 -0
  12. package/eigen/Eigen/LU +2 -5
  13. package/eigen/Eigen/OrderingMethods +0 -3
  14. package/eigen/Eigen/PaStiXSupport +1 -0
  15. package/eigen/Eigen/PardisoSupport +0 -0
  16. package/eigen/Eigen/QR +0 -1
  17. package/eigen/Eigen/QtAlignedMalloc +0 -1
  18. package/eigen/Eigen/SVD +0 -1
  19. package/eigen/Eigen/Sparse +0 -2
  20. package/eigen/Eigen/SparseCholesky +0 -8
  21. package/eigen/Eigen/SparseLU +4 -0
  22. package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  23. package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  24. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  25. package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  26. package/eigen/Eigen/src/Core/Array.h +99 -11
  27. package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
  28. package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  29. package/eigen/Eigen/src/Core/Assign.h +1 -1
  30. package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  31. package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  32. package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  33. package/eigen/Eigen/src/Core/Block.h +56 -60
  34. package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  35. package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  36. package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  37. package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  38. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  39. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  40. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  41. package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
  42. package/eigen/Eigen/src/Core/DenseBase.h +128 -39
  43. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  44. package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
  45. package/eigen/Eigen/src/Core/Diagonal.h +21 -23
  46. package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  47. package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  48. package/eigen/Eigen/src/Core/Dot.h +10 -10
  49. package/eigen/Eigen/src/Core/EigenBase.h +10 -9
  50. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  51. package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  52. package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  53. package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
  54. package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  55. package/eigen/Eigen/src/Core/IO.h +40 -7
  56. package/eigen/Eigen/src/Core/IndexedView.h +237 -0
  57. package/eigen/Eigen/src/Core/Inverse.h +9 -10
  58. package/eigen/Eigen/src/Core/Map.h +7 -7
  59. package/eigen/Eigen/src/Core/MapBase.h +5 -3
  60. package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
  61. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  62. package/eigen/Eigen/src/Core/Matrix.h +131 -25
  63. package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
  64. package/eigen/Eigen/src/Core/NestByValue.h +25 -50
  65. package/eigen/Eigen/src/Core/NoAlias.h +4 -3
  66. package/eigen/Eigen/src/Core/NumTraits.h +107 -20
  67. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  68. package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
  69. package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
  70. package/eigen/Eigen/src/Core/Product.h +30 -25
  71. package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
  72. package/eigen/Eigen/src/Core/Random.h +37 -1
  73. package/eigen/Eigen/src/Core/Redux.h +180 -170
  74. package/eigen/Eigen/src/Core/Ref.h +118 -21
  75. package/eigen/Eigen/src/Core/Replicate.h +8 -8
  76. package/eigen/Eigen/src/Core/Reshaped.h +454 -0
  77. package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  78. package/eigen/Eigen/src/Core/Reverse.h +18 -12
  79. package/eigen/Eigen/src/Core/Select.h +8 -6
  80. package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  81. package/eigen/Eigen/src/Core/Solve.h +14 -14
  82. package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
  83. package/eigen/Eigen/src/Core/SolverBase.h +41 -3
  84. package/eigen/Eigen/src/Core/StableNorm.h +100 -70
  85. package/eigen/Eigen/src/Core/StlIterators.h +463 -0
  86. package/eigen/Eigen/src/Core/Stride.h +9 -4
  87. package/eigen/Eigen/src/Core/Swap.h +5 -4
  88. package/eigen/Eigen/src/Core/Transpose.h +86 -27
  89. package/eigen/Eigen/src/Core/Transpositions.h +26 -8
  90. package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
  91. package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  92. package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  93. package/eigen/Eigen/src/Core/Visitor.h +137 -29
  94. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  95. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  96. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  97. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  98. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  99. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
  100. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
  101. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  102. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  103. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  104. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  105. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  106. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  107. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  108. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  109. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  110. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  111. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  112. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  113. package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  114. package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  115. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  116. package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  117. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  118. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  119. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  120. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  121. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  122. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  123. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  124. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  125. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  126. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  127. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  128. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  129. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  130. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  131. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  132. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  133. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  134. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  135. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  136. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  137. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  138. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  139. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  140. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  141. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  142. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  143. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  144. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  145. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  146. package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  147. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
  148. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
  149. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
  150. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
  151. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
  152. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  153. package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
  154. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
  155. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  156. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
  157. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  158. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
  159. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
  160. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  161. package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
  162. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  163. package/eigen/Eigen/src/Core/util/Constants.h +25 -9
  164. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
  165. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
  166. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  167. package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  168. package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  169. package/eigen/Eigen/src/Core/util/Macros.h +661 -250
  170. package/eigen/Eigen/src/Core/util/Memory.h +222 -52
  171. package/eigen/Eigen/src/Core/util/Meta.h +349 -105
  172. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  173. package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  174. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  175. package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
  176. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  177. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
  178. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  179. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  180. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  181. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  182. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  183. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
  184. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
  185. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  186. package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  187. package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  188. package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  189. package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  190. package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  191. package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  192. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  193. package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
  194. package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  195. package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
  196. package/eigen/Eigen/src/Geometry/Transform.h +86 -65
  197. package/eigen/Eigen/src/Geometry/Translation.h +6 -6
  198. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  199. package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  200. package/eigen/Eigen/src/Householder/Householder.h +8 -4
  201. package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  202. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  203. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  204. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  205. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  206. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  207. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  208. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  209. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  210. package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  211. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  212. package/eigen/Eigen/src/LU/Determinant.h +35 -19
  213. package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  214. package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  215. package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
  216. package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  217. package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  218. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  219. package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  220. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  221. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
  222. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  223. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  224. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  225. package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  226. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  227. package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
  228. package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  229. package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
  230. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  231. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
  232. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
  233. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  234. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  235. package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  236. package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  237. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  238. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  239. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
  240. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  241. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
  242. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  243. package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  244. package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  245. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
  246. package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  247. package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  248. package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
  249. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  250. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  251. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  252. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  253. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  254. package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  255. package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
  256. package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  257. package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  258. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  259. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  260. package/eigen/Eigen/src/misc/lapacke.h +5 -4
  261. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
  262. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  263. package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  264. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  265. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  266. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  267. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  268. package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  269. package/eigen/README.md +2 -0
  270. package/lib/LibEigen.d.ts +4 -0
  271. package/lib/LibEigen.js +14 -0
  272. package/lib/index.d.ts +1 -1
  273. package/lib/index.js +7 -3
  274. package/package.json +2 -10
  275. package/eigen/Eigen/CMakeLists.txt +0 -19
  276. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  277. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  278. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  279. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  280. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  281. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  282. package/lib/eigen.d.ts +0 -2
  283. package/lib/eigen.js +0 -15
@@ -1,212 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_TYPE_CASTING_CUDA_H
11
- #define EIGEN_TYPE_CASTING_CUDA_H
12
-
13
- namespace Eigen {
14
-
15
- namespace internal {
16
-
17
- template<>
18
- struct scalar_cast_op<float, Eigen::half> {
19
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
20
- typedef Eigen::half result_type;
21
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
22
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
23
- return __float2half(a);
24
- #else
25
- return Eigen::half(a);
26
- #endif
27
- }
28
- };
29
-
30
- template<>
31
- struct functor_traits<scalar_cast_op<float, Eigen::half> >
32
- { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
33
-
34
-
35
- template<>
36
- struct scalar_cast_op<int, Eigen::half> {
37
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
38
- typedef Eigen::half result_type;
39
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
40
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
41
- return __float2half(static_cast<float>(a));
42
- #else
43
- return Eigen::half(static_cast<float>(a));
44
- #endif
45
- }
46
- };
47
-
48
- template<>
49
- struct functor_traits<scalar_cast_op<int, Eigen::half> >
50
- { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
51
-
52
-
53
- template<>
54
- struct scalar_cast_op<Eigen::half, float> {
55
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
56
- typedef float result_type;
57
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
58
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
59
- return __half2float(a);
60
- #else
61
- return static_cast<float>(a);
62
- #endif
63
- }
64
- };
65
-
66
- template<>
67
- struct functor_traits<scalar_cast_op<Eigen::half, float> >
68
- { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
69
-
70
-
71
-
72
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
73
-
74
- template <>
75
- struct type_casting_traits<Eigen::half, float> {
76
- enum {
77
- VectorizedCast = 1,
78
- SrcCoeffRatio = 2,
79
- TgtCoeffRatio = 1
80
- };
81
- };
82
-
83
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
84
- float2 r1 = __half22float2(a);
85
- float2 r2 = __half22float2(b);
86
- return make_float4(r1.x, r1.y, r2.x, r2.y);
87
- }
88
-
89
- template <>
90
- struct type_casting_traits<float, Eigen::half> {
91
- enum {
92
- VectorizedCast = 1,
93
- SrcCoeffRatio = 1,
94
- TgtCoeffRatio = 2
95
- };
96
- };
97
-
98
- template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
99
- // Simply discard the second half of the input
100
- return __floats2half2_rn(a.x, a.y);
101
- }
102
-
103
- #elif defined EIGEN_VECTORIZE_AVX512
104
- template <>
105
- struct type_casting_traits<half, float> {
106
- enum {
107
- VectorizedCast = 1,
108
- SrcCoeffRatio = 1,
109
- TgtCoeffRatio = 1
110
- };
111
- };
112
-
113
- template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
114
- return half2float(a);
115
- }
116
-
117
- template <>
118
- struct type_casting_traits<float, half> {
119
- enum {
120
- VectorizedCast = 1,
121
- SrcCoeffRatio = 1,
122
- TgtCoeffRatio = 1
123
- };
124
- };
125
-
126
- template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
127
- return float2half(a);
128
- }
129
-
130
- #elif defined EIGEN_VECTORIZE_AVX
131
-
132
- template <>
133
- struct type_casting_traits<Eigen::half, float> {
134
- enum {
135
- VectorizedCast = 1,
136
- SrcCoeffRatio = 1,
137
- TgtCoeffRatio = 1
138
- };
139
- };
140
-
141
- template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
142
- return half2float(a);
143
- }
144
-
145
- template <>
146
- struct type_casting_traits<float, Eigen::half> {
147
- enum {
148
- VectorizedCast = 1,
149
- SrcCoeffRatio = 1,
150
- TgtCoeffRatio = 1
151
- };
152
- };
153
-
154
- template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
155
- return float2half(a);
156
- }
157
-
158
- // Disable the following code since it's broken on too many platforms / compilers.
159
- //#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
160
- #elif 0
161
-
162
- template <>
163
- struct type_casting_traits<Eigen::half, float> {
164
- enum {
165
- VectorizedCast = 1,
166
- SrcCoeffRatio = 1,
167
- TgtCoeffRatio = 1
168
- };
169
- };
170
-
171
- template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
172
- __int64_t a64 = _mm_cvtm64_si64(a.x);
173
- Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
174
- float f1 = static_cast<float>(h);
175
- h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
176
- float f2 = static_cast<float>(h);
177
- h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
178
- float f3 = static_cast<float>(h);
179
- h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
180
- float f4 = static_cast<float>(h);
181
- return _mm_set_ps(f4, f3, f2, f1);
182
- }
183
-
184
- template <>
185
- struct type_casting_traits<float, Eigen::half> {
186
- enum {
187
- VectorizedCast = 1,
188
- SrcCoeffRatio = 1,
189
- TgtCoeffRatio = 1
190
- };
191
- };
192
-
193
- template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
194
- EIGEN_ALIGN16 float aux[4];
195
- pstore(aux, a);
196
- Eigen::half h0(aux[0]);
197
- Eigen::half h1(aux[1]);
198
- Eigen::half h2(aux[2]);
199
- Eigen::half h3(aux[3]);
200
-
201
- Packet4h result;
202
- result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
203
- return result;
204
- }
205
-
206
- #endif
207
-
208
- } // end namespace internal
209
-
210
- } // end namespace Eigen
211
-
212
- #endif // EIGEN_TYPE_CASTING_CUDA_H
@@ -1,161 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
5
- // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6
- //
7
- // This Source Code Form is subject to the terms of the Mozilla
8
- // Public License v. 2.0. If a copy of the MPL was not distributed
9
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
-
11
- #ifndef EIGEN_GEOMETRY_SSE_H
12
- #define EIGEN_GEOMETRY_SSE_H
13
-
14
- namespace Eigen {
15
-
16
- namespace internal {
17
-
18
- template<class Derived, class OtherDerived>
19
- struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
20
- {
21
- enum {
22
- AAlignment = traits<Derived>::Alignment,
23
- BAlignment = traits<OtherDerived>::Alignment,
24
- ResAlignment = traits<Quaternion<float> >::Alignment
25
- };
26
- static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
27
- {
28
- Quaternion<float> res;
29
- const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
30
- __m128 a = _a.coeffs().template packet<AAlignment>(0);
31
- __m128 b = _b.coeffs().template packet<BAlignment>(0);
32
- __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
33
- __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
34
- pstoret<float,Packet4f,ResAlignment>(
35
- &res.x(),
36
- _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
37
- _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
38
- vec4f_swizzle1(b,1,2,0,0))),
39
- _mm_xor_ps(mask,_mm_add_ps(s1,s2))));
40
-
41
- return res;
42
- }
43
- };
44
-
45
- template<class Derived>
46
- struct quat_conj<Architecture::SSE, Derived, float>
47
- {
48
- enum {
49
- ResAlignment = traits<Quaternion<float> >::Alignment
50
- };
51
- static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
52
- {
53
- Quaternion<float> res;
54
- const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
55
- pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
56
- return res;
57
- }
58
- };
59
-
60
-
61
- template<typename VectorLhs,typename VectorRhs>
62
- struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
63
- {
64
- enum {
65
- ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
66
- };
67
- static inline typename plain_matrix_type<VectorLhs>::type
68
- run(const VectorLhs& lhs, const VectorRhs& rhs)
69
- {
70
- __m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0);
71
- __m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0);
72
- __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
73
- __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
74
- typename plain_matrix_type<VectorLhs>::type res;
75
- pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
76
- return res;
77
- }
78
- };
79
-
80
-
81
-
82
-
83
- template<class Derived, class OtherDerived>
84
- struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
85
- {
86
- enum {
87
- BAlignment = traits<OtherDerived>::Alignment,
88
- ResAlignment = traits<Quaternion<double> >::Alignment
89
- };
90
-
91
- static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
92
- {
93
- const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
94
-
95
- Quaternion<double> res;
96
-
97
- const double* a = _a.coeffs().data();
98
- Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
99
- Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
100
- Packet2d a_xx = pset1<Packet2d>(a[0]);
101
- Packet2d a_yy = pset1<Packet2d>(a[1]);
102
- Packet2d a_zz = pset1<Packet2d>(a[2]);
103
- Packet2d a_ww = pset1<Packet2d>(a[3]);
104
-
105
- // two temporaries:
106
- Packet2d t1, t2;
107
-
108
- /*
109
- * t1 = ww*xy + yy*zw
110
- * t2 = zz*xy - xx*zw
111
- * res.xy = t1 +/- swap(t2)
112
- */
113
- t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw));
114
- t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
115
- #ifdef EIGEN_VECTORIZE_SSE3
116
- EIGEN_UNUSED_VARIABLE(mask)
117
- pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
118
- #else
119
- pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
120
- #endif
121
-
122
- /*
123
- * t1 = ww*zw - yy*xy
124
- * t2 = zz*zw + xx*xy
125
- * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2)
126
- */
127
- t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy));
128
- t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
129
- #ifdef EIGEN_VECTORIZE_SSE3
130
- EIGEN_UNUSED_VARIABLE(mask)
131
- pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
132
- #else
133
- pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
134
- #endif
135
-
136
- return res;
137
- }
138
- };
139
-
140
- template<class Derived>
141
- struct quat_conj<Architecture::SSE, Derived, double>
142
- {
143
- enum {
144
- ResAlignment = traits<Quaternion<double> >::Alignment
145
- };
146
- static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
147
- {
148
- Quaternion<double> res;
149
- const __m128d mask0 = _mm_setr_pd(-0.,-0.);
150
- const __m128d mask2 = _mm_setr_pd(-0.,0.);
151
- pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
152
- pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
153
- return res;
154
- }
155
- };
156
-
157
- } // end namespace internal
158
-
159
- } // end namespace Eigen
160
-
161
- #endif // EIGEN_GEOMETRY_SSE_H