@smake/eigen 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287) hide show
  1. package/README.md +1 -1
  2. package/eigen/COPYING.APACHE +203 -0
  3. package/eigen/COPYING.BSD +26 -0
  4. package/eigen/COPYING.GPL +674 -0
  5. package/eigen/COPYING.LGPL +502 -0
  6. package/eigen/COPYING.MINPACK +51 -0
  7. package/eigen/COPYING.MPL2 +373 -0
  8. package/eigen/COPYING.README +18 -0
  9. package/eigen/Eigen/Cholesky +0 -1
  10. package/eigen/Eigen/Core +108 -266
  11. package/eigen/Eigen/Eigenvalues +0 -1
  12. package/eigen/Eigen/Geometry +3 -6
  13. package/eigen/Eigen/Householder +0 -1
  14. package/eigen/Eigen/Jacobi +0 -1
  15. package/eigen/Eigen/KLUSupport +41 -0
  16. package/eigen/Eigen/LU +2 -5
  17. package/eigen/Eigen/OrderingMethods +0 -3
  18. package/eigen/Eigen/PaStiXSupport +1 -0
  19. package/eigen/Eigen/PardisoSupport +0 -0
  20. package/eigen/Eigen/QR +0 -1
  21. package/eigen/Eigen/QtAlignedMalloc +0 -1
  22. package/eigen/Eigen/SVD +0 -1
  23. package/eigen/Eigen/Sparse +0 -2
  24. package/eigen/Eigen/SparseCholesky +0 -8
  25. package/eigen/Eigen/SparseLU +4 -0
  26. package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  27. package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  28. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  29. package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  30. package/eigen/Eigen/src/Core/Array.h +99 -11
  31. package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
  32. package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  33. package/eigen/Eigen/src/Core/Assign.h +1 -1
  34. package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  35. package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  36. package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  37. package/eigen/Eigen/src/Core/Block.h +56 -60
  38. package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  39. package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  40. package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  41. package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  42. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  43. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  44. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  45. package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
  46. package/eigen/Eigen/src/Core/DenseBase.h +128 -39
  47. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  48. package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
  49. package/eigen/Eigen/src/Core/Diagonal.h +21 -23
  50. package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  51. package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  52. package/eigen/Eigen/src/Core/Dot.h +10 -10
  53. package/eigen/Eigen/src/Core/EigenBase.h +10 -9
  54. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  55. package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  56. package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  57. package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
  58. package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  59. package/eigen/Eigen/src/Core/IO.h +40 -7
  60. package/eigen/Eigen/src/Core/IndexedView.h +237 -0
  61. package/eigen/Eigen/src/Core/Inverse.h +9 -10
  62. package/eigen/Eigen/src/Core/Map.h +7 -7
  63. package/eigen/Eigen/src/Core/MapBase.h +5 -3
  64. package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
  65. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  66. package/eigen/Eigen/src/Core/Matrix.h +131 -25
  67. package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
  68. package/eigen/Eigen/src/Core/NestByValue.h +25 -50
  69. package/eigen/Eigen/src/Core/NoAlias.h +4 -3
  70. package/eigen/Eigen/src/Core/NumTraits.h +107 -20
  71. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  72. package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
  73. package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
  74. package/eigen/Eigen/src/Core/Product.h +30 -25
  75. package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
  76. package/eigen/Eigen/src/Core/Random.h +37 -1
  77. package/eigen/Eigen/src/Core/Redux.h +180 -170
  78. package/eigen/Eigen/src/Core/Ref.h +118 -21
  79. package/eigen/Eigen/src/Core/Replicate.h +8 -8
  80. package/eigen/Eigen/src/Core/Reshaped.h +454 -0
  81. package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  82. package/eigen/Eigen/src/Core/Reverse.h +18 -12
  83. package/eigen/Eigen/src/Core/Select.h +8 -6
  84. package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  85. package/eigen/Eigen/src/Core/Solve.h +14 -14
  86. package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
  87. package/eigen/Eigen/src/Core/SolverBase.h +41 -3
  88. package/eigen/Eigen/src/Core/StableNorm.h +100 -70
  89. package/eigen/Eigen/src/Core/StlIterators.h +463 -0
  90. package/eigen/Eigen/src/Core/Stride.h +9 -4
  91. package/eigen/Eigen/src/Core/Swap.h +5 -4
  92. package/eigen/Eigen/src/Core/Transpose.h +86 -27
  93. package/eigen/Eigen/src/Core/Transpositions.h +26 -8
  94. package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
  95. package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  96. package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  97. package/eigen/Eigen/src/Core/Visitor.h +137 -29
  98. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  99. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  100. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  101. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  102. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  103. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
  104. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
  105. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  106. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  107. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  108. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  109. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  110. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  111. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  112. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  113. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  114. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  115. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  116. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  117. package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  118. package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  119. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  120. package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  121. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  122. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  123. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  124. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  125. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  126. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  127. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  128. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  129. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  130. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  131. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  132. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  133. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  134. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  135. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  136. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  137. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  138. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  139. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  140. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  141. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  142. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  143. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  144. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  145. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  146. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  147. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  148. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  149. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  150. package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  151. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
  152. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
  153. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
  154. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
  155. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
  156. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  157. package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
  158. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
  159. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  160. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
  161. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  162. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
  163. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
  164. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  165. package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
  166. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  167. package/eigen/Eigen/src/Core/util/Constants.h +25 -9
  168. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
  169. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
  170. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  171. package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  172. package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  173. package/eigen/Eigen/src/Core/util/Macros.h +661 -250
  174. package/eigen/Eigen/src/Core/util/Memory.h +222 -52
  175. package/eigen/Eigen/src/Core/util/Meta.h +349 -105
  176. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  177. package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  178. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  179. package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
  180. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  181. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
  182. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  183. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  184. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  185. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  186. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  187. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
  188. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
  189. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  190. package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  191. package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  192. package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  193. package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  194. package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  195. package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  196. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  197. package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
  198. package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  199. package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
  200. package/eigen/Eigen/src/Geometry/Transform.h +86 -65
  201. package/eigen/Eigen/src/Geometry/Translation.h +6 -6
  202. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  203. package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  204. package/eigen/Eigen/src/Householder/Householder.h +8 -4
  205. package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  206. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  207. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  208. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  209. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  210. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  211. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  212. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  213. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  214. package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  215. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  216. package/eigen/Eigen/src/LU/Determinant.h +35 -19
  217. package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  218. package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  219. package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
  220. package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  221. package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  222. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  223. package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  224. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  225. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
  226. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  227. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  228. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  229. package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  230. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  231. package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
  232. package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  233. package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
  234. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  235. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
  236. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
  237. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  238. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  239. package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  240. package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  241. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  242. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  243. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
  244. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  245. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
  246. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  247. package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  248. package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  249. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
  250. package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  251. package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  252. package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
  253. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  254. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  255. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  256. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  257. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  258. package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  259. package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
  260. package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  261. package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  262. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  263. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  264. package/eigen/Eigen/src/misc/lapacke.h +5 -4
  265. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
  266. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  267. package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  268. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  269. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  270. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  271. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  272. package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  273. package/eigen/README.md +5 -0
  274. package/lib/LibEigen.d.ts +4 -0
  275. package/lib/LibEigen.js +14 -0
  276. package/lib/index.d.ts +1 -1
  277. package/lib/index.js +7 -3
  278. package/package.json +2 -10
  279. package/eigen/Eigen/CMakeLists.txt +0 -19
  280. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  281. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  282. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  283. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  284. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  285. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  286. package/lib/eigen.d.ts +0 -2
  287. package/lib/eigen.js +0 -15
@@ -1,675 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // This Source Code Form is subject to the terms of the Mozilla
5
- // Public License v. 2.0. If a copy of the MPL was not distributed
6
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
7
- //
8
- // The conversion routines are Copyright (c) Fabian Giesen, 2016.
9
- // The original license follows:
10
- //
11
- // Copyright (c) Fabian Giesen, 2016
12
- // All rights reserved.
13
- // Redistribution and use in source and binary forms, with or without
14
- // modification, are permitted.
15
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19
- // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
-
27
-
28
- // Standard 16-bit float type, mostly useful for GPUs. Defines a new
29
- // type Eigen::half (inheriting from CUDA's __half struct) with
30
- // operator overloads such that it behaves basically as an arithmetic
31
- // type. It will be quite slow on CPUs (so it is recommended to stay
32
- // in float32_bits for CPUs, except for simple parameter conversions, I/O
33
- // to disk and the likes), but fast on GPUs.
34
-
35
-
36
- #ifndef EIGEN_HALF_CUDA_H
37
- #define EIGEN_HALF_CUDA_H
38
-
39
- #if __cplusplus > 199711L
40
- #define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type()
41
- #else
42
- #define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
43
- #endif
44
-
45
- #include <sstream>
46
-
47
- namespace Eigen {
48
-
49
- struct half;
50
-
51
- namespace half_impl {
52
-
53
- #if !defined(EIGEN_HAS_CUDA_FP16)
54
- // Make our own __half_raw definition that is similar to CUDA's.
55
- struct __half_raw {
56
- EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
57
- explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
58
- unsigned short x;
59
- };
60
- #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
61
- // In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
62
- typedef __half __half_raw;
63
- #endif
64
-
65
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
66
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
67
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);
68
-
69
- struct half_base : public __half_raw {
70
- EIGEN_DEVICE_FUNC half_base() {}
71
- EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {}
72
- EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
73
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
74
- EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
75
- #endif
76
- };
77
-
78
- } // namespace half_impl
79
-
80
- // Class definition.
81
- struct half : public half_impl::half_base {
82
- #if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
83
- typedef half_impl::__half_raw __half_raw;
84
- #endif
85
-
86
- EIGEN_DEVICE_FUNC half() {}
87
-
88
- EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
89
- EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
90
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
91
- EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
92
- #endif
93
-
94
- explicit EIGEN_DEVICE_FUNC half(bool b)
95
- : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
96
- template<class T>
97
- explicit EIGEN_DEVICE_FUNC half(const T& val)
98
- : half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
99
- explicit EIGEN_DEVICE_FUNC half(float f)
100
- : half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
101
-
102
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
103
- // +0.0 and -0.0 become false, everything else becomes true.
104
- return (x & 0x7fff) != 0;
105
- }
106
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
107
- return static_cast<signed char>(half_impl::half_to_float(*this));
108
- }
109
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
110
- return static_cast<unsigned char>(half_impl::half_to_float(*this));
111
- }
112
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
113
- return static_cast<short>(half_impl::half_to_float(*this));
114
- }
115
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
116
- return static_cast<unsigned short>(half_impl::half_to_float(*this));
117
- }
118
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
119
- return static_cast<int>(half_impl::half_to_float(*this));
120
- }
121
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
122
- return static_cast<unsigned int>(half_impl::half_to_float(*this));
123
- }
124
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
125
- return static_cast<long>(half_impl::half_to_float(*this));
126
- }
127
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
128
- return static_cast<unsigned long>(half_impl::half_to_float(*this));
129
- }
130
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
131
- return static_cast<long long>(half_impl::half_to_float(*this));
132
- }
133
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
134
- return static_cast<unsigned long long>(half_to_float(*this));
135
- }
136
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
137
- return half_impl::half_to_float(*this);
138
- }
139
- EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
140
- return static_cast<double>(half_impl::half_to_float(*this));
141
- }
142
-
143
- EIGEN_DEVICE_FUNC half& operator=(const half& other) {
144
- x = other.x;
145
- return *this;
146
- }
147
- };
148
-
149
- } // end namespace Eigen
150
-
151
- namespace std {
152
- template<>
153
- struct numeric_limits<Eigen::half> {
154
- static const bool is_specialized = true;
155
- static const bool is_signed = true;
156
- static const bool is_integer = false;
157
- static const bool is_exact = false;
158
- static const bool has_infinity = true;
159
- static const bool has_quiet_NaN = true;
160
- static const bool has_signaling_NaN = true;
161
- static const float_denorm_style has_denorm = denorm_present;
162
- static const bool has_denorm_loss = false;
163
- static const std::float_round_style round_style = std::round_to_nearest;
164
- static const bool is_iec559 = false;
165
- static const bool is_bounded = false;
166
- static const bool is_modulo = false;
167
- static const int digits = 11;
168
- static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
169
- static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
170
- static const int radix = 2;
171
- static const int min_exponent = -13;
172
- static const int min_exponent10 = -4;
173
- static const int max_exponent = 16;
174
- static const int max_exponent10 = 4;
175
- static const bool traps = true;
176
- static const bool tinyness_before = false;
177
-
178
- static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
179
- static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
180
- static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
181
- static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
182
- static Eigen::half round_error() { return Eigen::half(0.5); }
183
- static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
184
- static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
185
- static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
186
- static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
187
- };
188
-
189
- // If std::numeric_limits<T> is specialized, should also specialize
190
- // std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
191
- // std::numeric_limits<const volatile T>
192
- // https://stackoverflow.com/a/16519653/
193
- template<>
194
- struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
195
- template<>
196
- struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
197
- template<>
198
- struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
199
- } // end namespace std
200
-
201
- namespace Eigen {
202
-
203
- namespace half_impl {
204
-
205
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
206
-
207
- // Intrinsics for native fp16 support. Note that on current hardware,
208
- // these are no faster than float32_bits arithmetic (you need to use the half2
209
- // versions to get the ALU speed increased), but you do save the
210
- // conversion steps back and forth.
211
-
212
- EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
213
- return __hadd(a, b);
214
- }
215
- EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
216
- return __hmul(a, b);
217
- }
218
- EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
219
- return __hsub(a, b);
220
- }
221
- EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
222
- float num = __half2float(a);
223
- float denom = __half2float(b);
224
- return __float2half(num / denom);
225
- }
226
- EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
227
- return __hneg(a);
228
- }
229
- EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
230
- a = a + b;
231
- return a;
232
- }
233
- EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
234
- a = a * b;
235
- return a;
236
- }
237
- EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
238
- a = a - b;
239
- return a;
240
- }
241
- EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
242
- a = a / b;
243
- return a;
244
- }
245
- EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
246
- return __heq(a, b);
247
- }
248
- EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
249
- return __hne(a, b);
250
- }
251
- EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
252
- return __hlt(a, b);
253
- }
254
- EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
255
- return __hle(a, b);
256
- }
257
- EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
258
- return __hgt(a, b);
259
- }
260
- EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
261
- return __hge(a, b);
262
- }
263
-
264
- #else // Emulate support for half floats
265
-
266
- // Definitions for CPUs and older CUDA, mostly working through conversion
267
- // to/from float32_bits.
268
-
269
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
270
- return half(float(a) + float(b));
271
- }
272
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
273
- return half(float(a) * float(b));
274
- }
275
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
276
- return half(float(a) - float(b));
277
- }
278
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
279
- return half(float(a) / float(b));
280
- }
281
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
282
- half result;
283
- result.x = a.x ^ 0x8000;
284
- return result;
285
- }
286
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
287
- a = half(float(a) + float(b));
288
- return a;
289
- }
290
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
291
- a = half(float(a) * float(b));
292
- return a;
293
- }
294
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
295
- a = half(float(a) - float(b));
296
- return a;
297
- }
298
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
299
- a = half(float(a) / float(b));
300
- return a;
301
- }
302
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
303
- return numext::equal_strict(float(a),float(b));
304
- }
305
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
306
- return numext::not_equal_strict(float(a), float(b));
307
- }
308
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
309
- return float(a) < float(b);
310
- }
311
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
312
- return float(a) <= float(b);
313
- }
314
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
315
- return float(a) > float(b);
316
- }
317
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
318
- return float(a) >= float(b);
319
- }
320
-
321
- #endif // Emulate support for half floats
322
-
323
- // Division by an index. Do it in full float precision to avoid accuracy
324
- // issues in converting the denominator to half.
325
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
326
- return half(static_cast<float>(a) / static_cast<float>(b));
327
- }
328
-
329
- // Conversion routines, including fallbacks for the host or older CUDA.
330
- // Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
331
- // these in hardware. If we need more performance on older/other CPUs, they are
332
- // also possible to vectorize directly.
333
-
334
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
335
- __half_raw h;
336
- h.x = x;
337
- return h;
338
- }
339
-
340
- union float32_bits {
341
- unsigned int u;
342
- float f;
343
- };
344
-
345
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
346
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
347
- __half tmp_ff = __float2half(ff);
348
- return *(__half_raw*)&tmp_ff;
349
-
350
- #elif defined(EIGEN_HAS_FP16_C)
351
- __half_raw h;
352
- h.x = _cvtss_sh(ff, 0);
353
- return h;
354
-
355
- #else
356
- float32_bits f; f.f = ff;
357
-
358
- const float32_bits f32infty = { 255 << 23 };
359
- const float32_bits f16max = { (127 + 16) << 23 };
360
- const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
361
- unsigned int sign_mask = 0x80000000u;
362
- __half_raw o;
363
- o.x = static_cast<unsigned short>(0x0u);
364
-
365
- unsigned int sign = f.u & sign_mask;
366
- f.u ^= sign;
367
-
368
- // NOTE all the integer compares in this function can be safely
369
- // compiled into signed compares since all operands are below
370
- // 0x80000000. Important if you want fast straight SSE2 code
371
- // (since there's no unsigned PCMPGTD).
372
-
373
- if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
374
- o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
375
- } else { // (De)normalized number or zero
376
- if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
377
- // use a magic value to align our 10 mantissa bits at the bottom of
378
- // the float. as long as FP addition is round-to-nearest-even this
379
- // just works.
380
- f.f += denorm_magic.f;
381
-
382
- // and one integer subtract of the bias later, we have our final float!
383
- o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
384
- } else {
385
- unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
386
-
387
- // update exponent, rounding bias part 1
388
- f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
389
- // rounding bias part 2
390
- f.u += mant_odd;
391
- // take the bits!
392
- o.x = static_cast<unsigned short>(f.u >> 13);
393
- }
394
- }
395
-
396
- o.x |= static_cast<unsigned short>(sign >> 16);
397
- return o;
398
- #endif
399
- }
400
-
401
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
402
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
403
- return __half2float(h);
404
-
405
- #elif defined(EIGEN_HAS_FP16_C)
406
- return _cvtsh_ss(h.x);
407
-
408
- #else
409
- const float32_bits magic = { 113 << 23 };
410
- const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
411
- float32_bits o;
412
-
413
- o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
414
- unsigned int exp = shifted_exp & o.u; // just the exponent
415
- o.u += (127 - 15) << 23; // exponent adjust
416
-
417
- // handle exponent special cases
418
- if (exp == shifted_exp) { // Inf/NaN?
419
- o.u += (128 - 16) << 23; // extra exp adjust
420
- } else if (exp == 0) { // Zero/Denormal?
421
- o.u += 1 << 23; // extra exp adjust
422
- o.f -= magic.f; // renormalize
423
- }
424
-
425
- o.u |= (h.x & 0x8000) << 16; // sign bit
426
- return o.f;
427
- #endif
428
- }
429
-
430
- // --- standard functions ---
431
-
432
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
433
- return (a.x & 0x7fff) == 0x7c00;
434
- }
435
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
436
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
437
- return __hisnan(a);
438
- #else
439
- return (a.x & 0x7fff) > 0x7c00;
440
- #endif
441
- }
442
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {
443
- return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
444
- }
445
-
446
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
447
- half result;
448
- result.x = a.x & 0x7FFF;
449
- return result;
450
- }
451
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
452
- #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
453
- return half(hexp(a));
454
- #else
455
- return half(::expf(float(a)));
456
- #endif
457
- }
458
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
459
- #if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
460
- return half(::hlog(a));
461
- #else
462
- return half(::logf(float(a)));
463
- #endif
464
- }
465
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
466
- return half(numext::log1p(float(a)));
467
- }
468
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
469
- return half(::log10f(float(a)));
470
- }
471
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
472
- #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
473
- return half(hsqrt(a));
474
- #else
475
- return half(::sqrtf(float(a)));
476
- #endif
477
- }
478
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
479
- return half(::powf(float(a), float(b)));
480
- }
481
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
482
- return half(::sinf(float(a)));
483
- }
484
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {
485
- return half(::cosf(float(a)));
486
- }
487
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
488
- return half(::tanf(float(a)));
489
- }
490
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
491
- return half(::tanhf(float(a)));
492
- }
493
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
494
- #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
495
- return half(hfloor(a));
496
- #else
497
- return half(::floorf(float(a)));
498
- #endif
499
- }
500
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
501
- #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
502
- return half(hceil(a));
503
- #else
504
- return half(::ceilf(float(a)));
505
- #endif
506
- }
507
-
508
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
509
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
510
- return __hlt(b, a) ? b : a;
511
- #else
512
- const float f1 = static_cast<float>(a);
513
- const float f2 = static_cast<float>(b);
514
- return f2 < f1 ? b : a;
515
- #endif
516
- }
517
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
518
- #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
519
- return __hlt(a, b) ? b : a;
520
- #else
521
- const float f1 = static_cast<float>(a);
522
- const float f2 = static_cast<float>(b);
523
- return f1 < f2 ? b : a;
524
- #endif
525
- }
526
-
527
- EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {
528
- os << static_cast<float>(v);
529
- return os;
530
- }
531
-
532
- } // end namespace half_impl
533
-
534
- // import Eigen::half_impl::half into Eigen namespace
535
- // using half_impl::half;
536
-
537
- namespace internal {
538
-
539
- template<>
540
- struct random_default_impl<half, false, false>
541
- {
542
- static inline half run(const half& x, const half& y)
543
- {
544
- return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));
545
- }
546
- static inline half run()
547
- {
548
- return run(half(-1.f), half(1.f));
549
- }
550
- };
551
-
552
- template<> struct is_arithmetic<half> { enum { value = true }; };
553
-
554
- } // end namespace internal
555
-
556
- template<> struct NumTraits<Eigen::half>
557
- : GenericNumTraits<Eigen::half>
558
- {
559
- enum {
560
- IsSigned = true,
561
- IsInteger = false,
562
- IsComplex = false,
563
- RequireInitialization = false
564
- };
565
-
566
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
567
- return half_impl::raw_uint16_to_half(0x0800);
568
- }
569
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); }
570
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
571
- return half_impl::raw_uint16_to_half(0x7bff);
572
- }
573
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
574
- return half_impl::raw_uint16_to_half(0xfbff);
575
- }
576
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
577
- return half_impl::raw_uint16_to_half(0x7c00);
578
- }
579
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
580
- return half_impl::raw_uint16_to_half(0x7c01);
581
- }
582
- };
583
-
584
- } // end namespace Eigen
585
-
586
- // C-like standard mathematical functions and trancendentals.
587
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
588
- Eigen::half result;
589
- result.x = a.x & 0x7FFF;
590
- return result;
591
- }
592
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
593
- return Eigen::half(::expf(float(a)));
594
- }
595
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
596
- #if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
597
- return Eigen::half(::hlog(a));
598
- #else
599
- return Eigen::half(::logf(float(a)));
600
- #endif
601
- }
602
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
603
- return Eigen::half(::sqrtf(float(a)));
604
- }
605
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
606
- return Eigen::half(::powf(float(a), float(b)));
607
- }
608
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
609
- return Eigen::half(::floorf(float(a)));
610
- }
611
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
612
- return Eigen::half(::ceilf(float(a)));
613
- }
614
-
615
- namespace std {
616
-
617
- #if __cplusplus > 199711L
618
- template <>
619
- struct hash<Eigen::half> {
620
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
621
- return static_cast<std::size_t>(a.x);
622
- }
623
- };
624
- #endif
625
-
626
- } // end namespace std
627
-
628
-
629
- // Add the missing shfl_xor intrinsic
630
- #if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
631
- __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
632
- #if EIGEN_CUDACC_VER < 90000
633
- return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
634
- #else
635
- return static_cast<Eigen::half>(__shfl_xor_sync(0xFFFFFFFF, static_cast<float>(var), laneMask, width));
636
- #endif
637
- }
638
- #endif
639
-
640
- // ldg() has an overload for __half_raw, but we also need one for Eigen::half.
641
- #if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
642
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
643
- return Eigen::half_impl::raw_uint16_to_half(
644
- __ldg(reinterpret_cast<const unsigned short*>(ptr)));
645
- }
646
- #endif
647
-
648
-
649
- #if defined(EIGEN_CUDA_ARCH)
650
- namespace Eigen {
651
- namespace numext {
652
-
653
- template<>
654
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
655
- bool (isnan)(const Eigen::half& h) {
656
- return (half_impl::isnan)(h);
657
- }
658
-
659
- template<>
660
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
661
- bool (isinf)(const Eigen::half& h) {
662
- return (half_impl::isinf)(h);
663
- }
664
-
665
- template<>
666
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
667
- bool (isfinite)(const Eigen::half& h) {
668
- return (half_impl::isfinite)(h);
669
- }
670
-
671
- } // namespace Eigen
672
- } // namespace numext
673
- #endif
674
-
675
- #endif // EIGEN_HALF_CUDA_H