@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -0,0 +1,866 @@
1
+ /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef EIGEN_BFLOAT16_H
17
+ #define EIGEN_BFLOAT16_H
18
+
19
+ // IWYU pragma: private
20
+ #include "../../InternalHeaderCheck.h"
21
+
22
+ #if defined(EIGEN_HAS_HIP_BF16)
23
+ // When compiling with GPU support, the "hip_bfloat16" base class as well as
24
+ // some other routines are defined in the GPU compiler header files
25
+ // (hip_bfloat16.h), and they are not tagged constexpr.
26
+ // As a consequence, we get compile failures when compiling Eigen with
27
+ // GPU support. Hence the need to disable EIGEN_CONSTEXPR when building
28
+ // Eigen with GPU support
29
+ #pragma push_macro("EIGEN_CONSTEXPR")
30
+ #undef EIGEN_CONSTEXPR
31
+ #define EIGEN_CONSTEXPR
32
+ #endif
33
+
34
+ #define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD) \
35
+ template <> \
36
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED PACKET_BF16 METHOD<PACKET_BF16>( \
37
+ const PACKET_BF16& _x) { \
38
+ return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_x))); \
39
+ }
40
+
41
+ // Only use HIP GPU bf16 in kernels
42
+ #if defined(EIGEN_HAS_HIP_BF16) && defined(EIGEN_GPU_COMPILE_PHASE)
43
+ #define EIGEN_USE_HIP_BF16
44
+ #endif
45
+
46
+ namespace Eigen {
47
+
48
+ struct bfloat16;
49
+
50
+ namespace numext {
51
+ template <>
52
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src);
53
+
54
+ template <>
55
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src);
56
+ } // namespace numext
57
+ namespace bfloat16_impl {
58
+
59
+ #if defined(EIGEN_USE_HIP_BF16)
60
+
61
+ struct __bfloat16_raw : public hip_bfloat16 {
62
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() {}
63
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(hip_bfloat16 hb) : hip_bfloat16(hb) {}
64
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : hip_bfloat16(raw) {}
65
+ };
66
+
67
+ #else
68
+
69
+ // Make our own __bfloat16_raw definition.
70
+ struct __bfloat16_raw {
71
+ #if defined(EIGEN_HAS_HIP_BF16) && !defined(EIGEN_GPU_COMPILE_PHASE)
72
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() {}
73
+ #else
74
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}
75
+ #endif
76
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
77
+ unsigned short value;
78
+ };
79
+
80
+ #endif // defined(EIGEN_USE_HIP_BF16)
81
+
82
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);
83
+ template <bool AssumeArgumentIsNormalOrInfinityOrZero>
84
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff);
85
+ // Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying:
86
+ // > error C2908: explicit specialization; 'float_to_bfloat16_rtne' has already been instantiated
87
+ template <>
88
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff);
89
+ template <>
90
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff);
91
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h);
92
+
93
+ struct bfloat16_base : public __bfloat16_raw {
94
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base() {}
95
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
96
+ };
97
+
98
+ } // namespace bfloat16_impl
99
+
100
+ // Class definition.
101
+ struct bfloat16 : public bfloat16_impl::bfloat16_base {
102
+ typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;
103
+
104
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}
105
+
106
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {}
107
+
108
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)
109
+ : bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
110
+
111
+ template <class T>
112
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)
113
+ : bfloat16_impl::bfloat16_base(
114
+ bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
115
+
116
+ explicit EIGEN_DEVICE_FUNC bfloat16(float f)
117
+ : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
118
+
119
+ // Following the convention of numpy, converting between complex and
120
+ // float will lead to loss of imag value.
121
+ template <typename RealScalar>
122
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
123
+ : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
124
+
125
+ EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless.
126
+ return bfloat16_impl::bfloat16_to_float(*this);
127
+ }
128
+ };
129
+
130
+ // TODO(majnemer): Get rid of this once we can rely on C++17 inline variables to
131
+ // solve the ODR issue.
132
+ namespace bfloat16_impl {
133
+ template <typename = void>
134
+ struct numeric_limits_bfloat16_impl {
135
+ static EIGEN_CONSTEXPR const bool is_specialized = true;
136
+ static EIGEN_CONSTEXPR const bool is_signed = true;
137
+ static EIGEN_CONSTEXPR const bool is_integer = false;
138
+ static EIGEN_CONSTEXPR const bool is_exact = false;
139
+ static EIGEN_CONSTEXPR const bool has_infinity = true;
140
+ static EIGEN_CONSTEXPR const bool has_quiet_NaN = true;
141
+ static EIGEN_CONSTEXPR const bool has_signaling_NaN = true;
142
+ EIGEN_DIAGNOSTICS(push)
143
+ EIGEN_DISABLE_DEPRECATED_WARNING
144
+ static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm = std::denorm_present;
145
+ static EIGEN_CONSTEXPR const bool has_denorm_loss = false;
146
+ EIGEN_DIAGNOSTICS(pop)
147
+ static EIGEN_CONSTEXPR const std::float_round_style round_style = std::numeric_limits<float>::round_style;
148
+ static EIGEN_CONSTEXPR const bool is_iec559 = true;
149
+ // The C++ standard defines this as "true if the set of values representable
150
+ // by the type is finite." BFloat16 has finite precision.
151
+ static EIGEN_CONSTEXPR const bool is_bounded = true;
152
+ static EIGEN_CONSTEXPR const bool is_modulo = false;
153
+ static EIGEN_CONSTEXPR const int digits = 8;
154
+ static EIGEN_CONSTEXPR const int digits10 = 2;
155
+ static EIGEN_CONSTEXPR const int max_digits10 = 4;
156
+ static EIGEN_CONSTEXPR const int radix = std::numeric_limits<float>::radix;
157
+ static EIGEN_CONSTEXPR const int min_exponent = std::numeric_limits<float>::min_exponent;
158
+ static EIGEN_CONSTEXPR const int min_exponent10 = std::numeric_limits<float>::min_exponent10;
159
+ static EIGEN_CONSTEXPR const int max_exponent = std::numeric_limits<float>::max_exponent;
160
+ static EIGEN_CONSTEXPR const int max_exponent10 = std::numeric_limits<float>::max_exponent10;
161
+ static EIGEN_CONSTEXPR const bool traps = std::numeric_limits<float>::traps;
162
+ // IEEE754: "The implementer shall choose how tininess is detected, but shall
163
+ // detect tininess in the same way for all operations in radix two"
164
+ static EIGEN_CONSTEXPR const bool tinyness_before = std::numeric_limits<float>::tinyness_before;
165
+
166
+ static EIGEN_CONSTEXPR Eigen::bfloat16(min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
167
+ static EIGEN_CONSTEXPR Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
168
+ static EIGEN_CONSTEXPR Eigen::bfloat16(max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
169
+ static EIGEN_CONSTEXPR Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
170
+ static EIGEN_CONSTEXPR Eigen::bfloat16 round_error() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3f00); }
171
+ static EIGEN_CONSTEXPR Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
172
+ static EIGEN_CONSTEXPR Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
173
+ static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN() {
174
+ return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fa0);
175
+ }
176
+ static EIGEN_CONSTEXPR Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
177
+ };
178
+
179
+ template <typename T>
180
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_specialized;
181
+ template <typename T>
182
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_signed;
183
+ template <typename T>
184
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_integer;
185
+ template <typename T>
186
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_exact;
187
+ template <typename T>
188
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_infinity;
189
+ template <typename T>
190
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_quiet_NaN;
191
+ template <typename T>
192
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_signaling_NaN;
193
+ EIGEN_DIAGNOSTICS(push)
194
+ EIGEN_DISABLE_DEPRECATED_WARNING
195
+ template <typename T>
196
+ EIGEN_CONSTEXPR const std::float_denorm_style numeric_limits_bfloat16_impl<T>::has_denorm;
197
+ template <typename T>
198
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_denorm_loss;
199
+ EIGEN_DIAGNOSTICS(pop)
200
+ template <typename T>
201
+ EIGEN_CONSTEXPR const std::float_round_style numeric_limits_bfloat16_impl<T>::round_style;
202
+ template <typename T>
203
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_iec559;
204
+ template <typename T>
205
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_bounded;
206
+ template <typename T>
207
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_modulo;
208
+ template <typename T>
209
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::digits;
210
+ template <typename T>
211
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::digits10;
212
+ template <typename T>
213
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_digits10;
214
+ template <typename T>
215
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::radix;
216
+ template <typename T>
217
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::min_exponent;
218
+ template <typename T>
219
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::min_exponent10;
220
+ template <typename T>
221
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_exponent;
222
+ template <typename T>
223
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_exponent10;
224
+ template <typename T>
225
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::traps;
226
+ template <typename T>
227
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::tinyness_before;
228
+ } // end namespace bfloat16_impl
229
+ } // end namespace Eigen
230
+
231
+ namespace std {
232
+ // If std::numeric_limits<T> is specialized, should also specialize
233
+ // std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
234
+ // std::numeric_limits<const volatile T>
235
+ // https://stackoverflow.com/a/16519653/
236
+ template <>
237
+ class numeric_limits<Eigen::bfloat16> : public Eigen::bfloat16_impl::numeric_limits_bfloat16_impl<> {};
238
+ template <>
239
+ class numeric_limits<const Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
240
+ template <>
241
+ class numeric_limits<volatile Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
242
+ template <>
243
+ class numeric_limits<const volatile Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
244
+ } // end namespace std
245
+
246
+ namespace Eigen {
247
+
248
+ namespace bfloat16_impl {
249
+
250
+ // We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
251
+ // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
252
+ // of the functions, while the latter can only deal with one of them.
253
+ #if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
254
+
255
+ #if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
256
+ // We need to provide emulated *host-side* BF16 operators for clang.
257
+ #pragma push_macro("EIGEN_DEVICE_FUNC")
258
+ #undef EIGEN_DEVICE_FUNC
259
+ #if (defined(EIGEN_HAS_GPU_BF16) && defined(EIGEN_HAS_NATIVE_BF16))
260
+ #define EIGEN_DEVICE_FUNC __host__
261
+ #else // both host and device need emulated ops.
262
+ #define EIGEN_DEVICE_FUNC __host__ __device__
263
+ #endif
264
+ #endif
265
+
266
+ // Definitions for CPUs, mostly working through conversion
267
+ // to/from fp32.
268
+
269
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const bfloat16& a, const bfloat16& b) {
270
+ return bfloat16(float(a) + float(b));
271
+ }
272
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const bfloat16& a, const int& b) {
273
+ return bfloat16(float(a) + static_cast<float>(b));
274
+ }
275
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const int& a, const bfloat16& b) {
276
+ return bfloat16(static_cast<float>(a) + float(b));
277
+ }
278
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator*(const bfloat16& a, const bfloat16& b) {
279
+ return bfloat16(float(a) * float(b));
280
+ }
281
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator-(const bfloat16& a, const bfloat16& b) {
282
+ return bfloat16(float(a) - float(b));
283
+ }
284
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator/(const bfloat16& a, const bfloat16& b) {
285
+ return bfloat16(float(a) / float(b));
286
+ }
287
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator-(const bfloat16& a) {
288
+ numext::uint16_t x = numext::bit_cast<uint16_t>(a) ^ 0x8000;
289
+ return numext::bit_cast<bfloat16>(x);
290
+ }
291
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator+=(bfloat16& a, const bfloat16& b) {
292
+ a = bfloat16(float(a) + float(b));
293
+ return a;
294
+ }
295
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator*=(bfloat16& a, const bfloat16& b) {
296
+ a = bfloat16(float(a) * float(b));
297
+ return a;
298
+ }
299
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator-=(bfloat16& a, const bfloat16& b) {
300
+ a = bfloat16(float(a) - float(b));
301
+ return a;
302
+ }
303
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator/=(bfloat16& a, const bfloat16& b) {
304
+ a = bfloat16(float(a) / float(b));
305
+ return a;
306
+ }
307
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a) {
308
+ a += bfloat16(1);
309
+ return a;
310
+ }
311
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a) {
312
+ a -= bfloat16(1);
313
+ return a;
314
+ }
315
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a, int) {
316
+ bfloat16 original_value = a;
317
+ ++a;
318
+ return original_value;
319
+ }
320
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {
321
+ bfloat16 original_value = a;
322
+ --a;
323
+ return original_value;
324
+ }
325
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator==(const bfloat16& a, const bfloat16& b) {
326
+ return numext::equal_strict(float(a), float(b));
327
+ }
328
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator!=(const bfloat16& a, const bfloat16& b) {
329
+ return numext::not_equal_strict(float(a), float(b));
330
+ }
331
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<(const bfloat16& a, const bfloat16& b) {
332
+ return float(a) < float(b);
333
+ }
334
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<=(const bfloat16& a, const bfloat16& b) {
335
+ return float(a) <= float(b);
336
+ }
337
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>(const bfloat16& a, const bfloat16& b) {
338
+ return float(a) > float(b);
339
+ }
340
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>=(const bfloat16& a, const bfloat16& b) {
341
+ return float(a) >= float(b);
342
+ }
343
+
344
+ #if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
345
+ #pragma pop_macro("EIGEN_DEVICE_FUNC")
346
+ #endif
347
+ #endif // Emulate support for bfloat16 floats
348
+
349
+ // Division by an index. Do it in full float precision to avoid accuracy
350
+ // issues in converting the denominator to bfloat16.
351
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator/(const bfloat16& a, Index b) {
352
+ return bfloat16(static_cast<float>(a) / static_cast<float>(b));
353
+ }
354
+
355
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {
356
+ #if defined(EIGEN_USE_HIP_BF16)
357
+ return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(v, __bfloat16_raw::truncate));
358
+ #else
359
+ __bfloat16_raw output;
360
+ if (numext::isnan EIGEN_NOT_A_MACRO(v)) {
361
+ output.value = std::signbit(v) ? 0xFFC0 : 0x7FC0;
362
+ return output;
363
+ }
364
+ output.value = static_cast<numext::uint16_t>(numext::bit_cast<numext::uint32_t>(v) >> 16);
365
+ return output;
366
+ #endif
367
+ }
368
+
369
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {
370
+ #if defined(EIGEN_USE_HIP_BF16)
371
+ __bfloat16_raw bf;
372
+ bf.data = value;
373
+ return bf;
374
+ #else
375
+ return __bfloat16_raw(value);
376
+ #endif
377
+ }
378
+
379
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(
380
+ const __bfloat16_raw& bf) {
381
+ #if defined(EIGEN_USE_HIP_BF16)
382
+ return bf.data;
383
+ #else
384
+ return bf.value;
385
+ #endif
386
+ }
387
+
388
+ // float_to_bfloat16_rtne template specialization that does not make any
389
+ // assumption about the value of its function argument (ff).
390
+ template <>
391
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) {
392
+ #if defined(EIGEN_USE_HIP_BF16)
393
+ return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff));
394
+ #else
395
+ __bfloat16_raw output;
396
+
397
+ if (numext::isnan EIGEN_NOT_A_MACRO(ff)) {
398
+ // If the value is a NaN, squash it to a qNaN with msb of fraction set,
399
+ // this makes sure after truncation we don't end up with an inf.
400
+ //
401
+ // qNaN magic: All exponent bits set + most significant bit of fraction
402
+ // set.
403
+ output.value = std::signbit(ff) ? 0xFFC0 : 0x7FC0;
404
+ } else {
405
+ // Fast rounding algorithm that rounds a half value to nearest even. This
406
+ // reduces expected error when we convert a large number of floats. Here
407
+ // is how it works:
408
+ //
409
+ // Definitions:
410
+ // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits
411
+ // with the following tags:
412
+ //
413
+ // Sign | Exp (8 bits) | Frac (23 bits)
414
+ // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT
415
+ //
416
+ // S: Sign bit.
417
+ // E: Exponent bits.
418
+ // F: First 6 bits of fraction.
419
+ // L: Least significant bit of resulting bfloat16 if we truncate away the
420
+ // rest of the float32. This is also the 7th bit of fraction
421
+ // R: Rounding bit, 8th bit of fraction.
422
+ // T: Sticky bits, rest of fraction, 15 bits.
423
+ //
424
+ // To round half to nearest even, there are 3 cases where we want to round
425
+ // down (simply truncate the result of the bits away, which consists of
426
+ // rounding bit and sticky bits) and two cases where we want to round up
427
+ // (truncate then add one to the result).
428
+ //
429
+ // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of
430
+ // 1s) as the rounding bias, adds the rounding bias to the input, then
431
+ // truncates the last 16 bits away.
432
+ //
433
+ // To understand how it works, we can analyze this algorithm case by case:
434
+ //
435
+ // 1. L = 0, R = 0:
436
+ // Expect: round down, this is less than half value.
437
+ //
438
+ // Algorithm:
439
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
440
+ // - Adding rounding bias to input may create a carry, depending on
441
+ // whether there is any value set to 1 in T bits.
442
+ // - R may be set to 1 if there is a carry.
443
+ // - L remains 0.
444
+ // - Note that this case also handles Inf and -Inf, where all fraction
445
+ // bits, including L, R and Ts are all 0. The output remains Inf after
446
+ // this algorithm.
447
+ //
448
+ // 2. L = 1, R = 0:
449
+ // Expect: round down, this is less than half value.
450
+ //
451
+ // Algorithm:
452
+ // - Rounding bias: 0x7fff + 1 = 0x8000
453
+ // - Adding rounding bias to input doesn't change sticky bits but
454
+ // adds 1 to rounding bit.
455
+ // - L remains 1.
456
+ //
457
+ // 3. L = 0, R = 1, all of T are 0:
458
+ // Expect: round down, this is exactly at half, the result is already
459
+ // even (L=0).
460
+ //
461
+ // Algorithm:
462
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
463
+ // - Adding rounding bias to input sets all sticky bits to 1, but
464
+ // doesn't create a carry.
465
+ // - R remains 1.
466
+ // - L remains 0.
467
+ //
468
+ // 4. L = 1, R = 1:
469
+ // Expect: round up, this is exactly at half, the result needs to be
470
+ // rounded to the next even number.
471
+ //
472
+ // Algorithm:
473
+ // - Rounding bias: 0x7fff + 1 = 0x8000
474
+ // - Adding rounding bias to input doesn't change sticky bits, but
475
+ // creates a carry from rounding bit.
476
+ // - The carry sets L to 0, creates another carry bit and propagates
477
+ // forward to F bits.
478
+ // - If all the F bits are 1, a carry then propagates to the exponent
479
+ // bits, which then creates the minimum value with the next exponent
480
+ // value. Note that we won't have the case where exponents are all 1,
481
+ // since that's either a NaN (handled in the other if condition) or inf
482
+ // (handled in case 1).
483
+ //
484
+ // 5. L = 0, R = 1, any of T is 1:
485
+ // Expect: round up, this is greater than half.
486
+ //
487
+ // Algorithm:
488
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
489
+ // - Adding rounding bias to input creates a carry from sticky bits,
490
+ // sets rounding bit to 0, then creates another carry.
491
+ // - The second carry sets L to 1.
492
+ //
493
+ // Examples:
494
+ //
495
+ // Exact half value that is already even:
496
+ // Input:
497
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
498
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
499
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000
500
+ //
501
+ // This falls into case 3. We truncate the rest of 16 bits and no
502
+ // carry is created into F and L:
503
+ //
504
+ // Output:
505
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
506
+ // S E E E E E E E E F F F F F F L
507
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
508
+ //
509
+ // Exact half value, round to next even number:
510
+ // Input:
511
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
512
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
513
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000
514
+ //
515
+ // This falls into case 4. We create a carry from R and T,
516
+ // which then propagates into L and F:
517
+ //
518
+ // Output:
519
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
520
+ // S E E E E E E E E F F F F F F L
521
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
522
+ //
523
+ //
524
+ // Max denormal value rounds to min normal value:
525
+ // Input:
526
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
527
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
528
+ // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111
529
+ //
530
+ // This falls into case 4. We create a carry from R and T,
531
+ // propagate into L and F, which then propagates into exponent
532
+ // bits:
533
+ //
534
+ // Output:
535
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
536
+ // S E E E E E E E E F F F F F F L
537
+ // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
538
+ //
539
+ // Max normal value rounds to Inf:
540
+ // Input:
541
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
542
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
543
+ // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111
544
+ //
545
+ // This falls into case 4. We create a carry from R and T,
546
+ // propagate into L and F, which then propagates into exponent
547
+ // bits:
548
+ //
549
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
550
+ // S E E E E E E E E F F F F F F L
551
+ // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
552
+
553
+ // At this point, ff is known not to be a NaN (it may be zero, subnormal, normal, or +/-infinity).
554
+ output = float_to_bfloat16_rtne<true>(ff);
555
+ }
556
+ return output;
557
+ #endif
558
+ }
559
+
560
+ // float_to_bfloat16_rtne template specialization that assumes that its function
561
+ // argument (ff) is either a normal floating point number, or +/-infinity, or
562
+ // zero. Used to improve the runtime performance of conversion from an integer
563
+ // type to bfloat16.
564
+ template <>
565
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
566
+ #if defined(EIGEN_USE_HIP_BF16)
567
+ return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff));
568
+ #else
569
+ numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
570
+ __bfloat16_raw output;
571
+
572
+ // Least significant bit of resulting bfloat.
573
+ numext::uint32_t lsb = (input >> 16) & 1;
574
+ numext::uint32_t rounding_bias = 0x7fff + lsb;
575
+ input += rounding_bias;
576
+ output.value = static_cast<numext::uint16_t>(input >> 16);
577
+ return output;
578
+ #endif
579
+ }
580
+
581
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {
582
+ #if defined(EIGEN_USE_HIP_BF16)
583
+ return static_cast<float>(h);
584
+ #else
585
+ return numext::bit_cast<float>(static_cast<numext::uint32_t>(h.value) << 16);
586
+ #endif
587
+ }
588
+
589
+ // --- standard functions ---
590
+
591
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isinf)(const bfloat16& a) {
592
+ EIGEN_USING_STD(isinf);
593
+ #if defined(EIGEN_USE_HIP_BF16)
594
+ return (isinf)(a); // Uses HIP hip_bfloat16 isinf operator
595
+ #else
596
+ return (isinf)(float(a));
597
+ #endif
598
+ }
599
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isnan)(const bfloat16& a) {
600
+ EIGEN_USING_STD(isnan);
601
+ #if defined(EIGEN_USE_HIP_BF16)
602
+ return (isnan)(a); // Uses HIP hip_bfloat16 isnan operator
603
+ #else
604
+ return (isnan)(float(a));
605
+ #endif
606
+ }
607
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isfinite)(const bfloat16& a) {
608
+ return !(isinf EIGEN_NOT_A_MACRO(a)) && !(isnan EIGEN_NOT_A_MACRO(a));
609
+ }
610
+
611
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
612
+ numext::uint16_t x = numext::bit_cast<numext::uint16_t>(a) & 0x7FFF;
613
+ return numext::bit_cast<bfloat16>(x);
614
+ }
615
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) { return bfloat16(::expf(float(a))); }
616
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp2(const bfloat16& a) { return bfloat16(::exp2f(float(a))); }
617
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) { return bfloat16(numext::expm1(float(a))); }
618
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) { return bfloat16(::logf(float(a))); }
619
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) { return bfloat16(numext::log1p(float(a))); }
620
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) { return bfloat16(::log10f(float(a))); }
621
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {
622
+ return bfloat16(static_cast<float>(EIGEN_LOG2E) * ::logf(float(a)));
623
+ }
624
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) { return bfloat16(::sqrtf(float(a))); }
625
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
626
+ return bfloat16(::powf(float(a), float(b)));
627
+ }
628
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan2(const bfloat16& a, const bfloat16& b) {
629
+ return bfloat16(::atan2f(float(a), float(b)));
630
+ }
631
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) { return bfloat16(::sinf(float(a))); }
632
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) { return bfloat16(::cosf(float(a))); }
633
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) { return bfloat16(::tanf(float(a))); }
634
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) { return bfloat16(::asinf(float(a))); }
635
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) { return bfloat16(::acosf(float(a))); }
636
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) { return bfloat16(::atanf(float(a))); }
637
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) { return bfloat16(::sinhf(float(a))); }
638
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) { return bfloat16(::coshf(float(a))); }
639
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) { return bfloat16(::tanhf(float(a))); }
640
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) { return bfloat16(::asinhf(float(a))); }
641
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) { return bfloat16(::acoshf(float(a))); }
642
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) { return bfloat16(::atanhf(float(a))); }
643
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) { return bfloat16(::floorf(float(a))); }
644
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) { return bfloat16(::ceilf(float(a))); }
645
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) { return bfloat16(::rintf(float(a))); }
646
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) { return bfloat16(::roundf(float(a))); }
647
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 trunc(const bfloat16& a) { return bfloat16(::truncf(float(a))); }
648
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {
649
+ return bfloat16(::fmodf(float(a), float(b)));
650
+ }
651
+
652
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16(min)(const bfloat16& a, const bfloat16& b) {
653
+ const float f1 = static_cast<float>(a);
654
+ const float f2 = static_cast<float>(b);
655
+ return f2 < f1 ? b : a;
656
+ }
657
+
658
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16(max)(const bfloat16& a, const bfloat16& b) {
659
+ const float f1 = static_cast<float>(a);
660
+ const float f2 = static_cast<float>(b);
661
+ return f1 < f2 ? b : a;
662
+ }
663
+
664
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {
665
+ const float f1 = static_cast<float>(a);
666
+ const float f2 = static_cast<float>(b);
667
+ return bfloat16(::fminf(f1, f2));
668
+ }
669
+
670
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {  // Maximum of a and b as decided by ::fmaxf on the float values.
671
+ const float lhs = static_cast<float>(a);
672
+ const float rhs = static_cast<float>(b);
673
+ return bfloat16(::fmaxf(lhs, rhs));  // Unlike (max), the result is rebuilt from the float returned by ::fmaxf.
674
+ }
675
+
676
+ EIGEN_DEVICE_FUNC inline bfloat16 fma(const bfloat16& a, const bfloat16& b, const bfloat16& c) {  // Fused multiply-add of a, b and c for bfloat16 operands.
677
+ // Emulated in float: the three operands are widened, passed to numext::fma, and the float result is converted back to bfloat16.
678
+ return bfloat16(numext::fma(static_cast<float>(a), static_cast<float>(b), static_cast<float>(c)));
679
+ }
680
+
681
+ #ifndef EIGEN_NO_IO
682
+ EIGEN_ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, const bfloat16& v) {  // Streams v using the stream's float formatting.
683
+ const float as_float = static_cast<float>(v);
684
+ return os << as_float;  // Returns the same stream so insertions can be chained.
685
+ }
686
+ #endif
687
+
688
+ } // namespace bfloat16_impl
689
+
690
+ namespace internal {
691
+
692
+ template <>
693
+ struct is_arithmetic<bfloat16> {  // Trait specialization that marks bfloat16 as an arithmetic type.
694
+ enum { value = true };
695
+ };
696
+
697
+ template <>
698
+ struct random_impl<bfloat16> {  // Random bfloat16 generation, delegated to the float implementation.
699
+ enum : int { MantissaBits = 7 };  // Passed to the float generator as its last argument; presumably the number of random mantissa bits to produce (confirm against random_impl<float>).
700
+ using Impl = random_impl<float>;
701
+ static EIGEN_DEVICE_FUNC inline bfloat16 run(const bfloat16& x, const bfloat16& y) {  // Two-argument form: forwards x and y (converted to float) to the float generator.
702
+ const float sample = Impl::run(x, y, MantissaBits);
703
+ return bfloat16(sample);
704
+ }
705
+ static EIGEN_DEVICE_FUNC inline bfloat16 run() {  // Argument-free form of the float generator.
706
+ const float sample = Impl::run(MantissaBits);
707
+ return bfloat16(sample);
708
+ }
709
+ };
710
+
711
+ } // namespace internal
712
+
713
+ template <>
714
+ struct NumTraits<Eigen::bfloat16> : GenericNumTraits<Eigen::bfloat16> {  // Numeric traits for bfloat16; every constant below is built from a raw 16-bit pattern (notes assume the usual 1 sign / 8 exponent / 7 mantissa bit layout).
715
+ enum { IsSigned = true, IsInteger = false, IsComplex = false, RequireInitialization = false };
716
+
717
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
718
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);  // 2^-7 under the assumed layout.
719
+ }
720
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {
721
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // Bit pattern of bfloat16(5e-2f).
722
+ }
723
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
724
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);  // Sign bit clear, largest exponent below all-ones, mantissa all ones.
725
+ }
726
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() {
727
+ return bfloat16_impl::raw_uint16_to_bfloat16(0xFF7F);  // Same pattern as highest() with the sign bit set.
728
+ }
729
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() {
730
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x7f80);  // Exponent all ones, mantissa zero.
731
+ }
732
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() {
733
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0);  // Exponent all ones, most significant mantissa bit set.
734
+ }
735
+ };
736
+
737
+ } // namespace Eigen
738
+
739
+ #if defined(EIGEN_HAS_HIP_BF16)
740
+ #pragma pop_macro("EIGEN_CONSTEXPR")
741
+ #endif  // EIGEN_HAS_HIP_BF16: restores EIGEN_CONSTEXPR; presumably pairs with a push_macro earlier in the file (not visible in this chunk; confirm).
742
+
743
+ namespace Eigen {
744
+ namespace numext {
745
+
746
+ template <>
747
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isnan)(const Eigen::bfloat16& h) {  // numext::isnan specialization for bfloat16; forwards to the bfloat16_impl overload.
748
+ return (bfloat16_impl::isnan)(h);  // Parenthesized name keeps a function-like isnan macro from expanding.
749
+ }
750
+
751
+ template <>
752
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isinf)(const Eigen::bfloat16& h) {  // numext::isinf specialization for bfloat16; forwards to the bfloat16_impl overload.
753
+ return (bfloat16_impl::isinf)(h);  // Parenthesized name keeps a function-like isinf macro from expanding.
754
+ }
755
+
756
+ template <>
757
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const Eigen::bfloat16& h) {  // numext::isfinite specialization for bfloat16; forwards to the bfloat16_impl overload.
758
+ return (bfloat16_impl::isfinite)(h);  // Parenthesized name keeps a function-like isfinite macro from expanding.
759
+ }
760
+
761
+ template <>
762
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {  // Reinterprets a 16-bit pattern as a bfloat16.
763
+ return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src);
764
+ }
765
+
766
+ template <>
767
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {  // Exposes the 16-bit pattern stored in a bfloat16.
768
+ return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
769
+ }
770
+
771
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 nextafter(const bfloat16& from, const bfloat16& to) {  // Returns the representable bfloat16 adjacent to `from` in the direction of `to`, stepping the raw 16-bit pattern by one.
772
+ if (numext::isnan EIGEN_NOT_A_MACRO(from)) {  // A NaN operand is returned unchanged.
773
+ return from;
774
+ }
775
+ if (numext::isnan EIGEN_NOT_A_MACRO(to)) {
776
+ return to;
777
+ }
778
+ if (from == to) {  // Nothing to step over: return `to`, as nextafter conventionally does for equal operands.
779
+ return to;
780
+ }
781
+ uint16_t from_bits = numext::bit_cast<uint16_t>(from);
782
+ bool from_sign = from_bits >> 15;
783
+ // Whether we are adjusting toward the infinity with the same sign as from.
784
+ bool toward_inf = (to > from) == !from_sign;
785
+ if (toward_inf) {
786
+ ++from_bits;  // Growing the magnitude is an increment of the sign-magnitude bit pattern.
787
+ } else if ((from_bits & 0x7fff) == 0) {
788
+ // Stepping across zero: the neighbour is the smallest-magnitude nonzero value of the opposite sign, not the other zero (which compares equal to from).
789
+ from_bits = static_cast<uint16_t>((from_bits ^ 0x8000) | 0x0001);
790
+ } else {
791
+ --from_bits;  // Shrinking a nonzero magnitude is a decrement of the bit pattern.
792
+ }
793
+ return numext::bit_cast<bfloat16>(from_bits);
794
+ }
795
+
796
+ // Specialize multiply-add to match packet operations and reduce conversions to/from float.
797
+ template<>
798
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 madd<Eigen::bfloat16>(const Eigen::bfloat16& x, const Eigen::bfloat16& y, const Eigen::bfloat16& z) {  // Computes x * y + z for bfloat16 operands.
799
+ return Eigen::bfloat16(static_cast<float>(x) * static_cast<float>(y) + static_cast<float>(z));  // Product and sum are both evaluated in float; only the final value is converted back to bfloat16.
800
+ }
801
+
802
+ } // namespace numext
803
+ } // namespace Eigen
804
+
805
+ #if EIGEN_HAS_STD_HASH
806
+ namespace std {
807
+ template <>
808
+ struct hash<Eigen::bfloat16> {  // std::hash specialization so bfloat16 can be used as a key in unordered containers.
809
+ EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
810
+ return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));  // The hash is the raw 16-bit pattern. NOTE(review): +0 and -0 have different patterns and so hash differently; if operator== treats them as equal this conflicts with the std::hash requirement, confirm.
811
+ }
812
+ };
813
+ } // namespace std
814
+ #endif  // EIGEN_HAS_STD_HASH
815
+
816
+ // Add the missing shfl* intrinsics.
817
+ // The __shfl* functions are only valid on HIP or __CUDA_ARCH__ >= 300.
818
+ // CUDA defines them for (__CUDA_ARCH__ >= 300 || !defined(__CUDA_ARCH__))
819
+ //
820
+ // HIP and CUDA prior to SDK 9.0 define
821
+ // __shfl, __shfl_up, __shfl_down, __shfl_xor for int and float
822
+ // CUDA since 9.0 deprecates those and instead defines
823
+ // __shfl_sync, __shfl_up_sync, __shfl_down_sync, __shfl_xor_sync,
824
+ // with native support for __half and __nv_bfloat16
825
+ //
826
+ // Note that the following are __device__ - only functions.
827
+ #if defined(EIGEN_HIPCC)
828
+
829
+ #if defined(EIGEN_HAS_HIP_BF16)
830
+
831
+ __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl(Eigen::bfloat16 var, int srcLane, int width = warpSize) {  // bfloat16 overload of __shfl, implemented on top of the int overload.
832
+ const int packed = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));  // Widen the 16-bit pattern to int for the shuffle.
833
+ return Eigen::numext::bit_cast<Eigen::bfloat16>(static_cast<Eigen::numext::uint16_t>(__shfl(packed, srcLane, width)));  // Narrow the shuffled int back to 16 bits and reinterpret.
834
+ }
835
+
836
+ __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_up(Eigen::bfloat16 var, unsigned int delta,
837
+ int width = warpSize) {  // bfloat16 overload of __shfl_up, implemented on top of the int overload.
838
+ const int packed = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));  // Widen the 16-bit pattern to int for the shuffle.
839
+ return Eigen::numext::bit_cast<Eigen::bfloat16>(static_cast<Eigen::numext::uint16_t>(__shfl_up(packed, delta, width)));  // Narrow the shuffled int back to 16 bits and reinterpret.
840
+ }
841
+
842
+ __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_down(Eigen::bfloat16 var, unsigned int delta,
843
+ int width = warpSize) {  // bfloat16 overload of __shfl_down, implemented on top of the int overload.
844
+ const int packed = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));  // Widen the 16-bit pattern to int for the shuffle.
845
+ return Eigen::numext::bit_cast<Eigen::bfloat16>(
846
+ static_cast<Eigen::numext::uint16_t>(__shfl_down(packed, delta, width)));  // Narrow the shuffled int back to 16 bits and reinterpret.
847
+ }
848
+
849
+ __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_xor(Eigen::bfloat16 var, int laneMask, int width = warpSize) {  // bfloat16 overload of __shfl_xor, implemented on top of the int overload.
850
+ const int packed = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));  // Widen the 16-bit pattern to int for the shuffle.
851
+ return Eigen::numext::bit_cast<Eigen::bfloat16>(
852
+ static_cast<Eigen::numext::uint16_t>(__shfl_xor(packed, laneMask, width)));  // Narrow the shuffled int back to 16 bits and reinterpret.
853
+ }
854
+
855
+ #endif // HIP
856
+
857
+ #endif // __shfl*
858
+
859
+ #if defined(EIGEN_HIPCC)
860
+ EIGEN_STRONG_INLINE __device__ Eigen::bfloat16 __ldg(const Eigen::bfloat16* ptr) {  // bfloat16 overload of __ldg, compiled only when EIGEN_HIPCC is defined.
861
+ return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(
862
+ __ldg(Eigen::numext::bit_cast<const Eigen::numext::uint16_t*>(ptr)));  // The pointer is reinterpreted as const uint16_t*, loaded through the uint16_t __ldg, and the loaded bits are rebuilt into a bfloat16.
863
+ }
864
+ #endif // __ldg
865
+
866
+ #endif // EIGEN_BFLOAT16_H