@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -16,26 +16,69 @@ limitations under the License.
16
16
  #ifndef EIGEN_BFLOAT16_H
17
17
  #define EIGEN_BFLOAT16_H
18
18
 
19
- #define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD) \
20
- template <> \
21
- EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED \
22
- PACKET_BF16 METHOD<PACKET_BF16>(const PACKET_BF16& _x) { \
23
- return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_x))); \
19
+ // IWYU pragma: private
20
+ #include "../../InternalHeaderCheck.h"
21
+
22
+ #if defined(EIGEN_HAS_HIP_BF16)
23
+ // When compiling with GPU support, the "hip_bfloat16" base class as well as
24
+ // some other routines are defined in the GPU compiler header files
25
+ // (hip_bfloat16.h), and they are not tagged constexpr
26
+ // As a consequence, we get compile failures when compiling Eigen with
27
+ // GPU support. Hence the need to disable EIGEN_CONSTEXPR when building
28
+ // Eigen with GPU support
29
+ #pragma push_macro("EIGEN_CONSTEXPR")
30
+ #undef EIGEN_CONSTEXPR
31
+ #define EIGEN_CONSTEXPR
32
+ #endif
33
+
34
+ #define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD) \
35
+ template <> \
36
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED PACKET_BF16 METHOD<PACKET_BF16>( \
37
+ const PACKET_BF16& _x) { \
38
+ return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_x))); \
24
39
  }
25
40
 
41
+ // Only use HIP GPU bf16 in kernels
42
+ #if defined(EIGEN_HAS_HIP_BF16) && defined(EIGEN_GPU_COMPILE_PHASE)
43
+ #define EIGEN_USE_HIP_BF16
44
+ #endif
45
+
26
46
  namespace Eigen {
27
47
 
28
48
  struct bfloat16;
29
49
 
50
+ namespace numext {
51
+ template <>
52
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src);
53
+
54
+ template <>
55
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src);
56
+ } // namespace numext
30
57
  namespace bfloat16_impl {
31
58
 
59
+ #if defined(EIGEN_USE_HIP_BF16)
60
+
61
+ struct __bfloat16_raw : public hip_bfloat16 {
62
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() {}
63
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(hip_bfloat16 hb) : hip_bfloat16(hb) {}
64
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : hip_bfloat16(raw) {}
65
+ };
66
+
67
+ #else
68
+
32
69
  // Make our own __bfloat16_raw definition.
33
70
  struct __bfloat16_raw {
71
+ #if defined(EIGEN_HAS_HIP_BF16) && !defined(EIGEN_GPU_COMPILE_PHASE)
72
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() {}
73
+ #else
34
74
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}
75
+ #endif
35
76
  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
36
77
  unsigned short value;
37
78
  };
38
79
 
80
+ #endif // defined(EIGEN_USE_HIP_BF16)
81
+
39
82
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);
40
83
  template <bool AssumeArgumentIsNormalOrInfinityOrZero>
41
84
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff);
@@ -52,11 +95,10 @@ struct bfloat16_base : public __bfloat16_raw {
52
95
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
53
96
  };
54
97
 
55
- } // namespace bfloat16_impl
98
+ } // namespace bfloat16_impl
56
99
 
57
100
  // Class definition.
58
101
  struct bfloat16 : public bfloat16_impl::bfloat16_base {
59
-
60
102
  typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;
61
103
 
62
104
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}
@@ -66,16 +108,17 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
66
108
  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)
67
109
  : bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
68
110
 
69
- template<class T>
111
+ template <class T>
70
112
  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)
71
- : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
113
+ : bfloat16_impl::bfloat16_base(
114
+ bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
72
115
 
73
116
  explicit EIGEN_DEVICE_FUNC bfloat16(float f)
74
117
  : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
75
118
 
76
119
  // Following the convention of numpy, converting between complex and
77
120
  // float will lead to loss of imag value.
78
- template<typename RealScalar>
121
+ template <typename RealScalar>
79
122
  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
80
123
  : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
81
124
 
@@ -83,57 +126,122 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
83
126
  return bfloat16_impl::bfloat16_to_float(*this);
84
127
  }
85
128
  };
86
- } // namespace Eigen
87
129
 
88
- namespace std {
89
- template<>
90
- struct numeric_limits<Eigen::bfloat16> {
91
- static const bool is_specialized = true;
92
- static const bool is_signed = true;
93
- static const bool is_integer = false;
94
- static const bool is_exact = false;
95
- static const bool has_infinity = true;
96
- static const bool has_quiet_NaN = true;
97
- static const bool has_signaling_NaN = true;
98
- static const float_denorm_style has_denorm = std::denorm_absent;
99
- static const bool has_denorm_loss = false;
100
- static const std::float_round_style round_style = numeric_limits<float>::round_style;
101
- static const bool is_iec559 = false;
102
- static const bool is_bounded = true;
103
- static const bool is_modulo = false;
104
- static const int digits = 8;
105
- static const int digits10 = 2;
106
- static const int max_digits10 = 4;
107
- static const int radix = 2;
108
- static const int min_exponent = numeric_limits<float>::min_exponent;
109
- static const int min_exponent10 = numeric_limits<float>::min_exponent10;
110
- static const int max_exponent = numeric_limits<float>::max_exponent;
111
- static const int max_exponent10 = numeric_limits<float>::max_exponent10;
112
- static const bool traps = numeric_limits<float>::traps;
113
- static const bool tinyness_before = numeric_limits<float>::tinyness_before;
114
-
115
- static Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
116
- static Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
117
- static Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
118
- static Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
119
- static Eigen::bfloat16 round_error() { return Eigen::bfloat16(0x3f00); }
120
- static Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
121
- static Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
122
- static Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); }
123
- static Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
130
+ // TODO(majnemer): Get rid of this once we can rely on C++17 inline variables to
131
+ // solve the ODR issue.
132
+ namespace bfloat16_impl {
133
+ template <typename = void>
134
+ struct numeric_limits_bfloat16_impl {
135
+ static EIGEN_CONSTEXPR const bool is_specialized = true;
136
+ static EIGEN_CONSTEXPR const bool is_signed = true;
137
+ static EIGEN_CONSTEXPR const bool is_integer = false;
138
+ static EIGEN_CONSTEXPR const bool is_exact = false;
139
+ static EIGEN_CONSTEXPR const bool has_infinity = true;
140
+ static EIGEN_CONSTEXPR const bool has_quiet_NaN = true;
141
+ static EIGEN_CONSTEXPR const bool has_signaling_NaN = true;
142
+ EIGEN_DIAGNOSTICS(push)
143
+ EIGEN_DISABLE_DEPRECATED_WARNING
144
+ static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm = std::denorm_present;
145
+ static EIGEN_CONSTEXPR const bool has_denorm_loss = false;
146
+ EIGEN_DIAGNOSTICS(pop)
147
+ static EIGEN_CONSTEXPR const std::float_round_style round_style = std::numeric_limits<float>::round_style;
148
+ static EIGEN_CONSTEXPR const bool is_iec559 = true;
149
+ // The C++ standard defines this as "true if the set of values representable
150
+ // by the type is finite." BFloat16 has finite precision.
151
+ static EIGEN_CONSTEXPR const bool is_bounded = true;
152
+ static EIGEN_CONSTEXPR const bool is_modulo = false;
153
+ static EIGEN_CONSTEXPR const int digits = 8;
154
+ static EIGEN_CONSTEXPR const int digits10 = 2;
155
+ static EIGEN_CONSTEXPR const int max_digits10 = 4;
156
+ static EIGEN_CONSTEXPR const int radix = std::numeric_limits<float>::radix;
157
+ static EIGEN_CONSTEXPR const int min_exponent = std::numeric_limits<float>::min_exponent;
158
+ static EIGEN_CONSTEXPR const int min_exponent10 = std::numeric_limits<float>::min_exponent10;
159
+ static EIGEN_CONSTEXPR const int max_exponent = std::numeric_limits<float>::max_exponent;
160
+ static EIGEN_CONSTEXPR const int max_exponent10 = std::numeric_limits<float>::max_exponent10;
161
+ static EIGEN_CONSTEXPR const bool traps = std::numeric_limits<float>::traps;
162
+ // IEEE754: "The implementer shall choose how tininess is detected, but shall
163
+ // detect tininess in the same way for all operations in radix two"
164
+ static EIGEN_CONSTEXPR const bool tinyness_before = std::numeric_limits<float>::tinyness_before;
165
+
166
+ static EIGEN_CONSTEXPR Eigen::bfloat16(min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
167
+ static EIGEN_CONSTEXPR Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
168
+ static EIGEN_CONSTEXPR Eigen::bfloat16(max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
169
+ static EIGEN_CONSTEXPR Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
170
+ static EIGEN_CONSTEXPR Eigen::bfloat16 round_error() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3f00); }
171
+ static EIGEN_CONSTEXPR Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
172
+ static EIGEN_CONSTEXPR Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
173
+ static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN() {
174
+ return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fa0);
175
+ }
176
+ static EIGEN_CONSTEXPR Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
124
177
  };
125
178
 
179
+ template <typename T>
180
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_specialized;
181
+ template <typename T>
182
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_signed;
183
+ template <typename T>
184
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_integer;
185
+ template <typename T>
186
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_exact;
187
+ template <typename T>
188
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_infinity;
189
+ template <typename T>
190
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_quiet_NaN;
191
+ template <typename T>
192
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_signaling_NaN;
193
+ EIGEN_DIAGNOSTICS(push)
194
+ EIGEN_DISABLE_DEPRECATED_WARNING
195
+ template <typename T>
196
+ EIGEN_CONSTEXPR const std::float_denorm_style numeric_limits_bfloat16_impl<T>::has_denorm;
197
+ template <typename T>
198
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_denorm_loss;
199
+ EIGEN_DIAGNOSTICS(pop)
200
+ template <typename T>
201
+ EIGEN_CONSTEXPR const std::float_round_style numeric_limits_bfloat16_impl<T>::round_style;
202
+ template <typename T>
203
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_iec559;
204
+ template <typename T>
205
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_bounded;
206
+ template <typename T>
207
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_modulo;
208
+ template <typename T>
209
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::digits;
210
+ template <typename T>
211
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::digits10;
212
+ template <typename T>
213
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_digits10;
214
+ template <typename T>
215
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::radix;
216
+ template <typename T>
217
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::min_exponent;
218
+ template <typename T>
219
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::min_exponent10;
220
+ template <typename T>
221
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_exponent;
222
+ template <typename T>
223
+ EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_exponent10;
224
+ template <typename T>
225
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::traps;
226
+ template <typename T>
227
+ EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::tinyness_before;
228
+ } // end namespace bfloat16_impl
229
+ } // end namespace Eigen
230
+
231
+ namespace std {
126
232
  // If std::numeric_limits<T> is specialized, should also specialize
127
233
  // std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
128
234
  // std::numeric_limits<const volatile T>
129
235
  // https://stackoverflow.com/a/16519653/
130
- template<>
131
- struct numeric_limits<const Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
132
- template<>
133
- struct numeric_limits<volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
134
- template<>
135
- struct numeric_limits<const volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
136
- } // namespace std
236
+ template <>
237
+ class numeric_limits<Eigen::bfloat16> : public Eigen::bfloat16_impl::numeric_limits_bfloat16_impl<> {};
238
+ template <>
239
+ class numeric_limits<const Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
240
+ template <>
241
+ class numeric_limits<volatile Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
242
+ template <>
243
+ class numeric_limits<const volatile Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
244
+ } // end namespace std
137
245
 
138
246
  namespace Eigen {
139
247
 
@@ -142,15 +250,15 @@ namespace bfloat16_impl {
142
250
  // We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
143
251
  // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
144
252
  // of the functions, while the latter can only deal with one of them.
145
- #if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
253
+ #if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
146
254
 
147
255
  #if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
148
256
  // We need to provide emulated *host-side* BF16 operators for clang.
149
257
  #pragma push_macro("EIGEN_DEVICE_FUNC")
150
258
  #undef EIGEN_DEVICE_FUNC
151
- #if defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_NATIVE_BF16)
259
+ #if (defined(EIGEN_HAS_GPU_BF16) && defined(EIGEN_HAS_NATIVE_BF16))
152
260
  #define EIGEN_DEVICE_FUNC __host__
153
- #else // both host and device need emulated ops.
261
+ #else // both host and device need emulated ops.
154
262
  #define EIGEN_DEVICE_FUNC __host__ __device__
155
263
  #endif
156
264
  #endif
@@ -158,42 +266,41 @@ namespace bfloat16_impl {
158
266
  // Definitions for CPUs, mostly working through conversion
159
267
  // to/from fp32.
160
268
 
161
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const bfloat16& b) {
269
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const bfloat16& a, const bfloat16& b) {
162
270
  return bfloat16(float(a) + float(b));
163
271
  }
164
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const int& b) {
272
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const bfloat16& a, const int& b) {
165
273
  return bfloat16(float(a) + static_cast<float>(b));
166
274
  }
167
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const int& a, const bfloat16& b) {
275
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const int& a, const bfloat16& b) {
168
276
  return bfloat16(static_cast<float>(a) + float(b));
169
277
  }
170
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator * (const bfloat16& a, const bfloat16& b) {
278
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator*(const bfloat16& a, const bfloat16& b) {
171
279
  return bfloat16(float(a) * float(b));
172
280
  }
173
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a, const bfloat16& b) {
281
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator-(const bfloat16& a, const bfloat16& b) {
174
282
  return bfloat16(float(a) - float(b));
175
283
  }
176
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, const bfloat16& b) {
284
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator/(const bfloat16& a, const bfloat16& b) {
177
285
  return bfloat16(float(a) / float(b));
178
286
  }
179
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) {
180
- bfloat16 result;
181
- result.value = a.value ^ 0x8000;
182
- return result;
287
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator-(const bfloat16& a) {
288
+ numext::uint16_t x = numext::bit_cast<uint16_t>(a) ^ 0x8000;
289
+ return numext::bit_cast<bfloat16>(x);
183
290
  }
184
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) {
291
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator+=(bfloat16& a, const bfloat16& b) {
185
292
  a = bfloat16(float(a) + float(b));
186
293
  return a;
187
294
  }
188
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator *= (bfloat16& a, const bfloat16& b) {
295
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator*=(bfloat16& a, const bfloat16& b) {
189
296
  a = bfloat16(float(a) * float(b));
190
297
  return a;
191
298
  }
192
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator -= (bfloat16& a, const bfloat16& b) {
299
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator-=(bfloat16& a, const bfloat16& b) {
193
300
  a = bfloat16(float(a) - float(b));
194
301
  return a;
195
302
  }
196
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator /= (bfloat16& a, const bfloat16& b) {
303
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator/=(bfloat16& a, const bfloat16& b) {
197
304
  a = bfloat16(float(a) / float(b));
198
305
  return a;
199
306
  }
@@ -215,22 +322,22 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {
215
322
  --a;
216
323
  return original_value;
217
324
  }
218
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const bfloat16& a, const bfloat16& b) {
219
- return numext::equal_strict(float(a),float(b));
325
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator==(const bfloat16& a, const bfloat16& b) {
326
+ return numext::equal_strict(float(a), float(b));
220
327
  }
221
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const bfloat16& a, const bfloat16& b) {
328
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator!=(const bfloat16& a, const bfloat16& b) {
222
329
  return numext::not_equal_strict(float(a), float(b));
223
330
  }
224
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const bfloat16& a, const bfloat16& b) {
331
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<(const bfloat16& a, const bfloat16& b) {
225
332
  return float(a) < float(b);
226
333
  }
227
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const bfloat16& a, const bfloat16& b) {
334
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<=(const bfloat16& a, const bfloat16& b) {
228
335
  return float(a) <= float(b);
229
336
  }
230
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const bfloat16& a, const bfloat16& b) {
337
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>(const bfloat16& a, const bfloat16& b) {
231
338
  return float(a) > float(b);
232
339
  }
233
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const bfloat16& b) {
340
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>=(const bfloat16& a, const bfloat16& b) {
234
341
  return float(a) >= float(b);
235
342
  }
236
343
 
@@ -241,49 +348,59 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const
241
348
 
242
349
  // Division by an index. Do it in full float precision to avoid accuracy
243
350
  // issues in converting the denominator to bfloat16.
244
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, Index b) {
351
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator/(const bfloat16& a, Index b) {
245
352
  return bfloat16(static_cast<float>(a) / static_cast<float>(b));
246
353
  }
247
354
 
248
355
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {
356
+ #if defined(EIGEN_USE_HIP_BF16)
357
+ return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(v, __bfloat16_raw::truncate));
358
+ #else
249
359
  __bfloat16_raw output;
250
- if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(v)) {
251
- output.value = std::signbit(v) ? 0xFFC0: 0x7FC0;
360
+ if (numext::isnan EIGEN_NOT_A_MACRO(v)) {
361
+ output.value = std::signbit(v) ? 0xFFC0 : 0x7FC0;
252
362
  return output;
253
363
  }
254
- const uint16_t* p = reinterpret_cast<const uint16_t*>(&v);
255
- #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
256
- output.value = p[0];
257
- #else
258
- output.value = p[1];
259
- #endif
364
+ output.value = static_cast<numext::uint16_t>(numext::bit_cast<numext::uint32_t>(v) >> 16);
260
365
  return output;
366
+ #endif
261
367
  }
262
368
 
263
369
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {
370
+ #if defined(EIGEN_USE_HIP_BF16)
371
+ __bfloat16_raw bf;
372
+ bf.data = value;
373
+ return bf;
374
+ #else
264
375
  return __bfloat16_raw(value);
376
+ #endif
265
377
  }
266
378
 
267
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {
379
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(
380
+ const __bfloat16_raw& bf) {
381
+ #if defined(EIGEN_USE_HIP_BF16)
382
+ return bf.data;
383
+ #else
268
384
  return bf.value;
385
+ #endif
269
386
  }
270
387
 
271
388
  // float_to_bfloat16_rtne template specialization that does not make any
272
389
  // assumption about the value of its function argument (ff).
273
390
  template <>
274
391
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) {
275
- #if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
276
- // Nothing to do here
392
+ #if defined(EIGEN_USE_HIP_BF16)
393
+ return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff));
277
394
  #else
278
395
  __bfloat16_raw output;
279
396
 
280
- if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) {
397
+ if (numext::isnan EIGEN_NOT_A_MACRO(ff)) {
281
398
  // If the value is a NaN, squash it to a qNaN with msb of fraction set,
282
399
  // this makes sure after truncation we don't end up with an inf.
283
400
  //
284
401
  // qNaN magic: All exponent bits set + most significant bit of fraction
285
402
  // set.
286
- output.value = std::signbit(ff) ? 0xFFC0: 0x7FC0;
403
+ output.value = std::signbit(ff) ? 0xFFC0 : 0x7FC0;
287
404
  } else {
288
405
  // Fast rounding algorithm that rounds a half value to nearest even. This
289
406
  // reduces expected error when we convert a large number of floats. Here
@@ -446,134 +563,99 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<fals
446
563
  // type to bfloat16.
447
564
  template <>
448
565
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
449
- #if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
450
- // Nothing to do here
566
+ #if defined(EIGEN_USE_HIP_BF16)
567
+ return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff));
451
568
  #else
452
- numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
453
- __bfloat16_raw output;
454
-
455
- // Least significant bit of resulting bfloat.
456
- numext::uint32_t lsb = (input >> 16) & 1;
457
- numext::uint32_t rounding_bias = 0x7fff + lsb;
458
- input += rounding_bias;
459
- output.value = static_cast<numext::uint16_t>(input >> 16);
460
- return output;
569
+ numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
570
+ __bfloat16_raw output;
571
+
572
+ // Least significant bit of resulting bfloat.
573
+ numext::uint32_t lsb = (input >> 16) & 1;
574
+ numext::uint32_t rounding_bias = 0x7fff + lsb;
575
+ input += rounding_bias;
576
+ output.value = static_cast<numext::uint16_t>(input >> 16);
577
+ return output;
461
578
  #endif
462
579
  }
463
580
 
464
581
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {
465
- float result = 0;
466
- unsigned short* q = reinterpret_cast<unsigned short*>(&result);
467
- #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
468
- q[0] = h.value;
582
+ #if defined(EIGEN_USE_HIP_BF16)
583
+ return static_cast<float>(h);
469
584
  #else
470
- q[1] = h.value;
585
+ return numext::bit_cast<float>(static_cast<numext::uint32_t>(h.value) << 16);
471
586
  #endif
472
- return result;
473
587
  }
588
+
474
589
  // --- standard functions ---
475
590
 
476
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) {
591
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isinf)(const bfloat16& a) {
477
592
  EIGEN_USING_STD(isinf);
593
+ #if defined(EIGEN_USE_HIP_BF16)
594
+ return (isinf)(a); // Uses HIP hip_bfloat16 isinf operator
595
+ #else
478
596
  return (isinf)(float(a));
597
+ #endif
479
598
  }
480
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) {
599
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isnan)(const bfloat16& a) {
481
600
  EIGEN_USING_STD(isnan);
601
+ #if defined(EIGEN_USE_HIP_BF16)
602
+ return (isnan)(a); // Uses HIP hip_bfloat16 isnan operator
603
+ #else
482
604
  return (isnan)(float(a));
605
+ #endif
483
606
  }
484
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) {
485
- return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
607
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isfinite)(const bfloat16& a) {
608
+ return !(isinf EIGEN_NOT_A_MACRO(a)) && !(isnan EIGEN_NOT_A_MACRO(a));
486
609
  }
487
610
 
488
611
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
489
- bfloat16 result;
490
- result.value = a.value & 0x7FFF;
491
- return result;
492
- }
493
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) {
494
- return bfloat16(::expf(float(a)));
495
- }
496
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) {
497
- return bfloat16(numext::expm1(float(a)));
498
- }
499
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) {
500
- return bfloat16(::logf(float(a)));
501
- }
502
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) {
503
- return bfloat16(numext::log1p(float(a)));
504
- }
505
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) {
506
- return bfloat16(::log10f(float(a)));
507
- }
612
+ numext::uint16_t x = numext::bit_cast<numext::uint16_t>(a) & 0x7FFF;
613
+ return numext::bit_cast<bfloat16>(x);
614
+ }
615
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) { return bfloat16(::expf(float(a))); }
616
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp2(const bfloat16& a) { return bfloat16(::exp2f(float(a))); }
617
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) { return bfloat16(numext::expm1(float(a))); }
618
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) { return bfloat16(::logf(float(a))); }
619
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) { return bfloat16(numext::log1p(float(a))); }
620
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) { return bfloat16(::log10f(float(a))); }
508
621
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {
509
622
  return bfloat16(static_cast<float>(EIGEN_LOG2E) * ::logf(float(a)));
510
623
  }
511
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) {
512
- return bfloat16(::sqrtf(float(a)));
513
- }
624
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) { return bfloat16(::sqrtf(float(a))); }
514
625
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
515
626
  return bfloat16(::powf(float(a), float(b)));
516
627
  }
517
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {
518
- return bfloat16(::sinf(float(a)));
519
- }
520
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) {
521
- return bfloat16(::cosf(float(a)));
522
- }
523
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) {
524
- return bfloat16(::tanf(float(a)));
525
- }
526
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) {
527
- return bfloat16(::asinf(float(a)));
528
- }
529
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) {
530
- return bfloat16(::acosf(float(a)));
531
- }
532
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) {
533
- return bfloat16(::atanf(float(a)));
534
- }
535
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) {
536
- return bfloat16(::sinhf(float(a)));
537
- }
538
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) {
539
- return bfloat16(::coshf(float(a)));
540
- }
541
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
542
- return bfloat16(::tanhf(float(a)));
543
- }
544
- #if EIGEN_HAS_CXX11_MATH
545
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
546
- return bfloat16(::asinhf(float(a)));
547
- }
548
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
549
- return bfloat16(::acoshf(float(a)));
550
- }
551
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
552
- return bfloat16(::atanhf(float(a)));
553
- }
554
- #endif
555
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
556
- return bfloat16(::floorf(float(a)));
557
- }
558
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
559
- return bfloat16(::ceilf(float(a)));
560
- }
561
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
562
- return bfloat16(::rintf(float(a)));
563
- }
564
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) {
565
- return bfloat16(::roundf(float(a)));
566
- }
628
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan2(const bfloat16& a, const bfloat16& b) {
629
+ return bfloat16(::atan2f(float(a), float(b)));
630
+ }
631
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) { return bfloat16(::sinf(float(a))); }
632
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) { return bfloat16(::cosf(float(a))); }
633
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) { return bfloat16(::tanf(float(a))); }
634
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) { return bfloat16(::asinf(float(a))); }
635
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) { return bfloat16(::acosf(float(a))); }
636
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) { return bfloat16(::atanf(float(a))); }
637
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) { return bfloat16(::sinhf(float(a))); }
638
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) { return bfloat16(::coshf(float(a))); }
639
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) { return bfloat16(::tanhf(float(a))); }
640
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) { return bfloat16(::asinhf(float(a))); }
641
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) { return bfloat16(::acoshf(float(a))); }
642
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) { return bfloat16(::atanhf(float(a))); }
643
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) { return bfloat16(::floorf(float(a))); }
644
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) { return bfloat16(::ceilf(float(a))); }
645
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) { return bfloat16(::rintf(float(a))); }
646
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) { return bfloat16(::roundf(float(a))); }
647
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 trunc(const bfloat16& a) { return bfloat16(::truncf(float(a))); }
567
648
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {
568
649
  return bfloat16(::fmodf(float(a), float(b)));
569
650
  }
570
651
 
571
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bfloat16& b) {
652
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16(min)(const bfloat16& a, const bfloat16& b) {
572
653
  const float f1 = static_cast<float>(a);
573
654
  const float f2 = static_cast<float>(b);
574
655
  return f2 < f1 ? b : a;
575
656
  }
576
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) {
657
+
658
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16(max)(const bfloat16& a, const bfloat16& b) {
577
659
  const float f1 = static_cast<float>(a);
578
660
  const float f2 = static_cast<float>(b);
579
661
  return f1 < f2 ? b : a;
@@ -584,56 +666,59 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfl
584
666
  const float f2 = static_cast<float>(b);
585
667
  return bfloat16(::fminf(f1, f2));
586
668
  }
669
+
587
670
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
588
671
  const float f1 = static_cast<float>(a);
589
672
  const float f2 = static_cast<float>(b);
590
673
  return bfloat16(::fmaxf(f1, f2));
591
674
  }
592
675
 
676
+ EIGEN_DEVICE_FUNC inline bfloat16 fma(const bfloat16& a, const bfloat16& b, const bfloat16& c) {
677
+ // Emulate FMA via float.
678
+ return bfloat16(numext::fma(static_cast<float>(a), static_cast<float>(b), static_cast<float>(c)));
679
+ }
680
+
593
681
  #ifndef EIGEN_NO_IO
594
- EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
682
+ EIGEN_ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, const bfloat16& v) {
595
683
  os << static_cast<float>(v);
596
684
  return os;
597
685
  }
598
686
  #endif
599
687
 
600
- } // namespace bfloat16_impl
688
+ } // namespace bfloat16_impl
601
689
 
602
690
  namespace internal {
603
691
 
604
- template<>
605
- struct random_default_impl<bfloat16, false, false>
606
- {
607
- static inline bfloat16 run(const bfloat16& x, const bfloat16& y)
608
- {
609
- return x + (y-x) * bfloat16(float(std::rand()) / float(RAND_MAX));
692
+ template <>
693
+ struct is_arithmetic<bfloat16> {
694
+ enum { value = true };
695
+ };
696
+
697
+ template <>
698
+ struct random_impl<bfloat16> {
699
+ enum : int { MantissaBits = 7 };
700
+ using Impl = random_impl<float>;
701
+ static EIGEN_DEVICE_FUNC inline bfloat16 run(const bfloat16& x, const bfloat16& y) {
702
+ float result = Impl::run(x, y, MantissaBits);
703
+ return bfloat16(result);
610
704
  }
611
- static inline bfloat16 run()
612
- {
613
- return run(bfloat16(-1.f), bfloat16(1.f));
705
+ static EIGEN_DEVICE_FUNC inline bfloat16 run() {
706
+ float result = Impl::run(MantissaBits);
707
+ return bfloat16(result);
614
708
  }
615
709
  };
616
710
 
617
- template<> struct is_arithmetic<bfloat16> { enum { value = true }; };
618
-
619
- } // namespace internal
711
+ } // namespace internal
620
712
 
621
- template<> struct NumTraits<Eigen::bfloat16>
622
- : GenericNumTraits<Eigen::bfloat16>
623
- {
624
- enum {
625
- IsSigned = true,
626
- IsInteger = false,
627
- IsComplex = false,
628
- RequireInitialization = false
629
- };
713
+ template <>
714
+ struct NumTraits<Eigen::bfloat16> : GenericNumTraits<Eigen::bfloat16> {
715
+ enum { IsSigned = true, IsInteger = false, IsComplex = false, RequireInitialization = false };
630
716
 
631
717
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
632
718
  return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);
633
719
  }
634
720
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {
635
721
  return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f);
636
-
637
722
  }
638
723
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
639
724
  return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);
@@ -649,32 +734,33 @@ template<> struct NumTraits<Eigen::bfloat16>
649
734
  }
650
735
  };
651
736
 
652
- } // namespace Eigen
737
+ } // namespace Eigen
738
+
739
+ #if defined(EIGEN_HAS_HIP_BF16)
740
+ #pragma pop_macro("EIGEN_CONSTEXPR")
741
+ #endif
653
742
 
654
743
  namespace Eigen {
655
744
  namespace numext {
656
745
 
657
- template<>
658
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
659
- bool (isnan)(const Eigen::bfloat16& h) {
746
+ template <>
747
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isnan)(const Eigen::bfloat16& h) {
660
748
  return (bfloat16_impl::isnan)(h);
661
749
  }
662
750
 
663
- template<>
664
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
665
- bool (isinf)(const Eigen::bfloat16& h) {
751
+ template <>
752
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isinf)(const Eigen::bfloat16& h) {
666
753
  return (bfloat16_impl::isinf)(h);
667
754
  }
668
755
 
669
- template<>
670
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
671
- bool (isfinite)(const Eigen::bfloat16& h) {
756
+ template <>
757
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const Eigen::bfloat16& h) {
672
758
  return (bfloat16_impl::isfinite)(h);
673
759
  }
674
760
 
675
761
  template <>
676
762
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
677
- return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));
763
+ return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src);
678
764
  }
679
765
 
680
766
  template <>
@@ -682,6 +768,37 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat1
682
768
  return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
683
769
  }
684
770
 
771
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 nextafter(const bfloat16& from, const bfloat16& to) {
772
+ if (numext::isnan EIGEN_NOT_A_MACRO(from)) {
773
+ return from;
774
+ }
775
+ if (numext::isnan EIGEN_NOT_A_MACRO(to)) {
776
+ return to;
777
+ }
778
+ if (from == to) {
779
+ return to;
780
+ }
781
+ uint16_t from_bits = numext::bit_cast<uint16_t>(from);
782
+ bool from_sign = from_bits >> 15;
783
+ // Whether we are adjusting toward the infinity with the same sign as from.
784
+ bool toward_inf = (to > from) == !from_sign;
785
+ if (toward_inf) {
786
+ ++from_bits;
787
+ } else if ((from_bits & 0x7fff) == 0) {
788
+ // Adjusting away from inf, but from is zero, so just toggle the sign.
789
+ from_bits ^= 0x8000;
790
+ } else {
791
+ --from_bits;
792
+ }
793
+ return numext::bit_cast<bfloat16>(from_bits);
794
+ }
795
+
796
+ // Specialize multiply-add to match packet operations and reduce conversions to/from float.
797
+ template<>
798
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 madd<Eigen::bfloat16>(const Eigen::bfloat16& x, const Eigen::bfloat16& y, const Eigen::bfloat16& z) {
799
+ return Eigen::bfloat16(static_cast<float>(x) * static_cast<float>(y) + static_cast<float>(z));
800
+ }
801
+
685
802
  } // namespace numext
686
803
  } // namespace Eigen
687
804
 
@@ -693,8 +810,57 @@ struct hash<Eigen::bfloat16> {
693
810
  return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
694
811
  }
695
812
  };
696
- } // namespace std
813
+ } // namespace std
697
814
  #endif
698
815
 
816
+ // Add the missing shfl* intrinsics.
817
+ // The __shfl* functions are only valid on HIP or __CUDA_ARCH__ >= 300.
818
+ // CUDA defines them for (__CUDA_ARCH__ >= 300 || !defined(__CUDA_ARCH__))
819
+ //
820
+ // HIP and CUDA prior to SDK 9.0 define
821
+ // __shfl, __shfl_up, __shfl_down, __shfl_xor for int and float
822
+ // CUDA since 9.0 deprecates those and instead defines
823
+ // __shfl_sync, __shfl_up_sync, __shfl_down_sync, __shfl_xor_sync,
824
+ // with native support for __half and __nv_bfloat16
825
+ //
826
+ // Note that the following are __device__-only functions.
827
+ #if defined(EIGEN_HIPCC)
828
+
829
+ #if defined(EIGEN_HAS_HIP_BF16)
830
+
831
+ __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl(Eigen::bfloat16 var, int srcLane, int width = warpSize) {
832
+ const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
833
+ return Eigen::numext::bit_cast<Eigen::bfloat16>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
834
+ }
835
+
836
+ __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_up(Eigen::bfloat16 var, unsigned int delta,
837
+ int width = warpSize) {
838
+ const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
839
+ return Eigen::numext::bit_cast<Eigen::bfloat16>(static_cast<Eigen::numext::uint16_t>(__shfl_up(ivar, delta, width)));
840
+ }
841
+
842
+ __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_down(Eigen::bfloat16 var, unsigned int delta,
843
+ int width = warpSize) {
844
+ const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
845
+ return Eigen::numext::bit_cast<Eigen::bfloat16>(
846
+ static_cast<Eigen::numext::uint16_t>(__shfl_down(ivar, delta, width)));
847
+ }
848
+
849
+ __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_xor(Eigen::bfloat16 var, int laneMask, int width = warpSize) {
850
+ const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
851
+ return Eigen::numext::bit_cast<Eigen::bfloat16>(
852
+ static_cast<Eigen::numext::uint16_t>(__shfl_xor(ivar, laneMask, width)));
853
+ }
854
+
855
+ #endif // HIP
856
+
857
+ #endif // __shfl*
858
+
859
+ #if defined(EIGEN_HIPCC)
860
+ EIGEN_STRONG_INLINE __device__ Eigen::bfloat16 __ldg(const Eigen::bfloat16* ptr) {
861
+ return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(
862
+ __ldg(Eigen::numext::bit_cast<const Eigen::numext::uint16_t*>(ptr)));
863
+ }
864
+ #endif // __ldg
699
865
 
700
- #endif // EIGEN_BFLOAT16_H
866
+ #endif // EIGEN_BFLOAT16_H