npm - @smake/eigen - Versions diffs - 1.1.0 → 1.1.1 - Mend

@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (431) hide show

package/README.md +1 -1
package/eigen/Eigen/AccelerateSupport +52 -0
package/eigen/Eigen/Cholesky +18 -20
package/eigen/Eigen/CholmodSupport +28 -28
package/eigen/Eigen/Core +187 -120
package/eigen/Eigen/Eigenvalues +16 -13
package/eigen/Eigen/Geometry +18 -18
package/eigen/Eigen/Householder +9 -7
package/eigen/Eigen/IterativeLinearSolvers +8 -4
package/eigen/Eigen/Jacobi +14 -13
package/eigen/Eigen/KLUSupport +23 -21
package/eigen/Eigen/LU +15 -16
package/eigen/Eigen/MetisSupport +12 -12
package/eigen/Eigen/OrderingMethods +54 -51
package/eigen/Eigen/PaStiXSupport +23 -21
package/eigen/Eigen/PardisoSupport +17 -14
package/eigen/Eigen/QR +18 -20
package/eigen/Eigen/QtAlignedMalloc +5 -12
package/eigen/Eigen/SPQRSupport +21 -14
package/eigen/Eigen/SVD +23 -17
package/eigen/Eigen/Sparse +1 -2
package/eigen/Eigen/SparseCholesky +18 -15
package/eigen/Eigen/SparseCore +18 -17
package/eigen/Eigen/SparseLU +9 -9
package/eigen/Eigen/SparseQR +16 -14
package/eigen/Eigen/StdDeque +5 -2
package/eigen/Eigen/StdList +5 -2
package/eigen/Eigen/StdVector +5 -2
package/eigen/Eigen/SuperLUSupport +30 -24
package/eigen/Eigen/ThreadPool +80 -0
package/eigen/Eigen/UmfPackSupport +19 -17
package/eigen/Eigen/Version +14 -0
package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
package/eigen/Eigen/src/Core/Array.h +329 -370
package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
package/eigen/Eigen/src/Core/Assign.h +30 -40
package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
package/eigen/Eigen/src/Core/Block.h +371 -390
package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
package/eigen/Eigen/src/Core/DenseBase.h +630 -658
package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
package/eigen/Eigen/src/Core/Diagonal.h +168 -207
package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
package/eigen/Eigen/src/Core/Dot.h +167 -217
package/eigen/Eigen/src/Core/EigenBase.h +74 -85
package/eigen/Eigen/src/Core/Fill.h +138 -0
package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
package/eigen/Eigen/src/Core/IO.h +131 -156
package/eigen/Eigen/src/Core/IndexedView.h +209 -125
package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Core/Inverse.h +50 -59
package/eigen/Eigen/src/Core/Map.h +123 -141
package/eigen/Eigen/src/Core/MapBase.h +255 -282
package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
package/eigen/Eigen/src/Core/Matrix.h +463 -494
package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
package/eigen/Eigen/src/Core/NestByValue.h +58 -52
package/eigen/Eigen/src/Core/NoAlias.h +79 -86
package/eigen/Eigen/src/Core/NumTraits.h +206 -206
package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
package/eigen/Eigen/src/Core/Product.h +246 -130
package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
package/eigen/Eigen/src/Core/Random.h +153 -164
package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
package/eigen/Eigen/src/Core/RealView.h +250 -0
package/eigen/Eigen/src/Core/Redux.h +334 -314
package/eigen/Eigen/src/Core/Ref.h +259 -257
package/eigen/Eigen/src/Core/Replicate.h +92 -104
package/eigen/Eigen/src/Core/Reshaped.h +215 -271
package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
package/eigen/Eigen/src/Core/Reverse.h +133 -148
package/eigen/Eigen/src/Core/Select.h +68 -140
package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
package/eigen/Eigen/src/Core/Solve.h +88 -102
package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
package/eigen/Eigen/src/Core/SolverBase.h +132 -133
package/eigen/Eigen/src/Core/StableNorm.h +113 -147
package/eigen/Eigen/src/Core/StlIterators.h +404 -248
package/eigen/Eigen/src/Core/Stride.h +90 -92
package/eigen/Eigen/src/Core/Swap.h +70 -39
package/eigen/Eigen/src/Core/Transpose.h +258 -295
package/eigen/Eigen/src/Core/Transpositions.h +270 -333
package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
package/eigen/Eigen/src/Core/Visitor.h +464 -308
package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
package/eigen/Eigen/src/Core/util/Assert.h +158 -0
package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
package/eigen/Eigen/src/Core/util/Constants.h +297 -262
package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
package/eigen/Eigen/src/Core/util/Macros.h +655 -773
package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
package/eigen/Eigen/src/Core/util/Memory.h +970 -748
package/eigen/Eigen/src/Core/util/Meta.h +581 -633
package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
package/eigen/Eigen/src/Geometry/Transform.h +858 -936
package/eigen/Eigen/src/Geometry/Translation.h +94 -92
package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
package/eigen/Eigen/src/Householder/Householder.h +102 -124
package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
package/eigen/Eigen/src/LU/Determinant.h +50 -69
package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
package/eigen/Eigen/src/StlSupport/details.h +48 -50
package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
package/eigen/Eigen/src/misc/Image.h +41 -43
package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/misc/Kernel.h +39 -41
package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
package/eigen/Eigen/src/misc/blas.h +83 -426
package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
package/package.json +1 -1
package/eigen/COPYING.APACHE +0 -203
package/eigen/COPYING.BSD +0 -26
package/eigen/COPYING.GPL +0 -674
package/eigen/COPYING.LGPL +0 -502
package/eigen/COPYING.MINPACK +0 -51
package/eigen/COPYING.MPL2 +0 -373
package/eigen/COPYING.README +0 -18
package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
package/eigen/Eigen/src/misc/lapack.h +0 -152
package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
package/eigen/README.md +0 -5

package/eigen/Eigen/src/Core/arch/Default/BFloat16.h CHANGED Viewed

@@ -16,26 +16,69 @@ limitations under the License.
 #ifndef EIGEN_BFLOAT16_H
 #define EIGEN_BFLOAT16_H
-#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)         \
-  template <>                                                       \
-  EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED  \
-  PACKET_BF16 METHOD<PACKET_BF16>(const PACKET_BF16& _x) {          \
-    return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_x)));              \
+// IWYU pragma: private
+#include "../../InternalHeaderCheck.h"
+#if defined(EIGEN_HAS_HIP_BF16)
+// When compiling with GPU support, the "hip_bfloat16" base class as well as
+// some other routines are defined in the GPU compiler header files
+// (hip_bfloat16.h), and they are not tagged constexpr
+// As a consequence, we get compile failures when compiling Eigen with
+// GPU support. Hence the need to disable EIGEN_CONSTEXPR when building
+// Eigen with GPU support
+#pragma push_macro("EIGEN_CONSTEXPR")
+#undef EIGEN_CONSTEXPR
+#define EIGEN_CONSTEXPR
+#endif
+#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)                                         \
+  template <>                                                                                       \
+  EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED PACKET_BF16 METHOD<PACKET_BF16>( \
+      const PACKET_BF16& _x) {                                                                      \
+    return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_x)));                                              \
   }
+// Only use HIP GPU bf16 in kernels
+#if defined(EIGEN_HAS_HIP_BF16) && defined(EIGEN_GPU_COMPILE_PHASE)
+#define EIGEN_USE_HIP_BF16
+#endif
 namespace Eigen {
 struct bfloat16;
+namespace numext {
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src);
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src);
+}  // namespace numext
 namespace bfloat16_impl {
+#if defined(EIGEN_USE_HIP_BF16)
+struct __bfloat16_raw : public hip_bfloat16 {
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() {}
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(hip_bfloat16 hb) : hip_bfloat16(hb) {}
+  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : hip_bfloat16(raw) {}
+};
+#else
 // Make our own __bfloat16_raw definition.
 struct __bfloat16_raw {
+#if defined(EIGEN_HAS_HIP_BF16) && !defined(EIGEN_GPU_COMPILE_PHASE)
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() {}
+#else
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}
+#endif
   explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
   unsigned short value;
 };
+#endif  // defined(EIGEN_USE_HIP_BF16)
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);
 template <bool AssumeArgumentIsNormalOrInfinityOrZero>
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff);
@@ -52,11 +95,10 @@ struct bfloat16_base : public __bfloat16_raw {
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
 };
-} // namespace bfloat16_impl
+}  // namespace bfloat16_impl
 // Class definition.
 struct bfloat16 : public bfloat16_impl::bfloat16_base {
   typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}
@@ -66,16 +108,17 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
   explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)
       : bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
-  template<class T>
+  template <class T>
   explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)
-      : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
+      : bfloat16_impl::bfloat16_base(
+            bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
   explicit EIGEN_DEVICE_FUNC bfloat16(float f)
       : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
   // Following the convention of numpy, converting between complex and
   // float will lead to loss of imag value.
-  template<typename RealScalar>
+  template <typename RealScalar>
   explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
       : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
@@ -83,57 +126,122 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
     return bfloat16_impl::bfloat16_to_float(*this);
   }
 };
-} // namespace Eigen
-namespace std {
-template<>
-struct numeric_limits<Eigen::bfloat16> {
-  static const bool is_specialized = true;
-  static const bool is_signed = true;
-  static const bool is_integer = false;
-  static const bool is_exact = false;
-  static const bool has_infinity = true;
-  static const bool has_quiet_NaN = true;
-  static const bool has_signaling_NaN = true;
-  static const float_denorm_style has_denorm = std::denorm_absent;
-  static const bool has_denorm_loss = false;
-  static const std::float_round_style round_style = numeric_limits<float>::round_style;
-  static const bool is_iec559 = false;
-  static const bool is_bounded = true;
-  static const bool is_modulo = false;
-  static const int digits = 8;
-  static const int digits10 = 2;
-  static const int max_digits10 = 4;
-  static const int radix = 2;
-  static const int min_exponent = numeric_limits<float>::min_exponent;
-  static const int min_exponent10 = numeric_limits<float>::min_exponent10;
-  static const int max_exponent = numeric_limits<float>::max_exponent;
-  static const int max_exponent10 = numeric_limits<float>::max_exponent10;
-  static const bool traps = numeric_limits<float>::traps;
-  static const bool tinyness_before = numeric_limits<float>::tinyness_before;
-  static Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
-  static Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
-  static Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
-  static Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
-  static Eigen::bfloat16 round_error() { return Eigen::bfloat16(0x3f00); }
-  static Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
-  static Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
-  static Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); }
-  static Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
+// TODO(majnemer): Get rid of this once we can rely on C++17 inline variables to
+// solve the ODR issue.
+namespace bfloat16_impl {
+template <typename = void>
+struct numeric_limits_bfloat16_impl {
+  static EIGEN_CONSTEXPR const bool is_specialized = true;
+  static EIGEN_CONSTEXPR const bool is_signed = true;
+  static EIGEN_CONSTEXPR const bool is_integer = false;
+  static EIGEN_CONSTEXPR const bool is_exact = false;
+  static EIGEN_CONSTEXPR const bool has_infinity = true;
+  static EIGEN_CONSTEXPR const bool has_quiet_NaN = true;
+  static EIGEN_CONSTEXPR const bool has_signaling_NaN = true;
+  EIGEN_DIAGNOSTICS(push)
+  EIGEN_DISABLE_DEPRECATED_WARNING
+  static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm = std::denorm_present;
+  static EIGEN_CONSTEXPR const bool has_denorm_loss = false;
+  EIGEN_DIAGNOSTICS(pop)
+  static EIGEN_CONSTEXPR const std::float_round_style round_style = std::numeric_limits<float>::round_style;
+  static EIGEN_CONSTEXPR const bool is_iec559 = true;
+  // The C++ standard defines this as "true if the set of values representable
+  // by the type is finite." BFloat16 has finite precision.
+  static EIGEN_CONSTEXPR const bool is_bounded = true;
+  static EIGEN_CONSTEXPR const bool is_modulo = false;
+  static EIGEN_CONSTEXPR const int digits = 8;
+  static EIGEN_CONSTEXPR const int digits10 = 2;
+  static EIGEN_CONSTEXPR const int max_digits10 = 4;
+  static EIGEN_CONSTEXPR const int radix = std::numeric_limits<float>::radix;
+  static EIGEN_CONSTEXPR const int min_exponent = std::numeric_limits<float>::min_exponent;
+  static EIGEN_CONSTEXPR const int min_exponent10 = std::numeric_limits<float>::min_exponent10;
+  static EIGEN_CONSTEXPR const int max_exponent = std::numeric_limits<float>::max_exponent;
+  static EIGEN_CONSTEXPR const int max_exponent10 = std::numeric_limits<float>::max_exponent10;
+  static EIGEN_CONSTEXPR const bool traps = std::numeric_limits<float>::traps;
+  // IEEE754: "The implementer shall choose how tininess is detected, but shall
+  // detect tininess in the same way for all operations in radix two"
+  static EIGEN_CONSTEXPR const bool tinyness_before = std::numeric_limits<float>::tinyness_before;
+  static EIGEN_CONSTEXPR Eigen::bfloat16(min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
+  static EIGEN_CONSTEXPR Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
+  static EIGEN_CONSTEXPR Eigen::bfloat16(max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
+  static EIGEN_CONSTEXPR Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
+  static EIGEN_CONSTEXPR Eigen::bfloat16 round_error() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3f00); }
+  static EIGEN_CONSTEXPR Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
+  static EIGEN_CONSTEXPR Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
+  static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN() {
+    return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fa0);
+  }
+  static EIGEN_CONSTEXPR Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
 };
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_specialized;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_signed;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_integer;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_exact;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_infinity;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_quiet_NaN;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_signaling_NaN;
+EIGEN_DIAGNOSTICS(push)
+EIGEN_DISABLE_DEPRECATED_WARNING
+template <typename T>
+EIGEN_CONSTEXPR const std::float_denorm_style numeric_limits_bfloat16_impl<T>::has_denorm;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_denorm_loss;
+EIGEN_DIAGNOSTICS(pop)
+template <typename T>
+EIGEN_CONSTEXPR const std::float_round_style numeric_limits_bfloat16_impl<T>::round_style;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_iec559;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_bounded;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::is_modulo;
+template <typename T>
+EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::digits;
+template <typename T>
+EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::digits10;
+template <typename T>
+EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_digits10;
+template <typename T>
+EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::radix;
+template <typename T>
+EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::min_exponent;
+template <typename T>
+EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::min_exponent10;
+template <typename T>
+EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_exponent;
+template <typename T>
+EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl<T>::max_exponent10;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::traps;
+template <typename T>
+EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::tinyness_before;
+}  // end namespace bfloat16_impl
+}  // end namespace Eigen
+namespace std {
 // If std::numeric_limits<T> is specialized, should also specialize
 // std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
 // std::numeric_limits<const volatile T>
 // https://stackoverflow.com/a/16519653/
-template<>
-struct numeric_limits<const Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
-template<>
-struct numeric_limits<volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
-template<>
-struct numeric_limits<const volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
-} // namespace std
+template <>
+class numeric_limits<Eigen::bfloat16> : public Eigen::bfloat16_impl::numeric_limits_bfloat16_impl<> {};
+template <>
+class numeric_limits<const Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
+template <>
+class numeric_limits<volatile Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
+template <>
+class numeric_limits<const volatile Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
+}  // end namespace std
 namespace Eigen {
@@ -142,15 +250,15 @@ namespace bfloat16_impl {
 // We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
 // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
 // of the functions, while the latter can only deal with one of them.
-#if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
+#if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)  // Emulate support for bfloat16 floats
 #if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
 // We need to provide emulated *host-side* BF16 operators for clang.
 #pragma push_macro("EIGEN_DEVICE_FUNC")
 #undef EIGEN_DEVICE_FUNC
-#if defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_NATIVE_BF16)
+#if (defined(EIGEN_HAS_GPU_BF16) && defined(EIGEN_HAS_NATIVE_BF16))
 #define EIGEN_DEVICE_FUNC __host__
-#else // both host and device need emulated ops.
+#else  // both host and device need emulated ops.
 #define EIGEN_DEVICE_FUNC __host__ __device__
 #endif
 #endif
@@ -158,42 +266,41 @@ namespace bfloat16_impl {
 // Definitions for CPUs, mostly working through conversion
 // to/from fp32.
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const bfloat16& a, const bfloat16& b) {
   return bfloat16(float(a) + float(b));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const int& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const bfloat16& a, const int& b) {
   return bfloat16(float(a) + static_cast<float>(b));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const int& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator+(const int& a, const bfloat16& b) {
   return bfloat16(static_cast<float>(a) + float(b));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator * (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator*(const bfloat16& a, const bfloat16& b) {
   return bfloat16(float(a) * float(b));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator-(const bfloat16& a, const bfloat16& b) {
   return bfloat16(float(a) - float(b));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator/(const bfloat16& a, const bfloat16& b) {
   return bfloat16(float(a) / float(b));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) {
-  bfloat16 result;
-  result.value = a.value ^ 0x8000;
-  return result;
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator-(const bfloat16& a) {
+  numext::uint16_t x = numext::bit_cast<uint16_t>(a) ^ 0x8000;
+  return numext::bit_cast<bfloat16>(x);
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator+=(bfloat16& a, const bfloat16& b) {
   a = bfloat16(float(a) + float(b));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator *= (bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator*=(bfloat16& a, const bfloat16& b) {
   a = bfloat16(float(a) * float(b));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator -= (bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator-=(bfloat16& a, const bfloat16& b) {
   a = bfloat16(float(a) - float(b));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator /= (bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator/=(bfloat16& a, const bfloat16& b) {
   a = bfloat16(float(a) / float(b));
   return a;
 }
@@ -215,22 +322,22 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {
   --a;
   return original_value;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const bfloat16& a, const bfloat16& b) {
-  return numext::equal_strict(float(a),float(b));
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator==(const bfloat16& a, const bfloat16& b) {
+  return numext::equal_strict(float(a), float(b));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator!=(const bfloat16& a, const bfloat16& b) {
   return numext::not_equal_strict(float(a), float(b));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<(const bfloat16& a, const bfloat16& b) {
   return float(a) < float(b);
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<=(const bfloat16& a, const bfloat16& b) {
   return float(a) <= float(b);
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>(const bfloat16& a, const bfloat16& b) {
   return float(a) > float(b);
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>=(const bfloat16& a, const bfloat16& b) {
   return float(a) >= float(b);
 }
@@ -241,49 +348,59 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const
 // Division by an index. Do it in full float precision to avoid accuracy
 // issues in converting the denominator to bfloat16.
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, Index b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator/(const bfloat16& a, Index b) {
   return bfloat16(static_cast<float>(a) / static_cast<float>(b));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {
+#if defined(EIGEN_USE_HIP_BF16)
+  return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(v, __bfloat16_raw::truncate));
+#else
   __bfloat16_raw output;
-  if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(v)) {
-    output.value = std::signbit(v) ? 0xFFC0: 0x7FC0;
+  if (numext::isnan EIGEN_NOT_A_MACRO(v)) {
+    output.value = std::signbit(v) ? 0xFFC0 : 0x7FC0;
     return output;
   }
-  const uint16_t* p = reinterpret_cast<const uint16_t*>(&v);
-#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-  output.value = p[0];
-#else
-  output.value = p[1];
-#endif
+  output.value = static_cast<numext::uint16_t>(numext::bit_cast<numext::uint32_t>(v) >> 16);
   return output;
+#endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {
+#if defined(EIGEN_USE_HIP_BF16)
+  __bfloat16_raw bf;
+  bf.data = value;
+  return bf;
+#else
   return __bfloat16_raw(value);
+#endif
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(
+    const __bfloat16_raw& bf) {
+#if defined(EIGEN_USE_HIP_BF16)
+  return bf.data;
+#else
   return bf.value;
+#endif
 }
 // float_to_bfloat16_rtne template specialization that does not make any
 // assumption about the value of its function argument (ff).
 template <>
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) {
-#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
-  // Nothing to do here
+#if defined(EIGEN_USE_HIP_BF16)
+  return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff));
 #else
   __bfloat16_raw output;
-  if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) {
+  if (numext::isnan EIGEN_NOT_A_MACRO(ff)) {
     // If the value is a NaN, squash it to a qNaN with msb of fraction set,
     // this makes sure after truncation we don't end up with an inf.
     //
     // qNaN magic: All exponent bits set + most significant bit of fraction
     // set.
-    output.value = std::signbit(ff) ? 0xFFC0: 0x7FC0;
+    output.value = std::signbit(ff) ? 0xFFC0 : 0x7FC0;
   } else {
     // Fast rounding algorithm that rounds a half value to nearest even. This
     // reduces expected error when we convert a large number of floats. Here
@@ -446,134 +563,99 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<fals
 // type to bfloat16.
 template <>
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
-#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
-    // Nothing to do here
+#if defined(EIGEN_USE_HIP_BF16)
+  return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff));
 #else
-    numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
-    __bfloat16_raw output;
-    // Least significant bit of resulting bfloat.
-    numext::uint32_t lsb = (input >> 16) & 1;
-    numext::uint32_t rounding_bias = 0x7fff + lsb;
-    input += rounding_bias;
-    output.value = static_cast<numext::uint16_t>(input >> 16);
-    return output;
+  numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
+  __bfloat16_raw output;
+  // Least significant bit of resulting bfloat.
+  numext::uint32_t lsb = (input >> 16) & 1;
+  numext::uint32_t rounding_bias = 0x7fff + lsb;
+  input += rounding_bias;
+  output.value = static_cast<numext::uint16_t>(input >> 16);
+  return output;
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {
-    float result = 0;
-    unsigned short* q = reinterpret_cast<unsigned short*>(&result);
-#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    q[0] = h.value;
+#if defined(EIGEN_USE_HIP_BF16)
+  return static_cast<float>(h);
 #else
-    q[1] = h.value;
+  return numext::bit_cast<float>(static_cast<numext::uint32_t>(h.value) << 16);
 #endif
-    return result;
 }
 // --- standard functions ---
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isinf)(const bfloat16& a) {
   EIGEN_USING_STD(isinf);
+#if defined(EIGEN_USE_HIP_BF16)
+  return (isinf)(a);  // Uses HIP hip_bfloat16 isinf operator
+#else
   return (isinf)(float(a));
+#endif
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isnan)(const bfloat16& a) {
   EIGEN_USING_STD(isnan);
+#if defined(EIGEN_USE_HIP_BF16)
+  return (isnan)(a);  // Uses HIP hip_bfloat16 isnan operator
+#else
   return (isnan)(float(a));
+#endif
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) {
-  return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isfinite)(const bfloat16& a) {
+  return !(isinf EIGEN_NOT_A_MACRO(a)) && !(isnan EIGEN_NOT_A_MACRO(a));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
-  bfloat16 result;
-  result.value = a.value & 0x7FFF;
-  return result;
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) {
-   return bfloat16(::expf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) {
-  return bfloat16(numext::expm1(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) {
-  return bfloat16(::logf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) {
-  return bfloat16(numext::log1p(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) {
-  return bfloat16(::log10f(float(a)));
-}
+  numext::uint16_t x = numext::bit_cast<numext::uint16_t>(a) & 0x7FFF;
+  return numext::bit_cast<bfloat16>(x);
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) { return bfloat16(::expf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp2(const bfloat16& a) { return bfloat16(::exp2f(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) { return bfloat16(numext::expm1(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) { return bfloat16(::logf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) { return bfloat16(numext::log1p(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) { return bfloat16(::log10f(float(a))); }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {
   return bfloat16(static_cast<float>(EIGEN_LOG2E) * ::logf(float(a)));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) {
-    return bfloat16(::sqrtf(float(a)));
-}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) { return bfloat16(::sqrtf(float(a))); }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
   return bfloat16(::powf(float(a), float(b)));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {
-  return bfloat16(::sinf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) {
-  return bfloat16(::cosf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) {
-  return bfloat16(::tanf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) {
-  return bfloat16(::asinf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) {
-  return bfloat16(::acosf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) {
-  return bfloat16(::atanf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) {
-  return bfloat16(::sinhf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) {
-  return bfloat16(::coshf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
-  return bfloat16(::tanhf(float(a)));
-}
-#if EIGEN_HAS_CXX11_MATH
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
-  return bfloat16(::asinhf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
-  return bfloat16(::acoshf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
-  return bfloat16(::atanhf(float(a)));
-}
-#endif
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
-  return bfloat16(::floorf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
-  return bfloat16(::ceilf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
-  return bfloat16(::rintf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) {
-  return bfloat16(::roundf(float(a)));
-}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan2(const bfloat16& a, const bfloat16& b) {
+  return bfloat16(::atan2f(float(a), float(b)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) { return bfloat16(::sinf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) { return bfloat16(::cosf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) { return bfloat16(::tanf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) { return bfloat16(::asinf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) { return bfloat16(::acosf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) { return bfloat16(::atanf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) { return bfloat16(::sinhf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) { return bfloat16(::coshf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) { return bfloat16(::tanhf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) { return bfloat16(::asinhf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) { return bfloat16(::acoshf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) { return bfloat16(::atanhf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) { return bfloat16(::floorf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) { return bfloat16(::ceilf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) { return bfloat16(::rintf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) { return bfloat16(::roundf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 trunc(const bfloat16& a) { return bfloat16(::truncf(float(a))); }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {
   return bfloat16(::fmodf(float(a), float(b)));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16(min)(const bfloat16& a, const bfloat16& b) {
   const float f1 = static_cast<float>(a);
   const float f2 = static_cast<float>(b);
   return f2 < f1 ? b : a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16(max)(const bfloat16& a, const bfloat16& b) {
   const float f1 = static_cast<float>(a);
   const float f2 = static_cast<float>(b);
   return f1 < f2 ? b : a;
@@ -584,56 +666,59 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfl
   const float f2 = static_cast<float>(b);
   return bfloat16(::fminf(f1, f2));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
   const float f1 = static_cast<float>(a);
   const float f2 = static_cast<float>(b);
   return bfloat16(::fmaxf(f1, f2));
 }
+EIGEN_DEVICE_FUNC inline bfloat16 fma(const bfloat16& a, const bfloat16& b, const bfloat16& c) {
+  // Emulate FMA via float.
+  return bfloat16(numext::fma(static_cast<float>(a), static_cast<float>(b), static_cast<float>(c)));
+}
 #ifndef EIGEN_NO_IO
-EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
+EIGEN_ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, const bfloat16& v) {
   os << static_cast<float>(v);
   return os;
 }
 #endif
-} // namespace bfloat16_impl
+}  // namespace bfloat16_impl
 namespace internal {
-template<>
-struct random_default_impl<bfloat16, false, false>
-{
-  static inline bfloat16 run(const bfloat16& x, const bfloat16& y)
-  {
-    return x + (y-x) * bfloat16(float(std::rand()) / float(RAND_MAX));
+template <>
+struct is_arithmetic<bfloat16> {
+  enum { value = true };
+};
+template <>
+struct random_impl<bfloat16> {
+  enum : int { MantissaBits = 7 };
+  using Impl = random_impl<float>;
+  static EIGEN_DEVICE_FUNC inline bfloat16 run(const bfloat16& x, const bfloat16& y) {
+    float result = Impl::run(x, y, MantissaBits);
+    return bfloat16(result);
   }
-  static inline bfloat16 run()
-  {
-    return run(bfloat16(-1.f), bfloat16(1.f));
+  static EIGEN_DEVICE_FUNC inline bfloat16 run() {
+    float result = Impl::run(MantissaBits);
+    return bfloat16(result);
   }
 };
-template<> struct is_arithmetic<bfloat16> { enum { value = true }; };
-} // namespace internal
+}  // namespace internal
-template<> struct NumTraits<Eigen::bfloat16>
-    : GenericNumTraits<Eigen::bfloat16>
-{
-  enum {
-    IsSigned = true,
-    IsInteger = false,
-    IsComplex = false,
-    RequireInitialization = false
-  };
+template <>
+struct NumTraits<Eigen::bfloat16> : GenericNumTraits<Eigen::bfloat16> {
+  enum { IsSigned = true, IsInteger = false, IsComplex = false, RequireInitialization = false };
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
     return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);
   }
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {
     return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D);  // bfloat16(5e-2f);
   }
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
     return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);
@@ -649,32 +734,33 @@ template<> struct NumTraits<Eigen::bfloat16>
   }
 };
-} // namespace Eigen
+}  // namespace Eigen
+#if defined(EIGEN_HAS_HIP_BF16)
+#pragma pop_macro("EIGEN_CONSTEXPR")
+#endif
 namespace Eigen {
 namespace numext {
-template<>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-bool (isnan)(const Eigen::bfloat16& h) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isnan)(const Eigen::bfloat16& h) {
   return (bfloat16_impl::isnan)(h);
 }
-template<>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-bool (isinf)(const Eigen::bfloat16& h) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isinf)(const Eigen::bfloat16& h) {
   return (bfloat16_impl::isinf)(h);
 }
-template<>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-bool (isfinite)(const Eigen::bfloat16& h) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const Eigen::bfloat16& h) {
   return (bfloat16_impl::isfinite)(h);
 }
 template <>
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
-  return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));
+  return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src);
 }
 template <>
@@ -682,6 +768,37 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat1
   return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
 }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 nextafter(const bfloat16& from, const bfloat16& to) {
+  if (numext::isnan EIGEN_NOT_A_MACRO(from)) {
+    return from;
+  }
+  if (numext::isnan EIGEN_NOT_A_MACRO(to)) {
+    return to;
+  }
+  if (from == to) {
+    return to;
+  }
+  uint16_t from_bits = numext::bit_cast<uint16_t>(from);
+  bool from_sign = from_bits >> 15;
+  // Whether we are adjusting toward the infinity with the same sign as from.
+  bool toward_inf = (to > from) == !from_sign;
+  if (toward_inf) {
+    ++from_bits;
+  } else if ((from_bits & 0x7fff) == 0) {
+    // Adjusting away from inf, but from is zero, so just toggle the sign.
+    from_bits ^= 0x8000;
+  } else {
+    --from_bits;
+  }
+  return numext::bit_cast<bfloat16>(from_bits);
+}
+// Specialize multiply-add to match packet operations and reduce conversions to/from float.
+template<>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 madd<Eigen::bfloat16>(const Eigen::bfloat16& x, const Eigen::bfloat16& y, const Eigen::bfloat16& z) {
+  return Eigen::bfloat16(static_cast<float>(x) * static_cast<float>(y) + static_cast<float>(z));
+}
 }  // namespace numext
 }  // namespace Eigen
@@ -693,8 +810,57 @@ struct hash<Eigen::bfloat16> {
     return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
   }
 };
-} // namespace std
+}  // namespace std
 #endif
+// Add the missing shfl* intrinsics.
+// The __shfl* functions are only valid on HIP or __CUDA_ARCH__ >= 300.
+//   CUDA defines them for (__CUDA_ARCH__ >= 300 || !defined(__CUDA_ARCH__))
+//
+// HIP and CUDA prior to SDK 9.0 define
+//    __shfl, __shfl_up, __shfl_down, __shfl_xor for int and float
+// CUDA since 9.0 deprecates those and instead defines
+//    __shfl_sync, __shfl_up_sync, __shfl_down_sync, __shfl_xor_sync,
+//    with native support for __half and __nv_bfloat16
+//
+// Note that the following are __device__-only functions.
+#if defined(EIGEN_HIPCC)
+#if defined(EIGEN_HAS_HIP_BF16)
+// bfloat16 overload of __shfl: passes the value's raw 16-bit pattern through the
+// int overload of __shfl and rebuilds a bfloat16 from the low 16 bits of the result.
+__device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl(Eigen::bfloat16 var, int srcLane, int width = warpSize) {
+  const Eigen::numext::uint16_t raw_bits = Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var);
+  const int widened = static_cast<int>(raw_bits);
+  const auto shuffled = __shfl(widened, srcLane, width);
+  const Eigen::numext::uint16_t result_bits = static_cast<Eigen::numext::uint16_t>(shuffled);
+  return Eigen::numext::bit_cast<Eigen::bfloat16>(result_bits);
+}
+// bfloat16 overload of __shfl_up: passes the value's raw 16-bit pattern through the
+// int overload of __shfl_up and rebuilds a bfloat16 from the low 16 bits of the result.
+__device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_up(Eigen::bfloat16 var, unsigned int delta,
+                                                         int width = warpSize) {
+  const Eigen::numext::uint16_t raw_bits = Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var);
+  const int widened = static_cast<int>(raw_bits);
+  const auto shuffled = __shfl_up(widened, delta, width);
+  const Eigen::numext::uint16_t result_bits = static_cast<Eigen::numext::uint16_t>(shuffled);
+  return Eigen::numext::bit_cast<Eigen::bfloat16>(result_bits);
+}
+// bfloat16 overload of __shfl_down: passes the value's raw 16-bit pattern through the
+// int overload of __shfl_down and rebuilds a bfloat16 from the low 16 bits of the result.
+__device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_down(Eigen::bfloat16 var, unsigned int delta,
+                                                           int width = warpSize) {
+  const Eigen::numext::uint16_t raw_bits = Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var);
+  const int widened = static_cast<int>(raw_bits);
+  const auto shuffled = __shfl_down(widened, delta, width);
+  const Eigen::numext::uint16_t result_bits = static_cast<Eigen::numext::uint16_t>(shuffled);
+  return Eigen::numext::bit_cast<Eigen::bfloat16>(result_bits);
+}
+// bfloat16 overload of __shfl_xor: passes the value's raw 16-bit pattern through the
+// int overload of __shfl_xor and rebuilds a bfloat16 from the low 16 bits of the result.
+__device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_xor(Eigen::bfloat16 var, int laneMask, int width = warpSize) {
+  const Eigen::numext::uint16_t raw_bits = Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var);
+  const int widened = static_cast<int>(raw_bits);
+  const auto shuffled = __shfl_xor(widened, laneMask, width);
+  const Eigen::numext::uint16_t result_bits = static_cast<Eigen::numext::uint16_t>(shuffled);
+  return Eigen::numext::bit_cast<Eigen::bfloat16>(result_bits);
+}
+#endif  // HIP
+#endif  // __shfl*
+#if defined(EIGEN_HIPCC)
+// bfloat16 overload of __ldg: loads through the uint16_t overload of __ldg and
+// converts the loaded raw bits to a bfloat16.
+EIGEN_STRONG_INLINE __device__ Eigen::bfloat16 __ldg(const Eigen::bfloat16* ptr) {
+  // View the same address as a pointer to the raw 16-bit storage type.
+  const Eigen::numext::uint16_t* raw_ptr = Eigen::numext::bit_cast<const Eigen::numext::uint16_t*>(ptr);
+  return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(__ldg(raw_ptr));
+}
+#endif  // __ldg
-#endif // EIGEN_BFLOAT16_H
+#endif  // EIGEN_BFLOAT16_H