npm - @smake/eigen - Versions diffs - 1.1.0 → 1.1.1 - Mend

@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (431)

package/README.md +1 -1
package/eigen/Eigen/AccelerateSupport +52 -0
package/eigen/Eigen/Cholesky +18 -20
package/eigen/Eigen/CholmodSupport +28 -28
package/eigen/Eigen/Core +187 -120
package/eigen/Eigen/Eigenvalues +16 -13
package/eigen/Eigen/Geometry +18 -18
package/eigen/Eigen/Householder +9 -7
package/eigen/Eigen/IterativeLinearSolvers +8 -4
package/eigen/Eigen/Jacobi +14 -13
package/eigen/Eigen/KLUSupport +23 -21
package/eigen/Eigen/LU +15 -16
package/eigen/Eigen/MetisSupport +12 -12
package/eigen/Eigen/OrderingMethods +54 -51
package/eigen/Eigen/PaStiXSupport +23 -21
package/eigen/Eigen/PardisoSupport +17 -14
package/eigen/Eigen/QR +18 -20
package/eigen/Eigen/QtAlignedMalloc +5 -12
package/eigen/Eigen/SPQRSupport +21 -14
package/eigen/Eigen/SVD +23 -17
package/eigen/Eigen/Sparse +1 -2
package/eigen/Eigen/SparseCholesky +18 -15
package/eigen/Eigen/SparseCore +18 -17
package/eigen/Eigen/SparseLU +9 -9
package/eigen/Eigen/SparseQR +16 -14
package/eigen/Eigen/StdDeque +5 -2
package/eigen/Eigen/StdList +5 -2
package/eigen/Eigen/StdVector +5 -2
package/eigen/Eigen/SuperLUSupport +30 -24
package/eigen/Eigen/ThreadPool +80 -0
package/eigen/Eigen/UmfPackSupport +19 -17
package/eigen/Eigen/Version +14 -0
package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
package/eigen/Eigen/src/Core/Array.h +329 -370
package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
package/eigen/Eigen/src/Core/Assign.h +30 -40
package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
package/eigen/Eigen/src/Core/Block.h +371 -390
package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
package/eigen/Eigen/src/Core/DenseBase.h +630 -658
package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
package/eigen/Eigen/src/Core/Diagonal.h +168 -207
package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
package/eigen/Eigen/src/Core/Dot.h +167 -217
package/eigen/Eigen/src/Core/EigenBase.h +74 -85
package/eigen/Eigen/src/Core/Fill.h +138 -0
package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
package/eigen/Eigen/src/Core/IO.h +131 -156
package/eigen/Eigen/src/Core/IndexedView.h +209 -125
package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Core/Inverse.h +50 -59
package/eigen/Eigen/src/Core/Map.h +123 -141
package/eigen/Eigen/src/Core/MapBase.h +255 -282
package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
package/eigen/Eigen/src/Core/Matrix.h +463 -494
package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
package/eigen/Eigen/src/Core/NestByValue.h +58 -52
package/eigen/Eigen/src/Core/NoAlias.h +79 -86
package/eigen/Eigen/src/Core/NumTraits.h +206 -206
package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
package/eigen/Eigen/src/Core/Product.h +246 -130
package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
package/eigen/Eigen/src/Core/Random.h +153 -164
package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
package/eigen/Eigen/src/Core/RealView.h +250 -0
package/eigen/Eigen/src/Core/Redux.h +334 -314
package/eigen/Eigen/src/Core/Ref.h +259 -257
package/eigen/Eigen/src/Core/Replicate.h +92 -104
package/eigen/Eigen/src/Core/Reshaped.h +215 -271
package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
package/eigen/Eigen/src/Core/Reverse.h +133 -148
package/eigen/Eigen/src/Core/Select.h +68 -140
package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
package/eigen/Eigen/src/Core/Solve.h +88 -102
package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
package/eigen/Eigen/src/Core/SolverBase.h +132 -133
package/eigen/Eigen/src/Core/StableNorm.h +113 -147
package/eigen/Eigen/src/Core/StlIterators.h +404 -248
package/eigen/Eigen/src/Core/Stride.h +90 -92
package/eigen/Eigen/src/Core/Swap.h +70 -39
package/eigen/Eigen/src/Core/Transpose.h +258 -295
package/eigen/Eigen/src/Core/Transpositions.h +270 -333
package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
package/eigen/Eigen/src/Core/Visitor.h +464 -308
package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
package/eigen/Eigen/src/Core/util/Assert.h +158 -0
package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
package/eigen/Eigen/src/Core/util/Constants.h +297 -262
package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
package/eigen/Eigen/src/Core/util/Macros.h +655 -773
package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
package/eigen/Eigen/src/Core/util/Memory.h +970 -748
package/eigen/Eigen/src/Core/util/Meta.h +581 -633
package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
package/eigen/Eigen/src/Geometry/Transform.h +858 -936
package/eigen/Eigen/src/Geometry/Translation.h +94 -92
package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
package/eigen/Eigen/src/Householder/Householder.h +102 -124
package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
package/eigen/Eigen/src/LU/Determinant.h +50 -69
package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
package/eigen/Eigen/src/StlSupport/details.h +48 -50
package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
package/eigen/Eigen/src/misc/Image.h +41 -43
package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
package/eigen/Eigen/src/misc/Kernel.h +39 -41
package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
package/eigen/Eigen/src/misc/blas.h +83 -426
package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
package/package.json +1 -1
package/eigen/COPYING.APACHE +0 -203
package/eigen/COPYING.BSD +0 -26
package/eigen/COPYING.GPL +0 -674
package/eigen/COPYING.LGPL +0 -502
package/eigen/COPYING.MINPACK +0 -51
package/eigen/COPYING.MPL2 +0 -373
package/eigen/COPYING.README +0 -18
package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
package/eigen/Eigen/src/misc/lapack.h +0 -152
package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
package/eigen/README.md +0 -5

package/eigen/Eigen/src/Core/arch/Default/Half.h CHANGED

@@ -24,7 +24,6 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 // Standard 16-bit float type, mostly useful for GPUs. Defines a new
 // type Eigen::half (inheriting either from CUDA's or HIP's __half struct) with
 // operator overloads such that it behaves basically as an arithmetic
@@ -32,29 +31,30 @@
 // in fp32 for CPUs, except for simple parameter conversions, I/O
 // to disk and the likes), but fast on GPUs.
 #ifndef EIGEN_HALF_H
 #define EIGEN_HALF_H
-#include <sstream>
+// IWYU pragma: private
+#include "../../InternalHeaderCheck.h"
-#if defined(EIGEN_HAS_GPU_FP16) || defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
 // When compiling with GPU support, the "__half_raw" base class as well as
 // some other routines are defined in the GPU compiler header files
 // (cuda_fp16.h, hip_fp16.h), and they are not tagged constexpr
 // As a consequence, we get compile failures when compiling Eigen with
 // GPU support. Hence the need to disable EIGEN_CONSTEXPR when building
-// Eigen with GPU support
-  #pragma push_macro("EIGEN_CONSTEXPR")
-  #undef EIGEN_CONSTEXPR
-  #define EIGEN_CONSTEXPR
+// Eigen with GPU support.
+// Any functions that require `numext::bit_cast` may also not be constexpr,
+// including any native types when setting via raw bit values.
+#if defined(EIGEN_HAS_GPU_FP16) || defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) || defined(EIGEN_HAS_BUILTIN_FLOAT16)
+#define _EIGEN_MAYBE_CONSTEXPR
+#else
+#define _EIGEN_MAYBE_CONSTEXPR constexpr
 #endif
-#define F16_PACKET_FUNCTION(PACKET_F, PACKET_F16, METHOD)           \
-  template <>                                                       \
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED                \
-  PACKET_F16 METHOD<PACKET_F16>(const PACKET_F16& _x) {             \
-    return float2half(METHOD<PACKET_F>(half2float(_x)));            \
+#define F16_PACKET_FUNCTION(PACKET_F, PACKET_F16, METHOD)                                                  \
+  template <>                                                                                              \
+  EIGEN_UNUSED EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PACKET_F16 METHOD<PACKET_F16>(const PACKET_F16& _x) { \
+    return float2half(METHOD<PACKET_F>(half2float(_x)));                                                   \
   }
 namespace Eigen {
@@ -83,8 +83,10 @@ namespace half_impl {
 // Making the host side compile phase of hipcc use the same Eigen::half impl, as the gcc compile, resolves
 // this error, and hence the following convoluted #if condition
 #if !defined(EIGEN_HAS_GPU_FP16) || !defined(EIGEN_GPU_COMPILE_PHASE)
 // Make our own __half_raw definition that is similar to CUDA's.
 struct __half_raw {
+  struct construct_from_rep_tag {};
 #if (defined(EIGEN_HAS_GPU_FP16) && !defined(EIGEN_GPU_COMPILE_PHASE))
   // Eigen::half can be used as the datatype for shared memory declarations (in Eigen and TF)
   // The element type for shared memory cannot have non-trivial constructors
@@ -93,54 +95,62 @@ struct __half_raw {
   // hence the need for this
   EIGEN_DEVICE_FUNC __half_raw() {}
 #else
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw() : x(0) {}
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR __half_raw() : x(0) {}
 #endif
 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
-  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw(numext::uint16_t raw) : x(numext::bit_cast<__fp16>(raw)) {
-  }
+  explicit EIGEN_DEVICE_FUNC __half_raw(numext::uint16_t raw) : x(numext::bit_cast<__fp16>(raw)) {}
+  EIGEN_DEVICE_FUNC constexpr __half_raw(construct_from_rep_tag, __fp16 rep) : x{rep} {}
   __fp16 x;
+#elif defined(EIGEN_HAS_BUILTIN_FLOAT16)
+  explicit EIGEN_DEVICE_FUNC __half_raw(numext::uint16_t raw) : x(numext::bit_cast<_Float16>(raw)) {}
+  EIGEN_DEVICE_FUNC constexpr __half_raw(construct_from_rep_tag, _Float16 rep) : x{rep} {}
+  _Float16 x;
 #else
-  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw(numext::uint16_t raw) : x(raw) {}
+  explicit EIGEN_DEVICE_FUNC constexpr __half_raw(numext::uint16_t raw) : x(raw) {}
+  EIGEN_DEVICE_FUNC constexpr __half_raw(construct_from_rep_tag, numext::uint16_t rep) : x{rep} {}
   numext::uint16_t x;
 #endif
 };
 #elif defined(EIGEN_HAS_HIP_FP16)
-  // Nothing to do here
-  // HIP fp16 header file has a definition for __half_raw
+// HIP GPU compile phase: nothing to do here.
+// HIP fp16 header file has a definition for __half_raw
 #elif defined(EIGEN_HAS_CUDA_FP16)
-  #if EIGEN_CUDA_SDK_VER < 90000
-    // In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
-    typedef __half __half_raw;
-  #endif // defined(EIGEN_HAS_CUDA_FP16)
+// CUDA GPU compile phase.
+#if EIGEN_CUDA_SDK_VER < 90000
+// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
+typedef __half __half_raw;
+#endif  // defined(EIGEN_HAS_CUDA_FP16)
 #elif defined(SYCL_DEVICE_ONLY)
-  typedef cl::sycl::half __half_raw;
+typedef cl::sycl::half __half_raw;
 #endif
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x);
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);
 struct half_base : public __half_raw {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base() {}
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half_raw& h) : __half_raw(h) {}
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half_base() {}
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half_base(const __half_raw& h) : __half_raw(h) {}
 #if defined(EIGEN_HAS_GPU_FP16)
- #if defined(EIGEN_HAS_HIP_FP16)
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half& h) { x = __half_as_ushort(h); }
- #elif defined(EIGEN_HAS_CUDA_FP16)
-  #if EIGEN_CUDA_SDK_VER >= 90000
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
-  #endif
- #endif
+#if defined(EIGEN_HAS_HIP_FP16)
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half_base(const __half& h) { x = __half_as_ushort(h); }
+#elif defined(EIGEN_HAS_CUDA_FP16)
+#if EIGEN_CUDA_SDK_VER >= 90000
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
+#endif
+#endif
 #endif
 };
-} // namespace half_impl
+}  // namespace half_impl
 // Class definition.
 struct half : public half_impl::half_base {
   // Writing this out as separate #if-else blocks to make the code easier to follow
   // The same applies to most #if-else blocks in this file
 #if !defined(EIGEN_HAS_GPU_FP16) || !defined(EIGEN_GPU_COMPILE_PHASE)
@@ -152,44 +162,50 @@ struct half : public half_impl::half_base {
   // Nothing to do here
   // HIP fp16 header file has a definition for __half_raw
 #elif defined(EIGEN_HAS_CUDA_FP16)
-  // Note that EIGEN_CUDA_SDK_VER is set to 0 even when compiling with HIP, so
-  // (EIGEN_CUDA_SDK_VER < 90000) is true even for HIP!  So keeping this within
-  // #if defined(EIGEN_HAS_CUDA_FP16) is needed
-  #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
-    typedef half_impl::__half_raw __half_raw;
-  #endif
+// Note that EIGEN_CUDA_SDK_VER is set to 0 even when compiling with HIP, so
+// (EIGEN_CUDA_SDK_VER < 90000) is true even for HIP!  So keeping this within
+// #if defined(EIGEN_HAS_CUDA_FP16) is needed
+#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
+  typedef half_impl::__half_raw __half_raw;
+#endif
 #endif
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half() {}
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half() {}
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half_raw& h) : half_impl::half_base(h) {}
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half(const __half_raw& h) : half_impl::half_base(h) {}
 #if defined(EIGEN_HAS_GPU_FP16)
- #if defined(EIGEN_HAS_HIP_FP16)
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {}
- #elif defined(EIGEN_HAS_CUDA_FP16)
-  #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {}
-  #endif
- #endif
+#if defined(EIGEN_HAS_HIP_FP16)
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {}
+#elif defined(EIGEN_HAS_CUDA_FP16)
+#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {}
+#endif
+#endif
 #endif
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+  explicit EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half(__fp16 b)
+      : half(__half_raw(__half_raw::construct_from_rep_tag(), b)) {}
+#elif defined(EIGEN_HAS_BUILTIN_FLOAT16)
+  explicit EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half(_Float16 b)
+      : half(__half_raw(__half_raw::construct_from_rep_tag(), b)) {}
+#endif
-  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(bool b)
+  explicit EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR half(bool b)
       : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
-  template<class T>
+  template <class T>
   explicit EIGEN_DEVICE_FUNC half(T val)
       : half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
-  explicit EIGEN_DEVICE_FUNC half(float f)
-      : half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
+  explicit EIGEN_DEVICE_FUNC half(float f) : half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
   // Following the convention of numpy, converting between complex and
   // float will lead to loss of imag value.
-  template<typename RealScalar>
+  template <typename RealScalar>
   explicit EIGEN_DEVICE_FUNC half(std::complex<RealScalar> c)
       : half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(c.real()))) {}
-   EIGEN_DEVICE_FUNC operator float() const {  // NOLINT: Allow implicit conversion to float, because it is lossless.
+  EIGEN_DEVICE_FUNC operator float() const {  // NOLINT: Allow implicit conversion to float, because it is lossless.
     return half_impl::half_to_float(*this);
   }
@@ -202,69 +218,131 @@ struct half : public half_impl::half_base {
 #endif
 };
-} // end namespace Eigen
-namespace std {
-template<>
-struct numeric_limits<Eigen::half> {
-  static const bool is_specialized = true;
-  static const bool is_signed = true;
-  static const bool is_integer = false;
-  static const bool is_exact = false;
-  static const bool has_infinity = true;
-  static const bool has_quiet_NaN = true;
-  static const bool has_signaling_NaN = true;
-  static const float_denorm_style has_denorm = denorm_present;
-  static const bool has_denorm_loss = false;
-  static const std::float_round_style round_style = std::round_to_nearest;
-  static const bool is_iec559 = false;
-  static const bool is_bounded = false;
-  static const bool is_modulo = false;
-  static const int digits = 11;
-  static const int digits10 = 3;      // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
-  static const int max_digits10 = 5;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
-  static const int radix = 2;
-  static const int min_exponent = -13;
-  static const int min_exponent10 = -4;
-  static const int max_exponent = 16;
-  static const int max_exponent10 = 4;
-  static const bool traps = true;
-  static const bool tinyness_before = false;
-  static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
-  static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
-  static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
-  static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
-  static Eigen::half round_error() { return Eigen::half(0.5); }
-  static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
-  static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
-  static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7d00); }
-  static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
+// TODO(majnemer): Get rid of this once we can rely on C++17 inline variables to
+// solve the ODR issue.
+namespace half_impl {
+template <typename = void>
+struct numeric_limits_half_impl {
+  static constexpr const bool is_specialized = true;
+  static constexpr const bool is_signed = true;
+  static constexpr const bool is_integer = false;
+  static constexpr const bool is_exact = false;
+  static constexpr const bool has_infinity = true;
+  static constexpr const bool has_quiet_NaN = true;
+  static constexpr const bool has_signaling_NaN = true;
+  EIGEN_DIAGNOSTICS(push)
+  EIGEN_DISABLE_DEPRECATED_WARNING
+  static constexpr const std::float_denorm_style has_denorm = std::denorm_present;
+  static constexpr const bool has_denorm_loss = false;
+  EIGEN_DIAGNOSTICS(pop)
+  static constexpr const std::float_round_style round_style = std::round_to_nearest;
+  static constexpr const bool is_iec559 = true;
+  // The C++ standard defines this as "true if the set of values representable
+  // by the type is finite." Half has finite precision.
+  static constexpr const bool is_bounded = true;
+  static constexpr const bool is_modulo = false;
+  static constexpr const int digits = 11;
+  static constexpr const int digits10 =
+      3;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+  static constexpr const int max_digits10 =
+      5;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+  static constexpr const int radix = std::numeric_limits<float>::radix;
+  static constexpr const int min_exponent = -13;
+  static constexpr const int min_exponent10 = -4;
+  static constexpr const int max_exponent = 16;
+  static constexpr const int max_exponent10 = 4;
+  static constexpr const bool traps = std::numeric_limits<float>::traps;
+  // IEEE754: "The implementer shall choose how tininess is detected, but shall
+  // detect tininess in the same way for all operations in radix two"
+  static constexpr const bool tinyness_before = std::numeric_limits<float>::tinyness_before;
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half(min)() { return Eigen::half_impl::raw_uint16_to_half(0x0400); }
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half(max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x1400); }
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half round_error() { return Eigen::half_impl::raw_uint16_to_half(0x3800); }
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7d00); }
+  static _EIGEN_MAYBE_CONSTEXPR Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x0001); }
 };
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::is_specialized;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::is_signed;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::is_integer;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::is_exact;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::has_infinity;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::has_quiet_NaN;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::has_signaling_NaN;
+EIGEN_DIAGNOSTICS(push)
+EIGEN_DISABLE_DEPRECATED_WARNING
+template <typename T>
+constexpr const std::float_denorm_style numeric_limits_half_impl<T>::has_denorm;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::has_denorm_loss;
+EIGEN_DIAGNOSTICS(pop)
+template <typename T>
+constexpr const std::float_round_style numeric_limits_half_impl<T>::round_style;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::is_iec559;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::is_bounded;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::is_modulo;
+template <typename T>
+constexpr const int numeric_limits_half_impl<T>::digits;
+template <typename T>
+constexpr const int numeric_limits_half_impl<T>::digits10;
+template <typename T>
+constexpr const int numeric_limits_half_impl<T>::max_digits10;
+template <typename T>
+constexpr const int numeric_limits_half_impl<T>::radix;
+template <typename T>
+constexpr const int numeric_limits_half_impl<T>::min_exponent;
+template <typename T>
+constexpr const int numeric_limits_half_impl<T>::min_exponent10;
+template <typename T>
+constexpr const int numeric_limits_half_impl<T>::max_exponent;
+template <typename T>
+constexpr const int numeric_limits_half_impl<T>::max_exponent10;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::traps;
+template <typename T>
+constexpr const bool numeric_limits_half_impl<T>::tinyness_before;
+}  // end namespace half_impl
+}  // end namespace Eigen
+namespace std {
 // If std::numeric_limits<T> is specialized, should also specialize
 // std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
 // std::numeric_limits<const volatile T>
 // https://stackoverflow.com/a/16519653/
-template<>
-struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
-template<>
-struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
-template<>
-struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
-} // end namespace std
+template <>
+class numeric_limits<Eigen::half> : public Eigen::half_impl::numeric_limits_half_impl<> {};
+template <>
+class numeric_limits<const Eigen::half> : public numeric_limits<Eigen::half> {};
+template <>
+class numeric_limits<volatile Eigen::half> : public numeric_limits<Eigen::half> {};
+template <>
+class numeric_limits<const volatile Eigen::half> : public numeric_limits<Eigen::half> {};
+}  // end namespace std
 namespace Eigen {
 namespace half_impl {
-#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && \
-     EIGEN_CUDA_ARCH >= 530) ||                                  \
+#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
     (defined(EIGEN_HAS_HIP_FP16) && defined(HIP_DEVICE_COMPILE))
-// Note: We deliberatly do *not* define this to 1 even if we have Arm's native
-// fp16 type since GPU halfs are rather different from native CPU halfs.
-// TODO: Rename to something like EIGEN_HAS_NATIVE_GPU_FP16
-#define EIGEN_HAS_NATIVE_FP16
+// Note: We deliberately do *not* define this to 1 even if we have Arm's native
+// fp16 type since GPU half types are rather different from native CPU half types.
+#define EIGEN_HAS_NATIVE_GPU_FP16
 #endif
 // Intrinsics for native fp16 support. Note that on current hardware,
@@ -272,21 +350,17 @@ namespace half_impl {
 // versions to get the ALU speed increased), but you do save the
 // conversion steps back and forth.
-#if defined(EIGEN_HAS_NATIVE_FP16)
-EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
+#if defined(EIGEN_HAS_NATIVE_GPU_FP16)
+EIGEN_STRONG_INLINE __device__ half operator+(const half& a, const half& b) {
 #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000
   return __hadd(::__half(a), ::__half(b));
 #else
   return __hadd(a, b);
 #endif
 }
-EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
-  return __hmul(a, b);
-}
-EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
-  return __hsub(a, b);
-}
-EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator*(const half& a, const half& b) { return __hmul(a, b); }
+EIGEN_STRONG_INLINE __device__ half operator-(const half& a, const half& b) { return __hsub(a, b); }
+EIGEN_STRONG_INLINE __device__ half operator/(const half& a, const half& b) {
 #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000
   return __hdiv(a, b);
 #else
@@ -295,173 +369,194 @@ EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
   return __float2half(num / denom);
 #endif
 }
-EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
-  return __hneg(a);
-}
-EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator-(const half& a) { return __hneg(a); }
+EIGEN_STRONG_INLINE __device__ half& operator+=(half& a, const half& b) {
   a = a + b;
   return a;
 }
-EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator*=(half& a, const half& b) {
   a = a * b;
   return a;
 }
-EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator-=(half& a, const half& b) {
   a = a - b;
   return a;
 }
-EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator/=(half& a, const half& b) {
   a = a / b;
   return a;
 }
-EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
-  return __heq(a, b);
-}
-EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
-  return __hne(a, b);
-}
-EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
-  return __hlt(a, b);
-}
-EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
-  return __hle(a, b);
-}
-EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
-  return __hgt(a, b);
-}
-EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
-  return __hge(a, b);
-}
-#endif
-#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
-  return half(vaddh_f16(a.x, b.x));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
-  return half(vmulh_f16(a.x, b.x));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
-  return half(vsubh_f16(a.x, b.x));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
-  return half(vdivh_f16(a.x, b.x));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
-  return half(vnegh_f16(a.x));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator==(const half& a, const half& b) { return __heq(a, b); }
+EIGEN_STRONG_INLINE __device__ bool operator!=(const half& a, const half& b) { return __hne(a, b); }
+EIGEN_STRONG_INLINE __device__ bool operator<(const half& a, const half& b) { return __hlt(a, b); }
+EIGEN_STRONG_INLINE __device__ bool operator<=(const half& a, const half& b) { return __hle(a, b); }
+EIGEN_STRONG_INLINE __device__ bool operator>(const half& a, const half& b) { return __hgt(a, b); }
+EIGEN_STRONG_INLINE __device__ bool operator>=(const half& a, const half& b) { return __hge(a, b); }
+#endif  // EIGEN_HAS_NATIVE_GPU_FP16
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE)
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator+(const half& a, const half& b) { return half(vaddh_f16(a.x, b.x)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator*(const half& a, const half& b) { return half(vmulh_f16(a.x, b.x)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator-(const half& a, const half& b) { return half(vsubh_f16(a.x, b.x)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator/(const half& a, const half& b) { return half(vdivh_f16(a.x, b.x)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator-(const half& a) { return half(vnegh_f16(a.x)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator+=(half& a, const half& b) {
   a = half(vaddh_f16(a.x, b.x));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator*=(half& a, const half& b) {
   a = half(vmulh_f16(a.x, b.x));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator-=(half& a, const half& b) {
   a = half(vsubh_f16(a.x, b.x));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator/=(half& a, const half& b) {
   a = half(vdivh_f16(a.x, b.x));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
-  return vceqh_f16(a.x, b.x);
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
-  return !vceqh_f16(a.x, b.x);
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
-  return vclth_f16(a.x, b.x);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator==(const half& a, const half& b) { return vceqh_f16(a.x, b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator!=(const half& a, const half& b) { return !vceqh_f16(a.x, b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<(const half& a, const half& b) { return vclth_f16(a.x, b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<=(const half& a, const half& b) { return vcleh_f16(a.x, b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>(const half& a, const half& b) { return vcgth_f16(a.x, b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>=(const half& a, const half& b) { return vcgeh_f16(a.x, b.x); }
+#elif defined(EIGEN_HAS_BUILTIN_FLOAT16) && !defined(EIGEN_GPU_COMPILE_PHASE)
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator+(const half& a, const half& b) { return half(a.x + b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator*(const half& a, const half& b) { return half(a.x * b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator-(const half& a, const half& b) { return half(a.x - b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator/(const half& a, const half& b) { return half(a.x / b.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator-(const half& a) { return half(-a.x); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator+=(half& a, const half& b) {
+  a = a + b;
+  return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
-  return vcleh_f16(a.x, b.x);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator*=(half& a, const half& b) {
+  a = a * b;
+  return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
-  return vcgth_f16(a.x, b.x);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator-=(half& a, const half& b) {
+  a = a - b;
+  return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
-  return vcgeh_f16(a.x, b.x);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator/=(half& a, const half& b) {
+  a = a / b;
+  return a;
 }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator==(const half& a, const half& b) { return a.x == b.x; }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator!=(const half& a, const half& b) { return a.x != b.x; }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<(const half& a, const half& b) { return a.x < b.x; }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<=(const half& a, const half& b) { return a.x <= b.x; }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>(const half& a, const half& b) { return a.x > b.x; }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>=(const half& a, const half& b) { return a.x >= b.x; }
 // We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
 // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
 // of the functions, while the latter can only deal with one of them.
-#elif !defined(EIGEN_HAS_NATIVE_FP16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for half floats
+#elif !defined(EIGEN_HAS_NATIVE_GPU_FP16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)  // Emulate support for half floats
-#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
+#if EIGEN_COMP_CLANG && defined(EIGEN_GPUCC)
 // We need to provide emulated *host-side* FP16 operators for clang.
 #pragma push_macro("EIGEN_DEVICE_FUNC")
 #undef EIGEN_DEVICE_FUNC
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_HAS_NATIVE_FP16)
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_HAS_NATIVE_GPU_FP16)
 #define EIGEN_DEVICE_FUNC __host__
-#else // both host and device need emulated ops.
+#else  // both host and device need emulated ops.
 #define EIGEN_DEVICE_FUNC __host__ __device__
 #endif
 #endif
 // Definitions for CPUs and older HIP+CUDA, mostly working through conversion
 // to/from fp32.
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
-  return half(float(a) + float(b));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
-  return half(float(a) * float(b));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
-  return half(float(a) - float(b));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
-  return half(float(a) / float(b));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator+(const half& a, const half& b) { return half(float(a) + float(b)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator*(const half& a, const half& b) { return half(float(a) * float(b)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator-(const half& a, const half& b) { return half(float(a) - float(b)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator/(const half& a, const half& b) { return half(float(a) / float(b)); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator-(const half& a) {
   half result;
   result.x = a.x ^ 0x8000;
   return result;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator+=(half& a, const half& b) {
   a = half(float(a) + float(b));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator*=(half& a, const half& b) {
   a = half(float(a) * float(b));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator-=(half& a, const half& b) {
   a = half(float(a) - float(b));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator/=(half& a, const half& b) {
   a = half(float(a) / float(b));
   return a;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
-  return numext::equal_strict(float(a),float(b));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
-  return numext::not_equal_strict(float(a), float(b));
+// Non-negative floating point numbers have a monotonic mapping to non-negative integers.
+// This property allows floating point numbers to be reinterpreted as integers for comparisons, which is useful if there
+// is no native floating point comparison operator. Floating point signedness is handled by the sign-magnitude
+// representation, whereas integers typically use two's complement. Converting the bit pattern from sign-magnitude to
+// two's complement allows the transformed bit patterns to be compared as signed integers. All edge cases (+/-0 and +/-
+// infinity) are handled automatically, except NaN.
+//
+// fp16 uses 1 sign bit, 5 exponent bits, and 10 mantissa bits. The bit pattern conveys NaN when all the exponent
+// bits (5) are set, and at least one mantissa bit is set. The sign bit is irrelevant for determining NaN. To check for
+// NaN, clear the sign bit and check if the integral representation is greater than 0111110000000000 (0x7c00). To test
+// for non-NaN, clear the sign bit and check if the integral representation is less than or equal to 0111110000000000.
+// convert sign-magnitude representation to two's complement
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int16_t mapToSigned(uint16_t a) {
+  constexpr uint16_t kAbsMask = (1 << 15) - 1;
+  // If the sign bit is set, clear the sign bit and return the (integer) negation. Otherwise, return the input.
+  return (a >> 15) ? -(a & kAbsMask) : a;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool isOrdered(const half& a, const half& b) {
+  constexpr uint16_t kInf = ((1 << 5) - 1) << 10;
+  constexpr uint16_t kAbsMask = (1 << 15) - 1;
+  return numext::maxi(a.x & kAbsMask, b.x & kAbsMask) <= kInf;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator==(const half& a, const half& b) {
+  bool result = mapToSigned(a.x) == mapToSigned(b.x);
+  result &= isOrdered(a, b);
+  return result;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
-  return float(a) < float(b);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator!=(const half& a, const half& b) { return !(a == b); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<(const half& a, const half& b) {
+  bool result = mapToSigned(a.x) < mapToSigned(b.x);
+  result &= isOrdered(a, b);
+  return result;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
-  return float(a) <= float(b);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<=(const half& a, const half& b) {
+  bool result = mapToSigned(a.x) <= mapToSigned(b.x);
+  result &= isOrdered(a, b);
+  return result;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
-  return float(a) > float(b);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>(const half& a, const half& b) {
+  bool result = mapToSigned(a.x) > mapToSigned(b.x);
+  result &= isOrdered(a, b);
+  return result;
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
-  return float(a) >= float(b);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>=(const half& a, const half& b) {
+  bool result = mapToSigned(a.x) >= mapToSigned(b.x);
+  result &= isOrdered(a, b);
+  return result;
 }
-#if defined(__clang__) && defined(__CUDA__)
+#if EIGEN_COMP_CLANG && defined(EIGEN_GPUCC)
 #pragma pop_macro("EIGEN_DEVICE_FUNC")
 #endif
 #endif  // Emulate support for half floats
 // Division by an index. Do it in full float precision to avoid accuracy
 // issues in converting the denominator to half.
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator/(const half& a, Index b) {
   return half(static_cast<float>(a) / static_cast<float>(b));
 }
@@ -492,7 +587,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator--(half& a, int) {
 // these in hardware. If we need more performance on older/other CPUs, they are
 // also possible to vectorize directly.
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x) {
   // We cannot simply do a "return __half_raw(x)" here, because __half_raw is union type
   // in the hip_fp16 header file, and that will trigger a compile error
   // On the other hand, having anything but a return statement also triggers a compile error
@@ -500,8 +595,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_h
   // Fortunately, since we need to disable EIGEN_CONSTEXPR for GPU anyway, we can get out
   // of this catch22 by having separate bodies for GPU / non GPU
 #if defined(EIGEN_HAS_GPU_FP16)
-   __half_raw h;
-   h.x = x;
+  __half_raw h;
+  h.x = x;
   return h;
 #else
   return __half_raw(x);
@@ -514,6 +609,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC numext::uint16_t raw_half_as_uint16(const
   // For SYCL, cl::sycl::half is _Float16, so cast directly.
 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
   return numext::bit_cast<numext::uint16_t>(h.x);
+#elif defined(EIGEN_HAS_BUILTIN_FLOAT16)
+  return numext::bit_cast<numext::uint16_t>(h.x);
 #elif defined(SYCL_DEVICE_ONLY)
   return numext::bit_cast<numext::uint16_t>(h);
 #else
@@ -521,67 +618,72 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC numext::uint16_t raw_half_as_uint16(const
 #endif
 }
-union float32_bits {
-  unsigned int u;
-  float f;
-};
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
-  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+    (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
   __half tmp_ff = __float2half(ff);
   return *(__half_raw*)&tmp_ff;
-#elif defined(EIGEN_HAS_FP16_C)
+#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
   __half_raw h;
-  h.x = _cvtss_sh(ff, 0);
+  h.x = static_cast<__fp16>(ff);
   return h;
-#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+#elif defined(EIGEN_HAS_BUILTIN_FLOAT16)
   __half_raw h;
-  h.x = static_cast<__fp16>(ff);
+  h.x = static_cast<_Float16>(ff);
   return h;
+#elif defined(EIGEN_HAS_FP16_C)
+  __half_raw h;
+#if EIGEN_COMP_MSVC
+  // MSVC does not have scalar instructions.
+  h.x = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(ff), 0), 0);
 #else
-  float32_bits f; f.f = ff;
+  h.x = _cvtss_sh(ff, 0);
+#endif
+  return h;
-  const float32_bits f32infty = { 255 << 23 };
-  const float32_bits f16max = { (127 + 16) << 23 };
-  const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
-  unsigned int sign_mask = 0x80000000u;
+#else
+  uint32_t f_bits = Eigen::numext::bit_cast<uint32_t>(ff);
+  const uint32_t f32infty_bits = {255 << 23};
+  const uint32_t f16max_bits = {(127 + 16) << 23};
+  const uint32_t denorm_magic_bits = {((127 - 15) + (23 - 10) + 1) << 23};
+  const uint32_t sign_mask = 0x80000000u;
   __half_raw o;
-  o.x = static_cast<numext::uint16_t>(0x0u);
+  o.x = static_cast<uint16_t>(0x0u);
-  unsigned int sign = f.u & sign_mask;
-  f.u ^= sign;
+  const uint32_t sign = f_bits & sign_mask;
+  f_bits ^= sign;
   // NOTE all the integer compares in this function can be safely
   // compiled into signed compares since all operands are below
   // 0x80000000. Important if you want fast straight SSE2 code
   // (since there's no unsigned PCMPGTD).
-  if (f.u >= f16max.u) {  // result is Inf or NaN (all exponent bits set)
-    o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
-  } else {  // (De)normalized number or zero
-    if (f.u < (113 << 23)) {  // resulting FP16 is subnormal or zero
+  if (f_bits >= f16max_bits) {                         // result is Inf or NaN (all exponent bits set)
+    o.x = (f_bits > f32infty_bits) ? 0x7e00 : 0x7c00;  // NaN->qNaN and Inf->Inf
+  } else {                                             // (De)normalized number or zero
+    if (f_bits < (113 << 23)) {                        // resulting FP16 is subnormal or zero
       // use a magic value to align our 10 mantissa bits at the bottom of
       // the float. as long as FP addition is round-to-nearest-even this
       // just works.
-      f.f += denorm_magic.f;
+      f_bits = Eigen::numext::bit_cast<uint32_t>(Eigen::numext::bit_cast<float>(f_bits) +
+                                                 Eigen::numext::bit_cast<float>(denorm_magic_bits));
       // and one integer subtract of the bias later, we have our final float!
-      o.x = static_cast<numext::uint16_t>(f.u - denorm_magic.u);
+      o.x = static_cast<numext::uint16_t>(f_bits - denorm_magic_bits);
     } else {
-      unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
+      const uint32_t mant_odd = (f_bits >> 13) & 1;  // resulting mantissa is odd
       // update exponent, rounding bias part 1
       // Equivalent to `f.u += ((unsigned int)(15 - 127) << 23) + 0xfff`, but
       // without arithmetic overflow.
-      f.u += 0xc8000fffU;
+      f_bits += 0xc8000fffU;
       // rounding bias part 2
-      f.u += mant_odd;
+      f_bits += mant_odd;
       // take the bits!
-      o.x = static_cast<numext::uint16_t>(f.u >> 13);
+      o.x = static_cast<numext::uint16_t>(f_bits >> 13);
     }
   }
@@ -592,60 +694,73 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
-  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+    (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
   return __half2float(h);
+#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) || defined(EIGEN_HAS_BUILTIN_FLOAT16)
+  return static_cast<float>(h.x);
 #elif defined(EIGEN_HAS_FP16_C)
+#if EIGEN_COMP_MSVC
+  // MSVC does not have scalar instructions.
+  return _mm_cvtss_f32(_mm_cvtph_ps(_mm_set1_epi16(h.x)));
+#else
   return _cvtsh_ss(h.x);
-#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
-  return static_cast<float>(h.x);
+#endif
 #else
-  const float32_bits magic = { 113 << 23 };
-  const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
-  float32_bits o;
-  o.u = (h.x & 0x7fff) << 13;             // exponent/mantissa bits
-  unsigned int exp = shifted_exp & o.u;   // just the exponent
-  o.u += (127 - 15) << 23;                // exponent adjust
+  const float magic = Eigen::numext::bit_cast<float>(static_cast<uint32_t>(113 << 23));
+  const uint32_t shifted_exp = 0x7c00 << 13;  // exponent mask after shift
+  uint32_t o_bits = (h.x & 0x7fff) << 13;     // exponent/mantissa bits
+  const uint32_t exp = shifted_exp & o_bits;  // just the exponent
+  o_bits += (127 - 15) << 23;                 // exponent adjust
   // handle exponent special cases
-  if (exp == shifted_exp) {     // Inf/NaN?
-    o.u += (128 - 16) << 23;    // extra exp adjust
-  } else if (exp == 0) {        // Zero/Denormal?
-    o.u += 1 << 23;             // extra exp adjust
-    o.f -= magic.f;             // renormalize
+  if (exp == shifted_exp) {      // Inf/NaN?
+    o_bits += (128 - 16) << 23;  // extra exp adjust
+  } else if (exp == 0) {         // Zero/Denormal?
+    o_bits += 1 << 23;           // extra exp adjust
+    // renormalize
+    o_bits = Eigen::numext::bit_cast<uint32_t>(Eigen::numext::bit_cast<float>(o_bits) - magic);
   }
-  o.u |= (h.x & 0x8000) << 16;    // sign bit
-  return o.f;
+  o_bits |= (h.x & 0x8000) << 16;  // sign bit
+  return Eigen::numext::bit_cast<float>(o_bits);
 #endif
 }
 // --- standard functions ---
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
-#ifdef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isinf)(const half& a) {
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) || defined(EIGEN_HAS_BUILTIN_FLOAT16)
   return (numext::bit_cast<numext::uint16_t>(a.x) & 0x7fff) == 0x7c00;
 #else
   return (a.x & 0x7fff) == 0x7c00;
 #endif
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isnan)(const half& a) {
 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
-  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+    (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
   return __hisnan(a);
-#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) || defined(EIGEN_HAS_BUILTIN_FLOAT16)
   return (numext::bit_cast<numext::uint16_t>(a.x) & 0x7fff) > 0x7c00;
 #else
   return (a.x & 0x7fff) > 0x7c00;
 #endif
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {
-  return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isfinite)(const half& a) {
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) || defined(EIGEN_HAS_BUILTIN_FLOAT16)
+  return (numext::bit_cast<numext::uint16_t>(a.x) & 0x7fff) < 0x7c00;
+#else
+  return (a.x & 0x7fff) < 0x7c00;
+#endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
   return half(vabsh_f16(a.x));
+#elif defined(EIGEN_HAS_BUILTIN_FLOAT16)
+  half result;
+  result.x =
+      numext::bit_cast<_Float16>(static_cast<numext::uint16_t>(numext::bit_cast<numext::uint16_t>(a.x) & 0x7FFF));
+  return result;
 #else
   half result;
   result.x = a.x & 0x7FFF;
@@ -654,65 +769,61 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
 #if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
-  defined(EIGEN_HIP_DEVICE_COMPILE)
+    defined(EIGEN_HIP_DEVICE_COMPILE)
   return half(hexp(a));
 #else
-   return half(::expf(float(a)));
+  return half(::expf(float(a)));
 #endif
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) {
-  return half(numext::expm1(float(a)));
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp2(const half& a) {
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
+    defined(EIGEN_HIP_DEVICE_COMPILE)
+  return half(hexp2(a));
+#else
+  return half(::exp2f(float(a)));
+#endif
 }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { return half(numext::expm1(float(a))); }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
-#if (defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
-  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
-  return half(::hlog(a));
+#if (defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && \
+     EIGEN_CUDA_ARCH >= 530) ||                                                                 \
+    (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+  return half(hlog(a));
 #else
   return half(::logf(float(a)));
 #endif
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
-  return half(numext::log1p(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
-  return half(::log10f(float(a)));
-}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) { return half(numext::log1p(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log2(const half& a) {
   return half(static_cast<float>(EIGEN_LOG2E) * ::logf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
 #if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
-  defined(EIGEN_HIP_DEVICE_COMPILE)
+    defined(EIGEN_HIP_DEVICE_COMPILE)
   return half(hsqrt(a));
 #else
-    return half(::sqrtf(float(a)));
+  return half(::sqrtf(float(a)));
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
   return half(::powf(float(a), float(b)));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
-  return half(::sinf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {
-  return half(::cosf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
-  return half(::tanf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
-  return half(::tanhf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half asin(const half& a) {
-  return half(::asinf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) {
-  return half(::acosf(float(a)));
-}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atan2(const half& a, const half& b) {
+  return half(::atan2f(float(a), float(b)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) { return half(::sinf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) { return half(::cosf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) { return half(::tanf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half asin(const half& a) { return half(::asinf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) { return half(::acosf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atan(const half& a) { return half(::atanf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atanh(const half& a) { return half(::atanhf(float(a))); }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
 #if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
-  defined(EIGEN_HIP_DEVICE_COMPILE)
+    defined(EIGEN_HIP_DEVICE_COMPILE)
   return half(hfloor(a));
 #else
   return half(::floorf(float(a)));
@@ -720,109 +831,97 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
 #if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
-  defined(EIGEN_HIP_DEVICE_COMPILE)
+    defined(EIGEN_HIP_DEVICE_COMPILE)
   return half(hceil(a));
 #else
   return half(::ceilf(float(a)));
 #endif
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half rint(const half& a) {
-  return half(::rintf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half round(const half& a) {
-  return half(::roundf(float(a)));
-}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half rint(const half& a) { return half(::rintf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half round(const half& a) { return half(::roundf(float(a))); }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half trunc(const half& a) { return half(::truncf(float(a))); }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half fmod(const half& a, const half& b) {
   return half(::fmodf(float(a), float(b)));
 }
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
-#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
-  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
-  return __hlt(b, a) ? b : a;
-#else
-  const float f1 = static_cast<float>(a);
-  const float f2 = static_cast<float>(b);
-  return f2 < f1 ? b : a;
-#endif
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
-#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
-  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
-  return __hlt(a, b) ? b : a;
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half(min)(const half& a, const half& b) { return b < a ? b : a; }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half(max)(const half& a, const half& b) { return a < b ? b : a; }
+EIGEN_DEVICE_FUNC inline half fma(const half& a, const half& b, const half& c) {
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+  return half(vfmah_f16(c.x, a.x, b.x));
+#elif defined(EIGEN_VECTORIZE_AVX512FP16)
+  // Reduces to vfmadd213sh.
+  return half(_mm_cvtsh_h(_mm_fmadd_ph(_mm_set_sh(a.x), _mm_set_sh(b.x), _mm_set_sh(c.x))));
 #else
-  const float f1 = static_cast<float>(a);
-  const float f2 = static_cast<float>(b);
-  return f1 < f2 ? b : a;
+  // Emulate FMA via float.
+  return half(numext::fma(static_cast<float>(a), static_cast<float>(b), static_cast<float>(c)));
 #endif
 }
 #ifndef EIGEN_NO_IO
-EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {
+EIGEN_ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, const half& v) {
   os << static_cast<float>(v);
   return os;
 }
 #endif
-} // end namespace half_impl
+}  // end namespace half_impl
 // import Eigen::half_impl::half into Eigen namespace
 // using half_impl::half;
 namespace internal {
-template<>
-struct random_default_impl<half, false, false>
-{
-  static inline half run(const half& x, const half& y)
-  {
-    return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));
+template <>
+struct is_arithmetic<half> {
+  enum { value = true };
+};
+template <>
+struct random_impl<half> {
+  enum : int { MantissaBits = 10 };
+  using Impl = random_impl<float>;
+  static EIGEN_DEVICE_FUNC inline half run(const half& x, const half& y) {
+    float result = Impl::run(x, y, MantissaBits);
+    return half(result);
   }
-  static inline half run()
-  {
-    return run(half(-1.f), half(1.f));
+  static EIGEN_DEVICE_FUNC inline half run() {
+    float result = Impl::run(MantissaBits);
+    return half(result);
   }
 };
-template<> struct is_arithmetic<half> { enum { value = true }; };
+}  // end namespace internal
-} // end namespace internal
-template<> struct NumTraits<Eigen::half>
-    : GenericNumTraits<Eigen::half>
-{
-  enum {
-    IsSigned = true,
-    IsInteger = false,
-    IsComplex = false,
-    RequireInitialization = false
-  };
+template <>
+struct NumTraits<Eigen::half> : GenericNumTraits<Eigen::half> {
+  enum { IsSigned = true, IsInteger = false, IsComplex = false, RequireInitialization = false };
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half epsilon() {
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half epsilon() {
     return half_impl::raw_uint16_to_half(0x0800);
   }
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half dummy_precision() {
-    return half_impl::raw_uint16_to_half(0x211f); //  Eigen::half(1e-2f);
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half dummy_precision() {
+    return half_impl::raw_uint16_to_half(0x211f);  //  Eigen::half(1e-2f);
   }
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half highest() {
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half highest() {
     return half_impl::raw_uint16_to_half(0x7bff);
   }
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half lowest() {
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half lowest() {
     return half_impl::raw_uint16_to_half(0xfbff);
   }
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half infinity() {
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half infinity() {
     return half_impl::raw_uint16_to_half(0x7c00);
   }
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
+  EIGEN_DEVICE_FUNC _EIGEN_MAYBE_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
     return half_impl::raw_uint16_to_half(0x7e00);
   }
 };
-} // end namespace Eigen
+}  // end namespace Eigen
-#if defined(EIGEN_HAS_GPU_FP16) || defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
-  #pragma pop_macro("EIGEN_CONSTEXPR")
-#endif
+#undef _EIGEN_MAYBE_CONSTEXPR
 namespace Eigen {
 namespace numext {
@@ -856,6 +955,12 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::half>(c
   return Eigen::half_impl::raw_half_as_uint16(src);
 }
+// Specialize multiply-add to match packet operations and reduce conversions to/from float.
+template<>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half madd<Eigen::half>(const Eigen::half& x, const Eigen::half& y, const Eigen::half& z) {
+  return Eigen::half(static_cast<float>(x) * static_cast<float>(y) + static_cast<float>(z));
+}
 }  // namespace numext
 }  // namespace Eigen
@@ -870,63 +975,65 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::half>(c
 //    with native support for __half and __nv_bfloat16
 //
 // Note that the following are __device__ - only functions.
-#if (defined(EIGEN_CUDACC) && (!defined(EIGEN_CUDA_ARCH) || EIGEN_CUDA_ARCH >= 300)) \
-    || defined(EIGEN_HIPCC)
+#if (defined(EIGEN_CUDACC) && (!defined(EIGEN_CUDA_ARCH) || EIGEN_CUDA_ARCH >= 300)) || defined(EIGEN_HIPCC)
 #if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 90000
-__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane, int width=warpSize) {
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,
+                                                       int width = warpSize) {
   const __half h = var;
   return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width));
 }
-__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up_sync(unsigned mask, Eigen::half var, unsigned int delta, int width=warpSize) {
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up_sync(unsigned mask, Eigen::half var, unsigned int delta,
+                                                          int width = warpSize) {
   const __half h = var;
   return static_cast<Eigen::half>(__shfl_up_sync(mask, h, delta, width));
 }
-__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down_sync(unsigned mask, Eigen::half var, unsigned int delta, int width=warpSize) {
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down_sync(unsigned mask, Eigen::half var, unsigned int delta,
+                                                            int width = warpSize) {
   const __half h = var;
   return static_cast<Eigen::half>(__shfl_down_sync(mask, h, delta, width));
 }
-__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor_sync(unsigned mask, Eigen::half var, int laneMask, int width=warpSize) {
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor_sync(unsigned mask, Eigen::half var, int laneMask,
+                                                           int width = warpSize) {
   const __half h = var;
   return static_cast<Eigen::half>(__shfl_xor_sync(mask, h, laneMask, width));
 }
-#else // HIP or CUDA SDK < 9.0
+#else  // HIP or CUDA SDK < 9.0
-__device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize) {
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width = warpSize) {
   const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
   return Eigen::numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
 }
-__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up(Eigen::half var, unsigned int delta, int width=warpSize) {
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up(Eigen::half var, unsigned int delta, int width = warpSize) {
   const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
   return Eigen::numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl_up(ivar, delta, width)));
 }
-__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down(Eigen::half var, unsigned int delta, int width=warpSize) {
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down(Eigen::half var, unsigned int delta, int width = warpSize) {
   const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
   return Eigen::numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl_down(ivar, delta, width)));
 }
-__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width = warpSize) {
   const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
   return Eigen::numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl_xor(ivar, laneMask, width)));
 }
-#endif // HIP vs CUDA
-#endif // __shfl*
+#endif  // HIP vs CUDA
+#endif  // __shfl*
 // ldg() has an overload for __half_raw, but we also need one for Eigen::half.
-#if (defined(EIGEN_CUDACC) && (!defined(EIGEN_CUDA_ARCH) || EIGEN_CUDA_ARCH >= 350)) \
-    || defined(EIGEN_HIPCC)
+#if (defined(EIGEN_CUDACC) && (!defined(EIGEN_CUDA_ARCH) || EIGEN_CUDA_ARCH >= 350)) || defined(EIGEN_HIPCC)
 EIGEN_STRONG_INLINE __device__ Eigen::half __ldg(const Eigen::half* ptr) {
   return Eigen::half_impl::raw_uint16_to_half(__ldg(reinterpret_cast<const Eigen::numext::uint16_t*>(ptr)));
 }
-#endif // __ldg
+#endif  // __ldg
 #if EIGEN_HAS_STD_HASH
 namespace std {
@@ -936,7 +1043,49 @@ struct hash<Eigen::half> {
     return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
   }
 };
-} // end namespace std
+}  // end namespace std
+#endif
+namespace Eigen {
+namespace internal {
+template <>
+struct cast_impl<float, half> {
+  EIGEN_DEVICE_FUNC static inline half run(const float& a) {
+#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
+    (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+    return __float2half(a);
+#else
+    return half(a);
+#endif
+  }
+};
+template <>
+struct cast_impl<int, half> {
+  EIGEN_DEVICE_FUNC static inline half run(const int& a) {
+#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
+    (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+    return __float2half(static_cast<float>(a));
+#else
+    return half(static_cast<float>(a));
+#endif
+  }
+};
+template <>
+struct cast_impl<half, float> {
+  EIGEN_DEVICE_FUNC static inline float run(const half& a) {
+#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
+    (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+    return __half2float(a);
+#else
+    return static_cast<float>(a);
 #endif
+  }
+};
+}  // namespace internal
+}  // namespace Eigen
-#endif // EIGEN_HALF_H
+#endif  // EIGEN_HALF_H