tomoto 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +8 -10
- data/ext/tomoto/extconf.rb +6 -2
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +1 -1
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +21 -0
- data/vendor/tomotopy/README.rst +20 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +2 -1
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +2 -1
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +53 -2
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +16 -5
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +31 -1
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +7 -5
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +60 -14
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -0,0 +1,700 @@
|
|
1
|
+
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#ifndef EIGEN_BFLOAT16_H
|
17
|
+
#define EIGEN_BFLOAT16_H
|
18
|
+
|
19
|
+
// Generates an explicit specialization METHOD<PACKET_BF16> that forwards to
// the float-packet version: the argument goes through Bf16ToF32, then
// METHOD<PACKET_F>, and the result goes back through F32ToBf16.
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)         \
  template <>                                                       \
  EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED  \
  PACKET_BF16 METHOD<PACKET_BF16>(const PACKET_BF16& _x) {          \
    return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_x)));              \
  }
|
25
|
+
|
26
|
+
namespace Eigen {
|
27
|
+
|
28
|
+
struct bfloat16;
|
29
|
+
|
30
|
+
namespace bfloat16_impl {
|
31
|
+
|
32
|
+
// Make our own __bfloat16_raw definition.
|
33
|
+
struct __bfloat16_raw {
|
34
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}
|
35
|
+
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
|
36
|
+
unsigned short value;
|
37
|
+
};
|
38
|
+
|
39
|
+
// Conversion helpers, declared here so the bfloat16 constructors can call
// them before their definitions appear.

// Raw 16-bit pattern -> __bfloat16_raw.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);

// float -> __bfloat16_raw. The boolean template argument selects between a
// specialization that makes no assumption about `ff` (false) and one that
// may assume it is normal, infinity, or zero (true).
template <bool AssumeArgumentIsNormalOrInfinityOrZero>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff);

// Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying:
// > error C2908: explicit specialization; 'float_to_bfloat16_rtne' has already been instantiated
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff);
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff);

// __bfloat16_raw -> float.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h);
|
49
|
+
|
50
|
+
struct bfloat16_base : public __bfloat16_raw {
|
51
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base() {}
|
52
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
|
53
|
+
};
|
54
|
+
|
55
|
+
} // namespace bfloat16_impl
|
56
|
+
|
57
|
+
// Class definition.
|
58
|
+
struct bfloat16 : public bfloat16_impl::bfloat16_base {
|
59
|
+
|
60
|
+
typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;
|
61
|
+
|
62
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}
|
63
|
+
|
64
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {}
|
65
|
+
|
66
|
+
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)
|
67
|
+
: bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
|
68
|
+
|
69
|
+
template<class T>
|
70
|
+
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)
|
71
|
+
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
|
72
|
+
|
73
|
+
explicit EIGEN_DEVICE_FUNC bfloat16(float f)
|
74
|
+
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
|
75
|
+
|
76
|
+
// Following the convention of numpy, converting between complex and
|
77
|
+
// float will lead to loss of imag value.
|
78
|
+
template<typename RealScalar>
|
79
|
+
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
|
80
|
+
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
|
81
|
+
|
82
|
+
EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless.
|
83
|
+
return bfloat16_impl::bfloat16_to_float(*this);
|
84
|
+
}
|
85
|
+
};
|
86
|
+
} // namespace Eigen
|
87
|
+
|
88
|
+
namespace std {
|
89
|
+
template<>
|
90
|
+
struct numeric_limits<Eigen::bfloat16> {
|
91
|
+
static const bool is_specialized = true;
|
92
|
+
static const bool is_signed = true;
|
93
|
+
static const bool is_integer = false;
|
94
|
+
static const bool is_exact = false;
|
95
|
+
static const bool has_infinity = true;
|
96
|
+
static const bool has_quiet_NaN = true;
|
97
|
+
static const bool has_signaling_NaN = true;
|
98
|
+
static const float_denorm_style has_denorm = std::denorm_absent;
|
99
|
+
static const bool has_denorm_loss = false;
|
100
|
+
static const std::float_round_style round_style = numeric_limits<float>::round_style;
|
101
|
+
static const bool is_iec559 = false;
|
102
|
+
static const bool is_bounded = true;
|
103
|
+
static const bool is_modulo = false;
|
104
|
+
static const int digits = 8;
|
105
|
+
static const int digits10 = 2;
|
106
|
+
static const int max_digits10 = 4;
|
107
|
+
static const int radix = 2;
|
108
|
+
static const int min_exponent = numeric_limits<float>::min_exponent;
|
109
|
+
static const int min_exponent10 = numeric_limits<float>::min_exponent10;
|
110
|
+
static const int max_exponent = numeric_limits<float>::max_exponent;
|
111
|
+
static const int max_exponent10 = numeric_limits<float>::max_exponent10;
|
112
|
+
static const bool traps = numeric_limits<float>::traps;
|
113
|
+
static const bool tinyness_before = numeric_limits<float>::tinyness_before;
|
114
|
+
|
115
|
+
static Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
|
116
|
+
static Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
|
117
|
+
static Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
|
118
|
+
static Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
|
119
|
+
static Eigen::bfloat16 round_error() { return Eigen::bfloat16(0x3f00); }
|
120
|
+
static Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
|
121
|
+
static Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
|
122
|
+
static Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); }
|
123
|
+
static Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
|
124
|
+
};
|
125
|
+
|
126
|
+
// If std::numeric_limits<T> is specialized, should also specialize
|
127
|
+
// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
|
128
|
+
// std::numeric_limits<const volatile T>
|
129
|
+
// https://stackoverflow.com/a/16519653/
|
130
|
+
template<>
|
131
|
+
struct numeric_limits<const Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
132
|
+
template<>
|
133
|
+
struct numeric_limits<volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
134
|
+
template<>
|
135
|
+
struct numeric_limits<const volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
136
|
+
} // namespace std
|
137
|
+
|
138
|
+
namespace Eigen {
|
139
|
+
|
140
|
+
namespace bfloat16_impl {
|
141
|
+
|
142
|
+
// We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
|
143
|
+
// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
|
144
|
+
// of the functions, while the latter can only deal with one of them.
|
145
|
+
#if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
|
146
|
+
|
147
|
+
#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
|
148
|
+
// We need to provide emulated *host-side* BF16 operators for clang.
|
149
|
+
#pragma push_macro("EIGEN_DEVICE_FUNC")
|
150
|
+
#undef EIGEN_DEVICE_FUNC
|
151
|
+
#if defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_NATIVE_BF16)
|
152
|
+
#define EIGEN_DEVICE_FUNC __host__
|
153
|
+
#else // both host and device need emulated ops.
|
154
|
+
#define EIGEN_DEVICE_FUNC __host__ __device__
|
155
|
+
#endif
|
156
|
+
#endif
|
157
|
+
|
158
|
+
// Definitions for CPUs, mostly working through conversion
|
159
|
+
// to/from fp32.
|
160
|
+
|
161
|
+
// Sum of two bfloat16 values, computed in float and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs + rhs);
}
|
164
|
+
// bfloat16 + int: the integer is cast to float before the float addition.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const int& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs + rhs);
}
|
167
|
+
// int + bfloat16: the integer is cast to float before the float addition.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const int& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs + rhs);
}
|
170
|
+
// Product of two bfloat16 values, computed in float and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator * (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs * rhs);
}
|
173
|
+
// Difference of two bfloat16 values, computed in float and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs - rhs);
}
|
176
|
+
// Quotient of two bfloat16 values, computed in float and converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, const bfloat16& b) {
  const float numerator = static_cast<float>(a);
  const float denominator = static_cast<float>(b);
  return bfloat16(numerator / denominator);
}
|
179
|
+
// Unary minus: flips the top bit (0x8000) of the stored pattern; no float
// round trip is involved.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) {
  bfloat16 negated;
  negated.value = a.value ^ 0x8000;
  return negated;
}
|
184
|
+
// In-place addition; the sum is formed in float and stored back into `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) {
  const float sum = static_cast<float>(a) + static_cast<float>(b);
  a = bfloat16(sum);
  return a;
}
|
188
|
+
// In-place multiplication; the product is formed in float and stored back
// into `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator *= (bfloat16& a, const bfloat16& b) {
  const float product = static_cast<float>(a) * static_cast<float>(b);
  a = bfloat16(product);
  return a;
}
|
192
|
+
// In-place subtraction; the difference is formed in float and stored back
// into `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator -= (bfloat16& a, const bfloat16& b) {
  const float difference = static_cast<float>(a) - static_cast<float>(b);
  a = bfloat16(difference);
  return a;
}
|
196
|
+
// In-place division; the quotient is formed in float and stored back into
// `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator /= (bfloat16& a, const bfloat16& b) {
  const float quotient = static_cast<float>(a) / static_cast<float>(b);
  a = bfloat16(quotient);
  return a;
}
|
200
|
+
// Pre-increment: adds one to `a` and returns the updated value (by value).
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a) {
  const bfloat16 one(1);
  a += one;
  return a;
}
|
204
|
+
// Pre-decrement: subtracts one from `a` and returns the updated value (by
// value).
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a) {
  const bfloat16 one(1);
  a -= one;
  return a;
}
|
208
|
+
// Post-increment: increments `a` and returns the value it held beforehand.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a, int) {
  bfloat16 before = a;
  ++a;
  return before;
}
|
213
|
+
// Post-decrement: decrements `a` and returns the value it held beforehand.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {
  bfloat16 before = a;
  --a;
  return before;
}
|
218
|
+
// Equality, delegated to numext::equal_strict on the float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return numext::equal_strict(lhs, rhs);
}
|
221
|
+
// Inequality, delegated to numext::not_equal_strict on the float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return numext::not_equal_strict(lhs, rhs);
}
|
224
|
+
// Less-than, evaluated on the float values of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs < rhs;
}
|
227
|
+
// Less-than-or-equal, evaluated on the float values of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs <= rhs;
}
|
230
|
+
// Greater-than, evaluated on the float values of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs > rhs;
}
|
233
|
+
// Greater-than-or-equal, evaluated on the float values of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs >= rhs;
}
|
236
|
+
|
237
|
+
#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
|
238
|
+
#pragma pop_macro("EIGEN_DEVICE_FUNC")
|
239
|
+
#endif
|
240
|
+
#endif // Emulate support for bfloat16 floats
|
241
|
+
|
242
|
+
// Division by an index. Do it in full float precision to avoid accuracy
|
243
|
+
// issues in converting the denominator to bfloat16.
|
244
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, Index b) {
|
245
|
+
return bfloat16(static_cast<float>(a) / static_cast<float>(b));
|
246
|
+
}
|
247
|
+
|
248
|
+
// Converts a float to a raw bfloat16 by truncation: the result is the upper
// 16 bits of the float's 32-bit pattern, the lower 16 bits are discarded.
//
// NaN inputs are handled separately and mapped to a fixed NaN pattern that
// keeps the sign of the input (0xFFC0 if the sign bit is set, 0x7FC0
// otherwise), so truncation of the fraction can never turn a NaN into
// something else.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {
  __bfloat16_raw output;
  if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(v)) {
    output.value = std::signbit(v) ? 0xFFC0: 0x7FC0;
    return output;
  }
  // Read the bits through bit_cast instead of aliasing the float with a
  // uint16_t pointer: this avoids the strict-aliasing violation, and taking
  // the high half with a shift needs no byte-order special casing.
  const numext::uint32_t bits = numext::bit_cast<numext::uint32_t>(v);
  output.value = static_cast<numext::uint16_t>(bits >> 16);
  return output;
}
|
262
|
+
|
263
|
+
// Wraps a 16-bit pattern in a __bfloat16_raw without interpreting it.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
__bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {
  return __bfloat16_raw(value);
}
|
266
|
+
|
267
|
+
// Returns the 16-bit pattern stored in a __bfloat16_raw.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {
  return bf.value;
}
|
270
|
+
|
271
|
+
// float_to_bfloat16_rtne template specialization that does not make any
|
272
|
+
// assumption about the value of its function argument (ff).
|
273
|
+
template <>
|
274
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) {
|
275
|
+
#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
|
276
|
+
// Nothing to do here
|
277
|
+
#else
|
278
|
+
__bfloat16_raw output;
|
279
|
+
|
280
|
+
if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) {
|
281
|
+
// If the value is a NaN, squash it to a qNaN with msb of fraction set,
|
282
|
+
// this makes sure after truncation we don't end up with an inf.
|
283
|
+
//
|
284
|
+
// qNaN magic: All exponent bits set + most significant bit of fraction
|
285
|
+
// set.
|
286
|
+
output.value = std::signbit(ff) ? 0xFFC0: 0x7FC0;
|
287
|
+
} else {
|
288
|
+
// Fast rounding algorithm that rounds a half value to nearest even. This
|
289
|
+
// reduces expected error when we convert a large number of floats. Here
|
290
|
+
// is how it works:
|
291
|
+
//
|
292
|
+
// Definitions:
|
293
|
+
// To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits
|
294
|
+
// with the following tags:
|
295
|
+
//
|
296
|
+
// Sign | Exp (8 bits) | Frac (23 bits)
|
297
|
+
// S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT
|
298
|
+
//
|
299
|
+
// S: Sign bit.
|
300
|
+
// E: Exponent bits.
|
301
|
+
// F: First 6 bits of fraction.
|
302
|
+
// L: Least significant bit of resulting bfloat16 if we truncate away the
|
303
|
+
// rest of the float32. This is also the 7th bit of fraction
|
304
|
+
// R: Rounding bit, 8th bit of fraction.
|
305
|
+
// T: Sticky bits, rest of fraction, 15 bits.
|
306
|
+
//
|
307
|
+
// To round half to nearest even, there are 3 cases where we want to round
|
308
|
+
// down (simply truncate the result of the bits away, which consists of
|
309
|
+
// rounding bit and sticky bits) and two cases where we want to round up
|
310
|
+
// (truncate then add one to the result).
|
311
|
+
//
|
312
|
+
// The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of
|
313
|
+
// 1s) as the rounding bias, adds the rounding bias to the input, then
|
314
|
+
// truncates the last 16 bits away.
|
315
|
+
//
|
316
|
+
// To understand how it works, we can analyze this algorithm case by case:
|
317
|
+
//
|
318
|
+
// 1. L = 0, R = 0:
|
319
|
+
// Expect: round down, this is less than half value.
|
320
|
+
//
|
321
|
+
// Algorithm:
|
322
|
+
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
323
|
+
// - Adding rounding bias to input may create any carry, depending on
|
324
|
+
// whether there is any value set to 1 in T bits.
|
325
|
+
// - R may be set to 1 if there is a carry.
|
326
|
+
// - L remains 0.
|
327
|
+
// - Note that this case also handles Inf and -Inf, where all fraction
|
328
|
+
// bits, including L, R and Ts are all 0. The output remains Inf after
|
329
|
+
// this algorithm.
|
330
|
+
//
|
331
|
+
// 2. L = 1, R = 0:
|
332
|
+
// Expect: round down, this is less than half value.
|
333
|
+
//
|
334
|
+
// Algorithm:
|
335
|
+
// - Rounding bias: 0x7fff + 1 = 0x8000
|
336
|
+
// - Adding rounding bias to input doesn't change sticky bits but
|
337
|
+
// adds 1 to rounding bit.
|
338
|
+
// - L remains 1.
|
339
|
+
//
|
340
|
+
// 3. L = 0, R = 1, all of T are 0:
|
341
|
+
// Expect: round down, this is exactly at half, the result is already
|
342
|
+
// even (L=0).
|
343
|
+
//
|
344
|
+
// Algorithm:
|
345
|
+
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
346
|
+
// - Adding rounding bias to input sets all sticky bits to 1, but
|
347
|
+
// doesn't create a carry.
|
348
|
+
// - R remains 1.
|
349
|
+
// - L remains 0.
|
350
|
+
//
|
351
|
+
// 4. L = 1, R = 1:
|
352
|
+
// Expect: round up, this is exactly at half, the result needs to be
|
353
|
+
// round to the next even number.
|
354
|
+
//
|
355
|
+
// Algorithm:
|
356
|
+
// - Rounding bias: 0x7fff + 1 = 0x8000
|
357
|
+
// - Adding rounding bias to input doesn't change sticky bits, but
|
358
|
+
// creates a carry from rounding bit.
|
359
|
+
// - The carry sets L to 0, creates another carry bit and propagate
|
360
|
+
// forward to F bits.
|
361
|
+
// - If all the F bits are 1, a carry then propagates to the exponent
|
362
|
+
// bits, which then creates the minimum value with the next exponent
|
363
|
+
// value. Note that we won't have the case where exponents are all 1,
|
364
|
+
// since that's either a NaN (handled in the other if condition) or inf
|
365
|
+
// (handled in case 1).
|
366
|
+
//
|
367
|
+
// 5. L = 0, R = 1, any of T is 1:
|
368
|
+
// Expect: round up, this is greater than half.
|
369
|
+
//
|
370
|
+
// Algorithm:
|
371
|
+
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
372
|
+
// - Adding rounding bias to input creates a carry from sticky bits,
|
373
|
+
// sets rounding bit to 0, then create another carry.
|
374
|
+
// - The second carry sets L to 1.
|
375
|
+
//
|
376
|
+
// Examples:
|
377
|
+
//
|
378
|
+
// Exact half value that is already even:
|
379
|
+
// Input:
|
380
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
381
|
+
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
382
|
+
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000
|
383
|
+
//
|
384
|
+
// This falls into case 3. We truncate the rest of 16 bits and no
|
385
|
+
// carry is created into F and L:
|
386
|
+
//
|
387
|
+
// Output:
|
388
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
389
|
+
// S E E E E E E E E F F F F F F L
|
390
|
+
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
|
391
|
+
//
|
392
|
+
// Exact half value, round to next even number:
|
393
|
+
// Input:
|
394
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
395
|
+
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
396
|
+
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000
|
397
|
+
//
|
398
|
+
// This falls into case 4. We create a carry from R and T,
|
399
|
+
// which then propagates into L and F:
|
400
|
+
//
|
401
|
+
// Output:
|
402
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
403
|
+
// S E E E E E E E E F F F F F F L
|
404
|
+
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
|
405
|
+
//
|
406
|
+
//
|
407
|
+
// Max denormal value round to min normal value:
|
408
|
+
// Input:
|
409
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
410
|
+
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
411
|
+
// 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111
|
412
|
+
//
|
413
|
+
// This falls into case 4. We create a carry from R and T,
|
414
|
+
// propagate into L and F, which then propagates into exponent
|
415
|
+
// bits:
|
416
|
+
//
|
417
|
+
// Output:
|
418
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
419
|
+
// S E E E E E E E E F F F F F F L
|
420
|
+
// 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
|
421
|
+
//
|
422
|
+
// Max normal value round to Inf:
|
423
|
+
// Input:
|
424
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
425
|
+
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
426
|
+
// 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111
|
427
|
+
//
|
428
|
+
// This falls into case 4. We create a carry from R and T,
|
429
|
+
// propagate into L and F, which then propagates into exponent
|
430
|
+
// bits:
|
431
|
+
//
|
432
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
433
|
+
// S E E E E E E E E F F F F F F L
|
434
|
+
// 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
|
435
|
+
|
436
|
+
// At this point, ff must be either a normal float, or +/-infinity.
|
437
|
+
output = float_to_bfloat16_rtne<true>(ff);
|
438
|
+
}
|
439
|
+
return output;
|
440
|
+
#endif
|
441
|
+
}
|
442
|
+
|
443
|
+
// float_to_bfloat16_rtne template specialization that assumes that its function
|
444
|
+
// argument (ff) is either a normal floating point number, or +/-infinity, or
|
445
|
+
// zero. Used to improve the runtime performance of conversion from an integer
|
446
|
+
// type to bfloat16.
|
447
|
+
template <>
|
448
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
|
449
|
+
#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
|
450
|
+
// Nothing to do here
|
451
|
+
#else
|
452
|
+
numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
|
453
|
+
__bfloat16_raw output;
|
454
|
+
|
455
|
+
// Least significant bit of resulting bfloat.
|
456
|
+
numext::uint32_t lsb = (input >> 16) & 1;
|
457
|
+
numext::uint32_t rounding_bias = 0x7fff + lsb;
|
458
|
+
input += rounding_bias;
|
459
|
+
output.value = static_cast<numext::uint16_t>(input >> 16);
|
460
|
+
return output;
|
461
|
+
#endif
|
462
|
+
}
|
463
|
+
|
464
|
+
// Widens a raw bfloat16 to float. The 16 stored bits become the upper half of
// the float's 32-bit pattern and the lower half is zero, so the conversion is
// exact.
//
// The pattern is assembled as an integer and converted with bit_cast rather
// than written through an `unsigned short*` aliasing the float: this avoids
// the strict-aliasing violation and needs no byte-order special casing.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {
  const numext::uint32_t bits = static_cast<numext::uint32_t>(h.value) << 16;
  return numext::bit_cast<float>(bits);
}
|
474
|
+
// --- standard functions ---
|
475
|
+
|
476
|
+
// True if `a` is an infinity; the test is done on the float value of `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) {
  EIGEN_USING_STD(isinf);
  const float widened = float(a);
  return (isinf)(widened);
}
|
480
|
+
// True if `a` is a NaN; the test is done on the float value of `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) {
  EIGEN_USING_STD(isnan);
  const float widened = float(a);
  return (isnan)(widened);
}
|
484
|
+
// True if `a` is neither an infinity nor a NaN.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) {
  return !((isinf EIGEN_NOT_A_MACRO (a)) || (isnan EIGEN_NOT_A_MACRO (a)));
}
|
487
|
+
|
488
|
+
// Absolute value, computed directly on the bit pattern by clearing the top
// (sign) bit.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
  bfloat16 magnitude;
  magnitude.value = a.value & 0x7FFF;  // keep the low 15 bits, drop the sign
  return magnitude;
}
|
493
|
+
// Exponential: evaluated with ::expf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::expf(x));
}
|
496
|
+
// exp(a) - 1: evaluated with numext::expm1 on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(numext::expm1(x));
}
|
499
|
+
// Natural logarithm: evaluated with ::logf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::logf(x));
}
|
502
|
+
// log(1 + a): evaluated with numext::log1p on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(numext::log1p(x));
}
|
505
|
+
// Base-10 logarithm: evaluated with ::log10f on the float value, converted
// back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::log10f(x));
}
|
508
|
+
// Base-2 logarithm, obtained as EIGEN_LOG2E * ln(a) in float precision and
// converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {
  const float natural_log = ::logf(float(a));
  return bfloat16(static_cast<float>(EIGEN_LOG2E) * natural_log);
}
|
511
|
+
// Square root: evaluated with ::sqrtf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::sqrtf(x));
}
|
514
|
+
// a raised to the power b: evaluated with ::powf on the float values,
// converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
  const float base = float(a);
  const float exponent = float(b);
  return bfloat16(::powf(base, exponent));
}
|
517
|
+
// Sine: evaluated with ::sinf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::sinf(x));
}
|
520
|
+
// Cosine: evaluated with ::cosf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::cosf(x));
}
|
523
|
+
// Tangent: evaluated with ::tanf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::tanf(x));
}
|
526
|
+
// Arc sine: evaluated with ::asinf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::asinf(x));
}
|
529
|
+
// Arc cosine: evaluated with ::acosf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::acosf(x));
}
|
532
|
+
// Arc tangent: evaluated with ::atanf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::atanf(x));
}
|
535
|
+
// Hyperbolic sine: evaluated with ::sinhf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::sinhf(x));
}
|
538
|
+
// Hyperbolic cosine: evaluated with ::coshf on the float value, converted
// back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::coshf(x));
}
|
541
|
+
// Hyperbolic tangent: evaluated with ::tanhf on the float value, converted
// back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::tanhf(x));
}
|
544
|
+
#if EIGEN_HAS_CXX11_MATH
|
545
|
+
// Inverse hyperbolic sine: evaluated with ::asinhf on the float value,
// converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::asinhf(x));
}
|
548
|
+
// Inverse hyperbolic cosine: evaluated with ::acoshf on the float value,
// converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::acoshf(x));
}
|
551
|
+
// Inverse hyperbolic tangent: evaluated with ::atanhf on the float value,
// converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::atanhf(x));
}
|
554
|
+
#endif
|
555
|
+
// Floor: evaluated with ::floorf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::floorf(x));
}
|
558
|
+
// Ceiling: evaluated with ::ceilf on the float value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::ceilf(x));
}
|
561
|
+
// Round to integral value: evaluated with ::rintf on the float value,
// converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::rintf(x));
}
|
564
|
+
// Round to nearest integral value: evaluated with ::roundf on the float
// value, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) {
  const float x = float(a);
  return bfloat16(::roundf(x));
}
|
567
|
+
// Floating-point remainder of a / b: evaluated with ::fmodf on the float
// values, converted back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {
  const float dividend = float(a);
  const float divisor = float(b);
  return bfloat16(::fmodf(dividend, divisor));
}
|
570
|
+
|
571
|
+
// Minimum of two bfloat16 values, compared as floats. Returns `b` only when
// it is strictly smaller than `a`; in every other case (including when the
// comparison is false because of a NaN) `a` is returned.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (rhs < lhs) {
    return b;
  }
  return a;
}
|
576
|
+
// Maximum of two bfloat16 values, compared as floats. Returns `b` only when
// `a` is strictly smaller than `b`; in every other case (including when the
// comparison is false because of a NaN) `a` is returned.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (lhs < rhs) {
    return b;
  }
  return a;
}
|
581
|
+
|
582
|
+
// fmin of two bfloat16 values: delegates to ::fminf on the float values and
// converts the result back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {
  return bfloat16(::fminf(static_cast<float>(a), static_cast<float>(b)));
}
|
587
|
+
// fmax of two bfloat16 values: delegates to ::fmaxf on the float values and
// converts the result back.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
  return bfloat16(::fmaxf(static_cast<float>(a), static_cast<float>(b)));
}
|
592
|
+
|
593
|
+
#ifndef EIGEN_NO_IO
|
594
|
+
// Stream insertion: the value is written as a float.
EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
  return os << static_cast<float>(v);
}
|
598
|
+
#endif
|
599
|
+
|
600
|
+
} // namespace bfloat16_impl
|
601
|
+
|
602
|
+
namespace internal {
|
603
|
+
|
604
|
+
template<>
|
605
|
+
struct random_default_impl<bfloat16, false, false>
|
606
|
+
{
|
607
|
+
static inline bfloat16 run(const bfloat16& x, const bfloat16& y)
|
608
|
+
{
|
609
|
+
return x + (y-x) * bfloat16(float(std::rand()) / float(RAND_MAX));
|
610
|
+
}
|
611
|
+
static inline bfloat16 run()
|
612
|
+
{
|
613
|
+
return run(bfloat16(-1.f), bfloat16(1.f));
|
614
|
+
}
|
615
|
+
};
|
616
|
+
|
617
|
+
// Marks bfloat16 as an arithmetic type for Eigen's internal type traits.
template<>
struct is_arithmetic<bfloat16>
{
  enum { value = true };
};
|
618
|
+
|
619
|
+
} // namespace internal
|
620
|
+
|
621
|
+
template<> struct NumTraits<Eigen::bfloat16>
|
622
|
+
: GenericNumTraits<Eigen::bfloat16>
|
623
|
+
{
|
624
|
+
enum {
|
625
|
+
IsSigned = true,
|
626
|
+
IsInteger = false,
|
627
|
+
IsComplex = false,
|
628
|
+
RequireInitialization = false
|
629
|
+
};
|
630
|
+
|
631
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
|
632
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);
|
633
|
+
}
|
634
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {
|
635
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f);
|
636
|
+
|
637
|
+
}
|
638
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
|
639
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);
|
640
|
+
}
|
641
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() {
|
642
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0xFF7F);
|
643
|
+
}
|
644
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() {
|
645
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x7f80);
|
646
|
+
}
|
647
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() {
|
648
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0);
|
649
|
+
}
|
650
|
+
};
|
651
|
+
|
652
|
+
} // namespace Eigen
|
653
|
+
|
654
|
+
namespace Eigen {
|
655
|
+
namespace numext {
|
656
|
+
|
657
|
+
template<>
|
658
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
659
|
+
bool (isnan)(const Eigen::bfloat16& h) {
|
660
|
+
return (bfloat16_impl::isnan)(h);
|
661
|
+
}
|
662
|
+
|
663
|
+
template<>
|
664
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
665
|
+
bool (isinf)(const Eigen::bfloat16& h) {
|
666
|
+
return (bfloat16_impl::isinf)(h);
|
667
|
+
}
|
668
|
+
|
669
|
+
template<>
|
670
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
671
|
+
bool (isfinite)(const Eigen::bfloat16& h) {
|
672
|
+
return (bfloat16_impl::isfinite)(h);
|
673
|
+
}
|
674
|
+
|
675
|
+
template <>
|
676
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
|
677
|
+
return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));
|
678
|
+
}
|
679
|
+
|
680
|
+
template <>
|
681
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {
|
682
|
+
return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
|
683
|
+
}
|
684
|
+
|
685
|
+
} // namespace numext
|
686
|
+
} // namespace Eigen
|
687
|
+
|
688
|
+
#if EIGEN_HAS_STD_HASH
|
689
|
+
namespace std {
|
690
|
+
template <>
|
691
|
+
struct hash<Eigen::bfloat16> {
|
692
|
+
EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
|
693
|
+
return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
|
694
|
+
}
|
695
|
+
};
|
696
|
+
} // namespace std
|
697
|
+
#endif
|
698
|
+
|
699
|
+
|
700
|
+
#endif // EIGEN_BFLOAT16_H
|