umappp 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +25 -0
- data/README.md +110 -0
- data/ext/umappp/extconf.rb +25 -0
- data/ext/umappp/numo.hpp +867 -0
- data/ext/umappp/umappp.cpp +225 -0
- data/lib/umappp/version.rb +5 -0
- data/lib/umappp.rb +41 -0
- data/vendor/Eigen/Cholesky +45 -0
- data/vendor/Eigen/CholmodSupport +48 -0
- data/vendor/Eigen/Core +384 -0
- data/vendor/Eigen/Dense +7 -0
- data/vendor/Eigen/Eigen +2 -0
- data/vendor/Eigen/Eigenvalues +60 -0
- data/vendor/Eigen/Geometry +59 -0
- data/vendor/Eigen/Householder +29 -0
- data/vendor/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/Eigen/Jacobi +32 -0
- data/vendor/Eigen/KLUSupport +41 -0
- data/vendor/Eigen/LU +47 -0
- data/vendor/Eigen/MetisSupport +35 -0
- data/vendor/Eigen/OrderingMethods +70 -0
- data/vendor/Eigen/PaStiXSupport +49 -0
- data/vendor/Eigen/PardisoSupport +35 -0
- data/vendor/Eigen/QR +50 -0
- data/vendor/Eigen/QtAlignedMalloc +39 -0
- data/vendor/Eigen/SPQRSupport +34 -0
- data/vendor/Eigen/SVD +50 -0
- data/vendor/Eigen/Sparse +34 -0
- data/vendor/Eigen/SparseCholesky +37 -0
- data/vendor/Eigen/SparseCore +69 -0
- data/vendor/Eigen/SparseLU +50 -0
- data/vendor/Eigen/SparseQR +36 -0
- data/vendor/Eigen/StdDeque +27 -0
- data/vendor/Eigen/StdList +26 -0
- data/vendor/Eigen/StdVector +27 -0
- data/vendor/Eigen/SuperLUSupport +64 -0
- data/vendor/Eigen/UmfPackSupport +40 -0
- data/vendor/Eigen/src/Cholesky/LDLT.h +688 -0
- data/vendor/Eigen/src/Cholesky/LLT.h +558 -0
- data/vendor/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- data/vendor/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/Eigen/src/Core/Array.h +417 -0
- data/vendor/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/Eigen/src/Core/Assign.h +90 -0
- data/vendor/Eigen/src/Core/AssignEvaluator.h +1010 -0
- data/vendor/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/Eigen/src/Core/Block.h +448 -0
- data/vendor/Eigen/src/Core/BooleanRedux.h +162 -0
- data/vendor/Eigen/src/Core/CommaInitializer.h +164 -0
- data/vendor/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/Eigen/src/Core/CoreEvaluators.h +1741 -0
- data/vendor/Eigen/src/Core/CoreIterators.h +132 -0
- data/vendor/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- data/vendor/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- data/vendor/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/Eigen/src/Core/CwiseUnaryView.h +132 -0
- data/vendor/Eigen/src/Core/DenseBase.h +701 -0
- data/vendor/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- data/vendor/Eigen/src/Core/DenseStorage.h +652 -0
- data/vendor/Eigen/src/Core/Diagonal.h +258 -0
- data/vendor/Eigen/src/Core/DiagonalMatrix.h +391 -0
- data/vendor/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/Eigen/src/Core/Dot.h +318 -0
- data/vendor/Eigen/src/Core/EigenBase.h +160 -0
- data/vendor/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- data/vendor/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/Eigen/src/Core/GeneralProduct.h +465 -0
- data/vendor/Eigen/src/Core/GenericPacketMath.h +1040 -0
- data/vendor/Eigen/src/Core/GlobalFunctions.h +194 -0
- data/vendor/Eigen/src/Core/IO.h +258 -0
- data/vendor/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/Eigen/src/Core/Inverse.h +117 -0
- data/vendor/Eigen/src/Core/Map.h +171 -0
- data/vendor/Eigen/src/Core/MapBase.h +310 -0
- data/vendor/Eigen/src/Core/MathFunctions.h +2057 -0
- data/vendor/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- data/vendor/Eigen/src/Core/Matrix.h +565 -0
- data/vendor/Eigen/src/Core/MatrixBase.h +547 -0
- data/vendor/Eigen/src/Core/NestByValue.h +85 -0
- data/vendor/Eigen/src/Core/NoAlias.h +109 -0
- data/vendor/Eigen/src/Core/NumTraits.h +335 -0
- data/vendor/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/Eigen/src/Core/PermutationMatrix.h +605 -0
- data/vendor/Eigen/src/Core/PlainObjectBase.h +1128 -0
- data/vendor/Eigen/src/Core/Product.h +191 -0
- data/vendor/Eigen/src/Core/ProductEvaluators.h +1179 -0
- data/vendor/Eigen/src/Core/Random.h +218 -0
- data/vendor/Eigen/src/Core/Redux.h +515 -0
- data/vendor/Eigen/src/Core/Ref.h +381 -0
- data/vendor/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/Eigen/src/Core/ReturnByValue.h +119 -0
- data/vendor/Eigen/src/Core/Reverse.h +217 -0
- data/vendor/Eigen/src/Core/Select.h +164 -0
- data/vendor/Eigen/src/Core/SelfAdjointView.h +365 -0
- data/vendor/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/Eigen/src/Core/Solve.h +188 -0
- data/vendor/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/Eigen/src/Core/SolverBase.h +168 -0
- data/vendor/Eigen/src/Core/StableNorm.h +251 -0
- data/vendor/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/Eigen/src/Core/Stride.h +116 -0
- data/vendor/Eigen/src/Core/Swap.h +68 -0
- data/vendor/Eigen/src/Core/Transpose.h +464 -0
- data/vendor/Eigen/src/Core/Transpositions.h +386 -0
- data/vendor/Eigen/src/Core/TriangularMatrix.h +1001 -0
- data/vendor/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/Eigen/src/Core/VectorwiseOp.h +784 -0
- data/vendor/Eigen/src/Core/Visitor.h +381 -0
- data/vendor/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- data/vendor/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- data/vendor/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- data/vendor/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- data/vendor/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- data/vendor/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- data/vendor/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- data/vendor/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- data/vendor/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- data/vendor/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- data/vendor/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- data/vendor/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- data/vendor/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- data/vendor/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- data/vendor/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- data/vendor/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- data/vendor/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- data/vendor/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- data/vendor/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- data/vendor/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- data/vendor/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- data/vendor/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- data/vendor/Eigen/src/Core/functors/StlFunctors.h +166 -0
- data/vendor/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- data/vendor/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/Eigen/src/Core/products/Parallelizer.h +180 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- data/vendor/Eigen/src/Core/util/BlasUtil.h +583 -0
- data/vendor/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/Eigen/src/Core/util/Constants.h +563 -0
- data/vendor/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- data/vendor/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- data/vendor/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/Eigen/src/Core/util/MKL_support.h +137 -0
- data/vendor/Eigen/src/Core/util/Macros.h +1464 -0
- data/vendor/Eigen/src/Core/util/Memory.h +1163 -0
- data/vendor/Eigen/src/Core/util/Meta.h +812 -0
- data/vendor/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- data/vendor/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/Eigen/src/Core/util/StaticAssert.h +221 -0
- data/vendor/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/Eigen/src/Core/util/XprHelper.h +856 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- data/vendor/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- data/vendor/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- data/vendor/Eigen/src/Geometry/AlignedBox.h +486 -0
- data/vendor/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/Eigen/src/Geometry/Homogeneous.h +501 -0
- data/vendor/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/Eigen/src/Geometry/OrthoMethods.h +235 -0
- data/vendor/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- data/vendor/Eigen/src/Geometry/Quaternion.h +870 -0
- data/vendor/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/Eigen/src/Geometry/Scaling.h +188 -0
- data/vendor/Eigen/src/Geometry/Transform.h +1563 -0
- data/vendor/Eigen/src/Geometry/Translation.h +202 -0
- data/vendor/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/Eigen/src/Householder/BlockHouseholder.h +110 -0
- data/vendor/Eigen/src/Householder/Householder.h +176 -0
- data/vendor/Eigen/src/Householder/HouseholderSequence.h +545 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- data/vendor/Eigen/src/Jacobi/Jacobi.h +483 -0
- data/vendor/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/Eigen/src/LU/Determinant.h +117 -0
- data/vendor/Eigen/src/LU/FullPivLU.h +877 -0
- data/vendor/Eigen/src/LU/InverseImpl.h +432 -0
- data/vendor/Eigen/src/LU/PartialPivLU.h +624 -0
- data/vendor/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/Eigen/src/OrderingMethods/Amd.h +435 -0
- data/vendor/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- data/vendor/Eigen/src/OrderingMethods/Ordering.h +153 -0
- data/vendor/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- data/vendor/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- data/vendor/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- data/vendor/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- data/vendor/Eigen/src/QR/HouseholderQR.h +434 -0
- data/vendor/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- data/vendor/Eigen/src/SVD/BDCSVD.h +1366 -0
- data/vendor/Eigen/src/SVD/JacobiSVD.h +812 -0
- data/vendor/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/Eigen/src/SVD/SVDBase.h +376 -0
- data/vendor/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- data/vendor/Eigen/src/SparseCore/AmbiVector.h +378 -0
- data/vendor/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- data/vendor/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/Eigen/src/SparseCore/SparseAssign.h +270 -0
- data/vendor/Eigen/src/SparseCore/SparseBlock.h +571 -0
- data/vendor/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- data/vendor/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- data/vendor/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- data/vendor/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- data/vendor/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- data/vendor/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- data/vendor/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/Eigen/src/SparseCore/SparseProduct.h +181 -0
- data/vendor/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- data/vendor/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/Eigen/src/SparseCore/SparseUtil.h +186 -0
- data/vendor/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/Eigen/src/SparseCore/SparseView.h +254 -0
- data/vendor/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/Eigen/src/SparseLU/SparseLU.h +923 -0
- data/vendor/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/Eigen/src/SparseQR/SparseQR.h +758 -0
- data/vendor/Eigen/src/StlSupport/StdDeque.h +116 -0
- data/vendor/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- data/vendor/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- data/vendor/Eigen/src/misc/Image.h +82 -0
- data/vendor/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/Eigen/src/misc/blas.h +440 -0
- data/vendor/Eigen/src/misc/lapack.h +152 -0
- data/vendor/Eigen/src/misc/lapacke.h +16292 -0
- data/vendor/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- data/vendor/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- data/vendor/Eigen/src/plugins/BlockMethods.h +1442 -0
- data/vendor/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- data/vendor/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- data/vendor/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/aarand/aarand.hpp +114 -0
- data/vendor/annoy/annoylib.h +1495 -0
- data/vendor/annoy/kissrandom.h +120 -0
- data/vendor/annoy/mman.h +242 -0
- data/vendor/hnswlib/bruteforce.h +152 -0
- data/vendor/hnswlib/hnswalg.h +1192 -0
- data/vendor/hnswlib/hnswlib.h +108 -0
- data/vendor/hnswlib/space_ip.h +282 -0
- data/vendor/hnswlib/space_l2.h +281 -0
- data/vendor/hnswlib/visited_list_pool.h +79 -0
- data/vendor/irlba/irlba.hpp +575 -0
- data/vendor/irlba/lanczos.hpp +212 -0
- data/vendor/irlba/parallel.hpp +474 -0
- data/vendor/irlba/utils.hpp +224 -0
- data/vendor/irlba/wrappers.hpp +228 -0
- data/vendor/kmeans/Base.hpp +75 -0
- data/vendor/kmeans/Details.hpp +79 -0
- data/vendor/kmeans/HartiganWong.hpp +492 -0
- data/vendor/kmeans/InitializeKmeansPP.hpp +144 -0
- data/vendor/kmeans/InitializeNone.hpp +44 -0
- data/vendor/kmeans/InitializePCAPartition.hpp +309 -0
- data/vendor/kmeans/InitializeRandom.hpp +91 -0
- data/vendor/kmeans/Kmeans.hpp +161 -0
- data/vendor/kmeans/Lloyd.hpp +134 -0
- data/vendor/kmeans/MiniBatch.hpp +269 -0
- data/vendor/kmeans/QuickSearch.hpp +179 -0
- data/vendor/kmeans/compute_centroids.hpp +32 -0
- data/vendor/kmeans/compute_wcss.hpp +27 -0
- data/vendor/kmeans/is_edge_case.hpp +42 -0
- data/vendor/kmeans/random.hpp +55 -0
- data/vendor/knncolle/Annoy/Annoy.hpp +193 -0
- data/vendor/knncolle/BruteForce/BruteForce.hpp +120 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +225 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +286 -0
- data/vendor/knncolle/VpTree/VpTree.hpp +256 -0
- data/vendor/knncolle/knncolle.hpp +34 -0
- data/vendor/knncolle/utils/Base.hpp +100 -0
- data/vendor/knncolle/utils/NeighborQueue.hpp +94 -0
- data/vendor/knncolle/utils/distances.hpp +98 -0
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +112 -0
- data/vendor/powerit/PowerIterations.hpp +157 -0
- data/vendor/umappp/NeighborList.hpp +37 -0
- data/vendor/umappp/Umap.hpp +662 -0
- data/vendor/umappp/combine_neighbor_sets.hpp +95 -0
- data/vendor/umappp/find_ab.hpp +157 -0
- data/vendor/umappp/neighbor_similarities.hpp +136 -0
- data/vendor/umappp/optimize_layout.hpp +285 -0
- data/vendor/umappp/spectral_init.hpp +181 -0
- data/vendor/umappp/umappp.hpp +13 -0
- metadata +465 -0
@@ -0,0 +1,700 @@
|
|
1
|
+
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#ifndef EIGEN_BFLOAT16_H
|
17
|
+
#define EIGEN_BFLOAT16_H
|
18
|
+
|
19
|
+
// Generates the bfloat16-packet overload of a unary packet function.
// The expansion is an explicit specialization METHOD<PACKET_BF16> that widens
// its argument with Bf16ToF32, applies METHOD<PACKET_F> to the widened packet
// and narrows the result back with F32ToBf16.
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)                    \
  template <>                                                                  \
  EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED PACKET_BF16 \
  METHOD<PACKET_BF16>(const PACKET_BF16& bf16_packet) {                        \
    return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(bf16_packet)));                \
  }
|
25
|
+
|
26
|
+
namespace Eigen {
|
27
|
+
|
28
|
+
struct bfloat16;
|
29
|
+
|
30
|
+
namespace bfloat16_impl {
|
31
|
+
|
32
|
+
// Make our own __bfloat16_raw definition.
|
33
|
+
struct __bfloat16_raw {
|
34
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}
|
35
|
+
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
|
36
|
+
unsigned short value;
|
37
|
+
};
|
38
|
+
|
39
|
+
// Declarations only; the matching definitions are not part of this section.

// Produces raw bfloat16 storage from an unsigned short.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw
raw_uint16_to_bfloat16(unsigned short bits);

// float -> raw bfloat16 conversion, selected at compile time through the
// AssumeArgumentIsNormalOrInfinityOrZero flag.
template <bool AssumeArgumentIsNormalOrInfinityOrZero>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw
float_to_bfloat16_rtne(float f);

// Both explicit specializations are announced up front. Without these forward
// declarations Visual C++ 2019 reports:
// > error C2908: explicit specialization; 'float_to_bfloat16_rtne' has already been instantiated
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw
float_to_bfloat16_rtne<false>(float f);
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw
float_to_bfloat16_rtne<true>(float f);

// raw bfloat16 -> float conversion.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float
bfloat16_to_float(__bfloat16_raw raw);
|
49
|
+
|
50
|
+
struct bfloat16_base : public __bfloat16_raw {
|
51
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base() {}
|
52
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
|
53
|
+
};
|
54
|
+
|
55
|
+
} // namespace bfloat16_impl
|
56
|
+
|
57
|
+
// Class definition.
|
58
|
+
struct bfloat16 : public bfloat16_impl::bfloat16_base {
|
59
|
+
|
60
|
+
typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;
|
61
|
+
|
62
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}
|
63
|
+
|
64
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {}
|
65
|
+
|
66
|
+
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)
|
67
|
+
: bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
|
68
|
+
|
69
|
+
template<class T>
|
70
|
+
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)
|
71
|
+
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
|
72
|
+
|
73
|
+
explicit EIGEN_DEVICE_FUNC bfloat16(float f)
|
74
|
+
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
|
75
|
+
|
76
|
+
// Following the convention of numpy, converting between complex and
|
77
|
+
// float will lead to loss of imag value.
|
78
|
+
template<typename RealScalar>
|
79
|
+
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
|
80
|
+
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
|
81
|
+
|
82
|
+
EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless.
|
83
|
+
return bfloat16_impl::bfloat16_to_float(*this);
|
84
|
+
}
|
85
|
+
};
|
86
|
+
} // namespace Eigen
|
87
|
+
|
88
|
+
namespace std {
|
89
|
+
template<>
|
90
|
+
struct numeric_limits<Eigen::bfloat16> {
|
91
|
+
static const bool is_specialized = true;
|
92
|
+
static const bool is_signed = true;
|
93
|
+
static const bool is_integer = false;
|
94
|
+
static const bool is_exact = false;
|
95
|
+
static const bool has_infinity = true;
|
96
|
+
static const bool has_quiet_NaN = true;
|
97
|
+
static const bool has_signaling_NaN = true;
|
98
|
+
static const float_denorm_style has_denorm = std::denorm_absent;
|
99
|
+
static const bool has_denorm_loss = false;
|
100
|
+
static const std::float_round_style round_style = numeric_limits<float>::round_style;
|
101
|
+
static const bool is_iec559 = false;
|
102
|
+
static const bool is_bounded = true;
|
103
|
+
static const bool is_modulo = false;
|
104
|
+
static const int digits = 8;
|
105
|
+
static const int digits10 = 2;
|
106
|
+
static const int max_digits10 = 4;
|
107
|
+
static const int radix = 2;
|
108
|
+
static const int min_exponent = numeric_limits<float>::min_exponent;
|
109
|
+
static const int min_exponent10 = numeric_limits<float>::min_exponent10;
|
110
|
+
static const int max_exponent = numeric_limits<float>::max_exponent;
|
111
|
+
static const int max_exponent10 = numeric_limits<float>::max_exponent10;
|
112
|
+
static const bool traps = numeric_limits<float>::traps;
|
113
|
+
static const bool tinyness_before = numeric_limits<float>::tinyness_before;
|
114
|
+
|
115
|
+
static Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
|
116
|
+
static Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
|
117
|
+
static Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
|
118
|
+
static Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
|
119
|
+
static Eigen::bfloat16 round_error() { return Eigen::bfloat16(0x3f00); }
|
120
|
+
static Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
|
121
|
+
static Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
|
122
|
+
static Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); }
|
123
|
+
static Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
|
124
|
+
};
|
125
|
+
|
126
|
+
// If std::numeric_limits<T> is specialized, should also specialize
|
127
|
+
// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
|
128
|
+
// std::numeric_limits<const volatile T>
|
129
|
+
// https://stackoverflow.com/a/16519653/
|
130
|
+
template<>
|
131
|
+
struct numeric_limits<const Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
132
|
+
template<>
|
133
|
+
struct numeric_limits<volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
134
|
+
template<>
|
135
|
+
struct numeric_limits<const volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
|
136
|
+
} // namespace std
|
137
|
+
|
138
|
+
namespace Eigen {
|
139
|
+
|
140
|
+
namespace bfloat16_impl {
|
141
|
+
|
142
|
+
// We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
|
143
|
+
// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
|
144
|
+
// of the functions, while the latter can only deal with one of them.
|
145
|
+
#if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
|
146
|
+
|
147
|
+
#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
// clang needs emulated *host-side* BF16 operators. The current definition of
// EIGEN_DEVICE_FUNC is saved with push_macro and then replaced.
#  pragma push_macro("EIGEN_DEVICE_FUNC")
#  undef EIGEN_DEVICE_FUNC
#  if defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_NATIVE_BF16)
#    define EIGEN_DEVICE_FUNC __host__
#  else
     // Both host and device need emulated ops.
#    define EIGEN_DEVICE_FUNC __host__ __device__
#  endif
#endif
|
157
|
+
|
158
|
+
// Definitions for CPUs, mostly working through conversion
|
159
|
+
// to/from fp32.
|
160
|
+
|
161
|
+
// Adds two bfloat16 values: both operands are widened to float, summed, and
// the float sum is converted back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs + rhs);
}
|
164
|
+
// Adds an int to a bfloat16: both operands are converted to float before the
// addition and the float sum is converted back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const int& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs + rhs);
}
|
167
|
+
// Adds a bfloat16 to an int: both operands are converted to float before the
// addition and the float sum is converted back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const int& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs + rhs);
}
|
170
|
+
// Multiplies two bfloat16 values in float and converts the product back to
// bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator * (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs * rhs);
}
|
173
|
+
// Subtracts b from a in float and converts the difference back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return bfloat16(lhs - rhs);
}
|
176
|
+
// Divides a by b in float and converts the quotient back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, const bfloat16& b) {
  const float numerator = static_cast<float>(a);
  const float denominator = static_cast<float>(b);
  return bfloat16(numerator / denominator);
}
|
179
|
+
// Unary minus: flips the top bit (0x8000, the sign bit) of the 16-bit pattern
// without going through float.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) {
  bfloat16 negated;
  negated.value = a.value ^ 0x8000;
  return negated;
}
|
184
|
+
// In-place addition: computes a + b in float, stores the converted result in
// a, and returns a reference to a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  a = bfloat16(lhs + rhs);
  return a;
}
|
188
|
+
// In-place multiplication: computes a * b in float, stores the converted
// result in a, and returns a reference to a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator *= (bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  a = bfloat16(lhs * rhs);
  return a;
}
|
192
|
+
// In-place subtraction: computes a - b in float, stores the converted result
// in a, and returns a reference to a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator -= (bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  a = bfloat16(lhs - rhs);
  return a;
}
|
196
|
+
// In-place division: computes a / b in float, stores the converted result in
// a, and returns a reference to a.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator /= (bfloat16& a, const bfloat16& b) {
  const float numerator = static_cast<float>(a);
  const float denominator = static_cast<float>(b);
  a = bfloat16(numerator / denominator);
  return a;
}
|
200
|
+
// Prefix increment: adds one to a in place and returns the updated value
// (by value, not by reference).
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a) {
  const bfloat16 one(1);
  a += one;
  return a;
}
|
204
|
+
// Prefix decrement: subtracts one from a in place and returns the updated
// value (by value, not by reference).
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a) {
  const bfloat16 one(1);
  a -= one;
  return a;
}
|
208
|
+
// Postfix increment: increments a via the prefix operator and returns the
// value a held before the increment.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a, int) {
  const bfloat16 before(a);
  ++a;
  return before;
}
|
213
|
+
// Postfix decrement: decrements a via the prefix operator and returns the
// value a held before the decrement.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {
  const bfloat16 before(a);
  --a;
  return before;
}
|
218
|
+
// Equality: compares the float values of both operands with
// numext::equal_strict.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return numext::equal_strict(lhs, rhs);
}
|
221
|
+
// Inequality: compares the float values of both operands with
// numext::not_equal_strict.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return numext::not_equal_strict(lhs, rhs);
}
|
224
|
+
// Less-than, evaluated on the float values of the operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs < rhs;
}
|
227
|
+
// Less-than-or-equal, evaluated on the float values of the operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs <= rhs;
}
|
230
|
+
// Greater-than, evaluated on the float values of the operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs > rhs;
}
|
233
|
+
// Greater-than-or-equal, evaluated on the float values of the operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs >= rhs;
}
|
236
|
+
|
237
|
+
#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
|
238
|
+
#pragma pop_macro("EIGEN_DEVICE_FUNC")
|
239
|
+
#endif
|
240
|
+
#endif // Emulate support for bfloat16 floats
|
241
|
+
|
242
|
+
// Division by an index. Do it in full float precision to avoid accuracy
|
243
|
+
// issues in converting the denominator to bfloat16.
|
244
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, Index b) {
|
245
|
+
return bfloat16(static_cast<float>(a) / static_cast<float>(b));
|
246
|
+
}
|
247
|
+
|
248
|
+
// Converts a float to a raw bfloat16 by truncation (no rounding): the result
// is the upper 16 bits of the float's 32-bit pattern.
//
// NaN inputs are mapped to a quiet NaN that keeps the sign (0xFFC0 / 0x7FC0),
// so that dropping the low fraction bits can never produce an infinity.
//
// The bits are obtained with numext::bit_cast instead of reading the float
// through a uint16_t pointer: this avoids the strict-aliasing violation of the
// pointer cast and makes the byte-order #if unnecessary, because shifting the
// 32-bit integer selects the high half regardless of endianness.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {
  __bfloat16_raw output;
  if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(v)) {
    output.value = std::signbit(v) ? 0xFFC0: 0x7FC0;
    return output;
  }
  const numext::uint32_t bits = numext::bit_cast<numext::uint32_t>(v);
  output.value = static_cast<numext::uint16_t>(bits >> 16);
  return output;
}
|
262
|
+
|
263
|
+
// Wraps a 16-bit pattern in a __bfloat16_raw without any numeric conversion.
// Kept as a single return statement so it stays usable where EIGEN_CONSTEXPR
// expands to constexpr.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
__bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {
  return __bfloat16_raw(value);
}
|
266
|
+
|
267
|
+
// Returns the 16-bit pattern stored in a __bfloat16_raw, unchanged.
// Kept as a single return statement so it stays usable where EIGEN_CONSTEXPR
// expands to constexpr.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {
  return bf.value;
}
|
270
|
+
|
271
|
+
// float_to_bfloat16_rtne template specialization that does not make any
|
272
|
+
// assumption about the value of its function argument (ff).
|
273
|
+
template <>
|
274
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) {
|
275
|
+
#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
|
276
|
+
// Nothing to do here
|
277
|
+
#else
|
278
|
+
__bfloat16_raw output;
|
279
|
+
|
280
|
+
if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) {
|
281
|
+
// If the value is a NaN, squash it to a qNaN with msb of fraction set,
|
282
|
+
// this makes sure after truncation we don't end up with an inf.
|
283
|
+
//
|
284
|
+
// qNaN magic: All exponent bits set + most significant bit of fraction
|
285
|
+
// set.
|
286
|
+
output.value = std::signbit(ff) ? 0xFFC0: 0x7FC0;
|
287
|
+
} else {
|
288
|
+
// Fast rounding algorithm that rounds a half value to nearest even. This
|
289
|
+
// reduces expected error when we convert a large number of floats. Here
|
290
|
+
// is how it works:
|
291
|
+
//
|
292
|
+
// Definitions:
|
293
|
+
// To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits
|
294
|
+
// with the following tags:
|
295
|
+
//
|
296
|
+
// Sign | Exp (8 bits) | Frac (23 bits)
|
297
|
+
// S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT
|
298
|
+
//
|
299
|
+
// S: Sign bit.
|
300
|
+
// E: Exponent bits.
|
301
|
+
// F: First 6 bits of fraction.
|
302
|
+
// L: Least significant bit of resulting bfloat16 if we truncate away the
|
303
|
+
// rest of the float32. This is also the 7th bit of fraction
|
304
|
+
// R: Rounding bit, 8th bit of fraction.
|
305
|
+
// T: Sticky bits, rest of fraction, 15 bits.
|
306
|
+
//
|
307
|
+
// To round half to nearest even, there are 3 cases where we want to round
|
308
|
+
// down (simply truncate the result of the bits away, which consists of
|
309
|
+
// rounding bit and sticky bits) and two cases where we want to round up
|
310
|
+
// (truncate then add one to the result).
|
311
|
+
//
|
312
|
+
// The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of
|
313
|
+
// 1s) as the rounding bias, adds the rounding bias to the input, then
|
314
|
+
// truncates the last 16 bits away.
|
315
|
+
//
|
316
|
+
// To understand how it works, we can analyze this algorithm case by case:
|
317
|
+
//
|
318
|
+
// 1. L = 0, R = 0:
|
319
|
+
// Expect: round down, this is less than half value.
|
320
|
+
//
|
321
|
+
// Algorithm:
|
322
|
+
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
323
|
+
// - Adding rounding bias to input may create any carry, depending on
|
324
|
+
// whether there is any value set to 1 in T bits.
|
325
|
+
// - R may be set to 1 if there is a carry.
|
326
|
+
// - L remains 0.
|
327
|
+
// - Note that this case also handles Inf and -Inf, where all fraction
|
328
|
+
// bits, including L, R and Ts are all 0. The output remains Inf after
|
329
|
+
// this algorithm.
|
330
|
+
//
|
331
|
+
// 2. L = 1, R = 0:
|
332
|
+
// Expect: round down, this is less than half value.
|
333
|
+
//
|
334
|
+
// Algorithm:
|
335
|
+
// - Rounding bias: 0x7fff + 1 = 0x8000
|
336
|
+
// - Adding rounding bias to input doesn't change sticky bits but
|
337
|
+
// adds 1 to rounding bit.
|
338
|
+
// - L remains 1.
|
339
|
+
//
|
340
|
+
// 3. L = 0, R = 1, all of T are 0:
|
341
|
+
// Expect: round down, this is exactly at half, the result is already
|
342
|
+
// even (L=0).
|
343
|
+
//
|
344
|
+
// Algorithm:
|
345
|
+
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
346
|
+
// - Adding rounding bias to input sets all sticky bits to 1, but
|
347
|
+
// doesn't create a carry.
|
348
|
+
// - R remains 1.
|
349
|
+
// - L remains 0.
|
350
|
+
//
|
351
|
+
// 4. L = 1, R = 1:
|
352
|
+
// Expect: round up, this is exactly at half, the result needs to be
|
353
|
+
// round to the next even number.
|
354
|
+
//
|
355
|
+
// Algorithm:
|
356
|
+
// - Rounding bias: 0x7fff + 1 = 0x8000
|
357
|
+
// - Adding rounding bias to input doesn't change sticky bits, but
|
358
|
+
// creates a carry from rounding bit.
|
359
|
+
// - The carry sets L to 0, creates another carry bit and propagate
|
360
|
+
// forward to F bits.
|
361
|
+
// - If all the F bits are 1, a carry then propagates to the exponent
|
362
|
+
// bits, which then creates the minimum value with the next exponent
|
363
|
+
// value. Note that we won't have the case where exponents are all 1,
|
364
|
+
// since that's either a NaN (handled in the other if condition) or inf
|
365
|
+
// (handled in case 1).
|
366
|
+
//
|
367
|
+
// 5. L = 0, R = 1, any of T is 1:
|
368
|
+
// Expect: round up, this is greater than half.
|
369
|
+
//
|
370
|
+
// Algorithm:
|
371
|
+
// - Rounding bias: 0x7fff + 0 = 0x7fff
|
372
|
+
// - Adding rounding bias to input creates a carry from sticky bits,
|
373
|
+
// sets rounding bit to 0, then create another carry.
|
374
|
+
// - The second carry sets L to 1.
|
375
|
+
//
|
376
|
+
// Examples:
|
377
|
+
//
|
378
|
+
// Exact half value that is already even:
|
379
|
+
// Input:
|
380
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
381
|
+
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
382
|
+
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000
|
383
|
+
//
|
384
|
+
// This falls into case 3. We truncate the rest of 16 bits and no
|
385
|
+
// carry is created into F and L:
|
386
|
+
//
|
387
|
+
// Output:
|
388
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
389
|
+
// S E E E E E E E E F F F F F F L
|
390
|
+
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
|
391
|
+
//
|
392
|
+
// Exact half value, round to next even number:
|
393
|
+
// Input:
|
394
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
395
|
+
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
396
|
+
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000
|
397
|
+
//
|
398
|
+
// This falls into case 4. We create a carry from R and T,
|
399
|
+
// which then propagates into L and F:
|
400
|
+
//
|
401
|
+
// Output:
|
402
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
403
|
+
// S E E E E E E E E F F F F F F L
|
404
|
+
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
|
405
|
+
//
|
406
|
+
//
|
407
|
+
// Max denormal value round to min normal value:
|
408
|
+
// Input:
|
409
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
410
|
+
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
411
|
+
// 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111
|
412
|
+
//
|
413
|
+
// This falls into case 4. We create a carry from R and T,
|
414
|
+
// propagate into L and F, which then propagates into exponent
|
415
|
+
// bits:
|
416
|
+
//
|
417
|
+
// Output:
|
418
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
419
|
+
// S E E E E E E E E F F F F F F L
|
420
|
+
// 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
|
421
|
+
//
|
422
|
+
// Max normal value round to Inf:
|
423
|
+
// Input:
|
424
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
|
425
|
+
// S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
|
426
|
+
// 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111
|
427
|
+
//
|
428
|
+
// This falls into case 4. We create a carry from R and T,
|
429
|
+
// propagate into L and F, which then propagates into exponent
|
430
|
+
// bits:
|
431
|
+
//
|
432
|
+
// Sign | Exp (8 bit) | Frac (first 7 bit)
|
433
|
+
// S E E E E E E E E F F F F F F L
|
434
|
+
// 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
|
435
|
+
|
436
|
+
// At this point, ff must be either a normal float, or +/-infinity.
|
437
|
+
output = float_to_bfloat16_rtne<true>(ff);
|
438
|
+
}
|
439
|
+
return output;
|
440
|
+
#endif
|
441
|
+
}
|
442
|
+
|
443
|
+
// float_to_bfloat16_rtne template specialization that assumes that its function
|
444
|
+
// argument (ff) is either a normal floating point number, or +/-infinity, or
|
445
|
+
// zero. Used to improve the runtime performance of conversion from an integer
|
446
|
+
// type to bfloat16.
|
447
|
+
template <>
|
448
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
|
449
|
+
#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
|
450
|
+
// Nothing to do here
|
451
|
+
#else
|
452
|
+
numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
|
453
|
+
__bfloat16_raw output;
|
454
|
+
|
455
|
+
// Least significant bit of resulting bfloat.
|
456
|
+
numext::uint32_t lsb = (input >> 16) & 1;
|
457
|
+
numext::uint32_t rounding_bias = 0x7fff + lsb;
|
458
|
+
input += rounding_bias;
|
459
|
+
output.value = static_cast<numext::uint16_t>(input >> 16);
|
460
|
+
return output;
|
461
|
+
#endif
|
462
|
+
}
|
463
|
+
|
464
|
+
// Widens a raw bfloat16 to float. The 16 stored bits become the upper half of
// the float's 32-bit pattern and the lower 16 bits are zero.
//
// The pattern is assembled as an integer and converted with numext::bit_cast
// rather than written through an unsigned short pointer into a float: this
// avoids the strict-aliasing violation of the pointer cast and needs no
// byte-order #if, since the shift places the bits in the high half regardless
// of endianness.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {
  const numext::uint32_t bits = static_cast<numext::uint32_t>(h.value) << 16;
  return numext::bit_cast<float>(bits);
}
|
474
|
+
// --- standard functions ---
|
475
|
+
|
476
|
+
// True when the float value of a is an infinity, as reported by the isinf
// made visible through EIGEN_USING_STD.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) {
  EIGEN_USING_STD(isinf);
  const float as_float = static_cast<float>(a);
  return (isinf)(as_float);
}
|
480
|
+
// True when the float value of a is a NaN, as reported by the isnan made
// visible through EIGEN_USING_STD.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) {
  EIGEN_USING_STD(isnan);
  const float as_float = static_cast<float>(a);
  return (isnan)(as_float);
}
|
484
|
+
// True when a is neither an infinity nor a NaN. The infinity check runs
// first; the NaN check is only evaluated when a is not an infinity.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) {
  return !((isinf EIGEN_NOT_A_MACRO (a)) || (isnan EIGEN_NOT_A_MACRO (a)));
}
|
487
|
+
|
488
|
+
// Absolute value: clears the top bit (the sign bit) of the 16-bit pattern by
// masking with 0x7FFF; no float round trip is involved.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
  bfloat16 magnitude;
  magnitude.value = a.value & 0x7FFF;
  return magnitude;
}
|
493
|
+
// Exponential: applies ::expf to the float value of a and converts the result
// back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::expf(x));
}
|
496
|
+
// Applies numext::expm1 to the float value of a and converts the result back
// to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(numext::expm1(x));
}
|
499
|
+
// Natural logarithm: applies ::logf to the float value of a and converts the
// result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::logf(x));
}
|
502
|
+
// Applies numext::log1p to the float value of a and converts the result back
// to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(numext::log1p(x));
}
|
505
|
+
// Base-10 logarithm: applies ::log10f to the float value of a and converts
// the result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::log10f(x));
}
|
508
|
+
// Base-2 logarithm, computed as EIGEN_LOG2E (as a float) times the natural
// logarithm of the float value of a; the product is converted to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {
  const float natural_log = ::logf(static_cast<float>(a));
  return bfloat16(static_cast<float>(EIGEN_LOG2E) * natural_log);
}
|
511
|
+
// Square root: applies ::sqrtf to the float value of a and converts the
// result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::sqrtf(x));
}
|
514
|
+
// Power: applies ::powf to the float values of a (base) and b (exponent) and
// converts the result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
  const float base = static_cast<float>(a);
  const float exponent = static_cast<float>(b);
  return bfloat16(::powf(base, exponent));
}
|
517
|
+
// Sine: applies ::sinf to the float value of a and converts the result back
// to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::sinf(x));
}
|
520
|
+
// Cosine: applies ::cosf to the float value of a and converts the result back
// to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::cosf(x));
}
|
523
|
+
// Tangent: applies ::tanf to the float value of a and converts the result
// back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::tanf(x));
}
|
526
|
+
// Arc sine: applies ::asinf to the float value of a and converts the result
// back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::asinf(x));
}
|
529
|
+
// Arc cosine: applies ::acosf to the float value of a and converts the result
// back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::acosf(x));
}
|
532
|
+
// Arc tangent: applies ::atanf to the float value of a and converts the
// result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::atanf(x));
}
|
535
|
+
// Hyperbolic sine: applies ::sinhf to the float value of a and converts the
// result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::sinhf(x));
}
|
538
|
+
// Hyperbolic cosine: applies ::coshf to the float value of a and converts the
// result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::coshf(x));
}
|
541
|
+
// Hyperbolic tangent: applies ::tanhf to the float value of a and converts
// the result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::tanhf(x));
}
|
544
|
+
#if EIGEN_HAS_CXX11_MATH
|
545
|
+
// Inverse hyperbolic sine: applies ::asinhf to the float value of a and
// converts the result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::asinhf(x));
}
|
548
|
+
// Inverse hyperbolic cosine: applies ::acoshf to the float value of a and
// converts the result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::acoshf(x));
}
|
551
|
+
// Inverse hyperbolic tangent: applies ::atanhf to the float value of a and
// converts the result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::atanhf(x));
}
|
554
|
+
#endif
|
555
|
+
// Applies ::floorf to the float value of a and converts the result back to
// bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::floorf(x));
}
|
558
|
+
// Applies ::ceilf to the float value of a and converts the result back to
// bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::ceilf(x));
}
|
561
|
+
// Applies ::rintf to the float value of a and converts the result back to
// bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::rintf(x));
}
|
564
|
+
// Applies ::roundf to the float value of a and converts the result back to
// bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) {
  const float x = static_cast<float>(a);
  return bfloat16(::roundf(x));
}
|
567
|
+
// Floating-point remainder: applies ::fmodf to the float values of a and b
// and converts the result back to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {
  const float dividend = static_cast<float>(a);
  const float divisor = static_cast<float>(b);
  return bfloat16(::fmodf(dividend, divisor));
}
|
570
|
+
|
571
|
+
// Returns b when b's float value is strictly less than a's; otherwise (i.e.
// whenever that comparison is false) returns a. The chosen operand is
// returned unchanged, without any float round trip.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (rhs < lhs) {
    return b;
  }
  return a;
}
|
576
|
+
// Returns b when a's float value is strictly less than b's; otherwise (i.e.
// whenever that comparison is false) returns a. The chosen operand is
// returned unchanged, without any float round trip.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (lhs < rhs) {
    return b;
  }
  return a;
}
|
581
|
+
|
582
|
+
// Applies ::fminf to the float values of a and b and converts the result back
// to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {
  return bfloat16(::fminf(static_cast<float>(a), static_cast<float>(b)));
}
|
587
|
+
// Applies ::fmaxf to the float values of a and b and converts the result back
// to bfloat16.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
  return bfloat16(::fmaxf(static_cast<float>(a), static_cast<float>(b)));
}
|
592
|
+
|
593
|
+
#ifndef EIGEN_NO_IO
|
594
|
+
// Stream insertion: writes the float value of v to os and returns os.
EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
  const float as_float = static_cast<float>(v);
  return os << as_float;
}
|
598
|
+
#endif
|
599
|
+
|
600
|
+
} // namespace bfloat16_impl
|
601
|
+
|
602
|
+
namespace internal {
|
603
|
+
|
604
|
+
template<>
|
605
|
+
struct random_default_impl<bfloat16, false, false>
|
606
|
+
{
|
607
|
+
static inline bfloat16 run(const bfloat16& x, const bfloat16& y)
|
608
|
+
{
|
609
|
+
return x + (y-x) * bfloat16(float(std::rand()) / float(RAND_MAX));
|
610
|
+
}
|
611
|
+
static inline bfloat16 run()
|
612
|
+
{
|
613
|
+
return run(bfloat16(-1.f), bfloat16(1.f));
|
614
|
+
}
|
615
|
+
};
|
616
|
+
|
617
|
+
// Marks bfloat16 as an arithmetic type for Eigen's internal type traits.
template<>
struct is_arithmetic<bfloat16> {
  enum { value = true };
};
|
618
|
+
|
619
|
+
} // namespace internal
|
620
|
+
|
621
|
+
template<> struct NumTraits<Eigen::bfloat16>
|
622
|
+
: GenericNumTraits<Eigen::bfloat16>
|
623
|
+
{
|
624
|
+
enum {
|
625
|
+
IsSigned = true,
|
626
|
+
IsInteger = false,
|
627
|
+
IsComplex = false,
|
628
|
+
RequireInitialization = false
|
629
|
+
};
|
630
|
+
|
631
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
|
632
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);
|
633
|
+
}
|
634
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {
|
635
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f);
|
636
|
+
|
637
|
+
}
|
638
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
|
639
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);
|
640
|
+
}
|
641
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() {
|
642
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0xFF7F);
|
643
|
+
}
|
644
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() {
|
645
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x7f80);
|
646
|
+
}
|
647
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() {
|
648
|
+
return bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0);
|
649
|
+
}
|
650
|
+
};
|
651
|
+
|
652
|
+
} // namespace Eigen
|
653
|
+
|
654
|
+
namespace Eigen {
|
655
|
+
namespace numext {
|
656
|
+
|
657
|
+
template<>
|
658
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
659
|
+
bool (isnan)(const Eigen::bfloat16& h) {
|
660
|
+
return (bfloat16_impl::isnan)(h);
|
661
|
+
}
|
662
|
+
|
663
|
+
template<>
|
664
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
665
|
+
bool (isinf)(const Eigen::bfloat16& h) {
|
666
|
+
return (bfloat16_impl::isinf)(h);
|
667
|
+
}
|
668
|
+
|
669
|
+
template<>
|
670
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
671
|
+
bool (isfinite)(const Eigen::bfloat16& h) {
|
672
|
+
return (bfloat16_impl::isfinite)(h);
|
673
|
+
}
|
674
|
+
|
675
|
+
template <>
|
676
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
|
677
|
+
return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));
|
678
|
+
}
|
679
|
+
|
680
|
+
template <>
|
681
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {
|
682
|
+
return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
|
683
|
+
}
|
684
|
+
|
685
|
+
} // namespace numext
|
686
|
+
} // namespace Eigen
|
687
|
+
|
688
|
+
#if EIGEN_HAS_STD_HASH
|
689
|
+
namespace std {
|
690
|
+
template <>
|
691
|
+
struct hash<Eigen::bfloat16> {
|
692
|
+
EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
|
693
|
+
return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
|
694
|
+
}
|
695
|
+
};
|
696
|
+
} // namespace std
|
697
|
+
#endif
|
698
|
+
|
699
|
+
|
700
|
+
#endif // EIGEN_BFLOAT16_H
|