@smake/eigen 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +1 -1
- package/eigen/COPYING.MINPACK +51 -52
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +2 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -1,675 +0,0 @@
|
|
|
1
|
-
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
-
// for linear algebra.
|
|
3
|
-
//
|
|
4
|
-
// This Source Code Form is subject to the terms of the Mozilla
|
|
5
|
-
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
6
|
-
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
7
|
-
//
|
|
8
|
-
// The conversion routines are Copyright (c) Fabian Giesen, 2016.
|
|
9
|
-
// The original license follows:
|
|
10
|
-
//
|
|
11
|
-
// Copyright (c) Fabian Giesen, 2016
|
|
12
|
-
// All rights reserved.
|
|
13
|
-
// Redistribution and use in source and binary forms, with or without
|
|
14
|
-
// modification, are permitted.
|
|
15
|
-
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
16
|
-
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
17
|
-
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
18
|
-
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
19
|
-
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
20
|
-
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
21
|
-
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
22
|
-
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
23
|
-
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
24
|
-
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
25
|
-
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
|
|
29
|
-
// type Eigen::half (inheriting from CUDA's __half struct) with
|
|
30
|
-
// operator overloads such that it behaves basically as an arithmetic
|
|
31
|
-
// type. It will be quite slow on CPUs (so it is recommended to stay
|
|
32
|
-
// in float32 for CPUs, except for simple parameter conversions, I/O
|
|
33
|
-
// to disk and the like), but fast on GPUs.
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
#ifndef EIGEN_HALF_CUDA_H
|
|
37
|
-
#define EIGEN_HALF_CUDA_H
|
|
38
|
-
|
|
39
|
-
#if __cplusplus > 199711L
|
|
40
|
-
#define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type()
|
|
41
|
-
#else
|
|
42
|
-
#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
|
|
43
|
-
#endif
|
|
44
|
-
|
|
45
|
-
#include <sstream>
|
|
46
|
-
|
|
47
|
-
namespace Eigen {
|
|
48
|
-
|
|
49
|
-
struct half;
|
|
50
|
-
|
|
51
|
-
namespace half_impl {
|
|
52
|
-
|
|
53
|
-
#if !defined(EIGEN_HAS_CUDA_FP16)
|
|
54
|
-
// Make our own __half_raw definition that is similar to CUDA's.
|
|
55
|
-
struct __half_raw {
|
|
56
|
-
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
|
|
57
|
-
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
|
|
58
|
-
unsigned short x;
|
|
59
|
-
};
|
|
60
|
-
#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
|
|
61
|
-
// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
|
|
62
|
-
typedef __half __half_raw;
|
|
63
|
-
#endif
|
|
64
|
-
|
|
65
|
-
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
|
|
66
|
-
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
|
|
67
|
-
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);
|
|
68
|
-
|
|
69
|
-
struct half_base : public __half_raw {
|
|
70
|
-
EIGEN_DEVICE_FUNC half_base() {}
|
|
71
|
-
EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {}
|
|
72
|
-
EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
|
|
73
|
-
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
|
|
74
|
-
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
|
|
75
|
-
#endif
|
|
76
|
-
};
|
|
77
|
-
|
|
78
|
-
} // namespace half_impl
|
|
79
|
-
|
|
80
|
-
// Class definition.
|
|
81
|
-
struct half : public half_impl::half_base {
|
|
82
|
-
#if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
|
|
83
|
-
typedef half_impl::__half_raw __half_raw;
|
|
84
|
-
#endif
|
|
85
|
-
|
|
86
|
-
EIGEN_DEVICE_FUNC half() {}
|
|
87
|
-
|
|
88
|
-
EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
|
|
89
|
-
EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
|
|
90
|
-
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
|
|
91
|
-
EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
|
|
92
|
-
#endif
|
|
93
|
-
|
|
94
|
-
explicit EIGEN_DEVICE_FUNC half(bool b)
|
|
95
|
-
: half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
|
|
96
|
-
template<class T>
|
|
97
|
-
explicit EIGEN_DEVICE_FUNC half(const T& val)
|
|
98
|
-
: half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
|
|
99
|
-
explicit EIGEN_DEVICE_FUNC half(float f)
|
|
100
|
-
: half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
|
|
101
|
-
|
|
102
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
|
|
103
|
-
// +0.0 and -0.0 become false, everything else becomes true.
|
|
104
|
-
return (x & 0x7fff) != 0;
|
|
105
|
-
}
|
|
106
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
|
|
107
|
-
return static_cast<signed char>(half_impl::half_to_float(*this));
|
|
108
|
-
}
|
|
109
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
|
|
110
|
-
return static_cast<unsigned char>(half_impl::half_to_float(*this));
|
|
111
|
-
}
|
|
112
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
|
|
113
|
-
return static_cast<short>(half_impl::half_to_float(*this));
|
|
114
|
-
}
|
|
115
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
|
|
116
|
-
return static_cast<unsigned short>(half_impl::half_to_float(*this));
|
|
117
|
-
}
|
|
118
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
|
|
119
|
-
return static_cast<int>(half_impl::half_to_float(*this));
|
|
120
|
-
}
|
|
121
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
|
|
122
|
-
return static_cast<unsigned int>(half_impl::half_to_float(*this));
|
|
123
|
-
}
|
|
124
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
|
|
125
|
-
return static_cast<long>(half_impl::half_to_float(*this));
|
|
126
|
-
}
|
|
127
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
|
|
128
|
-
return static_cast<unsigned long>(half_impl::half_to_float(*this));
|
|
129
|
-
}
|
|
130
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
|
|
131
|
-
return static_cast<long long>(half_impl::half_to_float(*this));
|
|
132
|
-
}
|
|
133
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
|
|
134
|
-
return static_cast<unsigned long long>(half_to_float(*this));
|
|
135
|
-
}
|
|
136
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
|
|
137
|
-
return half_impl::half_to_float(*this);
|
|
138
|
-
}
|
|
139
|
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
|
|
140
|
-
return static_cast<double>(half_impl::half_to_float(*this));
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
EIGEN_DEVICE_FUNC half& operator=(const half& other) {
|
|
144
|
-
x = other.x;
|
|
145
|
-
return *this;
|
|
146
|
-
}
|
|
147
|
-
};
|
|
148
|
-
|
|
149
|
-
} // end namespace Eigen
|
|
150
|
-
|
|
151
|
-
namespace std {
|
|
152
|
-
template<>
|
|
153
|
-
struct numeric_limits<Eigen::half> {
|
|
154
|
-
static const bool is_specialized = true;
|
|
155
|
-
static const bool is_signed = true;
|
|
156
|
-
static const bool is_integer = false;
|
|
157
|
-
static const bool is_exact = false;
|
|
158
|
-
static const bool has_infinity = true;
|
|
159
|
-
static const bool has_quiet_NaN = true;
|
|
160
|
-
static const bool has_signaling_NaN = true;
|
|
161
|
-
static const float_denorm_style has_denorm = denorm_present;
|
|
162
|
-
static const bool has_denorm_loss = false;
|
|
163
|
-
static const std::float_round_style round_style = std::round_to_nearest;
|
|
164
|
-
static const bool is_iec559 = false;
|
|
165
|
-
static const bool is_bounded = false;
|
|
166
|
-
static const bool is_modulo = false;
|
|
167
|
-
static const int digits = 11;
|
|
168
|
-
static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
|
|
169
|
-
static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
|
|
170
|
-
static const int radix = 2;
|
|
171
|
-
static const int min_exponent = -13;
|
|
172
|
-
static const int min_exponent10 = -4;
|
|
173
|
-
static const int max_exponent = 16;
|
|
174
|
-
static const int max_exponent10 = 4;
|
|
175
|
-
static const bool traps = true;
|
|
176
|
-
static const bool tinyness_before = false;
|
|
177
|
-
|
|
178
|
-
static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
|
|
179
|
-
static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
|
|
180
|
-
static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
|
|
181
|
-
static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
|
|
182
|
-
static Eigen::half round_error() { return Eigen::half(0.5); }
|
|
183
|
-
static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
|
|
184
|
-
static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
|
|
185
|
-
static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
|
|
186
|
-
static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
|
|
187
|
-
};
|
|
188
|
-
|
|
189
|
-
// If std::numeric_limits<T> is specialized, should also specialize
|
|
190
|
-
// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
|
|
191
|
-
// std::numeric_limits<const volatile T>
|
|
192
|
-
// https://stackoverflow.com/a/16519653/
|
|
193
|
-
template<>
|
|
194
|
-
struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
|
|
195
|
-
template<>
|
|
196
|
-
struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
|
|
197
|
-
template<>
|
|
198
|
-
struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
|
|
199
|
-
} // end namespace std
|
|
200
|
-
|
|
201
|
-
namespace Eigen {
|
|
202
|
-
|
|
203
|
-
namespace half_impl {
|
|
204
|
-
|
|
205
|
-
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
|
206
|
-
|
|
207
|
-
// Intrinsics for native fp16 support. Note that on current hardware,
|
|
208
|
-
// these are no faster than float32 arithmetic (you need to use the half2
|
|
209
|
-
// versions to get the ALU speed increased), but you do save the
|
|
210
|
-
// conversion steps back and forth.
|
|
211
|
-
|
|
212
|
-
// Device-only addition, delegated to the __hadd intrinsic.
EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
  const half sum = __hadd(a, b);
  return sum;
}
|
|
215
|
-
// Device-only multiplication, delegated to the __hmul intrinsic.
EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
  const half product = __hmul(a, b);
  return product;
}
|
|
218
|
-
// Device-only subtraction, delegated to the __hsub intrinsic.
EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
  const half difference = __hsub(a, b);
  return difference;
}
|
|
221
|
-
// Device-only division: both operands are widened with __half2float, divided
// as floats, and the quotient is narrowed again with __float2half.
EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
  return __float2half(__half2float(a) / __half2float(b));
}
|
|
226
|
-
// Device-only unary minus, delegated to the __hneg intrinsic.
EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
  const half negated = __hneg(a);
  return negated;
}
|
|
229
|
-
// In-place addition expressed through the binary operator +.
EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
  return a = a + b;
}
|
|
233
|
-
// In-place multiplication expressed through the binary operator *.
EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
  return a = a * b;
}
|
|
237
|
-
// In-place subtraction expressed through the binary operator -.
EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
  return a = a - b;
}
|
|
241
|
-
// In-place division expressed through the binary operator /.
EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
  return a = a / b;
}
|
|
245
|
-
// Device-only equality test, delegated to the __heq intrinsic.
EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
  const bool equal = __heq(a, b);
  return equal;
}
|
|
248
|
-
// Device-only inequality test, delegated to the __hne intrinsic.
EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
  const bool differs = __hne(a, b);
  return differs;
}
|
|
251
|
-
// Device-only less-than test, delegated to the __hlt intrinsic.
EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
  const bool less = __hlt(a, b);
  return less;
}
|
|
254
|
-
// Device-only less-or-equal test, delegated to the __hle intrinsic.
EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
  const bool less_or_equal = __hle(a, b);
  return less_or_equal;
}
|
|
257
|
-
// Device-only greater-than test, delegated to the __hgt intrinsic.
EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
  const bool greater = __hgt(a, b);
  return greater;
}
|
|
260
|
-
// Device-only greater-or-equal test, delegated to the __hge intrinsic.
EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
  const bool greater_or_equal = __hge(a, b);
  return greater_or_equal;
}
|
|
263
|
-
|
|
264
|
-
#else // Emulate support for half floats
|
|
265
|
-
|
|
266
|
-
// Definitions for CPUs and older CUDA, mostly working through conversion
|
|
267
|
-
// to/from float32.
|
|
268
|
-
|
|
269
|
-
// Emulated addition: widen both operands to float, add, round back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return half(lhs + rhs);
}
|
|
272
|
-
// Emulated multiplication: widen both operands to float, multiply, round
// back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return half(lhs * rhs);
}
|
|
275
|
-
// Emulated subtraction: widen both operands to float, subtract, round back
// to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return half(lhs - rhs);
}
|
|
278
|
-
// Emulated division: widen both operands to float, divide, round back to
// half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
  const float numerator = static_cast<float>(a);
  const float denominator = static_cast<float>(b);
  return half(numerator / denominator);
}
|
|
281
|
-
// Emulated unary minus: toggles bit 15 of the raw pattern and leaves every
// other bit untouched.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
  half flipped(a);
  flipped.x ^= 0x8000;
  return flipped;
}
|
|
286
|
-
// Emulated in-place addition: the sum is formed in float and rounded back
// into `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
  const float total = static_cast<float>(a) + static_cast<float>(b);
  a = half(total);
  return a;
}
|
|
290
|
-
// Emulated in-place multiplication: the product is formed in float and
// rounded back into `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
  const float scaled = static_cast<float>(a) * static_cast<float>(b);
  a = half(scaled);
  return a;
}
|
|
294
|
-
// Emulated in-place subtraction: the difference is formed in float and
// rounded back into `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
  const float remainder = static_cast<float>(a) - static_cast<float>(b);
  a = half(remainder);
  return a;
}
|
|
298
|
-
// Emulated in-place division: the quotient is formed in float and rounded
// back into `a`.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
  const float quotient = static_cast<float>(a) / static_cast<float>(b);
  a = half(quotient);
  return a;
}
|
|
302
|
-
// Emulated equality: both operands are widened to float and handed to
// numext::equal_strict.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return numext::equal_strict(lhs, rhs);
}
|
|
305
|
-
// Emulated inequality: both operands are widened to float and handed to
// numext::not_equal_strict.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return numext::not_equal_strict(lhs, rhs);
}
|
|
308
|
-
// Emulated less-than: compares the float images of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs < rhs;
}
|
|
311
|
-
// Emulated less-or-equal: compares the float images of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs <= rhs;
}
|
|
314
|
-
// Emulated greater-than: compares the float images of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs > rhs;
}
|
|
317
|
-
// Emulated greater-or-equal: compares the float images of both operands.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs >= rhs;
}
|
|
320
|
-
|
|
321
|
-
#endif // Emulate support for half floats
|
|
322
|
-
|
|
323
|
-
// Division by an index. Do it in full float precision to avoid accuracy
|
|
324
|
-
// issues in converting the denominator to half.
|
|
325
|
-
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
|
|
326
|
-
return half(static_cast<float>(a) / static_cast<float>(b));
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
// Conversion routines, including fallbacks for the host or older CUDA.
|
|
330
|
-
// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
|
|
331
|
-
// these in hardware. If we need more performance on older/other CPUs, they are
|
|
332
|
-
// also possible to vectorize directly.
|
|
333
|
-
|
|
334
|
-
// Wraps a 16-bit pattern in a __half_raw: the argument is stored unchanged in
// the x member; no numeric conversion takes place.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
  __half_raw wrapped;
  wrapped.x = x;
  return wrapped;
}
|
|
339
|
-
|
|
340
|
-
// Overlays a float with an unsigned int so the conversion routines can work on
// the bit pattern of a float. The integer member must stay first: the
// conversion code brace-initializes this union with integer bit patterns,
// which sets the first member.
union float32_bits {
  unsigned int u;  // bit-pattern view
  float f;         // floating-point view
};
|
|
344
|
-
|
|
345
|
-
// Converts a float to the raw bits of a half. Depending on the build macros
// this delegates to __float2half or _cvtss_sh; otherwise the conversion is
// done by hand on the float's bit pattern (the fallback relies on
// round-to-nearest-even float addition for the subnormal case).
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
  __half tmp_ff = __float2half(ff);
  return *(__half_raw*)&tmp_ff;

#elif defined(EIGEN_HAS_FP16_C)
  __half_raw packed;
  packed.x = _cvtss_sh(ff, 0);
  return packed;

#else
  float32_bits src;
  src.f = ff;

  const float32_bits f32infty = { 255 << 23 };
  const float32_bits f16max = { (127 + 16) << 23 };
  const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
  unsigned int sign_mask = 0x80000000u;
  __half_raw out;
  out.x = static_cast<unsigned short>(0x0u);

  // Split off the sign bit; everything below operates on the magnitude.
  unsigned int sign = src.u & sign_mask;
  src.u ^= sign;

  // NOTE all the integer compares in this function can be safely
  // compiled into signed compares since all operands are below
  // 0x80000000. Important if you want fast straight SSE2 code
  // (since there's no unsigned PCMPGTD).

  if (src.u >= f16max.u) {
    // Result is Inf or NaN: NaN inputs become a quiet NaN, everything else Inf.
    out.x = (src.u > f32infty.u) ? 0x7e00 : 0x7c00;
  } else if (src.u < (113 << 23)) {
    // Resulting FP16 is subnormal or zero. Adding the magic value aligns the
    // 10 mantissa bits at the bottom of the float; this works as long as FP
    // addition is round-to-nearest-even.
    src.f += denorm_magic.f;

    // One integer subtract of the bias later, the low bits are the result.
    out.x = static_cast<unsigned short>(src.u - denorm_magic.u);
  } else {
    // Normalized result.
    unsigned int mant_odd = (src.u >> 13) & 1;  // resulting mantissa is odd

    // Update exponent, rounding bias part 1.
    src.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
    // Rounding bias part 2.
    src.u += mant_odd;
    // Take the bits.
    out.x = static_cast<unsigned short>(src.u >> 13);
  }

  // Re-attach the sign in bit 15 of the half.
  out.x |= static_cast<unsigned short>(sign >> 16);
  return out;
#endif
}
|
|
400
|
-
|
|
401
|
-
// Converts the raw bits of a half to a float. Depending on the build macros
// this delegates to __half2float or _cvtsh_ss; otherwise the float is
// assembled by hand from the half's exponent, mantissa and sign bits.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
  return __half2float(h);

#elif defined(EIGEN_HAS_FP16_C)
  return _cvtsh_ss(h.x);

#else
  const float32_bits magic = { 113 << 23 };
  const unsigned int shifted_exp = 0x7c00 << 13;  // exponent mask after shift
  float32_bits o;

  o.u = (h.x & 0x7fff) << 13;                     // exponent/mantissa bits
  const unsigned int exp_bits = shifted_exp & o.u; // just the exponent
  o.u += (127 - 15) << 23;                        // exponent adjust

  // Handle exponent special cases.
  if (exp_bits == shifted_exp) {  // Inf/NaN?
    o.u += (128 - 16) << 23;      // extra exp adjust
  } else if (exp_bits == 0) {     // Zero/Denormal?
    o.u += 1 << 23;               // extra exp adjust
    o.f -= magic.f;               // renormalize
  }

  // Sign bit. h.x promotes to (signed) int, so widen to unsigned before the
  // shift: 0x8000 << 16 does not fit in a 32-bit int.
  o.u |= static_cast<unsigned int>(h.x & 0x8000) << 16;
  return o.f;
#endif
}
|
|
429
|
-
|
|
430
|
-
// --- standard functions ---
|
|
431
|
-
|
|
432
|
-
// True when the half's bits, with the sign bit masked off, equal 0x7c00.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
  const int magnitude_bits = a.x & 0x7fff;
  return magnitude_bits == 0x7c00;
}
|
|
435
|
-
// NaN test. Uses __hisnan when EIGEN_HAS_CUDA_FP16 is defined and
// EIGEN_CUDA_ARCH >= 530; otherwise checks that the bits, with the sign masked
// off, are greater than 0x7c00.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return __hisnan(a);
#else
  const int magnitude_bits = a.x & 0x7fff;
  return magnitude_bits > 0x7c00;
#endif
}
|
|
442
|
-
// True when the value is neither infinite nor NaN according to the isinf and
// isnan predicates.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {
  return !((isinf EIGEN_NOT_A_MACRO (a)) || (isnan EIGEN_NOT_A_MACRO (a)));
}
|
|
445
|
-
|
|
446
|
-
// Absolute value: returns a half whose storage is a.x with bit 0x8000 cleared.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
  half magnitude;
  magnitude.x = a.x & 0x7FFF;
  return magnitude;
}
|
|
451
|
-
// Exponential. Calls hexp when EIGEN_CUDACC_VER >= 80000 and
// EIGEN_CUDA_ARCH >= 530; otherwise evaluates ::expf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return half(hexp(a));
#else
  const float as_float = float(a);
  return half(::expf(as_float));
#endif
}
|
|
458
|
-
// Natural logarithm. Calls ::hlog when EIGEN_HAS_CUDA_FP16 is defined,
// EIGEN_CUDACC_VER >= 80000 and EIGEN_CUDA_ARCH >= 530; otherwise evaluates
// ::logf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return half(::hlog(a));
#else
  const float as_float = float(a);
  return half(::logf(as_float));
#endif
}
|
|
465
|
-
// log1p of a half, computed by numext::log1p on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
  const float as_float = float(a);
  return half(numext::log1p(as_float));
}
|
|
468
|
-
// Base-10 logarithm of a half, computed by ::log10f on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
  const float as_float = float(a);
  return half(::log10f(as_float));
}
|
|
471
|
-
// Square root. Calls hsqrt when EIGEN_CUDACC_VER >= 80000 and
// EIGEN_CUDA_ARCH >= 530; otherwise evaluates ::sqrtf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return half(hsqrt(a));
#else
  const float as_float = float(a);
  return half(::sqrtf(as_float));
#endif
}
|
|
478
|
-
// a raised to the power b, computed by ::powf on the float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
  const float base = float(a);
  const float exponent = float(b);
  return half(::powf(base, exponent));
}
|
|
481
|
-
// Sine of a half, computed by ::sinf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
  const float as_float = float(a);
  return half(::sinf(as_float));
}
|
|
484
|
-
// Cosine of a half, computed by ::cosf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {
  const float as_float = float(a);
  return half(::cosf(as_float));
}
|
|
487
|
-
// Tangent of a half, computed by ::tanf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
  const float as_float = float(a);
  return half(::tanf(as_float));
}
|
|
490
|
-
// Hyperbolic tangent of a half, computed by ::tanhf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
  const float as_float = float(a);
  return half(::tanhf(as_float));
}
|
|
493
|
-
// Floor. Calls hfloor when EIGEN_CUDACC_VER >= 80000 and
// EIGEN_CUDA_ARCH >= 300; otherwise evaluates ::floorf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
  return half(hfloor(a));
#else
  const float as_float = float(a);
  return half(::floorf(as_float));
#endif
}
|
|
500
|
-
// Ceiling. Calls hceil when EIGEN_CUDACC_VER >= 80000 and
// EIGEN_CUDA_ARCH >= 300; otherwise evaluates ::ceilf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
  return half(hceil(a));
#else
  const float as_float = float(a);
  return half(::ceilf(as_float));
#endif
}
|
|
507
|
-
|
|
508
|
-
// Minimum of two halves: returns b only when b is strictly less than a,
// otherwise a. The comparison uses __hlt when EIGEN_HAS_CUDA_FP16 is defined
// and EIGEN_CUDA_ARCH >= 530, and the float values otherwise.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  if (__hlt(b, a)) {
    return b;
  }
  return a;
#else
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (rhs < lhs) {
    return b;
  }
  return a;
#endif
}
|
|
517
|
-
// Maximum of two halves: returns b only when a is strictly less than b,
// otherwise a. The comparison uses __hlt when EIGEN_HAS_CUDA_FP16 is defined
// and EIGEN_CUDA_ARCH >= 530, and the float values otherwise.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  if (__hlt(a, b)) {
    return b;
  }
  return a;
#else
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  if (lhs < rhs) {
    return b;
  }
  return a;
#endif
}
|
|
526
|
-
|
|
527
|
-
// Stream insertion: writes the half to the stream as its float value and
// returns the stream.
EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {
  const float as_float = static_cast<float>(v);
  os << as_float;
  return os;
}
|
|
531
|
-
|
|
532
|
-
} // end namespace half_impl
|
|
533
|
-
|
|
534
|
-
// import Eigen::half_impl::half into Eigen namespace
|
|
535
|
-
// using half_impl::half;
|
|
536
|
-
|
|
537
|
-
namespace internal {
|
|
538
|
-
|
|
539
|
-
template<>
|
|
540
|
-
struct random_default_impl<half, false, false>
|
|
541
|
-
{
|
|
542
|
-
static inline half run(const half& x, const half& y)
|
|
543
|
-
{
|
|
544
|
-
return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));
|
|
545
|
-
}
|
|
546
|
-
static inline half run()
|
|
547
|
-
{
|
|
548
|
-
return run(half(-1.f), half(1.f));
|
|
549
|
-
}
|
|
550
|
-
};
|
|
551
|
-
|
|
552
|
-
template<> struct is_arithmetic<half> { enum { value = true }; };
|
|
553
|
-
|
|
554
|
-
} // end namespace internal
|
|
555
|
-
|
|
556
|
-
template<> struct NumTraits<Eigen::half>
|
|
557
|
-
: GenericNumTraits<Eigen::half>
|
|
558
|
-
{
|
|
559
|
-
enum {
|
|
560
|
-
IsSigned = true,
|
|
561
|
-
IsInteger = false,
|
|
562
|
-
IsComplex = false,
|
|
563
|
-
RequireInitialization = false
|
|
564
|
-
};
|
|
565
|
-
|
|
566
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
|
|
567
|
-
return half_impl::raw_uint16_to_half(0x0800);
|
|
568
|
-
}
|
|
569
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); }
|
|
570
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
|
|
571
|
-
return half_impl::raw_uint16_to_half(0x7bff);
|
|
572
|
-
}
|
|
573
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
|
|
574
|
-
return half_impl::raw_uint16_to_half(0xfbff);
|
|
575
|
-
}
|
|
576
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
|
|
577
|
-
return half_impl::raw_uint16_to_half(0x7c00);
|
|
578
|
-
}
|
|
579
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
|
|
580
|
-
return half_impl::raw_uint16_to_half(0x7c01);
|
|
581
|
-
}
|
|
582
|
-
};
|
|
583
|
-
|
|
584
|
-
} // end namespace Eigen
|
|
585
|
-
|
|
586
|
-
// C-like standard mathematical functions and transcendentals.
|
|
587
|
-
// Absolute value of a half: returns a half whose storage is a.x with bit
// 0x8000 cleared.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
  Eigen::half magnitude;
  magnitude.x = a.x & 0x7FFF;
  return magnitude;
}
|
|
592
|
-
// Exponential of a half, computed by ::expf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
  const float as_float = float(a);
  return Eigen::half(::expf(as_float));
}
|
|
595
|
-
// Natural logarithm of a half. Calls ::hlog when EIGEN_CUDACC_VER >= 80000 and
// EIGEN_CUDA_ARCH >= 530; otherwise evaluates ::logf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return Eigen::half(::hlog(a));
#else
  const float as_float = float(a);
  return Eigen::half(::logf(as_float));
#endif
}
|
|
602
|
-
// Square root of a half, computed by ::sqrtf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
  const float as_float = float(a);
  return Eigen::half(::sqrtf(as_float));
}
|
|
605
|
-
// a raised to the power b, computed by ::powf on the float values.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
  const float base = float(a);
  const float exponent = float(b);
  return Eigen::half(::powf(base, exponent));
}
|
|
608
|
-
// Floor of a half, computed by ::floorf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
  const float as_float = float(a);
  return Eigen::half(::floorf(as_float));
}
|
|
611
|
-
// Ceiling of a half, computed by ::ceilf on the float value.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
  const float as_float = float(a);
  return Eigen::half(::ceilf(as_float));
}
|
|
614
|
-
|
|
615
|
-
namespace std {
|
|
616
|
-
|
|
617
|
-
#if __cplusplus > 199711L
|
|
618
|
-
template <>
|
|
619
|
-
struct hash<Eigen::half> {
|
|
620
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
|
|
621
|
-
return static_cast<std::size_t>(a.x);
|
|
622
|
-
}
|
|
623
|
-
};
|
|
624
|
-
#endif
|
|
625
|
-
|
|
626
|
-
} // end namespace std
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
// Add the missing shfl_xor intrinsic
|
|
630
|
-
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
|
|
631
|
-
// __shfl_xor overload for Eigen::half: the value is converted to float, passed
// through __shfl_xor (EIGEN_CUDACC_VER < 90000) or __shfl_xor_sync with mask
// 0xFFFFFFFF (otherwise), and the result is converted back to half.
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
  const float as_float = static_cast<float>(var);
#if EIGEN_CUDACC_VER < 90000
  const float exchanged = __shfl_xor(as_float, laneMask, width);
#else
  const float exchanged = __shfl_xor_sync(0xFFFFFFFF, as_float, laneMask, width);
#endif
  return static_cast<Eigen::half>(exchanged);
}
|
|
638
|
-
#endif
|
|
639
|
-
|
|
640
|
-
// ldg() has an overload for __half_raw, but we also need one for Eigen::half.
|
|
641
|
-
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
|
|
642
|
-
// __ldg overload for Eigen::half: reads the pointee through the
// unsigned short overload of __ldg and wraps the 16 bits with
// raw_uint16_to_half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
  const unsigned short* raw_ptr = reinterpret_cast<const unsigned short*>(ptr);
  return Eigen::half_impl::raw_uint16_to_half(__ldg(raw_ptr));
}
|
|
646
|
-
#endif
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
#if defined(EIGEN_CUDA_ARCH)
|
|
650
|
-
namespace Eigen {
|
|
651
|
-
namespace numext {
|
|
652
|
-
|
|
653
|
-
template<>
|
|
654
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
655
|
-
bool (isnan)(const Eigen::half& h) {
|
|
656
|
-
return (half_impl::isnan)(h);
|
|
657
|
-
}
|
|
658
|
-
|
|
659
|
-
template<>
|
|
660
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
661
|
-
bool (isinf)(const Eigen::half& h) {
|
|
662
|
-
return (half_impl::isinf)(h);
|
|
663
|
-
}
|
|
664
|
-
|
|
665
|
-
template<>
|
|
666
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
667
|
-
bool (isfinite)(const Eigen::half& h) {
|
|
668
|
-
return (half_impl::isfinite)(h);
|
|
669
|
-
}
|
|
670
|
-
|
|
671
|
-
} // namespace Eigen
|
|
672
|
-
} // namespace numext
|
|
673
|
-
#endif
|
|
674
|
-
|
|
675
|
-
#endif // EIGEN_HALF_CUDA_H
|