@smake/eigen 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +26 -0
- package/eigen/COPYING.GPL +674 -0
- package/eigen/COPYING.LGPL +502 -0
- package/eigen/COPYING.MINPACK +51 -0
- package/eigen/COPYING.MPL2 +373 -0
- package/eigen/COPYING.README +18 -0
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +5 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
* it does not correspond to the number of iterations or the number of instructions
|
|
22
22
|
*/
|
|
23
23
|
#ifndef EIGEN_UNROLLING_LIMIT
|
|
24
|
-
#define EIGEN_UNROLLING_LIMIT
|
|
24
|
+
#define EIGEN_UNROLLING_LIMIT 110
|
|
25
25
|
#endif
|
|
26
26
|
|
|
27
27
|
/** Defines the threshold between a "small" and a "large" matrix.
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
|
5
|
+
// Copyright (C) 2019 Rasmus Munk Larsen <rmlarsen@google.com>
|
|
6
|
+
//
|
|
7
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
8
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
9
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
10
|
+
|
|
11
|
+
#ifndef EIGEN_GENERIC_TYPE_CASTING_H
|
|
12
|
+
#define EIGEN_GENERIC_TYPE_CASTING_H
|
|
13
|
+
|
|
14
|
+
namespace Eigen {
|
|
15
|
+
|
|
16
|
+
namespace internal {
|
|
17
|
+
|
|
18
|
+
template<>
|
|
19
|
+
struct scalar_cast_op<float, Eigen::half> {
|
|
20
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
21
|
+
typedef Eigen::half result_type;
|
|
22
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
|
|
23
|
+
#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
|
|
24
|
+
(defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
|
|
25
|
+
return __float2half(a);
|
|
26
|
+
#else
|
|
27
|
+
return Eigen::half(a);
|
|
28
|
+
#endif
|
|
29
|
+
}
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
template<>
|
|
33
|
+
struct functor_traits<scalar_cast_op<float, Eigen::half> >
|
|
34
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
template<>
|
|
38
|
+
struct scalar_cast_op<int, Eigen::half> {
|
|
39
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
40
|
+
typedef Eigen::half result_type;
|
|
41
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
|
|
42
|
+
#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
|
|
43
|
+
(defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
|
|
44
|
+
return __float2half(static_cast<float>(a));
|
|
45
|
+
#else
|
|
46
|
+
return Eigen::half(static_cast<float>(a));
|
|
47
|
+
#endif
|
|
48
|
+
}
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
template<>
|
|
52
|
+
struct functor_traits<scalar_cast_op<int, Eigen::half> >
|
|
53
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
template<>
|
|
57
|
+
struct scalar_cast_op<Eigen::half, float> {
|
|
58
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
59
|
+
typedef float result_type;
|
|
60
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
|
|
61
|
+
#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
|
|
62
|
+
(defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
|
|
63
|
+
return __half2float(a);
|
|
64
|
+
#else
|
|
65
|
+
return static_cast<float>(a);
|
|
66
|
+
#endif
|
|
67
|
+
}
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
template<>
|
|
71
|
+
struct functor_traits<scalar_cast_op<Eigen::half, float> >
|
|
72
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
template<>
|
|
76
|
+
struct scalar_cast_op<float, Eigen::bfloat16> {
|
|
77
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
78
|
+
typedef Eigen::bfloat16 result_type;
|
|
79
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 operator() (const float& a) const {
|
|
80
|
+
return Eigen::bfloat16(a);
|
|
81
|
+
}
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
template<>
|
|
85
|
+
struct functor_traits<scalar_cast_op<float, Eigen::bfloat16> >
|
|
86
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
template<>
|
|
90
|
+
struct scalar_cast_op<int, Eigen::bfloat16> {
|
|
91
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
92
|
+
typedef Eigen::bfloat16 result_type;
|
|
93
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 operator() (const int& a) const {
|
|
94
|
+
return Eigen::bfloat16(static_cast<float>(a));
|
|
95
|
+
}
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
template<>
|
|
99
|
+
struct functor_traits<scalar_cast_op<int, Eigen::bfloat16> >
|
|
100
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
template<>
|
|
104
|
+
struct scalar_cast_op<Eigen::bfloat16, float> {
|
|
105
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
106
|
+
typedef float result_type;
|
|
107
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::bfloat16& a) const {
|
|
108
|
+
return static_cast<float>(a);
|
|
109
|
+
}
|
|
110
|
+
};
|
|
111
|
+
|
|
112
|
+
template<>
|
|
113
|
+
struct functor_traits<scalar_cast_op<Eigen::bfloat16, float> >
|
|
114
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
#endif // EIGEN_GENERIC_TYPE_CASTING_H
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
8
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
9
|
|
|
10
|
-
#ifndef
|
|
11
|
-
#define
|
|
10
|
+
#ifndef EIGEN_MATH_FUNCTIONS_GPU_H
|
|
11
|
+
#define EIGEN_MATH_FUNCTIONS_GPU_H
|
|
12
12
|
|
|
13
13
|
namespace Eigen {
|
|
14
14
|
|
|
@@ -17,7 +17,7 @@ namespace internal {
|
|
|
17
17
|
// Make sure this is only available when targeting a GPU: we don't want to
|
|
18
18
|
// introduce conflicts between these packet_traits definitions and the ones
|
|
19
19
|
// we'll use on the host side (SSE, AVX, ...)
|
|
20
|
-
#if defined(
|
|
20
|
+
#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
|
|
21
21
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
22
22
|
float4 plog<float4>(const float4& a)
|
|
23
23
|
{
|
|
@@ -56,6 +56,18 @@ double2 pexp<double2>(const double2& a)
|
|
|
56
56
|
return make_double2(exp(a.x), exp(a.y));
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
+
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
60
|
+
float4 pexpm1<float4>(const float4& a)
|
|
61
|
+
{
|
|
62
|
+
return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w));
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
66
|
+
double2 pexpm1<double2>(const double2& a)
|
|
67
|
+
{
|
|
68
|
+
return make_double2(expm1(a.x), expm1(a.y));
|
|
69
|
+
}
|
|
70
|
+
|
|
59
71
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
60
72
|
float4 psqrt<float4>(const float4& a)
|
|
61
73
|
{
|
|
@@ -88,4 +100,4 @@ double2 prsqrt<double2>(const double2& a)
|
|
|
88
100
|
|
|
89
101
|
} // end namespace Eigen
|
|
90
102
|
|
|
91
|
-
#endif //
|
|
103
|
+
#endif // EIGEN_MATH_FUNCTIONS_GPU_H
|