@smake/eigen 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +26 -0
- package/eigen/COPYING.GPL +674 -0
- package/eigen/COPYING.LGPL +502 -0
- package/eigen/COPYING.MINPACK +51 -0
- package/eigen/COPYING.MPL2 +373 -0
- package/eigen/COPYING.README +18 -0
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +5 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -1,212 +0,0 @@
|
|
|
1
|
-
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
-
// for linear algebra.
|
|
3
|
-
//
|
|
4
|
-
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
|
5
|
-
//
|
|
6
|
-
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
|
-
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
|
-
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
|
-
|
|
10
|
-
#ifndef EIGEN_TYPE_CASTING_CUDA_H
|
|
11
|
-
#define EIGEN_TYPE_CASTING_CUDA_H
|
|
12
|
-
|
|
13
|
-
namespace Eigen {
|
|
14
|
-
|
|
15
|
-
namespace internal {
|
|
16
|
-
|
|
17
|
-
template<>
|
|
18
|
-
struct scalar_cast_op<float, Eigen::half> {
|
|
19
|
-
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
20
|
-
typedef Eigen::half result_type;
|
|
21
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
|
|
22
|
-
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
23
|
-
return __float2half(a);
|
|
24
|
-
#else
|
|
25
|
-
return Eigen::half(a);
|
|
26
|
-
#endif
|
|
27
|
-
}
|
|
28
|
-
};
|
|
29
|
-
|
|
30
|
-
template<>
|
|
31
|
-
struct functor_traits<scalar_cast_op<float, Eigen::half> >
|
|
32
|
-
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
template<>
|
|
36
|
-
struct scalar_cast_op<int, Eigen::half> {
|
|
37
|
-
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
38
|
-
typedef Eigen::half result_type;
|
|
39
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
|
|
40
|
-
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
41
|
-
return __float2half(static_cast<float>(a));
|
|
42
|
-
#else
|
|
43
|
-
return Eigen::half(static_cast<float>(a));
|
|
44
|
-
#endif
|
|
45
|
-
}
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
template<>
|
|
49
|
-
struct functor_traits<scalar_cast_op<int, Eigen::half> >
|
|
50
|
-
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
template<>
|
|
54
|
-
struct scalar_cast_op<Eigen::half, float> {
|
|
55
|
-
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
56
|
-
typedef float result_type;
|
|
57
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
|
|
58
|
-
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
59
|
-
return __half2float(a);
|
|
60
|
-
#else
|
|
61
|
-
return static_cast<float>(a);
|
|
62
|
-
#endif
|
|
63
|
-
}
|
|
64
|
-
};
|
|
65
|
-
|
|
66
|
-
template<>
|
|
67
|
-
struct functor_traits<scalar_cast_op<Eigen::half, float> >
|
|
68
|
-
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
73
|
-
|
|
74
|
-
template <>
|
|
75
|
-
struct type_casting_traits<Eigen::half, float> {
|
|
76
|
-
enum {
|
|
77
|
-
VectorizedCast = 1,
|
|
78
|
-
SrcCoeffRatio = 2,
|
|
79
|
-
TgtCoeffRatio = 1
|
|
80
|
-
};
|
|
81
|
-
};
|
|
82
|
-
|
|
83
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
|
|
84
|
-
float2 r1 = __half22float2(a);
|
|
85
|
-
float2 r2 = __half22float2(b);
|
|
86
|
-
return make_float4(r1.x, r1.y, r2.x, r2.y);
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
template <>
|
|
90
|
-
struct type_casting_traits<float, Eigen::half> {
|
|
91
|
-
enum {
|
|
92
|
-
VectorizedCast = 1,
|
|
93
|
-
SrcCoeffRatio = 1,
|
|
94
|
-
TgtCoeffRatio = 2
|
|
95
|
-
};
|
|
96
|
-
};
|
|
97
|
-
|
|
98
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
|
|
99
|
-
// Simply discard the second half of the input
|
|
100
|
-
return __floats2half2_rn(a.x, a.y);
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
#elif defined EIGEN_VECTORIZE_AVX512
|
|
104
|
-
template <>
|
|
105
|
-
struct type_casting_traits<half, float> {
|
|
106
|
-
enum {
|
|
107
|
-
VectorizedCast = 1,
|
|
108
|
-
SrcCoeffRatio = 1,
|
|
109
|
-
TgtCoeffRatio = 1
|
|
110
|
-
};
|
|
111
|
-
};
|
|
112
|
-
|
|
113
|
-
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
|
|
114
|
-
return half2float(a);
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
template <>
|
|
118
|
-
struct type_casting_traits<float, half> {
|
|
119
|
-
enum {
|
|
120
|
-
VectorizedCast = 1,
|
|
121
|
-
SrcCoeffRatio = 1,
|
|
122
|
-
TgtCoeffRatio = 1
|
|
123
|
-
};
|
|
124
|
-
};
|
|
125
|
-
|
|
126
|
-
template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
|
|
127
|
-
return float2half(a);
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
#elif defined EIGEN_VECTORIZE_AVX
|
|
131
|
-
|
|
132
|
-
template <>
|
|
133
|
-
struct type_casting_traits<Eigen::half, float> {
|
|
134
|
-
enum {
|
|
135
|
-
VectorizedCast = 1,
|
|
136
|
-
SrcCoeffRatio = 1,
|
|
137
|
-
TgtCoeffRatio = 1
|
|
138
|
-
};
|
|
139
|
-
};
|
|
140
|
-
|
|
141
|
-
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
|
|
142
|
-
return half2float(a);
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
template <>
|
|
146
|
-
struct type_casting_traits<float, Eigen::half> {
|
|
147
|
-
enum {
|
|
148
|
-
VectorizedCast = 1,
|
|
149
|
-
SrcCoeffRatio = 1,
|
|
150
|
-
TgtCoeffRatio = 1
|
|
151
|
-
};
|
|
152
|
-
};
|
|
153
|
-
|
|
154
|
-
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
|
|
155
|
-
return float2half(a);
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// Disable the following code since it's broken on too many platforms / compilers.
|
|
159
|
-
//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
|
|
160
|
-
#elif 0
|
|
161
|
-
|
|
162
|
-
template <>
|
|
163
|
-
struct type_casting_traits<Eigen::half, float> {
|
|
164
|
-
enum {
|
|
165
|
-
VectorizedCast = 1,
|
|
166
|
-
SrcCoeffRatio = 1,
|
|
167
|
-
TgtCoeffRatio = 1
|
|
168
|
-
};
|
|
169
|
-
};
|
|
170
|
-
|
|
171
|
-
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
|
|
172
|
-
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
|
173
|
-
Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
|
|
174
|
-
float f1 = static_cast<float>(h);
|
|
175
|
-
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
|
176
|
-
float f2 = static_cast<float>(h);
|
|
177
|
-
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
|
|
178
|
-
float f3 = static_cast<float>(h);
|
|
179
|
-
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
|
|
180
|
-
float f4 = static_cast<float>(h);
|
|
181
|
-
return _mm_set_ps(f4, f3, f2, f1);
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
template <>
|
|
185
|
-
struct type_casting_traits<float, Eigen::half> {
|
|
186
|
-
enum {
|
|
187
|
-
VectorizedCast = 1,
|
|
188
|
-
SrcCoeffRatio = 1,
|
|
189
|
-
TgtCoeffRatio = 1
|
|
190
|
-
};
|
|
191
|
-
};
|
|
192
|
-
|
|
193
|
-
template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
|
|
194
|
-
EIGEN_ALIGN16 float aux[4];
|
|
195
|
-
pstore(aux, a);
|
|
196
|
-
Eigen::half h0(aux[0]);
|
|
197
|
-
Eigen::half h1(aux[1]);
|
|
198
|
-
Eigen::half h2(aux[2]);
|
|
199
|
-
Eigen::half h3(aux[3]);
|
|
200
|
-
|
|
201
|
-
Packet4h result;
|
|
202
|
-
result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
|
|
203
|
-
return result;
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
#endif
|
|
207
|
-
|
|
208
|
-
} // end namespace internal
|
|
209
|
-
|
|
210
|
-
} // end namespace Eigen
|
|
211
|
-
|
|
212
|
-
#endif // EIGEN_TYPE_CASTING_CUDA_H
|
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
-
// for linear algebra.
|
|
3
|
-
//
|
|
4
|
-
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
|
|
5
|
-
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
6
|
-
//
|
|
7
|
-
// This Source Code Form is subject to the terms of the Mozilla
|
|
8
|
-
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
9
|
-
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
10
|
-
|
|
11
|
-
#ifndef EIGEN_GEOMETRY_SSE_H
|
|
12
|
-
#define EIGEN_GEOMETRY_SSE_H
|
|
13
|
-
|
|
14
|
-
namespace Eigen {
|
|
15
|
-
|
|
16
|
-
namespace internal {
|
|
17
|
-
|
|
18
|
-
template<class Derived, class OtherDerived>
|
|
19
|
-
struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
|
|
20
|
-
{
|
|
21
|
-
enum {
|
|
22
|
-
AAlignment = traits<Derived>::Alignment,
|
|
23
|
-
BAlignment = traits<OtherDerived>::Alignment,
|
|
24
|
-
ResAlignment = traits<Quaternion<float> >::Alignment
|
|
25
|
-
};
|
|
26
|
-
static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
|
|
27
|
-
{
|
|
28
|
-
Quaternion<float> res;
|
|
29
|
-
const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
|
|
30
|
-
__m128 a = _a.coeffs().template packet<AAlignment>(0);
|
|
31
|
-
__m128 b = _b.coeffs().template packet<BAlignment>(0);
|
|
32
|
-
__m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
|
|
33
|
-
__m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
|
|
34
|
-
pstoret<float,Packet4f,ResAlignment>(
|
|
35
|
-
&res.x(),
|
|
36
|
-
_mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
|
|
37
|
-
_mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
|
|
38
|
-
vec4f_swizzle1(b,1,2,0,0))),
|
|
39
|
-
_mm_xor_ps(mask,_mm_add_ps(s1,s2))));
|
|
40
|
-
|
|
41
|
-
return res;
|
|
42
|
-
}
|
|
43
|
-
};
|
|
44
|
-
|
|
45
|
-
template<class Derived>
|
|
46
|
-
struct quat_conj<Architecture::SSE, Derived, float>
|
|
47
|
-
{
|
|
48
|
-
enum {
|
|
49
|
-
ResAlignment = traits<Quaternion<float> >::Alignment
|
|
50
|
-
};
|
|
51
|
-
static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
|
|
52
|
-
{
|
|
53
|
-
Quaternion<float> res;
|
|
54
|
-
const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
|
|
55
|
-
pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
|
|
56
|
-
return res;
|
|
57
|
-
}
|
|
58
|
-
};
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
template<typename VectorLhs,typename VectorRhs>
|
|
62
|
-
struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
|
|
63
|
-
{
|
|
64
|
-
enum {
|
|
65
|
-
ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
|
|
66
|
-
};
|
|
67
|
-
static inline typename plain_matrix_type<VectorLhs>::type
|
|
68
|
-
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
|
69
|
-
{
|
|
70
|
-
__m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0);
|
|
71
|
-
__m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0);
|
|
72
|
-
__m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
|
|
73
|
-
__m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
|
|
74
|
-
typename plain_matrix_type<VectorLhs>::type res;
|
|
75
|
-
pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
|
|
76
|
-
return res;
|
|
77
|
-
}
|
|
78
|
-
};
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
template<class Derived, class OtherDerived>
|
|
84
|
-
struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
|
|
85
|
-
{
|
|
86
|
-
enum {
|
|
87
|
-
BAlignment = traits<OtherDerived>::Alignment,
|
|
88
|
-
ResAlignment = traits<Quaternion<double> >::Alignment
|
|
89
|
-
};
|
|
90
|
-
|
|
91
|
-
static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
|
|
92
|
-
{
|
|
93
|
-
const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
|
|
94
|
-
|
|
95
|
-
Quaternion<double> res;
|
|
96
|
-
|
|
97
|
-
const double* a = _a.coeffs().data();
|
|
98
|
-
Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
|
|
99
|
-
Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
|
|
100
|
-
Packet2d a_xx = pset1<Packet2d>(a[0]);
|
|
101
|
-
Packet2d a_yy = pset1<Packet2d>(a[1]);
|
|
102
|
-
Packet2d a_zz = pset1<Packet2d>(a[2]);
|
|
103
|
-
Packet2d a_ww = pset1<Packet2d>(a[3]);
|
|
104
|
-
|
|
105
|
-
// two temporaries:
|
|
106
|
-
Packet2d t1, t2;
|
|
107
|
-
|
|
108
|
-
/*
|
|
109
|
-
* t1 = ww*xy + yy*zw
|
|
110
|
-
* t2 = zz*xy - xx*zw
|
|
111
|
-
* res.xy = t1 +/- swap(t2)
|
|
112
|
-
*/
|
|
113
|
-
t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw));
|
|
114
|
-
t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
|
|
115
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
116
|
-
EIGEN_UNUSED_VARIABLE(mask)
|
|
117
|
-
pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
|
|
118
|
-
#else
|
|
119
|
-
pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
|
|
120
|
-
#endif
|
|
121
|
-
|
|
122
|
-
/*
|
|
123
|
-
* t1 = ww*zw - yy*xy
|
|
124
|
-
* t2 = zz*zw + xx*xy
|
|
125
|
-
* res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2)
|
|
126
|
-
*/
|
|
127
|
-
t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy));
|
|
128
|
-
t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
|
|
129
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
130
|
-
EIGEN_UNUSED_VARIABLE(mask)
|
|
131
|
-
pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
|
|
132
|
-
#else
|
|
133
|
-
pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
|
|
134
|
-
#endif
|
|
135
|
-
|
|
136
|
-
return res;
|
|
137
|
-
}
|
|
138
|
-
};
|
|
139
|
-
|
|
140
|
-
template<class Derived>
|
|
141
|
-
struct quat_conj<Architecture::SSE, Derived, double>
|
|
142
|
-
{
|
|
143
|
-
enum {
|
|
144
|
-
ResAlignment = traits<Quaternion<double> >::Alignment
|
|
145
|
-
};
|
|
146
|
-
static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
|
|
147
|
-
{
|
|
148
|
-
Quaternion<double> res;
|
|
149
|
-
const __m128d mask0 = _mm_setr_pd(-0.,-0.);
|
|
150
|
-
const __m128d mask2 = _mm_setr_pd(-0.,0.);
|
|
151
|
-
pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
|
|
152
|
-
pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
|
|
153
|
-
return res;
|
|
154
|
-
}
|
|
155
|
-
};
|
|
156
|
-
|
|
157
|
-
} // end namespace internal
|
|
158
|
-
|
|
159
|
-
} // end namespace Eigen
|
|
160
|
-
|
|
161
|
-
#endif // EIGEN_GEOMETRY_SSE_H
|