@smake/eigen 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +1 -1
- package/eigen/COPYING.MINPACK +51 -52
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +2 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -1,333 +0,0 @@
|
|
|
1
|
-
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
-
// for linear algebra.
|
|
3
|
-
//
|
|
4
|
-
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
|
5
|
-
//
|
|
6
|
-
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
|
-
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
|
-
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
|
-
|
|
10
|
-
#ifndef EIGEN_PACKET_MATH_CUDA_H
|
|
11
|
-
#define EIGEN_PACKET_MATH_CUDA_H
|
|
12
|
-
|
|
13
|
-
namespace Eigen {
|
|
14
|
-
|
|
15
|
-
namespace internal {
|
|
16
|
-
|
|
17
|
-
// Make sure this is only available when targeting a GPU: we don't want to
|
|
18
|
-
// introduce conflicts between these packet_traits definitions and the ones
|
|
19
|
-
// we'll use on the host side (SSE, AVX, ...)
|
|
20
|
-
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
|
21
|
-
template<> struct is_arithmetic<float4> { enum { value = true }; };
|
|
22
|
-
template<> struct is_arithmetic<double2> { enum { value = true }; };
|
|
23
|
-
|
|
24
|
-
template<> struct packet_traits<float> : default_packet_traits
|
|
25
|
-
{
|
|
26
|
-
typedef float4 type;
|
|
27
|
-
typedef float4 half;
|
|
28
|
-
enum {
|
|
29
|
-
Vectorizable = 1,
|
|
30
|
-
AlignedOnScalar = 1,
|
|
31
|
-
size=4,
|
|
32
|
-
HasHalfPacket = 0,
|
|
33
|
-
|
|
34
|
-
HasDiv = 1,
|
|
35
|
-
HasSin = 0,
|
|
36
|
-
HasCos = 0,
|
|
37
|
-
HasLog = 1,
|
|
38
|
-
HasExp = 1,
|
|
39
|
-
HasSqrt = 1,
|
|
40
|
-
HasRsqrt = 1,
|
|
41
|
-
HasLGamma = 1,
|
|
42
|
-
HasDiGamma = 1,
|
|
43
|
-
HasZeta = 1,
|
|
44
|
-
HasPolygamma = 1,
|
|
45
|
-
HasErf = 1,
|
|
46
|
-
HasErfc = 1,
|
|
47
|
-
HasIGamma = 1,
|
|
48
|
-
HasIGammac = 1,
|
|
49
|
-
HasBetaInc = 1,
|
|
50
|
-
|
|
51
|
-
HasBlend = 0,
|
|
52
|
-
};
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
template<> struct packet_traits<double> : default_packet_traits
|
|
56
|
-
{
|
|
57
|
-
typedef double2 type;
|
|
58
|
-
typedef double2 half;
|
|
59
|
-
enum {
|
|
60
|
-
Vectorizable = 1,
|
|
61
|
-
AlignedOnScalar = 1,
|
|
62
|
-
size=2,
|
|
63
|
-
HasHalfPacket = 0,
|
|
64
|
-
|
|
65
|
-
HasDiv = 1,
|
|
66
|
-
HasLog = 1,
|
|
67
|
-
HasExp = 1,
|
|
68
|
-
HasSqrt = 1,
|
|
69
|
-
HasRsqrt = 1,
|
|
70
|
-
HasLGamma = 1,
|
|
71
|
-
HasDiGamma = 1,
|
|
72
|
-
HasZeta = 1,
|
|
73
|
-
HasPolygamma = 1,
|
|
74
|
-
HasErf = 1,
|
|
75
|
-
HasErfc = 1,
|
|
76
|
-
HasIGamma = 1,
|
|
77
|
-
HasIGammac = 1,
|
|
78
|
-
HasBetaInc = 1,
|
|
79
|
-
|
|
80
|
-
HasBlend = 0,
|
|
81
|
-
};
|
|
82
|
-
};
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
|
|
86
|
-
template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
|
|
87
|
-
|
|
88
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
|
|
89
|
-
return make_float4(from, from, from, from);
|
|
90
|
-
}
|
|
91
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
|
|
92
|
-
return make_double2(from, from);
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
|
|
97
|
-
return make_float4(a, a+1, a+2, a+3);
|
|
98
|
-
}
|
|
99
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
|
|
100
|
-
return make_double2(a, a+1);
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
|
|
104
|
-
return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
|
|
105
|
-
}
|
|
106
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
|
|
107
|
-
return make_double2(a.x+b.x, a.y+b.y);
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
|
|
111
|
-
return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
|
|
112
|
-
}
|
|
113
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
|
|
114
|
-
return make_double2(a.x-b.x, a.y-b.y);
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
|
|
118
|
-
return make_float4(-a.x, -a.y, -a.z, -a.w);
|
|
119
|
-
}
|
|
120
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
|
|
121
|
-
return make_double2(-a.x, -a.y);
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
|
|
125
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
|
|
126
|
-
|
|
127
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
|
|
128
|
-
return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
|
|
129
|
-
}
|
|
130
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
|
|
131
|
-
return make_double2(a.x*b.x, a.y*b.y);
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
|
|
135
|
-
return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
|
|
136
|
-
}
|
|
137
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
|
|
138
|
-
return make_double2(a.x/b.x, a.y/b.y);
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
|
|
142
|
-
return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
|
|
143
|
-
}
|
|
144
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
|
|
145
|
-
return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
|
|
149
|
-
return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
|
|
150
|
-
}
|
|
151
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
|
|
152
|
-
return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
|
|
156
|
-
return *reinterpret_cast<const float4*>(from);
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
|
|
160
|
-
return *reinterpret_cast<const double2*>(from);
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
|
|
164
|
-
return make_float4(from[0], from[1], from[2], from[3]);
|
|
165
|
-
}
|
|
166
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
|
|
167
|
-
return make_double2(from[0], from[1]);
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
|
|
171
|
-
return make_float4(from[0], from[0], from[1], from[1]);
|
|
172
|
-
}
|
|
173
|
-
template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
|
|
174
|
-
return make_double2(from[0], from[0]);
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
|
|
178
|
-
*reinterpret_cast<float4*>(to) = from;
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
|
|
182
|
-
*reinterpret_cast<double2*>(to) = from;
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
|
|
186
|
-
to[0] = from.x;
|
|
187
|
-
to[1] = from.y;
|
|
188
|
-
to[2] = from.z;
|
|
189
|
-
to[3] = from.w;
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
|
|
193
|
-
to[0] = from.x;
|
|
194
|
-
to[1] = from.y;
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
template<>
|
|
198
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
|
|
199
|
-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
|
200
|
-
return __ldg((const float4*)from);
|
|
201
|
-
#else
|
|
202
|
-
return make_float4(from[0], from[1], from[2], from[3]);
|
|
203
|
-
#endif
|
|
204
|
-
}
|
|
205
|
-
template<>
|
|
206
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
|
|
207
|
-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
|
208
|
-
return __ldg((const double2*)from);
|
|
209
|
-
#else
|
|
210
|
-
return make_double2(from[0], from[1]);
|
|
211
|
-
#endif
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
template<>
|
|
215
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
|
|
216
|
-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
|
217
|
-
return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
|
|
218
|
-
#else
|
|
219
|
-
return make_float4(from[0], from[1], from[2], from[3]);
|
|
220
|
-
#endif
|
|
221
|
-
}
|
|
222
|
-
template<>
|
|
223
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
|
|
224
|
-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
|
225
|
-
return make_double2(__ldg(from+0), __ldg(from+1));
|
|
226
|
-
#else
|
|
227
|
-
return make_double2(from[0], from[1]);
|
|
228
|
-
#endif
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
|
|
232
|
-
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
|
|
236
|
-
return make_double2(from[0*stride], from[1*stride]);
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
|
|
240
|
-
to[stride*0] = from.x;
|
|
241
|
-
to[stride*1] = from.y;
|
|
242
|
-
to[stride*2] = from.z;
|
|
243
|
-
to[stride*3] = from.w;
|
|
244
|
-
}
|
|
245
|
-
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
|
|
246
|
-
to[stride*0] = from.x;
|
|
247
|
-
to[stride*1] = from.y;
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
|
|
251
|
-
return a.x;
|
|
252
|
-
}
|
|
253
|
-
template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
|
|
254
|
-
return a.x;
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
|
|
258
|
-
return a.x + a.y + a.z + a.w;
|
|
259
|
-
}
|
|
260
|
-
template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
|
|
261
|
-
return a.x + a.y;
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
|
|
265
|
-
return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
|
|
266
|
-
}
|
|
267
|
-
template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
|
|
268
|
-
return fmax(a.x, a.y);
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
|
|
272
|
-
return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
|
|
273
|
-
}
|
|
274
|
-
template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
|
|
275
|
-
return fmin(a.x, a.y);
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
template<> EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
|
|
279
|
-
return a.x * a.y * a.z * a.w;
|
|
280
|
-
}
|
|
281
|
-
template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
|
|
282
|
-
return a.x * a.y;
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
|
|
286
|
-
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
|
|
287
|
-
}
|
|
288
|
-
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
|
289
|
-
return make_double2(fabs(a.x), fabs(a.y));
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
EIGEN_DEVICE_FUNC inline void
|
|
293
|
-
ptranspose(PacketBlock<float4,4>& kernel) {
|
|
294
|
-
float tmp = kernel.packet[0].y;
|
|
295
|
-
kernel.packet[0].y = kernel.packet[1].x;
|
|
296
|
-
kernel.packet[1].x = tmp;
|
|
297
|
-
|
|
298
|
-
tmp = kernel.packet[0].z;
|
|
299
|
-
kernel.packet[0].z = kernel.packet[2].x;
|
|
300
|
-
kernel.packet[2].x = tmp;
|
|
301
|
-
|
|
302
|
-
tmp = kernel.packet[0].w;
|
|
303
|
-
kernel.packet[0].w = kernel.packet[3].x;
|
|
304
|
-
kernel.packet[3].x = tmp;
|
|
305
|
-
|
|
306
|
-
tmp = kernel.packet[1].z;
|
|
307
|
-
kernel.packet[1].z = kernel.packet[2].y;
|
|
308
|
-
kernel.packet[2].y = tmp;
|
|
309
|
-
|
|
310
|
-
tmp = kernel.packet[1].w;
|
|
311
|
-
kernel.packet[1].w = kernel.packet[3].y;
|
|
312
|
-
kernel.packet[3].y = tmp;
|
|
313
|
-
|
|
314
|
-
tmp = kernel.packet[2].w;
|
|
315
|
-
kernel.packet[2].w = kernel.packet[3].z;
|
|
316
|
-
kernel.packet[3].z = tmp;
|
|
317
|
-
}
|
|
318
|
-
|
|
319
|
-
EIGEN_DEVICE_FUNC inline void
|
|
320
|
-
ptranspose(PacketBlock<double2,2>& kernel) {
|
|
321
|
-
double tmp = kernel.packet[0].y;
|
|
322
|
-
kernel.packet[0].y = kernel.packet[1].x;
|
|
323
|
-
kernel.packet[1].x = tmp;
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
#endif
|
|
327
|
-
|
|
328
|
-
} // end namespace internal
|
|
329
|
-
|
|
330
|
-
} // end namespace Eigen
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
#endif // EIGEN_PACKET_MATH_CUDA_H
|