@smake/eigen 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +26 -0
- package/eigen/COPYING.GPL +674 -0
- package/eigen/COPYING.LGPL +502 -0
- package/eigen/COPYING.MINPACK +51 -0
- package/eigen/COPYING.MPL2 +373 -0
- package/eigen/COPYING.README +18 -0
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +5 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
#ifndef EIGEN_ARRAYWRAPPER_H
|
|
11
11
|
#define EIGEN_ARRAYWRAPPER_H
|
|
12
12
|
|
|
13
|
-
namespace Eigen {
|
|
13
|
+
namespace Eigen {
|
|
14
14
|
|
|
15
15
|
/** \class ArrayWrapper
|
|
16
16
|
* \ingroup Core_Module
|
|
@@ -60,14 +60,14 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
|
|
60
60
|
EIGEN_DEVICE_FUNC
|
|
61
61
|
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
|
62
62
|
|
|
63
|
-
EIGEN_DEVICE_FUNC
|
|
64
|
-
inline Index rows() const { return m_expression.rows(); }
|
|
65
|
-
EIGEN_DEVICE_FUNC
|
|
66
|
-
inline Index cols() const { return m_expression.cols(); }
|
|
67
|
-
EIGEN_DEVICE_FUNC
|
|
68
|
-
inline Index outerStride() const { return m_expression.outerStride(); }
|
|
69
|
-
EIGEN_DEVICE_FUNC
|
|
70
|
-
inline Index innerStride() const { return m_expression.innerStride(); }
|
|
63
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
|
64
|
+
inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }
|
|
65
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
|
66
|
+
inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }
|
|
67
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
|
68
|
+
inline Index outerStride() const EIGEN_NOEXCEPT { return m_expression.outerStride(); }
|
|
69
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
|
70
|
+
inline Index innerStride() const EIGEN_NOEXCEPT { return m_expression.innerStride(); }
|
|
71
71
|
|
|
72
72
|
EIGEN_DEVICE_FUNC
|
|
73
73
|
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
|
@@ -90,9 +90,9 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
|
|
90
90
|
EIGEN_DEVICE_FUNC
|
|
91
91
|
inline void evalTo(Dest& dst) const { dst = m_expression; }
|
|
92
92
|
|
|
93
|
-
const typename internal::remove_all<NestedExpressionType>::type&
|
|
94
93
|
EIGEN_DEVICE_FUNC
|
|
95
|
-
|
|
94
|
+
const typename internal::remove_all<NestedExpressionType>::type&
|
|
95
|
+
nestedExpression() const
|
|
96
96
|
{
|
|
97
97
|
return m_expression;
|
|
98
98
|
}
|
|
@@ -158,14 +158,14 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
|
|
158
158
|
EIGEN_DEVICE_FUNC
|
|
159
159
|
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
|
160
160
|
|
|
161
|
-
EIGEN_DEVICE_FUNC
|
|
162
|
-
inline Index rows() const { return m_expression.rows(); }
|
|
163
|
-
EIGEN_DEVICE_FUNC
|
|
164
|
-
inline Index cols() const { return m_expression.cols(); }
|
|
165
|
-
EIGEN_DEVICE_FUNC
|
|
166
|
-
inline Index outerStride() const { return m_expression.outerStride(); }
|
|
167
|
-
EIGEN_DEVICE_FUNC
|
|
168
|
-
inline Index innerStride() const { return m_expression.innerStride(); }
|
|
161
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
|
162
|
+
inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }
|
|
163
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
|
164
|
+
inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }
|
|
165
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
|
166
|
+
inline Index outerStride() const EIGEN_NOEXCEPT { return m_expression.outerStride(); }
|
|
167
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
|
168
|
+
inline Index innerStride() const EIGEN_NOEXCEPT { return m_expression.innerStride(); }
|
|
169
169
|
|
|
170
170
|
EIGEN_DEVICE_FUNC
|
|
171
171
|
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
|
@@ -185,8 +185,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
EIGEN_DEVICE_FUNC
|
|
188
|
-
const typename internal::remove_all<NestedExpressionType>::type&
|
|
189
|
-
nestedExpression() const
|
|
188
|
+
const typename internal::remove_all<NestedExpressionType>::type&
|
|
189
|
+
nestedExpression() const
|
|
190
190
|
{
|
|
191
191
|
return m_expression;
|
|
192
192
|
}
|
|
@@ -16,7 +16,7 @@ namespace Eigen {
|
|
|
16
16
|
|
|
17
17
|
template<typename Derived>
|
|
18
18
|
template<typename OtherDerived>
|
|
19
|
-
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
|
|
19
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
|
|
20
20
|
::lazyAssign(const DenseBase<OtherDerived>& other)
|
|
21
21
|
{
|
|
22
22
|
enum{
|
|
@@ -17,24 +17,24 @@ namespace Eigen {
|
|
|
17
17
|
// This implementation is based on Assign.h
|
|
18
18
|
|
|
19
19
|
namespace internal {
|
|
20
|
-
|
|
20
|
+
|
|
21
21
|
/***************************************************************************
|
|
22
22
|
* Part 1 : the logic deciding a strategy for traversal and unrolling *
|
|
23
23
|
***************************************************************************/
|
|
24
24
|
|
|
25
25
|
// copy_using_evaluator_traits is based on assign_traits
|
|
26
26
|
|
|
27
|
-
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
|
|
27
|
+
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
|
|
28
28
|
struct copy_using_evaluator_traits
|
|
29
29
|
{
|
|
30
30
|
typedef typename DstEvaluator::XprType Dst;
|
|
31
31
|
typedef typename Dst::Scalar DstScalar;
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
enum {
|
|
34
34
|
DstFlags = DstEvaluator::Flags,
|
|
35
35
|
SrcFlags = SrcEvaluator::Flags
|
|
36
36
|
};
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
public:
|
|
39
39
|
enum {
|
|
40
40
|
DstAlignment = DstEvaluator::Alignment,
|
|
@@ -51,13 +51,15 @@ private:
|
|
|
51
51
|
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
|
|
52
52
|
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
|
|
53
53
|
: int(Dst::MaxRowsAtCompileTime),
|
|
54
|
+
RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
|
|
55
|
+
RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
|
|
54
56
|
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
|
|
55
57
|
MaxSizeAtCompileTime = Dst::SizeAtCompileTime
|
|
56
58
|
};
|
|
57
59
|
|
|
58
60
|
// TODO distinguish between linear traversal and inner-traversals
|
|
59
|
-
typedef typename find_best_packet<DstScalar,
|
|
60
|
-
typedef typename find_best_packet<DstScalar,
|
|
61
|
+
typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
|
|
62
|
+
typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;
|
|
61
63
|
|
|
62
64
|
enum {
|
|
63
65
|
LinearPacketSize = unpacket_traits<LinearPacketType>::size,
|
|
@@ -97,7 +99,8 @@ private:
|
|
|
97
99
|
|
|
98
100
|
public:
|
|
99
101
|
enum {
|
|
100
|
-
Traversal =
|
|
102
|
+
Traversal = int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
|
|
103
|
+
: (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
|
|
101
104
|
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
|
102
105
|
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
|
103
106
|
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
|
@@ -135,7 +138,7 @@ public:
|
|
|
135
138
|
? int(CompleteUnrolling)
|
|
136
139
|
: int(NoUnrolling) )
|
|
137
140
|
: int(Traversal) == int(LinearTraversal)
|
|
138
|
-
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
|
141
|
+
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
|
139
142
|
: int(NoUnrolling) )
|
|
140
143
|
#if EIGEN_UNALIGNED_VECTORIZE
|
|
141
144
|
: int(Traversal) == int(SliceVectorizedTraversal)
|
|
@@ -172,6 +175,8 @@ public:
|
|
|
172
175
|
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
|
173
176
|
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
|
|
174
177
|
EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
|
|
178
|
+
EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
|
|
179
|
+
EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
|
|
175
180
|
EIGEN_DEBUG_VAR(UnrollingLimit)
|
|
176
181
|
EIGEN_DEBUG_VAR(MayUnrollCompletely)
|
|
177
182
|
EIGEN_DEBUG_VAR(MayUnrollInner)
|
|
@@ -195,7 +200,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
|
|
|
195
200
|
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
|
|
196
201
|
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
|
197
202
|
typedef typename DstEvaluatorType::XprType DstXprType;
|
|
198
|
-
|
|
203
|
+
|
|
199
204
|
enum {
|
|
200
205
|
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
|
201
206
|
inner = Index % DstXprType::InnerSizeAtCompileTime
|
|
@@ -261,7 +266,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
|
|
|
261
266
|
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
|
262
267
|
typedef typename DstEvaluatorType::XprType DstXprType;
|
|
263
268
|
typedef typename Kernel::PacketType PacketType;
|
|
264
|
-
|
|
269
|
+
|
|
265
270
|
enum {
|
|
266
271
|
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
|
267
272
|
inner = Index % DstXprType::InnerSizeAtCompileTime,
|
|
@@ -312,6 +317,22 @@ template<typename Kernel,
|
|
|
312
317
|
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
|
313
318
|
struct dense_assignment_loop;
|
|
314
319
|
|
|
320
|
+
/************************
|
|
321
|
+
***** Special Cases *****
|
|
322
|
+
************************/
|
|
323
|
+
|
|
324
|
+
// Zero-sized assignment is a no-op.
|
|
325
|
+
template<typename Kernel, int Unrolling>
|
|
326
|
+
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
|
|
327
|
+
{
|
|
328
|
+
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
|
|
329
|
+
{
|
|
330
|
+
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
|
331
|
+
EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
|
|
332
|
+
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
|
|
333
|
+
}
|
|
334
|
+
};
|
|
335
|
+
|
|
315
336
|
/************************
|
|
316
337
|
*** Default traversal ***
|
|
317
338
|
************************/
|
|
@@ -426,10 +447,10 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
|
|
|
426
447
|
{
|
|
427
448
|
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
|
428
449
|
typedef typename Kernel::PacketType PacketType;
|
|
429
|
-
|
|
450
|
+
|
|
430
451
|
enum { size = DstXprType::SizeAtCompileTime,
|
|
431
452
|
packetSize =unpacket_traits<PacketType>::size,
|
|
432
|
-
alignedSize = (size/packetSize)*packetSize };
|
|
453
|
+
alignedSize = (int(size)/packetSize)*packetSize };
|
|
433
454
|
|
|
434
455
|
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
|
435
456
|
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
|
|
@@ -530,7 +551,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
|
|
530
551
|
const Scalar *dst_ptr = kernel.dstDataPtr();
|
|
531
552
|
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
|
|
532
553
|
{
|
|
533
|
-
// the pointer is not
|
|
554
|
+
// the pointer is not aligned-on scalar, so alignment is not possible
|
|
534
555
|
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
|
|
535
556
|
}
|
|
536
557
|
const Index packetAlignedMask = packetSize - 1;
|
|
@@ -568,14 +589,15 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
|
|
|
568
589
|
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
|
569
590
|
typedef typename Kernel::PacketType PacketType;
|
|
570
591
|
|
|
571
|
-
enum {
|
|
592
|
+
enum { innerSize = DstXprType::InnerSizeAtCompileTime,
|
|
572
593
|
packetSize =unpacket_traits<PacketType>::size,
|
|
573
|
-
vectorizableSize = (
|
|
594
|
+
vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
|
|
595
|
+
size = DstXprType::SizeAtCompileTime };
|
|
574
596
|
|
|
575
597
|
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
|
|
576
598
|
{
|
|
577
599
|
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
|
|
578
|
-
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize,
|
|
600
|
+
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
|
|
579
601
|
}
|
|
580
602
|
}
|
|
581
603
|
};
|
|
@@ -599,73 +621,74 @@ protected:
|
|
|
599
621
|
typedef typename DstEvaluatorTypeT::XprType DstXprType;
|
|
600
622
|
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
|
|
601
623
|
public:
|
|
602
|
-
|
|
624
|
+
|
|
603
625
|
typedef DstEvaluatorTypeT DstEvaluatorType;
|
|
604
626
|
typedef SrcEvaluatorTypeT SrcEvaluatorType;
|
|
605
627
|
typedef typename DstEvaluatorType::Scalar Scalar;
|
|
606
628
|
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
|
|
607
629
|
typedef typename AssignmentTraits::PacketType PacketType;
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
EIGEN_DEVICE_FUNC
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
633
|
+
generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
|
|
611
634
|
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
|
|
612
635
|
{
|
|
613
636
|
#ifdef EIGEN_DEBUG_ASSIGN
|
|
614
637
|
AssignmentTraits::debug();
|
|
615
638
|
#endif
|
|
616
639
|
}
|
|
617
|
-
|
|
618
|
-
EIGEN_DEVICE_FUNC Index size() const
|
|
619
|
-
EIGEN_DEVICE_FUNC Index innerSize() const
|
|
620
|
-
EIGEN_DEVICE_FUNC Index outerSize() const
|
|
621
|
-
EIGEN_DEVICE_FUNC Index rows() const
|
|
622
|
-
EIGEN_DEVICE_FUNC Index cols() const
|
|
623
|
-
EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
|
|
624
|
-
|
|
625
|
-
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
|
|
626
|
-
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
|
|
627
|
-
|
|
640
|
+
|
|
641
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
|
|
642
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
|
|
643
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
|
|
644
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
|
|
645
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
|
|
646
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }
|
|
647
|
+
|
|
648
|
+
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
|
|
649
|
+
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
|
|
650
|
+
|
|
628
651
|
/// Assign src(row,col) to dst(row,col) through the assignment functor.
|
|
629
652
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
|
|
630
653
|
{
|
|
631
654
|
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
|
|
632
655
|
}
|
|
633
|
-
|
|
656
|
+
|
|
634
657
|
/// \sa assignCoeff(Index,Index)
|
|
635
658
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
|
|
636
659
|
{
|
|
637
660
|
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
|
|
638
661
|
}
|
|
639
|
-
|
|
662
|
+
|
|
640
663
|
/// \sa assignCoeff(Index,Index)
|
|
641
664
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
|
|
642
665
|
{
|
|
643
|
-
Index row = rowIndexByOuterInner(outer, inner);
|
|
644
|
-
Index col = colIndexByOuterInner(outer, inner);
|
|
666
|
+
Index row = rowIndexByOuterInner(outer, inner);
|
|
667
|
+
Index col = colIndexByOuterInner(outer, inner);
|
|
645
668
|
assignCoeff(row, col);
|
|
646
669
|
}
|
|
647
|
-
|
|
648
|
-
|
|
670
|
+
|
|
671
|
+
|
|
649
672
|
template<int StoreMode, int LoadMode, typename PacketType>
|
|
650
673
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
|
|
651
674
|
{
|
|
652
675
|
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
|
|
653
676
|
}
|
|
654
|
-
|
|
677
|
+
|
|
655
678
|
template<int StoreMode, int LoadMode, typename PacketType>
|
|
656
679
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
|
|
657
680
|
{
|
|
658
681
|
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
|
|
659
682
|
}
|
|
660
|
-
|
|
683
|
+
|
|
661
684
|
template<int StoreMode, int LoadMode, typename PacketType>
|
|
662
685
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
|
|
663
686
|
{
|
|
664
|
-
Index row = rowIndexByOuterInner(outer, inner);
|
|
687
|
+
Index row = rowIndexByOuterInner(outer, inner);
|
|
665
688
|
Index col = colIndexByOuterInner(outer, inner);
|
|
666
689
|
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
|
|
667
690
|
}
|
|
668
|
-
|
|
691
|
+
|
|
669
692
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
|
|
670
693
|
{
|
|
671
694
|
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
|
@@ -688,7 +711,7 @@ public:
|
|
|
688
711
|
{
|
|
689
712
|
return m_dstExpr.data();
|
|
690
713
|
}
|
|
691
|
-
|
|
714
|
+
|
|
692
715
|
protected:
|
|
693
716
|
DstEvaluatorType& m_dst;
|
|
694
717
|
const SrcEvaluatorType& m_src;
|
|
@@ -697,6 +720,27 @@ protected:
|
|
|
697
720
|
DstXprType& m_dstExpr;
|
|
698
721
|
};
|
|
699
722
|
|
|
723
|
+
// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
|
|
724
|
+
// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
|
|
725
|
+
// when computing the product.
|
|
726
|
+
|
|
727
|
+
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
|
|
728
|
+
class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
|
|
729
|
+
{
|
|
730
|
+
protected:
|
|
731
|
+
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
|
|
732
|
+
public:
|
|
733
|
+
typedef typename Base::Scalar Scalar;
|
|
734
|
+
typedef typename Base::DstXprType DstXprType;
|
|
735
|
+
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
|
|
736
|
+
typedef typename AssignmentTraits::PacketType PacketType;
|
|
737
|
+
|
|
738
|
+
EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
|
|
739
|
+
: Base(dst, src, func, dstExpr)
|
|
740
|
+
{
|
|
741
|
+
}
|
|
742
|
+
};
|
|
743
|
+
|
|
700
744
|
/***************************************************************************
|
|
701
745
|
* Part 5 : Entry point for dense rectangular assignment
|
|
702
746
|
***************************************************************************/
|
|
@@ -734,13 +778,23 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
|
|
|
734
778
|
resize_if_allowed(dst, src, func);
|
|
735
779
|
|
|
736
780
|
DstEvaluatorType dstEvaluator(dst);
|
|
737
|
-
|
|
781
|
+
|
|
738
782
|
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
|
739
783
|
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
|
740
784
|
|
|
741
785
|
dense_assignment_loop<Kernel>::run(kernel);
|
|
742
786
|
}
|
|
743
787
|
|
|
788
|
+
// Specialization for filling the destination with a constant value.
|
|
789
|
+
#ifndef EIGEN_GPU_COMPILE_PHASE
|
|
790
|
+
template<typename DstXprType>
|
|
791
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
|
|
792
|
+
{
|
|
793
|
+
resize_if_allowed(dst, src, func);
|
|
794
|
+
std::fill_n(dst.data(), dst.size(), src.functor()());
|
|
795
|
+
}
|
|
796
|
+
#endif
|
|
797
|
+
|
|
744
798
|
template<typename DstXprType, typename SrcXprType>
|
|
745
799
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
|
746
800
|
{
|
|
@@ -756,13 +810,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
|
|
|
756
810
|
// AssignmentKind must define a Kind typedef.
|
|
757
811
|
template<typename DstShape, typename SrcShape> struct AssignmentKind;
|
|
758
812
|
|
|
759
|
-
//
|
|
813
|
+
// Assignment kind defined in this file:
|
|
760
814
|
struct Dense2Dense {};
|
|
761
815
|
struct EigenBase2EigenBase {};
|
|
762
816
|
|
|
763
817
|
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
|
|
764
818
|
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
|
|
765
|
-
|
|
819
|
+
|
|
766
820
|
// This is the main assignment class
|
|
767
821
|
template< typename DstXprType, typename SrcXprType, typename Functor,
|
|
768
822
|
typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
|
|
@@ -787,7 +841,7 @@ void call_assignment(const Dst& dst, const Src& src)
|
|
|
787
841
|
{
|
|
788
842
|
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
|
789
843
|
}
|
|
790
|
-
|
|
844
|
+
|
|
791
845
|
// Deal with "assume-aliasing"
|
|
792
846
|
template<typename Dst, typename Src, typename Func>
|
|
793
847
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
@@ -827,14 +881,35 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
|
|
|
827
881
|
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
|
|
828
882
|
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
|
|
829
883
|
ActualDstType actualDst(dst);
|
|
830
|
-
|
|
884
|
+
|
|
831
885
|
// TODO check whether this is the right place to perform these checks:
|
|
832
886
|
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
|
833
887
|
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
|
|
834
888
|
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
|
|
835
|
-
|
|
889
|
+
|
|
836
890
|
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
|
|
837
891
|
}
|
|
892
|
+
|
|
893
|
+
template<typename Dst, typename Src, typename Func>
|
|
894
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
895
|
+
void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
|
|
896
|
+
{
|
|
897
|
+
typedef evaluator<Dst> DstEvaluatorType;
|
|
898
|
+
typedef evaluator<Src> SrcEvaluatorType;
|
|
899
|
+
typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel;
|
|
900
|
+
|
|
901
|
+
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
|
902
|
+
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
|
|
903
|
+
|
|
904
|
+
SrcEvaluatorType srcEvaluator(src);
|
|
905
|
+
resize_if_allowed(dst, src, func);
|
|
906
|
+
|
|
907
|
+
DstEvaluatorType dstEvaluator(dst);
|
|
908
|
+
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
|
909
|
+
|
|
910
|
+
dense_assignment_loop<Kernel>::run(kernel);
|
|
911
|
+
}
|
|
912
|
+
|
|
838
913
|
template<typename Dst, typename Src>
|
|
839
914
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
840
915
|
void call_assignment_no_alias(Dst& dst, const Src& src)
|
|
@@ -875,7 +950,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
|
|
|
875
950
|
#ifndef EIGEN_NO_DEBUG
|
|
876
951
|
internal::check_for_aliasing(dst, src);
|
|
877
952
|
#endif
|
|
878
|
-
|
|
953
|
+
|
|
879
954
|
call_dense_assignment_loop(dst, src, func);
|
|
880
955
|
}
|
|
881
956
|
};
|
|
@@ -899,7 +974,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
|
|
|
899
974
|
src.evalTo(dst);
|
|
900
975
|
}
|
|
901
976
|
|
|
902
|
-
// NOTE The following two functions are templated to avoid their
|
|
977
|
+
// NOTE The following two functions are templated to avoid their instantiation if not needed
|
|
903
978
|
// This is needed because some expressions support evalTo only and/or have 'void' as scalar type.
|
|
904
979
|
template<typename SrcScalarType>
|
|
905
980
|
EIGEN_DEVICE_FUNC
|
|
@@ -68,16 +68,16 @@ class vml_assign_traits
|
|
|
68
68
|
|
|
69
69
|
#define EIGEN_PP_EXPAND(ARG) ARG
|
|
70
70
|
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
|
|
71
|
-
#define
|
|
71
|
+
#define EIGEN_VMLMODE_EXPAND_xLA , VML_HA
|
|
72
72
|
#else
|
|
73
|
-
#define
|
|
73
|
+
#define EIGEN_VMLMODE_EXPAND_xLA , VML_LA
|
|
74
74
|
#endif
|
|
75
75
|
|
|
76
|
-
#define
|
|
76
|
+
#define EIGEN_VMLMODE_EXPAND_x_
|
|
77
77
|
|
|
78
|
-
#define
|
|
79
|
-
#define
|
|
80
|
-
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(
|
|
78
|
+
#define EIGEN_VMLMODE_PREFIX_xLA vm
|
|
79
|
+
#define EIGEN_VMLMODE_PREFIX_x_ v
|
|
80
|
+
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x,VMLMODE)
|
|
81
81
|
|
|
82
82
|
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
|
|
83
83
|
template< typename DstXprType, typename SrcXprNested> \
|
|
@@ -89,7 +89,7 @@ class vml_assign_traits
|
|
|
89
89
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
|
90
90
|
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
|
|
91
91
|
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
|
|
92
|
-
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(
|
|
92
|
+
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) ); \
|
|
93
93
|
} else { \
|
|
94
94
|
const Index outerSize = dst.outerSize(); \
|
|
95
95
|
for(Index outer = 0; outer < outerSize; ++outer) { \
|
|
@@ -97,7 +97,7 @@ class vml_assign_traits
|
|
|
97
97
|
&(src.nestedExpression().coeffRef(0, outer)); \
|
|
98
98
|
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
|
|
99
99
|
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
|
|
100
|
-
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(
|
|
100
|
+
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
|
|
101
101
|
} \
|
|
102
102
|
} \
|
|
103
103
|
} \
|
|
@@ -152,7 +152,7 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
|
|
|
152
152
|
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
|
|
153
153
|
{ \
|
|
154
154
|
VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \
|
|
155
|
-
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(
|
|
155
|
+
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) ); \
|
|
156
156
|
} else { \
|
|
157
157
|
const Index outerSize = dst.outerSize(); \
|
|
158
158
|
for(Index outer = 0; outer < outerSize; ++outer) { \
|
|
@@ -160,7 +160,7 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
|
|
|
160
160
|
&(src.lhs().coeffRef(0, outer)); \
|
|
161
161
|
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
|
|
162
162
|
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
|
|
163
|
-
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(
|
|
163
|
+
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
|
|
164
164
|
} \
|
|
165
165
|
} \
|
|
166
166
|
} \
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
#ifndef EIGEN_BANDMATRIX_H
|
|
11
11
|
#define EIGEN_BANDMATRIX_H
|
|
12
12
|
|
|
13
|
-
namespace Eigen {
|
|
13
|
+
namespace Eigen {
|
|
14
14
|
|
|
15
15
|
namespace internal {
|
|
16
16
|
|
|
@@ -45,7 +45,7 @@ class BandMatrixBase : public EigenBase<Derived>
|
|
|
45
45
|
};
|
|
46
46
|
|
|
47
47
|
public:
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
using Base::derived;
|
|
50
50
|
using Base::rows;
|
|
51
51
|
using Base::cols;
|
|
@@ -55,10 +55,10 @@ class BandMatrixBase : public EigenBase<Derived>
|
|
|
55
55
|
|
|
56
56
|
/** \returns the number of sub diagonals */
|
|
57
57
|
inline Index subs() const { return derived().subs(); }
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
/** \returns an expression of the underlying coefficient matrix */
|
|
60
60
|
inline const CoefficientsType& coeffs() const { return derived().coeffs(); }
|
|
61
|
-
|
|
61
|
+
|
|
62
62
|
/** \returns an expression of the underlying coefficient matrix */
|
|
63
63
|
inline CoefficientsType& coeffs() { return derived().coeffs(); }
|
|
64
64
|
|
|
@@ -67,7 +67,7 @@ class BandMatrixBase : public EigenBase<Derived>
|
|
|
67
67
|
* \warning the internal storage must be column major. */
|
|
68
68
|
inline Block<CoefficientsType,Dynamic,1> col(Index i)
|
|
69
69
|
{
|
|
70
|
-
EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
|
|
70
|
+
EIGEN_STATIC_ASSERT((int(Options) & int(RowMajor)) == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
|
|
71
71
|
Index start = 0;
|
|
72
72
|
Index len = coeffs().rows();
|
|
73
73
|
if (i<=supers())
|
|
@@ -90,7 +90,7 @@ class BandMatrixBase : public EigenBase<Derived>
|
|
|
90
90
|
|
|
91
91
|
template<int Index> struct DiagonalIntReturnType {
|
|
92
92
|
enum {
|
|
93
|
-
ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)),
|
|
93
|
+
ReturnOpposite = (int(Options) & int(SelfAdjoint)) && (((Index) > 0 && Supers == 0) || ((Index) < 0 && Subs == 0)),
|
|
94
94
|
Conjugate = ReturnOpposite && NumTraits<Scalar>::IsComplex,
|
|
95
95
|
ActualIndex = ReturnOpposite ? -Index : Index,
|
|
96
96
|
DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)
|
|
@@ -130,7 +130,7 @@ class BandMatrixBase : public EigenBase<Derived>
|
|
|
130
130
|
eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
|
|
131
131
|
return Block<const CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
|
|
132
132
|
}
|
|
133
|
-
|
|
133
|
+
|
|
134
134
|
template<typename Dest> inline void evalTo(Dest& dst) const
|
|
135
135
|
{
|
|
136
136
|
dst.resize(rows(),cols());
|
|
@@ -192,7 +192,7 @@ struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
|
|
|
192
192
|
Options = _Options,
|
|
193
193
|
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
|
|
194
194
|
};
|
|
195
|
-
typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> CoefficientsType;
|
|
195
|
+
typedef Matrix<Scalar, DataRowsAtCompileTime, ColsAtCompileTime, int(Options) & int(RowMajor) ? RowMajor : ColMajor> CoefficientsType;
|
|
196
196
|
};
|
|
197
197
|
|
|
198
198
|
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
|
|
@@ -211,16 +211,16 @@ class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Sub
|
|
|
211
211
|
}
|
|
212
212
|
|
|
213
213
|
/** \returns the number of rows */
|
|
214
|
-
inline Index rows() const { return m_rows.value(); }
|
|
214
|
+
inline EIGEN_CONSTEXPR Index rows() const { return m_rows.value(); }
|
|
215
215
|
|
|
216
216
|
/** \returns the number of columns */
|
|
217
|
-
inline Index cols() const { return m_coeffs.cols(); }
|
|
217
|
+
inline EIGEN_CONSTEXPR Index cols() const { return m_coeffs.cols(); }
|
|
218
218
|
|
|
219
219
|
/** \returns the number of super diagonals */
|
|
220
|
-
inline Index supers() const { return m_supers.value(); }
|
|
220
|
+
inline EIGEN_CONSTEXPR Index supers() const { return m_supers.value(); }
|
|
221
221
|
|
|
222
222
|
/** \returns the number of sub diagonals */
|
|
223
|
-
inline Index subs() const { return m_subs.value(); }
|
|
223
|
+
inline EIGEN_CONSTEXPR Index subs() const { return m_subs.value(); }
|
|
224
224
|
|
|
225
225
|
inline const CoefficientsType& coeffs() const { return m_coeffs; }
|
|
226
226
|
inline CoefficientsType& coeffs() { return m_coeffs; }
|
|
@@ -275,16 +275,16 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
|
|
|
275
275
|
}
|
|
276
276
|
|
|
277
277
|
/** \returns the number of rows */
|
|
278
|
-
inline Index rows() const { return m_rows.value(); }
|
|
278
|
+
inline EIGEN_CONSTEXPR Index rows() const { return m_rows.value(); }
|
|
279
279
|
|
|
280
280
|
/** \returns the number of columns */
|
|
281
|
-
inline Index cols() const { return m_coeffs.cols(); }
|
|
281
|
+
inline EIGEN_CONSTEXPR Index cols() const { return m_coeffs.cols(); }
|
|
282
282
|
|
|
283
283
|
/** \returns the number of super diagonals */
|
|
284
|
-
inline Index supers() const { return m_supers.value(); }
|
|
284
|
+
inline EIGEN_CONSTEXPR Index supers() const { return m_supers.value(); }
|
|
285
285
|
|
|
286
286
|
/** \returns the number of sub diagonals */
|
|
287
|
-
inline Index subs() const { return m_subs.value(); }
|
|
287
|
+
inline EIGEN_CONSTEXPR Index subs() const { return m_subs.value(); }
|
|
288
288
|
|
|
289
289
|
inline const CoefficientsType& coeffs() const { return m_coeffs; }
|
|
290
290
|
|