@smake/eigen 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +1 -1
- package/eigen/COPYING.MINPACK +51 -52
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +2 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -128,7 +128,7 @@ DenseBase<Derived>::Random()
|
|
|
128
128
|
* \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index)
|
|
129
129
|
*/
|
|
130
130
|
template<typename Derived>
|
|
131
|
-
inline Derived& DenseBase<Derived>::setRandom()
|
|
131
|
+
EIGEN_DEVICE_FUNC inline Derived& DenseBase<Derived>::setRandom()
|
|
132
132
|
{
|
|
133
133
|
return *this = Random(rows(), cols());
|
|
134
134
|
}
|
|
@@ -177,6 +177,42 @@ PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
|
|
|
177
177
|
return setRandom();
|
|
178
178
|
}
|
|
179
179
|
|
|
180
|
+
/** Resizes to the given size, changing only the number of columns, and sets all
|
|
181
|
+
* coefficients in this expression to random values. For the parameter of type
|
|
182
|
+
* NoChange_t, just pass the special value \c NoChange.
|
|
183
|
+
*
|
|
184
|
+
* Numbers are uniformly spread through their whole definition range for integer types,
|
|
185
|
+
* and in the [-1:1] range for floating point scalar types.
|
|
186
|
+
*
|
|
187
|
+
* \not_reentrant
|
|
188
|
+
*
|
|
189
|
+
* \sa DenseBase::setRandom(), setRandom(Index), setRandom(Index, NoChange_t), class CwiseNullaryOp, DenseBase::Random()
|
|
190
|
+
*/
|
|
191
|
+
template<typename Derived>
|
|
192
|
+
EIGEN_STRONG_INLINE Derived&
|
|
193
|
+
PlainObjectBase<Derived>::setRandom(NoChange_t, Index cols)
|
|
194
|
+
{
|
|
195
|
+
return setRandom(rows(), cols);
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
/** Resizes to the given size, changing only the number of rows, and sets all
|
|
199
|
+
* coefficients in this expression to random values. For the parameter of type
|
|
200
|
+
* NoChange_t, just pass the special value \c NoChange.
|
|
201
|
+
*
|
|
202
|
+
* Numbers are uniformly spread through their whole definition range for integer types,
|
|
203
|
+
* and in the [-1:1] range for floating point scalar types.
|
|
204
|
+
*
|
|
205
|
+
* \not_reentrant
|
|
206
|
+
*
|
|
207
|
+
* \sa DenseBase::setRandom(), setRandom(Index), setRandom(NoChange_t, Index), class CwiseNullaryOp, DenseBase::Random()
|
|
208
|
+
*/
|
|
209
|
+
template<typename Derived>
|
|
210
|
+
EIGEN_STRONG_INLINE Derived&
|
|
211
|
+
PlainObjectBase<Derived>::setRandom(Index rows, NoChange_t)
|
|
212
|
+
{
|
|
213
|
+
return setRandom(rows, cols());
|
|
214
|
+
}
|
|
215
|
+
|
|
180
216
|
} // end namespace Eigen
|
|
181
217
|
|
|
182
218
|
#endif // EIGEN_RANDOM_H
|
|
@@ -23,23 +23,29 @@ namespace internal {
|
|
|
23
23
|
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
|
24
24
|
***************************************************************************/
|
|
25
25
|
|
|
26
|
-
template<typename Func, typename
|
|
26
|
+
template<typename Func, typename Evaluator>
|
|
27
27
|
struct redux_traits
|
|
28
28
|
{
|
|
29
29
|
public:
|
|
30
|
-
typedef typename find_best_packet<typename
|
|
30
|
+
typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
|
|
31
31
|
enum {
|
|
32
32
|
PacketSize = unpacket_traits<PacketType>::size,
|
|
33
|
-
InnerMaxSize = int(
|
|
34
|
-
?
|
|
35
|
-
:
|
|
33
|
+
InnerMaxSize = int(Evaluator::IsRowMajor)
|
|
34
|
+
? Evaluator::MaxColsAtCompileTime
|
|
35
|
+
: Evaluator::MaxRowsAtCompileTime,
|
|
36
|
+
OuterMaxSize = int(Evaluator::IsRowMajor)
|
|
37
|
+
? Evaluator::MaxRowsAtCompileTime
|
|
38
|
+
: Evaluator::MaxColsAtCompileTime,
|
|
39
|
+
SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic
|
|
40
|
+
: int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
|
|
41
|
+
: (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
|
|
36
42
|
};
|
|
37
43
|
|
|
38
44
|
enum {
|
|
39
|
-
MightVectorize = (int(
|
|
45
|
+
MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
|
|
40
46
|
&& (functor_traits<Func>::PacketAccess),
|
|
41
|
-
MayLinearVectorize = bool(MightVectorize) && (int(
|
|
42
|
-
MaySliceVectorize = bool(MightVectorize) && int(
|
|
47
|
+
MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
|
|
48
|
+
MaySliceVectorize = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)
|
|
43
49
|
};
|
|
44
50
|
|
|
45
51
|
public:
|
|
@@ -51,8 +57,8 @@ public:
|
|
|
51
57
|
|
|
52
58
|
public:
|
|
53
59
|
enum {
|
|
54
|
-
Cost =
|
|
55
|
-
:
|
|
60
|
+
Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
|
|
61
|
+
: int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
|
|
56
62
|
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
|
|
57
63
|
};
|
|
58
64
|
|
|
@@ -64,18 +70,20 @@ public:
|
|
|
64
70
|
#ifdef EIGEN_DEBUG_ASSIGN
|
|
65
71
|
static void debug()
|
|
66
72
|
{
|
|
67
|
-
std::cerr << "Xpr: " << typeid(typename
|
|
73
|
+
std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
|
|
68
74
|
std::cerr.setf(std::ios::hex, std::ios::basefield);
|
|
69
|
-
EIGEN_DEBUG_VAR(
|
|
75
|
+
EIGEN_DEBUG_VAR(Evaluator::Flags)
|
|
70
76
|
std::cerr.unsetf(std::ios::hex);
|
|
71
77
|
EIGEN_DEBUG_VAR(InnerMaxSize)
|
|
78
|
+
EIGEN_DEBUG_VAR(OuterMaxSize)
|
|
79
|
+
EIGEN_DEBUG_VAR(SliceVectorizedWork)
|
|
72
80
|
EIGEN_DEBUG_VAR(PacketSize)
|
|
73
81
|
EIGEN_DEBUG_VAR(MightVectorize)
|
|
74
82
|
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
|
75
83
|
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
|
76
|
-
|
|
84
|
+
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
|
|
77
85
|
EIGEN_DEBUG_VAR(UnrollingLimit)
|
|
78
|
-
|
|
86
|
+
std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
|
|
79
87
|
std::cerr << std::endl;
|
|
80
88
|
}
|
|
81
89
|
#endif
|
|
@@ -87,88 +95,86 @@ public:
|
|
|
87
95
|
|
|
88
96
|
/*** no vectorization ***/
|
|
89
97
|
|
|
90
|
-
template<typename Func, typename
|
|
98
|
+
template<typename Func, typename Evaluator, int Start, int Length>
|
|
91
99
|
struct redux_novec_unroller
|
|
92
100
|
{
|
|
93
101
|
enum {
|
|
94
102
|
HalfLength = Length/2
|
|
95
103
|
};
|
|
96
104
|
|
|
97
|
-
typedef typename
|
|
105
|
+
typedef typename Evaluator::Scalar Scalar;
|
|
98
106
|
|
|
99
107
|
EIGEN_DEVICE_FUNC
|
|
100
|
-
static EIGEN_STRONG_INLINE Scalar run(const
|
|
108
|
+
static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
|
|
101
109
|
{
|
|
102
|
-
return func(redux_novec_unroller<Func,
|
|
103
|
-
redux_novec_unroller<Func,
|
|
110
|
+
return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
|
|
111
|
+
redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
|
|
104
112
|
}
|
|
105
113
|
};
|
|
106
114
|
|
|
107
|
-
template<typename Func, typename
|
|
108
|
-
struct redux_novec_unroller<Func,
|
|
115
|
+
template<typename Func, typename Evaluator, int Start>
|
|
116
|
+
struct redux_novec_unroller<Func, Evaluator, Start, 1>
|
|
109
117
|
{
|
|
110
118
|
enum {
|
|
111
|
-
outer = Start /
|
|
112
|
-
inner = Start %
|
|
119
|
+
outer = Start / Evaluator::InnerSizeAtCompileTime,
|
|
120
|
+
inner = Start % Evaluator::InnerSizeAtCompileTime
|
|
113
121
|
};
|
|
114
122
|
|
|
115
|
-
typedef typename
|
|
123
|
+
typedef typename Evaluator::Scalar Scalar;
|
|
116
124
|
|
|
117
125
|
EIGEN_DEVICE_FUNC
|
|
118
|
-
static EIGEN_STRONG_INLINE Scalar run(const
|
|
126
|
+
static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
|
|
119
127
|
{
|
|
120
|
-
return
|
|
128
|
+
return eval.coeffByOuterInner(outer, inner);
|
|
121
129
|
}
|
|
122
130
|
};
|
|
123
131
|
|
|
124
132
|
// This is actually dead code and will never be called. It is required
|
|
125
133
|
// to prevent false warnings regarding failed inlining though
|
|
126
134
|
// for 0 length run() will never be called at all.
|
|
127
|
-
template<typename Func, typename
|
|
128
|
-
struct redux_novec_unroller<Func,
|
|
135
|
+
template<typename Func, typename Evaluator, int Start>
|
|
136
|
+
struct redux_novec_unroller<Func, Evaluator, Start, 0>
|
|
129
137
|
{
|
|
130
|
-
typedef typename
|
|
138
|
+
typedef typename Evaluator::Scalar Scalar;
|
|
131
139
|
EIGEN_DEVICE_FUNC
|
|
132
|
-
static EIGEN_STRONG_INLINE Scalar run(const
|
|
140
|
+
static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
|
|
133
141
|
};
|
|
134
142
|
|
|
135
143
|
/*** vectorization ***/
|
|
136
144
|
|
|
137
|
-
template<typename Func, typename
|
|
145
|
+
template<typename Func, typename Evaluator, int Start, int Length>
|
|
138
146
|
struct redux_vec_unroller
|
|
139
147
|
{
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
};
|
|
144
|
-
|
|
145
|
-
typedef typename Derived::Scalar Scalar;
|
|
146
|
-
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
|
147
|
-
|
|
148
|
-
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
|
|
148
|
+
template<typename PacketType>
|
|
149
|
+
EIGEN_DEVICE_FUNC
|
|
150
|
+
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
|
|
149
151
|
{
|
|
152
|
+
enum {
|
|
153
|
+
PacketSize = unpacket_traits<PacketType>::size,
|
|
154
|
+
HalfLength = Length/2
|
|
155
|
+
};
|
|
156
|
+
|
|
150
157
|
return func.packetOp(
|
|
151
|
-
redux_vec_unroller<Func,
|
|
152
|
-
redux_vec_unroller<Func,
|
|
158
|
+
redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
|
|
159
|
+
redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );
|
|
153
160
|
}
|
|
154
161
|
};
|
|
155
162
|
|
|
156
|
-
template<typename Func, typename
|
|
157
|
-
struct redux_vec_unroller<Func,
|
|
163
|
+
template<typename Func, typename Evaluator, int Start>
|
|
164
|
+
struct redux_vec_unroller<Func, Evaluator, Start, 1>
|
|
158
165
|
{
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
inner = index % int(Derived::InnerSizeAtCompileTime),
|
|
163
|
-
alignment = Derived::Alignment
|
|
164
|
-
};
|
|
165
|
-
|
|
166
|
-
typedef typename Derived::Scalar Scalar;
|
|
167
|
-
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
|
168
|
-
|
|
169
|
-
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
|
|
166
|
+
template<typename PacketType>
|
|
167
|
+
EIGEN_DEVICE_FUNC
|
|
168
|
+
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
|
|
170
169
|
{
|
|
171
|
-
|
|
170
|
+
enum {
|
|
171
|
+
PacketSize = unpacket_traits<PacketType>::size,
|
|
172
|
+
index = Start * PacketSize,
|
|
173
|
+
outer = index / int(Evaluator::InnerSizeAtCompileTime),
|
|
174
|
+
inner = index % int(Evaluator::InnerSizeAtCompileTime),
|
|
175
|
+
alignment = Evaluator::Alignment
|
|
176
|
+
};
|
|
177
|
+
return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
|
|
172
178
|
}
|
|
173
179
|
};
|
|
174
180
|
|
|
@@ -176,53 +182,65 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
|
|
|
176
182
|
* Part 3 : implementation of all cases
|
|
177
183
|
***************************************************************************/
|
|
178
184
|
|
|
179
|
-
template<typename Func, typename
|
|
180
|
-
int Traversal = redux_traits<Func,
|
|
181
|
-
int Unrolling = redux_traits<Func,
|
|
185
|
+
template<typename Func, typename Evaluator,
|
|
186
|
+
int Traversal = redux_traits<Func, Evaluator>::Traversal,
|
|
187
|
+
int Unrolling = redux_traits<Func, Evaluator>::Unrolling
|
|
182
188
|
>
|
|
183
189
|
struct redux_impl;
|
|
184
190
|
|
|
185
|
-
template<typename Func, typename
|
|
186
|
-
struct redux_impl<Func,
|
|
191
|
+
template<typename Func, typename Evaluator>
|
|
192
|
+
struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
|
|
187
193
|
{
|
|
188
|
-
typedef typename
|
|
189
|
-
|
|
190
|
-
|
|
194
|
+
typedef typename Evaluator::Scalar Scalar;
|
|
195
|
+
|
|
196
|
+
template<typename XprType>
|
|
197
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
|
198
|
+
Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
|
191
199
|
{
|
|
192
|
-
eigen_assert(
|
|
200
|
+
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
|
193
201
|
Scalar res;
|
|
194
|
-
res =
|
|
195
|
-
for(Index i = 1; i <
|
|
196
|
-
res = func(res,
|
|
197
|
-
for(Index i = 1; i <
|
|
198
|
-
for(Index j = 0; j <
|
|
199
|
-
res = func(res,
|
|
202
|
+
res = eval.coeffByOuterInner(0, 0);
|
|
203
|
+
for(Index i = 1; i < xpr.innerSize(); ++i)
|
|
204
|
+
res = func(res, eval.coeffByOuterInner(0, i));
|
|
205
|
+
for(Index i = 1; i < xpr.outerSize(); ++i)
|
|
206
|
+
for(Index j = 0; j < xpr.innerSize(); ++j)
|
|
207
|
+
res = func(res, eval.coeffByOuterInner(i, j));
|
|
200
208
|
return res;
|
|
201
209
|
}
|
|
202
210
|
};
|
|
203
211
|
|
|
204
|
-
template<typename Func, typename
|
|
205
|
-
struct redux_impl<Func,
|
|
206
|
-
:
|
|
207
|
-
{
|
|
212
|
+
template<typename Func, typename Evaluator>
|
|
213
|
+
struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
|
|
214
|
+
: redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
|
|
215
|
+
{
|
|
216
|
+
typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
|
|
217
|
+
typedef typename Evaluator::Scalar Scalar;
|
|
218
|
+
template<typename XprType>
|
|
219
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
|
220
|
+
Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
|
|
221
|
+
{
|
|
222
|
+
return Base::run(eval,func);
|
|
223
|
+
}
|
|
224
|
+
};
|
|
208
225
|
|
|
209
|
-
template<typename Func, typename
|
|
210
|
-
struct redux_impl<Func,
|
|
226
|
+
template<typename Func, typename Evaluator>
|
|
227
|
+
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
|
|
211
228
|
{
|
|
212
|
-
typedef typename
|
|
213
|
-
typedef typename redux_traits<Func,
|
|
229
|
+
typedef typename Evaluator::Scalar Scalar;
|
|
230
|
+
typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
|
|
214
231
|
|
|
215
|
-
|
|
232
|
+
template<typename XprType>
|
|
233
|
+
static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
|
216
234
|
{
|
|
217
|
-
const Index size =
|
|
235
|
+
const Index size = xpr.size();
|
|
218
236
|
|
|
219
|
-
const Index packetSize = redux_traits<Func,
|
|
237
|
+
const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
|
|
220
238
|
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
|
221
239
|
enum {
|
|
222
|
-
alignment0 = (bool(
|
|
223
|
-
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0,
|
|
240
|
+
alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
|
|
241
|
+
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
|
|
224
242
|
};
|
|
225
|
-
const Index alignedStart = internal::first_default_aligned(
|
|
243
|
+
const Index alignedStart = internal::first_default_aligned(xpr);
|
|
226
244
|
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
|
227
245
|
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
|
228
246
|
const Index alignedEnd2 = alignedStart + alignedSize2;
|
|
@@ -230,34 +248,34 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
|
|
230
248
|
Scalar res;
|
|
231
249
|
if(alignedSize)
|
|
232
250
|
{
|
|
233
|
-
PacketScalar packet_res0 =
|
|
251
|
+
PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
|
|
234
252
|
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
|
|
235
253
|
{
|
|
236
|
-
PacketScalar packet_res1 =
|
|
254
|
+
PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
|
|
237
255
|
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
|
|
238
256
|
{
|
|
239
|
-
packet_res0 = func.packetOp(packet_res0,
|
|
240
|
-
packet_res1 = func.packetOp(packet_res1,
|
|
257
|
+
packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
|
|
258
|
+
packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
|
|
241
259
|
}
|
|
242
260
|
|
|
243
261
|
packet_res0 = func.packetOp(packet_res0,packet_res1);
|
|
244
262
|
if(alignedEnd>alignedEnd2)
|
|
245
|
-
packet_res0 = func.packetOp(packet_res0,
|
|
263
|
+
packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
|
|
246
264
|
}
|
|
247
265
|
res = func.predux(packet_res0);
|
|
248
266
|
|
|
249
267
|
for(Index index = 0; index < alignedStart; ++index)
|
|
250
|
-
res = func(res,
|
|
268
|
+
res = func(res,eval.coeff(index));
|
|
251
269
|
|
|
252
270
|
for(Index index = alignedEnd; index < size; ++index)
|
|
253
|
-
res = func(res,
|
|
271
|
+
res = func(res,eval.coeff(index));
|
|
254
272
|
}
|
|
255
273
|
else // too small to vectorize anything.
|
|
256
274
|
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
|
257
275
|
{
|
|
258
|
-
res =
|
|
276
|
+
res = eval.coeff(0);
|
|
259
277
|
for(Index index = 1; index < size; ++index)
|
|
260
|
-
res = func(res,
|
|
278
|
+
res = func(res,eval.coeff(index));
|
|
261
279
|
}
|
|
262
280
|
|
|
263
281
|
return res;
|
|
@@ -265,130 +283,108 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
|
|
265
283
|
};
|
|
266
284
|
|
|
267
285
|
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
|
|
268
|
-
template<typename Func, typename
|
|
269
|
-
struct redux_impl<Func,
|
|
286
|
+
template<typename Func, typename Evaluator, int Unrolling>
|
|
287
|
+
struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
|
|
270
288
|
{
|
|
271
|
-
typedef typename
|
|
272
|
-
typedef typename redux_traits<Func,
|
|
289
|
+
typedef typename Evaluator::Scalar Scalar;
|
|
290
|
+
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
|
|
273
291
|
|
|
274
|
-
|
|
292
|
+
template<typename XprType>
|
|
293
|
+
EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
|
275
294
|
{
|
|
276
|
-
eigen_assert(
|
|
277
|
-
const Index innerSize =
|
|
278
|
-
const Index outerSize =
|
|
295
|
+
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
|
296
|
+
const Index innerSize = xpr.innerSize();
|
|
297
|
+
const Index outerSize = xpr.outerSize();
|
|
279
298
|
enum {
|
|
280
|
-
packetSize = redux_traits<Func,
|
|
299
|
+
packetSize = redux_traits<Func, Evaluator>::PacketSize
|
|
281
300
|
};
|
|
282
301
|
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
|
283
302
|
Scalar res;
|
|
284
303
|
if(packetedInnerSize)
|
|
285
304
|
{
|
|
286
|
-
PacketType packet_res =
|
|
305
|
+
PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
|
|
287
306
|
for(Index j=0; j<outerSize; ++j)
|
|
288
307
|
for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
|
|
289
|
-
packet_res = func.packetOp(packet_res,
|
|
308
|
+
packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));
|
|
290
309
|
|
|
291
310
|
res = func.predux(packet_res);
|
|
292
311
|
for(Index j=0; j<outerSize; ++j)
|
|
293
312
|
for(Index i=packetedInnerSize; i<innerSize; ++i)
|
|
294
|
-
res = func(res,
|
|
313
|
+
res = func(res, eval.coeffByOuterInner(j,i));
|
|
295
314
|
}
|
|
296
315
|
else // too small to vectorize anything.
|
|
297
316
|
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
|
298
317
|
{
|
|
299
|
-
res = redux_impl<Func,
|
|
318
|
+
res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
|
|
300
319
|
}
|
|
301
320
|
|
|
302
321
|
return res;
|
|
303
322
|
}
|
|
304
323
|
};
|
|
305
324
|
|
|
306
|
-
template<typename Func, typename
|
|
307
|
-
struct redux_impl<Func,
|
|
325
|
+
template<typename Func, typename Evaluator>
|
|
326
|
+
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
|
|
308
327
|
{
|
|
309
|
-
typedef typename
|
|
328
|
+
typedef typename Evaluator::Scalar Scalar;
|
|
310
329
|
|
|
311
|
-
typedef typename redux_traits<Func,
|
|
330
|
+
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
|
|
312
331
|
enum {
|
|
313
|
-
PacketSize = redux_traits<Func,
|
|
314
|
-
Size =
|
|
315
|
-
VectorizedSize = (Size / PacketSize) * PacketSize
|
|
332
|
+
PacketSize = redux_traits<Func, Evaluator>::PacketSize,
|
|
333
|
+
Size = Evaluator::SizeAtCompileTime,
|
|
334
|
+
VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
|
|
316
335
|
};
|
|
317
|
-
|
|
336
|
+
|
|
337
|
+
template<typename XprType>
|
|
338
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
|
339
|
+
Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
|
|
318
340
|
{
|
|
319
|
-
|
|
341
|
+
EIGEN_ONLY_USED_FOR_DEBUG(xpr)
|
|
342
|
+
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
|
320
343
|
if (VectorizedSize > 0) {
|
|
321
|
-
Scalar res = func.predux(redux_vec_unroller<Func,
|
|
344
|
+
Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));
|
|
322
345
|
if (VectorizedSize != Size)
|
|
323
|
-
res = func(res,redux_novec_unroller<Func,
|
|
346
|
+
res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
|
|
324
347
|
return res;
|
|
325
348
|
}
|
|
326
349
|
else {
|
|
327
|
-
return redux_novec_unroller<Func,
|
|
350
|
+
return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
|
|
328
351
|
}
|
|
329
352
|
}
|
|
330
353
|
};
|
|
331
354
|
|
|
332
355
|
// evaluator adaptor
|
|
333
356
|
template<typename _XprType>
|
|
334
|
-
class redux_evaluator
|
|
357
|
+
class redux_evaluator : public internal::evaluator<_XprType>
|
|
335
358
|
{
|
|
359
|
+
typedef internal::evaluator<_XprType> Base;
|
|
336
360
|
public:
|
|
337
361
|
typedef _XprType XprType;
|
|
338
|
-
EIGEN_DEVICE_FUNC
|
|
362
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
363
|
+
explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}
|
|
339
364
|
|
|
340
365
|
typedef typename XprType::Scalar Scalar;
|
|
341
366
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
|
342
367
|
typedef typename XprType::PacketScalar PacketScalar;
|
|
343
|
-
typedef typename XprType::PacketReturnType PacketReturnType;
|
|
344
368
|
|
|
345
369
|
enum {
|
|
346
370
|
MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
|
|
347
371
|
MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
|
|
348
372
|
// TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
|
|
349
|
-
Flags =
|
|
373
|
+
Flags = Base::Flags & ~DirectAccessBit,
|
|
350
374
|
IsRowMajor = XprType::IsRowMajor,
|
|
351
375
|
SizeAtCompileTime = XprType::SizeAtCompileTime,
|
|
352
|
-
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
|
|
353
|
-
CoeffReadCost = evaluator<XprType>::CoeffReadCost,
|
|
354
|
-
Alignment = evaluator<XprType>::Alignment
|
|
376
|
+
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
|
|
355
377
|
};
|
|
356
378
|
|
|
357
|
-
EIGEN_DEVICE_FUNC
|
|
358
|
-
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
|
359
|
-
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
|
|
360
|
-
EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
|
|
361
|
-
EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
|
|
362
|
-
|
|
363
|
-
EIGEN_DEVICE_FUNC
|
|
364
|
-
CoeffReturnType coeff(Index row, Index col) const
|
|
365
|
-
{ return m_evaluator.coeff(row, col); }
|
|
366
|
-
|
|
367
|
-
EIGEN_DEVICE_FUNC
|
|
368
|
-
CoeffReturnType coeff(Index index) const
|
|
369
|
-
{ return m_evaluator.coeff(index); }
|
|
370
|
-
|
|
371
|
-
template<int LoadMode, typename PacketType>
|
|
372
|
-
PacketType packet(Index row, Index col) const
|
|
373
|
-
{ return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
|
|
374
|
-
|
|
375
|
-
template<int LoadMode, typename PacketType>
|
|
376
|
-
PacketType packet(Index index) const
|
|
377
|
-
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
|
|
378
|
-
|
|
379
|
-
EIGEN_DEVICE_FUNC
|
|
379
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
380
380
|
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
|
381
|
-
{ return
|
|
381
|
+
{ return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
|
382
382
|
|
|
383
383
|
template<int LoadMode, typename PacketType>
|
|
384
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
384
385
|
PacketType packetByOuterInner(Index outer, Index inner) const
|
|
385
|
-
{ return
|
|
386
|
+
{ return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
|
386
387
|
|
|
387
|
-
const XprType & nestedExpression() const { return m_xpr; }
|
|
388
|
-
|
|
389
|
-
protected:
|
|
390
|
-
internal::evaluator<XprType> m_evaluator;
|
|
391
|
-
const XprType &m_xpr;
|
|
392
388
|
};
|
|
393
389
|
|
|
394
390
|
} // end namespace internal
|
|
@@ -403,39 +399,53 @@ protected:
|
|
|
403
399
|
* The template parameter \a BinaryOp is the type of the functor \a func which must be
|
|
404
400
|
* an associative operator. Both current C++98 and C++11 functor styles are handled.
|
|
405
401
|
*
|
|
402
|
+
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
|
403
|
+
*
|
|
406
404
|
* \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
|
|
407
405
|
*/
|
|
408
406
|
template<typename Derived>
|
|
409
407
|
template<typename Func>
|
|
410
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
408
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
411
409
|
DenseBase<Derived>::redux(const Func& func) const
|
|
412
410
|
{
|
|
413
411
|
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
|
414
412
|
|
|
415
413
|
typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
|
|
416
414
|
ThisEvaluator thisEval(derived());
|
|
417
|
-
|
|
418
|
-
|
|
415
|
+
|
|
416
|
+
// The initial expression is passed to the reducer as an additional argument instead of
|
|
417
|
+
// passing it as a member of redux_evaluator to help
|
|
418
|
+
return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
|
|
419
419
|
}
|
|
420
420
|
|
|
421
421
|
/** \returns the minimum of all coefficients of \c *this.
|
|
422
|
-
*
|
|
422
|
+
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
|
423
|
+
* NaNPropagation == PropagateFast : undefined
|
|
424
|
+
* NaNPropagation == PropagateNaN : result is NaN
|
|
425
|
+
* NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
|
|
426
|
+
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
|
423
427
|
*/
|
|
424
428
|
template<typename Derived>
|
|
425
|
-
|
|
429
|
+
template<int NaNPropagation>
|
|
430
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
426
431
|
DenseBase<Derived>::minCoeff() const
|
|
427
432
|
{
|
|
428
|
-
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
|
|
433
|
+
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
|
|
429
434
|
}
|
|
430
435
|
|
|
431
|
-
/** \returns the maximum of all coefficients of \c *this.
|
|
432
|
-
*
|
|
436
|
+
/** \returns the maximum of all coefficients of \c *this.
|
|
437
|
+
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
|
438
|
+
* NaNPropagation == PropagateFast : undefined
|
|
439
|
+
* NaNPropagation == PropagateNaN : result is NaN
|
|
440
|
+
* NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
|
|
441
|
+
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
|
433
442
|
*/
|
|
434
443
|
template<typename Derived>
|
|
435
|
-
|
|
444
|
+
template<int NaNPropagation>
|
|
445
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
436
446
|
DenseBase<Derived>::maxCoeff() const
|
|
437
447
|
{
|
|
438
|
-
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
|
|
448
|
+
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
|
|
439
449
|
}
|
|
440
450
|
|
|
441
451
|
/** \returns the sum of all coefficients of \c *this
|
|
@@ -445,7 +455,7 @@ DenseBase<Derived>::maxCoeff() const
|
|
|
445
455
|
* \sa trace(), prod(), mean()
|
|
446
456
|
*/
|
|
447
457
|
template<typename Derived>
|
|
448
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
458
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
449
459
|
DenseBase<Derived>::sum() const
|
|
450
460
|
{
|
|
451
461
|
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
|
@@ -458,7 +468,7 @@ DenseBase<Derived>::sum() const
|
|
|
458
468
|
* \sa trace(), prod(), sum()
|
|
459
469
|
*/
|
|
460
470
|
template<typename Derived>
|
|
461
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
471
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
462
472
|
DenseBase<Derived>::mean() const
|
|
463
473
|
{
|
|
464
474
|
#ifdef __INTEL_COMPILER
|
|
@@ -479,7 +489,7 @@ DenseBase<Derived>::mean() const
|
|
|
479
489
|
* \sa sum(), mean(), trace()
|
|
480
490
|
*/
|
|
481
491
|
template<typename Derived>
|
|
482
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
492
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
483
493
|
DenseBase<Derived>::prod() const
|
|
484
494
|
{
|
|
485
495
|
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
|
@@ -494,7 +504,7 @@ DenseBase<Derived>::prod() const
|
|
|
494
504
|
* \sa diagonal(), sum()
|
|
495
505
|
*/
|
|
496
506
|
template<typename Derived>
|
|
497
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
507
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
|
498
508
|
MatrixBase<Derived>::trace() const
|
|
499
509
|
{
|
|
500
510
|
return derived().diagonal().sum();
|