@smake/eigen 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +26 -0
- package/eigen/COPYING.GPL +674 -0
- package/eigen/COPYING.LGPL +502 -0
- package/eigen/COPYING.MINPACK +51 -0
- package/eigen/COPYING.MPL2 +373 -0
- package/eigen/COPYING.README +18 -0
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +5 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
#ifndef EIGEN_VISITOR_H
|
|
11
11
|
#define EIGEN_VISITOR_H
|
|
12
12
|
|
|
13
|
-
namespace Eigen {
|
|
13
|
+
namespace Eigen {
|
|
14
14
|
|
|
15
15
|
namespace internal {
|
|
16
16
|
|
|
@@ -40,6 +40,14 @@ struct visitor_impl<Visitor, Derived, 1>
|
|
|
40
40
|
}
|
|
41
41
|
};
|
|
42
42
|
|
|
43
|
+
// This specialization enables visitors on empty matrices at compile-time
|
|
44
|
+
template<typename Visitor, typename Derived>
|
|
45
|
+
struct visitor_impl<Visitor, Derived, 0> {
|
|
46
|
+
EIGEN_DEVICE_FUNC
|
|
47
|
+
static inline void run(const Derived &/*mat*/, Visitor& /*visitor*/)
|
|
48
|
+
{}
|
|
49
|
+
};
|
|
50
|
+
|
|
43
51
|
template<typename Visitor, typename Derived>
|
|
44
52
|
struct visitor_impl<Visitor, Derived, Dynamic>
|
|
45
53
|
{
|
|
@@ -62,22 +70,22 @@ class visitor_evaluator
|
|
|
62
70
|
public:
|
|
63
71
|
EIGEN_DEVICE_FUNC
|
|
64
72
|
explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
|
|
65
|
-
|
|
73
|
+
|
|
66
74
|
typedef typename XprType::Scalar Scalar;
|
|
67
75
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
|
68
|
-
|
|
76
|
+
|
|
69
77
|
enum {
|
|
70
78
|
RowsAtCompileTime = XprType::RowsAtCompileTime,
|
|
71
79
|
CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
|
|
72
80
|
};
|
|
73
|
-
|
|
74
|
-
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
|
|
75
|
-
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
|
76
|
-
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
|
|
81
|
+
|
|
82
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); }
|
|
83
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); }
|
|
84
|
+
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_xpr.size(); }
|
|
77
85
|
|
|
78
86
|
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
|
79
87
|
{ return m_evaluator.coeff(row, col); }
|
|
80
|
-
|
|
88
|
+
|
|
81
89
|
protected:
|
|
82
90
|
internal::evaluator<XprType> m_evaluator;
|
|
83
91
|
const XprType &m_xpr;
|
|
@@ -99,6 +107,8 @@ protected:
|
|
|
99
107
|
* \note compared to one or two \em for \em loops, visitors offer automatic
|
|
100
108
|
* unrolling for small fixed size matrix.
|
|
101
109
|
*
|
|
110
|
+
* \note if the matrix is empty, then the visitor is left unchanged.
|
|
111
|
+
*
|
|
102
112
|
* \sa minCoeff(Index*,Index*), maxCoeff(Index*,Index*), DenseBase::redux()
|
|
103
113
|
*/
|
|
104
114
|
template<typename Derived>
|
|
@@ -106,12 +116,15 @@ template<typename Visitor>
|
|
|
106
116
|
EIGEN_DEVICE_FUNC
|
|
107
117
|
void DenseBase<Derived>::visit(Visitor& visitor) const
|
|
108
118
|
{
|
|
119
|
+
if(size()==0)
|
|
120
|
+
return;
|
|
121
|
+
|
|
109
122
|
typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
|
|
110
123
|
ThisEvaluator thisEval(derived());
|
|
111
|
-
|
|
124
|
+
|
|
112
125
|
enum {
|
|
113
126
|
unroll = SizeAtCompileTime != Dynamic
|
|
114
|
-
&& SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT
|
|
127
|
+
&& SizeAtCompileTime * int(ThisEvaluator::CoeffReadCost) + (SizeAtCompileTime-1) * int(internal::functor_traits<Visitor>::Cost) <= EIGEN_UNROLLING_LIMIT
|
|
115
128
|
};
|
|
116
129
|
return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);
|
|
117
130
|
}
|
|
@@ -124,6 +137,9 @@ namespace internal {
|
|
|
124
137
|
template <typename Derived>
|
|
125
138
|
struct coeff_visitor
|
|
126
139
|
{
|
|
140
|
+
// default initialization to avoid countless invalid maybe-uninitialized warnings by gcc
|
|
141
|
+
EIGEN_DEVICE_FUNC
|
|
142
|
+
coeff_visitor() : row(-1), col(-1), res(0) {}
|
|
127
143
|
typedef typename Derived::Scalar Scalar;
|
|
128
144
|
Index row, col;
|
|
129
145
|
Scalar res;
|
|
@@ -141,7 +157,7 @@ struct coeff_visitor
|
|
|
141
157
|
*
|
|
142
158
|
* \sa DenseBase::minCoeff(Index*, Index*)
|
|
143
159
|
*/
|
|
144
|
-
template <typename Derived>
|
|
160
|
+
template <typename Derived, int NaNPropagation>
|
|
145
161
|
struct min_coeff_visitor : coeff_visitor<Derived>
|
|
146
162
|
{
|
|
147
163
|
typedef typename Derived::Scalar Scalar;
|
|
@@ -157,8 +173,40 @@ struct min_coeff_visitor : coeff_visitor<Derived>
|
|
|
157
173
|
}
|
|
158
174
|
};
|
|
159
175
|
|
|
160
|
-
template<typename
|
|
161
|
-
struct
|
|
176
|
+
template <typename Derived>
|
|
177
|
+
struct min_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>
|
|
178
|
+
{
|
|
179
|
+
typedef typename Derived::Scalar Scalar;
|
|
180
|
+
EIGEN_DEVICE_FUNC
|
|
181
|
+
void operator() (const Scalar& value, Index i, Index j)
|
|
182
|
+
{
|
|
183
|
+
if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value < this->res))
|
|
184
|
+
{
|
|
185
|
+
this->res = value;
|
|
186
|
+
this->row = i;
|
|
187
|
+
this->col = j;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
template <typename Derived>
|
|
193
|
+
struct min_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>
|
|
194
|
+
{
|
|
195
|
+
typedef typename Derived::Scalar Scalar;
|
|
196
|
+
EIGEN_DEVICE_FUNC
|
|
197
|
+
void operator() (const Scalar& value, Index i, Index j)
|
|
198
|
+
{
|
|
199
|
+
if((numext::isnan)(value) || value < this->res)
|
|
200
|
+
{
|
|
201
|
+
this->res = value;
|
|
202
|
+
this->row = i;
|
|
203
|
+
this->col = j;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
};
|
|
207
|
+
|
|
208
|
+
template<typename Scalar, int NaNPropagation>
|
|
209
|
+
struct functor_traits<min_coeff_visitor<Scalar, NaNPropagation> > {
|
|
162
210
|
enum {
|
|
163
211
|
Cost = NumTraits<Scalar>::AddCost
|
|
164
212
|
};
|
|
@@ -169,10 +217,10 @@ struct functor_traits<min_coeff_visitor<Scalar> > {
|
|
|
169
217
|
*
|
|
170
218
|
* \sa DenseBase::maxCoeff(Index*, Index*)
|
|
171
219
|
*/
|
|
172
|
-
template <typename Derived>
|
|
220
|
+
template <typename Derived, int NaNPropagation>
|
|
173
221
|
struct max_coeff_visitor : coeff_visitor<Derived>
|
|
174
222
|
{
|
|
175
|
-
typedef typename Derived::Scalar Scalar;
|
|
223
|
+
typedef typename Derived::Scalar Scalar;
|
|
176
224
|
EIGEN_DEVICE_FUNC
|
|
177
225
|
void operator() (const Scalar& value, Index i, Index j)
|
|
178
226
|
{
|
|
@@ -185,8 +233,40 @@ struct max_coeff_visitor : coeff_visitor<Derived>
|
|
|
185
233
|
}
|
|
186
234
|
};
|
|
187
235
|
|
|
188
|
-
template<typename
|
|
189
|
-
struct
|
|
236
|
+
template <typename Derived>
|
|
237
|
+
struct max_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>
|
|
238
|
+
{
|
|
239
|
+
typedef typename Derived::Scalar Scalar;
|
|
240
|
+
EIGEN_DEVICE_FUNC
|
|
241
|
+
void operator() (const Scalar& value, Index i, Index j)
|
|
242
|
+
{
|
|
243
|
+
if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value > this->res))
|
|
244
|
+
{
|
|
245
|
+
this->res = value;
|
|
246
|
+
this->row = i;
|
|
247
|
+
this->col = j;
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
};
|
|
251
|
+
|
|
252
|
+
template <typename Derived>
|
|
253
|
+
struct max_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>
|
|
254
|
+
{
|
|
255
|
+
typedef typename Derived::Scalar Scalar;
|
|
256
|
+
EIGEN_DEVICE_FUNC
|
|
257
|
+
void operator() (const Scalar& value, Index i, Index j)
|
|
258
|
+
{
|
|
259
|
+
if((numext::isnan)(value) || value > this->res)
|
|
260
|
+
{
|
|
261
|
+
this->res = value;
|
|
262
|
+
this->row = i;
|
|
263
|
+
this->col = j;
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
template<typename Scalar, int NaNPropagation>
|
|
269
|
+
struct functor_traits<max_coeff_visitor<Scalar, NaNPropagation> > {
|
|
190
270
|
enum {
|
|
191
271
|
Cost = NumTraits<Scalar>::AddCost
|
|
192
272
|
};
|
|
@@ -196,17 +276,24 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
|
|
|
196
276
|
|
|
197
277
|
/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
|
198
278
|
* \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
|
199
|
-
*
|
|
279
|
+
*
|
|
280
|
+
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
|
281
|
+
* NaNPropagation == PropagateFast : undefined
|
|
282
|
+
* NaNPropagation == PropagateNaN : result is NaN
|
|
283
|
+
* NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
|
|
284
|
+
* \warning the matrix must not be empty, otherwise an assertion is triggered.
|
|
200
285
|
*
|
|
201
286
|
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
|
|
202
287
|
*/
|
|
203
288
|
template<typename Derived>
|
|
204
|
-
template<typename IndexType>
|
|
289
|
+
template<int NaNPropagation, typename IndexType>
|
|
205
290
|
EIGEN_DEVICE_FUNC
|
|
206
291
|
typename internal::traits<Derived>::Scalar
|
|
207
292
|
DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
|
208
293
|
{
|
|
209
|
-
|
|
294
|
+
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
|
295
|
+
|
|
296
|
+
internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor;
|
|
210
297
|
this->visit(minVisitor);
|
|
211
298
|
*rowId = minVisitor.row;
|
|
212
299
|
if (colId) *colId = minVisitor.col;
|
|
@@ -214,18 +301,25 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
|
|
214
301
|
}
|
|
215
302
|
|
|
216
303
|
/** \returns the minimum of all coefficients of *this and puts in *index its location.
|
|
217
|
-
*
|
|
304
|
+
*
|
|
305
|
+
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
|
306
|
+
* NaNPropagation == PropagateFast : undefined
|
|
307
|
+
* NaNPropagation == PropagateNaN : result is NaN
|
|
308
|
+
* NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
|
|
309
|
+
* \warning the matrix must not be empty, otherwise an assertion is triggered.
|
|
218
310
|
*
|
|
219
311
|
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
|
|
220
312
|
*/
|
|
221
313
|
template<typename Derived>
|
|
222
|
-
template<typename IndexType>
|
|
314
|
+
template<int NaNPropagation, typename IndexType>
|
|
223
315
|
EIGEN_DEVICE_FUNC
|
|
224
316
|
typename internal::traits<Derived>::Scalar
|
|
225
317
|
DenseBase<Derived>::minCoeff(IndexType* index) const
|
|
226
318
|
{
|
|
319
|
+
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
|
320
|
+
|
|
227
321
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
|
228
|
-
|
|
322
|
+
internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor;
|
|
229
323
|
this->visit(minVisitor);
|
|
230
324
|
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
|
|
231
325
|
return minVisitor.res;
|
|
@@ -233,17 +327,24 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
|
|
233
327
|
|
|
234
328
|
/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
|
|
235
329
|
* \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
|
236
|
-
*
|
|
330
|
+
*
|
|
331
|
+
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
|
332
|
+
* NaNPropagation == PropagateFast : undefined
|
|
333
|
+
* NaNPropagation == PropagateNaN : result is NaN
|
|
334
|
+
* NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
|
|
335
|
+
* \warning the matrix must not be empty, otherwise an assertion is triggered.
|
|
237
336
|
*
|
|
238
337
|
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
|
|
239
338
|
*/
|
|
240
339
|
template<typename Derived>
|
|
241
|
-
template<typename IndexType>
|
|
340
|
+
template<int NaNPropagation, typename IndexType>
|
|
242
341
|
EIGEN_DEVICE_FUNC
|
|
243
342
|
typename internal::traits<Derived>::Scalar
|
|
244
343
|
DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
|
245
344
|
{
|
|
246
|
-
|
|
345
|
+
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
|
346
|
+
|
|
347
|
+
internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor;
|
|
247
348
|
this->visit(maxVisitor);
|
|
248
349
|
*rowPtr = maxVisitor.row;
|
|
249
350
|
if (colPtr) *colPtr = maxVisitor.col;
|
|
@@ -251,18 +352,25 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
|
|
251
352
|
}
|
|
252
353
|
|
|
253
354
|
/** \returns the maximum of all coefficients of *this and puts in *index its location.
|
|
254
|
-
*
|
|
355
|
+
*
|
|
356
|
+
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
|
357
|
+
* NaNPropagation == PropagateFast : undefined
|
|
358
|
+
* NaNPropagation == PropagateNaN : result is NaN
|
|
359
|
+
* NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
|
|
360
|
+
* \warning the matrix must not be empty, otherwise an assertion is triggered.
|
|
255
361
|
*
|
|
256
362
|
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
|
|
257
363
|
*/
|
|
258
364
|
template<typename Derived>
|
|
259
|
-
template<typename IndexType>
|
|
365
|
+
template<int NaNPropagation, typename IndexType>
|
|
260
366
|
EIGEN_DEVICE_FUNC
|
|
261
367
|
typename internal::traits<Derived>::Scalar
|
|
262
368
|
DenseBase<Derived>::maxCoeff(IndexType* index) const
|
|
263
369
|
{
|
|
370
|
+
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
|
371
|
+
|
|
264
372
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
|
265
|
-
|
|
373
|
+
internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor;
|
|
266
374
|
this->visit(maxVisitor);
|
|
267
375
|
*index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
|
|
268
376
|
return maxVisitor.res;
|
|
@@ -22,6 +22,7 @@ struct Packet4cf
|
|
|
22
22
|
__m256 v;
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
+
#ifndef EIGEN_VECTORIZE_AVX512
|
|
25
26
|
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
26
27
|
{
|
|
27
28
|
typedef Packet4cf type;
|
|
@@ -37,6 +38,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
|
37
38
|
HasMul = 1,
|
|
38
39
|
HasDiv = 1,
|
|
39
40
|
HasNegate = 1,
|
|
41
|
+
HasSqrt = 1,
|
|
40
42
|
HasAbs = 0,
|
|
41
43
|
HasAbs2 = 0,
|
|
42
44
|
HasMin = 0,
|
|
@@ -44,8 +46,20 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
|
44
46
|
HasSetLinear = 0
|
|
45
47
|
};
|
|
46
48
|
};
|
|
49
|
+
#endif
|
|
47
50
|
|
|
48
|
-
template<> struct unpacket_traits<Packet4cf> {
|
|
51
|
+
template<> struct unpacket_traits<Packet4cf> {
|
|
52
|
+
typedef std::complex<float> type;
|
|
53
|
+
typedef Packet2cf half;
|
|
54
|
+
typedef Packet8f as_real;
|
|
55
|
+
enum {
|
|
56
|
+
size=4,
|
|
57
|
+
alignment=Aligned32,
|
|
58
|
+
vectorizable=true,
|
|
59
|
+
masked_load_available=false,
|
|
60
|
+
masked_store_available=false
|
|
61
|
+
};
|
|
62
|
+
};
|
|
49
63
|
|
|
50
64
|
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
|
|
51
65
|
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
|
|
@@ -67,10 +81,17 @@ template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, con
|
|
|
67
81
|
return Packet4cf(result);
|
|
68
82
|
}
|
|
69
83
|
|
|
84
|
+
template <>
|
|
85
|
+
EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) {
|
|
86
|
+
__m256 eq = _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ);
|
|
87
|
+
return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
template<> EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) { return Packet4cf(ptrue(Packet8f(a.v))); }
|
|
70
91
|
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
|
|
71
92
|
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
|
|
72
93
|
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
|
|
73
|
-
template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
|
|
94
|
+
template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(b.v,a.v)); }
|
|
74
95
|
|
|
75
96
|
template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
|
|
76
97
|
template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
|
|
@@ -140,70 +161,12 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packe
|
|
|
140
161
|
Packet2cf(_mm256_extractf128_ps(a.v,1))));
|
|
141
162
|
}
|
|
142
163
|
|
|
143
|
-
template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
|
|
144
|
-
{
|
|
145
|
-
Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
146
|
-
Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
147
|
-
t0 = _mm256_hadd_ps(t0,t1);
|
|
148
|
-
Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
149
|
-
Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
150
|
-
t2 = _mm256_hadd_ps(t2,t3);
|
|
151
|
-
|
|
152
|
-
t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
|
|
153
|
-
t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
|
|
154
|
-
|
|
155
|
-
return Packet4cf(_mm256_add_ps(t1,t3));
|
|
156
|
-
}
|
|
157
|
-
|
|
158
164
|
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
|
|
159
165
|
{
|
|
160
166
|
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
|
|
161
167
|
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
|
162
168
|
}
|
|
163
169
|
|
|
164
|
-
template<int Offset>
|
|
165
|
-
struct palign_impl<Offset,Packet4cf>
|
|
166
|
-
{
|
|
167
|
-
static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
|
|
168
|
-
{
|
|
169
|
-
if (Offset==0) return;
|
|
170
|
-
palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
|
|
171
|
-
}
|
|
172
|
-
};
|
|
173
|
-
|
|
174
|
-
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
|
|
175
|
-
{
|
|
176
|
-
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
|
177
|
-
{ return padd(pmul(x,y),c); }
|
|
178
|
-
|
|
179
|
-
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
180
|
-
{
|
|
181
|
-
return internal::pmul(a, pconj(b));
|
|
182
|
-
}
|
|
183
|
-
};
|
|
184
|
-
|
|
185
|
-
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
|
|
186
|
-
{
|
|
187
|
-
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
|
188
|
-
{ return padd(pmul(x,y),c); }
|
|
189
|
-
|
|
190
|
-
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
191
|
-
{
|
|
192
|
-
return internal::pmul(pconj(a), b);
|
|
193
|
-
}
|
|
194
|
-
};
|
|
195
|
-
|
|
196
|
-
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
|
|
197
|
-
{
|
|
198
|
-
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
|
199
|
-
{ return padd(pmul(x,y),c); }
|
|
200
|
-
|
|
201
|
-
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
202
|
-
{
|
|
203
|
-
return pconj(internal::pmul(a, b));
|
|
204
|
-
}
|
|
205
|
-
};
|
|
206
|
-
|
|
207
170
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
|
|
208
171
|
|
|
209
172
|
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
|
@@ -228,6 +191,7 @@ struct Packet2cd
|
|
|
228
191
|
__m256d v;
|
|
229
192
|
};
|
|
230
193
|
|
|
194
|
+
#ifndef EIGEN_VECTORIZE_AVX512
|
|
231
195
|
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
232
196
|
{
|
|
233
197
|
typedef Packet2cd type;
|
|
@@ -243,6 +207,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
|
243
207
|
HasMul = 1,
|
|
244
208
|
HasDiv = 1,
|
|
245
209
|
HasNegate = 1,
|
|
210
|
+
HasSqrt = 1,
|
|
246
211
|
HasAbs = 0,
|
|
247
212
|
HasAbs2 = 0,
|
|
248
213
|
HasMin = 0,
|
|
@@ -250,8 +215,20 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
|
250
215
|
HasSetLinear = 0
|
|
251
216
|
};
|
|
252
217
|
};
|
|
218
|
+
#endif
|
|
253
219
|
|
|
254
|
-
template<> struct unpacket_traits<Packet2cd> {
|
|
220
|
+
template<> struct unpacket_traits<Packet2cd> {
|
|
221
|
+
typedef std::complex<double> type;
|
|
222
|
+
typedef Packet1cd half;
|
|
223
|
+
typedef Packet4d as_real;
|
|
224
|
+
enum {
|
|
225
|
+
size=2,
|
|
226
|
+
alignment=Aligned32,
|
|
227
|
+
vectorizable=true,
|
|
228
|
+
masked_load_available=false,
|
|
229
|
+
masked_store_available=false
|
|
230
|
+
};
|
|
231
|
+
};
|
|
255
232
|
|
|
256
233
|
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
|
|
257
234
|
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
|
|
@@ -272,10 +249,17 @@ template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, con
|
|
|
272
249
|
return Packet2cd(_mm256_addsub_pd(even, odd));
|
|
273
250
|
}
|
|
274
251
|
|
|
252
|
+
template <>
|
|
253
|
+
EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) {
|
|
254
|
+
__m256d eq = _mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ);
|
|
255
|
+
return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5)));
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
template<> EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) { return Packet2cd(ptrue(Packet4d(a.v))); }
|
|
275
259
|
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
|
|
276
260
|
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
|
|
277
261
|
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
|
|
278
|
-
template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
|
|
262
|
+
template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(b.v,a.v)); }
|
|
279
263
|
|
|
280
264
|
template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
|
|
281
265
|
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
|
|
@@ -327,63 +311,12 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Pack
|
|
|
327
311
|
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
|
328
312
|
}
|
|
329
313
|
|
|
330
|
-
template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
|
|
331
|
-
{
|
|
332
|
-
Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
|
|
333
|
-
Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
|
|
334
|
-
|
|
335
|
-
return Packet2cd(_mm256_add_pd(t0,t1));
|
|
336
|
-
}
|
|
337
|
-
|
|
338
314
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
|
|
339
315
|
{
|
|
340
316
|
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
|
341
317
|
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
|
342
318
|
}
|
|
343
319
|
|
|
344
|
-
template<int Offset>
|
|
345
|
-
struct palign_impl<Offset,Packet2cd>
|
|
346
|
-
{
|
|
347
|
-
static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
|
|
348
|
-
{
|
|
349
|
-
if (Offset==0) return;
|
|
350
|
-
palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
|
|
351
|
-
}
|
|
352
|
-
};
|
|
353
|
-
|
|
354
|
-
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
|
|
355
|
-
{
|
|
356
|
-
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
357
|
-
{ return padd(pmul(x,y),c); }
|
|
358
|
-
|
|
359
|
-
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
360
|
-
{
|
|
361
|
-
return internal::pmul(a, pconj(b));
|
|
362
|
-
}
|
|
363
|
-
};
|
|
364
|
-
|
|
365
|
-
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
|
|
366
|
-
{
|
|
367
|
-
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
368
|
-
{ return padd(pmul(x,y),c); }
|
|
369
|
-
|
|
370
|
-
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
371
|
-
{
|
|
372
|
-
return internal::pmul(pconj(a), b);
|
|
373
|
-
}
|
|
374
|
-
};
|
|
375
|
-
|
|
376
|
-
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
|
|
377
|
-
{
|
|
378
|
-
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
379
|
-
{ return padd(pmul(x,y),c); }
|
|
380
|
-
|
|
381
|
-
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
382
|
-
{
|
|
383
|
-
return pconj(internal::pmul(a, b));
|
|
384
|
-
}
|
|
385
|
-
};
|
|
386
|
-
|
|
387
320
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
|
|
388
321
|
|
|
389
322
|
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
|
@@ -424,24 +357,12 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
|
|
424
357
|
kernel.packet[0].v = tmp;
|
|
425
358
|
}
|
|
426
359
|
|
|
427
|
-
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
|
|
428
|
-
{
|
|
429
|
-
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
|
|
433
|
-
{
|
|
434
|
-
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
|
|
360
|
+
template<> EIGEN_STRONG_INLINE Packet2cd psqrt<Packet2cd>(const Packet2cd& a) {
|
|
361
|
+
return psqrt_complex<Packet2cd>(a);
|
|
435
362
|
}
|
|
436
363
|
|
|
437
|
-
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
|
|
438
|
-
{
|
|
439
|
-
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
|
|
440
|
-
}
|
|
441
|
-
|
|
442
|
-
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
|
|
443
|
-
{
|
|
444
|
-
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
|
|
364
|
+
template<> EIGEN_STRONG_INLINE Packet4cf psqrt<Packet4cf>(const Packet4cf& a) {
|
|
365
|
+
return psqrt_complex<Packet4cf>(a);
|
|
445
366
|
}
|
|
446
367
|
|
|
447
368
|
} // end namespace internal
|