@smake/eigen 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +26 -0
- package/eigen/COPYING.GPL +674 -0
- package/eigen/COPYING.LGPL +502 -0
- package/eigen/COPYING.MINPACK +51 -0
- package/eigen/COPYING.MPL2 +373 -0
- package/eigen/COPYING.README +18 -0
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +5 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -15,9 +15,10 @@ namespace Eigen {
|
|
|
15
15
|
|
|
16
16
|
namespace internal {
|
|
17
17
|
|
|
18
|
-
inline uint32x4_t p4ui_CONJ_XOR()
|
|
18
|
+
inline uint32x4_t p4ui_CONJ_XOR()
|
|
19
|
+
{
|
|
19
20
|
// See bug 1325, clang fails to call vld1q_u64.
|
|
20
|
-
#if EIGEN_COMP_CLANG
|
|
21
|
+
#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
|
|
21
22
|
uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
|
22
23
|
return ret;
|
|
23
24
|
#else
|
|
@@ -26,61 +27,136 @@ inline uint32x4_t p4ui_CONJ_XOR() {
|
|
|
26
27
|
#endif
|
|
27
28
|
}
|
|
28
29
|
|
|
29
|
-
inline uint32x2_t p2ui_CONJ_XOR()
|
|
30
|
+
inline uint32x2_t p2ui_CONJ_XOR()
|
|
31
|
+
{
|
|
30
32
|
static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
|
|
31
33
|
return vld1_u32( conj_XOR_DATA );
|
|
32
34
|
}
|
|
33
35
|
|
|
34
36
|
//---------- float ----------
|
|
37
|
+
|
|
38
|
+
struct Packet1cf
|
|
39
|
+
{
|
|
40
|
+
EIGEN_STRONG_INLINE Packet1cf() {}
|
|
41
|
+
EIGEN_STRONG_INLINE explicit Packet1cf(const Packet2f& a) : v(a) {}
|
|
42
|
+
Packet2f v;
|
|
43
|
+
};
|
|
35
44
|
struct Packet2cf
|
|
36
45
|
{
|
|
37
46
|
EIGEN_STRONG_INLINE Packet2cf() {}
|
|
38
47
|
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
|
39
|
-
Packet4f
|
|
48
|
+
Packet4f v;
|
|
40
49
|
};
|
|
41
50
|
|
|
42
|
-
template<> struct packet_traits<std::complex<float> >
|
|
51
|
+
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
43
52
|
{
|
|
44
53
|
typedef Packet2cf type;
|
|
45
|
-
typedef
|
|
46
|
-
enum
|
|
54
|
+
typedef Packet1cf half;
|
|
55
|
+
enum
|
|
56
|
+
{
|
|
47
57
|
Vectorizable = 1,
|
|
48
58
|
AlignedOnScalar = 1,
|
|
49
59
|
size = 2,
|
|
50
|
-
HasHalfPacket =
|
|
51
|
-
|
|
52
|
-
HasAdd
|
|
53
|
-
HasSub
|
|
54
|
-
HasMul
|
|
55
|
-
HasDiv
|
|
56
|
-
HasNegate
|
|
57
|
-
HasAbs
|
|
58
|
-
HasAbs2
|
|
59
|
-
HasMin
|
|
60
|
-
HasMax
|
|
60
|
+
HasHalfPacket = 1,
|
|
61
|
+
|
|
62
|
+
HasAdd = 1,
|
|
63
|
+
HasSub = 1,
|
|
64
|
+
HasMul = 1,
|
|
65
|
+
HasDiv = 1,
|
|
66
|
+
HasNegate = 1,
|
|
67
|
+
HasAbs = 0,
|
|
68
|
+
HasAbs2 = 0,
|
|
69
|
+
HasMin = 0,
|
|
70
|
+
HasMax = 0,
|
|
61
71
|
HasSetLinear = 0
|
|
62
72
|
};
|
|
63
73
|
};
|
|
64
74
|
|
|
65
|
-
template<> struct unpacket_traits<
|
|
66
|
-
|
|
67
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
|
75
|
+
template<> struct unpacket_traits<Packet1cf>
|
|
68
76
|
{
|
|
69
|
-
|
|
70
|
-
|
|
77
|
+
typedef std::complex<float> type;
|
|
78
|
+
typedef Packet1cf half;
|
|
79
|
+
typedef Packet2f as_real;
|
|
80
|
+
enum
|
|
81
|
+
{
|
|
82
|
+
size = 1,
|
|
83
|
+
alignment = Aligned16,
|
|
84
|
+
vectorizable = true,
|
|
85
|
+
masked_load_available = false,
|
|
86
|
+
masked_store_available = false
|
|
87
|
+
};
|
|
88
|
+
};
|
|
89
|
+
template<> struct unpacket_traits<Packet2cf>
|
|
90
|
+
{
|
|
91
|
+
typedef std::complex<float> type;
|
|
92
|
+
typedef Packet1cf half;
|
|
93
|
+
typedef Packet4f as_real;
|
|
94
|
+
enum
|
|
95
|
+
{
|
|
96
|
+
size = 2,
|
|
97
|
+
alignment = Aligned16,
|
|
98
|
+
vectorizable = true,
|
|
99
|
+
masked_load_available = false,
|
|
100
|
+
masked_store_available = false
|
|
101
|
+
};
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pcast<float,Packet1cf>(const float& a)
|
|
105
|
+
{ return Packet1cf(vset_lane_f32(a, vdup_n_f32(0.f), 0)); }
|
|
106
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f,Packet2cf>(const Packet2f& a)
|
|
107
|
+
{ return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a)))); }
|
|
71
108
|
|
|
109
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from)
|
|
110
|
+
{ return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from))); }
|
|
111
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
|
112
|
+
{
|
|
113
|
+
const float32x2_t r64 = vld1_f32(reinterpret_cast<const float*>(&from));
|
|
72
114
|
return Packet2cf(vcombine_f32(r64, r64));
|
|
73
115
|
}
|
|
74
116
|
|
|
75
|
-
template<> EIGEN_STRONG_INLINE
|
|
76
|
-
|
|
117
|
+
template<> EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
118
|
+
{ return Packet1cf(padd<Packet2f>(a.v, b.v)); }
|
|
119
|
+
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
120
|
+
{ return Packet2cf(padd<Packet4f>(a.v, b.v)); }
|
|
121
|
+
|
|
122
|
+
template<> EIGEN_STRONG_INLINE Packet1cf psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
123
|
+
{ return Packet1cf(psub<Packet2f>(a.v, b.v)); }
|
|
124
|
+
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
125
|
+
{ return Packet2cf(psub<Packet4f>(a.v, b.v)); }
|
|
126
|
+
|
|
127
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) { return Packet1cf(pnegate<Packet2f>(a.v)); }
|
|
77
128
|
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
|
|
129
|
+
|
|
130
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a)
|
|
131
|
+
{
|
|
132
|
+
const Packet2ui b = vreinterpret_u32_f32(a.v);
|
|
133
|
+
return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));
|
|
134
|
+
}
|
|
78
135
|
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
|
79
136
|
{
|
|
80
|
-
Packet4ui b = vreinterpretq_u32_f32(a.v);
|
|
137
|
+
const Packet4ui b = vreinterpretq_u32_f32(a.v);
|
|
81
138
|
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
|
|
82
139
|
}
|
|
83
140
|
|
|
141
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
142
|
+
{
|
|
143
|
+
Packet2f v1, v2;
|
|
144
|
+
|
|
145
|
+
// Get the real values of a | a1_re | a1_re |
|
|
146
|
+
v1 = vdup_lane_f32(a.v, 0);
|
|
147
|
+
// Get the imag values of a | a1_im | a1_im |
|
|
148
|
+
v2 = vdup_lane_f32(a.v, 1);
|
|
149
|
+
// Multiply the real a with b
|
|
150
|
+
v1 = vmul_f32(v1, b.v);
|
|
151
|
+
// Multiply the imag a with b
|
|
152
|
+
v2 = vmul_f32(v2, b.v);
|
|
153
|
+
// Conjugate v2
|
|
154
|
+
v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
|
|
155
|
+
// Swap real/imag elements in v2.
|
|
156
|
+
v2 = vrev64_f32(v2);
|
|
157
|
+
// Add and return the result
|
|
158
|
+
return Packet1cf(vadd_f32(v1, v2));
|
|
159
|
+
}
|
|
84
160
|
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
85
161
|
{
|
|
86
162
|
Packet4f v1, v2;
|
|
@@ -93,7 +169,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
|
|
|
93
169
|
v1 = vmulq_f32(v1, b.v);
|
|
94
170
|
// Multiply the imag a with b
|
|
95
171
|
v2 = vmulq_f32(v2, b.v);
|
|
96
|
-
// Conjugate v2
|
|
172
|
+
// Conjugate v2
|
|
97
173
|
v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
|
|
98
174
|
// Swap real/imag elements in v2.
|
|
99
175
|
v2 = vrev64q_f32(v2);
|
|
@@ -101,98 +177,144 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
|
|
|
101
177
|
return Packet2cf(vaddq_f32(v1, v2));
|
|
102
178
|
}
|
|
103
179
|
|
|
104
|
-
template<> EIGEN_STRONG_INLINE
|
|
180
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b)
|
|
105
181
|
{
|
|
106
|
-
|
|
182
|
+
// Compare real and imaginary parts of a and b to get the mask vector:
|
|
183
|
+
// [re(a[0])==re(b[0]), im(a[0])==im(b[0])]
|
|
184
|
+
Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);
|
|
185
|
+
// Swap real/imag elements in the mask in to get:
|
|
186
|
+
// [im(a[0])==im(b[0]), re(a[0])==re(b[0])]
|
|
187
|
+
Packet2f eq_swapped = vrev64_f32(eq);
|
|
188
|
+
// Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
|
|
189
|
+
return Packet1cf(pand<Packet2f>(eq, eq_swapped));
|
|
107
190
|
}
|
|
108
|
-
template<> EIGEN_STRONG_INLINE Packet2cf
|
|
109
|
-
{
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
|
191
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
|
|
192
|
+
{
|
|
193
|
+
// Compare real and imaginary parts of a and b to get the mask vector:
|
|
194
|
+
// [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]
|
|
195
|
+
Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);
|
|
196
|
+
// Swap real/imag elements in the mask in to get:
|
|
197
|
+
// [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])]
|
|
198
|
+
Packet4f eq_swapped = vrev64q_f32(eq);
|
|
199
|
+
// Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
|
|
200
|
+
return Packet2cf(pand<Packet4f>(eq, eq_swapped));
|
|
119
201
|
}
|
|
120
202
|
|
|
121
|
-
template<> EIGEN_STRONG_INLINE
|
|
122
|
-
|
|
203
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
204
|
+
{ return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
|
|
205
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
206
|
+
{ return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
|
|
123
207
|
|
|
124
|
-
template<> EIGEN_STRONG_INLINE
|
|
208
|
+
template<> EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
209
|
+
{ return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
|
|
210
|
+
template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
211
|
+
{ return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
|
|
125
212
|
|
|
126
|
-
template<> EIGEN_STRONG_INLINE
|
|
127
|
-
|
|
213
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
214
|
+
{ return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
|
|
215
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
216
|
+
{ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
|
|
128
217
|
|
|
129
|
-
template<>
|
|
218
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
219
|
+
{ return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
|
|
220
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
221
|
+
{ return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
|
|
222
|
+
|
|
223
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from)
|
|
224
|
+
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload<Packet2f>((const float*)from)); }
|
|
225
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
|
|
226
|
+
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(reinterpret_cast<const float*>(from))); }
|
|
227
|
+
|
|
228
|
+
template<> EIGEN_STRONG_INLINE Packet1cf ploadu<Packet1cf>(const std::complex<float>* from)
|
|
229
|
+
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cf(ploadu<Packet2f>((const float*)from)); }
|
|
230
|
+
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
|
|
231
|
+
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(reinterpret_cast<const float*>(from))); }
|
|
232
|
+
|
|
233
|
+
template<> EIGEN_STRONG_INLINE Packet1cf ploaddup<Packet1cf>(const std::complex<float>* from)
|
|
234
|
+
{ return pset1<Packet1cf>(*from); }
|
|
235
|
+
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
|
|
236
|
+
{ return pset1<Packet2cf>(*from); }
|
|
237
|
+
|
|
238
|
+
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
|
|
239
|
+
{ EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
|
240
|
+
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
|
|
241
|
+
{ EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(to), from.v); }
|
|
242
|
+
|
|
243
|
+
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
|
|
244
|
+
{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
|
245
|
+
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
|
|
246
|
+
{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v); }
|
|
247
|
+
|
|
248
|
+
template<> EIGEN_DEVICE_FUNC inline Packet1cf pgather<std::complex<float>, Packet1cf>(
|
|
249
|
+
const std::complex<float>* from, Index stride)
|
|
250
|
+
{
|
|
251
|
+
const Packet2f tmp = vdup_n_f32(std::real(from[0*stride]));
|
|
252
|
+
return Packet1cf(vset_lane_f32(std::imag(from[0*stride]), tmp, 1));
|
|
253
|
+
}
|
|
254
|
+
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(
|
|
255
|
+
const std::complex<float>* from, Index stride)
|
|
130
256
|
{
|
|
131
|
-
Packet4f res =
|
|
132
|
-
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
|
|
257
|
+
Packet4f res = vdupq_n_f32(std::real(from[0*stride]));
|
|
133
258
|
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
|
|
134
259
|
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
|
|
135
260
|
res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
|
|
136
261
|
return Packet2cf(res);
|
|
137
262
|
}
|
|
138
263
|
|
|
139
|
-
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>,
|
|
264
|
+
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet1cf>(
|
|
265
|
+
std::complex<float>* to, const Packet1cf& from, Index stride)
|
|
266
|
+
{ to[stride*0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1)); }
|
|
267
|
+
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(
|
|
268
|
+
std::complex<float>* to, const Packet2cf& from, Index stride)
|
|
140
269
|
{
|
|
141
270
|
to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
|
|
142
271
|
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
|
|
143
272
|
}
|
|
144
273
|
|
|
145
|
-
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *
|
|
274
|
+
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *addr)
|
|
275
|
+
{ EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr)); }
|
|
146
276
|
|
|
147
|
-
template<> EIGEN_STRONG_INLINE std::complex<float>
|
|
277
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet1cf>(const Packet1cf& a)
|
|
148
278
|
{
|
|
149
|
-
std::complex<float>
|
|
150
|
-
|
|
279
|
+
EIGEN_ALIGN16 std::complex<float> x;
|
|
280
|
+
vst1_f32(reinterpret_cast<float*>(&x), a.v);
|
|
281
|
+
return x;
|
|
282
|
+
}
|
|
283
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
|
284
|
+
{
|
|
285
|
+
EIGEN_ALIGN16 std::complex<float> x[2];
|
|
286
|
+
vst1q_f32(reinterpret_cast<float*>(x), a.v);
|
|
151
287
|
return x[0];
|
|
152
288
|
}
|
|
153
289
|
|
|
290
|
+
template<> EIGEN_STRONG_INLINE Packet1cf preverse(const Packet1cf& a) { return a; }
|
|
154
291
|
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
|
155
|
-
{
|
|
156
|
-
float32x2_t a_lo, a_hi;
|
|
157
|
-
Packet4f a_r128;
|
|
158
|
-
|
|
159
|
-
a_lo = vget_low_f32(a.v);
|
|
160
|
-
a_hi = vget_high_f32(a.v);
|
|
161
|
-
a_r128 = vcombine_f32(a_hi, a_lo);
|
|
162
|
-
|
|
163
|
-
return Packet2cf(a_r128);
|
|
164
|
-
}
|
|
292
|
+
{ return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v))); }
|
|
165
293
|
|
|
294
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pcplxflip<Packet1cf>(const Packet1cf& a)
|
|
295
|
+
{ return Packet1cf(vrev64_f32(a.v)); }
|
|
166
296
|
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
|
|
297
|
+
{ return Packet2cf(vrev64q_f32(a.v)); }
|
|
298
|
+
|
|
299
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet1cf>(const Packet1cf& a)
|
|
167
300
|
{
|
|
168
|
-
|
|
301
|
+
std::complex<float> s;
|
|
302
|
+
vst1_f32((float *)&s, a.v);
|
|
303
|
+
return s;
|
|
169
304
|
}
|
|
170
|
-
|
|
171
305
|
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
|
172
306
|
{
|
|
173
|
-
float32x2_t a1, a2;
|
|
174
307
|
std::complex<float> s;
|
|
175
|
-
|
|
176
|
-
a1 = vget_low_f32(a.v);
|
|
177
|
-
a2 = vget_high_f32(a.v);
|
|
178
|
-
a2 = vadd_f32(a1, a2);
|
|
179
|
-
vst1_f32((float *)&s, a2);
|
|
180
|
-
|
|
308
|
+
vst1_f32(reinterpret_cast<float*>(&s), vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v)));
|
|
181
309
|
return s;
|
|
182
310
|
}
|
|
183
311
|
|
|
184
|
-
template<> EIGEN_STRONG_INLINE
|
|
312
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a)
|
|
185
313
|
{
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
|
|
190
|
-
sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
|
|
191
|
-
sum = vaddq_f32(sum1, sum2);
|
|
192
|
-
|
|
193
|
-
return Packet2cf(sum);
|
|
314
|
+
std::complex<float> s;
|
|
315
|
+
vst1_f32((float *)&s, a.v);
|
|
316
|
+
return s;
|
|
194
317
|
}
|
|
195
|
-
|
|
196
318
|
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
|
197
319
|
{
|
|
198
320
|
float32x2_t a1, a2, v1, v2, prod;
|
|
@@ -208,90 +330,67 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
|
|
|
208
330
|
v1 = vmul_f32(v1, a2);
|
|
209
331
|
// Multiply the imag a with b
|
|
210
332
|
v2 = vmul_f32(v2, a2);
|
|
211
|
-
// Conjugate v2
|
|
333
|
+
// Conjugate v2
|
|
212
334
|
v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
|
|
213
335
|
// Swap real/imag elements in v2.
|
|
214
336
|
v2 = vrev64_f32(v2);
|
|
215
337
|
// Add v1, v2
|
|
216
338
|
prod = vadd_f32(v1, v2);
|
|
217
339
|
|
|
218
|
-
vst1_f32((
|
|
340
|
+
vst1_f32(reinterpret_cast<float*>(&s), prod);
|
|
219
341
|
|
|
220
342
|
return s;
|
|
221
343
|
}
|
|
222
344
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
{
|
|
226
|
-
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
|
227
|
-
{
|
|
228
|
-
if (Offset==1)
|
|
229
|
-
{
|
|
230
|
-
first.v = vextq_f32(first.v, second.v, 2);
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
};
|
|
234
|
-
|
|
235
|
-
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
|
236
|
-
{
|
|
237
|
-
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
238
|
-
{ return padd(pmul(x,y),c); }
|
|
239
|
-
|
|
240
|
-
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
241
|
-
{
|
|
242
|
-
return internal::pmul(a, pconj(b));
|
|
243
|
-
}
|
|
244
|
-
};
|
|
245
|
-
|
|
246
|
-
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
|
247
|
-
{
|
|
248
|
-
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
249
|
-
{ return padd(pmul(x,y),c); }
|
|
250
|
-
|
|
251
|
-
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
252
|
-
{
|
|
253
|
-
return internal::pmul(pconj(a), b);
|
|
254
|
-
}
|
|
255
|
-
};
|
|
345
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf,Packet2f)
|
|
346
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
|
256
347
|
|
|
257
|
-
template<>
|
|
348
|
+
template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
258
349
|
{
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
263
|
-
{
|
|
264
|
-
return pconj(internal::pmul(a, b));
|
|
265
|
-
}
|
|
266
|
-
};
|
|
350
|
+
// TODO optimize it for NEON
|
|
351
|
+
Packet1cf res = pmul(a, pconj(b));
|
|
352
|
+
Packet2f s, rev_s;
|
|
267
353
|
|
|
268
|
-
|
|
354
|
+
// this computes the norm
|
|
355
|
+
s = vmul_f32(b.v, b.v);
|
|
356
|
+
rev_s = vrev64_f32(s);
|
|
269
357
|
|
|
358
|
+
return Packet1cf(pdiv<Packet2f>(res.v, vadd_f32(s, rev_s)));
|
|
359
|
+
}
|
|
270
360
|
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
271
361
|
{
|
|
272
362
|
// TODO optimize it for NEON
|
|
273
|
-
Packet2cf res =
|
|
363
|
+
Packet2cf res = pmul(a,pconj(b));
|
|
274
364
|
Packet4f s, rev_s;
|
|
275
365
|
|
|
276
366
|
// this computes the norm
|
|
277
367
|
s = vmulq_f32(b.v, b.v);
|
|
278
368
|
rev_s = vrev64q_f32(s);
|
|
279
369
|
|
|
280
|
-
return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
|
|
370
|
+
return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s, rev_s)));
|
|
281
371
|
}
|
|
282
372
|
|
|
283
|
-
EIGEN_DEVICE_FUNC inline void
|
|
284
|
-
ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
|
373
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1cf, 1>& /*kernel*/) {}
|
|
374
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel)
|
|
375
|
+
{
|
|
285
376
|
Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
|
|
286
377
|
kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
|
|
287
378
|
kernel.packet[1].v = tmp;
|
|
288
379
|
}
|
|
289
380
|
|
|
381
|
+
template<> EIGEN_STRONG_INLINE Packet1cf psqrt<Packet1cf>(const Packet1cf& a) {
|
|
382
|
+
return psqrt_complex<Packet1cf>(a);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
|
|
386
|
+
return psqrt_complex<Packet2cf>(a);
|
|
387
|
+
}
|
|
388
|
+
|
|
290
389
|
//---------- double ----------
|
|
291
390
|
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
|
292
391
|
|
|
293
392
|
// See bug 1325, clang fails to call vld1q_u64.
|
|
294
|
-
#if EIGEN_COMP_CLANG
|
|
393
|
+
#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
|
|
295
394
|
static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
|
|
296
395
|
#else
|
|
297
396
|
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
|
|
@@ -309,7 +408,8 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
|
309
408
|
{
|
|
310
409
|
typedef Packet1cd type;
|
|
311
410
|
typedef Packet1cd half;
|
|
312
|
-
enum
|
|
411
|
+
enum
|
|
412
|
+
{
|
|
313
413
|
Vectorizable = 1,
|
|
314
414
|
AlignedOnScalar = 0,
|
|
315
415
|
size = 1,
|
|
@@ -328,24 +428,50 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
|
328
428
|
};
|
|
329
429
|
};
|
|
330
430
|
|
|
331
|
-
template<> struct unpacket_traits<Packet1cd>
|
|
431
|
+
template<> struct unpacket_traits<Packet1cd>
|
|
432
|
+
{
|
|
433
|
+
typedef std::complex<double> type;
|
|
434
|
+
typedef Packet1cd half;
|
|
435
|
+
typedef Packet2d as_real;
|
|
436
|
+
enum
|
|
437
|
+
{
|
|
438
|
+
size=1,
|
|
439
|
+
alignment=Aligned16,
|
|
440
|
+
vectorizable=true,
|
|
441
|
+
masked_load_available=false,
|
|
442
|
+
masked_store_available=false
|
|
443
|
+
};
|
|
444
|
+
};
|
|
445
|
+
|
|
446
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)
|
|
447
|
+
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>(reinterpret_cast<const double*>(from))); }
|
|
448
|
+
|
|
449
|
+
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
|
|
450
|
+
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from))); }
|
|
451
|
+
|
|
452
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
|
453
|
+
{
|
|
454
|
+
/* here we really have to use unaligned loads :( */
|
|
455
|
+
return ploadu<Packet1cd>(&from);
|
|
456
|
+
}
|
|
332
457
|
|
|
333
|
-
template<> EIGEN_STRONG_INLINE Packet1cd
|
|
334
|
-
|
|
458
|
+
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
459
|
+
{ return Packet1cd(padd<Packet2d>(a.v, b.v)); }
|
|
335
460
|
|
|
336
|
-
template<> EIGEN_STRONG_INLINE Packet1cd
|
|
337
|
-
{
|
|
461
|
+
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
462
|
+
{ return Packet1cd(psub<Packet2d>(a.v, b.v)); }
|
|
338
463
|
|
|
339
|
-
template<> EIGEN_STRONG_INLINE Packet1cd
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
|
|
464
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
|
|
465
|
+
{ return Packet1cd(pnegate<Packet2d>(a.v)); }
|
|
466
|
+
|
|
467
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
|
|
468
|
+
{ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
|
|
343
469
|
|
|
344
470
|
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
345
471
|
{
|
|
346
472
|
Packet2d v1, v2;
|
|
347
473
|
|
|
348
|
-
// Get the real values of a
|
|
474
|
+
// Get the real values of a
|
|
349
475
|
v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
|
|
350
476
|
// Get the imag values of a
|
|
351
477
|
v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
|
|
@@ -353,7 +479,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
|
|
353
479
|
v1 = vmulq_f64(v1, b.v);
|
|
354
480
|
// Multiply the imag a with b
|
|
355
481
|
v2 = vmulq_f64(v2, b.v);
|
|
356
|
-
// Conjugate v2
|
|
482
|
+
// Conjugate v2
|
|
357
483
|
v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
|
|
358
484
|
// Swap real/imag elements in v2.
|
|
359
485
|
v2 = preverse<Packet2d>(v2);
|
|
@@ -361,31 +487,44 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
|
|
361
487
|
return Packet1cd(vaddq_f64(v1, v2));
|
|
362
488
|
}
|
|
363
489
|
|
|
364
|
-
template<> EIGEN_STRONG_INLINE Packet1cd
|
|
365
|
-
{
|
|
366
|
-
return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
|
|
367
|
-
}
|
|
368
|
-
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
369
|
-
{
|
|
370
|
-
return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
|
|
371
|
-
}
|
|
372
|
-
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
490
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
|
|
373
491
|
{
|
|
374
|
-
|
|
492
|
+
// Compare real and imaginary parts of a and b to get the mask vector:
|
|
493
|
+
// [re(a)==re(b), im(a)==im(b)]
|
|
494
|
+
Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
|
|
495
|
+
// Swap real/imag elements in the mask in to get:
|
|
496
|
+
// [im(a)==im(b), re(a)==re(b)]
|
|
497
|
+
Packet2d eq_swapped = vreinterpretq_f64_u32(vrev64q_u32(vreinterpretq_u32_f64(eq)));
|
|
498
|
+
// Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped
|
|
499
|
+
return Packet1cd(pand<Packet2d>(eq, eq_swapped));
|
|
375
500
|
}
|
|
501
|
+
|
|
502
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
503
|
+
{ return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
|
|
504
|
+
|
|
505
|
+
template<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
506
|
+
{ return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
|
|
507
|
+
|
|
508
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
509
|
+
{ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
|
|
510
|
+
|
|
376
511
|
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
377
|
-
{
|
|
378
|
-
|
|
379
|
-
|
|
512
|
+
{ return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
|
|
513
|
+
|
|
514
|
+
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
|
|
515
|
+
{ return pset1<Packet1cd>(*from); }
|
|
380
516
|
|
|
381
|
-
template<> EIGEN_STRONG_INLINE
|
|
517
|
+
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
|
|
518
|
+
{ EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<double*>(to), from.v); }
|
|
382
519
|
|
|
383
|
-
template<> EIGEN_STRONG_INLINE void
|
|
384
|
-
|
|
520
|
+
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
|
|
521
|
+
{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v); }
|
|
385
522
|
|
|
386
|
-
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *
|
|
523
|
+
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *addr)
|
|
524
|
+
{ EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr)); }
|
|
387
525
|
|
|
388
|
-
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(
|
|
526
|
+
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(
|
|
527
|
+
const std::complex<double>* from, Index stride)
|
|
389
528
|
{
|
|
390
529
|
Packet2d res = pset1<Packet2d>(0.0);
|
|
391
530
|
res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
|
|
@@ -393,17 +532,14 @@ template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Pack
|
|
|
393
532
|
return Packet1cd(res);
|
|
394
533
|
}
|
|
395
534
|
|
|
396
|
-
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
}
|
|
535
|
+
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(
|
|
536
|
+
std::complex<double>* to, const Packet1cd& from, Index stride)
|
|
537
|
+
{ to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); }
|
|
400
538
|
|
|
401
|
-
|
|
402
|
-
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
|
539
|
+
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
|
403
540
|
{
|
|
404
|
-
std::complex<double>
|
|
541
|
+
EIGEN_ALIGN16 std::complex<double> res;
|
|
405
542
|
pstore<std::complex<double> >(&res, a);
|
|
406
|
-
|
|
407
543
|
return res;
|
|
408
544
|
}
|
|
409
545
|
|
|
@@ -411,59 +547,14 @@ template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a
|
|
|
411
547
|
|
|
412
548
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
|
413
549
|
|
|
414
|
-
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
|
|
415
|
-
|
|
416
550
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
|
417
551
|
|
|
418
|
-
template<int Offset>
|
|
419
|
-
struct palign_impl<Offset,Packet1cd>
|
|
420
|
-
{
|
|
421
|
-
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
|
422
|
-
{
|
|
423
|
-
// FIXME is it sure we never have to align a Packet1cd?
|
|
424
|
-
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
|
425
|
-
}
|
|
426
|
-
};
|
|
427
|
-
|
|
428
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
|
429
|
-
{
|
|
430
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
431
|
-
{ return padd(pmul(x,y),c); }
|
|
432
|
-
|
|
433
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
434
|
-
{
|
|
435
|
-
return internal::pmul(a, pconj(b));
|
|
436
|
-
}
|
|
437
|
-
};
|
|
438
|
-
|
|
439
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
|
440
|
-
{
|
|
441
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
442
|
-
{ return padd(pmul(x,y),c); }
|
|
443
|
-
|
|
444
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
445
|
-
{
|
|
446
|
-
return internal::pmul(pconj(a), b);
|
|
447
|
-
}
|
|
448
|
-
};
|
|
449
|
-
|
|
450
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
|
451
|
-
{
|
|
452
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
453
|
-
{ return padd(pmul(x,y),c); }
|
|
454
|
-
|
|
455
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
456
|
-
{
|
|
457
|
-
return pconj(internal::pmul(a, b));
|
|
458
|
-
}
|
|
459
|
-
};
|
|
460
|
-
|
|
461
552
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
|
462
553
|
|
|
463
554
|
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
464
555
|
{
|
|
465
556
|
// TODO optimize it for NEON
|
|
466
|
-
Packet1cd res =
|
|
557
|
+
Packet1cd res = pmul(a,pconj(b));
|
|
467
558
|
Packet2d s = pmul<Packet2d>(b.v, b.v);
|
|
468
559
|
Packet2d rev_s = preverse<Packet2d>(s);
|
|
469
560
|
|
|
@@ -471,9 +562,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
|
|
|
471
562
|
}
|
|
472
563
|
|
|
473
564
|
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
|
474
|
-
{
|
|
475
|
-
return Packet1cd(preverse(Packet2d(x.v)));
|
|
476
|
-
}
|
|
565
|
+
{ return Packet1cd(preverse(Packet2d(x.v))); }
|
|
477
566
|
|
|
478
567
|
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
|
479
568
|
{
|
|
@@ -481,6 +570,11 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
|
|
481
570
|
kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
|
|
482
571
|
kernel.packet[1].v = tmp;
|
|
483
572
|
}
|
|
573
|
+
|
|
574
|
+
template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
|
|
575
|
+
return psqrt_complex<Packet1cd>(a);
|
|
576
|
+
}
|
|
577
|
+
|
|
484
578
|
#endif // EIGEN_ARCH_ARM64
|
|
485
579
|
|
|
486
580
|
} // end namespace internal
|