@smake/eigen 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +26 -0
- package/eigen/COPYING.GPL +674 -0
- package/eigen/COPYING.LGPL +502 -0
- package/eigen/COPYING.MINPACK +51 -0
- package/eigen/COPYING.MPL2 +373 -0
- package/eigen/COPYING.README +18 -0
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +5 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2019 Rasmus Munk Larsen <rmlarsen@google.com>
|
|
5
|
+
//
|
|
6
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
|
+
|
|
10
|
+
#ifndef EIGEN_TYPE_CASTING_AVX512_H
|
|
11
|
+
#define EIGEN_TYPE_CASTING_AVX512_H
|
|
12
|
+
|
|
13
|
+
namespace Eigen {
|
|
14
|
+
|
|
15
|
+
namespace internal {
|
|
16
|
+
|
|
17
|
+
template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
|
|
18
|
+
return _mm512_cvttps_epi32(a);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {
|
|
22
|
+
return _mm512_cvtepi32_ps(a);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
|
|
26
|
+
return _mm512_castps_si512(a);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet16i>(const Packet16i& a) {
|
|
30
|
+
return _mm512_castsi512_ps(a);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
template <>
|
|
34
|
+
struct type_casting_traits<half, float> {
|
|
35
|
+
enum {
|
|
36
|
+
VectorizedCast = 1,
|
|
37
|
+
SrcCoeffRatio = 1,
|
|
38
|
+
TgtCoeffRatio = 1
|
|
39
|
+
};
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
|
|
43
|
+
return half2float(a);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
template <>
|
|
47
|
+
struct type_casting_traits<float, half> {
|
|
48
|
+
enum {
|
|
49
|
+
VectorizedCast = 1,
|
|
50
|
+
SrcCoeffRatio = 1,
|
|
51
|
+
TgtCoeffRatio = 1
|
|
52
|
+
};
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
|
|
56
|
+
return float2half(a);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
template <>
|
|
60
|
+
struct type_casting_traits<bfloat16, float> {
|
|
61
|
+
enum {
|
|
62
|
+
VectorizedCast = 1,
|
|
63
|
+
SrcCoeffRatio = 1,
|
|
64
|
+
TgtCoeffRatio = 1
|
|
65
|
+
};
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16bf, Packet16f>(const Packet16bf& a) {
|
|
69
|
+
return Bf16ToF32(a);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
template <>
|
|
73
|
+
struct type_casting_traits<float, bfloat16> {
|
|
74
|
+
enum {
|
|
75
|
+
VectorizedCast = 1,
|
|
76
|
+
SrcCoeffRatio = 1,
|
|
77
|
+
TgtCoeffRatio = 1
|
|
78
|
+
};
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
template<> EIGEN_STRONG_INLINE Packet16bf pcast<Packet16f, Packet16bf>(const Packet16f& a) {
|
|
82
|
+
return F32ToBf16(a);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
} // end namespace internal
|
|
86
|
+
|
|
87
|
+
} // end namespace Eigen
|
|
88
|
+
|
|
89
|
+
#endif // EIGEN_TYPE_CASTING_AVX512_H
|
|
@@ -29,8 +29,54 @@ static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (P
|
|
|
29
29
|
//---------- float ----------
|
|
30
30
|
struct Packet2cf
|
|
31
31
|
{
|
|
32
|
-
EIGEN_STRONG_INLINE explicit Packet2cf()
|
|
32
|
+
EIGEN_STRONG_INLINE explicit Packet2cf() {}
|
|
33
33
|
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
|
34
|
+
|
|
35
|
+
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b)
|
|
36
|
+
{
|
|
37
|
+
Packet4f v1, v2;
|
|
38
|
+
|
|
39
|
+
// Permute and multiply the real parts of a and b
|
|
40
|
+
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
|
|
41
|
+
// Get the imaginary parts of a
|
|
42
|
+
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
|
|
43
|
+
// multiply a_re * b
|
|
44
|
+
v1 = vec_madd(v1, b.v, p4f_ZERO);
|
|
45
|
+
// multiply a_im * b and get the conjugate result
|
|
46
|
+
v2 = vec_madd(v2, b.v, p4f_ZERO);
|
|
47
|
+
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
|
|
48
|
+
// permute back to a proper order
|
|
49
|
+
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
|
|
50
|
+
|
|
51
|
+
return Packet2cf(padd<Packet4f>(v1, v2));
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
EIGEN_STRONG_INLINE Packet2cf& operator*=(const Packet2cf& b) {
|
|
55
|
+
v = pmul(Packet2cf(*this), b).v;
|
|
56
|
+
return *this;
|
|
57
|
+
}
|
|
58
|
+
EIGEN_STRONG_INLINE Packet2cf operator*(const Packet2cf& b) const {
|
|
59
|
+
return Packet2cf(*this) *= b;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
EIGEN_STRONG_INLINE Packet2cf& operator+=(const Packet2cf& b) {
|
|
63
|
+
v = padd(v, b.v);
|
|
64
|
+
return *this;
|
|
65
|
+
}
|
|
66
|
+
EIGEN_STRONG_INLINE Packet2cf operator+(const Packet2cf& b) const {
|
|
67
|
+
return Packet2cf(*this) += b;
|
|
68
|
+
}
|
|
69
|
+
EIGEN_STRONG_INLINE Packet2cf& operator-=(const Packet2cf& b) {
|
|
70
|
+
v = psub(v, b.v);
|
|
71
|
+
return *this;
|
|
72
|
+
}
|
|
73
|
+
EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const {
|
|
74
|
+
return Packet2cf(*this) -= b;
|
|
75
|
+
}
|
|
76
|
+
EIGEN_STRONG_INLINE Packet2cf operator-(void) const {
|
|
77
|
+
return Packet2cf(-v);
|
|
78
|
+
}
|
|
79
|
+
|
|
34
80
|
Packet4f v;
|
|
35
81
|
};
|
|
36
82
|
|
|
@@ -38,6 +84,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
|
38
84
|
{
|
|
39
85
|
typedef Packet2cf type;
|
|
40
86
|
typedef Packet2cf half;
|
|
87
|
+
typedef Packet4f as_real;
|
|
41
88
|
enum {
|
|
42
89
|
Vectorizable = 1,
|
|
43
90
|
AlignedOnScalar = 1,
|
|
@@ -60,7 +107,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
|
60
107
|
};
|
|
61
108
|
};
|
|
62
109
|
|
|
63
|
-
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
|
110
|
+
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; typedef Packet4f as_real; };
|
|
64
111
|
|
|
65
112
|
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
|
66
113
|
{
|
|
@@ -80,16 +127,35 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
|
|
|
80
127
|
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
|
|
81
128
|
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
|
|
82
129
|
|
|
130
|
+
EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>* from0, const std::complex<float>* from1)
|
|
131
|
+
{
|
|
132
|
+
Packet4f res0, res1;
|
|
133
|
+
#ifdef __VSX__
|
|
134
|
+
__asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (*from0));
|
|
135
|
+
__asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (*from1));
|
|
136
|
+
#ifdef _BIG_ENDIAN
|
|
137
|
+
__asm__ ("xxpermdi %x0, %x1, %x2, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
|
|
138
|
+
#else
|
|
139
|
+
__asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
|
|
140
|
+
#endif
|
|
141
|
+
#else
|
|
142
|
+
*reinterpret_cast<std::complex<float> *>(&res0) = *from0;
|
|
143
|
+
*reinterpret_cast<std::complex<float> *>(&res1) = *from1;
|
|
144
|
+
res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI);
|
|
145
|
+
#endif
|
|
146
|
+
return Packet2cf(res0);
|
|
147
|
+
}
|
|
148
|
+
|
|
83
149
|
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
|
84
150
|
{
|
|
85
|
-
std::complex<float>
|
|
151
|
+
EIGEN_ALIGN16 std::complex<float> af[2];
|
|
86
152
|
af[0] = from[0*stride];
|
|
87
153
|
af[1] = from[1*stride];
|
|
88
154
|
return pload<Packet2cf>(af);
|
|
89
155
|
}
|
|
90
156
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
|
91
157
|
{
|
|
92
|
-
std::complex<float>
|
|
158
|
+
EIGEN_ALIGN16 std::complex<float> af[2];
|
|
93
159
|
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
|
94
160
|
to[0*stride] = af[0];
|
|
95
161
|
to[1*stride] = af[1];
|
|
@@ -100,25 +166,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, con
|
|
|
100
166
|
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
|
|
101
167
|
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
|
|
102
168
|
|
|
103
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
104
|
-
{
|
|
105
|
-
Packet4f v1, v2;
|
|
106
|
-
|
|
107
|
-
// Permute and multiply the real parts of a and b
|
|
108
|
-
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
|
|
109
|
-
// Get the imaginary parts of a
|
|
110
|
-
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
|
|
111
|
-
// multiply a_re * b
|
|
112
|
-
v1 = vec_madd(v1, b.v, p4f_ZERO);
|
|
113
|
-
// multiply a_im * b and get the conjugate result
|
|
114
|
-
v2 = vec_madd(v2, b.v, p4f_ZERO);
|
|
115
|
-
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
|
|
116
|
-
// permute back to a proper order
|
|
117
|
-
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
|
|
118
|
-
|
|
119
|
-
return Packet2cf(padd<Packet4f>(v1, v2));
|
|
120
|
-
}
|
|
121
|
-
|
|
122
169
|
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
|
|
123
170
|
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
|
|
124
171
|
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
|
|
@@ -128,7 +175,7 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::co
|
|
|
128
175
|
|
|
129
176
|
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
|
130
177
|
{
|
|
131
|
-
std::complex<float>
|
|
178
|
+
EIGEN_ALIGN16 std::complex<float> res[2];
|
|
132
179
|
pstore((float *)&res, a.v);
|
|
133
180
|
|
|
134
181
|
return res[0];
|
|
@@ -149,22 +196,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
|
|
|
149
196
|
return pfirst<Packet2cf>(Packet2cf(b));
|
|
150
197
|
}
|
|
151
198
|
|
|
152
|
-
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
|
153
|
-
{
|
|
154
|
-
Packet4f b1, b2;
|
|
155
|
-
#ifdef _BIG_ENDIAN
|
|
156
|
-
b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
|
|
157
|
-
b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
|
|
158
|
-
#else
|
|
159
|
-
b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
|
|
160
|
-
b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
|
|
161
|
-
#endif
|
|
162
|
-
b2 = vec_sld(b2, b2, 8);
|
|
163
|
-
b2 = padd<Packet4f>(b1, b2);
|
|
164
|
-
|
|
165
|
-
return Packet2cf(b2);
|
|
166
|
-
}
|
|
167
|
-
|
|
168
199
|
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
|
169
200
|
{
|
|
170
201
|
Packet4f b;
|
|
@@ -175,61 +206,12 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
|
|
|
175
206
|
return pfirst<Packet2cf>(prod);
|
|
176
207
|
}
|
|
177
208
|
|
|
178
|
-
template<int Offset>
|
|
179
|
-
struct palign_impl<Offset,Packet2cf>
|
|
180
|
-
{
|
|
181
|
-
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
|
182
|
-
{
|
|
183
|
-
if (Offset==1)
|
|
184
|
-
{
|
|
185
|
-
#ifdef _BIG_ENDIAN
|
|
186
|
-
first.v = vec_sld(first.v, second.v, 8);
|
|
187
|
-
#else
|
|
188
|
-
first.v = vec_sld(second.v, first.v, 8);
|
|
189
|
-
#endif
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
};
|
|
193
|
-
|
|
194
|
-
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
|
195
|
-
{
|
|
196
|
-
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
197
|
-
{ return padd(pmul(x,y),c); }
|
|
198
|
-
|
|
199
|
-
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
200
|
-
{
|
|
201
|
-
return internal::pmul(a, pconj(b));
|
|
202
|
-
}
|
|
203
|
-
};
|
|
204
|
-
|
|
205
|
-
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
|
206
|
-
{
|
|
207
|
-
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
208
|
-
{ return padd(pmul(x,y),c); }
|
|
209
|
-
|
|
210
|
-
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
211
|
-
{
|
|
212
|
-
return internal::pmul(pconj(a), b);
|
|
213
|
-
}
|
|
214
|
-
};
|
|
215
|
-
|
|
216
|
-
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
|
217
|
-
{
|
|
218
|
-
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
219
|
-
{ return padd(pmul(x,y),c); }
|
|
220
|
-
|
|
221
|
-
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
222
|
-
{
|
|
223
|
-
return pconj(internal::pmul(a, b));
|
|
224
|
-
}
|
|
225
|
-
};
|
|
226
|
-
|
|
227
209
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
|
228
210
|
|
|
229
211
|
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
230
212
|
{
|
|
231
213
|
// TODO optimize it for AltiVec
|
|
232
|
-
Packet2cf res =
|
|
214
|
+
Packet2cf res = pmul(a, pconj(b));
|
|
233
215
|
Packet4f s = pmul<Packet4f>(b.v, b.v);
|
|
234
216
|
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
|
|
235
217
|
}
|
|
@@ -246,6 +228,11 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
|
|
246
228
|
kernel.packet[0].v = tmp;
|
|
247
229
|
}
|
|
248
230
|
|
|
231
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
|
|
232
|
+
Packet4f eq = reinterpret_cast<Packet4f>(vec_cmpeq(a.v,b.v));
|
|
233
|
+
return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV)));
|
|
234
|
+
}
|
|
235
|
+
|
|
249
236
|
#ifdef __VSX__
|
|
250
237
|
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
|
251
238
|
Packet2cf result;
|
|
@@ -254,12 +241,62 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, con
|
|
|
254
241
|
}
|
|
255
242
|
#endif
|
|
256
243
|
|
|
244
|
+
template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a)
|
|
245
|
+
{
|
|
246
|
+
return psqrt_complex<Packet2cf>(a);
|
|
247
|
+
}
|
|
248
|
+
|
|
257
249
|
//---------- double ----------
|
|
258
250
|
#ifdef __VSX__
|
|
259
251
|
struct Packet1cd
|
|
260
252
|
{
|
|
261
253
|
EIGEN_STRONG_INLINE Packet1cd() {}
|
|
262
254
|
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
|
255
|
+
|
|
256
|
+
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b)
|
|
257
|
+
{
|
|
258
|
+
Packet2d a_re, a_im, v1, v2;
|
|
259
|
+
|
|
260
|
+
// Permute and multiply the real parts of a and b
|
|
261
|
+
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
|
|
262
|
+
// Get the imaginary parts of a
|
|
263
|
+
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
|
|
264
|
+
// multiply a_re * b
|
|
265
|
+
v1 = vec_madd(a_re, b.v, p2d_ZERO);
|
|
266
|
+
// multiply a_im * b and get the conjugate result
|
|
267
|
+
v2 = vec_madd(a_im, b.v, p2d_ZERO);
|
|
268
|
+
v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
|
|
269
|
+
v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
|
|
270
|
+
|
|
271
|
+
return Packet1cd(padd<Packet2d>(v1, v2));
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
EIGEN_STRONG_INLINE Packet1cd& operator*=(const Packet1cd& b) {
|
|
275
|
+
v = pmul(Packet1cd(*this), b).v;
|
|
276
|
+
return *this;
|
|
277
|
+
}
|
|
278
|
+
EIGEN_STRONG_INLINE Packet1cd operator*(const Packet1cd& b) const {
|
|
279
|
+
return Packet1cd(*this) *= b;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
EIGEN_STRONG_INLINE Packet1cd& operator+=(const Packet1cd& b) {
|
|
283
|
+
v = padd(v, b.v);
|
|
284
|
+
return *this;
|
|
285
|
+
}
|
|
286
|
+
EIGEN_STRONG_INLINE Packet1cd operator+(const Packet1cd& b) const {
|
|
287
|
+
return Packet1cd(*this) += b;
|
|
288
|
+
}
|
|
289
|
+
EIGEN_STRONG_INLINE Packet1cd& operator-=(const Packet1cd& b) {
|
|
290
|
+
v = psub(v, b.v);
|
|
291
|
+
return *this;
|
|
292
|
+
}
|
|
293
|
+
EIGEN_STRONG_INLINE Packet1cd operator-(const Packet1cd& b) const {
|
|
294
|
+
return Packet1cd(*this) -= b;
|
|
295
|
+
}
|
|
296
|
+
EIGEN_STRONG_INLINE Packet1cd operator-(void) const {
|
|
297
|
+
return Packet1cd(-v);
|
|
298
|
+
}
|
|
299
|
+
|
|
263
300
|
Packet2d v;
|
|
264
301
|
};
|
|
265
302
|
|
|
@@ -267,6 +304,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
|
267
304
|
{
|
|
268
305
|
typedef Packet1cd type;
|
|
269
306
|
typedef Packet1cd half;
|
|
307
|
+
typedef Packet2d as_real;
|
|
270
308
|
enum {
|
|
271
309
|
Vectorizable = 1,
|
|
272
310
|
AlignedOnScalar = 0,
|
|
@@ -286,7 +324,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
|
286
324
|
};
|
|
287
325
|
};
|
|
288
326
|
|
|
289
|
-
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
|
327
|
+
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; typedef Packet2d as_real; };
|
|
290
328
|
|
|
291
329
|
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
|
|
292
330
|
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
|
@@ -296,19 +334,13 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<
|
|
|
296
334
|
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
|
297
335
|
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
|
298
336
|
|
|
299
|
-
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index
|
|
337
|
+
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index)
|
|
300
338
|
{
|
|
301
|
-
|
|
302
|
-
af[0] = from[0*stride];
|
|
303
|
-
af[1] = from[1*stride];
|
|
304
|
-
return pload<Packet1cd>(af);
|
|
339
|
+
return pload<Packet1cd>(from);
|
|
305
340
|
}
|
|
306
|
-
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index
|
|
341
|
+
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index)
|
|
307
342
|
{
|
|
308
|
-
std::complex<double>
|
|
309
|
-
pstore<std::complex<double> >(af, from);
|
|
310
|
-
to[0*stride] = af[0];
|
|
311
|
-
to[1*stride] = af[1];
|
|
343
|
+
pstore<std::complex<double> >(to, from);
|
|
312
344
|
}
|
|
313
345
|
|
|
314
346
|
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
|
@@ -316,24 +348,6 @@ template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, con
|
|
|
316
348
|
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
|
317
349
|
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
|
|
318
350
|
|
|
319
|
-
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
320
|
-
{
|
|
321
|
-
Packet2d a_re, a_im, v1, v2;
|
|
322
|
-
|
|
323
|
-
// Permute and multiply the real parts of a and b
|
|
324
|
-
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
|
|
325
|
-
// Get the imaginary parts of a
|
|
326
|
-
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
|
|
327
|
-
// multiply a_re * b
|
|
328
|
-
v1 = vec_madd(a_re, b.v, p2d_ZERO);
|
|
329
|
-
// multiply a_im * b and get the conjugate result
|
|
330
|
-
v2 = vec_madd(a_im, b.v, p2d_ZERO);
|
|
331
|
-
v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
|
|
332
|
-
v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
|
|
333
|
-
|
|
334
|
-
return Packet1cd(padd<Packet2d>(v1, v2));
|
|
335
|
-
}
|
|
336
|
-
|
|
337
351
|
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
|
|
338
352
|
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
|
|
339
353
|
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
|
|
@@ -345,7 +359,7 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::c
|
|
|
345
359
|
|
|
346
360
|
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
|
347
361
|
{
|
|
348
|
-
std::complex<double>
|
|
362
|
+
EIGEN_ALIGN16 std::complex<double> res[2];
|
|
349
363
|
pstore<std::complex<double> >(res, a);
|
|
350
364
|
|
|
351
365
|
return res[0];
|
|
@@ -354,59 +368,15 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Pac
|
|
|
354
368
|
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
|
355
369
|
|
|
356
370
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
|
357
|
-
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
|
|
358
371
|
|
|
359
372
|
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
|
360
373
|
|
|
361
|
-
template<int Offset>
|
|
362
|
-
struct palign_impl<Offset,Packet1cd>
|
|
363
|
-
{
|
|
364
|
-
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
|
365
|
-
{
|
|
366
|
-
// FIXME is it sure we never have to align a Packet1cd?
|
|
367
|
-
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
|
368
|
-
}
|
|
369
|
-
};
|
|
370
|
-
|
|
371
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
|
372
|
-
{
|
|
373
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
374
|
-
{ return padd(pmul(x,y),c); }
|
|
375
|
-
|
|
376
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
377
|
-
{
|
|
378
|
-
return internal::pmul(a, pconj(b));
|
|
379
|
-
}
|
|
380
|
-
};
|
|
381
|
-
|
|
382
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
|
383
|
-
{
|
|
384
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
385
|
-
{ return padd(pmul(x,y),c); }
|
|
386
|
-
|
|
387
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
388
|
-
{
|
|
389
|
-
return internal::pmul(pconj(a), b);
|
|
390
|
-
}
|
|
391
|
-
};
|
|
392
|
-
|
|
393
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
|
394
|
-
{
|
|
395
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
396
|
-
{ return padd(pmul(x,y),c); }
|
|
397
|
-
|
|
398
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
399
|
-
{
|
|
400
|
-
return pconj(internal::pmul(a, b));
|
|
401
|
-
}
|
|
402
|
-
};
|
|
403
|
-
|
|
404
374
|
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
|
405
375
|
|
|
406
376
|
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
407
377
|
{
|
|
408
378
|
// TODO optimize it for AltiVec
|
|
409
|
-
Packet1cd res =
|
|
379
|
+
Packet1cd res = pmul(a,pconj(b));
|
|
410
380
|
Packet2d s = pmul<Packet2d>(b.v, b.v);
|
|
411
381
|
return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
|
|
412
382
|
}
|
|
@@ -422,6 +392,23 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
|
|
422
392
|
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
|
423
393
|
kernel.packet[0].v = tmp;
|
|
424
394
|
}
|
|
395
|
+
|
|
396
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
|
|
397
|
+
// Compare real and imaginary parts of a and b to get the mask vector:
|
|
398
|
+
// [re(a)==re(b), im(a)==im(b)]
|
|
399
|
+
Packet2d eq = reinterpret_cast<Packet2d>(vec_cmpeq(a.v,b.v));
|
|
400
|
+
// Swap real/imag elements in the mask in to get:
|
|
401
|
+
// [im(a)==im(b), re(a)==re(b)]
|
|
402
|
+
Packet2d eq_swapped = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(eq), reinterpret_cast<Packet4ui>(eq), 8));
|
|
403
|
+
// Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped
|
|
404
|
+
return Packet1cd(vec_and(eq, eq_swapped));
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a)
|
|
408
|
+
{
|
|
409
|
+
return psqrt_complex<Packet1cd>(a);
|
|
410
|
+
}
|
|
411
|
+
|
|
425
412
|
#endif // __VSX__
|
|
426
413
|
} // end namespace internal
|
|
427
414
|
|