@smake/eigen 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/COPYING.APACHE +203 -0
- package/eigen/COPYING.BSD +1 -1
- package/eigen/COPYING.MINPACK +51 -52
- package/eigen/Eigen/Cholesky +0 -1
- package/eigen/Eigen/Core +108 -266
- package/eigen/Eigen/Eigenvalues +0 -1
- package/eigen/Eigen/Geometry +3 -6
- package/eigen/Eigen/Householder +0 -1
- package/eigen/Eigen/Jacobi +0 -1
- package/eigen/Eigen/KLUSupport +41 -0
- package/eigen/Eigen/LU +2 -5
- package/eigen/Eigen/OrderingMethods +0 -3
- package/eigen/Eigen/PaStiXSupport +1 -0
- package/eigen/Eigen/PardisoSupport +0 -0
- package/eigen/Eigen/QR +0 -1
- package/eigen/Eigen/QtAlignedMalloc +0 -1
- package/eigen/Eigen/SVD +0 -1
- package/eigen/Eigen/Sparse +0 -2
- package/eigen/Eigen/SparseCholesky +0 -8
- package/eigen/Eigen/SparseLU +4 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- package/eigen/Eigen/src/Core/Array.h +99 -11
- package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
- package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- package/eigen/Eigen/src/Core/Assign.h +1 -1
- package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- package/eigen/Eigen/src/Core/Block.h +56 -60
- package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
- package/eigen/Eigen/src/Core/DenseBase.h +128 -39
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
- package/eigen/Eigen/src/Core/Diagonal.h +21 -23
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- package/eigen/Eigen/src/Core/Dot.h +10 -10
- package/eigen/Eigen/src/Core/EigenBase.h +10 -9
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
- package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- package/eigen/Eigen/src/Core/IO.h +40 -7
- package/eigen/Eigen/src/Core/IndexedView.h +237 -0
- package/eigen/Eigen/src/Core/Inverse.h +9 -10
- package/eigen/Eigen/src/Core/Map.h +7 -7
- package/eigen/Eigen/src/Core/MapBase.h +5 -3
- package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- package/eigen/Eigen/src/Core/Matrix.h +131 -25
- package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
- package/eigen/Eigen/src/Core/NestByValue.h +25 -50
- package/eigen/Eigen/src/Core/NoAlias.h +4 -3
- package/eigen/Eigen/src/Core/NumTraits.h +107 -20
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
- package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
- package/eigen/Eigen/src/Core/Product.h +30 -25
- package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
- package/eigen/Eigen/src/Core/Random.h +37 -1
- package/eigen/Eigen/src/Core/Redux.h +180 -170
- package/eigen/Eigen/src/Core/Ref.h +118 -21
- package/eigen/Eigen/src/Core/Replicate.h +8 -8
- package/eigen/Eigen/src/Core/Reshaped.h +454 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- package/eigen/Eigen/src/Core/Reverse.h +18 -12
- package/eigen/Eigen/src/Core/Select.h +8 -6
- package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- package/eigen/Eigen/src/Core/Solve.h +14 -14
- package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
- package/eigen/Eigen/src/Core/SolverBase.h +41 -3
- package/eigen/Eigen/src/Core/StableNorm.h +100 -70
- package/eigen/Eigen/src/Core/StlIterators.h +463 -0
- package/eigen/Eigen/src/Core/Stride.h +9 -4
- package/eigen/Eigen/src/Core/Swap.h +5 -4
- package/eigen/Eigen/src/Core/Transpose.h +86 -27
- package/eigen/Eigen/src/Core/Transpositions.h +26 -8
- package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
- package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- package/eigen/Eigen/src/Core/Visitor.h +137 -29
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- package/eigen/Eigen/src/Core/util/Constants.h +25 -9
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- package/eigen/Eigen/src/Core/util/Macros.h +661 -250
- package/eigen/Eigen/src/Core/util/Memory.h +222 -52
- package/eigen/Eigen/src/Core/util/Meta.h +349 -105
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
- package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
- package/eigen/Eigen/src/Geometry/Transform.h +86 -65
- package/eigen/Eigen/src/Geometry/Translation.h +6 -6
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- package/eigen/Eigen/src/Householder/Householder.h +8 -4
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- package/eigen/Eigen/src/LU/Determinant.h +35 -19
- package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
- package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
- package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- package/eigen/Eigen/src/misc/lapacke.h +5 -4
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- package/eigen/README.md +2 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Mehdi Goli Codeplay Software Ltd.
|
|
5
|
+
// Ralph Potter Codeplay Software Ltd.
|
|
6
|
+
// Luke Iwanski Codeplay Software Ltd.
|
|
7
|
+
// Contact: <eigen@codeplay.com>
|
|
8
|
+
//
|
|
9
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
10
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
11
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
12
|
+
|
|
13
|
+
/*****************************************************************
|
|
14
|
+
* TypeCasting.h
|
|
15
|
+
*
|
|
16
|
+
* \brief:
|
|
17
|
+
* TypeCasting
|
|
18
|
+
*
|
|
19
|
+
*****************************************************************/
|
|
20
|
+
|
|
21
|
+
#ifndef EIGEN_TYPE_CASTING_SYCL_H
|
|
22
|
+
#define EIGEN_TYPE_CASTING_SYCL_H
|
|
23
|
+
|
|
24
|
+
namespace Eigen {
|
|
25
|
+
|
|
26
|
+
namespace internal {
|
|
27
|
+
#ifdef SYCL_DEVICE_ONLY
|
|
28
|
+
template <>
|
|
29
|
+
struct type_casting_traits<float, int> {
|
|
30
|
+
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
template <>
|
|
34
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4
|
|
35
|
+
pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
|
|
36
|
+
return a
|
|
37
|
+
.template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
template <>
|
|
41
|
+
struct type_casting_traits<int, float> {
|
|
42
|
+
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
template <>
|
|
46
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
|
|
47
|
+
pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
|
|
48
|
+
return a.template convert<cl::sycl::cl_float,
|
|
49
|
+
cl::sycl::rounding_mode::automatic>();
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
template <>
|
|
53
|
+
struct type_casting_traits<double, float> {
|
|
54
|
+
enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
template <>
|
|
58
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
|
|
59
|
+
pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(
|
|
60
|
+
const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
|
|
61
|
+
auto a1 = a.template convert<cl::sycl::cl_float,
|
|
62
|
+
cl::sycl::rounding_mode::automatic>();
|
|
63
|
+
auto b1 = b.template convert<cl::sycl::cl_float,
|
|
64
|
+
cl::sycl::rounding_mode::automatic>();
|
|
65
|
+
return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
template <>
|
|
69
|
+
struct type_casting_traits<float, double> {
|
|
70
|
+
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
template <>
|
|
74
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
|
|
75
|
+
pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
|
|
76
|
+
// Simply discard the second half of the input
|
|
77
|
+
return cl::sycl::cl_double2(a.x(), a.y());
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
#endif
|
|
81
|
+
} // end namespace internal
|
|
82
|
+
|
|
83
|
+
} // end namespace Eigen
|
|
84
|
+
|
|
85
|
+
#endif // EIGEN_TYPE_CASTING_SYCL_H
|
|
@@ -15,6 +15,10 @@ namespace Eigen {
|
|
|
15
15
|
|
|
16
16
|
namespace internal {
|
|
17
17
|
|
|
18
|
+
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
|
|
19
|
+
static Packet4ui p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; //vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);
|
|
20
|
+
#endif
|
|
21
|
+
|
|
18
22
|
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
|
19
23
|
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
|
20
24
|
|
|
@@ -29,10 +33,14 @@ struct Packet2cf
|
|
|
29
33
|
{
|
|
30
34
|
EIGEN_STRONG_INLINE Packet2cf() {}
|
|
31
35
|
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
|
36
|
+
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
|
|
32
37
|
union {
|
|
33
38
|
Packet4f v;
|
|
34
39
|
Packet1cd cd[2];
|
|
35
40
|
};
|
|
41
|
+
#else
|
|
42
|
+
Packet4f v;
|
|
43
|
+
#endif
|
|
36
44
|
};
|
|
37
45
|
|
|
38
46
|
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
@@ -83,69 +91,33 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
|
83
91
|
};
|
|
84
92
|
};
|
|
85
93
|
|
|
86
|
-
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
|
87
|
-
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
|
94
|
+
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
|
|
95
|
+
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
|
|
88
96
|
|
|
89
97
|
/* Forward declaration */
|
|
90
98
|
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
|
|
91
99
|
|
|
92
|
-
|
|
100
|
+
/* complex<double> first */
|
|
93
101
|
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
|
94
|
-
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
|
95
102
|
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
|
96
|
-
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
|
97
103
|
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
|
98
|
-
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
|
99
104
|
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
|
100
105
|
|
|
101
106
|
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
|
102
107
|
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
|
103
108
|
|
|
104
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
|
105
|
-
{
|
|
106
|
-
Packet2cf res;
|
|
107
|
-
res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
|
|
108
|
-
res.cd[1] = res.cd[0];
|
|
109
|
-
return res;
|
|
110
|
-
}
|
|
111
|
-
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
|
112
|
-
{
|
|
113
|
-
std::complex<float> EIGEN_ALIGN16 af[2];
|
|
114
|
-
af[0] = from[0*stride];
|
|
115
|
-
af[1] = from[1*stride];
|
|
116
|
-
return pload<Packet2cf>(af);
|
|
117
|
-
}
|
|
118
109
|
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
|
|
119
110
|
{
|
|
120
111
|
return pload<Packet1cd>(from);
|
|
121
112
|
}
|
|
122
|
-
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
|
123
|
-
{
|
|
124
|
-
std::complex<float> EIGEN_ALIGN16 af[2];
|
|
125
|
-
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
|
126
|
-
to[0*stride] = af[0];
|
|
127
|
-
to[1*stride] = af[1];
|
|
128
|
-
}
|
|
129
113
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
|
|
130
114
|
{
|
|
131
115
|
pstore<std::complex<double> >(to, from);
|
|
132
116
|
}
|
|
133
|
-
|
|
134
|
-
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
|
|
135
117
|
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
|
136
|
-
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
|
|
137
118
|
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
|
138
119
|
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
|
139
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
|
|
140
120
|
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
|
|
141
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
|
142
|
-
{
|
|
143
|
-
Packet2cf res;
|
|
144
|
-
res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
|
|
145
|
-
res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
|
|
146
|
-
return res;
|
|
147
|
-
}
|
|
148
|
-
|
|
149
121
|
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
150
122
|
{
|
|
151
123
|
Packet2d a_re, a_im, v1, v2;
|
|
@@ -163,27 +135,17 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
|
|
163
135
|
|
|
164
136
|
return Packet1cd(v1 + v2);
|
|
165
137
|
}
|
|
166
|
-
template<> EIGEN_STRONG_INLINE
|
|
167
|
-
{
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
|
|
171
|
-
return res;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
|
175
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
|
|
176
|
-
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
|
177
|
-
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
|
|
178
|
-
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
|
179
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
|
|
180
|
-
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
|
181
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
|
|
182
|
-
|
|
138
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
|
139
|
+
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
|
140
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
|
141
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pandnot <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
|
183
142
|
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
|
184
|
-
template<> EIGEN_STRONG_INLINE
|
|
143
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
|
|
144
|
+
Packet2d eq = vec_cmpeq (a.v, b.v);
|
|
145
|
+
Packet2d tmp = { eq[1], eq[0] };
|
|
146
|
+
return (Packet1cd)pand<Packet2d>(eq, tmp);
|
|
147
|
+
}
|
|
185
148
|
|
|
186
|
-
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
|
187
149
|
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
|
188
150
|
|
|
189
151
|
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
|
@@ -193,160 +155,157 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Pac
|
|
|
193
155
|
|
|
194
156
|
return res;
|
|
195
157
|
}
|
|
196
|
-
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
|
197
|
-
{
|
|
198
|
-
std::complex<float> EIGEN_ALIGN16 res[2];
|
|
199
|
-
pstore<std::complex<float> >(res, a);
|
|
200
|
-
|
|
201
|
-
return res[0];
|
|
202
|
-
}
|
|
203
158
|
|
|
204
159
|
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
|
205
|
-
template<> EIGEN_STRONG_INLINE
|
|
160
|
+
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
|
|
206
161
|
{
|
|
207
|
-
|
|
208
|
-
res.cd[0] = a.cd[1];
|
|
209
|
-
res.cd[1] = a.cd[0];
|
|
210
|
-
return res;
|
|
162
|
+
return pfirst(a);
|
|
211
163
|
}
|
|
212
|
-
|
|
213
|
-
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
|
|
164
|
+
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
|
|
214
165
|
{
|
|
215
166
|
return pfirst(a);
|
|
216
167
|
}
|
|
217
|
-
|
|
168
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
|
169
|
+
|
|
170
|
+
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
|
218
171
|
{
|
|
219
|
-
|
|
220
|
-
Packet1cd
|
|
221
|
-
|
|
222
|
-
return res;
|
|
172
|
+
// TODO optimize it for AltiVec
|
|
173
|
+
Packet1cd res = pmul(a,pconj(b));
|
|
174
|
+
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
|
|
175
|
+
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
|
|
223
176
|
}
|
|
224
177
|
|
|
225
|
-
|
|
178
|
+
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
|
226
179
|
{
|
|
227
|
-
return
|
|
180
|
+
return Packet1cd(preverse(Packet2d(x.v)));
|
|
228
181
|
}
|
|
229
|
-
|
|
182
|
+
|
|
183
|
+
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
|
230
184
|
{
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
185
|
+
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
|
186
|
+
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
|
187
|
+
kernel.packet[0].v = tmp;
|
|
188
|
+
}
|
|
235
189
|
|
|
236
|
-
|
|
237
|
-
}
|
|
190
|
+
/* complex<float> follows */
|
|
191
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
|
|
192
|
+
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
|
193
|
+
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
|
194
|
+
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
|
238
195
|
|
|
239
|
-
template<> EIGEN_STRONG_INLINE std::complex<
|
|
196
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
|
240
197
|
{
|
|
241
|
-
|
|
198
|
+
std::complex<float> EIGEN_ALIGN16 res[2];
|
|
199
|
+
pstore<std::complex<float> >(res, a);
|
|
200
|
+
|
|
201
|
+
return res[0];
|
|
242
202
|
}
|
|
243
|
-
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
|
|
206
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
|
244
207
|
{
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
208
|
+
Packet2cf res;
|
|
209
|
+
res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
|
|
210
|
+
res.cd[1] = res.cd[0];
|
|
248
211
|
return res;
|
|
249
212
|
}
|
|
250
|
-
|
|
251
|
-
template<
|
|
252
|
-
struct palign_impl<Offset,Packet1cd>
|
|
213
|
+
#else
|
|
214
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
|
253
215
|
{
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
216
|
+
Packet2cf res;
|
|
217
|
+
if((std::ptrdiff_t(&from) % 16) == 0)
|
|
218
|
+
res.v = pload<Packet4f>((const float *)&from);
|
|
219
|
+
else
|
|
220
|
+
res.v = ploadu<Packet4f>((const float *)&from);
|
|
221
|
+
res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
|
|
222
|
+
return res;
|
|
223
|
+
}
|
|
224
|
+
#endif
|
|
260
225
|
|
|
261
|
-
template<
|
|
262
|
-
struct palign_impl<Offset,Packet2cf>
|
|
226
|
+
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
|
263
227
|
{
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
}
|
|
271
|
-
};
|
|
272
|
-
|
|
273
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
|
228
|
+
std::complex<float> EIGEN_ALIGN16 af[2];
|
|
229
|
+
af[0] = from[0*stride];
|
|
230
|
+
af[1] = from[1*stride];
|
|
231
|
+
return pload<Packet2cf>(af);
|
|
232
|
+
}
|
|
233
|
+
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
|
274
234
|
{
|
|
275
|
-
|
|
276
|
-
|
|
235
|
+
std::complex<float> EIGEN_ALIGN16 af[2];
|
|
236
|
+
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
|
237
|
+
to[0*stride] = af[0];
|
|
238
|
+
to[1*stride] = af[1];
|
|
239
|
+
}
|
|
277
240
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
}
|
|
282
|
-
};
|
|
241
|
+
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
|
|
242
|
+
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
|
|
243
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
|
|
283
244
|
|
|
284
|
-
template<>
|
|
285
|
-
{
|
|
286
|
-
|
|
287
|
-
|
|
245
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
|
|
246
|
+
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
|
|
247
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
|
|
248
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
|
|
288
249
|
|
|
289
|
-
|
|
290
|
-
{
|
|
291
|
-
return internal::pmul(pconj(a), b);
|
|
292
|
-
}
|
|
293
|
-
};
|
|
250
|
+
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
|
294
251
|
|
|
295
|
-
template<>
|
|
296
|
-
{
|
|
297
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
298
|
-
{ return padd(pmul(x,y),c); }
|
|
252
|
+
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
|
299
253
|
|
|
300
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
301
|
-
{
|
|
302
|
-
return pconj(internal::pmul(a, b));
|
|
303
|
-
}
|
|
304
|
-
};
|
|
305
254
|
|
|
306
|
-
|
|
307
|
-
{
|
|
308
|
-
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
309
|
-
{ return padd(pmul(x,y),c); }
|
|
255
|
+
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
|
|
310
256
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
}
|
|
315
|
-
};
|
|
257
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
|
|
258
|
+
Packet4f eq = pcmp_eq<Packet4f> (a.v, b.v);
|
|
259
|
+
Packet2cf res;
|
|
260
|
+
Packet2d tmp1 = { eq.v4f[0][1], eq.v4f[0][0] };
|
|
261
|
+
Packet2d tmp2 = { eq.v4f[1][1], eq.v4f[1][0] };
|
|
262
|
+
res.v.v4f[0] = pand<Packet2d>(eq.v4f[0], tmp1);
|
|
263
|
+
res.v.v4f[1] = pand<Packet2d>(eq.v4f[1], tmp2);
|
|
264
|
+
return res;
|
|
265
|
+
}
|
|
316
266
|
|
|
317
|
-
template<>
|
|
267
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
|
318
268
|
{
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
return internal::pmul(pconj(a), b);
|
|
325
|
-
}
|
|
326
|
-
};
|
|
269
|
+
Packet2cf res;
|
|
270
|
+
res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
|
|
271
|
+
res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
|
|
272
|
+
return res;
|
|
273
|
+
}
|
|
327
274
|
|
|
328
|
-
template<>
|
|
275
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
329
276
|
{
|
|
330
|
-
|
|
331
|
-
|
|
277
|
+
Packet2cf res;
|
|
278
|
+
res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
|
|
279
|
+
res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
|
|
280
|
+
return res;
|
|
281
|
+
}
|
|
332
282
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
283
|
+
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
|
284
|
+
{
|
|
285
|
+
Packet2cf res;
|
|
286
|
+
res.cd[0] = a.cd[1];
|
|
287
|
+
res.cd[1] = a.cd[0];
|
|
288
|
+
return res;
|
|
289
|
+
}
|
|
338
290
|
|
|
339
|
-
|
|
340
|
-
|
|
291
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
|
292
|
+
{
|
|
293
|
+
std::complex<float> res;
|
|
294
|
+
Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
|
|
295
|
+
vec_st2f(b.v, (float*)&res);
|
|
296
|
+
return res;
|
|
297
|
+
}
|
|
341
298
|
|
|
342
|
-
template<> EIGEN_STRONG_INLINE
|
|
299
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
|
343
300
|
{
|
|
344
|
-
|
|
345
|
-
Packet1cd
|
|
346
|
-
|
|
347
|
-
return
|
|
301
|
+
std::complex<float> res;
|
|
302
|
+
Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
|
|
303
|
+
vec_st2f(b.v, (float*)&res);
|
|
304
|
+
return res;
|
|
348
305
|
}
|
|
349
306
|
|
|
307
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
|
308
|
+
|
|
350
309
|
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
351
310
|
{
|
|
352
311
|
// TODO optimize it for AltiVec
|
|
@@ -356,11 +315,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
|
|
|
356
315
|
return res;
|
|
357
316
|
}
|
|
358
317
|
|
|
359
|
-
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
|
360
|
-
{
|
|
361
|
-
return Packet1cd(preverse(Packet2d(x.v)));
|
|
362
|
-
}
|
|
363
|
-
|
|
364
318
|
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
|
|
365
319
|
{
|
|
366
320
|
Packet2cf res;
|
|
@@ -369,13 +323,6 @@ EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
|
|
|
369
323
|
return res;
|
|
370
324
|
}
|
|
371
325
|
|
|
372
|
-
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
|
373
|
-
{
|
|
374
|
-
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
|
375
|
-
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
|
376
|
-
kernel.packet[0].v = tmp;
|
|
377
|
-
}
|
|
378
|
-
|
|
379
326
|
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
|
380
327
|
{
|
|
381
328
|
Packet1cd tmp = kernel.packet[0].cd[1];
|
|
@@ -389,6 +336,88 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, con
|
|
|
389
336
|
result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
|
|
390
337
|
return result;
|
|
391
338
|
}
|
|
339
|
+
#else
|
|
340
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
|
|
341
|
+
Packet4f eq = vec_cmpeq (a.v, b.v);
|
|
342
|
+
Packet4f tmp = { eq[1], eq[0], eq[3], eq[2] };
|
|
343
|
+
return (Packet2cf)pand<Packet4f>(eq, tmp);
|
|
344
|
+
}
|
|
345
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
|
|
346
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
347
|
+
{
|
|
348
|
+
Packet4f a_re, a_im, prod, prod_im;
|
|
349
|
+
|
|
350
|
+
// Permute and multiply the real parts of a and b
|
|
351
|
+
a_re = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
|
|
352
|
+
|
|
353
|
+
// Get the imaginary parts of a
|
|
354
|
+
a_im = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
|
|
355
|
+
|
|
356
|
+
// multiply a_im * b and get the conjugate result
|
|
357
|
+
prod_im = a_im * b.v;
|
|
358
|
+
prod_im = pxor<Packet4f>(prod_im, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR));
|
|
359
|
+
// permute back to a proper order
|
|
360
|
+
prod_im = vec_perm(prod_im, prod_im, p16uc_COMPLEX32_REV);
|
|
361
|
+
|
|
362
|
+
// multiply a_re * b, add prod_im
|
|
363
|
+
prod = pmadd<Packet4f>(a_re, b.v, prod_im);
|
|
364
|
+
|
|
365
|
+
return Packet2cf(prod);
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
|
369
|
+
{
|
|
370
|
+
Packet4f rev_a;
|
|
371
|
+
rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
|
|
372
|
+
return Packet2cf(rev_a);
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
|
376
|
+
{
|
|
377
|
+
Packet4f b;
|
|
378
|
+
b = vec_sld(a.v, a.v, 8);
|
|
379
|
+
b = padd<Packet4f>(a.v, b);
|
|
380
|
+
return pfirst<Packet2cf>(Packet2cf(b));
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
|
384
|
+
{
|
|
385
|
+
Packet4f b;
|
|
386
|
+
Packet2cf prod;
|
|
387
|
+
b = vec_sld(a.v, a.v, 8);
|
|
388
|
+
prod = pmul<Packet2cf>(a, Packet2cf(b));
|
|
389
|
+
|
|
390
|
+
return pfirst<Packet2cf>(prod);
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
|
394
|
+
|
|
395
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
396
|
+
{
|
|
397
|
+
// TODO optimize it for AltiVec
|
|
398
|
+
Packet2cf res = pmul(a, pconj(b));
|
|
399
|
+
Packet4f s = pmul<Packet4f>(b.v, b.v);
|
|
400
|
+
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
|
|
404
|
+
{
|
|
405
|
+
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
|
409
|
+
{
|
|
410
|
+
Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
|
411
|
+
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
|
412
|
+
kernel.packet[0].v = tmp;
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
|
416
|
+
Packet2cf result;
|
|
417
|
+
result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
|
|
418
|
+
return result;
|
|
419
|
+
}
|
|
420
|
+
#endif
|
|
392
421
|
|
|
393
422
|
} // end namespace internal
|
|
394
423
|
|