@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -10,397 +10,404 @@
|
|
|
10
10
|
#ifndef EIGEN_COMPLEX_AVX_H
|
|
11
11
|
#define EIGEN_COMPLEX_AVX_H
|
|
12
12
|
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../../InternalHeaderCheck.h"
|
|
15
|
+
|
|
13
16
|
namespace Eigen {
|
|
14
17
|
|
|
15
18
|
namespace internal {
|
|
16
19
|
|
|
17
20
|
//---------- float ----------
|
|
18
|
-
struct Packet4cf
|
|
19
|
-
{
|
|
21
|
+
struct Packet4cf {
|
|
20
22
|
EIGEN_STRONG_INLINE Packet4cf() {}
|
|
21
23
|
EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
|
|
22
|
-
__m256
|
|
24
|
+
__m256 v;
|
|
23
25
|
};
|
|
24
26
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
#ifndef EIGEN_VECTORIZE_AVX512
|
|
28
|
+
template <>
|
|
29
|
+
struct packet_traits<std::complex<float> > : default_packet_traits {
|
|
27
30
|
typedef Packet4cf type;
|
|
28
31
|
typedef Packet2cf half;
|
|
29
32
|
enum {
|
|
30
33
|
Vectorizable = 1,
|
|
31
34
|
AlignedOnScalar = 1,
|
|
32
35
|
size = 4,
|
|
33
|
-
HasHalfPacket = 1,
|
|
34
36
|
|
|
35
|
-
HasAdd
|
|
36
|
-
HasSub
|
|
37
|
-
HasMul
|
|
38
|
-
HasDiv
|
|
37
|
+
HasAdd = 1,
|
|
38
|
+
HasSub = 1,
|
|
39
|
+
HasMul = 1,
|
|
40
|
+
HasDiv = 1,
|
|
39
41
|
HasNegate = 1,
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
42
|
+
HasSqrt = 1,
|
|
43
|
+
HasLog = 1,
|
|
44
|
+
HasExp = 1,
|
|
45
|
+
HasAbs = 0,
|
|
46
|
+
HasAbs2 = 0,
|
|
47
|
+
HasMin = 0,
|
|
48
|
+
HasMax = 0,
|
|
44
49
|
HasSetLinear = 0
|
|
45
50
|
};
|
|
46
51
|
};
|
|
52
|
+
#endif
|
|
47
53
|
|
|
48
|
-
template<>
|
|
54
|
+
template <>
|
|
55
|
+
struct unpacket_traits<Packet4cf> {
|
|
56
|
+
typedef std::complex<float> type;
|
|
57
|
+
typedef Packet2cf half;
|
|
58
|
+
typedef Packet8f as_real;
|
|
59
|
+
enum {
|
|
60
|
+
size = 4,
|
|
61
|
+
alignment = Aligned32,
|
|
62
|
+
vectorizable = true,
|
|
63
|
+
masked_load_available = false,
|
|
64
|
+
masked_store_available = false
|
|
65
|
+
};
|
|
66
|
+
};
|
|
49
67
|
|
|
50
|
-
template<>
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
{
|
|
54
|
-
return Packet4cf(pnegate(a.v));
|
|
68
|
+
template <>
|
|
69
|
+
EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
|
|
70
|
+
return Packet4cf(_mm256_add_ps(a.v, b.v));
|
|
55
71
|
}
|
|
56
|
-
template<>
|
|
57
|
-
{
|
|
58
|
-
|
|
59
|
-
return Packet4cf(_mm256_xor_ps(a.v,mask));
|
|
72
|
+
template <>
|
|
73
|
+
EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
|
|
74
|
+
return Packet4cf(_mm256_sub_ps(a.v, b.v));
|
|
60
75
|
}
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
76
|
+
template <>
|
|
77
|
+
EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a) {
|
|
78
|
+
return Packet4cf(pnegate(a.v));
|
|
79
|
+
}
|
|
80
|
+
template <>
|
|
81
|
+
EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a) {
|
|
82
|
+
const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000,
|
|
83
|
+
0x80000000, 0x00000000, 0x80000000));
|
|
84
|
+
return Packet4cf(_mm256_xor_ps(a.v, mask));
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
template <>
|
|
88
|
+
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) {
|
|
89
|
+
__m256 tmp1 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
90
|
+
__m256 tmp2 = _mm256_moveldup_ps(a.v);
|
|
91
|
+
#ifdef EIGEN_VECTORIZE_FMA
|
|
92
|
+
__m256 result = _mm256_fmaddsub_ps(tmp2, b.v, tmp1);
|
|
93
|
+
#else
|
|
94
|
+
__m256 result = _mm256_addsub_ps(_mm256_mul_ps(tmp2, b.v), tmp1);
|
|
95
|
+
#endif
|
|
67
96
|
return Packet4cf(result);
|
|
68
97
|
}
|
|
69
98
|
|
|
70
|
-
template<>
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
99
|
+
template <>
|
|
100
|
+
EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) {
|
|
101
|
+
__m256 eq = _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ);
|
|
102
|
+
return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));
|
|
103
|
+
}
|
|
74
104
|
|
|
75
|
-
template<>
|
|
76
|
-
|
|
105
|
+
template <>
|
|
106
|
+
EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) {
|
|
107
|
+
return Packet4cf(ptrue(Packet8f(a.v)));
|
|
108
|
+
}
|
|
109
|
+
template <>
|
|
110
|
+
EIGEN_STRONG_INLINE Packet4cf pand<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
|
|
111
|
+
return Packet4cf(_mm256_and_ps(a.v, b.v));
|
|
112
|
+
}
|
|
113
|
+
template <>
|
|
114
|
+
EIGEN_STRONG_INLINE Packet4cf por<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
|
|
115
|
+
return Packet4cf(_mm256_or_ps(a.v, b.v));
|
|
116
|
+
}
|
|
117
|
+
template <>
|
|
118
|
+
EIGEN_STRONG_INLINE Packet4cf pxor<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
|
|
119
|
+
return Packet4cf(_mm256_xor_ps(a.v, b.v));
|
|
120
|
+
}
|
|
121
|
+
template <>
|
|
122
|
+
EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
|
|
123
|
+
return Packet4cf(_mm256_andnot_ps(b.v, a.v));
|
|
124
|
+
}
|
|
77
125
|
|
|
126
|
+
template <>
|
|
127
|
+
EIGEN_STRONG_INLINE Packet4cf pload<Packet4cf>(const std::complex<float>* from) {
|
|
128
|
+
EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(_mm256_load_ps(&numext::real_ref(*from)));
|
|
129
|
+
}
|
|
130
|
+
template <>
|
|
131
|
+
EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) {
|
|
132
|
+
EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(_mm256_loadu_ps(&numext::real_ref(*from)));
|
|
133
|
+
}
|
|
78
134
|
|
|
79
|
-
template<>
|
|
80
|
-
{
|
|
81
|
-
|
|
135
|
+
template <>
|
|
136
|
+
EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from) {
|
|
137
|
+
const float re = std::real(from);
|
|
138
|
+
const float im = std::imag(from);
|
|
139
|
+
return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
|
|
82
140
|
}
|
|
83
141
|
|
|
84
|
-
template<>
|
|
85
|
-
{
|
|
142
|
+
template <>
|
|
143
|
+
EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from) {
|
|
86
144
|
// FIXME The following might be optimized using _mm256_movedup_pd
|
|
87
145
|
Packet2cf a = ploaddup<Packet2cf>(from);
|
|
88
|
-
Packet2cf b = ploaddup<Packet2cf>(from+1);
|
|
89
|
-
return
|
|
146
|
+
Packet2cf b = ploaddup<Packet2cf>(from + 1);
|
|
147
|
+
return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
|
|
90
148
|
}
|
|
91
149
|
|
|
92
|
-
template<>
|
|
93
|
-
|
|
150
|
+
template <>
|
|
151
|
+
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) {
|
|
152
|
+
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(&numext::real_ref(*to), from.v);
|
|
153
|
+
}
|
|
154
|
+
template <>
|
|
155
|
+
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) {
|
|
156
|
+
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(&numext::real_ref(*to), from.v);
|
|
157
|
+
}
|
|
94
158
|
|
|
95
|
-
template<>
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
std::imag(from[1*stride]), std::real(from[1*stride]),
|
|
100
|
-
std::imag(from[0*stride]), std::real(from[0*stride])));
|
|
159
|
+
template <>
|
|
160
|
+
EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from,
|
|
161
|
+
Index stride) {
|
|
162
|
+
return Packet4cf(_mm256_set_ps(std::imag(from[3 * stride]), std::real(from[3 * stride]), std::imag(from[2 * stride]),
|
|
163
|
+
std::real(from[2 * stride]), std::imag(from[1 * stride]), std::real(from[1 * stride]),
|
|
164
|
+
std::imag(from[0 * stride]), std::real(from[0 * stride])));
|
|
101
165
|
}
|
|
102
166
|
|
|
103
|
-
template<>
|
|
104
|
-
|
|
167
|
+
template <>
|
|
168
|
+
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from,
|
|
169
|
+
Index stride) {
|
|
105
170
|
__m128 low = _mm256_extractf128_ps(from.v, 0);
|
|
106
|
-
to[stride*0] =
|
|
107
|
-
|
|
108
|
-
to[stride*1] =
|
|
109
|
-
|
|
171
|
+
to[stride * 0] =
|
|
172
|
+
std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
|
|
173
|
+
to[stride * 1] =
|
|
174
|
+
std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
|
|
110
175
|
|
|
111
176
|
__m128 high = _mm256_extractf128_ps(from.v, 1);
|
|
112
|
-
to[stride*2] =
|
|
113
|
-
|
|
114
|
-
to[stride*3] =
|
|
115
|
-
|
|
116
|
-
|
|
177
|
+
to[stride * 2] =
|
|
178
|
+
std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
|
|
179
|
+
to[stride * 3] =
|
|
180
|
+
std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
|
|
117
181
|
}
|
|
118
182
|
|
|
119
|
-
template<>
|
|
120
|
-
{
|
|
183
|
+
template <>
|
|
184
|
+
EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a) {
|
|
121
185
|
return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
|
|
122
186
|
}
|
|
123
187
|
|
|
124
|
-
template<>
|
|
125
|
-
|
|
188
|
+
template <>
|
|
189
|
+
EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
|
|
190
|
+
__m128 low = _mm256_extractf128_ps(a.v, 0);
|
|
126
191
|
__m128 high = _mm256_extractf128_ps(a.v, 1);
|
|
127
|
-
__m128d lowd
|
|
192
|
+
__m128d lowd = _mm_castps_pd(low);
|
|
128
193
|
__m128d highd = _mm_castps_pd(high);
|
|
129
|
-
low
|
|
130
|
-
high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
|
|
194
|
+
low = _mm_castpd_ps(_mm_shuffle_pd(lowd, lowd, 0x1));
|
|
195
|
+
high = _mm_castpd_ps(_mm_shuffle_pd(highd, highd, 0x1));
|
|
131
196
|
__m256 result = _mm256_setzero_ps();
|
|
132
197
|
result = _mm256_insertf128_ps(result, low, 1);
|
|
133
198
|
result = _mm256_insertf128_ps(result, high, 0);
|
|
134
199
|
return Packet4cf(result);
|
|
135
200
|
}
|
|
136
201
|
|
|
137
|
-
template<>
|
|
138
|
-
{
|
|
139
|
-
return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
|
|
140
|
-
Packet2cf(_mm256_extractf128_ps(a.v,1))));
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
|
|
144
|
-
{
|
|
145
|
-
Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
146
|
-
Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
147
|
-
t0 = _mm256_hadd_ps(t0,t1);
|
|
148
|
-
Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
149
|
-
Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
|
150
|
-
t2 = _mm256_hadd_ps(t2,t3);
|
|
151
|
-
|
|
152
|
-
t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
|
|
153
|
-
t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
|
|
154
|
-
|
|
155
|
-
return Packet4cf(_mm256_add_ps(t1,t3));
|
|
202
|
+
template <>
|
|
203
|
+
EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a) {
|
|
204
|
+
return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
|
156
205
|
}
|
|
157
206
|
|
|
158
|
-
template<>
|
|
159
|
-
{
|
|
160
|
-
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
|
|
161
|
-
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
|
207
|
+
template <>
|
|
208
|
+
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a) {
|
|
209
|
+
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
|
162
210
|
}
|
|
163
211
|
|
|
164
|
-
|
|
165
|
-
struct palign_impl<Offset,Packet4cf>
|
|
166
|
-
{
|
|
167
|
-
static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
|
|
168
|
-
{
|
|
169
|
-
if (Offset==0) return;
|
|
170
|
-
palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
|
|
171
|
-
}
|
|
172
|
-
};
|
|
212
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf, Packet8f)
|
|
173
213
|
|
|
174
|
-
template<>
|
|
175
|
-
{
|
|
176
|
-
|
|
177
|
-
{ return padd(pmul(x,y),c); }
|
|
178
|
-
|
|
179
|
-
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
180
|
-
{
|
|
181
|
-
return internal::pmul(a, pconj(b));
|
|
182
|
-
}
|
|
183
|
-
};
|
|
184
|
-
|
|
185
|
-
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
|
|
186
|
-
{
|
|
187
|
-
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
|
188
|
-
{ return padd(pmul(x,y),c); }
|
|
189
|
-
|
|
190
|
-
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
191
|
-
{
|
|
192
|
-
return internal::pmul(pconj(a), b);
|
|
193
|
-
}
|
|
194
|
-
};
|
|
195
|
-
|
|
196
|
-
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
|
|
197
|
-
{
|
|
198
|
-
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
|
199
|
-
{ return padd(pmul(x,y),c); }
|
|
200
|
-
|
|
201
|
-
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
|
202
|
-
{
|
|
203
|
-
return pconj(internal::pmul(a, b));
|
|
204
|
-
}
|
|
205
|
-
};
|
|
206
|
-
|
|
207
|
-
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
|
|
208
|
-
|
|
209
|
-
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
|
210
|
-
{
|
|
211
|
-
Packet4cf num = pmul(a, pconj(b));
|
|
212
|
-
__m256 tmp = _mm256_mul_ps(b.v, b.v);
|
|
213
|
-
__m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
|
|
214
|
-
__m256 denom = _mm256_add_ps(tmp, tmp2);
|
|
215
|
-
return Packet4cf(_mm256_div_ps(num.v, denom));
|
|
214
|
+
template <>
|
|
215
|
+
EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
|
|
216
|
+
return pdiv_complex(a, b);
|
|
216
217
|
}
|
|
217
218
|
|
|
218
|
-
template<>
|
|
219
|
-
{
|
|
220
|
-
return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0
|
|
219
|
+
template <>
|
|
220
|
+
EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x) {
|
|
221
|
+
return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
221
222
|
}
|
|
222
223
|
|
|
223
224
|
//---------- double ----------
|
|
224
|
-
struct Packet2cd
|
|
225
|
-
{
|
|
225
|
+
struct Packet2cd {
|
|
226
226
|
EIGEN_STRONG_INLINE Packet2cd() {}
|
|
227
227
|
EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
|
|
228
|
-
__m256d
|
|
228
|
+
__m256d v;
|
|
229
229
|
};
|
|
230
230
|
|
|
231
|
-
|
|
232
|
-
|
|
231
|
+
#ifndef EIGEN_VECTORIZE_AVX512
|
|
232
|
+
template <>
|
|
233
|
+
struct packet_traits<std::complex<double> > : default_packet_traits {
|
|
233
234
|
typedef Packet2cd type;
|
|
234
235
|
typedef Packet1cd half;
|
|
235
236
|
enum {
|
|
236
237
|
Vectorizable = 1,
|
|
237
238
|
AlignedOnScalar = 0,
|
|
238
239
|
size = 2,
|
|
239
|
-
HasHalfPacket = 1,
|
|
240
240
|
|
|
241
|
-
HasAdd
|
|
242
|
-
HasSub
|
|
243
|
-
HasMul
|
|
244
|
-
HasDiv
|
|
241
|
+
HasAdd = 1,
|
|
242
|
+
HasSub = 1,
|
|
243
|
+
HasMul = 1,
|
|
244
|
+
HasDiv = 1,
|
|
245
245
|
HasNegate = 1,
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
246
|
+
HasSqrt = 1,
|
|
247
|
+
HasLog = 1,
|
|
248
|
+
HasAbs = 0,
|
|
249
|
+
HasAbs2 = 0,
|
|
250
|
+
HasMin = 0,
|
|
251
|
+
HasMax = 0,
|
|
250
252
|
HasSetLinear = 0
|
|
251
253
|
};
|
|
252
254
|
};
|
|
255
|
+
#endif
|
|
253
256
|
|
|
254
|
-
template<>
|
|
257
|
+
template <>
|
|
258
|
+
struct unpacket_traits<Packet2cd> {
|
|
259
|
+
typedef std::complex<double> type;
|
|
260
|
+
typedef Packet1cd half;
|
|
261
|
+
typedef Packet4d as_real;
|
|
262
|
+
enum {
|
|
263
|
+
size = 2,
|
|
264
|
+
alignment = Aligned32,
|
|
265
|
+
vectorizable = true,
|
|
266
|
+
masked_load_available = false,
|
|
267
|
+
masked_store_available = false
|
|
268
|
+
};
|
|
269
|
+
};
|
|
255
270
|
|
|
256
|
-
template<>
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
return Packet2cd(
|
|
271
|
+
template <>
|
|
272
|
+
EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
|
|
273
|
+
return Packet2cd(_mm256_add_pd(a.v, b.v));
|
|
274
|
+
}
|
|
275
|
+
template <>
|
|
276
|
+
EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
|
|
277
|
+
return Packet2cd(_mm256_sub_pd(a.v, b.v));
|
|
278
|
+
}
|
|
279
|
+
template <>
|
|
280
|
+
EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) {
|
|
281
|
+
return Packet2cd(pnegate(a.v));
|
|
282
|
+
}
|
|
283
|
+
template <>
|
|
284
|
+
EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a) {
|
|
285
|
+
const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0));
|
|
286
|
+
return Packet2cd(_mm256_xor_pd(a.v, mask));
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
template <>
|
|
290
|
+
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) {
|
|
291
|
+
__m256d tmp1 = _mm256_mul_pd(_mm256_permute_pd(a.v, 0xF), _mm256_permute_pd(b.v, 0x5));
|
|
292
|
+
__m256d tmp2 = _mm256_movedup_pd(a.v);
|
|
293
|
+
#ifdef EIGEN_VECTORIZE_FMA
|
|
294
|
+
__m256d result = _mm256_fmaddsub_pd(tmp2, b.v, tmp1);
|
|
295
|
+
#else
|
|
296
|
+
__m256d result = _mm256_addsub_pd(_mm256_mul_pd(tmp2, b.v), tmp1);
|
|
297
|
+
#endif
|
|
298
|
+
return Packet2cd(result);
|
|
263
299
|
}
|
|
264
300
|
|
|
265
|
-
template<>
|
|
266
|
-
{
|
|
267
|
-
__m256d
|
|
268
|
-
|
|
269
|
-
__m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
|
|
270
|
-
__m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
|
|
271
|
-
__m256d odd = _mm256_mul_pd(tmp2, tmp3);
|
|
272
|
-
return Packet2cd(_mm256_addsub_pd(even, odd));
|
|
301
|
+
template <>
|
|
302
|
+
EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) {
|
|
303
|
+
__m256d eq = _mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ);
|
|
304
|
+
return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5)));
|
|
273
305
|
}
|
|
274
306
|
|
|
275
|
-
template<>
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
307
|
+
template <>
|
|
308
|
+
EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) {
|
|
309
|
+
return Packet2cd(ptrue(Packet4d(a.v)));
|
|
310
|
+
}
|
|
311
|
+
template <>
|
|
312
|
+
EIGEN_STRONG_INLINE Packet2cd pand<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
|
|
313
|
+
return Packet2cd(_mm256_and_pd(a.v, b.v));
|
|
314
|
+
}
|
|
315
|
+
template <>
|
|
316
|
+
EIGEN_STRONG_INLINE Packet2cd por<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
|
|
317
|
+
return Packet2cd(_mm256_or_pd(a.v, b.v));
|
|
318
|
+
}
|
|
319
|
+
template <>
|
|
320
|
+
EIGEN_STRONG_INLINE Packet2cd pxor<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
|
|
321
|
+
return Packet2cd(_mm256_xor_pd(a.v, b.v));
|
|
322
|
+
}
|
|
323
|
+
template <>
|
|
324
|
+
EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
|
|
325
|
+
return Packet2cd(_mm256_andnot_pd(b.v, a.v));
|
|
326
|
+
}
|
|
279
327
|
|
|
280
|
-
template<>
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
328
|
+
template <>
|
|
329
|
+
EIGEN_STRONG_INLINE Packet2cd pload<Packet2cd>(const std::complex<double>* from) {
|
|
330
|
+
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(_mm256_load_pd((const double*)from));
|
|
331
|
+
}
|
|
332
|
+
template <>
|
|
333
|
+
EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from) {
|
|
334
|
+
EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(_mm256_loadu_pd((const double*)from));
|
|
335
|
+
}
|
|
284
336
|
|
|
285
|
-
template<>
|
|
286
|
-
{
|
|
337
|
+
template <>
|
|
338
|
+
EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from) {
|
|
287
339
|
// in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
|
|
288
|
-
// return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
|
|
289
|
-
|
|
340
|
+
// return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
|
|
341
|
+
return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
|
|
290
342
|
}
|
|
291
343
|
|
|
292
|
-
template<>
|
|
344
|
+
template <>
|
|
345
|
+
EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) {
|
|
346
|
+
return pset1<Packet2cd>(*from);
|
|
347
|
+
}
|
|
293
348
|
|
|
294
|
-
template<>
|
|
295
|
-
|
|
349
|
+
template <>
|
|
350
|
+
EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet2cd& from) {
|
|
351
|
+
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd((double*)to, from.v);
|
|
352
|
+
}
|
|
353
|
+
template <>
|
|
354
|
+
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet2cd& from) {
|
|
355
|
+
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd((double*)to, from.v);
|
|
356
|
+
}
|
|
296
357
|
|
|
297
|
-
template<>
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
358
|
+
template <>
|
|
359
|
+
EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from,
|
|
360
|
+
Index stride) {
|
|
361
|
+
return Packet2cd(_mm256_set_pd(std::imag(from[1 * stride]), std::real(from[1 * stride]), std::imag(from[0 * stride]),
|
|
362
|
+
std::real(from[0 * stride])));
|
|
301
363
|
}
|
|
302
364
|
|
|
303
|
-
template<>
|
|
304
|
-
|
|
365
|
+
template <>
|
|
366
|
+
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from,
|
|
367
|
+
Index stride) {
|
|
305
368
|
__m128d low = _mm256_extractf128_pd(from.v, 0);
|
|
306
|
-
to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
|
|
369
|
+
to[stride * 0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
|
|
307
370
|
__m128d high = _mm256_extractf128_pd(from.v, 1);
|
|
308
|
-
to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
|
|
371
|
+
to[stride * 1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
|
|
309
372
|
}
|
|
310
373
|
|
|
311
|
-
template<>
|
|
312
|
-
{
|
|
374
|
+
template <>
|
|
375
|
+
EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a) {
|
|
313
376
|
__m128d low = _mm256_extractf128_pd(a.v, 0);
|
|
314
377
|
EIGEN_ALIGN16 double res[2];
|
|
315
378
|
_mm_store_pd(res, low);
|
|
316
|
-
return std::complex<double>(res[0],res[1]);
|
|
379
|
+
return std::complex<double>(res[0], res[1]);
|
|
317
380
|
}
|
|
318
381
|
|
|
319
|
-
template<>
|
|
382
|
+
template <>
|
|
383
|
+
EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
|
|
320
384
|
__m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
|
|
321
385
|
return Packet2cd(result);
|
|
322
386
|
}
|
|
323
387
|
|
|
324
|
-
template<>
|
|
325
|
-
{
|
|
326
|
-
return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
|
327
|
-
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
|
|
331
|
-
{
|
|
332
|
-
Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
|
|
333
|
-
Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
|
|
334
|
-
|
|
335
|
-
return Packet2cd(_mm256_add_pd(t0,t1));
|
|
388
|
+
template <>
|
|
389
|
+
EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a) {
|
|
390
|
+
return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v, 0)), Packet1cd(_mm256_extractf128_pd(a.v, 1))));
|
|
336
391
|
}
|
|
337
392
|
|
|
338
|
-
template<>
|
|
339
|
-
{
|
|
340
|
-
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
|
341
|
-
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
|
393
|
+
template <>
|
|
394
|
+
EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a) {
|
|
395
|
+
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v, 0)), Packet1cd(_mm256_extractf128_pd(a.v, 1))));
|
|
342
396
|
}
|
|
343
397
|
|
|
344
|
-
|
|
345
|
-
struct palign_impl<Offset,Packet2cd>
|
|
346
|
-
{
|
|
347
|
-
static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
|
|
348
|
-
{
|
|
349
|
-
if (Offset==0) return;
|
|
350
|
-
palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
|
|
351
|
-
}
|
|
352
|
-
};
|
|
353
|
-
|
|
354
|
-
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
|
|
355
|
-
{
|
|
356
|
-
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
357
|
-
{ return padd(pmul(x,y),c); }
|
|
358
|
-
|
|
359
|
-
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
360
|
-
{
|
|
361
|
-
return internal::pmul(a, pconj(b));
|
|
362
|
-
}
|
|
363
|
-
};
|
|
364
|
-
|
|
365
|
-
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
|
|
366
|
-
{
|
|
367
|
-
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
368
|
-
{ return padd(pmul(x,y),c); }
|
|
369
|
-
|
|
370
|
-
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
371
|
-
{
|
|
372
|
-
return internal::pmul(pconj(a), b);
|
|
373
|
-
}
|
|
374
|
-
};
|
|
375
|
-
|
|
376
|
-
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
|
|
377
|
-
{
|
|
378
|
-
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
|
379
|
-
{ return padd(pmul(x,y),c); }
|
|
380
|
-
|
|
381
|
-
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
|
382
|
-
{
|
|
383
|
-
return pconj(internal::pmul(a, b));
|
|
384
|
-
}
|
|
385
|
-
};
|
|
386
|
-
|
|
387
|
-
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
|
|
398
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd, Packet4d)
|
|
388
399
|
|
|
389
|
-
template<>
|
|
390
|
-
{
|
|
391
|
-
|
|
392
|
-
__m256d tmp = _mm256_mul_pd(b.v, b.v);
|
|
393
|
-
__m256d denom = _mm256_hadd_pd(tmp, tmp);
|
|
394
|
-
return Packet2cd(_mm256_div_pd(num.v, denom));
|
|
400
|
+
template <>
|
|
401
|
+
EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
|
|
402
|
+
return pdiv_complex(a, b);
|
|
395
403
|
}
|
|
396
404
|
|
|
397
|
-
template<>
|
|
398
|
-
{
|
|
405
|
+
template <>
|
|
406
|
+
EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x) {
|
|
399
407
|
return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
|
|
400
408
|
}
|
|
401
409
|
|
|
402
|
-
EIGEN_DEVICE_FUNC inline void
|
|
403
|
-
ptranspose(PacketBlock<Packet4cf,4>& kernel) {
|
|
410
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4cf, 4>& kernel) {
|
|
404
411
|
__m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
|
|
405
412
|
__m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
|
|
406
413
|
__m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
|
|
@@ -417,35 +424,142 @@ ptranspose(PacketBlock<Packet4cf,4>& kernel) {
|
|
|
417
424
|
kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
|
|
418
425
|
}
|
|
419
426
|
|
|
420
|
-
EIGEN_DEVICE_FUNC inline void
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
kernel.packet[
|
|
424
|
-
|
|
427
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cd, 2>& kernel) {
|
|
428
|
+
__m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0 + (2 << 4));
|
|
429
|
+
kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1 + (3 << 4));
|
|
430
|
+
kernel.packet[0].v = tmp;
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
template <>
|
|
434
|
+
EIGEN_STRONG_INLINE Packet2cd psqrt<Packet2cd>(const Packet2cd& a) {
|
|
435
|
+
return psqrt_complex<Packet2cd>(a);
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
template <>
|
|
439
|
+
EIGEN_STRONG_INLINE Packet4cf psqrt<Packet4cf>(const Packet4cf& a) {
|
|
440
|
+
return psqrt_complex<Packet4cf>(a);
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
template <>
|
|
444
|
+
EIGEN_STRONG_INLINE Packet2cd plog<Packet2cd>(const Packet2cd& a) {
|
|
445
|
+
return plog_complex<Packet2cd>(a);
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
template <>
|
|
449
|
+
EIGEN_STRONG_INLINE Packet4cf plog<Packet4cf>(const Packet4cf& a) {
|
|
450
|
+
return plog_complex<Packet4cf>(a);
|
|
425
451
|
}
|
|
426
452
|
|
|
427
|
-
template<>
|
|
428
|
-
{
|
|
429
|
-
return
|
|
453
|
+
template <>
|
|
454
|
+
EIGEN_STRONG_INLINE Packet4cf pexp<Packet4cf>(const Packet4cf& a) {
|
|
455
|
+
return pexp_complex<Packet4cf>(a);
|
|
430
456
|
}
|
|
431
457
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
458
|
+
#ifdef EIGEN_VECTORIZE_FMA
|
|
459
|
+
// std::complex<float>
|
|
460
|
+
template <>
|
|
461
|
+
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
|
462
|
+
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
|
463
|
+
__m256 a_even = _mm256_moveldup_ps(a.v);
|
|
464
|
+
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
|
465
|
+
__m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmaddsub_ps(a_odd, b_swap, c.v));
|
|
466
|
+
return Packet4cf(result);
|
|
467
|
+
}
|
|
468
|
+
template <>
|
|
469
|
+
EIGEN_STRONG_INLINE Packet4cf pmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
|
470
|
+
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
|
471
|
+
__m256 a_even = _mm256_moveldup_ps(a.v);
|
|
472
|
+
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
|
473
|
+
__m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmsubadd_ps(a_odd, b_swap, c.v));
|
|
474
|
+
return Packet4cf(result);
|
|
475
|
+
}
|
|
476
|
+
template <>
|
|
477
|
+
EIGEN_STRONG_INLINE Packet4cf pnmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
|
478
|
+
return pnegate(pmsub(a, b, c));
|
|
479
|
+
}
|
|
480
|
+
template <>
|
|
481
|
+
EIGEN_STRONG_INLINE Packet4cf pnmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
|
482
|
+
return pnegate(pmadd(a, b, c));
|
|
483
|
+
}
|
|
484
|
+
// std::complex<double>
|
|
485
|
+
template <>
|
|
486
|
+
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
|
487
|
+
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
|
488
|
+
__m256d a_even = _mm256_movedup_pd(a.v);
|
|
489
|
+
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
|
490
|
+
__m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmaddsub_pd(a_odd, b_swap, c.v));
|
|
491
|
+
return Packet2cd(result);
|
|
492
|
+
}
|
|
493
|
+
template <>
|
|
494
|
+
EIGEN_STRONG_INLINE Packet2cd pmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
|
495
|
+
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
|
496
|
+
__m256d a_even = _mm256_movedup_pd(a.v);
|
|
497
|
+
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
|
498
|
+
__m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmsubadd_pd(a_odd, b_swap, c.v));
|
|
499
|
+
return Packet2cd(result);
|
|
500
|
+
}
|
|
501
|
+
template <>
|
|
502
|
+
EIGEN_STRONG_INLINE Packet2cd pnmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
|
503
|
+
return pnegate(pmsub(a, b, c));
|
|
504
|
+
}
|
|
505
|
+
template <>
|
|
506
|
+
EIGEN_STRONG_INLINE Packet2cd pnmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
|
507
|
+
return pnegate(pmadd(a, b, c));
|
|
508
|
+
}
|
|
509
|
+
#endif
|
|
510
|
+
|
|
511
|
+
/*---------------- load/store segment support ----------------*/
|
|
512
|
+
|
|
513
|
+
/*---------------- std::complex<float> ----------------*/
|
|
514
|
+
|
|
515
|
+
template <>
|
|
516
|
+
struct has_packet_segment<Packet2cf> : std::true_type {};
|
|
517
|
+
|
|
518
|
+
template <>
|
|
519
|
+
struct has_packet_segment<Packet4cf> : std::true_type {};
|
|
520
|
+
|
|
521
|
+
template <>
|
|
522
|
+
inline Packet2cf ploaduSegment<Packet2cf>(const std::complex<float>* from, Index begin, Index count) {
|
|
523
|
+
return (Packet2cf)_mm_maskload_ps(&numext::real_ref(*from), segment_mask_2x64(begin, count));
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
template <>
|
|
527
|
+
inline void pstoreuSegment<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index begin,
|
|
528
|
+
Index count) {
|
|
529
|
+
_mm_maskstore_ps(&numext::real_ref(*to), segment_mask_2x64(begin, count), from.v);
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
template <>
|
|
533
|
+
inline Packet4cf ploaduSegment<Packet4cf>(const std::complex<float>* from, Index begin, Index count) {
|
|
534
|
+
return (Packet4cf)_mm256_maskload_ps(&numext::real_ref(*from), segment_mask_4x64(begin, count));
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
template <>
|
|
538
|
+
inline void pstoreuSegment<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index begin,
|
|
539
|
+
Index count) {
|
|
540
|
+
_mm256_maskstore_ps(&numext::real_ref(*to), segment_mask_4x64(begin, count), from.v);
|
|
435
541
|
}
|
|
436
542
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
543
|
+
/*---------------- std::complex<double> ----------------*/
|
|
544
|
+
|
|
545
|
+
template <>
|
|
546
|
+
struct has_packet_segment<Packet2cd> : std::true_type {};
|
|
547
|
+
|
|
548
|
+
template <>
|
|
549
|
+
inline Packet2cd ploaduSegment<Packet2cd>(const std::complex<double>* from, Index begin, Index count) {
|
|
550
|
+
return (Packet2cd)_mm256_maskload_pd(&numext::real_ref(*from), segment_mask_4x64(2 * begin, 2 * count));
|
|
440
551
|
}
|
|
441
552
|
|
|
442
|
-
template<>
|
|
443
|
-
|
|
444
|
-
|
|
553
|
+
template <>
|
|
554
|
+
inline void pstoreuSegment<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from,
|
|
555
|
+
Index begin, Index count) {
|
|
556
|
+
_mm256_maskstore_pd(&numext::real_ref(*to), segment_mask_4x64(2 * begin, 2 * count), from.v);
|
|
445
557
|
}
|
|
446
558
|
|
|
447
|
-
|
|
559
|
+
/*---------------- end load/store segment support ----------------*/
|
|
560
|
+
|
|
561
|
+
} // end namespace internal
|
|
448
562
|
|
|
449
|
-
}
|
|
563
|
+
} // end namespace Eigen
|
|
450
564
|
|
|
451
|
-
#endif
|
|
565
|
+
#endif // EIGEN_COMPLEX_AVX_H
|