@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -10,427 +10,389 @@
|
|
|
10
10
|
#ifndef EIGEN_COMPLEX_SSE_H
|
|
11
11
|
#define EIGEN_COMPLEX_SSE_H
|
|
12
12
|
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../../InternalHeaderCheck.h"
|
|
15
|
+
|
|
13
16
|
namespace Eigen {
|
|
14
17
|
|
|
15
18
|
namespace internal {
|
|
16
19
|
|
|
17
20
|
//---------- float ----------
|
|
18
|
-
struct Packet2cf
|
|
19
|
-
{
|
|
21
|
+
struct Packet2cf {
|
|
20
22
|
EIGEN_STRONG_INLINE Packet2cf() {}
|
|
21
23
|
EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
|
|
22
|
-
|
|
24
|
+
Packet4f v;
|
|
23
25
|
};
|
|
24
26
|
|
|
25
27
|
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
|
26
28
|
// to leverage AVX instructions.
|
|
27
29
|
#ifndef EIGEN_VECTORIZE_AVX
|
|
28
|
-
template<>
|
|
29
|
-
{
|
|
30
|
+
template <>
|
|
31
|
+
struct packet_traits<std::complex<float> > : default_packet_traits {
|
|
30
32
|
typedef Packet2cf type;
|
|
31
33
|
typedef Packet2cf half;
|
|
32
34
|
enum {
|
|
33
35
|
Vectorizable = 1,
|
|
34
36
|
AlignedOnScalar = 1,
|
|
35
37
|
size = 2,
|
|
36
|
-
HasHalfPacket = 0,
|
|
37
38
|
|
|
38
|
-
HasAdd
|
|
39
|
-
HasSub
|
|
40
|
-
HasMul
|
|
41
|
-
HasDiv
|
|
39
|
+
HasAdd = 1,
|
|
40
|
+
HasSub = 1,
|
|
41
|
+
HasMul = 1,
|
|
42
|
+
HasDiv = 1,
|
|
42
43
|
HasNegate = 1,
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
44
|
+
HasSqrt = 1,
|
|
45
|
+
HasLog = 1,
|
|
46
|
+
HasExp = 1,
|
|
47
|
+
HasAbs = 0,
|
|
48
|
+
HasAbs2 = 0,
|
|
49
|
+
HasMin = 0,
|
|
50
|
+
HasMax = 0,
|
|
47
51
|
HasSetLinear = 0,
|
|
48
52
|
HasBlend = 1
|
|
49
53
|
};
|
|
50
54
|
};
|
|
51
55
|
#endif
|
|
52
56
|
|
|
53
|
-
template<>
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
{
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
|
69
|
-
{
|
|
70
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
71
|
-
return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
|
|
72
|
-
_mm_mul_ps(_mm_movehdup_ps(a.v),
|
|
73
|
-
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
|
74
|
-
// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
|
75
|
-
// _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
|
76
|
-
// vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
|
77
|
-
#else
|
|
78
|
-
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
|
|
79
|
-
return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
|
80
|
-
_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
|
81
|
-
vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
|
|
82
|
-
#endif
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
|
|
86
|
-
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
|
|
87
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
|
|
88
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
|
|
89
|
-
|
|
90
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
|
|
91
|
-
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
|
|
92
|
-
|
|
93
|
-
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
|
94
|
-
{
|
|
95
|
-
Packet2cf res;
|
|
96
|
-
#if EIGEN_GNUC_AT_MOST(4,2)
|
|
97
|
-
// Workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
|
|
98
|
-
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
|
|
99
|
-
#elif EIGEN_GNUC_AT_LEAST(4,6)
|
|
100
|
-
// Suppress annoying "may be used uninitialized in this function" warning with gcc >= 4.6
|
|
101
|
-
#pragma GCC diagnostic push
|
|
102
|
-
#pragma GCC diagnostic ignored "-Wuninitialized"
|
|
103
|
-
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
|
|
104
|
-
#pragma GCC diagnostic pop
|
|
105
|
-
#else
|
|
106
|
-
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
|
|
107
|
-
#endif
|
|
108
|
-
return Packet2cf(_mm_movelh_ps(res.v,res.v));
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
|
112
|
-
|
|
113
|
-
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
|
|
114
|
-
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
|
|
57
|
+
template <>
|
|
58
|
+
struct unpacket_traits<Packet2cf> {
|
|
59
|
+
typedef std::complex<float> type;
|
|
60
|
+
typedef Packet2cf half;
|
|
61
|
+
typedef Packet4f as_real;
|
|
62
|
+
enum {
|
|
63
|
+
size = 2,
|
|
64
|
+
alignment = Aligned16,
|
|
65
|
+
vectorizable = true,
|
|
66
|
+
masked_load_available = false,
|
|
67
|
+
masked_store_available = false
|
|
68
|
+
};
|
|
69
|
+
};
|
|
115
70
|
|
|
71
|
+
template <>
|
|
72
|
+
EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
73
|
+
return Packet2cf(_mm_add_ps(a.v, b.v));
|
|
74
|
+
}
|
|
75
|
+
template <>
|
|
76
|
+
EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
77
|
+
return Packet2cf(_mm_sub_ps(a.v, b.v));
|
|
78
|
+
}
|
|
116
79
|
|
|
117
|
-
template<>
|
|
118
|
-
{
|
|
119
|
-
|
|
120
|
-
|
|
80
|
+
template <>
|
|
81
|
+
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) {
|
|
82
|
+
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000));
|
|
83
|
+
return Packet2cf(_mm_xor_ps(a.v, mask));
|
|
84
|
+
}
|
|
85
|
+
template <>
|
|
86
|
+
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {
|
|
87
|
+
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000));
|
|
88
|
+
return Packet2cf(_mm_xor_ps(a.v, mask));
|
|
121
89
|
}
|
|
122
90
|
|
|
123
|
-
template<>
|
|
124
|
-
{
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
91
|
+
template <>
|
|
92
|
+
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) {
|
|
93
|
+
#ifdef EIGEN_VECTORIZE_SSE3
|
|
94
|
+
__m128 tmp1 = _mm_mul_ps(_mm_movehdup_ps(a.v), vec4f_swizzle1(b.v, 1, 0, 3, 2));
|
|
95
|
+
__m128 tmp2 = _mm_moveldup_ps(a.v);
|
|
96
|
+
#else
|
|
97
|
+
__m128 tmp1 = _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), vec4f_swizzle1(b.v, 1, 0, 3, 2));
|
|
98
|
+
__m128 tmp2 = vec4f_swizzle1(a.v, 0, 0, 2, 2);
|
|
99
|
+
#endif
|
|
100
|
+
#ifdef EIGEN_VECTORIZE_FMA
|
|
101
|
+
__m128 result = _mm_fmaddsub_ps(tmp2, b.v, tmp1);
|
|
102
|
+
#else
|
|
103
|
+
#ifdef EIGEN_VECTORIZE_SSE3
|
|
104
|
+
__m128 result = _mm_addsub_ps(_mm_mul_ps(tmp2, b.v), tmp1);
|
|
105
|
+
#else
|
|
106
|
+
const __m128 mask = _mm_setr_ps(-0.0f, 0.0f, -0.0f, 0.0f);
|
|
107
|
+
__m128 result = _mm_add_ps(_mm_mul_ps(tmp2, b.v), _mm_xor_ps(tmp1, mask));
|
|
108
|
+
#endif
|
|
109
|
+
#endif
|
|
110
|
+
return Packet2cf(result);
|
|
129
111
|
}
|
|
130
112
|
|
|
131
|
-
template<>
|
|
113
|
+
template <>
|
|
114
|
+
EIGEN_STRONG_INLINE Packet2cf ptrue<Packet2cf>(const Packet2cf& a) {
|
|
115
|
+
return Packet2cf(ptrue(Packet4f(a.v)));
|
|
116
|
+
}
|
|
117
|
+
template <>
|
|
118
|
+
EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
119
|
+
return Packet2cf(_mm_and_ps(a.v, b.v));
|
|
120
|
+
}
|
|
121
|
+
template <>
|
|
122
|
+
EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
123
|
+
return Packet2cf(_mm_or_ps(a.v, b.v));
|
|
124
|
+
}
|
|
125
|
+
template <>
|
|
126
|
+
EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
127
|
+
return Packet2cf(_mm_xor_ps(a.v, b.v));
|
|
128
|
+
}
|
|
129
|
+
template <>
|
|
130
|
+
EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
131
|
+
return Packet2cf(_mm_andnot_ps(b.v, a.v));
|
|
132
|
+
}
|
|
132
133
|
|
|
133
|
-
template<>
|
|
134
|
-
{
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
return res[0];
|
|
141
|
-
#else
|
|
142
|
-
std::complex<float> res;
|
|
143
|
-
_mm_storel_pi((__m64*)&res, a.v);
|
|
144
|
-
return res;
|
|
145
|
-
#endif
|
|
134
|
+
template <>
|
|
135
|
+
EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) {
|
|
136
|
+
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(_mm_load_ps(&numext::real_ref(*from)));
|
|
137
|
+
}
|
|
138
|
+
template <>
|
|
139
|
+
EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) {
|
|
140
|
+
EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(_mm_loadu_ps(&numext::real_ref(*from)));
|
|
146
141
|
}
|
|
147
142
|
|
|
148
|
-
template<>
|
|
143
|
+
template <>
|
|
144
|
+
EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) {
|
|
145
|
+
const float re = std::real(from);
|
|
146
|
+
const float im = std::imag(from);
|
|
147
|
+
return Packet2cf(_mm_set_ps(im, re, im, re));
|
|
148
|
+
}
|
|
149
149
|
|
|
150
|
-
template<>
|
|
151
|
-
{
|
|
152
|
-
return
|
|
150
|
+
template <>
|
|
151
|
+
EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) {
|
|
152
|
+
return pset1<Packet2cf>(*from);
|
|
153
153
|
}
|
|
154
154
|
|
|
155
|
-
template<>
|
|
156
|
-
{
|
|
157
|
-
|
|
155
|
+
template <>
|
|
156
|
+
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
|
|
157
|
+
EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(&numext::real_ref(*to), from.v);
|
|
158
|
+
}
|
|
159
|
+
template <>
|
|
160
|
+
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
|
|
161
|
+
EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(&numext::real_ref(*to), from.v);
|
|
158
162
|
}
|
|
159
163
|
|
|
160
|
-
template<>
|
|
161
|
-
|
|
162
|
-
|
|
164
|
+
template <>
|
|
165
|
+
EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from,
|
|
166
|
+
Index stride) {
|
|
167
|
+
return Packet2cf(_mm_set_ps(std::imag(from[1 * stride]), std::real(from[1 * stride]), std::imag(from[0 * stride]),
|
|
168
|
+
std::real(from[0 * stride])));
|
|
163
169
|
}
|
|
164
170
|
|
|
165
|
-
template
|
|
166
|
-
|
|
167
|
-
{
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
first.v = _mm_movelh_ps(first.v, second.v);
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
};
|
|
171
|
+
template <>
|
|
172
|
+
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from,
|
|
173
|
+
Index stride) {
|
|
174
|
+
to[stride * 0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
|
|
175
|
+
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
|
|
176
|
+
to[stride * 1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
|
|
177
|
+
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
|
|
178
|
+
}
|
|
177
179
|
|
|
178
|
-
template<>
|
|
179
|
-
{
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
184
|
-
{
|
|
185
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
186
|
-
return internal::pmul(a, pconj(b));
|
|
187
|
-
#else
|
|
188
|
-
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
|
189
|
-
return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
|
|
190
|
-
_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
|
191
|
-
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
|
192
|
-
#endif
|
|
193
|
-
}
|
|
194
|
-
};
|
|
180
|
+
template <>
|
|
181
|
+
EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float>* addr) {
|
|
182
|
+
_mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
|
|
183
|
+
}
|
|
195
184
|
|
|
196
|
-
template<>
|
|
197
|
-
{
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
{
|
|
203
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
204
|
-
return internal::pmul(pconj(a), b);
|
|
205
|
-
#else
|
|
206
|
-
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
|
207
|
-
return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
|
208
|
-
_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
|
209
|
-
vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
|
|
210
|
-
#endif
|
|
211
|
-
}
|
|
212
|
-
};
|
|
185
|
+
template <>
|
|
186
|
+
EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) {
|
|
187
|
+
alignas(alignof(__m64)) std::complex<float> res;
|
|
188
|
+
_mm_storel_pi((__m64*)&res, a.v);
|
|
189
|
+
return res;
|
|
190
|
+
}
|
|
213
191
|
|
|
214
|
-
template<>
|
|
215
|
-
{
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
220
|
-
{
|
|
221
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
222
|
-
return pconj(internal::pmul(a, b));
|
|
223
|
-
#else
|
|
224
|
-
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
|
225
|
-
return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
|
|
226
|
-
_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
|
227
|
-
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
|
228
|
-
#endif
|
|
229
|
-
}
|
|
230
|
-
};
|
|
192
|
+
template <>
|
|
193
|
+
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) {
|
|
194
|
+
return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v)))));
|
|
195
|
+
}
|
|
231
196
|
|
|
232
|
-
|
|
197
|
+
template <>
|
|
198
|
+
EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) {
|
|
199
|
+
return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v, a.v))));
|
|
200
|
+
}
|
|
233
201
|
|
|
234
|
-
template<>
|
|
235
|
-
{
|
|
236
|
-
|
|
237
|
-
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
|
|
238
|
-
__m128 s = _mm_mul_ps(b.v,b.v);
|
|
239
|
-
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
|
|
202
|
+
template <>
|
|
203
|
+
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) {
|
|
204
|
+
return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v, a.v))));
|
|
240
205
|
}
|
|
241
206
|
|
|
242
|
-
EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
|
|
243
|
-
{
|
|
207
|
+
EIGEN_STRONG_INLINE Packet2cf pcplxflip /* <Packet2cf> */ (const Packet2cf& x) {
|
|
244
208
|
return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
|
|
245
209
|
}
|
|
246
210
|
|
|
211
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf, Packet4f)
|
|
212
|
+
|
|
213
|
+
template <>
|
|
214
|
+
EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
215
|
+
return pdiv_complex(a, b);
|
|
216
|
+
}
|
|
247
217
|
|
|
248
218
|
//---------- double ----------
|
|
249
|
-
struct Packet1cd
|
|
250
|
-
{
|
|
219
|
+
struct Packet1cd {
|
|
251
220
|
EIGEN_STRONG_INLINE Packet1cd() {}
|
|
252
221
|
EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
|
|
253
|
-
|
|
222
|
+
Packet2d v;
|
|
254
223
|
};
|
|
255
224
|
|
|
256
225
|
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
|
257
226
|
// to leverage AVX instructions.
|
|
258
227
|
#ifndef EIGEN_VECTORIZE_AVX
|
|
259
|
-
template<>
|
|
260
|
-
{
|
|
228
|
+
template <>
|
|
229
|
+
struct packet_traits<std::complex<double> > : default_packet_traits {
|
|
261
230
|
typedef Packet1cd type;
|
|
262
231
|
typedef Packet1cd half;
|
|
263
232
|
enum {
|
|
264
233
|
Vectorizable = 1,
|
|
265
234
|
AlignedOnScalar = 0,
|
|
266
235
|
size = 1,
|
|
267
|
-
HasHalfPacket = 0,
|
|
268
236
|
|
|
269
|
-
HasAdd
|
|
270
|
-
HasSub
|
|
271
|
-
HasMul
|
|
272
|
-
HasDiv
|
|
237
|
+
HasAdd = 1,
|
|
238
|
+
HasSub = 1,
|
|
239
|
+
HasMul = 1,
|
|
240
|
+
HasDiv = 1,
|
|
273
241
|
HasNegate = 1,
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
242
|
+
HasSqrt = 1,
|
|
243
|
+
HasLog = 1,
|
|
244
|
+
HasAbs = 0,
|
|
245
|
+
HasAbs2 = 0,
|
|
246
|
+
HasMin = 0,
|
|
247
|
+
HasMax = 0,
|
|
278
248
|
HasSetLinear = 0
|
|
279
249
|
};
|
|
280
250
|
};
|
|
281
251
|
#endif
|
|
282
252
|
|
|
283
|
-
template<>
|
|
253
|
+
template <>
|
|
254
|
+
struct unpacket_traits<Packet1cd> {
|
|
255
|
+
typedef std::complex<double> type;
|
|
256
|
+
typedef Packet1cd half;
|
|
257
|
+
typedef Packet2d as_real;
|
|
258
|
+
enum {
|
|
259
|
+
size = 1,
|
|
260
|
+
alignment = Aligned16,
|
|
261
|
+
vectorizable = true,
|
|
262
|
+
masked_load_available = false,
|
|
263
|
+
masked_store_available = false
|
|
264
|
+
};
|
|
265
|
+
};
|
|
284
266
|
|
|
285
|
-
template<>
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
return Packet1cd(
|
|
267
|
+
template <>
|
|
268
|
+
EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
269
|
+
return Packet1cd(_mm_add_pd(a.v, b.v));
|
|
270
|
+
}
|
|
271
|
+
template <>
|
|
272
|
+
EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
273
|
+
return Packet1cd(_mm_sub_pd(a.v, b.v));
|
|
274
|
+
}
|
|
275
|
+
template <>
|
|
276
|
+
EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
|
|
277
|
+
return Packet1cd(pnegate(Packet2d(a.v)));
|
|
278
|
+
}
|
|
279
|
+
template <>
|
|
280
|
+
EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
|
|
281
|
+
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000, 0x0, 0x0, 0x0));
|
|
282
|
+
return Packet1cd(_mm_xor_pd(a.v, mask));
|
|
292
283
|
}
|
|
293
284
|
|
|
294
|
-
template<>
|
|
295
|
-
{
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
285
|
+
template <>
|
|
286
|
+
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) {
|
|
287
|
+
__m128d tmp1 = _mm_mul_pd(_mm_unpackhi_pd(a.v, a.v), vec2d_swizzle1(b.v, 1, 0));
|
|
288
|
+
#ifdef EIGEN_VECTORIZE_SSE3
|
|
289
|
+
__m128d tmp2 = _mm_movedup_pd(a.v);
|
|
290
|
+
#else
|
|
291
|
+
__m128d tmp2 = _mm_unpacklo_pd(a.v, a.v);
|
|
292
|
+
#endif
|
|
293
|
+
#ifdef EIGEN_VECTORIZE_FMA
|
|
294
|
+
__m128d result = _mm_fmaddsub_pd(tmp2, b.v, tmp1);
|
|
295
|
+
#else
|
|
296
|
+
#ifdef EIGEN_VECTORIZE_SSE3
|
|
297
|
+
__m128d result = _mm_addsub_pd(_mm_mul_pd(tmp2, b.v), tmp1);
|
|
298
|
+
#else
|
|
299
|
+
const __m128d mask = _mm_setr_pd(-0.0, 0.0);
|
|
300
|
+
__m128d result = _mm_add_pd(_mm_mul_pd(tmp2, b.v), _mm_xor_pd(tmp1, mask));
|
|
301
|
+
#endif
|
|
302
|
+
#endif
|
|
303
|
+
return Packet1cd(result);
|
|
306
304
|
}
|
|
307
305
|
|
|
308
|
-
template<>
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
306
|
+
template <>
|
|
307
|
+
EIGEN_STRONG_INLINE Packet1cd ptrue<Packet1cd>(const Packet1cd& a) {
|
|
308
|
+
return Packet1cd(ptrue(Packet2d(a.v)));
|
|
309
|
+
}
|
|
310
|
+
template <>
|
|
311
|
+
EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
312
|
+
return Packet1cd(_mm_and_pd(a.v, b.v));
|
|
313
|
+
}
|
|
314
|
+
template <>
|
|
315
|
+
EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
316
|
+
return Packet1cd(_mm_or_pd(a.v, b.v));
|
|
317
|
+
}
|
|
318
|
+
template <>
|
|
319
|
+
EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
320
|
+
return Packet1cd(_mm_xor_pd(a.v, b.v));
|
|
321
|
+
}
|
|
322
|
+
template <>
|
|
323
|
+
EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
324
|
+
return Packet1cd(_mm_andnot_pd(b.v, a.v));
|
|
325
|
+
}
|
|
312
326
|
|
|
313
327
|
// FIXME force unaligned load, this is a temporary fix
|
|
314
|
-
template<>
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
template<>
|
|
319
|
-
|
|
328
|
+
template <>
|
|
329
|
+
EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) {
|
|
330
|
+
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(_mm_load_pd((const double*)from));
|
|
331
|
+
}
|
|
332
|
+
template <>
|
|
333
|
+
EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) {
|
|
334
|
+
EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(_mm_loadu_pd((const double*)from));
|
|
335
|
+
}
|
|
336
|
+
template <>
|
|
337
|
+
EIGEN_STRONG_INLINE Packet1cd
|
|
338
|
+
pset1<Packet1cd>(const std::complex<double>& from) { /* here we really have to use unaligned loads :( */
|
|
339
|
+
return ploadu<Packet1cd>(&from);
|
|
340
|
+
}
|
|
320
341
|
|
|
321
|
-
template<>
|
|
342
|
+
template <>
|
|
343
|
+
EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) {
|
|
344
|
+
return pset1<Packet1cd>(*from);
|
|
345
|
+
}
|
|
322
346
|
|
|
323
347
|
// FIXME force unaligned store, this is a temporary fix
|
|
324
|
-
template<>
|
|
325
|
-
|
|
348
|
+
template <>
|
|
349
|
+
EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
|
|
350
|
+
EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd((double*)to, from.v);
|
|
351
|
+
}
|
|
352
|
+
template <>
|
|
353
|
+
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
|
|
354
|
+
EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd((double*)to, from.v);
|
|
355
|
+
}
|
|
326
356
|
|
|
327
|
-
template<>
|
|
357
|
+
template <>
|
|
358
|
+
EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double>* addr) {
|
|
359
|
+
_mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
|
|
360
|
+
}
|
|
328
361
|
|
|
329
|
-
template<>
|
|
330
|
-
{
|
|
362
|
+
template <>
|
|
363
|
+
EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) {
|
|
331
364
|
EIGEN_ALIGN16 double res[2];
|
|
332
365
|
_mm_store_pd(res, a.v);
|
|
333
|
-
return std::complex<double>(res[0],res[1]);
|
|
366
|
+
return std::complex<double>(res[0], res[1]);
|
|
334
367
|
}
|
|
335
368
|
|
|
336
|
-
template<>
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
{
|
|
340
|
-
return pfirst(a);
|
|
369
|
+
template <>
|
|
370
|
+
EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) {
|
|
371
|
+
return a;
|
|
341
372
|
}
|
|
342
373
|
|
|
343
|
-
template<>
|
|
344
|
-
{
|
|
345
|
-
return
|
|
374
|
+
template <>
|
|
375
|
+
EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) {
|
|
376
|
+
return pfirst(a);
|
|
346
377
|
}
|
|
347
378
|
|
|
348
|
-
template<>
|
|
349
|
-
{
|
|
379
|
+
template <>
|
|
380
|
+
EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) {
|
|
350
381
|
return pfirst(a);
|
|
351
382
|
}
|
|
352
383
|
|
|
353
|
-
|
|
354
|
-
struct palign_impl<Offset,Packet1cd>
|
|
355
|
-
{
|
|
356
|
-
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
|
357
|
-
{
|
|
358
|
-
// FIXME is it sure we never have to align a Packet1cd?
|
|
359
|
-
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
|
360
|
-
}
|
|
361
|
-
};
|
|
362
|
-
|
|
363
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
|
364
|
-
{
|
|
365
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
366
|
-
{ return padd(pmul(x,y),c); }
|
|
367
|
-
|
|
368
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
369
|
-
{
|
|
370
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
371
|
-
return internal::pmul(a, pconj(b));
|
|
372
|
-
#else
|
|
373
|
-
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
|
374
|
-
return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
|
|
375
|
-
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
|
|
376
|
-
vec2d_swizzle1(b.v, 1, 0))));
|
|
377
|
-
#endif
|
|
378
|
-
}
|
|
379
|
-
};
|
|
380
|
-
|
|
381
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
|
382
|
-
{
|
|
383
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
384
|
-
{ return padd(pmul(x,y),c); }
|
|
385
|
-
|
|
386
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
387
|
-
{
|
|
388
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
389
|
-
return internal::pmul(pconj(a), b);
|
|
390
|
-
#else
|
|
391
|
-
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
|
392
|
-
return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
|
|
393
|
-
_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
|
|
394
|
-
vec2d_swizzle1(b.v, 1, 0)), mask)));
|
|
395
|
-
#endif
|
|
396
|
-
}
|
|
397
|
-
};
|
|
398
|
-
|
|
399
|
-
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
|
400
|
-
{
|
|
401
|
-
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
402
|
-
{ return padd(pmul(x,y),c); }
|
|
403
|
-
|
|
404
|
-
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
405
|
-
{
|
|
406
|
-
#ifdef EIGEN_VECTORIZE_SSE3
|
|
407
|
-
return pconj(internal::pmul(a, b));
|
|
408
|
-
#else
|
|
409
|
-
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
|
410
|
-
return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
|
|
411
|
-
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
|
|
412
|
-
vec2d_swizzle1(b.v, 1, 0))));
|
|
413
|
-
#endif
|
|
414
|
-
}
|
|
415
|
-
};
|
|
416
|
-
|
|
417
|
-
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
|
384
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd, Packet2d)
|
|
418
385
|
|
|
419
|
-
template<>
|
|
420
|
-
{
|
|
421
|
-
|
|
422
|
-
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
|
423
|
-
__m128d s = _mm_mul_pd(b.v,b.v);
|
|
424
|
-
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
|
|
386
|
+
template <>
|
|
387
|
+
EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
388
|
+
return pdiv_complex(a, b);
|
|
425
389
|
}
|
|
426
390
|
|
|
427
|
-
EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)
|
|
428
|
-
{
|
|
391
|
+
EIGEN_STRONG_INLINE Packet1cd pcplxflip /* <Packet1cd> */ (const Packet1cd& x) {
|
|
429
392
|
return Packet1cd(preverse(Packet2d(x.v)));
|
|
430
393
|
}
|
|
431
394
|
|
|
432
|
-
EIGEN_DEVICE_FUNC inline void
|
|
433
|
-
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
|
395
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel) {
|
|
434
396
|
__m128d w1 = _mm_castps_pd(kernel.packet[0].v);
|
|
435
397
|
__m128d w2 = _mm_castps_pd(kernel.packet[1].v);
|
|
436
398
|
|
|
@@ -439,33 +401,103 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
|
|
439
401
|
kernel.packet[1].v = tmp;
|
|
440
402
|
}
|
|
441
403
|
|
|
442
|
-
template<>
|
|
404
|
+
template <>
|
|
405
|
+
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
|
|
406
|
+
__m128 eq = _mm_cmpeq_ps(a.v, b.v);
|
|
407
|
+
return Packet2cf(pand<Packet4f>(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)));
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
template <>
|
|
411
|
+
EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
|
|
412
|
+
__m128d eq = _mm_cmpeq_pd(a.v, b.v);
|
|
413
|
+
return Packet1cd(pand<Packet2d>(eq, vec2d_swizzle1(eq, 1, 0)));
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
template <>
|
|
417
|
+
EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket,
|
|
418
|
+
const Packet2cf& elsePacket) {
|
|
443
419
|
__m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
|
|
444
420
|
return Packet2cf(_mm_castpd_ps(result));
|
|
445
421
|
}
|
|
446
422
|
|
|
447
|
-
template<>
|
|
448
|
-
{
|
|
449
|
-
return
|
|
423
|
+
template <>
|
|
424
|
+
EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
|
|
425
|
+
return psqrt_complex<Packet1cd>(a);
|
|
450
426
|
}
|
|
451
427
|
|
|
452
|
-
template<>
|
|
453
|
-
{
|
|
454
|
-
return
|
|
428
|
+
template <>
|
|
429
|
+
EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
|
|
430
|
+
return psqrt_complex<Packet2cf>(a);
|
|
455
431
|
}
|
|
456
432
|
|
|
457
|
-
template<>
|
|
458
|
-
{
|
|
459
|
-
return
|
|
433
|
+
template <>
|
|
434
|
+
EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a) {
|
|
435
|
+
return plog_complex<Packet1cd>(a);
|
|
460
436
|
}
|
|
461
437
|
|
|
462
|
-
template<>
|
|
463
|
-
{
|
|
464
|
-
return
|
|
438
|
+
template <>
|
|
439
|
+
EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a) {
|
|
440
|
+
return plog_complex<Packet2cf>(a);
|
|
465
441
|
}
|
|
466
442
|
|
|
467
|
-
|
|
443
|
+
template <>
|
|
444
|
+
EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
|
|
445
|
+
return pexp_complex<Packet2cf>(a);
|
|
446
|
+
}
|
|
468
447
|
|
|
469
|
-
|
|
448
|
+
#ifdef EIGEN_VECTORIZE_FMA
|
|
449
|
+
// std::complex<float>
|
|
450
|
+
template <>
|
|
451
|
+
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
|
452
|
+
__m128 a_odd = _mm_movehdup_ps(a.v);
|
|
453
|
+
__m128 a_even = _mm_moveldup_ps(a.v);
|
|
454
|
+
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
|
455
|
+
__m128 result = _mm_fmaddsub_ps(a_even, b.v, _mm_fmaddsub_ps(a_odd, b_swap, c.v));
|
|
456
|
+
return Packet2cf(result);
|
|
457
|
+
}
|
|
458
|
+
template <>
|
|
459
|
+
EIGEN_STRONG_INLINE Packet2cf pmsub(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
|
460
|
+
__m128 a_odd = _mm_movehdup_ps(a.v);
|
|
461
|
+
__m128 a_even = _mm_moveldup_ps(a.v);
|
|
462
|
+
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
|
463
|
+
__m128 result = _mm_fmaddsub_ps(a_even, b.v, _mm_fmsubadd_ps(a_odd, b_swap, c.v));
|
|
464
|
+
return Packet2cf(result);
|
|
465
|
+
}
|
|
466
|
+
template <>
|
|
467
|
+
EIGEN_STRONG_INLINE Packet2cf pnmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
|
468
|
+
return pnegate(pmsub(a, b, c));
|
|
469
|
+
}
|
|
470
|
+
template <>
|
|
471
|
+
EIGEN_STRONG_INLINE Packet2cf pnmsub(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
|
472
|
+
return pnegate(pmadd(a, b, c));
|
|
473
|
+
}
|
|
474
|
+
// std::complex<double>
|
|
475
|
+
template <>
|
|
476
|
+
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
|
477
|
+
__m128d a_odd = _mm_permute_pd(a.v, 0x3);
|
|
478
|
+
__m128d a_even = _mm_movedup_pd(a.v);
|
|
479
|
+
__m128d b_swap = _mm_permute_pd(b.v, 0x1);
|
|
480
|
+
__m128d result = _mm_fmaddsub_pd(a_even, b.v, _mm_fmaddsub_pd(a_odd, b_swap, c.v));
|
|
481
|
+
return Packet1cd(result);
|
|
482
|
+
}
|
|
483
|
+
template <>
|
|
484
|
+
EIGEN_STRONG_INLINE Packet1cd pmsub(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
|
485
|
+
__m128d a_odd = _mm_permute_pd(a.v, 0x3);
|
|
486
|
+
__m128d a_even = _mm_movedup_pd(a.v);
|
|
487
|
+
__m128d b_swap = _mm_permute_pd(b.v, 0x1);
|
|
488
|
+
__m128d result = _mm_fmaddsub_pd(a_even, b.v, _mm_fmsubadd_pd(a_odd, b_swap, c.v));
|
|
489
|
+
return Packet1cd(result);
|
|
490
|
+
}
|
|
491
|
+
template <>
|
|
492
|
+
EIGEN_STRONG_INLINE Packet1cd pnmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
|
493
|
+
return pnegate(pmsub(a, b, c));
|
|
494
|
+
}
|
|
495
|
+
template <>
|
|
496
|
+
EIGEN_STRONG_INLINE Packet1cd pnmsub(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
|
497
|
+
return pnegate(pmadd(a, b, c));
|
|
498
|
+
}
|
|
499
|
+
#endif
|
|
500
|
+
} // end namespace internal
|
|
501
|
+
} // end namespace Eigen
|
|
470
502
|
|
|
471
|
-
#endif
|
|
503
|
+
#endif // EIGEN_COMPLEX_SSE_H
|