@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -11,135 +11,160 @@
|
|
|
11
11
|
#ifndef EIGEN_COMPLEX_NEON_H
|
|
12
12
|
#define EIGEN_COMPLEX_NEON_H
|
|
13
13
|
|
|
14
|
+
// IWYU pragma: private
|
|
15
|
+
#include "../../InternalHeaderCheck.h"
|
|
16
|
+
|
|
14
17
|
namespace Eigen {
|
|
15
18
|
|
|
16
19
|
namespace internal {
|
|
17
20
|
|
|
18
|
-
inline uint32x4_t p4ui_CONJ_XOR()
|
|
19
|
-
{
|
|
21
|
+
inline uint32x4_t p4ui_CONJ_XOR() {
|
|
20
22
|
// See bug 1325, clang fails to call vld1q_u64.
|
|
21
23
|
#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
|
|
22
|
-
uint32x4_t ret = {
|
|
24
|
+
uint32x4_t ret = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
|
|
23
25
|
return ret;
|
|
24
26
|
#else
|
|
25
|
-
static const uint32_t conj_XOR_DATA[] = {
|
|
26
|
-
return vld1q_u32(
|
|
27
|
+
static const uint32_t conj_XOR_DATA[] = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
|
|
28
|
+
return vld1q_u32(conj_XOR_DATA);
|
|
27
29
|
#endif
|
|
28
30
|
}
|
|
29
31
|
|
|
30
|
-
inline uint32x2_t p2ui_CONJ_XOR()
|
|
31
|
-
{
|
|
32
|
-
|
|
33
|
-
return vld1_u32( conj_XOR_DATA );
|
|
32
|
+
inline uint32x2_t p2ui_CONJ_XOR() {
|
|
33
|
+
static const uint32_t conj_XOR_DATA[] = {0x00000000, 0x80000000};
|
|
34
|
+
return vld1_u32(conj_XOR_DATA);
|
|
34
35
|
}
|
|
35
36
|
|
|
36
37
|
//---------- float ----------
|
|
37
38
|
|
|
38
|
-
struct Packet1cf
|
|
39
|
-
{
|
|
39
|
+
struct Packet1cf {
|
|
40
40
|
EIGEN_STRONG_INLINE Packet1cf() {}
|
|
41
41
|
EIGEN_STRONG_INLINE explicit Packet1cf(const Packet2f& a) : v(a) {}
|
|
42
42
|
Packet2f v;
|
|
43
43
|
};
|
|
44
|
-
struct Packet2cf
|
|
45
|
-
{
|
|
44
|
+
struct Packet2cf {
|
|
46
45
|
EIGEN_STRONG_INLINE Packet2cf() {}
|
|
47
46
|
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
|
48
47
|
Packet4f v;
|
|
49
48
|
};
|
|
50
49
|
|
|
51
|
-
template<>
|
|
52
|
-
{
|
|
50
|
+
template <>
|
|
51
|
+
struct packet_traits<std::complex<float>> : default_packet_traits {
|
|
53
52
|
typedef Packet2cf type;
|
|
54
53
|
typedef Packet1cf half;
|
|
55
|
-
enum
|
|
56
|
-
{
|
|
54
|
+
enum {
|
|
57
55
|
Vectorizable = 1,
|
|
58
56
|
AlignedOnScalar = 1,
|
|
59
57
|
size = 2,
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
58
|
+
|
|
59
|
+
HasAdd = 1,
|
|
60
|
+
HasSub = 1,
|
|
61
|
+
HasMul = 1,
|
|
62
|
+
HasDiv = 1,
|
|
63
|
+
HasNegate = 1,
|
|
64
|
+
HasSqrt = 1,
|
|
65
|
+
HasLog = 1,
|
|
66
|
+
HasExp = 1,
|
|
67
|
+
HasAbs = 0,
|
|
68
|
+
HasAbs2 = 0,
|
|
69
|
+
HasMin = 0,
|
|
70
|
+
HasMax = 0,
|
|
71
71
|
HasSetLinear = 0
|
|
72
72
|
};
|
|
73
73
|
};
|
|
74
74
|
|
|
75
|
-
template<>
|
|
76
|
-
{
|
|
77
|
-
|
|
78
|
-
typedef Packet1cf half;
|
|
79
|
-
typedef Packet2f as_real;
|
|
80
|
-
enum
|
|
81
|
-
{
|
|
82
|
-
size = 1,
|
|
83
|
-
alignment = Aligned16,
|
|
84
|
-
vectorizable = true,
|
|
85
|
-
masked_load_available = false,
|
|
86
|
-
masked_store_available = false
|
|
87
|
-
};
|
|
75
|
+
template <>
|
|
76
|
+
struct unpacket_traits<Packet1cf> : neon_unpacket_default<Packet1cf, std::complex<float>> {
|
|
77
|
+
using as_real = Packet2f;
|
|
88
78
|
};
|
|
89
|
-
template<>
|
|
90
|
-
{
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
typedef Packet4f as_real;
|
|
94
|
-
enum
|
|
95
|
-
{
|
|
96
|
-
size = 2,
|
|
97
|
-
alignment = Aligned16,
|
|
98
|
-
vectorizable = true,
|
|
99
|
-
masked_load_available = false,
|
|
100
|
-
masked_store_available = false
|
|
101
|
-
};
|
|
79
|
+
template <>
|
|
80
|
+
struct unpacket_traits<Packet2cf> : neon_unpacket_default<Packet2cf, std::complex<float>> {
|
|
81
|
+
using half = Packet1cf;
|
|
82
|
+
using as_real = Packet4f;
|
|
102
83
|
};
|
|
103
84
|
|
|
104
|
-
template<>
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
85
|
+
template <>
|
|
86
|
+
EIGEN_STRONG_INLINE Packet1cf pcast<float, Packet1cf>(const float& a) {
|
|
87
|
+
return Packet1cf(vset_lane_f32(a, vdup_n_f32(0.f), 0));
|
|
88
|
+
}
|
|
89
|
+
template <>
|
|
90
|
+
EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f, Packet2cf>(const Packet2f& a) {
|
|
91
|
+
return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a))));
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
template <>
|
|
95
|
+
EIGEN_STRONG_INLINE Packet1cf pzero(const Packet1cf& /*a*/) {
|
|
96
|
+
return Packet1cf(vdup_n_f32(0.0f));
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
template <>
|
|
100
|
+
EIGEN_STRONG_INLINE Packet2cf pzero(const Packet2cf& /*a*/) {
|
|
101
|
+
return Packet2cf(vdupq_n_f32(0.0f));
|
|
102
|
+
}
|
|
108
103
|
|
|
109
|
-
template<>
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
104
|
+
template <>
|
|
105
|
+
EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from) {
|
|
106
|
+
return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from)));
|
|
107
|
+
}
|
|
108
|
+
template <>
|
|
109
|
+
EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) {
|
|
113
110
|
const float32x2_t r64 = vld1_f32(reinterpret_cast<const float*>(&from));
|
|
114
111
|
return Packet2cf(vcombine_f32(r64, r64));
|
|
115
112
|
}
|
|
116
113
|
|
|
117
|
-
template<>
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
114
|
+
template <>
|
|
115
|
+
EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
116
|
+
return Packet1cf(padd<Packet2f>(a.v, b.v));
|
|
117
|
+
}
|
|
118
|
+
template <>
|
|
119
|
+
EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
120
|
+
return Packet2cf(padd<Packet4f>(a.v, b.v));
|
|
121
|
+
}
|
|
121
122
|
|
|
122
|
-
template<>
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
123
|
+
template <>
|
|
124
|
+
EIGEN_STRONG_INLINE Packet1cf psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
125
|
+
return Packet1cf(psub<Packet2f>(a.v, b.v));
|
|
126
|
+
}
|
|
127
|
+
template <>
|
|
128
|
+
EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
129
|
+
return Packet2cf(psub<Packet4f>(a.v, b.v));
|
|
130
|
+
}
|
|
126
131
|
|
|
127
|
-
template<>
|
|
128
|
-
|
|
132
|
+
template <>
|
|
133
|
+
EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) {
|
|
134
|
+
return Packet1cf(pnegate<Packet2f>(a.v));
|
|
135
|
+
}
|
|
136
|
+
template <>
|
|
137
|
+
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) {
|
|
138
|
+
return Packet2cf(pnegate<Packet4f>(a.v));
|
|
139
|
+
}
|
|
129
140
|
|
|
130
|
-
template<>
|
|
131
|
-
{
|
|
132
|
-
const Packet2ui b = vreinterpret_u32_f32(a.v);
|
|
141
|
+
template <>
|
|
142
|
+
EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a) {
|
|
143
|
+
const Packet2ui b = Packet2ui(vreinterpret_u32_f32(a.v));
|
|
133
144
|
return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));
|
|
134
145
|
}
|
|
135
|
-
template<>
|
|
136
|
-
{
|
|
137
|
-
const Packet4ui b = vreinterpretq_u32_f32(a.v);
|
|
146
|
+
template <>
|
|
147
|
+
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {
|
|
148
|
+
const Packet4ui b = Packet4ui(vreinterpretq_u32_f32(a.v));
|
|
138
149
|
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
|
|
139
150
|
}
|
|
140
151
|
|
|
141
|
-
|
|
142
|
-
|
|
152
|
+
#ifdef __ARM_FEATURE_COMPLEX
|
|
153
|
+
template <>
|
|
154
|
+
EIGEN_STRONG_INLINE Packet1cf pmadd<Packet1cf>(const Packet1cf& a, const Packet1cf& b, const Packet1cf& c) {
|
|
155
|
+
Packet1cf result;
|
|
156
|
+
result.v = vcmla_f32(c.v, a.v, b.v);
|
|
157
|
+
result.v = vcmla_rot90_f32(result.v, a.v, b.v);
|
|
158
|
+
return result;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
template <>
|
|
162
|
+
EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
163
|
+
return pmadd(a, b, pzero(a));
|
|
164
|
+
}
|
|
165
|
+
#else
|
|
166
|
+
template <>
|
|
167
|
+
EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
143
168
|
Packet2f v1, v2;
|
|
144
169
|
|
|
145
170
|
// Get the real values of a | a1_re | a1_re |
|
|
@@ -157,8 +182,24 @@ template<> EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, con
|
|
|
157
182
|
// Add and return the result
|
|
158
183
|
return Packet1cf(vadd_f32(v1, v2));
|
|
159
184
|
}
|
|
160
|
-
|
|
161
|
-
|
|
185
|
+
#endif
|
|
186
|
+
|
|
187
|
+
#ifdef __ARM_FEATURE_COMPLEX
|
|
188
|
+
template <>
|
|
189
|
+
EIGEN_STRONG_INLINE Packet2cf pmadd<Packet2cf>(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
|
190
|
+
Packet2cf result;
|
|
191
|
+
result.v = vcmlaq_f32(c.v, a.v, b.v);
|
|
192
|
+
result.v = vcmlaq_rot90_f32(result.v, a.v, b.v);
|
|
193
|
+
return result;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
template <>
|
|
197
|
+
EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
198
|
+
return pmadd(a, b, pzero(a));
|
|
199
|
+
}
|
|
200
|
+
#else
|
|
201
|
+
template <>
|
|
202
|
+
EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
162
203
|
Packet4f v1, v2;
|
|
163
204
|
|
|
164
205
|
// Get the real values of a | a1_re | a1_re | a2_re | a2_re |
|
|
@@ -176,9 +217,10 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
|
|
|
176
217
|
// Add and return the result
|
|
177
218
|
return Packet2cf(vaddq_f32(v1, v2));
|
|
178
219
|
}
|
|
220
|
+
#endif
|
|
179
221
|
|
|
180
|
-
template<>
|
|
181
|
-
{
|
|
222
|
+
template <>
|
|
223
|
+
EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b) {
|
|
182
224
|
// Compare real and imaginary parts of a and b to get the mask vector:
|
|
183
225
|
// [re(a[0])==re(b[0]), im(a[0])==im(b[0])]
|
|
184
226
|
Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);
|
|
@@ -188,8 +230,8 @@ template<> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packe
|
|
|
188
230
|
// Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
|
|
189
231
|
return Packet1cf(pand<Packet2f>(eq, eq_swapped));
|
|
190
232
|
}
|
|
191
|
-
template<>
|
|
192
|
-
{
|
|
233
|
+
template <>
|
|
234
|
+
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
|
|
193
235
|
// Compare real and imaginary parts of a and b to get the mask vector:
|
|
194
236
|
// [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]
|
|
195
237
|
Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);
|
|
@@ -200,129 +242,182 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packe
|
|
|
200
242
|
return Packet2cf(pand<Packet4f>(eq, eq_swapped));
|
|
201
243
|
}
|
|
202
244
|
|
|
203
|
-
template<>
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
template<>
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
template<>
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
return Packet1cf(
|
|
253
|
-
}
|
|
254
|
-
template<>
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
245
|
+
template <>
|
|
246
|
+
EIGEN_STRONG_INLINE Packet1cf pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
247
|
+
return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v))));
|
|
248
|
+
}
|
|
249
|
+
template <>
|
|
250
|
+
EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
251
|
+
return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))));
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
template <>
|
|
255
|
+
EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
256
|
+
return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v))));
|
|
257
|
+
}
|
|
258
|
+
template <>
|
|
259
|
+
EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
260
|
+
return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))));
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
template <>
|
|
264
|
+
EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
265
|
+
return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v))));
|
|
266
|
+
}
|
|
267
|
+
template <>
|
|
268
|
+
EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
269
|
+
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))));
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
template <>
|
|
273
|
+
EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
274
|
+
return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v))));
|
|
275
|
+
}
|
|
276
|
+
template <>
|
|
277
|
+
EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
278
|
+
return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))));
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
template <>
|
|
282
|
+
EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from) {
|
|
283
|
+
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(
|
|
284
|
+
pload<Packet2f>(assume_aligned<unpacket_traits<Packet1cf>::alignment>(reinterpret_cast<const float*>(from))));
|
|
285
|
+
}
|
|
286
|
+
template <>
|
|
287
|
+
EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) {
|
|
288
|
+
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(
|
|
289
|
+
pload<Packet4f>(assume_aligned<unpacket_traits<Packet2cf>::alignment>(reinterpret_cast<const float*>(from))));
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
template <>
|
|
293
|
+
EIGEN_STRONG_INLINE Packet1cf ploadu<Packet1cf>(const std::complex<float>* from) {
|
|
294
|
+
EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cf(ploadu<Packet2f>((const float*)from));
|
|
295
|
+
}
|
|
296
|
+
template <>
|
|
297
|
+
EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) {
|
|
298
|
+
EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(reinterpret_cast<const float*>(from)));
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
template <>
|
|
302
|
+
EIGEN_STRONG_INLINE Packet1cf ploaddup<Packet1cf>(const std::complex<float>* from) {
|
|
303
|
+
return pset1<Packet1cf>(*from);
|
|
304
|
+
}
|
|
305
|
+
template <>
|
|
306
|
+
EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) {
|
|
307
|
+
return pset1<Packet2cf>(*from);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
template <>
|
|
311
|
+
EIGEN_STRONG_INLINE void pstore<std::complex<float>>(std::complex<float>* to, const Packet1cf& from) {
|
|
312
|
+
EIGEN_DEBUG_ALIGNED_STORE pstore(assume_aligned<unpacket_traits<Packet1cf>::alignment>(reinterpret_cast<float*>(to)),
|
|
313
|
+
from.v);
|
|
314
|
+
}
|
|
315
|
+
template <>
|
|
316
|
+
EIGEN_STRONG_INLINE void pstore<std::complex<float>>(std::complex<float>* to, const Packet2cf& from) {
|
|
317
|
+
EIGEN_DEBUG_ALIGNED_STORE pstore(assume_aligned<unpacket_traits<Packet2cf>::alignment>(reinterpret_cast<float*>(to)),
|
|
318
|
+
from.v);
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
template <>
|
|
322
|
+
EIGEN_STRONG_INLINE void pstoreu<std::complex<float>>(std::complex<float>* to, const Packet1cf& from) {
|
|
323
|
+
EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v);
|
|
324
|
+
}
|
|
325
|
+
template <>
|
|
326
|
+
EIGEN_STRONG_INLINE void pstoreu<std::complex<float>>(std::complex<float>* to, const Packet2cf& from) {
|
|
327
|
+
EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v);
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
template <>
|
|
331
|
+
EIGEN_DEVICE_FUNC inline Packet1cf pgather<std::complex<float>, Packet1cf>(const std::complex<float>* from,
|
|
332
|
+
Index stride) {
|
|
333
|
+
const Packet2f tmp = vdup_n_f32(std::real(from[0 * stride]));
|
|
334
|
+
return Packet1cf(vset_lane_f32(std::imag(from[0 * stride]), tmp, 1));
|
|
335
|
+
}
|
|
336
|
+
template <>
|
|
337
|
+
EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from,
|
|
338
|
+
Index stride) {
|
|
339
|
+
Packet4f res = vdupq_n_f32(std::real(from[0 * stride]));
|
|
340
|
+
res = vsetq_lane_f32(std::imag(from[0 * stride]), res, 1);
|
|
341
|
+
res = vsetq_lane_f32(std::real(from[1 * stride]), res, 2);
|
|
342
|
+
res = vsetq_lane_f32(std::imag(from[1 * stride]), res, 3);
|
|
261
343
|
return Packet2cf(res);
|
|
262
344
|
}
|
|
263
345
|
|
|
264
|
-
template<>
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
346
|
+
template <>
|
|
347
|
+
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet1cf>(std::complex<float>* to, const Packet1cf& from,
|
|
348
|
+
Index stride) {
|
|
349
|
+
to[stride * 0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1));
|
|
350
|
+
}
|
|
351
|
+
template <>
|
|
352
|
+
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from,
|
|
353
|
+
Index stride) {
|
|
354
|
+
to[stride * 0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
|
|
355
|
+
to[stride * 1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
|
|
272
356
|
}
|
|
273
357
|
|
|
274
|
-
template<>
|
|
275
|
-
|
|
358
|
+
template <>
|
|
359
|
+
EIGEN_STRONG_INLINE void prefetch<std::complex<float>>(const std::complex<float>* addr) {
|
|
360
|
+
EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr));
|
|
361
|
+
}
|
|
276
362
|
|
|
277
|
-
template<>
|
|
278
|
-
{
|
|
363
|
+
template <>
|
|
364
|
+
EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet1cf>(const Packet1cf& a) {
|
|
279
365
|
EIGEN_ALIGN16 std::complex<float> x;
|
|
280
366
|
vst1_f32(reinterpret_cast<float*>(&x), a.v);
|
|
281
367
|
return x;
|
|
282
368
|
}
|
|
283
|
-
template<>
|
|
284
|
-
{
|
|
369
|
+
template <>
|
|
370
|
+
EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) {
|
|
285
371
|
EIGEN_ALIGN16 std::complex<float> x[2];
|
|
286
372
|
vst1q_f32(reinterpret_cast<float*>(x), a.v);
|
|
287
373
|
return x[0];
|
|
288
374
|
}
|
|
289
375
|
|
|
290
|
-
template<>
|
|
291
|
-
|
|
292
|
-
|
|
376
|
+
template <>
|
|
377
|
+
EIGEN_STRONG_INLINE Packet1cf preverse(const Packet1cf& a) {
|
|
378
|
+
return a;
|
|
379
|
+
}
|
|
380
|
+
template <>
|
|
381
|
+
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) {
|
|
382
|
+
return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v)));
|
|
383
|
+
}
|
|
293
384
|
|
|
294
|
-
template<>
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
385
|
+
template <>
|
|
386
|
+
EIGEN_STRONG_INLINE Packet1cf pcplxflip<Packet1cf>(const Packet1cf& a) {
|
|
387
|
+
return Packet1cf(vrev64_f32(a.v));
|
|
388
|
+
}
|
|
389
|
+
template <>
|
|
390
|
+
EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) {
|
|
391
|
+
return Packet2cf(vrev64q_f32(a.v));
|
|
392
|
+
}
|
|
298
393
|
|
|
299
|
-
template<>
|
|
300
|
-
{
|
|
394
|
+
template <>
|
|
395
|
+
EIGEN_STRONG_INLINE std::complex<float> predux<Packet1cf>(const Packet1cf& a) {
|
|
301
396
|
std::complex<float> s;
|
|
302
|
-
vst1_f32((float
|
|
397
|
+
vst1_f32((float*)&s, a.v);
|
|
303
398
|
return s;
|
|
304
399
|
}
|
|
305
|
-
template<>
|
|
306
|
-
{
|
|
400
|
+
template <>
|
|
401
|
+
EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) {
|
|
307
402
|
std::complex<float> s;
|
|
308
403
|
vst1_f32(reinterpret_cast<float*>(&s), vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v)));
|
|
309
404
|
return s;
|
|
310
405
|
}
|
|
311
406
|
|
|
312
|
-
template<>
|
|
313
|
-
{
|
|
407
|
+
template <>
|
|
408
|
+
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a) {
|
|
314
409
|
std::complex<float> s;
|
|
315
|
-
vst1_f32((float
|
|
410
|
+
vst1_f32((float*)&s, a.v);
|
|
316
411
|
return s;
|
|
317
412
|
}
|
|
318
|
-
template<>
|
|
319
|
-
{
|
|
413
|
+
template <>
|
|
414
|
+
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) {
|
|
320
415
|
float32x2_t a1, a2, v1, v2, prod;
|
|
321
416
|
std::complex<float> s;
|
|
322
417
|
|
|
323
418
|
a1 = vget_low_f32(a.v);
|
|
324
419
|
a2 = vget_high_f32(a.v);
|
|
325
|
-
|
|
420
|
+
// Get the real values of a | a1_re | a1_re | a2_re | a2_re |
|
|
326
421
|
v1 = vdup_lane_f32(a1, 0);
|
|
327
422
|
// Get the real values of a | a1_im | a1_im | a2_im | a2_im |
|
|
328
423
|
v2 = vdup_lane_f32(a1, 1);
|
|
@@ -342,133 +437,156 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
|
|
|
342
437
|
return s;
|
|
343
438
|
}
|
|
344
439
|
|
|
345
|
-
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf,Packet2f)
|
|
346
|
-
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
|
347
|
-
|
|
348
|
-
template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
|
|
349
|
-
{
|
|
350
|
-
// TODO optimize it for NEON
|
|
351
|
-
Packet1cf res = pmul(a, pconj(b));
|
|
352
|
-
Packet2f s, rev_s;
|
|
353
|
-
|
|
354
|
-
// this computes the norm
|
|
355
|
-
s = vmul_f32(b.v, b.v);
|
|
356
|
-
rev_s = vrev64_f32(s);
|
|
440
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf, Packet2f)
|
|
441
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf, Packet4f)
|
|
357
442
|
|
|
358
|
-
|
|
443
|
+
template <>
|
|
444
|
+
EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
|
|
445
|
+
return pdiv_complex(a, b);
|
|
359
446
|
}
|
|
360
|
-
template<>
|
|
361
|
-
{
|
|
362
|
-
|
|
363
|
-
Packet2cf res = pmul(a,pconj(b));
|
|
364
|
-
Packet4f s, rev_s;
|
|
365
|
-
|
|
366
|
-
// this computes the norm
|
|
367
|
-
s = vmulq_f32(b.v, b.v);
|
|
368
|
-
rev_s = vrev64q_f32(s);
|
|
369
|
-
|
|
370
|
-
return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s, rev_s)));
|
|
447
|
+
template <>
|
|
448
|
+
EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
|
449
|
+
return pdiv_complex(a, b);
|
|
371
450
|
}
|
|
372
451
|
|
|
373
452
|
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1cf, 1>& /*kernel*/) {}
|
|
374
|
-
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel)
|
|
375
|
-
{
|
|
453
|
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel) {
|
|
376
454
|
Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
|
|
377
455
|
kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
|
|
378
456
|
kernel.packet[1].v = tmp;
|
|
379
457
|
}
|
|
380
458
|
|
|
381
|
-
template<>
|
|
459
|
+
template <>
|
|
460
|
+
EIGEN_STRONG_INLINE Packet1cf psqrt<Packet1cf>(const Packet1cf& a) {
|
|
382
461
|
return psqrt_complex<Packet1cf>(a);
|
|
383
462
|
}
|
|
384
463
|
|
|
385
|
-
template<>
|
|
464
|
+
template <>
|
|
465
|
+
EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
|
|
386
466
|
return psqrt_complex<Packet2cf>(a);
|
|
387
467
|
}
|
|
388
468
|
|
|
469
|
+
template <>
|
|
470
|
+
EIGEN_STRONG_INLINE Packet1cf plog<Packet1cf>(const Packet1cf& a) {
|
|
471
|
+
return plog_complex(a);
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
template <>
|
|
475
|
+
EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a) {
|
|
476
|
+
return plog_complex(a);
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
template <>
|
|
480
|
+
EIGEN_STRONG_INLINE Packet1cf pexp<Packet1cf>(const Packet1cf& a) {
|
|
481
|
+
return pexp_complex(a);
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
template <>
|
|
485
|
+
EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
|
|
486
|
+
return pexp_complex(a);
|
|
487
|
+
}
|
|
488
|
+
|
|
389
489
|
//---------- double ----------
|
|
390
490
|
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
|
391
491
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
|
|
397
|
-
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
|
|
398
|
-
#endif
|
|
492
|
+
inline uint64x2_t p2ul_CONJ_XOR() {
|
|
493
|
+
static const uint64_t p2ul_conj_XOR_DATA[] = {0x0, 0x8000000000000000};
|
|
494
|
+
return vld1q_u64(p2ul_conj_XOR_DATA);
|
|
495
|
+
}
|
|
399
496
|
|
|
400
|
-
struct Packet1cd
|
|
401
|
-
{
|
|
497
|
+
struct Packet1cd {
|
|
402
498
|
EIGEN_STRONG_INLINE Packet1cd() {}
|
|
403
499
|
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
|
404
500
|
Packet2d v;
|
|
405
501
|
};
|
|
406
502
|
|
|
407
|
-
template<>
|
|
408
|
-
{
|
|
503
|
+
template <>
|
|
504
|
+
struct packet_traits<std::complex<double>> : default_packet_traits {
|
|
409
505
|
typedef Packet1cd type;
|
|
410
506
|
typedef Packet1cd half;
|
|
411
|
-
enum
|
|
412
|
-
{
|
|
507
|
+
enum {
|
|
413
508
|
Vectorizable = 1,
|
|
414
509
|
AlignedOnScalar = 0,
|
|
415
510
|
size = 1,
|
|
416
|
-
HasHalfPacket = 0,
|
|
417
511
|
|
|
418
|
-
HasAdd
|
|
419
|
-
HasSub
|
|
420
|
-
HasMul
|
|
421
|
-
HasDiv
|
|
512
|
+
HasAdd = 1,
|
|
513
|
+
HasSub = 1,
|
|
514
|
+
HasMul = 1,
|
|
515
|
+
HasDiv = 1,
|
|
422
516
|
HasNegate = 1,
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
517
|
+
HasSqrt = 1,
|
|
518
|
+
HasLog = 1,
|
|
519
|
+
HasAbs = 0,
|
|
520
|
+
HasAbs2 = 0,
|
|
521
|
+
HasMin = 0,
|
|
522
|
+
HasMax = 0,
|
|
427
523
|
HasSetLinear = 0
|
|
428
524
|
};
|
|
429
525
|
};
|
|
430
526
|
|
|
431
|
-
template<>
|
|
432
|
-
{
|
|
433
|
-
|
|
434
|
-
typedef Packet1cd half;
|
|
435
|
-
typedef Packet2d as_real;
|
|
436
|
-
enum
|
|
437
|
-
{
|
|
438
|
-
size=1,
|
|
439
|
-
alignment=Aligned16,
|
|
440
|
-
vectorizable=true,
|
|
441
|
-
masked_load_available=false,
|
|
442
|
-
masked_store_available=false
|
|
443
|
-
};
|
|
527
|
+
template <>
|
|
528
|
+
struct unpacket_traits<Packet1cd> : neon_unpacket_default<Packet1cd, std::complex<double>> {
|
|
529
|
+
using as_real = Packet2d;
|
|
444
530
|
};
|
|
445
531
|
|
|
446
|
-
template<>
|
|
447
|
-
|
|
532
|
+
template <>
|
|
533
|
+
EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) {
|
|
534
|
+
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(
|
|
535
|
+
pload<Packet2d>(assume_aligned<unpacket_traits<Packet1cd>::alignment>(reinterpret_cast<const double*>(from))));
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
template <>
|
|
539
|
+
EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) {
|
|
540
|
+
EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from)));
|
|
541
|
+
}
|
|
448
542
|
|
|
449
|
-
template<>
|
|
450
|
-
|
|
543
|
+
template <>
|
|
544
|
+
EIGEN_STRONG_INLINE Packet1cd pzero<Packet1cd>(const Packet1cd& /*a*/) {
|
|
545
|
+
return Packet1cd(vdupq_n_f64(0.0));
|
|
546
|
+
}
|
|
451
547
|
|
|
452
|
-
template<>
|
|
453
|
-
{
|
|
548
|
+
template <>
|
|
549
|
+
EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) {
|
|
454
550
|
/* here we really have to use unaligned loads :( */
|
|
455
551
|
return ploadu<Packet1cd>(&from);
|
|
456
552
|
}
|
|
457
553
|
|
|
458
|
-
template<>
|
|
459
|
-
|
|
554
|
+
template <>
|
|
555
|
+
EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
556
|
+
return Packet1cd(padd<Packet2d>(a.v, b.v));
|
|
557
|
+
}
|
|
460
558
|
|
|
461
|
-
template<>
|
|
462
|
-
|
|
559
|
+
template <>
|
|
560
|
+
EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
561
|
+
return Packet1cd(psub<Packet2d>(a.v, b.v));
|
|
562
|
+
}
|
|
463
563
|
|
|
464
|
-
template<>
|
|
465
|
-
|
|
564
|
+
template <>
|
|
565
|
+
EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
|
|
566
|
+
return Packet1cd(pnegate<Packet2d>(a.v));
|
|
567
|
+
}
|
|
466
568
|
|
|
467
|
-
template<>
|
|
468
|
-
|
|
569
|
+
template <>
|
|
570
|
+
EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
|
|
571
|
+
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR())));
|
|
572
|
+
}
|
|
469
573
|
|
|
470
|
-
|
|
471
|
-
|
|
574
|
+
#ifdef __ARM_FEATURE_COMPLEX
|
|
575
|
+
template <>
|
|
576
|
+
EIGEN_STRONG_INLINE Packet1cd pmadd<Packet1cd>(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
|
577
|
+
Packet1cd result;
|
|
578
|
+
result.v = vcmlaq_f64(c.v, a.v, b.v);
|
|
579
|
+
result.v = vcmlaq_rot90_f64(result.v, a.v, b.v);
|
|
580
|
+
return result;
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
template <>
|
|
584
|
+
EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
585
|
+
return pmadd(a, b, pzero(a));
|
|
586
|
+
}
|
|
587
|
+
#else
|
|
588
|
+
template <>
|
|
589
|
+
EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
472
590
|
Packet2d v1, v2;
|
|
473
591
|
|
|
474
592
|
// Get the real values of a
|
|
@@ -480,15 +598,16 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
|
|
480
598
|
// Multiply the imag a with b
|
|
481
599
|
v2 = vmulq_f64(v2, b.v);
|
|
482
600
|
// Conjugate v2
|
|
483
|
-
v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
|
|
601
|
+
v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR()));
|
|
484
602
|
// Swap real/imag elements in v2.
|
|
485
603
|
v2 = preverse<Packet2d>(v2);
|
|
486
604
|
// Add and return the result
|
|
487
605
|
return Packet1cd(vaddq_f64(v1, v2));
|
|
488
606
|
}
|
|
607
|
+
#endif
|
|
489
608
|
|
|
490
|
-
template<>
|
|
491
|
-
{
|
|
609
|
+
template <>
|
|
610
|
+
EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
|
|
492
611
|
// Compare real and imaginary parts of a and b to get the mask vector:
|
|
493
612
|
// [re(a)==re(b), im(a)==im(b)]
|
|
494
613
|
Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
|
|
@@ -499,86 +618,115 @@ template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packe
|
|
|
499
618
|
return Packet1cd(pand<Packet2d>(eq, eq_swapped));
|
|
500
619
|
}
|
|
501
620
|
|
|
502
|
-
template<>
|
|
503
|
-
|
|
621
|
+
template <>
|
|
622
|
+
EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
623
|
+
return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v))));
|
|
624
|
+
}
|
|
504
625
|
|
|
505
|
-
template<>
|
|
506
|
-
|
|
626
|
+
template <>
|
|
627
|
+
EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
628
|
+
return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v))));
|
|
629
|
+
}
|
|
507
630
|
|
|
508
|
-
template<>
|
|
509
|
-
|
|
631
|
+
template <>
|
|
632
|
+
EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
633
|
+
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v))));
|
|
634
|
+
}
|
|
510
635
|
|
|
511
|
-
template<>
|
|
512
|
-
|
|
636
|
+
template <>
|
|
637
|
+
EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
638
|
+
return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v))));
|
|
639
|
+
}
|
|
513
640
|
|
|
514
|
-
template<>
|
|
515
|
-
|
|
641
|
+
template <>
|
|
642
|
+
EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) {
|
|
643
|
+
return pset1<Packet1cd>(*from);
|
|
644
|
+
}
|
|
516
645
|
|
|
517
|
-
template<>
|
|
518
|
-
|
|
646
|
+
template <>
|
|
647
|
+
EIGEN_STRONG_INLINE void pstore<std::complex<double>>(std::complex<double>* to, const Packet1cd& from) {
|
|
648
|
+
EIGEN_DEBUG_ALIGNED_STORE pstore(assume_aligned<unpacket_traits<Packet1cd>::alignment>(reinterpret_cast<double*>(to)),
|
|
649
|
+
from.v);
|
|
650
|
+
}
|
|
519
651
|
|
|
520
|
-
template<>
|
|
521
|
-
|
|
652
|
+
template <>
|
|
653
|
+
EIGEN_STRONG_INLINE void pstoreu<std::complex<double>>(std::complex<double>* to, const Packet1cd& from) {
|
|
654
|
+
EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v);
|
|
655
|
+
}
|
|
522
656
|
|
|
523
|
-
template<>
|
|
524
|
-
|
|
657
|
+
template <>
|
|
658
|
+
EIGEN_STRONG_INLINE void prefetch<std::complex<double>>(const std::complex<double>* addr) {
|
|
659
|
+
EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr));
|
|
660
|
+
}
|
|
525
661
|
|
|
526
|
-
template<>
|
|
527
|
-
|
|
528
|
-
{
|
|
662
|
+
template <>
|
|
663
|
+
EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from,
|
|
664
|
+
Index stride) {
|
|
529
665
|
Packet2d res = pset1<Packet2d>(0.0);
|
|
530
|
-
res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
|
|
531
|
-
res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
|
|
666
|
+
res = vsetq_lane_f64(std::real(from[0 * stride]), res, 0);
|
|
667
|
+
res = vsetq_lane_f64(std::imag(from[0 * stride]), res, 1);
|
|
532
668
|
return Packet1cd(res);
|
|
533
669
|
}
|
|
534
670
|
|
|
535
|
-
template<>
|
|
536
|
-
|
|
537
|
-
|
|
671
|
+
template <>
|
|
672
|
+
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from,
|
|
673
|
+
Index stride) {
|
|
674
|
+
to[stride * 0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
|
|
675
|
+
}
|
|
538
676
|
|
|
539
|
-
template<>
|
|
540
|
-
{
|
|
677
|
+
template <>
|
|
678
|
+
EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) {
|
|
541
679
|
EIGEN_ALIGN16 std::complex<double> res;
|
|
542
|
-
pstore<std::complex<double
|
|
680
|
+
pstore<std::complex<double>>(&res, a);
|
|
543
681
|
return res;
|
|
544
682
|
}
|
|
545
683
|
|
|
546
|
-
template<>
|
|
547
|
-
|
|
548
|
-
|
|
684
|
+
template <>
|
|
685
|
+
EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) {
|
|
686
|
+
return a;
|
|
687
|
+
}
|
|
549
688
|
|
|
550
|
-
template<>
|
|
689
|
+
template <>
|
|
690
|
+
EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) {
|
|
691
|
+
return pfirst(a);
|
|
692
|
+
}
|
|
551
693
|
|
|
552
|
-
|
|
694
|
+
template <>
|
|
695
|
+
EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) {
|
|
696
|
+
return pfirst(a);
|
|
697
|
+
}
|
|
553
698
|
|
|
554
|
-
|
|
555
|
-
{
|
|
556
|
-
// TODO optimize it for NEON
|
|
557
|
-
Packet1cd res = pmul(a,pconj(b));
|
|
558
|
-
Packet2d s = pmul<Packet2d>(b.v, b.v);
|
|
559
|
-
Packet2d rev_s = preverse<Packet2d>(s);
|
|
699
|
+
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd, Packet2d)
|
|
560
700
|
|
|
561
|
-
|
|
701
|
+
template <>
|
|
702
|
+
EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
|
703
|
+
return pdiv_complex(a, b);
|
|
562
704
|
}
|
|
563
705
|
|
|
564
|
-
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
|
565
|
-
|
|
706
|
+
EIGEN_STRONG_INLINE Packet1cd pcplxflip /*<Packet1cd>*/ (const Packet1cd& x) {
|
|
707
|
+
return Packet1cd(preverse(Packet2d(x.v)));
|
|
708
|
+
}
|
|
566
709
|
|
|
567
|
-
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
|
568
|
-
{
|
|
710
|
+
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd, 2>& kernel) {
|
|
569
711
|
Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
|
|
570
712
|
kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
|
|
571
713
|
kernel.packet[1].v = tmp;
|
|
572
714
|
}
|
|
573
715
|
|
|
574
|
-
template<>
|
|
716
|
+
template <>
|
|
717
|
+
EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
|
|
575
718
|
return psqrt_complex<Packet1cd>(a);
|
|
576
719
|
}
|
|
577
720
|
|
|
578
|
-
|
|
721
|
+
template <>
|
|
722
|
+
EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a) {
|
|
723
|
+
return plog_complex(a);
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
#endif // EIGEN_ARCH_ARM64
|
|
579
727
|
|
|
580
|
-
}
|
|
728
|
+
} // end namespace internal
|
|
581
729
|
|
|
582
|
-
}
|
|
730
|
+
} // end namespace Eigen
|
|
583
731
|
|
|
584
|
-
#endif
|
|
732
|
+
#endif // EIGEN_COMPLEX_NEON_H
|