@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -23,56 +23,55 @@
|
|
|
23
23
|
// to be used to declare statically aligned buffers.
|
|
24
24
|
//------------------------------------------------------------------------------------------
|
|
25
25
|
|
|
26
|
-
|
|
27
26
|
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
|
|
28
27
|
* However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
|
|
29
28
|
* so that vectorization doesn't affect binary compatibility.
|
|
30
29
|
*
|
|
31
30
|
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
|
|
32
31
|
* vectorized and non-vectorized code.
|
|
33
|
-
*
|
|
34
|
-
* FIXME: this code can be cleaned up once we switch to proper C++11 only.
|
|
35
32
|
*/
|
|
36
33
|
#if (defined EIGEN_CUDACC)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
#
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
#
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
#
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
34
|
+
#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
|
|
35
|
+
#define EIGEN_ALIGNOF(x) __alignof(x)
|
|
36
|
+
#else
|
|
37
|
+
#define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
|
|
38
|
+
#define EIGEN_ALIGNOF(x) alignof(x)
|
|
39
|
+
#endif
|
|
40
|
+
|
|
41
|
+
// Align to the boundary that avoids false sharing.
|
|
42
|
+
// https://en.cppreference.com/w/cpp/thread/hardware_destructive_interference_size
|
|
43
|
+
// There is a bug in android NDK < r26 where the macro is defined but std::hardware_destructive_interference_size
|
|
44
|
+
// still does not exist.
|
|
45
|
+
#if defined(__cpp_lib_hardware_interference_size) && __cpp_lib_hardware_interference_size >= 201603 && \
|
|
46
|
+
(!EIGEN_OS_ANDROID || __NDK_MAJOR__ + 0 >= 26)
|
|
47
|
+
#include <new>
|
|
48
|
+
#define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(std::hardware_destructive_interference_size)
|
|
52
49
|
#else
|
|
53
|
-
|
|
50
|
+
// Overalign for the cache line size of 128 bytes (Apple M1)
|
|
51
|
+
#define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(128)
|
|
54
52
|
#endif
|
|
55
53
|
|
|
56
54
|
// If the user explicitly disables vectorization, then we also disable alignment
|
|
57
55
|
#if defined(EIGEN_DONT_VECTORIZE)
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
56
|
+
#if defined(EIGEN_GPUCC)
|
|
57
|
+
// GPU code is always vectorized and requires memory alignment for
|
|
58
|
+
// statically allocated buffers.
|
|
59
|
+
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
|
|
60
|
+
#else
|
|
61
|
+
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
|
|
62
|
+
#endif
|
|
65
63
|
#elif defined(__AVX512F__)
|
|
66
|
-
|
|
67
|
-
|
|
64
|
+
// 64 bytes static alignment is preferred only if really required
|
|
65
|
+
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
|
|
68
66
|
#elif defined(__AVX__)
|
|
69
|
-
|
|
70
|
-
|
|
67
|
+
// 32 bytes static alignment is preferred only if really required
|
|
68
|
+
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
|
|
69
|
+
#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
|
|
70
|
+
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 128
|
|
71
71
|
#else
|
|
72
|
-
|
|
72
|
+
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
|
|
73
73
|
#endif
|
|
74
74
|
|
|
75
|
-
|
|
76
75
|
// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
|
|
77
76
|
#define EIGEN_MIN_ALIGN_BYTES 16
|
|
78
77
|
|
|
@@ -80,99 +79,91 @@
|
|
|
80
79
|
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
|
|
81
80
|
// aligned at all regardless of the value of this #define.
|
|
82
81
|
|
|
83
|
-
#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN))
|
|
82
|
+
#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && \
|
|
83
|
+
EIGEN_MAX_STATIC_ALIGN_BYTES > 0
|
|
84
84
|
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
|
|
85
85
|
#endif
|
|
86
86
|
|
|
87
87
|
// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
|
|
88
88
|
// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
|
|
89
89
|
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
90
|
+
#ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
|
|
91
|
+
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
|
|
92
|
+
#endif
|
|
93
|
+
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
|
|
94
94
|
#endif
|
|
95
95
|
|
|
96
96
|
#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
|
|
97
97
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
#endif
|
|
126
|
-
|
|
127
|
-
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
|
|
128
|
-
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
|
|
129
|
-
#else
|
|
130
|
-
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
|
|
131
|
-
#endif
|
|
98
|
+
// Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
|
|
99
|
+
|
|
100
|
+
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
|
|
101
|
+
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
|
|
102
|
+
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
|
|
103
|
+
// certain common platforms (compiler+architecture combinations) to avoid these problems.
|
|
104
|
+
// Only static alignment is really problematic (relies on nonstandard compiler extensions),
|
|
105
|
+
// try to keep heap alignment even when we have to disable static alignment.
|
|
106
|
+
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || \
|
|
107
|
+
EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64)
|
|
108
|
+
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
|
|
109
|
+
#else
|
|
110
|
+
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
|
|
111
|
+
#endif
|
|
112
|
+
|
|
113
|
+
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
|
|
114
|
+
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT && !EIGEN_COMP_SUNCC && !EIGEN_OS_QNX
|
|
115
|
+
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
|
|
116
|
+
#else
|
|
117
|
+
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
|
|
118
|
+
#endif
|
|
119
|
+
|
|
120
|
+
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
|
|
121
|
+
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
|
|
122
|
+
#else
|
|
123
|
+
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
|
|
124
|
+
#endif
|
|
132
125
|
|
|
133
126
|
#endif
|
|
134
127
|
|
|
135
128
|
// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
|
|
136
|
-
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
|
|
129
|
+
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES < EIGEN_MAX_STATIC_ALIGN_BYTES
|
|
137
130
|
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
|
|
138
131
|
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
|
|
139
132
|
#endif
|
|
140
133
|
|
|
141
|
-
#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
|
|
142
|
-
|
|
134
|
+
#if EIGEN_MAX_STATIC_ALIGN_BYTES == 0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
|
|
135
|
+
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
|
143
136
|
#endif
|
|
144
137
|
|
|
145
138
|
// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
|
|
146
|
-
// It takes into account both the user choice to explicitly enable/disable alignment (by setting
|
|
147
|
-
// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
|
|
148
|
-
//
|
|
149
|
-
|
|
139
|
+
// It takes into account both the user choice to explicitly enable/disable alignment (by setting
|
|
140
|
+
// EIGEN_MAX_STATIC_ALIGN_BYTES) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only
|
|
141
|
+
// EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
|
|
150
142
|
|
|
151
143
|
// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
|
|
152
|
-
#define EIGEN_ALIGN8
|
|
144
|
+
#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
|
|
153
145
|
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
|
|
154
146
|
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
|
|
155
147
|
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
|
|
156
|
-
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
|
|
148
|
+
#if EIGEN_MAX_STATIC_ALIGN_BYTES > 0
|
|
157
149
|
#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
|
|
158
150
|
#else
|
|
159
151
|
#define EIGEN_ALIGN_MAX
|
|
160
152
|
#endif
|
|
161
153
|
|
|
162
|
-
|
|
163
154
|
// Dynamic alignment control
|
|
164
155
|
|
|
165
|
-
#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
|
|
156
|
+
#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0
|
|
166
157
|
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
|
|
167
158
|
#endif
|
|
168
159
|
|
|
169
160
|
#ifdef EIGEN_DONT_ALIGN
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
161
|
+
#ifdef EIGEN_MAX_ALIGN_BYTES
|
|
162
|
+
#undef EIGEN_MAX_ALIGN_BYTES
|
|
163
|
+
#endif
|
|
164
|
+
#define EIGEN_MAX_ALIGN_BYTES 0
|
|
174
165
|
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
|
|
175
|
-
|
|
166
|
+
#define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
|
|
176
167
|
#endif
|
|
177
168
|
|
|
178
169
|
#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
|
|
@@ -181,7 +172,6 @@
|
|
|
181
172
|
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
|
|
182
173
|
#endif
|
|
183
174
|
|
|
184
|
-
|
|
185
175
|
#ifndef EIGEN_UNALIGNED_VECTORIZE
|
|
186
176
|
#define EIGEN_UNALIGNED_VECTORIZE 1
|
|
187
177
|
#endif
|
|
@@ -190,220 +180,237 @@
|
|
|
190
180
|
|
|
191
181
|
// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
|
|
192
182
|
// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
|
|
193
|
-
#if EIGEN_MAX_ALIGN_BYTES==0
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
183
|
+
#if EIGEN_MAX_ALIGN_BYTES == 0
|
|
184
|
+
#ifndef EIGEN_DONT_VECTORIZE
|
|
185
|
+
#define EIGEN_DONT_VECTORIZE
|
|
186
|
+
#endif
|
|
197
187
|
#endif
|
|
198
|
-
|
|
199
188
|
|
|
200
189
|
// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
|
|
201
190
|
// removed as gcc 4.1 and msvc 2008 are not supported anyways.
|
|
202
191
|
#if EIGEN_COMP_MSVC
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
#endif
|
|
209
|
-
#endif
|
|
192
|
+
#include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
|
|
193
|
+
// a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
|
|
194
|
+
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
|
|
195
|
+
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
|
|
196
|
+
#endif
|
|
210
197
|
#else
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
198
|
+
#if defined(__SSE2__)
|
|
199
|
+
#define EIGEN_SSE2_ON_NON_MSVC
|
|
200
|
+
#endif
|
|
214
201
|
#endif
|
|
215
202
|
|
|
216
203
|
#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
|
|
217
204
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
205
|
+
#if defined(EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
|
|
206
|
+
|
|
207
|
+
// Defines symbols for compile-time detection of which instructions are
|
|
208
|
+
// used.
|
|
209
|
+
// EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
|
|
210
|
+
#define EIGEN_VECTORIZE
|
|
211
|
+
#define EIGEN_VECTORIZE_SSE
|
|
212
|
+
#define EIGEN_VECTORIZE_SSE2
|
|
213
|
+
|
|
214
|
+
// Detect sse3/ssse3/sse4:
|
|
215
|
+
// gcc and icc defines __SSE3__, ...
|
|
216
|
+
// there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
|
|
217
|
+
// want to force the use of those instructions with msvc.
|
|
218
|
+
#ifdef __SSE3__
|
|
219
|
+
#define EIGEN_VECTORIZE_SSE3
|
|
220
|
+
#endif
|
|
221
|
+
#ifdef __SSSE3__
|
|
222
|
+
#define EIGEN_VECTORIZE_SSSE3
|
|
223
|
+
#endif
|
|
224
|
+
#ifdef __SSE4_1__
|
|
225
|
+
#define EIGEN_VECTORIZE_SSE4_1
|
|
226
|
+
#endif
|
|
227
|
+
#ifdef __SSE4_2__
|
|
228
|
+
#define EIGEN_VECTORIZE_SSE4_2
|
|
229
|
+
#endif
|
|
230
|
+
#ifdef __AVX__
|
|
231
|
+
#if !defined(EIGEN_USE_SYCL) && !EIGEN_COMP_EMSCRIPTEN
|
|
232
|
+
#define EIGEN_VECTORIZE_AVX
|
|
233
|
+
#endif
|
|
234
|
+
#define EIGEN_VECTORIZE_SSE3
|
|
235
|
+
#define EIGEN_VECTORIZE_SSSE3
|
|
236
|
+
#define EIGEN_VECTORIZE_SSE4_1
|
|
237
|
+
#define EIGEN_VECTORIZE_SSE4_2
|
|
238
|
+
#endif
|
|
239
|
+
#ifdef __AVX2__
|
|
240
|
+
#ifndef EIGEN_USE_SYCL
|
|
241
|
+
#define EIGEN_VECTORIZE_AVX2
|
|
242
|
+
#define EIGEN_VECTORIZE_AVX
|
|
243
|
+
#endif
|
|
244
|
+
#define EIGEN_VECTORIZE_SSE3
|
|
245
|
+
#define EIGEN_VECTORIZE_SSSE3
|
|
246
|
+
#define EIGEN_VECTORIZE_SSE4_1
|
|
247
|
+
#define EIGEN_VECTORIZE_SSE4_2
|
|
248
|
+
#endif
|
|
249
|
+
#if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
|
|
250
|
+
// MSVC does not expose a switch dedicated for FMA
|
|
251
|
+
// For MSVC, AVX2 => FMA
|
|
252
|
+
#define EIGEN_VECTORIZE_FMA
|
|
253
|
+
#endif
|
|
254
|
+
#if defined(__AVX512F__)
|
|
255
|
+
#ifndef EIGEN_VECTORIZE_FMA
|
|
256
|
+
#if EIGEN_COMP_GNUC
|
|
257
|
+
#error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
|
|
258
|
+
#else
|
|
259
|
+
#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
|
|
260
|
+
#endif
|
|
261
|
+
#endif
|
|
262
|
+
#ifndef EIGEN_USE_SYCL
|
|
263
|
+
#define EIGEN_VECTORIZE_AVX512
|
|
264
|
+
#define EIGEN_VECTORIZE_AVX2
|
|
265
|
+
#define EIGEN_VECTORIZE_AVX
|
|
266
|
+
#endif
|
|
267
|
+
#define EIGEN_VECTORIZE_FMA
|
|
268
|
+
#define EIGEN_VECTORIZE_SSE3
|
|
269
|
+
#define EIGEN_VECTORIZE_SSSE3
|
|
270
|
+
#define EIGEN_VECTORIZE_SSE4_1
|
|
271
|
+
#define EIGEN_VECTORIZE_SSE4_2
|
|
272
|
+
#ifndef EIGEN_USE_SYCL
|
|
273
|
+
#ifdef __AVX512DQ__
|
|
274
|
+
#define EIGEN_VECTORIZE_AVX512DQ
|
|
275
|
+
#endif
|
|
276
|
+
#ifdef __AVX512ER__
|
|
277
|
+
#define EIGEN_VECTORIZE_AVX512ER
|
|
278
|
+
#endif
|
|
279
|
+
#ifdef __AVX512BF16__
|
|
280
|
+
#define EIGEN_VECTORIZE_AVX512BF16
|
|
281
|
+
#endif
|
|
282
|
+
#ifdef __AVX512VL__
|
|
283
|
+
#define EIGEN_VECTORIZE_AVX512VL
|
|
284
|
+
#endif
|
|
285
|
+
#ifdef __AVX512FP16__
|
|
286
|
+
#ifdef __AVX512VL__
|
|
287
|
+
#define EIGEN_VECTORIZE_AVX512FP16
|
|
288
|
+
// Built-in _Float16.
|
|
289
|
+
#define EIGEN_HAS_BUILTIN_FLOAT16 1
|
|
290
|
+
#else
|
|
291
|
+
#if EIGEN_COMP_GNUC
|
|
292
|
+
#error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
|
|
293
|
+
#else
|
|
294
|
+
#error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
|
|
295
|
+
#endif
|
|
296
|
+
#endif
|
|
297
|
+
#endif
|
|
298
|
+
#endif
|
|
299
|
+
#endif
|
|
300
|
+
|
|
301
|
+
// Disable AVX support on broken xcode versions
|
|
302
|
+
#if (EIGEN_COMP_CLANGAPPLE == 11000033) && (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500)
|
|
303
|
+
// A nasty bug in the clang compiler shipped with xcode in a common compilation situation
|
|
304
|
+
// when XCode 11.0 and Mac deployment target macOS 10.15 are used; see https://trac.macports.org/ticket/58776#no1
|
|
305
|
+
#ifdef EIGEN_VECTORIZE_AVX
|
|
306
|
+
#undef EIGEN_VECTORIZE_AVX
|
|
307
|
+
#warning \
|
|
308
|
+
"Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
|
|
309
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
310
|
+
#undef EIGEN_VECTORIZE_AVX2
|
|
311
|
+
#endif
|
|
312
|
+
#ifdef EIGEN_VECTORIZE_FMA
|
|
313
|
+
#undef EIGEN_VECTORIZE_FMA
|
|
314
|
+
#endif
|
|
315
|
+
#ifdef EIGEN_VECTORIZE_AVX512
|
|
316
|
+
#undef EIGEN_VECTORIZE_AVX512
|
|
317
|
+
#endif
|
|
318
|
+
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
|
319
|
+
#undef EIGEN_VECTORIZE_AVX512DQ
|
|
320
|
+
#endif
|
|
321
|
+
#ifdef EIGEN_VECTORIZE_AVX512ER
|
|
322
|
+
#undef EIGEN_VECTORIZE_AVX512ER
|
|
323
|
+
#endif
|
|
324
|
+
#endif
|
|
325
|
+
// NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX
|
|
326
|
+
// NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2
|
|
327
|
+
// produce core dumps in 3 tests NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all
|
|
328
|
+
// cases NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping
|
|
329
|
+
// tests
|
|
330
|
+
// with -macosx-version-min=10.15 and AVX
|
|
331
|
+
// NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with
|
|
332
|
+
// -macosx-version-min=10.15 and AVX
|
|
333
|
+
#endif
|
|
334
|
+
|
|
335
|
+
// include files
|
|
336
|
+
|
|
337
|
+
// This extern "C" works around a MINGW-w64 compilation issue
|
|
338
|
+
// https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
|
|
339
|
+
// In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
|
|
340
|
+
// However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
|
|
341
|
+
// with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
|
|
342
|
+
// so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
|
|
343
|
+
// notice that since these are C headers, the extern "C" is theoretically needed anyways.
|
|
344
|
+
extern "C" {
|
|
345
|
+
// In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
|
|
346
|
+
// Doing so triggers some issues with ICC. However old gcc versions seem to not have this file, thus:
|
|
347
|
+
#if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
|
|
348
|
+
#include <immintrin.h>
|
|
349
|
+
#else
|
|
350
|
+
#include <mmintrin.h>
|
|
351
|
+
#include <emmintrin.h>
|
|
352
|
+
#include <xmmintrin.h>
|
|
353
|
+
#ifdef EIGEN_VECTORIZE_SSE3
|
|
354
|
+
#include <pmmintrin.h>
|
|
355
|
+
#endif
|
|
356
|
+
#ifdef EIGEN_VECTORIZE_SSSE3
|
|
357
|
+
#include <tmmintrin.h>
|
|
358
|
+
#endif
|
|
359
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
360
|
+
#include <smmintrin.h>
|
|
361
|
+
#endif
|
|
362
|
+
#ifdef EIGEN_VECTORIZE_SSE4_2
|
|
363
|
+
#include <nmmintrin.h>
|
|
364
|
+
#endif
|
|
365
|
+
#if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
|
|
366
|
+
#include <immintrin.h>
|
|
367
|
+
#endif
|
|
368
|
+
#endif
|
|
369
|
+
} // end extern "C"
|
|
370
|
+
|
|
371
|
+
#elif defined(__VSX__) && !defined(__APPLE__)
|
|
372
|
+
|
|
373
|
+
#define EIGEN_VECTORIZE
|
|
374
|
+
#define EIGEN_VECTORIZE_VSX 1
|
|
375
|
+
#define EIGEN_VECTORIZE_FMA
|
|
376
|
+
#include <altivec.h>
|
|
377
|
+
// We need to #undef all these ugly tokens defined in <altivec.h>
|
|
378
|
+
// => use __vector instead of vector
|
|
379
|
+
#undef bool
|
|
380
|
+
#undef vector
|
|
381
|
+
#undef pixel
|
|
382
|
+
|
|
383
|
+
#elif defined __ALTIVEC__
|
|
384
|
+
|
|
385
|
+
#define EIGEN_VECTORIZE
|
|
386
|
+
#define EIGEN_VECTORIZE_ALTIVEC
|
|
387
|
+
#define EIGEN_VECTORIZE_FMA
|
|
388
|
+
#include <altivec.h>
|
|
389
|
+
// We need to #undef all these ugly tokens defined in <altivec.h>
|
|
390
|
+
// => use __vector instead of vector
|
|
391
|
+
#undef bool
|
|
392
|
+
#undef vector
|
|
393
|
+
#undef pixel
|
|
394
|
+
|
|
395
|
+
#elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
|
|
396
|
+
|
|
397
|
+
#define EIGEN_VECTORIZE
|
|
398
|
+
#define EIGEN_VECTORIZE_NEON
|
|
399
|
+
#include <arm_neon.h>
|
|
400
|
+
|
|
401
|
+
// We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
|
|
402
|
+
// will not select the backend automatically
|
|
403
|
+
#elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
|
|
404
|
+
|
|
405
|
+
#define EIGEN_VECTORIZE
|
|
406
|
+
#define EIGEN_VECTORIZE_SVE
|
|
407
|
+
#include <arm_sve.h>
|
|
408
|
+
|
|
409
|
+
// Since we depend on knowing SVE vector lengths at compile-time, we need
|
|
410
|
+
// to ensure a fixed length is set
|
|
411
|
+
#if defined __ARM_FEATURE_SVE_BITS
|
|
412
|
+
#define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
|
|
413
|
+
#else
|
|
407
414
|
#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
|
|
408
415
|
#endif
|
|
409
416
|
|
|
@@ -428,6 +435,18 @@
|
|
|
428
435
|
#include <msa.h>
|
|
429
436
|
#endif
|
|
430
437
|
|
|
438
|
+
#elif (defined __loongarch64 && defined __loongarch_sx)
|
|
439
|
+
|
|
440
|
+
#define EIGEN_VECTORIZE
|
|
441
|
+
#define EIGEN_VECTORIZE_LSX
|
|
442
|
+
#include <lsxintrin.h>
|
|
443
|
+
|
|
444
|
+
#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
|
|
445
|
+
|
|
446
|
+
#define EIGEN_VECTORIZE
|
|
447
|
+
#define EIGEN_VECTORIZE_HVX
|
|
448
|
+
#include <hexagon_types.h>
|
|
449
|
+
|
|
431
450
|
#endif
|
|
432
451
|
#endif
|
|
433
452
|
|
|
@@ -435,43 +454,54 @@
|
|
|
435
454
|
// compilers seem to follow this. We therefore include it explicitly.
|
|
436
455
|
// See also: https://bugs.llvm.org/show_bug.cgi?id=47955
|
|
437
456
|
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
|
|
438
|
-
|
|
457
|
+
#include <arm_fp16.h>
|
|
439
458
|
#endif
|
|
440
459
|
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
460
|
+
// Enable FMA for ARM.
|
|
461
|
+
#if defined(__ARM_FEATURE_FMA)
|
|
462
|
+
#define EIGEN_VECTORIZE_FMA
|
|
463
|
+
#endif
|
|
444
464
|
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
465
|
+
#if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0))
|
|
466
|
+
// We can use the optimized fp16 to float and float to fp16 conversion routines
|
|
467
|
+
#define EIGEN_HAS_FP16_C
|
|
468
|
+
|
|
469
|
+
#if EIGEN_COMP_GNUC
|
|
470
|
+
// Make sure immintrin.h is included, even if e.g. vectorization is
|
|
471
|
+
// explicitly disabled (see also issue #2395).
|
|
472
|
+
// Note that FP16C intrinsics for gcc and clang are included by immintrin.h,
|
|
473
|
+
// as opposed to emmintrin.h as suggested by Intel:
|
|
474
|
+
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
|
|
475
|
+
#include <immintrin.h>
|
|
476
|
+
#endif
|
|
451
477
|
#endif
|
|
452
478
|
|
|
453
479
|
#if defined EIGEN_CUDACC
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
480
|
+
#define EIGEN_VECTORIZE_GPU
|
|
481
|
+
#include <vector_types.h>
|
|
482
|
+
#if EIGEN_CUDA_SDK_VER >= 70500
|
|
483
|
+
#define EIGEN_HAS_CUDA_FP16
|
|
484
|
+
#endif
|
|
459
485
|
#endif
|
|
460
486
|
|
|
461
487
|
#if defined(EIGEN_HAS_CUDA_FP16)
|
|
462
|
-
|
|
463
|
-
|
|
488
|
+
#include <cuda_runtime_api.h>
|
|
489
|
+
#include <cuda_fp16.h>
|
|
464
490
|
#endif
|
|
465
491
|
|
|
466
492
|
#if defined(EIGEN_HIPCC)
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
493
|
+
#define EIGEN_VECTORIZE_GPU
|
|
494
|
+
#include <hip/hip_vector_types.h>
|
|
495
|
+
#define EIGEN_HAS_HIP_FP16
|
|
496
|
+
#include <hip/hip_fp16.h>
|
|
497
|
+
#define EIGEN_HAS_HIP_BF16
|
|
498
|
+
#include <hip/hip_bfloat16.h>
|
|
471
499
|
#endif
|
|
472
500
|
|
|
473
|
-
|
|
474
501
|
/** \brief Namespace containing all symbols from the %Eigen library. */
|
|
502
|
+
// IWYU pragma: private
|
|
503
|
+
#include "../InternalHeaderCheck.h"
|
|
504
|
+
|
|
475
505
|
namespace Eigen {
|
|
476
506
|
|
|
477
507
|
inline static const char *SimdInstructionSetsInUse(void) {
|
|
@@ -501,12 +531,13 @@ inline static const char *SimdInstructionSetsInUse(void) {
|
|
|
501
531
|
return "S390X ZVECTOR";
|
|
502
532
|
#elif defined(EIGEN_VECTORIZE_MSA)
|
|
503
533
|
return "MIPS MSA";
|
|
534
|
+
#elif defined(EIGEN_VECTORIZE_LSX)
|
|
535
|
+
return "LOONGARCH64 LSX";
|
|
504
536
|
#else
|
|
505
537
|
return "None";
|
|
506
538
|
#endif
|
|
507
539
|
}
|
|
508
540
|
|
|
509
|
-
}
|
|
510
|
-
|
|
541
|
+
} // end namespace Eigen
|
|
511
542
|
|
|
512
|
-
#endif
|
|
543
|
+
#endif // EIGEN_CONFIGURE_VECTORIZATION_H
|