@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
13
13
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
/*****************************************************************************
|
|
17
16
|
*** Platform checks for aligned malloc functions ***
|
|
18
17
|
*****************************************************************************/
|
|
@@ -31,11 +30,11 @@
|
|
|
31
30
|
// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
|
|
32
31
|
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
|
|
33
32
|
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
|
|
34
|
-
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
#if defined(__GLIBC__) && ((__GLIBC__ >= 2 && __GLIBC_MINOR__ >= 8) || __GLIBC__ > 2) && defined(__LP64__) && \
|
|
34
|
+
!defined(__SANITIZE_ADDRESS__) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
|
|
35
|
+
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
|
|
37
36
|
#else
|
|
38
|
-
|
|
37
|
+
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
|
|
39
38
|
#endif
|
|
40
39
|
|
|
41
40
|
// FreeBSD 6 seems to have 16-byte aligned malloc
|
|
@@ -43,49 +42,107 @@
|
|
|
43
42
|
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
|
|
44
43
|
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
|
|
45
44
|
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
|
|
46
|
-
|
|
45
|
+
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
|
|
46
|
+
#else
|
|
47
|
+
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
|
|
48
|
+
#endif
|
|
49
|
+
|
|
50
|
+
#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) || \
|
|
51
|
+
EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
|
|
52
|
+
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
|
|
47
53
|
#else
|
|
48
|
-
|
|
54
|
+
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
|
|
55
|
+
#endif
|
|
56
|
+
|
|
49
57
|
#endif
|
|
50
58
|
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
59
|
+
#ifndef EIGEN_MALLOC_CHECK_THREAD_LOCAL
|
|
60
|
+
|
|
61
|
+
// Check whether we can use the thread_local keyword to allow or disallow
|
|
62
|
+
// allocating memory with per-thread granularity, by means of the
|
|
63
|
+
// set_is_malloc_allowed() function.
|
|
64
|
+
#ifndef EIGEN_AVOID_THREAD_LOCAL
|
|
65
|
+
|
|
66
|
+
#if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC >= 1900) && \
|
|
67
|
+
!defined(EIGEN_GPU_COMPILE_PHASE)
|
|
68
|
+
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL thread_local
|
|
56
69
|
#else
|
|
57
|
-
|
|
70
|
+
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL
|
|
58
71
|
#endif
|
|
59
72
|
|
|
73
|
+
#else // EIGEN_AVOID_THREAD_LOCAL
|
|
74
|
+
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL
|
|
75
|
+
#endif // EIGEN_AVOID_THREAD_LOCAL
|
|
76
|
+
|
|
60
77
|
#endif
|
|
61
78
|
|
|
79
|
+
// IWYU pragma: private
|
|
80
|
+
#include "../InternalHeaderCheck.h"
|
|
81
|
+
|
|
62
82
|
namespace Eigen {
|
|
63
83
|
|
|
64
84
|
namespace internal {
|
|
65
85
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
86
|
+
/*****************************************************************************
|
|
87
|
+
*** Implementation of portable aligned versions of malloc/free/realloc ***
|
|
88
|
+
*****************************************************************************/
|
|
89
|
+
|
|
90
|
+
#ifdef EIGEN_NO_MALLOC
|
|
91
|
+
/** \internal Allocation guard for builds that define EIGEN_NO_MALLOC.
 *
 * The asserted condition is the constant `false`, so every call fails the eigen_assert
 * with the message below.
 */
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
|
|
94
|
+
/** \internal Deallocation guard for builds that define EIGEN_NO_MALLOC.
 *
 * The asserted condition is the constant `false`, so every call fails the eigen_assert
 * with the message below.
 */
EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {
  eigen_assert(false && "heap deallocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
|
|
97
|
+
#elif defined EIGEN_RUNTIME_NO_MALLOC
|
|
98
|
+
/** \internal Single access point for the "heap allocation allowed" flag.
 *
 * The flag is a function-local static declared with EIGEN_MALLOC_CHECK_THREAD_LOCAL and is
 * initialised to `true`.
 *
 * \param update when true, the flag is overwritten with \a new_value before it is read
 * \param new_value value written to the flag when \a update is true; ignored otherwise
 * \returns the value of the flag after the optional update
 */
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false) {
  EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool allowed = true;
  if (update) {
    allowed = new_value;
  }
  return allowed;
}
|
|
103
|
+
/** \internal \returns the current value of the "heap allocation allowed" flag, leaving it unchanged. */
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() {
  const bool update = false;  // read-only query
  return is_malloc_allowed_impl(update);
}
|
|
104
|
+
/** \internal Stores \a new_value in the "heap allocation allowed" flag.
 *
 * \param new_value true to permit heap allocation, false to forbid it
 * \returns whatever is_malloc_allowed_impl() reports after the update
 */
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) {
  const bool update = true;  // request a write before the read
  return is_malloc_allowed_impl(update, new_value);
}
|
|
105
|
+
/** \internal Allocation guard for builds that define EIGEN_RUNTIME_NO_MALLOC.
 *
 * Asserts that the flag controlled by set_is_malloc_allowed() is currently true.
 */
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {
  eigen_assert(
      is_malloc_allowed() &&
      "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and set_is_malloc_allowed is false)");
}
|
|
109
|
+
/** \internal Single access point for the "heap deallocation allowed" flag.
 *
 * The flag is a function-local static declared with EIGEN_MALLOC_CHECK_THREAD_LOCAL and is
 * initialised to `true`.
 *
 * \param update when true, the flag is overwritten with \a new_value before it is read
 * \param new_value value written to the flag when \a update is true; ignored otherwise
 * \returns the value of the flag after the optional update
 */
EIGEN_DEVICE_FUNC inline bool is_free_allowed_impl(bool update, bool new_value = false) {
  EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool allowed = true;
  if (update) {
    allowed = new_value;
  }
  return allowed;
}
|
|
114
|
+
/** \internal \returns the current value of the "heap deallocation allowed" flag, leaving it unchanged. */
EIGEN_DEVICE_FUNC inline bool is_free_allowed() {
  const bool update = false;  // read-only query
  return is_free_allowed_impl(update);
}
|
|
115
|
+
/** \internal Stores \a new_value in the "heap deallocation allowed" flag.
 *
 * \param new_value true to permit heap deallocation, false to forbid it
 * \returns whatever is_free_allowed_impl() reports after the update
 */
EIGEN_DEVICE_FUNC inline bool set_is_free_allowed(bool new_value) {
  const bool update = true;  // request a write before the read
  return is_free_allowed_impl(update, new_value);
}
|
|
116
|
+
/** \internal Deallocation guard for builds that define EIGEN_RUNTIME_NO_MALLOC.
 *
 * Asserts that the flag controlled by set_is_free_allowed() is currently true. The check
 * consults the deallocation flag (the one named in the failure message), not the separate
 * allocation flag controlled by set_is_malloc_allowed().
 */
EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {
  eigen_assert(is_free_allowed() &&
               "heap deallocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and set_is_free_allowed is false)");
}
|
|
120
|
+
#else
|
|
121
|
+
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {}
|
|
122
|
+
EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {}
|
|
123
|
+
#endif
|
|
124
|
+
|
|
125
|
+
EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() {
|
|
126
|
+
#ifdef EIGEN_EXCEPTIONS
|
|
127
|
+
throw std::bad_alloc();
|
|
128
|
+
#else
|
|
129
|
+
std::size_t huge = static_cast<std::size_t>(-1);
|
|
130
|
+
#if defined(EIGEN_HIPCC)
|
|
131
|
+
//
|
|
132
|
+
// calls to "::operator new" are to be treated as opaque function calls (i.e no inlining),
|
|
133
|
+
// and as a consequence the code in the #else block triggers the hipcc warning :
|
|
134
|
+
// "no overloaded function has restriction specifiers that are compatible with the ambient context"
|
|
135
|
+
//
|
|
136
|
+
// "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
|
|
137
|
+
// the same on "operator new"
|
|
138
|
+
// Reverting code back to the old version in this #if block for the hipcc compiler
|
|
139
|
+
//
|
|
140
|
+
new int[huge];
|
|
141
|
+
#else
|
|
142
|
+
void* unused = ::operator new(huge);
|
|
143
|
+
EIGEN_UNUSED_VARIABLE(unused);
|
|
144
|
+
#endif
|
|
145
|
+
#endif
|
|
89
146
|
}
|
|
90
147
|
|
|
91
148
|
/*****************************************************************************
|
|
@@ -94,137 +151,132 @@ inline void throw_std_bad_alloc()
|
|
|
94
151
|
|
|
95
152
|
/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
|
|
96
153
|
|
|
97
|
-
/** \internal Like malloc, but the returned pointer is guaranteed to be
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size,
|
|
101
|
-
{
|
|
102
|
-
eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 &&
|
|
154
|
+
/** \internal Like malloc, but the returned pointer is guaranteed to be aligned to `alignment`.
|
|
155
|
+
* Fast, but wastes `alignment` additional bytes of memory. Does not throw any exception.
|
|
156
|
+
*/
|
|
157
|
+
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size,
                                                       std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) {
  eigen_assert(alignment >= sizeof(void*) && alignment <= 256 && (alignment & (alignment - 1)) == 0 &&
               "Alignment must be at least sizeof(void*), less than or equal to 256, and a power of 2");

  check_that_malloc_is_allowed();

  // `size + alignment` must not wrap around: a wrapped sum would request a tiny block and the
  // caller would then write far past its end. Report such requests as an allocation failure.
  if (size > static_cast<std::size_t>(-1) - alignment) return nullptr;

  EIGEN_USING_STD(malloc)
  void* original = malloc(size + alignment);
  if (original == nullptr) return nullptr;

  // Distance from the raw block to the next aligned address strictly above it; always in [1, alignment].
  std::size_t offset = alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1));
  void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);

  // Store offset - 1, since it is guaranteed to be at least 1. With alignment <= 256 this fits in
  // the single byte immediately preceding the aligned pointer.
  *(static_cast<uint8_t*>(aligned) - 1) = static_cast<uint8_t>(offset - 1);
  return aligned;
}
|
|
112
172
|
|
|
113
173
|
/** \internal Frees memory allocated with handmade_aligned_malloc */
|
|
114
|
-
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void
|
|
115
|
-
{
|
|
116
|
-
|
|
174
|
+
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void* ptr) {
  // Null pointers are accepted and ignored (no check, no free).
  if (ptr == nullptr) return;

  // The byte just before the aligned pointer holds (offset - 1), where offset is the distance
  // back to the address originally returned by malloc.
  uint8_t* const user_bytes = static_cast<uint8_t*>(ptr);
  const std::size_t shift = static_cast<std::size_t>(*(user_bytes - 1)) + 1;
  void* const base = static_cast<void*>(user_bytes - shift);

  check_that_free_is_allowed();
  EIGEN_USING_STD(free)
  free(base);
}
|
|
121
184
|
|
|
122
185
|
/** \internal
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
inline void* handmade_aligned_realloc(void* ptr, std::size_t
|
|
128
|
-
{
|
|
129
|
-
if (ptr ==
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
void
|
|
136
|
-
if(
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
*
|
|
186
|
+
* \brief Reallocates aligned memory.
|
|
187
|
+
* Since we know that our handmade version is based on std::malloc
|
|
188
|
+
* we can use std::realloc to implement efficient reallocation.
|
|
189
|
+
*/
|
|
190
|
+
EIGEN_DEVICE_FUNC inline void* handmade_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size,
                                                        std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) {
  if (ptr == nullptr) return handmade_aligned_malloc(new_size, alignment);

  // Recover the address originally handed out by malloc/realloc: the byte before `ptr` stores (offset - 1).
  std::size_t old_offset = static_cast<std::size_t>(*(static_cast<uint8_t*>(ptr) - 1)) + 1;
  void* old_original = static_cast<uint8_t*>(ptr) - old_offset;

  check_that_malloc_is_allowed();

  // `new_size + alignment` must not wrap around, otherwise realloc would shrink the block to a
  // tiny size. Report it as a failure instead; the existing block is left untouched.
  if (new_size > static_cast<std::size_t>(-1) - alignment) return nullptr;

  EIGEN_USING_STD(realloc)
  void* original = realloc(old_original, new_size + alignment);
  if (original == nullptr) return nullptr;
  // Block was resized in place: the stored offset and the payload position are still valid.
  if (original == old_original) return ptr;

  std::size_t offset = alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1));
  void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
  if (offset != old_offset) {
    // realloc copied the payload at its old distance from the block start; slide it to the new
    // aligned position. The ranges may overlap, hence memmove.
    const void* src = static_cast<const void*>(static_cast<uint8_t*>(original) + old_offset);
    std::size_t count = (std::min)(new_size, old_size);
    std::memmove(aligned, src, count);
  }
  // Store offset - 1, since it is guaranteed to be at least 1.
  *(static_cast<uint8_t*>(aligned) - 1) = static_cast<uint8_t>(offset - 1);
  return aligned;
}
|
|
142
212
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
213
|
+
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on
|
|
214
|
+
* the requirements. On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
|
|
215
|
+
*/
|
|
216
|
+
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) {
|
|
217
|
+
if (size == 0) return nullptr;
|
|
146
218
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
{
|
|
150
|
-
eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
|
|
151
|
-
}
|
|
152
|
-
#elif defined EIGEN_RUNTIME_NO_MALLOC
|
|
153
|
-
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
|
|
154
|
-
{
|
|
155
|
-
static bool value = true;
|
|
156
|
-
if (update == 1)
|
|
157
|
-
value = new_value;
|
|
158
|
-
return value;
|
|
159
|
-
}
|
|
160
|
-
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
|
|
161
|
-
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
|
|
162
|
-
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
|
|
163
|
-
{
|
|
164
|
-
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
|
|
165
|
-
}
|
|
166
|
-
#else
|
|
167
|
-
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
|
|
168
|
-
{}
|
|
169
|
-
#endif
|
|
219
|
+
void* result;
|
|
220
|
+
#if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
|
170
221
|
|
|
171
|
-
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
|
|
172
|
-
* On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
|
|
173
|
-
*/
|
|
174
|
-
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
|
|
175
|
-
{
|
|
176
222
|
check_that_malloc_is_allowed();
|
|
223
|
+
EIGEN_USING_STD(malloc)
|
|
224
|
+
result = malloc(size);
|
|
177
225
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
#endif
|
|
187
|
-
#else
|
|
188
|
-
result = handmade_aligned_malloc(size);
|
|
189
|
-
#endif
|
|
226
|
+
#if EIGEN_DEFAULT_ALIGN_BYTES == 16
|
|
227
|
+
eigen_assert((size < 16 || (std::size_t(result) % 16) == 0) &&
|
|
228
|
+
"System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback "
|
|
229
|
+
"to handmade aligned memory allocator.");
|
|
230
|
+
#endif
|
|
231
|
+
#else
|
|
232
|
+
result = handmade_aligned_malloc(size);
|
|
233
|
+
#endif
|
|
190
234
|
|
|
191
|
-
if(!result && size)
|
|
192
|
-
throw_std_bad_alloc();
|
|
235
|
+
if (!result && size) throw_std_bad_alloc();
|
|
193
236
|
|
|
194
237
|
return result;
|
|
195
238
|
}
|
|
196
239
|
|
|
197
240
|
/** \internal Frees memory allocated with aligned_malloc. */
|
|
198
|
-
EIGEN_DEVICE_FUNC inline void aligned_free(void
|
|
199
|
-
|
|
200
|
-
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
|
241
|
+
EIGEN_DEVICE_FUNC inline void aligned_free(void* ptr) {
|
|
242
|
+
#if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
|
201
243
|
|
|
244
|
+
if (ptr != nullptr) {
|
|
245
|
+
check_that_free_is_allowed();
|
|
202
246
|
EIGEN_USING_STD(free)
|
|
203
247
|
free(ptr);
|
|
248
|
+
}
|
|
204
249
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
250
|
+
#else
|
|
251
|
+
handmade_aligned_free(ptr);
|
|
252
|
+
#endif
|
|
208
253
|
}
|
|
209
254
|
|
|
210
255
|
/**
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
inline void* aligned_realloc(void
|
|
216
|
-
|
|
256
|
+
* \internal
|
|
257
|
+
* \brief Reallocates an aligned block of memory.
|
|
258
|
+
* \throws std::bad_alloc on allocation failure
|
|
259
|
+
*/
|
|
260
|
+
EIGEN_DEVICE_FUNC inline void* aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) {
|
|
261
|
+
if (ptr == nullptr) return aligned_malloc(new_size);
|
|
262
|
+
if (old_size == new_size) return ptr;
|
|
263
|
+
if (new_size == 0) {
|
|
264
|
+
aligned_free(ptr);
|
|
265
|
+
return nullptr;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
void* result;
|
|
269
|
+
#if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
|
217
270
|
EIGEN_UNUSED_VARIABLE(old_size)
|
|
218
271
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
result =
|
|
272
|
+
check_that_malloc_is_allowed();
|
|
273
|
+
EIGEN_USING_STD(realloc)
|
|
274
|
+
result = realloc(ptr, new_size);
|
|
222
275
|
#else
|
|
223
|
-
result = handmade_aligned_realloc(ptr,new_size,old_size);
|
|
276
|
+
result = handmade_aligned_realloc(ptr, new_size, old_size);
|
|
224
277
|
#endif
|
|
225
278
|
|
|
226
|
-
if (!result && new_size)
|
|
227
|
-
throw_std_bad_alloc();
|
|
279
|
+
if (!result && new_size) throw_std_bad_alloc();
|
|
228
280
|
|
|
229
281
|
return result;
|
|
230
282
|
}
|
|
@@ -234,45 +286,58 @@ inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_si
|
|
|
234
286
|
*****************************************************************************/
|
|
235
287
|
|
|
236
288
|
/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
template<bool Align>
|
|
240
|
-
{
|
|
289
|
+
* On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
|
|
290
|
+
*/
|
|
291
|
+
template <bool Align>
|
|
292
|
+
EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) {
|
|
241
293
|
return aligned_malloc(size);
|
|
242
294
|
}
|
|
243
295
|
|
|
244
|
-
template<>
|
|
245
|
-
{
|
|
246
|
-
|
|
296
|
+
template <>
|
|
297
|
+
EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size) {
|
|
298
|
+
if (size == 0) return nullptr;
|
|
247
299
|
|
|
300
|
+
check_that_malloc_is_allowed();
|
|
248
301
|
EIGEN_USING_STD(malloc)
|
|
249
|
-
void
|
|
302
|
+
void* result = malloc(size);
|
|
250
303
|
|
|
251
|
-
if(!result && size)
|
|
252
|
-
throw_std_bad_alloc();
|
|
304
|
+
if (!result && size) throw_std_bad_alloc();
|
|
253
305
|
return result;
|
|
254
306
|
}
|
|
255
307
|
|
|
256
308
|
/** \internal Frees memory allocated with conditional_aligned_malloc */
|
|
257
|
-
template<bool Align>
|
|
258
|
-
{
|
|
309
|
+
template <bool Align>
|
|
310
|
+
EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void* ptr) {
|
|
259
311
|
aligned_free(ptr);
|
|
260
312
|
}
|
|
261
313
|
|
|
262
|
-
template<>
|
|
263
|
-
{
|
|
264
|
-
|
|
265
|
-
|
|
314
|
+
template <>
|
|
315
|
+
EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void* ptr) {
|
|
316
|
+
if (ptr != nullptr) {
|
|
317
|
+
check_that_free_is_allowed();
|
|
318
|
+
EIGEN_USING_STD(free)
|
|
319
|
+
free(ptr);
|
|
320
|
+
}
|
|
266
321
|
}
|
|
267
322
|
|
|
268
|
-
template<bool Align>
|
|
269
|
-
{
|
|
323
|
+
template <bool Align>
|
|
324
|
+
EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) {
|
|
270
325
|
return aligned_realloc(ptr, new_size, old_size);
|
|
271
326
|
}
|
|
272
327
|
|
|
273
|
-
template<>
|
|
274
|
-
|
|
275
|
-
|
|
328
|
+
template <>
|
|
329
|
+
EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size,
|
|
330
|
+
std::size_t old_size) {
|
|
331
|
+
if (ptr == nullptr) return conditional_aligned_malloc<false>(new_size);
|
|
332
|
+
if (old_size == new_size) return ptr;
|
|
333
|
+
if (new_size == 0) {
|
|
334
|
+
conditional_aligned_free<false>(ptr);
|
|
335
|
+
return nullptr;
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
check_that_malloc_is_allowed();
|
|
339
|
+
EIGEN_USING_STD(realloc)
|
|
340
|
+
return realloc(ptr, new_size);
|
|
276
341
|
}
|
|
277
342
|
|
|
278
343
|
/*****************************************************************************
|
|
@@ -280,75 +345,95 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_
|
|
|
280
345
|
*****************************************************************************/
|
|
281
346
|
|
|
282
347
|
/** \internal Destructs the elements of an array.
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
template<typename T>
|
|
286
|
-
{
|
|
348
|
+
* The \a size parameter tells on how many objects to call the destructor of T.
|
|
349
|
+
*/
|
|
350
|
+
template <typename T>
|
|
351
|
+
EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T* ptr, std::size_t size) {
|
|
287
352
|
// always destruct an array starting from the end.
|
|
288
|
-
if(ptr)
|
|
289
|
-
while(size) ptr[--size].~T();
|
|
353
|
+
if (ptr)
|
|
354
|
+
while (size) ptr[--size].~T();
|
|
290
355
|
}
|
|
291
356
|
|
|
292
357
|
/** \internal Constructs the elements of an array.
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
template<typename T>
|
|
296
|
-
{
|
|
297
|
-
std::size_t i;
|
|
298
|
-
EIGEN_TRY
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
358
|
+
* The \a size parameter tells on how many objects to call the constructor of T.
|
|
359
|
+
*/
|
|
360
|
+
template <typename T>
|
|
361
|
+
EIGEN_DEVICE_FUNC inline T* default_construct_elements_of_array(T* ptr, std::size_t size) {
|
|
362
|
+
std::size_t i = 0;
|
|
363
|
+
EIGEN_TRY {
|
|
364
|
+
for (i = 0; i < size; ++i) ::new (ptr + i) T;
|
|
365
|
+
}
|
|
366
|
+
EIGEN_CATCH(...) {
|
|
367
|
+
destruct_elements_of_array(ptr, i);
|
|
368
|
+
EIGEN_THROW;
|
|
369
|
+
}
|
|
370
|
+
return ptr;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
/** \internal Copy-constructs the elements of an array.
|
|
374
|
+
* The \a size parameter tells on how many objects to copy.
|
|
375
|
+
*/
|
|
376
|
+
template <typename T>
|
|
377
|
+
EIGEN_DEVICE_FUNC inline T* copy_construct_elements_of_array(T* ptr, const T* src, std::size_t size) {
|
|
378
|
+
std::size_t i = 0;
|
|
379
|
+
EIGEN_TRY {
|
|
380
|
+
for (i = 0; i < size; ++i) ::new (ptr + i) T(*(src + i));
|
|
302
381
|
}
|
|
303
|
-
EIGEN_CATCH(...)
|
|
304
|
-
{
|
|
382
|
+
EIGEN_CATCH(...) {
|
|
305
383
|
destruct_elements_of_array(ptr, i);
|
|
306
384
|
EIGEN_THROW;
|
|
307
385
|
}
|
|
308
|
-
return
|
|
386
|
+
return ptr;
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
/** \internal Move-constructs the elements of an array.
|
|
390
|
+
* The \a size parameter tells on how many objects to move.
|
|
391
|
+
*/
|
|
392
|
+
template <typename T>
|
|
393
|
+
EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T* ptr, T* src, std::size_t size) {
|
|
394
|
+
std::size_t i = 0;
|
|
395
|
+
EIGEN_TRY {
|
|
396
|
+
for (i = 0; i < size; ++i) ::new (ptr + i) T(std::move(*(src + i)));
|
|
397
|
+
}
|
|
398
|
+
EIGEN_CATCH(...) {
|
|
399
|
+
destruct_elements_of_array(ptr, i);
|
|
400
|
+
EIGEN_THROW;
|
|
401
|
+
}
|
|
402
|
+
return ptr;
|
|
309
403
|
}
|
|
310
404
|
|
|
311
405
|
/*****************************************************************************
|
|
312
406
|
*** Implementation of aligned new/delete-like functions ***
|
|
313
407
|
*****************************************************************************/
|
|
314
408
|
|
|
315
|
-
template<typename T>
|
|
316
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
|
|
317
|
-
|
|
318
|
-
if(size >
|
|
319
|
-
throw_std_bad_alloc();
|
|
409
|
+
template <typename T>
|
|
410
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) {
|
|
411
|
+
constexpr std::size_t max_elements = (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T);
|
|
412
|
+
if (size > max_elements) throw_std_bad_alloc();
|
|
320
413
|
}
|
|
321
414
|
|
|
322
415
|
/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
template<typename T>
|
|
327
|
-
{
|
|
416
|
+
* On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
|
|
417
|
+
* The default constructor of T is called.
|
|
418
|
+
*/
|
|
419
|
+
template <typename T>
|
|
420
|
+
EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) {
|
|
328
421
|
check_size_for_overflow<T>(size);
|
|
329
|
-
T
|
|
330
|
-
EIGEN_TRY
|
|
331
|
-
{
|
|
332
|
-
return construct_elements_of_array(result, size);
|
|
333
|
-
}
|
|
334
|
-
EIGEN_CATCH(...)
|
|
335
|
-
{
|
|
422
|
+
T* result = static_cast<T*>(aligned_malloc(sizeof(T) * size));
|
|
423
|
+
EIGEN_TRY { return default_construct_elements_of_array(result, size); }
|
|
424
|
+
EIGEN_CATCH(...) {
|
|
336
425
|
aligned_free(result);
|
|
337
426
|
EIGEN_THROW;
|
|
338
427
|
}
|
|
339
428
|
return result;
|
|
340
429
|
}
|
|
341
430
|
|
|
342
|
-
template<typename T, bool Align>
|
|
343
|
-
{
|
|
431
|
+
template <typename T, bool Align>
|
|
432
|
+
EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) {
|
|
344
433
|
check_size_for_overflow<T>(size);
|
|
345
|
-
T
|
|
346
|
-
EIGEN_TRY
|
|
347
|
-
{
|
|
348
|
-
return construct_elements_of_array(result, size);
|
|
349
|
-
}
|
|
350
|
-
EIGEN_CATCH(...)
|
|
351
|
-
{
|
|
434
|
+
T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
|
|
435
|
+
EIGEN_TRY { return default_construct_elements_of_array(result, size); }
|
|
436
|
+
EIGEN_CATCH(...) {
|
|
352
437
|
conditional_aligned_free<Align>(result);
|
|
353
438
|
EIGEN_THROW;
|
|
354
439
|
}
|
|
@@ -356,60 +441,62 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
|
|
356
441
|
}
|
|
357
442
|
|
|
358
443
|
/** \internal Deletes objects constructed with aligned_new
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
template<typename T>
|
|
362
|
-
{
|
|
444
|
+
* The \a size parameter tells on how many objects to call the destructor of T.
|
|
445
|
+
*/
|
|
446
|
+
template <typename T>
|
|
447
|
+
EIGEN_DEVICE_FUNC inline void aligned_delete(T* ptr, std::size_t size) {
|
|
363
448
|
destruct_elements_of_array<T>(ptr, size);
|
|
364
|
-
|
|
449
|
+
aligned_free(ptr);
|
|
365
450
|
}
|
|
366
451
|
|
|
367
452
|
/** \internal Deletes objects constructed with conditional_aligned_new
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
template<typename T, bool Align>
|
|
371
|
-
{
|
|
453
|
+
* The \a size parameter tells on how many objects to call the destructor of T.
|
|
454
|
+
*/
|
|
455
|
+
template <typename T, bool Align>
|
|
456
|
+
EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T* ptr, std::size_t size) {
|
|
372
457
|
destruct_elements_of_array<T>(ptr, size);
|
|
373
458
|
conditional_aligned_free<Align>(ptr);
|
|
374
459
|
}
|
|
375
460
|
|
|
376
|
-
template<typename T, bool Align>
|
|
377
|
-
{
|
|
461
|
+
template <typename T, bool Align>
|
|
462
|
+
EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size) {
|
|
378
463
|
check_size_for_overflow<T>(new_size);
|
|
379
464
|
check_size_for_overflow<T>(old_size);
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
465
|
+
|
|
466
|
+
// If elements need to be explicitly initialized, we cannot simply realloc
|
|
467
|
+
// (or memcpy) the memory block - each element needs to be reconstructed.
|
|
468
|
+
// Otherwise, objects that contain internal pointers like mpfr or
|
|
469
|
+
// AnnoyingScalar can be pointing to the wrong thing.
|
|
470
|
+
T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * new_size));
|
|
471
|
+
EIGEN_TRY {
|
|
472
|
+
// Move-construct initial elements.
|
|
473
|
+
std::size_t copy_size = (std::min)(old_size, new_size);
|
|
474
|
+
move_construct_elements_of_array(result, pts, copy_size);
|
|
475
|
+
|
|
476
|
+
// Default-construct remaining elements.
|
|
477
|
+
if (new_size > old_size) {
|
|
478
|
+
default_construct_elements_of_array(result + copy_size, new_size - old_size);
|
|
393
479
|
}
|
|
480
|
+
|
|
481
|
+
// Delete old elements.
|
|
482
|
+
conditional_aligned_delete<T, Align>(pts, old_size);
|
|
483
|
+
}
|
|
484
|
+
EIGEN_CATCH(...) {
|
|
485
|
+
conditional_aligned_free<Align>(result);
|
|
486
|
+
EIGEN_THROW;
|
|
394
487
|
}
|
|
488
|
+
|
|
395
489
|
return result;
|
|
396
490
|
}
|
|
397
491
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
if(size==0)
|
|
402
|
-
return 0; // short-cut. Also fixes Bug 884
|
|
492
|
+
template <typename T, bool Align>
|
|
493
|
+
EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) {
|
|
494
|
+
if (size == 0) return nullptr; // short-cut. Also fixes Bug 884
|
|
403
495
|
check_size_for_overflow<T>(size);
|
|
404
|
-
T
|
|
405
|
-
if(NumTraits<T>::RequireInitialization)
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
{
|
|
409
|
-
construct_elements_of_array(result, size);
|
|
410
|
-
}
|
|
411
|
-
EIGEN_CATCH(...)
|
|
412
|
-
{
|
|
496
|
+
T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
|
|
497
|
+
if (NumTraits<T>::RequireInitialization) {
|
|
498
|
+
EIGEN_TRY { default_construct_elements_of_array(result, size); }
|
|
499
|
+
EIGEN_CATCH(...) {
|
|
413
500
|
conditional_aligned_free<Align>(result);
|
|
414
501
|
EIGEN_THROW;
|
|
415
502
|
}
|
|
@@ -417,166 +504,140 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
|
|
417
504
|
return result;
|
|
418
505
|
}
|
|
419
506
|
|
|
420
|
-
template<typename T, bool Align>
|
|
421
|
-
{
|
|
507
|
+
template <typename T, bool Align>
|
|
508
|
+
EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) {
|
|
509
|
+
if (NumTraits<T>::RequireInitialization) {
|
|
510
|
+
return conditional_aligned_realloc_new<T, Align>(pts, new_size, old_size);
|
|
511
|
+
}
|
|
512
|
+
|
|
422
513
|
check_size_for_overflow<T>(new_size);
|
|
423
514
|
check_size_for_overflow<T>(old_size);
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
|
|
427
|
-
if(NumTraits<T>::RequireInitialization && (new_size > old_size))
|
|
428
|
-
{
|
|
429
|
-
EIGEN_TRY
|
|
430
|
-
{
|
|
431
|
-
construct_elements_of_array(result+old_size, new_size-old_size);
|
|
432
|
-
}
|
|
433
|
-
EIGEN_CATCH(...)
|
|
434
|
-
{
|
|
435
|
-
conditional_aligned_free<Align>(result);
|
|
436
|
-
EIGEN_THROW;
|
|
437
|
-
}
|
|
438
|
-
}
|
|
439
|
-
return result;
|
|
515
|
+
return static_cast<T*>(
|
|
516
|
+
conditional_aligned_realloc<Align>(static_cast<void*>(pts), sizeof(T) * new_size, sizeof(T) * old_size));
|
|
440
517
|
}
|
|
441
518
|
|
|
442
|
-
template<typename T, bool Align>
|
|
443
|
-
{
|
|
444
|
-
if(NumTraits<T>::RequireInitialization)
|
|
445
|
-
destruct_elements_of_array<T>(ptr, size);
|
|
519
|
+
template <typename T, bool Align>
|
|
520
|
+
EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T* ptr, std::size_t size) {
|
|
521
|
+
if (NumTraits<T>::RequireInitialization) destruct_elements_of_array<T>(ptr, size);
|
|
446
522
|
conditional_aligned_free<Align>(ptr);
|
|
447
523
|
}
|
|
448
524
|
|
|
449
525
|
/****************************************************************************/
|
|
450
526
|
|
|
451
|
-
/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
{
|
|
527
|
+
/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a
|
|
528
|
+
* Alignment.
|
|
529
|
+
*
|
|
530
|
+
* \tparam Alignment requested alignment in Bytes.
|
|
531
|
+
* \param array the address of the start of the array
|
|
532
|
+
* \param size the size of the array
|
|
533
|
+
*
|
|
534
|
+
* \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
|
|
535
|
+
* the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
|
|
536
|
+
* packet size for the given scalar type is 1, then everything is considered well-aligned.
|
|
537
|
+
*
|
|
538
|
+
* \note Otherwise, if the Alignment is larger than the scalar size, we rely on the assumption that sizeof(Scalar) is a
|
|
539
|
+
* power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails
|
|
540
|
+
* for example with Scalar=double on certain 32-bit platforms, see bug #79.
|
|
541
|
+
*
|
|
542
|
+
* There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
|
|
543
|
+
* \sa first_default_aligned()
|
|
544
|
+
*/
|
|
545
|
+
template <int Alignment, typename Scalar, typename Index>
|
|
546
|
+
EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) {
|
|
471
547
|
const Index ScalarSize = sizeof(Scalar);
|
|
472
548
|
const Index AlignmentSize = Alignment / ScalarSize;
|
|
473
|
-
const Index AlignmentMask = AlignmentSize-1;
|
|
549
|
+
const Index AlignmentMask = AlignmentSize - 1;
|
|
474
550
|
|
|
475
|
-
if(AlignmentSize<=1)
|
|
476
|
-
{
|
|
551
|
+
if (AlignmentSize <= 1) {
|
|
477
552
|
// Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar
|
|
478
553
|
// so that all elements of the array have the same alignment.
|
|
479
554
|
return 0;
|
|
480
|
-
}
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
// The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
|
|
484
|
-
// Consequently, no element of the array is well aligned.
|
|
555
|
+
} else if ((std::uintptr_t(array) & (sizeof(Scalar) - 1)) || (Alignment % ScalarSize) != 0) {
|
|
556
|
+
// The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the
|
|
557
|
+
// scalar size. Consequently, no element of the array is well aligned.
|
|
485
558
|
return size;
|
|
486
|
-
}
|
|
487
|
-
|
|
488
|
-
{
|
|
489
|
-
Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
|
|
559
|
+
} else {
|
|
560
|
+
Index first = (AlignmentSize - (Index((std::uintptr_t(array) / sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
|
|
490
561
|
return (first < size) ? first : size;
|
|
491
562
|
}
|
|
492
563
|
}
|
|
493
564
|
|
|
494
|
-
/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet
|
|
495
|
-
|
|
496
|
-
template<typename Scalar, typename Index>
|
|
497
|
-
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
|
|
498
|
-
{
|
|
565
|
+
/** \internal Returns the index of the first element of the array that is well aligned with respect to the largest packet
|
|
566
|
+
* requirement. \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
|
|
567
|
+
template <typename Scalar, typename Index>
|
|
568
|
+
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size) {
|
|
499
569
|
typedef typename packet_traits<Scalar>::type DefaultPacketType;
|
|
500
570
|
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
|
|
501
571
|
}
|
|
502
572
|
|
|
503
573
|
/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
|
|
504
|
-
|
|
505
|
-
template<typename Index>
|
|
506
|
-
inline Index first_multiple(Index size, Index base)
|
|
507
|
-
|
|
508
|
-
return ((size+base-1)/base)*base;
|
|
574
|
+
*/
|
|
575
|
+
template <typename Index>
inline Index first_multiple(Index size, Index base) {
  // Round the quotient up, then scale back by `base`.
  const auto chunks = (size + base - 1) / base;
  return chunks * base;
}
|
|
510
579
|
|
|
511
580
|
// std::copy is much slower than memcpy, so let's introduce a smart_copy which
|
|
512
581
|
// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
|
|
513
|
-
template<typename T, bool UseMemcpy>
|
|
582
|
+
template <typename T, bool UseMemcpy>
|
|
583
|
+
struct smart_copy_helper;
|
|
514
584
|
|
|
515
|
-
template<typename T>
|
|
516
|
-
{
|
|
517
|
-
smart_copy_helper<T
|
|
585
|
+
template <typename T>
|
|
586
|
+
EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target) {
|
|
587
|
+
smart_copy_helper<T, !NumTraits<T>::RequireInitialization>::run(start, end, target);
|
|
518
588
|
}
|
|
519
589
|
|
|
520
|
-
template<typename T>
|
|
521
|
-
|
|
522
|
-
{
|
|
523
|
-
|
|
524
|
-
if(size==0) return;
|
|
525
|
-
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
|
590
|
+
template <typename T>
|
|
591
|
+
struct smart_copy_helper<T, true> {
|
|
592
|
+
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) {
|
|
593
|
+
std::intptr_t size = std::intptr_t(end) - std::intptr_t(start);
|
|
594
|
+
if (size == 0) return;
|
|
595
|
+
eigen_internal_assert(start != 0 && end != 0 && target != 0);
|
|
526
596
|
EIGEN_USING_STD(memcpy)
|
|
527
597
|
memcpy(target, start, size);
|
|
528
598
|
}
|
|
529
599
|
};
|
|
530
600
|
|
|
531
|
-
template<typename T>
|
|
532
|
-
|
|
533
|
-
{ std::copy(start, end, target); }
|
|
601
|
+
template <typename T>
|
|
602
|
+
struct smart_copy_helper<T, false> {
|
|
603
|
+
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { std::copy(start, end, target); }
|
|
534
604
|
};
|
|
535
605
|
|
|
536
606
|
// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
|
|
537
|
-
template<typename T, bool UseMemmove>
|
|
607
|
+
template <typename T, bool UseMemmove>
|
|
608
|
+
struct smart_memmove_helper;
|
|
538
609
|
|
|
539
|
-
template<typename T>
|
|
540
|
-
{
|
|
541
|
-
smart_memmove_helper<T
|
|
610
|
+
template <typename T>
|
|
611
|
+
void smart_memmove(const T* start, const T* end, T* target) {
|
|
612
|
+
smart_memmove_helper<T, !NumTraits<T>::RequireInitialization>::run(start, end, target);
|
|
542
613
|
}
|
|
543
614
|
|
|
544
|
-
template<typename T>
|
|
545
|
-
|
|
546
|
-
{
|
|
547
|
-
|
|
548
|
-
if(size==0) return;
|
|
549
|
-
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
|
615
|
+
template <typename T>
|
|
616
|
+
struct smart_memmove_helper<T, true> {
|
|
617
|
+
static inline void run(const T* start, const T* end, T* target) {
|
|
618
|
+
std::intptr_t size = std::intptr_t(end) - std::intptr_t(start);
|
|
619
|
+
if (size == 0) return;
|
|
620
|
+
eigen_internal_assert(start != 0 && end != 0 && target != 0);
|
|
550
621
|
std::memmove(target, start, size);
|
|
551
622
|
}
|
|
552
623
|
};
|
|
553
624
|
|
|
554
|
-
template<typename T>
|
|
555
|
-
|
|
556
|
-
{
|
|
557
|
-
if (
|
|
558
|
-
{
|
|
625
|
+
template <typename T>
|
|
626
|
+
struct smart_memmove_helper<T, false> {
|
|
627
|
+
static inline void run(const T* start, const T* end, T* target) {
|
|
628
|
+
if (std::uintptr_t(target) < std::uintptr_t(start)) {
|
|
559
629
|
std::copy(start, end, target);
|
|
560
|
-
}
|
|
561
|
-
|
|
562
|
-
{
|
|
563
|
-
std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
|
|
630
|
+
} else {
|
|
631
|
+
std::ptrdiff_t count = (std::ptrdiff_t(end) - std::ptrdiff_t(start)) / sizeof(T);
|
|
564
632
|
std::copy_backward(start, end, target + count);
|
|
565
633
|
}
|
|
566
634
|
}
|
|
567
635
|
};
|
|
568
636
|
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
{
|
|
637
|
+
template <typename T>
|
|
638
|
+
EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) {
|
|
572
639
|
return std::move(start, end, target);
|
|
573
640
|
}
|
|
574
|
-
#else
|
|
575
|
-
template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
|
|
576
|
-
{
|
|
577
|
-
return std::copy(start, end, target);
|
|
578
|
-
}
|
|
579
|
-
#endif
|
|
580
641
|
|
|
581
642
|
/*****************************************************************************
|
|
582
643
|
*** Implementation of runtime stack allocation (falling back to malloc) ***
|
|
@@ -584,12 +645,12 @@ template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target
|
|
|
584
645
|
|
|
585
646
|
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
|
|
586
647
|
// to the appropriate stack allocation function
|
|
587
|
-
#if !
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
648
|
+
#if !defined EIGEN_ALLOCA && !defined EIGEN_GPU_COMPILE_PHASE
|
|
649
|
+
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
|
|
650
|
+
#define EIGEN_ALLOCA alloca
|
|
651
|
+
#elif EIGEN_COMP_MSVC
|
|
652
|
+
#define EIGEN_ALLOCA _alloca
|
|
653
|
+
#endif
|
|
593
654
|
#endif
|
|
594
655
|
|
|
595
656
|
// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
|
|
@@ -598,183 +659,178 @@ template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target
|
|
|
598
659
|
// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
|
|
599
660
|
// is fixed.
|
|
600
661
|
#if defined(__clang__) && defined(__thumb__)
|
|
601
|
-
|
|
662
|
+
#undef EIGEN_ALLOCA
|
|
602
663
|
#endif
|
|
603
664
|
|
|
604
665
|
// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
|
|
605
666
|
// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
|
|
606
|
-
template<typename T>
|
|
607
|
-
{
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
Eigen::internal::aligned_free(m_ptr);
|
|
629
|
-
}
|
|
630
|
-
protected:
|
|
631
|
-
T* m_ptr;
|
|
632
|
-
std::size_t m_size;
|
|
633
|
-
bool m_deallocate;
|
|
667
|
+
template <typename T>
|
|
668
|
+
class aligned_stack_memory_handler : noncopyable {
|
|
669
|
+
public:
|
|
670
|
+
/* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
|
|
671
|
+
* Note that \a ptr can be 0 regardless of the other parameters.
|
|
672
|
+
* This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type
|
|
673
|
+
*T (see NumTraits<T>::RequireInitialization). In this case, the buffer elements will also be destructed when this
|
|
674
|
+
*handler will be destructed. Finally, if \a dealloc is true, then the pointer \a ptr is freed.
|
|
675
|
+
**/
|
|
676
|
+
EIGEN_DEVICE_FUNC aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
|
|
677
|
+
: m_ptr(ptr), m_size(size), m_deallocate(dealloc) {
|
|
678
|
+
if (NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::default_construct_elements_of_array(m_ptr, size);
|
|
679
|
+
}
|
|
680
|
+
EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() {
|
|
681
|
+
if (NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
|
|
682
|
+
if (m_deallocate) Eigen::internal::aligned_free(m_ptr);
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
protected:
|
|
686
|
+
T* m_ptr;
|
|
687
|
+
std::size_t m_size;
|
|
688
|
+
bool m_deallocate;
|
|
634
689
|
};
|
|
635
690
|
|
|
636
691
|
#ifdef EIGEN_ALLOCA
|
|
637
692
|
|
|
638
|
-
template<typename Xpr, int NbEvaluations,
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
{
|
|
643
|
-
static const bool NeedExternalBuffer = false;
|
|
693
|
+
template <typename Xpr, int NbEvaluations,
|
|
694
|
+
bool MapExternalBuffer = nested_eval<Xpr, NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime == Dynamic>
|
|
695
|
+
struct local_nested_eval_wrapper {
|
|
696
|
+
static constexpr bool NeedExternalBuffer = false;
|
|
644
697
|
typedef typename Xpr::Scalar Scalar;
|
|
645
|
-
typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
|
|
698
|
+
typedef typename nested_eval<Xpr, NbEvaluations>::type ObjectType;
|
|
646
699
|
ObjectType object;
|
|
647
700
|
|
|
648
|
-
EIGEN_DEVICE_FUNC
|
|
649
|
-
local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
|
|
650
|
-
{
|
|
701
|
+
EIGEN_DEVICE_FUNC local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr) {
|
|
651
702
|
EIGEN_UNUSED_VARIABLE(ptr);
|
|
652
|
-
eigen_internal_assert(ptr==0);
|
|
703
|
+
eigen_internal_assert(ptr == 0);
|
|
653
704
|
}
|
|
654
705
|
};
|
|
655
706
|
|
|
656
|
-
template<typename Xpr, int NbEvaluations>
|
|
657
|
-
struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
|
|
658
|
-
|
|
659
|
-
static const bool NeedExternalBuffer = true;
|
|
707
|
+
template <typename Xpr, int NbEvaluations>
|
|
708
|
+
struct local_nested_eval_wrapper<Xpr, NbEvaluations, true> {
|
|
709
|
+
static constexpr bool NeedExternalBuffer = true;
|
|
660
710
|
typedef typename Xpr::Scalar Scalar;
|
|
661
711
|
typedef typename plain_object_eval<Xpr>::type PlainObject;
|
|
662
|
-
typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
|
|
712
|
+
typedef Map<PlainObject, EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
|
|
663
713
|
ObjectType object;
|
|
664
714
|
|
|
665
|
-
EIGEN_DEVICE_FUNC
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
Eigen::internal::construct_elements_of_array(object.data(), object.size());
|
|
715
|
+
EIGEN_DEVICE_FUNC local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
|
|
716
|
+
: object(ptr == 0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar) * xpr.size())) : ptr,
|
|
717
|
+
xpr.rows(), xpr.cols()),
|
|
718
|
+
m_deallocate(ptr == 0) {
|
|
719
|
+
if (NumTraits<Scalar>::RequireInitialization && object.data())
|
|
720
|
+
Eigen::internal::default_construct_elements_of_array(object.data(), object.size());
|
|
672
721
|
object = xpr;
|
|
673
722
|
}
|
|
674
723
|
|
|
675
|
-
EIGEN_DEVICE_FUNC
|
|
676
|
-
|
|
677
|
-
{
|
|
678
|
-
if(NumTraits<Scalar>::RequireInitialization && object.data())
|
|
724
|
+
EIGEN_DEVICE_FUNC ~local_nested_eval_wrapper() {
|
|
725
|
+
if (NumTraits<Scalar>::RequireInitialization && object.data())
|
|
679
726
|
Eigen::internal::destruct_elements_of_array(object.data(), object.size());
|
|
680
|
-
if(m_deallocate)
|
|
681
|
-
Eigen::internal::aligned_free(object.data());
|
|
727
|
+
if (m_deallocate) Eigen::internal::aligned_free(object.data());
|
|
682
728
|
}
|
|
683
729
|
|
|
684
|
-
private:
|
|
730
|
+
private:
|
|
685
731
|
bool m_deallocate;
|
|
686
732
|
};
|
|
687
733
|
|
|
688
|
-
#endif
|
|
734
|
+
#endif // EIGEN_ALLOCA
|
|
689
735
|
|
|
690
|
-
template<typename T>
|
|
691
|
-
{
|
|
736
|
+
template <typename T>
|
|
737
|
+
class scoped_array : noncopyable {
|
|
692
738
|
T* m_ptr;
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
{
|
|
696
|
-
|
|
697
|
-
}
|
|
698
|
-
~scoped_array()
|
|
699
|
-
{
|
|
700
|
-
delete[] m_ptr;
|
|
701
|
-
}
|
|
739
|
+
|
|
740
|
+
public:
|
|
741
|
+
explicit scoped_array(std::ptrdiff_t size) { m_ptr = new T[size]; }
|
|
742
|
+
~scoped_array() { delete[] m_ptr; }
|
|
702
743
|
T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
|
|
703
744
|
const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
|
|
704
|
-
T
|
|
745
|
+
T*& ptr() { return m_ptr; }
|
|
705
746
|
const T* ptr() const { return m_ptr; }
|
|
706
747
|
operator const T*() const { return m_ptr; }
|
|
707
748
|
};
|
|
708
749
|
|
|
709
|
-
template<typename T>
|
|
710
|
-
{
|
|
711
|
-
std::swap(a.ptr(),b.ptr());
|
|
750
|
+
template <typename T>
|
|
751
|
+
void swap(scoped_array<T>& a, scoped_array<T>& b) {
|
|
752
|
+
std::swap(a.ptr(), b.ptr());
|
|
712
753
|
}
|
|
713
754
|
|
|
714
|
-
}
|
|
755
|
+
} // end namespace internal
|
|
715
756
|
|
|
716
757
|
/** \internal
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
#
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
|
|
761
|
-
Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
|
|
762
|
-
( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
|
|
763
|
-
? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
|
|
764
|
-
typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)
|
|
758
|
+
*
|
|
759
|
+
* The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
|
|
760
|
+
* and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
|
|
761
|
+
* if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the
|
|
762
|
+
* platform (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap. The
|
|
763
|
+
* allocated buffer is automatically deleted when exiting the scope of this declaration. If BUFFER is non null, then the
|
|
764
|
+
* declared variable is simply an alias for BUFFER, and no allocation/deletion occurs. Here is an example: \code
|
|
765
|
+
* {
|
|
766
|
+
* ei_declare_aligned_stack_constructed_variable(float,data,size,0);
|
|
767
|
+
* // use data[0] to data[size-1]
|
|
768
|
+
* }
|
|
769
|
+
* \endcode
|
|
770
|
+
* The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
|
|
771
|
+
*
|
|
772
|
+
* The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to
|
|
773
|
+
* \code
|
|
774
|
+
* typename internal::nested_eval<XPRT_T,N>::type NAME(XPR);
|
|
775
|
+
* \endcode
|
|
776
|
+
* with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
|
|
777
|
+
* This is accomplished through alloca if this later is supported and if the required number of bytes
|
|
778
|
+
* is below EIGEN_STACK_ALLOCATION_LIMIT.
|
|
779
|
+
*/
|
|
780
|
+
#if defined(EIGEN_ALLOCA) && !defined(EIGEN_NO_ALLOCA)
|
|
781
|
+
|
|
782
|
+
#if EIGEN_DEFAULT_ALIGN_BYTES > 0
|
|
783
|
+
// We always manually re-align the result of EIGEN_ALLOCA.
|
|
784
|
+
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
|
|
785
|
+
|
|
786
|
+
#if ((EIGEN_COMP_GNUC || EIGEN_COMP_CLANG) && !EIGEN_COMP_NVHPC)
|
|
787
|
+
#define EIGEN_ALIGNED_ALLOCA(SIZE) __builtin_alloca_with_align(SIZE, CHAR_BIT* EIGEN_DEFAULT_ALIGN_BYTES)
|
|
788
|
+
#else
|
|
789
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* ptr) {
|
|
790
|
+
constexpr std::uintptr_t mask = EIGEN_DEFAULT_ALIGN_BYTES - 1;
|
|
791
|
+
std::uintptr_t ptr_int = std::uintptr_t(ptr);
|
|
792
|
+
std::uintptr_t aligned_ptr_int = (ptr_int + mask) & ~mask;
|
|
793
|
+
std::uintptr_t offset = aligned_ptr_int - ptr_int;
|
|
794
|
+
return static_cast<void*>(static_cast<uint8_t*>(ptr) + offset);
|
|
795
|
+
}
|
|
796
|
+
#define EIGEN_ALIGNED_ALLOCA(SIZE) eigen_aligned_alloca_helper(EIGEN_ALLOCA(SIZE + EIGEN_DEFAULT_ALIGN_BYTES - 1))
|
|
797
|
+
#endif
|
|
765
798
|
|
|
766
799
|
#else
|
|
800
|
+
#define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
|
|
801
|
+
#endif
|
|
767
802
|
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
803
|
+
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
|
|
804
|
+
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
|
|
805
|
+
TYPE* NAME = (BUFFER) != 0 ? (BUFFER) \
|
|
806
|
+
: reinterpret_cast<TYPE*>((sizeof(TYPE) * (SIZE) <= EIGEN_STACK_ALLOCATION_LIMIT) \
|
|
807
|
+
? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE) * (SIZE)) \
|
|
808
|
+
: Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
|
|
809
|
+
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
|
|
810
|
+
(BUFFER) == 0 ? NAME : 0, SIZE, sizeof(TYPE) * (SIZE) > EIGEN_STACK_ALLOCATION_LIMIT)
|
|
811
|
+
|
|
812
|
+
#define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
|
|
813
|
+
Eigen::internal::local_nested_eval_wrapper<XPR_T, N> EIGEN_CAT(NAME, _wrapper)( \
|
|
814
|
+
XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
|
|
815
|
+
((Eigen::internal::local_nested_eval_wrapper<XPR_T, N>::NeedExternalBuffer) && \
|
|
816
|
+
((sizeof(typename XPR_T::Scalar) * XPR.size()) <= EIGEN_STACK_ALLOCATION_LIMIT)) \
|
|
817
|
+
? EIGEN_ALIGNED_ALLOCA(sizeof(typename XPR_T::Scalar) * XPR.size()) \
|
|
818
|
+
: 0)); \
|
|
819
|
+
typename Eigen::internal::local_nested_eval_wrapper<XPR_T, N>::ObjectType NAME(EIGEN_CAT(NAME, _wrapper).object)
|
|
772
820
|
|
|
821
|
+
#else
|
|
773
822
|
|
|
774
|
-
#define
|
|
823
|
+
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
|
|
824
|
+
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
|
|
825
|
+
TYPE* NAME = \
|
|
826
|
+
(BUFFER) != 0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
|
|
827
|
+
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
|
|
828
|
+
(BUFFER) == 0 ? NAME : 0, SIZE, true)
|
|
775
829
|
|
|
776
|
-
#
|
|
830
|
+
#define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
|
|
831
|
+
typename Eigen::internal::nested_eval<XPR_T, N>::type NAME(XPR)
|
|
777
832
|
|
|
833
|
+
#endif
|
|
778
834
|
|
|
779
835
|
/*****************************************************************************
|
|
780
836
|
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
|
|
@@ -787,315 +843,432 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
|
|
787
843
|
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
|
|
788
844
|
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
|
789
845
|
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
|
790
|
-
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
|
|
846
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Size)
|
|
791
847
|
|
|
792
848
|
#else
|
|
793
849
|
|
|
794
850
|
// HIP does not support new/delete on device.
|
|
795
|
-
#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
} \
|
|
836
|
-
typedef void eigen_aligned_operator_new_marker_type;
|
|
851
|
+
#if EIGEN_MAX_ALIGN_BYTES != 0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
|
|
852
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
|
853
|
+
EIGEN_DEVICE_FUNC void* operator new(std::size_t size, const std::nothrow_t&) noexcept { \
|
|
854
|
+
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
|
|
855
|
+
EIGEN_CATCH(...) { return 0; } \
|
|
856
|
+
}
|
|
857
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
|
|
858
|
+
EIGEN_DEVICE_FUNC void* operator new(std::size_t size) { \
|
|
859
|
+
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
|
860
|
+
} \
|
|
861
|
+
EIGEN_DEVICE_FUNC void* operator new[](std::size_t size) { \
|
|
862
|
+
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
|
863
|
+
} \
|
|
864
|
+
EIGEN_DEVICE_FUNC void operator delete(void* ptr) noexcept { \
|
|
865
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
866
|
+
} \
|
|
867
|
+
EIGEN_DEVICE_FUNC void operator delete[](void* ptr) noexcept { \
|
|
868
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
869
|
+
} \
|
|
870
|
+
EIGEN_DEVICE_FUNC void operator delete(void* ptr, std::size_t /* sz */) noexcept { \
|
|
871
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
872
|
+
} \
|
|
873
|
+
EIGEN_DEVICE_FUNC void operator delete[](void* ptr, std::size_t /* sz */) noexcept { \
|
|
874
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
875
|
+
} \
|
|
876
|
+
/* in-place new and delete. since (at least afaik) there is no actual */ \
|
|
877
|
+
/* memory allocated we can safely let the default implementation handle */ \
|
|
878
|
+
/* this particular case. */ \
|
|
879
|
+
EIGEN_DEVICE_FUNC static void* operator new(std::size_t size, void* ptr) { return ::operator new(size, ptr); } \
|
|
880
|
+
EIGEN_DEVICE_FUNC static void* operator new[](std::size_t size, void* ptr) { return ::operator new[](size, ptr); } \
|
|
881
|
+
EIGEN_DEVICE_FUNC void operator delete(void* memory, void* ptr) noexcept { return ::operator delete(memory, ptr); } \
|
|
882
|
+
EIGEN_DEVICE_FUNC void operator delete[](void* memory, void* ptr) noexcept { \
|
|
883
|
+
return ::operator delete[](memory, ptr); \
|
|
884
|
+
} \
|
|
885
|
+
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
|
|
886
|
+
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
|
887
|
+
EIGEN_DEVICE_FUNC void operator delete(void* ptr, const std::nothrow_t&) noexcept { \
|
|
888
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
889
|
+
} \
|
|
890
|
+
typedef void eigen_aligned_operator_new_marker_type;
|
|
837
891
|
#else
|
|
838
|
-
|
|
892
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
|
839
893
|
#endif
|
|
840
894
|
|
|
841
895
|
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
|
|
842
|
-
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
|
|
843
|
-
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
896
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Size) \
|
|
897
|
+
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF( \
|
|
898
|
+
bool(((Size) != Eigen::Dynamic) && \
|
|
899
|
+
(((EIGEN_MAX_ALIGN_BYTES >= 16) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES) == 0)) || \
|
|
900
|
+
((EIGEN_MAX_ALIGN_BYTES >= 32) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES / 2) == 0)) || \
|
|
901
|
+
((EIGEN_MAX_ALIGN_BYTES >= 64) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES / 4) == 0)))))
|
|
848
902
|
|
|
849
903
|
#endif
|
|
850
904
|
|
|
851
905
|
/****************************************************************************/
|
|
852
906
|
|
|
853
907
|
/** \class aligned_allocator
|
|
854
|
-
* \ingroup Core_Module
|
|
855
|
-
*
|
|
856
|
-
* \brief STL compatible allocator to use with types requiring a non
|
|
857
|
-
*
|
|
858
|
-
* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
|
|
859
|
-
* By default, it will thus provide at least 16 bytes alignment and more in following cases:
|
|
860
|
-
* - 32 bytes alignment if AVX is enabled.
|
|
861
|
-
* - 64 bytes alignment if AVX512 is enabled.
|
|
862
|
-
*
|
|
863
|
-
* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
|
|
864
|
-
* \link TopicPreprocessorDirectivesPerformance there \endlink.
|
|
865
|
-
*
|
|
866
|
-
* Example:
|
|
867
|
-
* \code
|
|
868
|
-
* // Matrix4f requires 16 bytes alignment:
|
|
869
|
-
* std::map< int, Matrix4f, std::less<int>,
|
|
870
|
-
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
|
|
871
|
-
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
|
|
872
|
-
* std::map< int, Vector3f > my_map_vec3;
|
|
873
|
-
* \endcode
|
|
874
|
-
*
|
|
875
|
-
* \sa \blank \ref TopicStlContainers.
|
|
876
|
-
*/
|
|
877
|
-
template<class T>
|
|
878
|
-
class aligned_allocator
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
typedef std::
|
|
882
|
-
typedef
|
|
883
|
-
typedef T*
|
|
884
|
-
typedef
|
|
885
|
-
typedef T&
|
|
886
|
-
typedef
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
struct rebind
|
|
891
|
-
{
|
|
908
|
+
* \ingroup Core_Module
|
|
909
|
+
*
|
|
910
|
+
* \brief STL compatible allocator to use with types requiring a non-standard alignment.
|
|
911
|
+
*
|
|
912
|
+
* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
|
|
913
|
+
* By default, it will thus provide at least 16 bytes alignment and more in following cases:
|
|
914
|
+
* - 32 bytes alignment if AVX is enabled.
|
|
915
|
+
* - 64 bytes alignment if AVX512 is enabled.
|
|
916
|
+
*
|
|
917
|
+
* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
|
|
918
|
+
* \link TopicPreprocessorDirectivesPerformance there \endlink.
|
|
919
|
+
*
|
|
920
|
+
* Example:
|
|
921
|
+
* \code
|
|
922
|
+
* // Matrix4f requires 16 bytes alignment:
|
|
923
|
+
* std::map< int, Matrix4f, std::less<int>,
|
|
924
|
+
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
|
|
925
|
+
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
|
|
926
|
+
* std::map< int, Vector3f > my_map_vec3;
|
|
927
|
+
* \endcode
|
|
928
|
+
*
|
|
929
|
+
* \sa \blank \ref TopicStlContainers.
|
|
930
|
+
*/
|
|
931
|
+
template <class T>
|
|
932
|
+
class aligned_allocator {
|
|
933
|
+
public:
|
|
934
|
+
typedef std::size_t size_type;
|
|
935
|
+
typedef std::ptrdiff_t difference_type;
|
|
936
|
+
typedef T* pointer;
|
|
937
|
+
typedef const T* const_pointer;
|
|
938
|
+
typedef T& reference;
|
|
939
|
+
typedef const T& const_reference;
|
|
940
|
+
typedef T value_type;
|
|
941
|
+
|
|
942
|
+
template <class U>
|
|
943
|
+
struct rebind {
|
|
892
944
|
typedef aligned_allocator<U> other;
|
|
893
945
|
};
|
|
894
946
|
|
|
895
|
-
aligned_allocator()
|
|
947
|
+
aligned_allocator() = default;
|
|
896
948
|
|
|
897
|
-
aligned_allocator(const aligned_allocator&
|
|
949
|
+
aligned_allocator(const aligned_allocator&) = default;
|
|
898
950
|
|
|
899
|
-
template<class U>
|
|
900
|
-
aligned_allocator(const aligned_allocator<U>&
|
|
951
|
+
template <class U>
|
|
952
|
+
aligned_allocator(const aligned_allocator<U>&) {}
|
|
901
953
|
|
|
902
|
-
|
|
954
|
+
template <class U>
|
|
955
|
+
constexpr bool operator==(const aligned_allocator<U>&) const noexcept {
|
|
956
|
+
return true;
|
|
957
|
+
}
|
|
958
|
+
template <class U>
|
|
959
|
+
constexpr bool operator!=(const aligned_allocator<U>&) const noexcept {
|
|
960
|
+
return false;
|
|
961
|
+
}
|
|
903
962
|
|
|
904
|
-
|
|
963
|
+
#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0)
|
|
905
964
|
// In gcc std::allocator::max_size() is bugged making gcc triggers a warning:
|
|
906
|
-
// eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object
|
|
907
|
-
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
|
|
908
|
-
size_type max_size() const {
|
|
909
|
-
|
|
910
|
-
}
|
|
911
|
-
#endif
|
|
965
|
+
// eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object
|
|
966
|
+
// size 9223372036854775807 See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
|
|
967
|
+
size_type max_size() const { return (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T); }
|
|
968
|
+
#endif
|
|
912
969
|
|
|
913
|
-
pointer allocate(size_type num, const void* /*hint*/ = 0)
|
|
914
|
-
{
|
|
970
|
+
pointer allocate(size_type num, const void* /*hint*/ = 0) {
|
|
915
971
|
internal::check_size_for_overflow<T>(num);
|
|
916
|
-
return static_cast<pointer>(
|
|
972
|
+
return static_cast<pointer>(internal::aligned_malloc(num * sizeof(T)));
|
|
917
973
|
}
|
|
918
974
|
|
|
919
|
-
void deallocate(pointer p, size_type /*num*/)
|
|
920
|
-
{
|
|
921
|
-
internal::aligned_free(p);
|
|
922
|
-
}
|
|
975
|
+
void deallocate(pointer p, size_type /*num*/) { internal::aligned_free(p); }
|
|
923
976
|
};
|
|
924
977
|
|
|
925
978
|
//---------- Cache sizes ----------
|
|
926
979
|
|
|
927
980
|
#if !defined(EIGEN_NO_CPUID)
|
|
928
|
-
#
|
|
929
|
-
#
|
|
930
|
-
|
|
931
|
-
#
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
#
|
|
944
|
-
|
|
945
|
-
#
|
|
946
|
-
|
|
947
|
-
#
|
|
981
|
+
#if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
|
|
982
|
+
#if defined(__PIC__) && EIGEN_ARCH_i386
|
|
983
|
+
// Case for x86 with PIC
|
|
984
|
+
#define EIGEN_CPUID(abcd, func, id) \
|
|
985
|
+
__asm__ __volatile__("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1" \
|
|
986
|
+
: "=a"(abcd[0]), "=&r"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) \
|
|
987
|
+
: "a"(func), "c"(id));
|
|
988
|
+
#elif defined(__PIC__) && EIGEN_ARCH_x86_64
|
|
989
|
+
// Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with
|
|
990
|
+
// the default small code model. However, we cannot detect which code model is used, and the xchg overhead is negligible
|
|
991
|
+
// anyway.
|
|
992
|
+
#define EIGEN_CPUID(abcd, func, id) \
|
|
993
|
+
__asm__ __volatile__("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1" \
|
|
994
|
+
: "=a"(abcd[0]), "=&r"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) \
|
|
995
|
+
: "0"(func), "2"(id));
|
|
996
|
+
#else
|
|
997
|
+
// Case for x86_64 or x86 w/o PIC
|
|
998
|
+
#define EIGEN_CPUID(abcd, func, id) \
|
|
999
|
+
__asm__ __volatile__("cpuid" : "=a"(abcd[0]), "=b"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) : "0"(func), "2"(id));
|
|
1000
|
+
#endif
|
|
1001
|
+
#elif EIGEN_COMP_MSVC
|
|
1002
|
+
#if EIGEN_ARCH_i386_OR_x86_64
|
|
1003
|
+
#define EIGEN_CPUID(abcd, func, id) __cpuidex((int*)abcd, func, id)
|
|
1004
|
+
#endif
|
|
1005
|
+
#endif
|
|
948
1006
|
#endif
|
|
949
1007
|
|
|
950
1008
|
namespace internal {
|
|
951
1009
|
|
|
952
1010
|
#ifdef EIGEN_CPUID
|
|
953
1011
|
|
|
954
|
-
inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
|
|
955
|
-
|
|
956
|
-
return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
|
|
1012
|
+
inline bool cpuid_is_vendor(int abcd[4], const int vendor[3]) {
|
|
1013
|
+
return abcd[1] == vendor[0] && abcd[3] == vendor[1] && abcd[2] == vendor[2];
|
|
957
1014
|
}
|
|
958
1015
|
|
|
959
|
-
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
|
|
960
|
-
{
|
|
1016
|
+
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3) {
|
|
961
1017
|
int abcd[4];
|
|
962
1018
|
l1 = l2 = l3 = 0;
|
|
963
1019
|
int cache_id = 0;
|
|
964
1020
|
int cache_type = 0;
|
|
965
1021
|
do {
|
|
966
1022
|
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
|
967
|
-
EIGEN_CPUID(abcd,0x4,cache_id);
|
|
968
|
-
cache_type
|
|
969
|
-
if(cache_type==1||cache_type==3)
|
|
1023
|
+
EIGEN_CPUID(abcd, 0x4, cache_id);
|
|
1024
|
+
cache_type = (abcd[0] & 0x0F) >> 0;
|
|
1025
|
+
if (cache_type == 1 || cache_type == 3) // data or unified cache
|
|
970
1026
|
{
|
|
971
|
-
int cache_level = (abcd[0] & 0xE0) >> 5;
|
|
972
|
-
int ways
|
|
973
|
-
int partitions
|
|
974
|
-
int line_size
|
|
975
|
-
int sets
|
|
976
|
-
|
|
977
|
-
int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
|
|
978
|
-
|
|
979
|
-
switch(cache_level)
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
case
|
|
984
|
-
|
|
1027
|
+
int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
|
|
1028
|
+
int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
|
|
1029
|
+
int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
|
|
1030
|
+
int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
|
|
1031
|
+
int sets = (abcd[2]); // C[31:0]
|
|
1032
|
+
|
|
1033
|
+
int cache_size = (ways + 1) * (partitions + 1) * (line_size + 1) * (sets + 1);
|
|
1034
|
+
|
|
1035
|
+
switch (cache_level) {
|
|
1036
|
+
case 1:
|
|
1037
|
+
l1 = cache_size;
|
|
1038
|
+
break;
|
|
1039
|
+
case 2:
|
|
1040
|
+
l2 = cache_size;
|
|
1041
|
+
break;
|
|
1042
|
+
case 3:
|
|
1043
|
+
l3 = cache_size;
|
|
1044
|
+
break;
|
|
1045
|
+
default:
|
|
1046
|
+
break;
|
|
985
1047
|
}
|
|
986
1048
|
}
|
|
987
1049
|
cache_id++;
|
|
988
|
-
} while(cache_type>0 && cache_id<16);
|
|
1050
|
+
} while (cache_type > 0 && cache_id < 16);
|
|
989
1051
|
}
|
|
990
1052
|
|
|
991
|
-
inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
|
|
992
|
-
{
|
|
1053
|
+
inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3) {
|
|
993
1054
|
int abcd[4];
|
|
994
1055
|
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
|
995
1056
|
l1 = l2 = l3 = 0;
|
|
996
|
-
EIGEN_CPUID(abcd,0x00000002,0);
|
|
997
|
-
unsigned char
|
|
1057
|
+
EIGEN_CPUID(abcd, 0x00000002, 0);
|
|
1058
|
+
unsigned char* bytes = reinterpret_cast<unsigned char*>(abcd) + 2;
|
|
998
1059
|
bool check_for_p2_core2 = false;
|
|
999
|
-
for(int i=0; i<14; ++i)
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
case 0x0C:
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
case
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
case
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
case
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
case
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
case
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
case
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
case
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
case
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
case
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
case
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
case
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
case
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
case
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
case
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
case
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
case
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
case
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
case
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1060
|
+
for (int i = 0; i < 14; ++i) {
|
|
1061
|
+
switch (bytes[i]) {
|
|
1062
|
+
case 0x0A:
|
|
1063
|
+
l1 = 8;
|
|
1064
|
+
break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
|
|
1065
|
+
case 0x0C:
|
|
1066
|
+
l1 = 16;
|
|
1067
|
+
break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
|
|
1068
|
+
case 0x0E:
|
|
1069
|
+
l1 = 24;
|
|
1070
|
+
break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
|
|
1071
|
+
case 0x10:
|
|
1072
|
+
l1 = 16;
|
|
1073
|
+
break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
|
|
1074
|
+
case 0x15:
|
|
1075
|
+
l1 = 16;
|
|
1076
|
+
break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
|
|
1077
|
+
case 0x2C:
|
|
1078
|
+
l1 = 32;
|
|
1079
|
+
break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
|
|
1080
|
+
case 0x30:
|
|
1081
|
+
l1 = 32;
|
|
1082
|
+
break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
|
|
1083
|
+
case 0x60:
|
|
1084
|
+
l1 = 16;
|
|
1085
|
+
break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
|
|
1086
|
+
case 0x66:
|
|
1087
|
+
l1 = 8;
|
|
1088
|
+
break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
|
|
1089
|
+
case 0x67:
|
|
1090
|
+
l1 = 16;
|
|
1091
|
+
break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
|
|
1092
|
+
case 0x68:
|
|
1093
|
+
l1 = 32;
|
|
1094
|
+
break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
|
|
1095
|
+
case 0x1A:
|
|
1096
|
+
l2 = 96;
|
|
1097
|
+
break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
|
|
1098
|
+
case 0x22:
|
|
1099
|
+
l3 = 512;
|
|
1100
|
+
break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
|
|
1101
|
+
case 0x23:
|
|
1102
|
+
l3 = 1024;
|
|
1103
|
+
break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1104
|
+
case 0x25:
|
|
1105
|
+
l3 = 2048;
|
|
1106
|
+
break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1107
|
+
case 0x29:
|
|
1108
|
+
l3 = 4096;
|
|
1109
|
+
break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1110
|
+
case 0x39:
|
|
1111
|
+
l2 = 128;
|
|
1112
|
+
break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
|
|
1113
|
+
case 0x3A:
|
|
1114
|
+
l2 = 192;
|
|
1115
|
+
break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
|
|
1116
|
+
case 0x3B:
|
|
1117
|
+
l2 = 128;
|
|
1118
|
+
break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
|
|
1119
|
+
case 0x3C:
|
|
1120
|
+
l2 = 256;
|
|
1121
|
+
break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
|
|
1122
|
+
case 0x3D:
|
|
1123
|
+
l2 = 384;
|
|
1124
|
+
break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
|
|
1125
|
+
case 0x3E:
|
|
1126
|
+
l2 = 512;
|
|
1127
|
+
break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
|
|
1128
|
+
case 0x40:
|
|
1129
|
+
l2 = 0;
|
|
1130
|
+
break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
|
|
1131
|
+
case 0x41:
|
|
1132
|
+
l2 = 128;
|
|
1133
|
+
break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
|
|
1134
|
+
case 0x42:
|
|
1135
|
+
l2 = 256;
|
|
1136
|
+
break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
|
|
1137
|
+
case 0x43:
|
|
1138
|
+
l2 = 512;
|
|
1139
|
+
break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
|
|
1140
|
+
case 0x44:
|
|
1141
|
+
l2 = 1024;
|
|
1142
|
+
break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
|
|
1143
|
+
case 0x45:
|
|
1144
|
+
l2 = 2048;
|
|
1145
|
+
break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
|
|
1146
|
+
case 0x46:
|
|
1147
|
+
l3 = 4096;
|
|
1148
|
+
break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
|
|
1149
|
+
case 0x47:
|
|
1150
|
+
l3 = 8192;
|
|
1151
|
+
break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
|
|
1152
|
+
case 0x48:
|
|
1153
|
+
l2 = 3072;
|
|
1154
|
+
break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
|
|
1155
|
+
case 0x49:
|
|
1156
|
+
if (l2 != 0)
|
|
1157
|
+
l3 = 4096;
|
|
1158
|
+
else {
|
|
1159
|
+
check_for_p2_core2 = true;
|
|
1160
|
+
l3 = l2 = 4096;
|
|
1161
|
+
}
|
|
1162
|
+
break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
|
|
1163
|
+
case 0x4A:
|
|
1164
|
+
l3 = 6144;
|
|
1165
|
+
break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
|
|
1166
|
+
case 0x4B:
|
|
1167
|
+
l3 = 8192;
|
|
1168
|
+
break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
|
|
1169
|
+
case 0x4C:
|
|
1170
|
+
l3 = 12288;
|
|
1171
|
+
break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
|
|
1172
|
+
case 0x4D:
|
|
1173
|
+
l3 = 16384;
|
|
1174
|
+
break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
|
|
1175
|
+
case 0x4E:
|
|
1176
|
+
l2 = 6144;
|
|
1177
|
+
break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
|
|
1178
|
+
case 0x78:
|
|
1179
|
+
l2 = 1024;
|
|
1180
|
+
break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
|
|
1181
|
+
case 0x79:
|
|
1182
|
+
l2 = 128;
|
|
1183
|
+
break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1184
|
+
case 0x7A:
|
|
1185
|
+
l2 = 256;
|
|
1186
|
+
break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1187
|
+
case 0x7B:
|
|
1188
|
+
l2 = 512;
|
|
1189
|
+
break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1190
|
+
case 0x7C:
|
|
1191
|
+
l2 = 1024;
|
|
1192
|
+
break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1193
|
+
case 0x7D:
|
|
1194
|
+
l2 = 2048;
|
|
1195
|
+
break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
|
|
1196
|
+
case 0x7E:
|
|
1197
|
+
l2 = 256;
|
|
1198
|
+
break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
|
|
1199
|
+
case 0x7F:
|
|
1200
|
+
l2 = 512;
|
|
1201
|
+
break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
|
|
1202
|
+
case 0x80:
|
|
1203
|
+
l2 = 512;
|
|
1204
|
+
break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
|
|
1205
|
+
case 0x81:
|
|
1206
|
+
l2 = 128;
|
|
1207
|
+
break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
|
|
1208
|
+
case 0x82:
|
|
1209
|
+
l2 = 256;
|
|
1210
|
+
break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
|
|
1211
|
+
case 0x83:
|
|
1212
|
+
l2 = 512;
|
|
1213
|
+
break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
|
|
1214
|
+
case 0x84:
|
|
1215
|
+
l2 = 1024;
|
|
1216
|
+
break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
|
|
1217
|
+
case 0x85:
|
|
1218
|
+
l2 = 2048;
|
|
1219
|
+
break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
|
|
1220
|
+
case 0x86:
|
|
1221
|
+
l2 = 512;
|
|
1222
|
+
break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
|
|
1223
|
+
case 0x87:
|
|
1224
|
+
l2 = 1024;
|
|
1225
|
+
break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
|
|
1226
|
+
case 0x88:
|
|
1227
|
+
l3 = 2048;
|
|
1228
|
+
break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
|
|
1229
|
+
case 0x89:
|
|
1230
|
+
l3 = 4096;
|
|
1231
|
+
break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
|
|
1232
|
+
case 0x8A:
|
|
1233
|
+
l3 = 8192;
|
|
1234
|
+
break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
|
|
1235
|
+
case 0x8D:
|
|
1236
|
+
l3 = 3072;
|
|
1237
|
+
break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
|
|
1238
|
+
|
|
1239
|
+
default:
|
|
1240
|
+
break;
|
|
1062
1241
|
}
|
|
1063
1242
|
}
|
|
1064
|
-
if(check_for_p2_core2 && l2 == l3)
|
|
1065
|
-
l3 = 0;
|
|
1243
|
+
if (check_for_p2_core2 && l2 == l3) l3 = 0;
|
|
1066
1244
|
l1 *= 1024;
|
|
1067
1245
|
l2 *= 1024;
|
|
1068
1246
|
l3 *= 1024;
|
|
1069
1247
|
}
|
|
1070
1248
|
|
|
1071
|
-
inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
queryCacheSizes_intel_codes(l1,l2,l3);
|
|
1249
|
+
inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) {
|
|
1250
|
+
if (max_std_funcs >= 4)
|
|
1251
|
+
queryCacheSizes_intel_direct(l1, l2, l3);
|
|
1252
|
+
else if (max_std_funcs >= 2)
|
|
1253
|
+
queryCacheSizes_intel_codes(l1, l2, l3);
|
|
1077
1254
|
else
|
|
1078
1255
|
l1 = l2 = l3 = 0;
|
|
1079
1256
|
}
|
|
1080
1257
|
|
|
1081
|
-
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
|
|
1082
|
-
{
|
|
1258
|
+
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) {
|
|
1083
1259
|
int abcd[4];
|
|
1084
1260
|
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
|
1085
|
-
|
|
1261
|
+
|
|
1086
1262
|
// First query the max supported function.
|
|
1087
|
-
EIGEN_CPUID(abcd,0x80000000,0);
|
|
1088
|
-
if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
|
|
1263
|
+
EIGEN_CPUID(abcd, 0x80000000, 0);
|
|
1264
|
+
if (static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006)) {
|
|
1265
|
+
EIGEN_CPUID(abcd, 0x80000005, 0);
|
|
1266
|
+
l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
|
|
1092
1267
|
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
|
1093
|
-
EIGEN_CPUID(abcd,0x80000006,0);
|
|
1094
|
-
l2 = (abcd[2] >> 16) * 1024;
|
|
1095
|
-
l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024;
|
|
1096
|
-
}
|
|
1097
|
-
else
|
|
1098
|
-
{
|
|
1268
|
+
EIGEN_CPUID(abcd, 0x80000006, 0);
|
|
1269
|
+
l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
|
|
1270
|
+
l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
|
|
1271
|
+
} else {
|
|
1099
1272
|
l1 = l2 = l3 = 0;
|
|
1100
1273
|
}
|
|
1101
1274
|
}
|
|
@@ -1103,61 +1276,110 @@ inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
|
|
|
1103
1276
|
|
|
1104
1277
|
/** \internal
|
|
1105
1278
|
* Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
|
|
1106
|
-
inline void queryCacheSizes(int& l1, int& l2, int& l3)
|
|
1107
|
-
|
|
1108
|
-
#ifdef EIGEN_CPUID
|
|
1279
|
+
inline void queryCacheSizes(int& l1, int& l2, int& l3) {
|
|
1280
|
+
#ifdef EIGEN_CPUID
|
|
1109
1281
|
int abcd[4];
|
|
1110
1282
|
const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
|
|
1111
1283
|
const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
|
|
1112
|
-
const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574};
|
|
1284
|
+
const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
|
|
1113
1285
|
|
|
1114
1286
|
// identify the CPU vendor
|
|
1115
|
-
EIGEN_CPUID(abcd,0x0,0);
|
|
1287
|
+
EIGEN_CPUID(abcd, 0x0, 0);
|
|
1116
1288
|
int max_std_funcs = abcd[0];
|
|
1117
|
-
if(cpuid_is_vendor(abcd,GenuineIntel))
|
|
1118
|
-
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
|
|
1119
|
-
else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
|
|
1120
|
-
queryCacheSizes_amd(l1,l2,l3);
|
|
1289
|
+
if (cpuid_is_vendor(abcd, GenuineIntel))
|
|
1290
|
+
queryCacheSizes_intel(l1, l2, l3, max_std_funcs);
|
|
1291
|
+
else if (cpuid_is_vendor(abcd, AuthenticAMD) || cpuid_is_vendor(abcd, AMDisbetter_))
|
|
1292
|
+
queryCacheSizes_amd(l1, l2, l3);
|
|
1121
1293
|
else
|
|
1122
1294
|
// by default let's use Intel's API
|
|
1123
|
-
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
// ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
|
|
1127
|
-
// ||cpuid_is_vendor(abcd,"CyrixInstead")
|
|
1128
|
-
// ||cpuid_is_vendor(abcd,"CentaurHauls")
|
|
1129
|
-
// ||cpuid_is_vendor(abcd,"GenuineTMx86")
|
|
1130
|
-
// ||cpuid_is_vendor(abcd,"TransmetaCPU")
|
|
1131
|
-
// ||cpuid_is_vendor(abcd,"RiseRiseRise")
|
|
1132
|
-
// ||cpuid_is_vendor(abcd,"Geode by NSC")
|
|
1133
|
-
// ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
|
|
1134
|
-
// ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
|
|
1135
|
-
// ||cpuid_is_vendor(abcd,"NexGenDriven")
|
|
1136
|
-
|
|
1295
|
+
queryCacheSizes_intel(l1, l2, l3, max_std_funcs);
|
|
1296
|
+
|
|
1297
|
+
// here is the list of other vendors:
|
|
1298
|
+
// ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
|
|
1299
|
+
// ||cpuid_is_vendor(abcd,"CyrixInstead")
|
|
1300
|
+
// ||cpuid_is_vendor(abcd,"CentaurHauls")
|
|
1301
|
+
// ||cpuid_is_vendor(abcd,"GenuineTMx86")
|
|
1302
|
+
// ||cpuid_is_vendor(abcd,"TransmetaCPU")
|
|
1303
|
+
// ||cpuid_is_vendor(abcd,"RiseRiseRise")
|
|
1304
|
+
// ||cpuid_is_vendor(abcd,"Geode by NSC")
|
|
1305
|
+
// ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
|
|
1306
|
+
// ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
|
|
1307
|
+
// ||cpuid_is_vendor(abcd,"NexGenDriven")
|
|
1308
|
+
#else
|
|
1137
1309
|
l1 = l2 = l3 = -1;
|
|
1138
|
-
|
|
1310
|
+
#endif
|
|
1139
1311
|
}
|
|
1140
1312
|
|
|
1141
1313
|
/** \internal
|
|
1142
1314
|
* \returns the size in Bytes of the L1 data cache */
|
|
1143
|
-
inline int queryL1CacheSize()
|
|
1144
|
-
{
|
|
1315
|
+
inline int queryL1CacheSize() {
|
|
1145
1316
|
int l1(-1), l2, l3;
|
|
1146
|
-
queryCacheSizes(l1,l2,l3);
|
|
1317
|
+
queryCacheSizes(l1, l2, l3);
|
|
1147
1318
|
return l1;
|
|
1148
1319
|
}
|
|
1149
1320
|
|
|
1150
1321
|
/** \internal
|
|
1151
1322
|
* \returns the size in Bytes of the L2 or L3 cache if this later is present */
|
|
1152
|
-
inline int queryTopLevelCacheSize()
|
|
1153
|
-
{
|
|
1323
|
+
inline int queryTopLevelCacheSize() {
|
|
1154
1324
|
int l1, l2(-1), l3(-1);
|
|
1155
|
-
queryCacheSizes(l1,l2,l3);
|
|
1156
|
-
return (std::max)(l2,l3);
|
|
1325
|
+
queryCacheSizes(l1, l2, l3);
|
|
1326
|
+
return (std::max)(l2, l3);
|
|
1327
|
+
}
|
|
1328
|
+
|
|
1329
|
+
/** \internal
|
|
1330
|
+
* This wraps C++20's std::construct_at, using placement new instead if it is not available.
|
|
1331
|
+
*/
|
|
1332
|
+
|
|
1333
|
+
#if EIGEN_COMP_CXXVER >= 20 && defined(__cpp_lib_constexpr_dynamic_alloc) && \
|
|
1334
|
+
__cpp_lib_constexpr_dynamic_alloc >= 201907L
|
|
1335
|
+
using std::construct_at;
|
|
1336
|
+
#else
|
|
1337
|
+
template <class T, class... Args>
|
|
1338
|
+
EIGEN_DEVICE_FUNC T* construct_at(T* p, Args&&... args) {
|
|
1339
|
+
return ::new (const_cast<void*>(static_cast<const volatile void*>(p))) T(std::forward<Args>(args)...);
|
|
1340
|
+
}
|
|
1341
|
+
#endif
|
|
1342
|
+
|
|
1343
|
+
/** \internal
|
|
1344
|
+
* This wraps C++17's std::destroy_at. If it's not available it calls the destructor.
|
|
1345
|
+
* The wrapper is not a full replacement for C++20's std::destroy_at as it cannot
|
|
1346
|
+
* be applied to std::array.
|
|
1347
|
+
*/
|
|
1348
|
+
#if EIGEN_COMP_CXXVER >= 17
|
|
1349
|
+
using std::destroy_at;
|
|
1350
|
+
#else
|
|
1351
|
+
template <class T>
|
|
1352
|
+
EIGEN_DEVICE_FUNC void destroy_at(T* p) {
|
|
1353
|
+
p->~T();
|
|
1157
1354
|
}
|
|
1355
|
+
#endif
|
|
1356
|
+
|
|
1357
|
+
// FIXME(rmlarsen): Work around missing linker symbol with msan on ARM.
|
|
1358
|
+
#if !defined(EIGEN_DONT_ASSUME_ALIGNED) && __has_feature(memory_sanitizer) && \
|
|
1359
|
+
(EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64)
|
|
1360
|
+
#define EIGEN_DONT_ASSUME_ALIGNED
|
|
1361
|
+
#endif
|
|
1362
|
+
|
|
1363
|
+
|
|
1364
|
+
#if !defined(EIGEN_DONT_ASSUME_ALIGNED) && defined(__cpp_lib_assume_aligned) && (__cpp_lib_assume_aligned >= 201811L)
|
|
1365
|
+
template <std::size_t N, typename T>
|
|
1366
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
|
|
1367
|
+
return std::assume_aligned<N, T>(ptr);
|
|
1368
|
+
}
|
|
1369
|
+
#elif !defined(EIGEN_DONT_ASSUME_ALIGNED) && EIGEN_HAS_BUILTIN(__builtin_assume_aligned)
|
|
1370
|
+
template <std::size_t N, typename T>
|
|
1371
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC T* assume_aligned(T* ptr) {
|
|
1372
|
+
return static_cast<T*>(__builtin_assume_aligned(ptr, N));
|
|
1373
|
+
}
|
|
1374
|
+
#else
|
|
1375
|
+
template <std::size_t N, typename T>
|
|
1376
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
|
|
1377
|
+
return ptr;
|
|
1378
|
+
}
|
|
1379
|
+
#endif
|
|
1158
1380
|
|
|
1159
|
-
}
|
|
1381
|
+
} // end namespace internal
|
|
1160
1382
|
|
|
1161
|
-
}
|
|
1383
|
+
} // end namespace Eigen
|
|
1162
1384
|
|
|
1163
|
-
#endif
|
|
1385
|
+
#endif // EIGEN_MEMORY_H
|