@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
13
13
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
/*****************************************************************************
|
|
17
16
|
*** Platform checks for aligned malloc functions ***
|
|
18
17
|
*****************************************************************************/
|
|
@@ -31,11 +30,11 @@
|
|
|
31
30
|
// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
|
|
32
31
|
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
|
|
33
32
|
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
|
|
34
|
-
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
#if defined(__GLIBC__) && ((__GLIBC__ >= 2 && __GLIBC_MINOR__ >= 8) || __GLIBC__ > 2) && defined(__LP64__) && \
|
|
34
|
+
!defined(__SANITIZE_ADDRESS__) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
|
|
35
|
+
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
|
|
37
36
|
#else
|
|
38
|
-
|
|
37
|
+
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
|
|
39
38
|
#endif
|
|
40
39
|
|
|
41
40
|
// FreeBSD 6 seems to have 16-byte aligned malloc
|
|
@@ -43,35 +42,107 @@
|
|
|
43
42
|
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
|
|
44
43
|
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
|
|
45
44
|
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
|
|
46
|
-
|
|
45
|
+
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
|
|
47
46
|
#else
|
|
48
|
-
|
|
47
|
+
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
|
|
49
48
|
#endif
|
|
50
49
|
|
|
51
|
-
#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))
|
|
52
|
-
||
|
|
53
|
-
|
|
54
|
-
|| EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
|
|
55
|
-
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
|
|
50
|
+
#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) || \
|
|
51
|
+
EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
|
|
52
|
+
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
|
|
56
53
|
#else
|
|
57
|
-
|
|
54
|
+
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
|
|
58
55
|
#endif
|
|
59
56
|
|
|
60
57
|
#endif
|
|
61
58
|
|
|
59
|
+
#ifndef EIGEN_MALLOC_CHECK_THREAD_LOCAL
|
|
60
|
+
|
|
61
|
+
// Check whether we can use the thread_local keyword to allow or disallow
|
|
62
|
+
// allocating memory with per-thread granularity, by means of the
|
|
63
|
+
// set_is_malloc_allowed() function.
|
|
64
|
+
#ifndef EIGEN_AVOID_THREAD_LOCAL
|
|
65
|
+
|
|
66
|
+
#if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC >= 1900) && \
|
|
67
|
+
!defined(EIGEN_GPU_COMPILE_PHASE)
|
|
68
|
+
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL thread_local
|
|
69
|
+
#else
|
|
70
|
+
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL
|
|
71
|
+
#endif
|
|
72
|
+
|
|
73
|
+
#else // EIGEN_AVOID_THREAD_LOCAL
|
|
74
|
+
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL
|
|
75
|
+
#endif // EIGEN_AVOID_THREAD_LOCAL
|
|
76
|
+
|
|
77
|
+
#endif
|
|
78
|
+
|
|
79
|
+
// IWYU pragma: private
|
|
80
|
+
#include "../InternalHeaderCheck.h"
|
|
81
|
+
|
|
62
82
|
namespace Eigen {
|
|
63
83
|
|
|
64
84
|
namespace internal {
|
|
65
85
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
86
|
+
/*****************************************************************************
|
|
87
|
+
*** Implementation of portable aligned versions of malloc/free/realloc ***
|
|
88
|
+
*****************************************************************************/
|
|
89
|
+
|
|
90
|
+
#ifdef EIGEN_NO_MALLOC
|
|
91
|
+
/** \internal Variant compiled when EIGEN_NO_MALLOC is defined.
 * Unconditionally asserts `false` through eigen_assert: in this configuration any attempt
 * to allocate on the heap is reported as an error.
 */
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {
  // The string literal is only there to make the failed assertion self-explanatory.
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
|
|
94
|
+
/** \internal Variant compiled when EIGEN_NO_MALLOC is defined.
 * Unconditionally asserts `false` through eigen_assert: in this configuration any attempt
 * to release heap memory is reported as an error.
 */
EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {
  // The string literal is only there to make the failed assertion self-explanatory.
  eigen_assert(false && "heap deallocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
|
|
97
|
+
#elif defined EIGEN_RUNTIME_NO_MALLOC
|
|
98
|
+
/** \internal Combined getter/setter for the "heap allocation allowed" flag.
 *
 * The flag is a function-local static that starts out `true`; its storage class is whatever
 * EIGEN_MALLOC_CHECK_THREAD_LOCAL expands to (possibly nothing).
 *
 * \param update    when true, the flag is overwritten with \a new_value before being returned
 * \param new_value value to store; ignored unless \a update is true
 * \returns the flag's value after the optional update
 */
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false) {
  EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool allowed = true;
  if (update) {
    allowed = new_value;
  }
  return allowed;
}
|
|
103
|
+
/** \internal \returns the current "heap allocation allowed" flag, leaving it untouched. */
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() {
  const bool update_flag = false;  // read-only query
  return is_malloc_allowed_impl(update_flag);
}
|
|
104
|
+
/** \internal Stores \a new_value as the "heap allocation allowed" flag.
 * \returns the flag as reported by is_malloc_allowed_impl() after the store.
 */
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) {
  const bool update_flag = true;
  return is_malloc_allowed_impl(update_flag, new_value);
}
|
|
105
|
+
/** \internal Variant compiled when EIGEN_RUNTIME_NO_MALLOC is defined.
 * Asserts, through eigen_assert, that is_malloc_allowed() currently returns true.
 */
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {
  // Keep the condition inside the macro call: the expression text may appear in the
  // assertion failure output.
  eigen_assert(
      is_malloc_allowed() &&
      "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and set_is_malloc_allowed is false)");
}
|
|
109
|
+
/** \internal Combined getter/setter for the "heap deallocation allowed" flag.
 *
 * The flag is a function-local static that starts out `true`; its storage class is whatever
 * EIGEN_MALLOC_CHECK_THREAD_LOCAL expands to (possibly nothing).
 *
 * \param update    when true, the flag is overwritten with \a new_value before being returned
 * \param new_value value to store; ignored unless \a update is true
 * \returns the flag's value after the optional update
 */
EIGEN_DEVICE_FUNC inline bool is_free_allowed_impl(bool update, bool new_value = false) {
  EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool allowed = true;
  if (update) {
    allowed = new_value;
  }
  return allowed;
}
|
|
114
|
+
/** \internal \returns the current "heap deallocation allowed" flag, leaving it untouched. */
EIGEN_DEVICE_FUNC inline bool is_free_allowed() {
  const bool update_flag = false;  // read-only query
  return is_free_allowed_impl(update_flag);
}
|
|
115
|
+
/** \internal Stores \a new_value as the "heap deallocation allowed" flag.
 * \returns the flag as reported by is_free_allowed_impl() after the store.
 */
EIGEN_DEVICE_FUNC inline bool set_is_free_allowed(bool new_value) {
  const bool update_flag = true;
  return is_free_allowed_impl(update_flag, new_value);
}
|
|
116
|
+
/** \internal Variant compiled when EIGEN_RUNTIME_NO_MALLOC is defined.
 * Asserts, through eigen_assert, that heap deallocation is currently permitted.
 *
 * The condition queries is_free_allowed(), i.e. the flag controlled by set_is_free_allowed(),
 * so that it agrees with the assertion message. The allocation flag (is_malloc_allowed()) is
 * deliberately not consulted here: allocation and deallocation are toggled independently.
 */
EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {
  eigen_assert(is_free_allowed() &&
               "heap deallocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and set_is_free_allowed is false)");
}
|
|
120
|
+
#else
|
|
121
|
+
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {}
|
|
122
|
+
EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {}
|
|
123
|
+
#endif
|
|
124
|
+
|
|
125
|
+
/** \internal Signals an allocation failure.
 * With EIGEN_EXCEPTIONS defined, std::bad_alloc is thrown directly. Otherwise an allocation of the
 * largest representable size is requested from operator new, which is expected to fail and report
 * the error in whatever way the toolchain does.
 */
EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() {
#ifdef EIGEN_EXCEPTIONS
  throw std::bad_alloc();
#else
  // All bits set, i.e. the largest value a std::size_t can hold.
  std::size_t max_request = static_cast<std::size_t>(-1);
#if defined(EIGEN_HIPCC)
  // hipcc treats calls to "::operator new" as opaque function calls (no inlining), and the form
  // used in the #else branch triggers its warning
  //   "no overloaded function has restriction specifiers that are compatible with the ambient context"
  // because this function is EIGEN_DEVICE_FUNC and hipcc seems to expect the same of "operator new".
  // The array new-expression is therefore kept for that compiler.
  new int[max_request];
#else
  void* discarded = ::operator new(max_request);
  EIGEN_UNUSED_VARIABLE(discarded);
#endif
#endif
}
|
|
76
147
|
|
|
77
148
|
/*****************************************************************************
|
|
@@ -80,124 +151,132 @@ inline void throw_std_bad_alloc()
|
|
|
80
151
|
|
|
81
152
|
/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
|
|
82
153
|
|
|
83
|
-
/** \internal Like malloc, but the returned pointer is guaranteed to be
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
inline void* handmade_aligned_malloc(std::size_t size
|
|
87
|
-
{
|
|
88
|
-
void
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
154
|
+
/** \internal Like malloc, but the returned pointer is guaranteed to be aligned to `alignment`.
|
|
155
|
+
* Fast, but wastes `alignment` additional bytes of memory. Does not throw any exception.
|
|
156
|
+
*/
|
|
157
|
+
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size,
|
|
158
|
+
std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) {
|
|
159
|
+
eigen_assert(alignment >= sizeof(void*) && alignment <= 256 && (alignment & (alignment - 1)) == 0 &&
|
|
160
|
+
"Alignment must be at least sizeof(void*), less than or equal to 256, and a power of 2");
|
|
161
|
+
|
|
162
|
+
check_that_malloc_is_allowed();
|
|
163
|
+
EIGEN_USING_STD(malloc)
|
|
164
|
+
void* original = malloc(size + alignment);
|
|
165
|
+
if (original == nullptr) return nullptr;
|
|
166
|
+
std::size_t offset = alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1));
|
|
167
|
+
void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
|
|
168
|
+
// Store offset - 1, since it is guaranteed to be at least 1.
|
|
169
|
+
*(static_cast<uint8_t*>(aligned) - 1) = static_cast<uint8_t>(offset - 1);
|
|
92
170
|
return aligned;
|
|
93
171
|
}
|
|
94
172
|
|
|
95
173
|
/** \internal Frees memory allocated with handmade_aligned_malloc */
|
|
96
|
-
inline void handmade_aligned_free(void
|
|
97
|
-
{
|
|
98
|
-
|
|
174
|
+
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void* ptr) {
|
|
175
|
+
if (ptr != nullptr) {
|
|
176
|
+
std::size_t offset = static_cast<std::size_t>(*(static_cast<uint8_t*>(ptr) - 1)) + 1;
|
|
177
|
+
void* original = static_cast<void*>(static_cast<uint8_t*>(ptr) - offset);
|
|
178
|
+
|
|
179
|
+
check_that_free_is_allowed();
|
|
180
|
+
EIGEN_USING_STD(free)
|
|
181
|
+
free(original);
|
|
182
|
+
}
|
|
99
183
|
}
|
|
100
184
|
|
|
101
185
|
/** \internal
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
inline void* handmade_aligned_realloc(void* ptr, std::size_t
|
|
107
|
-
{
|
|
108
|
-
if (ptr ==
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
void
|
|
115
|
-
if(
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
*
|
|
186
|
+
* \brief Reallocates aligned memory.
|
|
187
|
+
* Since we know that our handmade version is based on std::malloc
|
|
188
|
+
* we can use std::realloc to implement efficient reallocation.
|
|
189
|
+
*/
|
|
190
|
+
EIGEN_DEVICE_FUNC inline void* handmade_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size,
|
|
191
|
+
std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) {
|
|
192
|
+
if (ptr == nullptr) return handmade_aligned_malloc(new_size, alignment);
|
|
193
|
+
std::size_t old_offset = static_cast<std::size_t>(*(static_cast<uint8_t*>(ptr) - 1)) + 1;
|
|
194
|
+
void* old_original = static_cast<uint8_t*>(ptr) - old_offset;
|
|
195
|
+
|
|
196
|
+
check_that_malloc_is_allowed();
|
|
197
|
+
EIGEN_USING_STD(realloc)
|
|
198
|
+
void* original = realloc(old_original, new_size + alignment);
|
|
199
|
+
if (original == nullptr) return nullptr;
|
|
200
|
+
if (original == old_original) return ptr;
|
|
201
|
+
std::size_t offset = alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1));
|
|
202
|
+
void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
|
|
203
|
+
if (offset != old_offset) {
|
|
204
|
+
const void* src = static_cast<const void*>(static_cast<uint8_t*>(original) + old_offset);
|
|
205
|
+
std::size_t count = (std::min)(new_size, old_size);
|
|
206
|
+
std::memmove(aligned, src, count);
|
|
207
|
+
}
|
|
208
|
+
// Store offset - 1, since it is guaranteed to be at least 1.
|
|
209
|
+
*(static_cast<uint8_t*>(aligned) - 1) = static_cast<uint8_t>(offset - 1);
|
|
119
210
|
return aligned;
|
|
120
211
|
}
|
|
121
212
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
213
|
+
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on
|
|
214
|
+
* the requirements. On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
|
|
215
|
+
*/
|
|
216
|
+
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) {
|
|
217
|
+
if (size == 0) return nullptr;
|
|
125
218
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
{
|
|
129
|
-
eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
|
|
130
|
-
}
|
|
131
|
-
#elif defined EIGEN_RUNTIME_NO_MALLOC
|
|
132
|
-
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
|
|
133
|
-
{
|
|
134
|
-
static bool value = true;
|
|
135
|
-
if (update == 1)
|
|
136
|
-
value = new_value;
|
|
137
|
-
return value;
|
|
138
|
-
}
|
|
139
|
-
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
|
|
140
|
-
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
|
|
141
|
-
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
|
|
142
|
-
{
|
|
143
|
-
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
|
|
144
|
-
}
|
|
145
|
-
#else
|
|
146
|
-
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
|
|
147
|
-
{}
|
|
148
|
-
#endif
|
|
219
|
+
void* result;
|
|
220
|
+
#if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
|
149
221
|
|
|
150
|
-
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
|
|
151
|
-
* On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
|
|
152
|
-
*/
|
|
153
|
-
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
|
|
154
|
-
{
|
|
155
222
|
check_that_malloc_is_allowed();
|
|
223
|
+
EIGEN_USING_STD(malloc)
|
|
224
|
+
result = malloc(size);
|
|
156
225
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
#endif
|
|
226
|
+
#if EIGEN_DEFAULT_ALIGN_BYTES == 16
|
|
227
|
+
eigen_assert((size < 16 || (std::size_t(result) % 16) == 0) &&
|
|
228
|
+
"System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback "
|
|
229
|
+
"to handmade aligned memory allocator.");
|
|
230
|
+
#endif
|
|
231
|
+
#else
|
|
232
|
+
result = handmade_aligned_malloc(size);
|
|
233
|
+
#endif
|
|
166
234
|
|
|
167
|
-
if(!result && size)
|
|
168
|
-
throw_std_bad_alloc();
|
|
235
|
+
if (!result && size) throw_std_bad_alloc();
|
|
169
236
|
|
|
170
237
|
return result;
|
|
171
238
|
}
|
|
172
239
|
|
|
173
240
|
/** \internal Frees memory allocated with aligned_malloc. */
|
|
174
|
-
EIGEN_DEVICE_FUNC inline void aligned_free(void
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
241
|
+
EIGEN_DEVICE_FUNC inline void aligned_free(void* ptr) {
|
|
242
|
+
#if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
|
243
|
+
|
|
244
|
+
if (ptr != nullptr) {
|
|
245
|
+
check_that_free_is_allowed();
|
|
246
|
+
EIGEN_USING_STD(free)
|
|
247
|
+
free(ptr);
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
#else
|
|
251
|
+
handmade_aligned_free(ptr);
|
|
252
|
+
#endif
|
|
181
253
|
}
|
|
182
254
|
|
|
183
255
|
/**
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
inline void* aligned_realloc(void
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
256
|
+
* \internal
|
|
257
|
+
* \brief Reallocates an aligned block of memory.
|
|
258
|
+
* \throws std::bad_alloc on allocation failure
|
|
259
|
+
*/
|
|
260
|
+
EIGEN_DEVICE_FUNC inline void* aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) {
|
|
261
|
+
if (ptr == nullptr) return aligned_malloc(new_size);
|
|
262
|
+
if (old_size == new_size) return ptr;
|
|
263
|
+
if (new_size == 0) {
|
|
264
|
+
aligned_free(ptr);
|
|
265
|
+
return nullptr;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
void* result;
|
|
269
|
+
#if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
|
270
|
+
EIGEN_UNUSED_VARIABLE(old_size)
|
|
271
|
+
|
|
272
|
+
check_that_malloc_is_allowed();
|
|
273
|
+
EIGEN_USING_STD(realloc)
|
|
274
|
+
result = realloc(ptr, new_size);
|
|
195
275
|
#else
|
|
196
|
-
result = handmade_aligned_realloc(ptr,new_size,old_size);
|
|
276
|
+
result = handmade_aligned_realloc(ptr, new_size, old_size);
|
|
197
277
|
#endif
|
|
198
278
|
|
|
199
|
-
if (!result && new_size)
|
|
200
|
-
throw_std_bad_alloc();
|
|
279
|
+
if (!result && new_size) throw_std_bad_alloc();
|
|
201
280
|
|
|
202
281
|
return result;
|
|
203
282
|
}
|
|
@@ -207,42 +286,58 @@ inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_si
|
|
|
207
286
|
*****************************************************************************/
|
|
208
287
|
|
|
209
288
|
/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
template<bool Align>
|
|
213
|
-
{
|
|
289
|
+
* On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
|
|
290
|
+
*/
|
|
291
|
+
template <bool Align>
|
|
292
|
+
EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) {
|
|
214
293
|
return aligned_malloc(size);
|
|
215
294
|
}
|
|
216
295
|
|
|
217
|
-
template<>
|
|
218
|
-
{
|
|
296
|
+
template <>
|
|
297
|
+
EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size) {
|
|
298
|
+
if (size == 0) return nullptr;
|
|
299
|
+
|
|
219
300
|
check_that_malloc_is_allowed();
|
|
301
|
+
EIGEN_USING_STD(malloc)
|
|
302
|
+
void* result = malloc(size);
|
|
220
303
|
|
|
221
|
-
|
|
222
|
-
if(!result && size)
|
|
223
|
-
throw_std_bad_alloc();
|
|
304
|
+
if (!result && size) throw_std_bad_alloc();
|
|
224
305
|
return result;
|
|
225
306
|
}
|
|
226
307
|
|
|
227
308
|
/** \internal Frees memory allocated with conditional_aligned_malloc */
|
|
228
|
-
template<bool Align>
|
|
229
|
-
{
|
|
309
|
+
template <bool Align>
|
|
310
|
+
EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void* ptr) {
|
|
230
311
|
aligned_free(ptr);
|
|
231
312
|
}
|
|
232
313
|
|
|
233
|
-
template<>
|
|
234
|
-
{
|
|
235
|
-
|
|
314
|
+
template <>
|
|
315
|
+
EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void* ptr) {
|
|
316
|
+
if (ptr != nullptr) {
|
|
317
|
+
check_that_free_is_allowed();
|
|
318
|
+
EIGEN_USING_STD(free)
|
|
319
|
+
free(ptr);
|
|
320
|
+
}
|
|
236
321
|
}
|
|
237
322
|
|
|
238
|
-
template<bool Align>
|
|
239
|
-
{
|
|
323
|
+
template <bool Align>
|
|
324
|
+
EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) {
|
|
240
325
|
return aligned_realloc(ptr, new_size, old_size);
|
|
241
326
|
}
|
|
242
327
|
|
|
243
|
-
template<>
|
|
244
|
-
|
|
245
|
-
|
|
328
|
+
template <>
|
|
329
|
+
EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size,
|
|
330
|
+
std::size_t old_size) {
|
|
331
|
+
if (ptr == nullptr) return conditional_aligned_malloc<false>(new_size);
|
|
332
|
+
if (old_size == new_size) return ptr;
|
|
333
|
+
if (new_size == 0) {
|
|
334
|
+
conditional_aligned_free<false>(ptr);
|
|
335
|
+
return nullptr;
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
check_that_malloc_is_allowed();
|
|
339
|
+
EIGEN_USING_STD(realloc)
|
|
340
|
+
return realloc(ptr, new_size);
|
|
246
341
|
}
|
|
247
342
|
|
|
248
343
|
/*****************************************************************************
|
|
@@ -250,75 +345,95 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_
|
|
|
250
345
|
*****************************************************************************/
|
|
251
346
|
|
|
252
347
|
/** \internal Destructs the elements of an array.
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
template<typename T>
|
|
256
|
-
{
|
|
348
|
+
* The \a size parameters tells on how many objects to call the destructor of T.
|
|
349
|
+
*/
|
|
350
|
+
template <typename T>
|
|
351
|
+
EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T* ptr, std::size_t size) {
|
|
257
352
|
// always destruct an array starting from the end.
|
|
258
|
-
if(ptr)
|
|
259
|
-
while(size) ptr[--size].~T();
|
|
353
|
+
if (ptr)
|
|
354
|
+
while (size) ptr[--size].~T();
|
|
260
355
|
}
|
|
261
356
|
|
|
262
357
|
/** \internal Constructs the elements of an array.
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
template<typename T>
|
|
266
|
-
{
|
|
267
|
-
std::size_t i;
|
|
268
|
-
EIGEN_TRY
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
358
|
+
* The \a size parameter tells on how many objects to call the constructor of T.
|
|
359
|
+
*/
|
|
360
|
+
template <typename T>
|
|
361
|
+
EIGEN_DEVICE_FUNC inline T* default_construct_elements_of_array(T* ptr, std::size_t size) {
|
|
362
|
+
std::size_t i = 0;
|
|
363
|
+
EIGEN_TRY {
|
|
364
|
+
for (i = 0; i < size; ++i) ::new (ptr + i) T;
|
|
365
|
+
}
|
|
366
|
+
EIGEN_CATCH(...) {
|
|
367
|
+
destruct_elements_of_array(ptr, i);
|
|
368
|
+
EIGEN_THROW;
|
|
369
|
+
}
|
|
370
|
+
return ptr;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
/** \internal Copy-constructs the elements of an array.
|
|
374
|
+
* The \a size parameter tells on how many objects to copy.
|
|
375
|
+
*/
|
|
376
|
+
template <typename T>
|
|
377
|
+
EIGEN_DEVICE_FUNC inline T* copy_construct_elements_of_array(T* ptr, const T* src, std::size_t size) {
|
|
378
|
+
std::size_t i = 0;
|
|
379
|
+
EIGEN_TRY {
|
|
380
|
+
for (i = 0; i < size; ++i) ::new (ptr + i) T(*(src + i));
|
|
381
|
+
}
|
|
382
|
+
EIGEN_CATCH(...) {
|
|
275
383
|
destruct_elements_of_array(ptr, i);
|
|
276
384
|
EIGEN_THROW;
|
|
277
385
|
}
|
|
278
|
-
return
|
|
386
|
+
return ptr;
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
/** \internal Move-constructs the elements of an array.
|
|
390
|
+
* The \a size parameter tells on how many objects to move.
|
|
391
|
+
*/
|
|
392
|
+
template <typename T>
|
|
393
|
+
EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T* ptr, T* src, std::size_t size) {
|
|
394
|
+
std::size_t i = 0;
|
|
395
|
+
EIGEN_TRY {
|
|
396
|
+
for (i = 0; i < size; ++i) ::new (ptr + i) T(std::move(*(src + i)));
|
|
397
|
+
}
|
|
398
|
+
EIGEN_CATCH(...) {
|
|
399
|
+
destruct_elements_of_array(ptr, i);
|
|
400
|
+
EIGEN_THROW;
|
|
401
|
+
}
|
|
402
|
+
return ptr;
|
|
279
403
|
}
|
|
280
404
|
|
|
281
405
|
/*****************************************************************************
|
|
282
406
|
*** Implementation of aligned new/delete-like functions ***
|
|
283
407
|
*****************************************************************************/
|
|
284
408
|
|
|
285
|
-
template<typename T>
|
|
286
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
|
|
287
|
-
|
|
288
|
-
if(size >
|
|
289
|
-
throw_std_bad_alloc();
|
|
409
|
+
template <typename T>
|
|
410
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) {
|
|
411
|
+
constexpr std::size_t max_elements = (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T);
|
|
412
|
+
if (size > max_elements) throw_std_bad_alloc();
|
|
290
413
|
}
|
|
291
414
|
|
|
292
415
|
/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
template<typename T>
|
|
297
|
-
{
|
|
416
|
+
* On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
|
|
417
|
+
* The default constructor of T is called.
|
|
418
|
+
*/
|
|
419
|
+
template <typename T>
|
|
420
|
+
EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) {
|
|
298
421
|
check_size_for_overflow<T>(size);
|
|
299
|
-
T
|
|
300
|
-
EIGEN_TRY
|
|
301
|
-
{
|
|
302
|
-
return construct_elements_of_array(result, size);
|
|
303
|
-
}
|
|
304
|
-
EIGEN_CATCH(...)
|
|
305
|
-
{
|
|
422
|
+
T* result = static_cast<T*>(aligned_malloc(sizeof(T) * size));
|
|
423
|
+
EIGEN_TRY { return default_construct_elements_of_array(result, size); }
|
|
424
|
+
EIGEN_CATCH(...) {
|
|
306
425
|
aligned_free(result);
|
|
307
426
|
EIGEN_THROW;
|
|
308
427
|
}
|
|
309
428
|
return result;
|
|
310
429
|
}
|
|
311
430
|
|
|
312
|
-
template<typename T, bool Align>
|
|
313
|
-
{
|
|
431
|
+
template <typename T, bool Align>
|
|
432
|
+
EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) {
|
|
314
433
|
check_size_for_overflow<T>(size);
|
|
315
|
-
T
|
|
316
|
-
EIGEN_TRY
|
|
317
|
-
{
|
|
318
|
-
return construct_elements_of_array(result, size);
|
|
319
|
-
}
|
|
320
|
-
EIGEN_CATCH(...)
|
|
321
|
-
{
|
|
434
|
+
T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
|
|
435
|
+
EIGEN_TRY { return default_construct_elements_of_array(result, size); }
|
|
436
|
+
EIGEN_CATCH(...) {
|
|
322
437
|
conditional_aligned_free<Align>(result);
|
|
323
438
|
EIGEN_THROW;
|
|
324
439
|
}
|
|
@@ -326,60 +441,62 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
|
|
326
441
|
}
|
|
327
442
|
|
|
328
443
|
/** \internal Deletes objects constructed with aligned_new
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
template<typename T>
|
|
332
|
-
{
|
|
444
|
+
* The \a size parameters tells on how many objects to call the destructor of T.
|
|
445
|
+
*/
|
|
446
|
+
template <typename T>
|
|
447
|
+
EIGEN_DEVICE_FUNC inline void aligned_delete(T* ptr, std::size_t size) {
|
|
333
448
|
destruct_elements_of_array<T>(ptr, size);
|
|
334
449
|
aligned_free(ptr);
|
|
335
450
|
}
|
|
336
451
|
|
|
337
452
|
/** \internal Deletes objects constructed with conditional_aligned_new
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
template<typename T, bool Align>
|
|
341
|
-
{
|
|
453
|
+
* The \a size parameters tells on how many objects to call the destructor of T.
|
|
454
|
+
*/
|
|
455
|
+
template <typename T, bool Align>
|
|
456
|
+
EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T* ptr, std::size_t size) {
|
|
342
457
|
destruct_elements_of_array<T>(ptr, size);
|
|
343
458
|
conditional_aligned_free<Align>(ptr);
|
|
344
459
|
}
|
|
345
460
|
|
|
346
|
-
template<typename T, bool Align>
|
|
347
|
-
{
|
|
461
|
+
template <typename T, bool Align>
|
|
462
|
+
EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size) {
|
|
348
463
|
check_size_for_overflow<T>(new_size);
|
|
349
464
|
check_size_for_overflow<T>(old_size);
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
465
|
+
|
|
466
|
+
// If elements need to be explicitly initialized, we cannot simply realloc
|
|
467
|
+
// (or memcpy) the memory block - each element needs to be reconstructed.
|
|
468
|
+
// Otherwise, objects that contain internal pointers like mpfr or
|
|
469
|
+
// AnnoyingScalar can be pointing to the wrong thing.
|
|
470
|
+
T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * new_size));
|
|
471
|
+
EIGEN_TRY {
|
|
472
|
+
// Move-construct initial elements.
|
|
473
|
+
std::size_t copy_size = (std::min)(old_size, new_size);
|
|
474
|
+
move_construct_elements_of_array(result, pts, copy_size);
|
|
475
|
+
|
|
476
|
+
// Default-construct remaining elements.
|
|
477
|
+
if (new_size > old_size) {
|
|
478
|
+
default_construct_elements_of_array(result + copy_size, new_size - old_size);
|
|
363
479
|
}
|
|
480
|
+
|
|
481
|
+
// Delete old elements.
|
|
482
|
+
conditional_aligned_delete<T, Align>(pts, old_size);
|
|
483
|
+
}
|
|
484
|
+
EIGEN_CATCH(...) {
|
|
485
|
+
conditional_aligned_free<Align>(result);
|
|
486
|
+
EIGEN_THROW;
|
|
364
487
|
}
|
|
488
|
+
|
|
365
489
|
return result;
|
|
366
490
|
}
|
|
367
491
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
if(size==0)
|
|
372
|
-
return 0; // short-cut. Also fixes Bug 884
|
|
492
|
+
template <typename T, bool Align>
|
|
493
|
+
EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) {
|
|
494
|
+
if (size == 0) return nullptr; // short-cut. Also fixes Bug 884
|
|
373
495
|
check_size_for_overflow<T>(size);
|
|
374
|
-
T
|
|
375
|
-
if(NumTraits<T>::RequireInitialization)
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
{
|
|
379
|
-
construct_elements_of_array(result, size);
|
|
380
|
-
}
|
|
381
|
-
EIGEN_CATCH(...)
|
|
382
|
-
{
|
|
496
|
+
T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
|
|
497
|
+
if (NumTraits<T>::RequireInitialization) {
|
|
498
|
+
EIGEN_TRY { default_construct_elements_of_array(result, size); }
|
|
499
|
+
EIGEN_CATCH(...) {
|
|
383
500
|
conditional_aligned_free<Align>(result);
|
|
384
501
|
EIGEN_THROW;
|
|
385
502
|
}
|
|
@@ -387,154 +504,140 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
|
|
387
504
|
return result;
|
|
388
505
|
}
|
|
389
506
|
|
|
390
|
-
template<typename T, bool Align>
|
|
391
|
-
{
|
|
507
|
+
template <typename T, bool Align>
|
|
508
|
+
EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) {
|
|
509
|
+
if (NumTraits<T>::RequireInitialization) {
|
|
510
|
+
return conditional_aligned_realloc_new<T, Align>(pts, new_size, old_size);
|
|
511
|
+
}
|
|
512
|
+
|
|
392
513
|
check_size_for_overflow<T>(new_size);
|
|
393
514
|
check_size_for_overflow<T>(old_size);
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
|
|
397
|
-
if(NumTraits<T>::RequireInitialization && (new_size > old_size))
|
|
398
|
-
{
|
|
399
|
-
EIGEN_TRY
|
|
400
|
-
{
|
|
401
|
-
construct_elements_of_array(result+old_size, new_size-old_size);
|
|
402
|
-
}
|
|
403
|
-
EIGEN_CATCH(...)
|
|
404
|
-
{
|
|
405
|
-
conditional_aligned_free<Align>(result);
|
|
406
|
-
EIGEN_THROW;
|
|
407
|
-
}
|
|
408
|
-
}
|
|
409
|
-
return result;
|
|
515
|
+
return static_cast<T*>(
|
|
516
|
+
conditional_aligned_realloc<Align>(static_cast<void*>(pts), sizeof(T) * new_size, sizeof(T) * old_size));
|
|
410
517
|
}
|
|
411
518
|
|
|
412
|
-
template<typename T, bool Align>
|
|
413
|
-
{
|
|
414
|
-
if(NumTraits<T>::RequireInitialization)
|
|
415
|
-
destruct_elements_of_array<T>(ptr, size);
|
|
519
|
+
template <typename T, bool Align>
|
|
520
|
+
EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T* ptr, std::size_t size) {
|
|
521
|
+
if (NumTraits<T>::RequireInitialization) destruct_elements_of_array<T>(ptr, size);
|
|
416
522
|
conditional_aligned_free<Align>(ptr);
|
|
417
523
|
}
|
|
418
524
|
|
|
419
525
|
/****************************************************************************/
|
|
420
526
|
|
|
421
|
-
/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
{
|
|
527
|
+
/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a
|
|
528
|
+
* Alignment.
|
|
529
|
+
*
|
|
530
|
+
* \tparam Alignment requested alignment in Bytes.
|
|
531
|
+
* \param array the address of the start of the array
|
|
532
|
+
* \param size the size of the array
|
|
533
|
+
*
|
|
534
|
+
* \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
|
|
535
|
+
* the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
|
|
536
|
+
* packet size for the given scalar type is 1, then everything is considered well-aligned.
|
|
537
|
+
*
|
|
538
|
+
* \note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a
|
|
539
|
+
* power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails
|
|
540
|
+
* for example with Scalar=double on certain 32-bit platforms, see bug #79.
|
|
541
|
+
*
|
|
542
|
+
* There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
|
|
543
|
+
* \sa first_default_aligned()
|
|
544
|
+
*/
|
|
545
|
+
template <int Alignment, typename Scalar, typename Index>
|
|
546
|
+
EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) {
|
|
441
547
|
const Index ScalarSize = sizeof(Scalar);
|
|
442
548
|
const Index AlignmentSize = Alignment / ScalarSize;
|
|
443
|
-
const Index AlignmentMask = AlignmentSize-1;
|
|
549
|
+
const Index AlignmentMask = AlignmentSize - 1;
|
|
444
550
|
|
|
445
|
-
if(AlignmentSize<=1)
|
|
446
|
-
{
|
|
551
|
+
if (AlignmentSize <= 1) {
|
|
447
552
|
// Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar
|
|
448
553
|
// so that all elements of the array have the same alignment.
|
|
449
554
|
return 0;
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
// The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
|
|
454
|
-
// Consequently, no element of the array is well aligned.
|
|
555
|
+
} else if ((std::uintptr_t(array) & (sizeof(Scalar) - 1)) || (Alignment % ScalarSize) != 0) {
|
|
556
|
+
// The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the
|
|
557
|
+
// scalar size. Consequently, no element of the array is well aligned.
|
|
455
558
|
return size;
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
{
|
|
459
|
-
Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
|
|
559
|
+
} else {
|
|
560
|
+
Index first = (AlignmentSize - (Index((std::uintptr_t(array) / sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
|
|
460
561
|
return (first < size) ? first : size;
|
|
461
562
|
}
|
|
462
563
|
}
|
|
463
564
|
|
|
464
|
-
/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet
|
|
465
|
-
|
|
466
|
-
template<typename Scalar, typename Index>
|
|
467
|
-
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
|
|
468
|
-
{
|
|
565
|
+
/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet
|
|
566
|
+
* requirement. \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
|
|
567
|
+
template <typename Scalar, typename Index>
|
|
568
|
+
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size) {
|
|
469
569
|
typedef typename packet_traits<Scalar>::type DefaultPacketType;
|
|
470
570
|
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
|
|
471
571
|
}
|
|
472
572
|
|
|
473
573
|
/** \internal Rounds \a size up to a multiple of \a base.
 * Computed as ((size + base - 1) / base) * base, i.e. the smallest multiple of \a base that is
 * greater than or equal to \a size for non-negative \a size and positive \a base.
 */
template <typename Index>
inline Index first_multiple(Index size, Index base) {
  // Number of whole `base`-sized blocks needed to hold `size`.
  const auto block_count = (size + base - 1) / base;
  return block_count * base;
}
|
|
480
579
|
|
|
481
580
|
// std::copy is much slower than memcpy, so let's introduce a smart_copy which
|
|
482
581
|
// use memcpy on trivial types, i.e., on types that does not require an initialization ctor.
|
|
483
|
-
template<typename T, bool UseMemcpy>
|
|
582
|
+
template <typename T, bool UseMemcpy>
|
|
583
|
+
struct smart_copy_helper;
|
|
484
584
|
|
|
485
|
-
template<typename T>
|
|
486
|
-
{
|
|
487
|
-
smart_copy_helper<T
|
|
585
|
+
template <typename T>
|
|
586
|
+
EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target) {
|
|
587
|
+
smart_copy_helper<T, !NumTraits<T>::RequireInitialization>::run(start, end, target);
|
|
488
588
|
}
|
|
489
589
|
|
|
490
|
-
template<typename T>
|
|
491
|
-
|
|
492
|
-
{
|
|
493
|
-
|
|
494
|
-
if(size==0) return;
|
|
495
|
-
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
|
496
|
-
|
|
590
|
+
template <typename T>
|
|
591
|
+
struct smart_copy_helper<T, true> {
|
|
592
|
+
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) {
|
|
593
|
+
std::intptr_t size = std::intptr_t(end) - std::intptr_t(start);
|
|
594
|
+
if (size == 0) return;
|
|
595
|
+
eigen_internal_assert(start != 0 && end != 0 && target != 0);
|
|
596
|
+
EIGEN_USING_STD(memcpy)
|
|
597
|
+
memcpy(target, start, size);
|
|
497
598
|
}
|
|
498
599
|
};
|
|
499
600
|
|
|
500
|
-
template<typename T>
|
|
501
|
-
|
|
502
|
-
{ std::copy(start, end, target); }
|
|
601
|
+
template <typename T>
|
|
602
|
+
struct smart_copy_helper<T, false> {
|
|
603
|
+
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { std::copy(start, end, target); }
|
|
503
604
|
};
|
|
504
605
|
|
|
505
|
-
// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
|
|
506
|
-
template<typename T, bool UseMemmove>
|
|
606
|
+
// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
|
|
607
|
+
template <typename T, bool UseMemmove>
|
|
608
|
+
struct smart_memmove_helper;
|
|
507
609
|
|
|
508
|
-
template<typename T>
|
|
509
|
-
{
|
|
510
|
-
smart_memmove_helper<T
|
|
610
|
+
template <typename T>
|
|
611
|
+
void smart_memmove(const T* start, const T* end, T* target) {
|
|
612
|
+
smart_memmove_helper<T, !NumTraits<T>::RequireInitialization>::run(start, end, target);
|
|
511
613
|
}
|
|
512
614
|
|
|
513
|
-
template<typename T>
|
|
514
|
-
|
|
515
|
-
{
|
|
516
|
-
|
|
517
|
-
if(size==0) return;
|
|
518
|
-
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
|
615
|
+
template <typename T>
|
|
616
|
+
struct smart_memmove_helper<T, true> {
|
|
617
|
+
static inline void run(const T* start, const T* end, T* target) {
|
|
618
|
+
std::intptr_t size = std::intptr_t(end) - std::intptr_t(start);
|
|
619
|
+
if (size == 0) return;
|
|
620
|
+
eigen_internal_assert(start != 0 && end != 0 && target != 0);
|
|
519
621
|
std::memmove(target, start, size);
|
|
520
622
|
}
|
|
521
623
|
};
|
|
522
624
|
|
|
523
|
-
template<typename T>
|
|
524
|
-
|
|
525
|
-
{
|
|
526
|
-
if (
|
|
527
|
-
{
|
|
625
|
+
template <typename T>
|
|
626
|
+
struct smart_memmove_helper<T, false> {
|
|
627
|
+
static inline void run(const T* start, const T* end, T* target) {
|
|
628
|
+
if (std::uintptr_t(target) < std::uintptr_t(start)) {
|
|
528
629
|
std::copy(start, end, target);
|
|
529
|
-
}
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
|
|
533
|
-
std::copy_backward(start, end, target + count);
|
|
630
|
+
} else {
|
|
631
|
+
std::ptrdiff_t count = (std::ptrdiff_t(end) - std::ptrdiff_t(start)) / sizeof(T);
|
|
632
|
+
std::copy_backward(start, end, target + count);
|
|
534
633
|
}
|
|
535
634
|
}
|
|
536
635
|
};
|
|
537
636
|
|
|
637
|
+
template <typename T>
|
|
638
|
+
EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) {
|
|
639
|
+
return std::move(start, end, target);
|
|
640
|
+
}
|
|
538
641
|
|
|
539
642
|
/*****************************************************************************
|
|
540
643
|
*** Implementation of runtime stack allocation (falling back to malloc) ***
|
|
@@ -542,452 +645,741 @@ template<typename T> struct smart_memmove_helper<T,false> {
|
|
|
542
645
|
|
|
543
646
|
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
|
|
544
647
|
// to the appropriate stack allocation function
|
|
545
|
-
#
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
648
|
+
#if !defined EIGEN_ALLOCA && !defined EIGEN_GPU_COMPILE_PHASE
|
|
649
|
+
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
|
|
650
|
+
#define EIGEN_ALLOCA alloca
|
|
651
|
+
#elif EIGEN_COMP_MSVC
|
|
652
|
+
#define EIGEN_ALLOCA _alloca
|
|
653
|
+
#endif
|
|
654
|
+
#endif
|
|
655
|
+
|
|
656
|
+
// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
|
|
657
|
+
// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
|
|
658
|
+
// the compiler still emits bad code because stack allocation checks use "<=".
|
|
659
|
+
// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
|
|
660
|
+
// is fixed.
|
|
661
|
+
#if defined(__clang__) && defined(__thumb__)
|
|
662
|
+
#undef EIGEN_ALLOCA
|
|
551
663
|
#endif
|
|
552
664
|
|
|
553
665
|
// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
|
|
554
666
|
// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
|
|
555
|
-
template<typename T>
|
|
556
|
-
{
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
: m_ptr(ptr), m_size(size), m_deallocate(dealloc)
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
if(NumTraits<T>::RequireInitialization && m_ptr)
|
|
573
|
-
Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
|
|
574
|
-
if(m_deallocate)
|
|
575
|
-
Eigen::internal::aligned_free(m_ptr);
|
|
576
|
-
}
|
|
577
|
-
protected:
|
|
578
|
-
T* m_ptr;
|
|
579
|
-
std::size_t m_size;
|
|
580
|
-
bool m_deallocate;
|
|
581
|
-
};
|
|
667
|
+
template <typename T>
|
|
668
|
+
class aligned_stack_memory_handler : noncopyable {
|
|
669
|
+
public:
|
|
670
|
+
/* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
|
|
671
|
+
* Note that \a ptr can be 0 regardless of the other parameters.
|
|
672
|
+
* This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type
|
|
673
|
+
*T (see NumTraits<T>::RequireInitialization). In this case, the buffer elements will also be destructed when this
|
|
674
|
+
*handler will be destructed. Finally, if \a dealloc is true, then the pointer \a ptr is freed.
|
|
675
|
+
**/
|
|
676
|
+
EIGEN_DEVICE_FUNC aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
|
|
677
|
+
: m_ptr(ptr), m_size(size), m_deallocate(dealloc) {
|
|
678
|
+
if (NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::default_construct_elements_of_array(m_ptr, size);
|
|
679
|
+
}
|
|
680
|
+
EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() {
|
|
681
|
+
if (NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
|
|
682
|
+
if (m_deallocate) Eigen::internal::aligned_free(m_ptr);
|
|
683
|
+
}
|
|
582
684
|
|
|
583
|
-
|
|
584
|
-
{
|
|
685
|
+
protected:
|
|
585
686
|
T* m_ptr;
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
687
|
+
std::size_t m_size;
|
|
688
|
+
bool m_deallocate;
|
|
689
|
+
};
|
|
690
|
+
|
|
691
|
+
#ifdef EIGEN_ALLOCA
|
|
692
|
+
|
|
693
|
+
template <typename Xpr, int NbEvaluations,
|
|
694
|
+
bool MapExternalBuffer = nested_eval<Xpr, NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime == Dynamic>
|
|
695
|
+
struct local_nested_eval_wrapper {
|
|
696
|
+
static constexpr bool NeedExternalBuffer = false;
|
|
697
|
+
typedef typename Xpr::Scalar Scalar;
|
|
698
|
+
typedef typename nested_eval<Xpr, NbEvaluations>::type ObjectType;
|
|
699
|
+
ObjectType object;
|
|
700
|
+
|
|
701
|
+
EIGEN_DEVICE_FUNC local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr) {
|
|
702
|
+
EIGEN_UNUSED_VARIABLE(ptr);
|
|
703
|
+
eigen_internal_assert(ptr == 0);
|
|
590
704
|
}
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
705
|
+
};
|
|
706
|
+
|
|
707
|
+
template <typename Xpr, int NbEvaluations>
|
|
708
|
+
struct local_nested_eval_wrapper<Xpr, NbEvaluations, true> {
|
|
709
|
+
static constexpr bool NeedExternalBuffer = true;
|
|
710
|
+
typedef typename Xpr::Scalar Scalar;
|
|
711
|
+
typedef typename plain_object_eval<Xpr>::type PlainObject;
|
|
712
|
+
typedef Map<PlainObject, EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
|
|
713
|
+
ObjectType object;
|
|
714
|
+
|
|
715
|
+
EIGEN_DEVICE_FUNC local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
|
|
716
|
+
: object(ptr == 0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar) * xpr.size())) : ptr,
|
|
717
|
+
xpr.rows(), xpr.cols()),
|
|
718
|
+
m_deallocate(ptr == 0) {
|
|
719
|
+
if (NumTraits<Scalar>::RequireInitialization && object.data())
|
|
720
|
+
Eigen::internal::default_construct_elements_of_array(object.data(), object.size());
|
|
721
|
+
object = xpr;
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
EIGEN_DEVICE_FUNC ~local_nested_eval_wrapper() {
|
|
725
|
+
if (NumTraits<Scalar>::RequireInitialization && object.data())
|
|
726
|
+
Eigen::internal::destruct_elements_of_array(object.data(), object.size());
|
|
727
|
+
if (m_deallocate) Eigen::internal::aligned_free(object.data());
|
|
594
728
|
}
|
|
729
|
+
|
|
730
|
+
private:
|
|
731
|
+
bool m_deallocate;
|
|
732
|
+
};
|
|
733
|
+
|
|
734
|
+
#endif // EIGEN_ALLOCA
|
|
735
|
+
|
|
736
|
+
template <typename T>
|
|
737
|
+
class scoped_array : noncopyable {
|
|
738
|
+
T* m_ptr;
|
|
739
|
+
|
|
740
|
+
public:
|
|
741
|
+
explicit scoped_array(std::ptrdiff_t size) { m_ptr = new T[size]; }
|
|
742
|
+
~scoped_array() { delete[] m_ptr; }
|
|
595
743
|
T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
|
|
596
744
|
const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
|
|
597
|
-
T
|
|
745
|
+
T*& ptr() { return m_ptr; }
|
|
598
746
|
const T* ptr() const { return m_ptr; }
|
|
599
747
|
operator const T*() const { return m_ptr; }
|
|
600
748
|
};
|
|
601
749
|
|
|
602
|
-
template<typename T>
|
|
603
|
-
{
|
|
604
|
-
std::swap(a.ptr(),b.ptr());
|
|
750
|
+
template <typename T>
|
|
751
|
+
void swap(scoped_array<T>& a, scoped_array<T>& b) {
|
|
752
|
+
std::swap(a.ptr(), b.ptr());
|
|
605
753
|
}
|
|
606
|
-
|
|
607
|
-
} // end namespace internal
|
|
608
754
|
|
|
609
|
-
|
|
610
|
-
* Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
|
|
611
|
-
* if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
|
|
612
|
-
* (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
|
|
613
|
-
* The allocated buffer is automatically deleted when exiting the scope of this declaration.
|
|
614
|
-
* If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
|
|
615
|
-
* Here is an example:
|
|
616
|
-
* \code
|
|
617
|
-
* {
|
|
618
|
-
* ei_declare_aligned_stack_constructed_variable(float,data,size,0);
|
|
619
|
-
* // use data[0] to data[size-1]
|
|
620
|
-
* }
|
|
621
|
-
* \endcode
|
|
622
|
-
* The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
|
|
623
|
-
*/
|
|
624
|
-
#ifdef EIGEN_ALLOCA
|
|
625
|
-
|
|
626
|
-
#if EIGEN_DEFAULT_ALIGN_BYTES>0
|
|
627
|
-
// We always manually re-align the result of EIGEN_ALLOCA.
|
|
628
|
-
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
|
|
629
|
-
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
|
|
630
|
-
#else
|
|
631
|
-
#define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
|
|
632
|
-
#endif
|
|
633
|
-
|
|
634
|
-
#define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
|
|
635
|
-
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
|
|
636
|
-
TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
|
|
637
|
-
: reinterpret_cast<TYPE*>( \
|
|
638
|
-
(sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
|
|
639
|
-
: Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
|
|
640
|
-
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
|
|
755
|
+
} // end namespace internal
|
|
641
756
|
|
|
757
|
+
/** \internal
|
|
758
|
+
*
|
|
759
|
+
* The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
|
|
760
|
+
* and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
|
|
761
|
+
* if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the
|
|
762
|
+
* platform (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap. The
|
|
763
|
+
* allocated buffer is automatically deleted when exiting the scope of this declaration. If BUFFER is non null, then the
|
|
764
|
+
* declared variable is simply an alias for BUFFER, and no allocation/deletion occurs. Here is an example: \code
|
|
765
|
+
* {
|
|
766
|
+
* ei_declare_aligned_stack_constructed_variable(float,data,size,0);
|
|
767
|
+
* // use data[0] to data[size-1]
|
|
768
|
+
* }
|
|
769
|
+
* \endcode
|
|
770
|
+
* The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
|
|
771
|
+
*
|
|
772
|
+
* The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to
|
|
773
|
+
* \code
|
|
774
|
+
* typename internal::nested_eval<XPRT_T,N>::type NAME(XPR);
|
|
775
|
+
* \endcode
|
|
776
|
+
* with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
|
|
777
|
+
* This is accomplished through alloca if this later is supported and if the required number of bytes
|
|
778
|
+
* is below EIGEN_STACK_ALLOCATION_LIMIT.
|
|
779
|
+
*/
|
|
780
|
+
#if defined(EIGEN_ALLOCA) && !defined(EIGEN_NO_ALLOCA)
|
|
781
|
+
|
|
782
|
+
#if EIGEN_DEFAULT_ALIGN_BYTES > 0
|
|
783
|
+
// We always manually re-align the result of EIGEN_ALLOCA.
|
|
784
|
+
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
|
|
785
|
+
|
|
786
|
+
#if ((EIGEN_COMP_GNUC || EIGEN_COMP_CLANG) && !EIGEN_COMP_NVHPC)
|
|
787
|
+
#define EIGEN_ALIGNED_ALLOCA(SIZE) __builtin_alloca_with_align(SIZE, CHAR_BIT* EIGEN_DEFAULT_ALIGN_BYTES)
|
|
642
788
|
#else
|
|
789
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* ptr) {
|
|
790
|
+
constexpr std::uintptr_t mask = EIGEN_DEFAULT_ALIGN_BYTES - 1;
|
|
791
|
+
std::uintptr_t ptr_int = std::uintptr_t(ptr);
|
|
792
|
+
std::uintptr_t aligned_ptr_int = (ptr_int + mask) & ~mask;
|
|
793
|
+
std::uintptr_t offset = aligned_ptr_int - ptr_int;
|
|
794
|
+
return static_cast<void*>(static_cast<uint8_t*>(ptr) + offset);
|
|
795
|
+
}
|
|
796
|
+
#define EIGEN_ALIGNED_ALLOCA(SIZE) eigen_aligned_alloca_helper(EIGEN_ALLOCA(SIZE + EIGEN_DEFAULT_ALIGN_BYTES - 1))
|
|
797
|
+
#endif
|
|
643
798
|
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
|
|
647
|
-
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
|
|
648
|
-
|
|
799
|
+
#else
|
|
800
|
+
#define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
|
|
649
801
|
#endif
|
|
650
802
|
|
|
803
|
+
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
|
|
804
|
+
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
|
|
805
|
+
TYPE* NAME = (BUFFER) != 0 ? (BUFFER) \
|
|
806
|
+
: reinterpret_cast<TYPE*>((sizeof(TYPE) * (SIZE) <= EIGEN_STACK_ALLOCATION_LIMIT) \
|
|
807
|
+
? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE) * (SIZE)) \
|
|
808
|
+
: Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
|
|
809
|
+
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
|
|
810
|
+
(BUFFER) == 0 ? NAME : 0, SIZE, sizeof(TYPE) * (SIZE) > EIGEN_STACK_ALLOCATION_LIMIT)
|
|
811
|
+
|
|
812
|
+
#define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
|
|
813
|
+
Eigen::internal::local_nested_eval_wrapper<XPR_T, N> EIGEN_CAT(NAME, _wrapper)( \
|
|
814
|
+
XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
|
|
815
|
+
((Eigen::internal::local_nested_eval_wrapper<XPR_T, N>::NeedExternalBuffer) && \
|
|
816
|
+
((sizeof(typename XPR_T::Scalar) * XPR.size()) <= EIGEN_STACK_ALLOCATION_LIMIT)) \
|
|
817
|
+
? EIGEN_ALIGNED_ALLOCA(sizeof(typename XPR_T::Scalar) * XPR.size()) \
|
|
818
|
+
: 0)); \
|
|
819
|
+
typename Eigen::internal::local_nested_eval_wrapper<XPR_T, N>::ObjectType NAME(EIGEN_CAT(NAME, _wrapper).object)
|
|
820
|
+
|
|
821
|
+
#else
|
|
822
|
+
|
|
823
|
+
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
|
|
824
|
+
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
|
|
825
|
+
TYPE* NAME = \
|
|
826
|
+
(BUFFER) != 0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
|
|
827
|
+
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
|
|
828
|
+
(BUFFER) == 0 ? NAME : 0, SIZE, true)
|
|
829
|
+
|
|
830
|
+
#define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
|
|
831
|
+
typename Eigen::internal::nested_eval<XPR_T, N>::type NAME(XPR)
|
|
832
|
+
|
|
833
|
+
#endif
|
|
651
834
|
|
|
652
835
|
/*****************************************************************************
|
|
653
836
|
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
|
|
654
837
|
*****************************************************************************/
|
|
655
838
|
|
|
656
|
-
#if
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
} \
|
|
666
|
-
void *operator new[](std::size_t size) { \
|
|
667
|
-
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
|
668
|
-
} \
|
|
669
|
-
void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
|
670
|
-
void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
|
671
|
-
void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
|
672
|
-
void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
|
673
|
-
/* in-place new and delete. since (at least afaik) there is no actual */ \
|
|
674
|
-
/* memory allocated we can safely let the default implementation handle */ \
|
|
675
|
-
/* this particular case. */ \
|
|
676
|
-
static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
|
|
677
|
-
static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
|
|
678
|
-
void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
|
|
679
|
-
void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
|
|
680
|
-
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
|
|
681
|
-
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
|
682
|
-
void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
|
|
683
|
-
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
684
|
-
} \
|
|
685
|
-
typedef void eigen_aligned_operator_new_marker_type;
|
|
839
|
+
#if EIGEN_HAS_CXX17_OVERALIGN
|
|
840
|
+
|
|
841
|
+
// C++17 -> no need to bother about alignment anymore :)
|
|
842
|
+
|
|
843
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
|
|
844
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
|
845
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
|
846
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Size)
|
|
847
|
+
|
|
686
848
|
#else
|
|
687
|
-
|
|
849
|
+
|
|
850
|
+
// HIP does not support new/delete on device.
|
|
851
|
+
#if EIGEN_MAX_ALIGN_BYTES != 0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
|
|
852
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
|
853
|
+
EIGEN_DEVICE_FUNC void* operator new(std::size_t size, const std::nothrow_t&) noexcept { \
|
|
854
|
+
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
|
|
855
|
+
EIGEN_CATCH(...) { return 0; } \
|
|
856
|
+
}
|
|
857
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
|
|
858
|
+
EIGEN_DEVICE_FUNC void* operator new(std::size_t size) { \
|
|
859
|
+
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
|
860
|
+
} \
|
|
861
|
+
EIGEN_DEVICE_FUNC void* operator new[](std::size_t size) { \
|
|
862
|
+
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
|
863
|
+
} \
|
|
864
|
+
EIGEN_DEVICE_FUNC void operator delete(void* ptr) noexcept { \
|
|
865
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
866
|
+
} \
|
|
867
|
+
EIGEN_DEVICE_FUNC void operator delete[](void* ptr) noexcept { \
|
|
868
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
869
|
+
} \
|
|
870
|
+
EIGEN_DEVICE_FUNC void operator delete(void* ptr, std::size_t /* sz */) noexcept { \
|
|
871
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
872
|
+
} \
|
|
873
|
+
EIGEN_DEVICE_FUNC void operator delete[](void* ptr, std::size_t /* sz */) noexcept { \
|
|
874
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
875
|
+
} \
|
|
876
|
+
/* in-place new and delete. since (at least afaik) there is no actual */ \
|
|
877
|
+
/* memory allocated we can safely let the default implementation handle */ \
|
|
878
|
+
/* this particular case. */ \
|
|
879
|
+
EIGEN_DEVICE_FUNC static void* operator new(std::size_t size, void* ptr) { return ::operator new(size, ptr); } \
|
|
880
|
+
EIGEN_DEVICE_FUNC static void* operator new[](std::size_t size, void* ptr) { return ::operator new[](size, ptr); } \
|
|
881
|
+
EIGEN_DEVICE_FUNC void operator delete(void* memory, void* ptr) noexcept { return ::operator delete(memory, ptr); } \
|
|
882
|
+
EIGEN_DEVICE_FUNC void operator delete[](void* memory, void* ptr) noexcept { \
|
|
883
|
+
return ::operator delete[](memory, ptr); \
|
|
884
|
+
} \
|
|
885
|
+
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
|
|
886
|
+
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
|
887
|
+
EIGEN_DEVICE_FUNC void operator delete(void* ptr, const std::nothrow_t&) noexcept { \
|
|
888
|
+
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
|
889
|
+
} \
|
|
890
|
+
typedef void eigen_aligned_operator_new_marker_type;
|
|
891
|
+
#else
|
|
892
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
|
688
893
|
#endif
|
|
689
894
|
|
|
690
895
|
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
|
|
691
|
-
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
|
|
692
|
-
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(
|
|
896
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Size) \
|
|
897
|
+
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF( \
|
|
898
|
+
bool(((Size) != Eigen::Dynamic) && \
|
|
899
|
+
(((EIGEN_MAX_ALIGN_BYTES >= 16) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES) == 0)) || \
|
|
900
|
+
((EIGEN_MAX_ALIGN_BYTES >= 32) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES / 2) == 0)) || \
|
|
901
|
+
((EIGEN_MAX_ALIGN_BYTES >= 64) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES / 4) == 0)))))
|
|
902
|
+
|
|
903
|
+
#endif
|
|
693
904
|
|
|
694
905
|
/****************************************************************************/
|
|
695
906
|
|
|
696
907
|
/** \class aligned_allocator
|
|
697
|
-
* \ingroup Core_Module
|
|
698
|
-
*
|
|
699
|
-
* \brief STL compatible allocator to use with types requiring a non
|
|
700
|
-
*
|
|
701
|
-
* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
|
|
702
|
-
* By default, it will thus provide at least 16 bytes alignment and more in following cases:
|
|
703
|
-
* - 32 bytes alignment if AVX is enabled.
|
|
704
|
-
* - 64 bytes alignment if AVX512 is enabled.
|
|
705
|
-
*
|
|
706
|
-
* This can be
|
|
707
|
-
* \link TopicPreprocessorDirectivesPerformance there \endlink.
|
|
708
|
-
*
|
|
709
|
-
* Example:
|
|
710
|
-
* \code
|
|
711
|
-
* // Matrix4f requires 16 bytes alignment:
|
|
712
|
-
* std::map< int, Matrix4f, std::less<int>,
|
|
713
|
-
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
|
|
714
|
-
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
|
|
715
|
-
* std::map< int, Vector3f > my_map_vec3;
|
|
716
|
-
* \endcode
|
|
717
|
-
*
|
|
718
|
-
* \sa \blank \ref TopicStlContainers.
|
|
719
|
-
*/
|
|
720
|
-
template<class T>
|
|
721
|
-
class aligned_allocator
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
typedef std::
|
|
725
|
-
typedef
|
|
726
|
-
typedef T*
|
|
727
|
-
typedef
|
|
728
|
-
typedef T&
|
|
729
|
-
typedef
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
struct rebind
|
|
734
|
-
{
|
|
908
|
+
* \ingroup Core_Module
|
|
909
|
+
*
|
|
910
|
+
* \brief STL compatible allocator to use with types requiring a non-standard alignment.
|
|
911
|
+
*
|
|
912
|
+
* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
|
|
913
|
+
* By default, it will thus provide at least 16 bytes alignment and more in following cases:
|
|
914
|
+
* - 32 bytes alignment if AVX is enabled.
|
|
915
|
+
* - 64 bytes alignment if AVX512 is enabled.
|
|
916
|
+
*
|
|
917
|
+
* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
|
|
918
|
+
* \link TopicPreprocessorDirectivesPerformance there \endlink.
|
|
919
|
+
*
|
|
920
|
+
* Example:
|
|
921
|
+
* \code
|
|
922
|
+
* // Matrix4f requires 16 bytes alignment:
|
|
923
|
+
* std::map< int, Matrix4f, std::less<int>,
|
|
924
|
+
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
|
|
925
|
+
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
|
|
926
|
+
* std::map< int, Vector3f > my_map_vec3;
|
|
927
|
+
* \endcode
|
|
928
|
+
*
|
|
929
|
+
* \sa \blank \ref TopicStlContainers.
|
|
930
|
+
*/
|
|
931
|
+
template <class T>
|
|
932
|
+
class aligned_allocator {
|
|
933
|
+
public:
|
|
934
|
+
typedef std::size_t size_type;
|
|
935
|
+
typedef std::ptrdiff_t difference_type;
|
|
936
|
+
typedef T* pointer;
|
|
937
|
+
typedef const T* const_pointer;
|
|
938
|
+
typedef T& reference;
|
|
939
|
+
typedef const T& const_reference;
|
|
940
|
+
typedef T value_type;
|
|
941
|
+
|
|
942
|
+
template <class U>
|
|
943
|
+
struct rebind {
|
|
735
944
|
typedef aligned_allocator<U> other;
|
|
736
945
|
};
|
|
737
946
|
|
|
738
|
-
aligned_allocator()
|
|
947
|
+
aligned_allocator() = default;
|
|
739
948
|
|
|
740
|
-
aligned_allocator(const aligned_allocator&
|
|
949
|
+
aligned_allocator(const aligned_allocator&) = default;
|
|
741
950
|
|
|
742
|
-
template<class U>
|
|
743
|
-
aligned_allocator(const aligned_allocator<U>&
|
|
951
|
+
template <class U>
|
|
952
|
+
aligned_allocator(const aligned_allocator<U>&) {}
|
|
744
953
|
|
|
745
|
-
|
|
954
|
+
template <class U>
|
|
955
|
+
constexpr bool operator==(const aligned_allocator<U>&) const noexcept {
|
|
956
|
+
return true;
|
|
957
|
+
}
|
|
958
|
+
template <class U>
|
|
959
|
+
constexpr bool operator!=(const aligned_allocator<U>&) const noexcept {
|
|
960
|
+
return false;
|
|
961
|
+
}
|
|
746
962
|
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
// workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
|
|
753
|
-
// It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
|
|
754
|
-
if(size>=std::size_t((std::numeric_limits<std::ptrdiff_t>::max)()))
|
|
755
|
-
return 0;
|
|
756
|
-
else
|
|
963
|
+
#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0)
|
|
964
|
+
// In gcc std::allocator::max_size() is bugged making gcc triggers a warning:
|
|
965
|
+
// eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object
|
|
966
|
+
// size 9223372036854775807 See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
|
|
967
|
+
size_type max_size() const { return (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T); }
|
|
757
968
|
#endif
|
|
758
|
-
return static_cast<pointer>( internal::aligned_malloc(size) );
|
|
759
|
-
}
|
|
760
969
|
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
internal::
|
|
970
|
+
pointer allocate(size_type num, const void* /*hint*/ = 0) {
|
|
971
|
+
internal::check_size_for_overflow<T>(num);
|
|
972
|
+
return static_cast<pointer>(internal::aligned_malloc(num * sizeof(T)));
|
|
764
973
|
}
|
|
974
|
+
|
|
975
|
+
void deallocate(pointer p, size_type /*num*/) { internal::aligned_free(p); }
|
|
765
976
|
};
|
|
766
977
|
|
|
767
978
|
//---------- Cache sizes ----------
|
|
768
979
|
|
|
769
980
|
#if !defined(EIGEN_NO_CPUID)
|
|
770
|
-
#
|
|
771
|
-
#
|
|
772
|
-
|
|
773
|
-
#
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
#
|
|
786
|
-
|
|
787
|
-
#
|
|
788
|
-
|
|
789
|
-
#
|
|
981
|
+
#if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
|
|
982
|
+
#if defined(__PIC__) && EIGEN_ARCH_i386
|
|
983
|
+
// Case for x86 with PIC
|
|
984
|
+
#define EIGEN_CPUID(abcd, func, id) \
|
|
985
|
+
__asm__ __volatile__("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1" \
|
|
986
|
+
: "=a"(abcd[0]), "=&r"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) \
|
|
987
|
+
: "a"(func), "c"(id));
|
|
988
|
+
#elif defined(__PIC__) && EIGEN_ARCH_x86_64
|
|
989
|
+
// Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with
|
|
990
|
+
// the default small code model. However, we cannot detect which code model is used, and the xchg overhead is negligible
|
|
991
|
+
// anyway.
|
|
992
|
+
#define EIGEN_CPUID(abcd, func, id) \
|
|
993
|
+
__asm__ __volatile__("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1" \
|
|
994
|
+
: "=a"(abcd[0]), "=&r"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) \
|
|
995
|
+
: "0"(func), "2"(id));
|
|
996
|
+
#else
|
|
997
|
+
// Case for x86_64 or x86 w/o PIC
|
|
998
|
+
#define EIGEN_CPUID(abcd, func, id) \
|
|
999
|
+
__asm__ __volatile__("cpuid" : "=a"(abcd[0]), "=b"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) : "0"(func), "2"(id));
|
|
1000
|
+
#endif
|
|
1001
|
+
#elif EIGEN_COMP_MSVC
|
|
1002
|
+
#if EIGEN_ARCH_i386_OR_x86_64
|
|
1003
|
+
#define EIGEN_CPUID(abcd, func, id) __cpuidex((int*)abcd, func, id)
|
|
1004
|
+
#endif
|
|
1005
|
+
#endif
|
|
790
1006
|
#endif
|
|
791
1007
|
|
|
792
1008
|
namespace internal {
|
|
793
1009
|
|
|
794
1010
|
#ifdef EIGEN_CPUID
|
|
795
1011
|
|
|
796
|
-
inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
|
|
797
|
-
|
|
798
|
-
return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
|
|
1012
|
+
inline bool cpuid_is_vendor(int abcd[4], const int vendor[3]) {
|
|
1013
|
+
return abcd[1] == vendor[0] && abcd[3] == vendor[1] && abcd[2] == vendor[2];
|
|
799
1014
|
}
|
|
800
1015
|
|
|
801
|
-
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
|
|
802
|
-
{
|
|
1016
|
+
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3) {
|
|
803
1017
|
int abcd[4];
|
|
804
1018
|
l1 = l2 = l3 = 0;
|
|
805
1019
|
int cache_id = 0;
|
|
806
1020
|
int cache_type = 0;
|
|
807
1021
|
do {
|
|
808
1022
|
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
|
809
|
-
EIGEN_CPUID(abcd,0x4,cache_id);
|
|
810
|
-
cache_type
|
|
811
|
-
if(cache_type==1||cache_type==3)
|
|
1023
|
+
EIGEN_CPUID(abcd, 0x4, cache_id);
|
|
1024
|
+
cache_type = (abcd[0] & 0x0F) >> 0;
|
|
1025
|
+
if (cache_type == 1 || cache_type == 3) // data or unified cache
|
|
812
1026
|
{
|
|
813
|
-
int cache_level = (abcd[0] & 0xE0) >> 5;
|
|
814
|
-
int ways
|
|
815
|
-
int partitions
|
|
816
|
-
int line_size
|
|
817
|
-
int sets
|
|
818
|
-
|
|
819
|
-
int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
|
|
820
|
-
|
|
821
|
-
switch(cache_level)
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
case
|
|
826
|
-
|
|
1027
|
+
int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
|
|
1028
|
+
int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
|
|
1029
|
+
int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
|
|
1030
|
+
int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
|
|
1031
|
+
int sets = (abcd[2]); // C[31:0]
|
|
1032
|
+
|
|
1033
|
+
int cache_size = (ways + 1) * (partitions + 1) * (line_size + 1) * (sets + 1);
|
|
1034
|
+
|
|
1035
|
+
switch (cache_level) {
|
|
1036
|
+
case 1:
|
|
1037
|
+
l1 = cache_size;
|
|
1038
|
+
break;
|
|
1039
|
+
case 2:
|
|
1040
|
+
l2 = cache_size;
|
|
1041
|
+
break;
|
|
1042
|
+
case 3:
|
|
1043
|
+
l3 = cache_size;
|
|
1044
|
+
break;
|
|
1045
|
+
default:
|
|
1046
|
+
break;
|
|
827
1047
|
}
|
|
828
1048
|
}
|
|
829
1049
|
cache_id++;
|
|
830
|
-
} while(cache_type>0 && cache_id<16);
|
|
1050
|
+
} while (cache_type > 0 && cache_id < 16);
|
|
831
1051
|
}
|
|
832
1052
|
|
|
833
|
-
inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
|
|
834
|
-
{
|
|
1053
|
+
inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3) {
|
|
835
1054
|
int abcd[4];
|
|
836
1055
|
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
|
837
1056
|
l1 = l2 = l3 = 0;
|
|
838
|
-
EIGEN_CPUID(abcd,0x00000002,0);
|
|
839
|
-
unsigned char
|
|
1057
|
+
EIGEN_CPUID(abcd, 0x00000002, 0);
|
|
1058
|
+
unsigned char* bytes = reinterpret_cast<unsigned char*>(abcd) + 2;
|
|
840
1059
|
bool check_for_p2_core2 = false;
|
|
841
|
-
for(int i=0; i<14; ++i)
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
case 0x0C:
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
case
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
case
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
case
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
case
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
case
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
case
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
case
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
case
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
case
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
case
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
case
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
case
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
case
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
case
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
case
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
case
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
case
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
case
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
1060
|
+
for (int i = 0; i < 14; ++i) {
|
|
1061
|
+
switch (bytes[i]) {
|
|
1062
|
+
case 0x0A:
|
|
1063
|
+
l1 = 8;
|
|
1064
|
+
break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
|
|
1065
|
+
case 0x0C:
|
|
1066
|
+
l1 = 16;
|
|
1067
|
+
break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
|
|
1068
|
+
case 0x0E:
|
|
1069
|
+
l1 = 24;
|
|
1070
|
+
break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
|
|
1071
|
+
case 0x10:
|
|
1072
|
+
l1 = 16;
|
|
1073
|
+
break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
|
|
1074
|
+
case 0x15:
|
|
1075
|
+
l1 = 16;
|
|
1076
|
+
break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
|
|
1077
|
+
case 0x2C:
|
|
1078
|
+
l1 = 32;
|
|
1079
|
+
break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
|
|
1080
|
+
case 0x30:
|
|
1081
|
+
l1 = 32;
|
|
1082
|
+
break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
|
|
1083
|
+
case 0x60:
|
|
1084
|
+
l1 = 16;
|
|
1085
|
+
break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
|
|
1086
|
+
case 0x66:
|
|
1087
|
+
l1 = 8;
|
|
1088
|
+
break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
|
|
1089
|
+
case 0x67:
|
|
1090
|
+
l1 = 16;
|
|
1091
|
+
break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
|
|
1092
|
+
case 0x68:
|
|
1093
|
+
l1 = 32;
|
|
1094
|
+
break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
|
|
1095
|
+
case 0x1A:
|
|
1096
|
+
l2 = 96;
|
|
1097
|
+
break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
|
|
1098
|
+
case 0x22:
|
|
1099
|
+
l3 = 512;
|
|
1100
|
+
break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
|
|
1101
|
+
case 0x23:
|
|
1102
|
+
l3 = 1024;
|
|
1103
|
+
break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1104
|
+
case 0x25:
|
|
1105
|
+
l3 = 2048;
|
|
1106
|
+
break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1107
|
+
case 0x29:
|
|
1108
|
+
l3 = 4096;
|
|
1109
|
+
break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1110
|
+
case 0x39:
|
|
1111
|
+
l2 = 128;
|
|
1112
|
+
break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
|
|
1113
|
+
case 0x3A:
|
|
1114
|
+
l2 = 192;
|
|
1115
|
+
break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
|
|
1116
|
+
case 0x3B:
|
|
1117
|
+
l2 = 128;
|
|
1118
|
+
break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
|
|
1119
|
+
case 0x3C:
|
|
1120
|
+
l2 = 256;
|
|
1121
|
+
break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
|
|
1122
|
+
case 0x3D:
|
|
1123
|
+
l2 = 384;
|
|
1124
|
+
break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
|
|
1125
|
+
case 0x3E:
|
|
1126
|
+
l2 = 512;
|
|
1127
|
+
break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
|
|
1128
|
+
case 0x40:
|
|
1129
|
+
l2 = 0;
|
|
1130
|
+
break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
|
|
1131
|
+
case 0x41:
|
|
1132
|
+
l2 = 128;
|
|
1133
|
+
break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
|
|
1134
|
+
case 0x42:
|
|
1135
|
+
l2 = 256;
|
|
1136
|
+
break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
|
|
1137
|
+
case 0x43:
|
|
1138
|
+
l2 = 512;
|
|
1139
|
+
break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
|
|
1140
|
+
case 0x44:
|
|
1141
|
+
l2 = 1024;
|
|
1142
|
+
break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
|
|
1143
|
+
case 0x45:
|
|
1144
|
+
l2 = 2048;
|
|
1145
|
+
break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
|
|
1146
|
+
case 0x46:
|
|
1147
|
+
l3 = 4096;
|
|
1148
|
+
break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
|
|
1149
|
+
case 0x47:
|
|
1150
|
+
l3 = 8192;
|
|
1151
|
+
break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
|
|
1152
|
+
case 0x48:
|
|
1153
|
+
l2 = 3072;
|
|
1154
|
+
break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
|
|
1155
|
+
case 0x49:
|
|
1156
|
+
if (l2 != 0)
|
|
1157
|
+
l3 = 4096;
|
|
1158
|
+
else {
|
|
1159
|
+
check_for_p2_core2 = true;
|
|
1160
|
+
l3 = l2 = 4096;
|
|
1161
|
+
}
|
|
1162
|
+
break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
|
|
1163
|
+
case 0x4A:
|
|
1164
|
+
l3 = 6144;
|
|
1165
|
+
break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
|
|
1166
|
+
case 0x4B:
|
|
1167
|
+
l3 = 8192;
|
|
1168
|
+
break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
|
|
1169
|
+
case 0x4C:
|
|
1170
|
+
l3 = 12288;
|
|
1171
|
+
break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
|
|
1172
|
+
case 0x4D:
|
|
1173
|
+
l3 = 16384;
|
|
1174
|
+
break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
|
|
1175
|
+
case 0x4E:
|
|
1176
|
+
l2 = 6144;
|
|
1177
|
+
break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
|
|
1178
|
+
case 0x78:
|
|
1179
|
+
l2 = 1024;
|
|
1180
|
+
break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
|
|
1181
|
+
case 0x79:
|
|
1182
|
+
l2 = 128;
|
|
1183
|
+
break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1184
|
+
case 0x7A:
|
|
1185
|
+
l2 = 256;
|
|
1186
|
+
break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1187
|
+
case 0x7B:
|
|
1188
|
+
l2 = 512;
|
|
1189
|
+
break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1190
|
+
case 0x7C:
|
|
1191
|
+
l2 = 1024;
|
|
1192
|
+
break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
|
|
1193
|
+
case 0x7D:
|
|
1194
|
+
l2 = 2048;
|
|
1195
|
+
break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
|
|
1196
|
+
case 0x7E:
|
|
1197
|
+
l2 = 256;
|
|
1198
|
+
break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
|
|
1199
|
+
case 0x7F:
|
|
1200
|
+
l2 = 512;
|
|
1201
|
+
break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
|
|
1202
|
+
case 0x80:
|
|
1203
|
+
l2 = 512;
|
|
1204
|
+
break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
|
|
1205
|
+
case 0x81:
|
|
1206
|
+
l2 = 128;
|
|
1207
|
+
break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
|
|
1208
|
+
case 0x82:
|
|
1209
|
+
l2 = 256;
|
|
1210
|
+
break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
|
|
1211
|
+
case 0x83:
|
|
1212
|
+
l2 = 512;
|
|
1213
|
+
break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
|
|
1214
|
+
case 0x84:
|
|
1215
|
+
l2 = 1024;
|
|
1216
|
+
break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
|
|
1217
|
+
case 0x85:
|
|
1218
|
+
l2 = 2048;
|
|
1219
|
+
break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
|
|
1220
|
+
case 0x86:
|
|
1221
|
+
l2 = 512;
|
|
1222
|
+
break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
|
|
1223
|
+
case 0x87:
|
|
1224
|
+
l2 = 1024;
|
|
1225
|
+
break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
|
|
1226
|
+
case 0x88:
|
|
1227
|
+
l3 = 2048;
|
|
1228
|
+
break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
|
|
1229
|
+
case 0x89:
|
|
1230
|
+
l3 = 4096;
|
|
1231
|
+
break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
|
|
1232
|
+
case 0x8A:
|
|
1233
|
+
l3 = 8192;
|
|
1234
|
+
break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
|
|
1235
|
+
case 0x8D:
|
|
1236
|
+
l3 = 3072;
|
|
1237
|
+
break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
|
|
1238
|
+
|
|
1239
|
+
default:
|
|
1240
|
+
break;
|
|
904
1241
|
}
|
|
905
1242
|
}
|
|
906
|
-
if(check_for_p2_core2 && l2 == l3)
|
|
907
|
-
l3 = 0;
|
|
1243
|
+
if (check_for_p2_core2 && l2 == l3) l3 = 0;
|
|
908
1244
|
l1 *= 1024;
|
|
909
1245
|
l2 *= 1024;
|
|
910
1246
|
l3 *= 1024;
|
|
911
1247
|
}
|
|
912
1248
|
|
|
913
|
-
inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
1249
|
+
inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) {
|
|
1250
|
+
if (max_std_funcs >= 4)
|
|
1251
|
+
queryCacheSizes_intel_direct(l1, l2, l3);
|
|
1252
|
+
else if (max_std_funcs >= 2)
|
|
1253
|
+
queryCacheSizes_intel_codes(l1, l2, l3);
|
|
917
1254
|
else
|
|
918
|
-
|
|
1255
|
+
l1 = l2 = l3 = 0;
|
|
919
1256
|
}
|
|
920
1257
|
|
|
921
|
-
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
|
|
922
|
-
{
|
|
1258
|
+
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) {
|
|
923
1259
|
int abcd[4];
|
|
924
1260
|
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
abcd
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
1261
|
+
|
|
1262
|
+
// First query the max supported function.
|
|
1263
|
+
EIGEN_CPUID(abcd, 0x80000000, 0);
|
|
1264
|
+
if (static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006)) {
|
|
1265
|
+
EIGEN_CPUID(abcd, 0x80000005, 0);
|
|
1266
|
+
l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
|
|
1267
|
+
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
|
1268
|
+
EIGEN_CPUID(abcd, 0x80000006, 0);
|
|
1269
|
+
l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
|
|
1270
|
+
l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
|
|
1271
|
+
} else {
|
|
1272
|
+
l1 = l2 = l3 = 0;
|
|
1273
|
+
}
|
|
931
1274
|
}
|
|
932
1275
|
#endif
|
|
933
1276
|
|
|
934
1277
|
/** \internal
|
|
935
1278
|
* Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
|
|
936
|
-
inline void queryCacheSizes(int& l1, int& l2, int& l3)
|
|
937
|
-
|
|
938
|
-
#ifdef EIGEN_CPUID
|
|
1279
|
+
inline void queryCacheSizes(int& l1, int& l2, int& l3) {
|
|
1280
|
+
#ifdef EIGEN_CPUID
|
|
939
1281
|
int abcd[4];
|
|
940
1282
|
const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
|
|
941
1283
|
const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
|
|
942
|
-
const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574};
|
|
1284
|
+
const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
|
|
943
1285
|
|
|
944
1286
|
// identify the CPU vendor
|
|
945
|
-
EIGEN_CPUID(abcd,0x0,0);
|
|
946
|
-
int max_std_funcs = abcd[
|
|
947
|
-
if(cpuid_is_vendor(abcd,GenuineIntel))
|
|
948
|
-
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
|
|
949
|
-
else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
|
|
950
|
-
queryCacheSizes_amd(l1,l2,l3);
|
|
1287
|
+
EIGEN_CPUID(abcd, 0x0, 0);
|
|
1288
|
+
int max_std_funcs = abcd[0];
|
|
1289
|
+
if (cpuid_is_vendor(abcd, GenuineIntel))
|
|
1290
|
+
queryCacheSizes_intel(l1, l2, l3, max_std_funcs);
|
|
1291
|
+
else if (cpuid_is_vendor(abcd, AuthenticAMD) || cpuid_is_vendor(abcd, AMDisbetter_))
|
|
1292
|
+
queryCacheSizes_amd(l1, l2, l3);
|
|
951
1293
|
else
|
|
952
1294
|
// by default let's use Intel's API
|
|
953
|
-
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
// ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
|
|
957
|
-
// ||cpuid_is_vendor(abcd,"CyrixInstead")
|
|
958
|
-
// ||cpuid_is_vendor(abcd,"CentaurHauls")
|
|
959
|
-
// ||cpuid_is_vendor(abcd,"GenuineTMx86")
|
|
960
|
-
// ||cpuid_is_vendor(abcd,"TransmetaCPU")
|
|
961
|
-
// ||cpuid_is_vendor(abcd,"RiseRiseRise")
|
|
962
|
-
// ||cpuid_is_vendor(abcd,"Geode by NSC")
|
|
963
|
-
// ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
|
|
964
|
-
// ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
|
|
965
|
-
// ||cpuid_is_vendor(abcd,"NexGenDriven")
|
|
966
|
-
|
|
1295
|
+
queryCacheSizes_intel(l1, l2, l3, max_std_funcs);
|
|
1296
|
+
|
|
1297
|
+
// here is the list of other vendors:
|
|
1298
|
+
// ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
|
|
1299
|
+
// ||cpuid_is_vendor(abcd,"CyrixInstead")
|
|
1300
|
+
// ||cpuid_is_vendor(abcd,"CentaurHauls")
|
|
1301
|
+
// ||cpuid_is_vendor(abcd,"GenuineTMx86")
|
|
1302
|
+
// ||cpuid_is_vendor(abcd,"TransmetaCPU")
|
|
1303
|
+
// ||cpuid_is_vendor(abcd,"RiseRiseRise")
|
|
1304
|
+
// ||cpuid_is_vendor(abcd,"Geode by NSC")
|
|
1305
|
+
// ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
|
|
1306
|
+
// ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
|
|
1307
|
+
// ||cpuid_is_vendor(abcd,"NexGenDriven")
|
|
1308
|
+
#else
|
|
967
1309
|
l1 = l2 = l3 = -1;
|
|
968
|
-
|
|
1310
|
+
#endif
|
|
969
1311
|
}
|
|
970
1312
|
|
|
971
1313
|
/** \internal
|
|
972
1314
|
* \returns the size in Bytes of the L1 data cache */
|
|
973
|
-
inline int queryL1CacheSize()
|
|
974
|
-
{
|
|
1315
|
+
inline int queryL1CacheSize() {
|
|
975
1316
|
int l1(-1), l2, l3;
|
|
976
|
-
queryCacheSizes(l1,l2,l3);
|
|
1317
|
+
queryCacheSizes(l1, l2, l3);
|
|
977
1318
|
return l1;
|
|
978
1319
|
}
|
|
979
1320
|
|
|
980
1321
|
/** \internal
|
|
981
1322
|
* \returns the size in Bytes of the L2 or L3 cache if this later is present */
|
|
982
|
-
inline int queryTopLevelCacheSize()
|
|
983
|
-
{
|
|
1323
|
+
inline int queryTopLevelCacheSize() {
|
|
984
1324
|
int l1, l2(-1), l3(-1);
|
|
985
|
-
queryCacheSizes(l1,l2,l3);
|
|
986
|
-
return (std::max)(l2,l3);
|
|
1325
|
+
queryCacheSizes(l1, l2, l3);
|
|
1326
|
+
return (std::max)(l2, l3);
|
|
987
1327
|
}
|
|
988
1328
|
|
|
989
|
-
|
|
1329
|
+
/** \internal
|
|
1330
|
+
* This wraps C++20's std::construct_at, using placement new instead if it is not available.
|
|
1331
|
+
*/
|
|
1332
|
+
|
|
1333
|
+
#if EIGEN_COMP_CXXVER >= 20 && defined(__cpp_lib_constexpr_dynamic_alloc) && \
|
|
1334
|
+
__cpp_lib_constexpr_dynamic_alloc >= 201907L
|
|
1335
|
+
using std::construct_at;
|
|
1336
|
+
#else
|
|
1337
|
+
template <class T, class... Args>
|
|
1338
|
+
EIGEN_DEVICE_FUNC T* construct_at(T* p, Args&&... args) {
|
|
1339
|
+
return ::new (const_cast<void*>(static_cast<const volatile void*>(p))) T(std::forward<Args>(args)...);
|
|
1340
|
+
}
|
|
1341
|
+
#endif
|
|
1342
|
+
|
|
1343
|
+
/** \internal
|
|
1344
|
+
* This wraps C++17's std::destroy_at. If it's not available it calls the destructor.
|
|
1345
|
+
* The wrapper is not a full replacement for C++20's std::destroy_at as it cannot
|
|
1346
|
+
* be applied to std::array.
|
|
1347
|
+
*/
|
|
1348
|
+
#if EIGEN_COMP_CXXVER >= 17
|
|
1349
|
+
using std::destroy_at;
|
|
1350
|
+
#else
|
|
1351
|
+
template <class T>
|
|
1352
|
+
EIGEN_DEVICE_FUNC void destroy_at(T* p) {
|
|
1353
|
+
p->~T();
|
|
1354
|
+
}
|
|
1355
|
+
#endif
|
|
1356
|
+
|
|
1357
|
+
// FIXME(rmlarsen): Work around missing linker symbol with msan on ARM.
|
|
1358
|
+
#if !defined(EIGEN_DONT_ASSUME_ALIGNED) && __has_feature(memory_sanitizer) && \
|
|
1359
|
+
(EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64)
|
|
1360
|
+
#define EIGEN_DONT_ASSUME_ALIGNED
|
|
1361
|
+
#endif
|
|
1362
|
+
|
|
1363
|
+
|
|
1364
|
+
#if !defined(EIGEN_DONT_ASSUME_ALIGNED) && defined(__cpp_lib_assume_aligned) && (__cpp_lib_assume_aligned >= 201811L)
|
|
1365
|
+
template <std::size_t N, typename T>
|
|
1366
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
|
|
1367
|
+
return std::assume_aligned<N, T>(ptr);
|
|
1368
|
+
}
|
|
1369
|
+
#elif !defined(EIGEN_DONT_ASSUME_ALIGNED) && EIGEN_HAS_BUILTIN(__builtin_assume_aligned)
|
|
1370
|
+
template <std::size_t N, typename T>
|
|
1371
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC T* assume_aligned(T* ptr) {
|
|
1372
|
+
return static_cast<T*>(__builtin_assume_aligned(ptr, N));
|
|
1373
|
+
}
|
|
1374
|
+
#else
|
|
1375
|
+
template <std::size_t N, typename T>
|
|
1376
|
+
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
|
|
1377
|
+
return ptr;
|
|
1378
|
+
}
|
|
1379
|
+
#endif
|
|
1380
|
+
|
|
1381
|
+
} // end namespace internal
|
|
990
1382
|
|
|
991
|
-
}
|
|
1383
|
+
} // end namespace Eigen
|
|
992
1384
|
|
|
993
|
-
#endif
|
|
1385
|
+
#endif // EIGEN_MEMORY_H
|