@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -12,7 +12,6 @@
12
12
  // Public License v. 2.0. If a copy of the MPL was not distributed
13
13
  // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
14
14
 
15
-
16
15
  /*****************************************************************************
17
16
  *** Platform checks for aligned malloc functions ***
18
17
  *****************************************************************************/
@@ -31,11 +30,11 @@
31
30
  // http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
32
31
  // page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
33
32
  // quite safe, at least within the context of glibc, to equate 64-bit with LP64.
34
- #if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
35
- && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
36
- #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
33
+ #if defined(__GLIBC__) && ((__GLIBC__ >= 2 && __GLIBC_MINOR__ >= 8) || __GLIBC__ > 2) && defined(__LP64__) && \
34
+ !defined(__SANITIZE_ADDRESS__) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
35
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
37
36
  #else
38
- #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
37
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
39
38
  #endif
40
39
 
41
40
  // FreeBSD 6 seems to have 16-byte aligned malloc
@@ -43,35 +42,107 @@
43
42
  // FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
44
43
  // See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
45
44
  #if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
46
- #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
45
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
47
46
  #else
48
- #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
47
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
49
48
  #endif
50
49
 
51
- #if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
52
- || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
53
- || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
54
- || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
55
- #define EIGEN_MALLOC_ALREADY_ALIGNED 1
50
+ #if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) || \
51
+ EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
52
+ #define EIGEN_MALLOC_ALREADY_ALIGNED 1
56
53
  #else
57
- #define EIGEN_MALLOC_ALREADY_ALIGNED 0
54
+ #define EIGEN_MALLOC_ALREADY_ALIGNED 0
58
55
  #endif
59
56
 
60
57
  #endif
61
58
 
59
+ #ifndef EIGEN_MALLOC_CHECK_THREAD_LOCAL
60
+
61
+ // Check whether we can use the thread_local keyword to allow or disallow
62
+ // allocating memory with per-thread granularity, by means of the
63
+ // set_is_malloc_allowed() function.
64
+ #ifndef EIGEN_AVOID_THREAD_LOCAL
65
+
66
+ #if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC >= 1900) && \
67
+ !defined(EIGEN_GPU_COMPILE_PHASE)
68
+ #define EIGEN_MALLOC_CHECK_THREAD_LOCAL thread_local
69
+ #else
70
+ #define EIGEN_MALLOC_CHECK_THREAD_LOCAL
71
+ #endif
72
+
73
+ #else // EIGEN_AVOID_THREAD_LOCAL
74
+ #define EIGEN_MALLOC_CHECK_THREAD_LOCAL
75
+ #endif // EIGEN_AVOID_THREAD_LOCAL
76
+
77
+ #endif
78
+
79
+ // IWYU pragma: private
80
+ #include "../InternalHeaderCheck.h"
81
+
62
82
  namespace Eigen {
63
83
 
64
84
  namespace internal {
65
85
 
66
- EIGEN_DEVICE_FUNC
67
- inline void throw_std_bad_alloc()
68
- {
69
- #ifdef EIGEN_EXCEPTIONS
70
- throw std::bad_alloc();
71
- #else
72
- std::size_t huge = static_cast<std::size_t>(-1);
73
- ::operator new(huge);
74
- #endif
86
+ /*****************************************************************************
87
+ *** Implementation of portable aligned versions of malloc/free/realloc ***
88
+ *****************************************************************************/
89
+
90
+ #ifdef EIGEN_NO_MALLOC
91
+ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {
92
+ eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
93
+ }
94
+ EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {
95
+ eigen_assert(false && "heap deallocation is forbidden (EIGEN_NO_MALLOC is defined)");
96
+ }
97
+ #elif defined EIGEN_RUNTIME_NO_MALLOC
98
+ EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false) {
99
+ EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool value = true;
100
+ if (update == 1) value = new_value;
101
+ return value;
102
+ }
103
+ EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
104
+ EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
105
+ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {
106
+ eigen_assert(is_malloc_allowed() &&
107
+ "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and set_is_malloc_allowed is false)");
108
+ }
109
+ EIGEN_DEVICE_FUNC inline bool is_free_allowed_impl(bool update, bool new_value = false) {
110
+ EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool value = true;
111
+ if (update == 1) value = new_value;
112
+ return value;
113
+ }
114
+ EIGEN_DEVICE_FUNC inline bool is_free_allowed() { return is_free_allowed_impl(false); }
115
+ EIGEN_DEVICE_FUNC inline bool set_is_free_allowed(bool new_value) { return is_free_allowed_impl(true, new_value); }
116
+ EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {
117
+ eigen_assert(is_malloc_allowed() &&
118
+ "heap deallocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and set_is_free_allowed is false)");
119
+ }
120
+ #else
121
+ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {}
122
+ EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {}
123
+ #endif
124
+
125
+ EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() {
126
+ #ifdef EIGEN_EXCEPTIONS
127
+ throw std::bad_alloc();
128
+ #else
129
+ std::size_t huge = static_cast<std::size_t>(-1);
130
+ #if defined(EIGEN_HIPCC)
131
+ //
132
+ // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining),
133
+ // and as a consequence the code in the #else block triggers the hipcc warning :
134
+ // "no overloaded function has restriction specifiers that are compatible with the ambient context"
135
+ //
136
+ // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
137
+ // the same on "operator new"
138
+ // Reverting code back to the old version in this #if block for the hipcc compiler
139
+ //
140
+ new int[huge];
141
+ #else
142
+ void* unused = ::operator new(huge);
143
+ EIGEN_UNUSED_VARIABLE(unused);
144
+ #endif
145
+ #endif
75
146
  }
76
147
 
77
148
  /*****************************************************************************
@@ -80,124 +151,132 @@ inline void throw_std_bad_alloc()
80
151
 
81
152
  /* ----- Hand made implementations of aligned malloc/free and realloc ----- */
82
153
 
83
- /** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
84
- * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
85
- */
86
- inline void* handmade_aligned_malloc(std::size_t size)
87
- {
88
- void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
89
- if (original == 0) return 0;
90
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
91
- *(reinterpret_cast<void**>(aligned) - 1) = original;
154
+ /** \internal Like malloc, but the returned pointer is guaranteed to be aligned to `alignment`.
155
+ * Fast, but wastes `alignment` additional bytes of memory. Does not throw any exception.
156
+ */
157
+ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size,
158
+ std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) {
159
+ eigen_assert(alignment >= sizeof(void*) && alignment <= 256 && (alignment & (alignment - 1)) == 0 &&
160
+ "Alignment must be at least sizeof(void*), less than or equal to 256, and a power of 2");
161
+
162
+ check_that_malloc_is_allowed();
163
+ EIGEN_USING_STD(malloc)
164
+ void* original = malloc(size + alignment);
165
+ if (original == nullptr) return nullptr;
166
+ std::size_t offset = alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1));
167
+ void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
168
+ // Store offset - 1, since it is guaranteed to be at least 1.
169
+ *(static_cast<uint8_t*>(aligned) - 1) = static_cast<uint8_t>(offset - 1);
92
170
  return aligned;
93
171
  }
94
172
 
95
173
  /** \internal Frees memory allocated with handmade_aligned_malloc */
96
- inline void handmade_aligned_free(void *ptr)
97
- {
98
- if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
174
+ EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void* ptr) {
175
+ if (ptr != nullptr) {
176
+ std::size_t offset = static_cast<std::size_t>(*(static_cast<uint8_t*>(ptr) - 1)) + 1;
177
+ void* original = static_cast<void*>(static_cast<uint8_t*>(ptr) - offset);
178
+
179
+ check_that_free_is_allowed();
180
+ EIGEN_USING_STD(free)
181
+ free(original);
182
+ }
99
183
  }
100
184
 
101
185
  /** \internal
102
- * \brief Reallocates aligned memory.
103
- * Since we know that our handmade version is based on std::malloc
104
- * we can use std::realloc to implement efficient reallocation.
105
- */
106
- inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
107
- {
108
- if (ptr == 0) return handmade_aligned_malloc(size);
109
- void *original = *(reinterpret_cast<void**>(ptr) - 1);
110
- std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
111
- original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
112
- if (original == 0) return 0;
113
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
114
- void *previous_aligned = static_cast<char *>(original)+previous_offset;
115
- if(aligned!=previous_aligned)
116
- std::memmove(aligned, previous_aligned, size);
117
-
118
- *(reinterpret_cast<void**>(aligned) - 1) = original;
186
+ * \brief Reallocates aligned memory.
187
+ * Since we know that our handmade version is based on std::malloc
188
+ * we can use std::realloc to implement efficient reallocation.
189
+ */
190
+ EIGEN_DEVICE_FUNC inline void* handmade_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size,
191
+ std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) {
192
+ if (ptr == nullptr) return handmade_aligned_malloc(new_size, alignment);
193
+ std::size_t old_offset = static_cast<std::size_t>(*(static_cast<uint8_t*>(ptr) - 1)) + 1;
194
+ void* old_original = static_cast<uint8_t*>(ptr) - old_offset;
195
+
196
+ check_that_malloc_is_allowed();
197
+ EIGEN_USING_STD(realloc)
198
+ void* original = realloc(old_original, new_size + alignment);
199
+ if (original == nullptr) return nullptr;
200
+ if (original == old_original) return ptr;
201
+ std::size_t offset = alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1));
202
+ void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
203
+ if (offset != old_offset) {
204
+ const void* src = static_cast<const void*>(static_cast<uint8_t*>(original) + old_offset);
205
+ std::size_t count = (std::min)(new_size, old_size);
206
+ std::memmove(aligned, src, count);
207
+ }
208
+ // Store offset - 1, since it is guaranteed to be at least 1.
209
+ *(static_cast<uint8_t*>(aligned) - 1) = static_cast<uint8_t>(offset - 1);
119
210
  return aligned;
120
211
  }
121
212
 
122
- /*****************************************************************************
123
- *** Implementation of portable aligned versions of malloc/free/realloc ***
124
- *****************************************************************************/
213
+ /** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on
214
+ * the requirements. On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
215
+ */
216
+ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) {
217
+ if (size == 0) return nullptr;
125
218
 
126
- #ifdef EIGEN_NO_MALLOC
127
- EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
128
- {
129
- eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
130
- }
131
- #elif defined EIGEN_RUNTIME_NO_MALLOC
132
- EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
133
- {
134
- static bool value = true;
135
- if (update == 1)
136
- value = new_value;
137
- return value;
138
- }
139
- EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
140
- EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
141
- EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
142
- {
143
- eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
144
- }
145
- #else
146
- EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
147
- {}
148
- #endif
219
+ void* result;
220
+ #if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
149
221
 
150
- /** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
151
- * On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
152
- */
153
- EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
154
- {
155
222
  check_that_malloc_is_allowed();
223
+ EIGEN_USING_STD(malloc)
224
+ result = malloc(size);
156
225
 
157
- void *result;
158
- #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
159
- result = std::malloc(size);
160
- #if EIGEN_DEFAULT_ALIGN_BYTES==16
161
- eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator.");
162
- #endif
163
- #else
164
- result = handmade_aligned_malloc(size);
165
- #endif
226
+ #if EIGEN_DEFAULT_ALIGN_BYTES == 16
227
+ eigen_assert((size < 16 || (std::size_t(result) % 16) == 0) &&
228
+ "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback "
229
+ "to handmade aligned memory allocator.");
230
+ #endif
231
+ #else
232
+ result = handmade_aligned_malloc(size);
233
+ #endif
166
234
 
167
- if(!result && size)
168
- throw_std_bad_alloc();
235
+ if (!result && size) throw_std_bad_alloc();
169
236
 
170
237
  return result;
171
238
  }
172
239
 
173
240
  /** \internal Frees memory allocated with aligned_malloc. */
174
- EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
175
- {
176
- #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
177
- std::free(ptr);
178
- #else
179
- handmade_aligned_free(ptr);
180
- #endif
241
+ EIGEN_DEVICE_FUNC inline void aligned_free(void* ptr) {
242
+ #if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
243
+
244
+ if (ptr != nullptr) {
245
+ check_that_free_is_allowed();
246
+ EIGEN_USING_STD(free)
247
+ free(ptr);
248
+ }
249
+
250
+ #else
251
+ handmade_aligned_free(ptr);
252
+ #endif
181
253
  }
182
254
 
183
255
  /**
184
- * \internal
185
- * \brief Reallocates an aligned block of memory.
186
- * \throws std::bad_alloc on allocation failure
187
- */
188
- inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
189
- {
190
- EIGEN_UNUSED_VARIABLE(old_size);
191
-
192
- void *result;
193
- #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
194
- result = std::realloc(ptr,new_size);
256
+ * \internal
257
+ * \brief Reallocates an aligned block of memory.
258
+ * \throws std::bad_alloc on allocation failure
259
+ */
260
+ EIGEN_DEVICE_FUNC inline void* aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) {
261
+ if (ptr == nullptr) return aligned_malloc(new_size);
262
+ if (old_size == new_size) return ptr;
263
+ if (new_size == 0) {
264
+ aligned_free(ptr);
265
+ return nullptr;
266
+ }
267
+
268
+ void* result;
269
+ #if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
270
+ EIGEN_UNUSED_VARIABLE(old_size)
271
+
272
+ check_that_malloc_is_allowed();
273
+ EIGEN_USING_STD(realloc)
274
+ result = realloc(ptr, new_size);
195
275
  #else
196
- result = handmade_aligned_realloc(ptr,new_size,old_size);
276
+ result = handmade_aligned_realloc(ptr, new_size, old_size);
197
277
  #endif
198
278
 
199
- if (!result && new_size)
200
- throw_std_bad_alloc();
279
+ if (!result && new_size) throw_std_bad_alloc();
201
280
 
202
281
  return result;
203
282
  }
@@ -207,42 +286,58 @@ inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_si
207
286
  *****************************************************************************/
208
287
 
209
288
  /** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
210
- * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
211
- */
212
- template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
213
- {
289
+ * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
290
+ */
291
+ template <bool Align>
292
+ EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) {
214
293
  return aligned_malloc(size);
215
294
  }
216
295
 
217
- template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
218
- {
296
+ template <>
297
+ EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size) {
298
+ if (size == 0) return nullptr;
299
+
219
300
  check_that_malloc_is_allowed();
301
+ EIGEN_USING_STD(malloc)
302
+ void* result = malloc(size);
220
303
 
221
- void *result = std::malloc(size);
222
- if(!result && size)
223
- throw_std_bad_alloc();
304
+ if (!result && size) throw_std_bad_alloc();
224
305
  return result;
225
306
  }
226
307
 
227
308
  /** \internal Frees memory allocated with conditional_aligned_malloc */
228
- template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
229
- {
309
+ template <bool Align>
310
+ EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void* ptr) {
230
311
  aligned_free(ptr);
231
312
  }
232
313
 
233
- template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
234
- {
235
- std::free(ptr);
314
+ template <>
315
+ EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void* ptr) {
316
+ if (ptr != nullptr) {
317
+ check_that_free_is_allowed();
318
+ EIGEN_USING_STD(free)
319
+ free(ptr);
320
+ }
236
321
  }
237
322
 
238
- template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
239
- {
323
+ template <bool Align>
324
+ EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) {
240
325
  return aligned_realloc(ptr, new_size, old_size);
241
326
  }
242
327
 
243
- template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
244
- {
245
- return std::realloc(ptr, new_size);
328
+ template <>
329
+ EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size,
330
+ std::size_t old_size) {
331
+ if (ptr == nullptr) return conditional_aligned_malloc<false>(new_size);
332
+ if (old_size == new_size) return ptr;
333
+ if (new_size == 0) {
334
+ conditional_aligned_free<false>(ptr);
335
+ return nullptr;
336
+ }
337
+
338
+ check_that_malloc_is_allowed();
339
+ EIGEN_USING_STD(realloc)
340
+ return realloc(ptr, new_size);
246
341
  }
247
342
 
248
343
  /*****************************************************************************
@@ -250,75 +345,95 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_
250
345
  *****************************************************************************/
251
346
 
252
347
  /** \internal Destructs the elements of an array.
253
- * The \a size parameters tells on how many objects to call the destructor of T.
254
- */
255
- template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
256
- {
348
+ * The \a size parameters tells on how many objects to call the destructor of T.
349
+ */
350
+ template <typename T>
351
+ EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T* ptr, std::size_t size) {
257
352
  // always destruct an array starting from the end.
258
- if(ptr)
259
- while(size) ptr[--size].~T();
353
+ if (ptr)
354
+ while (size) ptr[--size].~T();
260
355
  }
261
356
 
262
357
  /** \internal Constructs the elements of an array.
263
- * The \a size parameter tells on how many objects to call the constructor of T.
264
- */
265
- template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
266
- {
267
- std::size_t i;
268
- EIGEN_TRY
269
- {
270
- for (i = 0; i < size; ++i) ::new (ptr + i) T;
271
- return ptr;
272
- }
273
- EIGEN_CATCH(...)
274
- {
358
+ * The \a size parameter tells on how many objects to call the constructor of T.
359
+ */
360
+ template <typename T>
361
+ EIGEN_DEVICE_FUNC inline T* default_construct_elements_of_array(T* ptr, std::size_t size) {
362
+ std::size_t i = 0;
363
+ EIGEN_TRY {
364
+ for (i = 0; i < size; ++i) ::new (ptr + i) T;
365
+ }
366
+ EIGEN_CATCH(...) {
367
+ destruct_elements_of_array(ptr, i);
368
+ EIGEN_THROW;
369
+ }
370
+ return ptr;
371
+ }
372
+
373
+ /** \internal Copy-constructs the elements of an array.
374
+ * The \a size parameter tells on how many objects to copy.
375
+ */
376
+ template <typename T>
377
+ EIGEN_DEVICE_FUNC inline T* copy_construct_elements_of_array(T* ptr, const T* src, std::size_t size) {
378
+ std::size_t i = 0;
379
+ EIGEN_TRY {
380
+ for (i = 0; i < size; ++i) ::new (ptr + i) T(*(src + i));
381
+ }
382
+ EIGEN_CATCH(...) {
275
383
  destruct_elements_of_array(ptr, i);
276
384
  EIGEN_THROW;
277
385
  }
278
- return NULL;
386
+ return ptr;
387
+ }
388
+
389
+ /** \internal Move-constructs the elements of an array.
390
+ * The \a size parameter tells on how many objects to move.
391
+ */
392
+ template <typename T>
393
+ EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T* ptr, T* src, std::size_t size) {
394
+ std::size_t i = 0;
395
+ EIGEN_TRY {
396
+ for (i = 0; i < size; ++i) ::new (ptr + i) T(std::move(*(src + i)));
397
+ }
398
+ EIGEN_CATCH(...) {
399
+ destruct_elements_of_array(ptr, i);
400
+ EIGEN_THROW;
401
+ }
402
+ return ptr;
279
403
  }
280
404
 
281
405
  /*****************************************************************************
282
406
  *** Implementation of aligned new/delete-like functions ***
283
407
  *****************************************************************************/
284
408
 
285
- template<typename T>
286
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
287
- {
288
- if(size > std::size_t(-1) / sizeof(T))
289
- throw_std_bad_alloc();
409
+ template <typename T>
410
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) {
411
+ constexpr std::size_t max_elements = (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T);
412
+ if (size > max_elements) throw_std_bad_alloc();
290
413
  }
291
414
 
292
415
  /** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
293
- * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
294
- * The default constructor of T is called.
295
- */
296
- template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
297
- {
416
+ * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
417
+ * The default constructor of T is called.
418
+ */
419
+ template <typename T>
420
+ EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) {
298
421
  check_size_for_overflow<T>(size);
299
- T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
300
- EIGEN_TRY
301
- {
302
- return construct_elements_of_array(result, size);
303
- }
304
- EIGEN_CATCH(...)
305
- {
422
+ T* result = static_cast<T*>(aligned_malloc(sizeof(T) * size));
423
+ EIGEN_TRY { return default_construct_elements_of_array(result, size); }
424
+ EIGEN_CATCH(...) {
306
425
  aligned_free(result);
307
426
  EIGEN_THROW;
308
427
  }
309
428
  return result;
310
429
  }
311
430
 
312
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
313
- {
431
+ template <typename T, bool Align>
432
+ EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) {
314
433
  check_size_for_overflow<T>(size);
315
- T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
316
- EIGEN_TRY
317
- {
318
- return construct_elements_of_array(result, size);
319
- }
320
- EIGEN_CATCH(...)
321
- {
434
+ T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
435
+ EIGEN_TRY { return default_construct_elements_of_array(result, size); }
436
+ EIGEN_CATCH(...) {
322
437
  conditional_aligned_free<Align>(result);
323
438
  EIGEN_THROW;
324
439
  }
@@ -326,60 +441,62 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
326
441
  }
327
442
 
328
443
  /** \internal Deletes objects constructed with aligned_new
329
- * The \a size parameters tells on how many objects to call the destructor of T.
330
- */
331
- template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
332
- {
444
+ * The \a size parameters tells on how many objects to call the destructor of T.
445
+ */
446
+ template <typename T>
447
+ EIGEN_DEVICE_FUNC inline void aligned_delete(T* ptr, std::size_t size) {
333
448
  destruct_elements_of_array<T>(ptr, size);
334
449
  aligned_free(ptr);
335
450
  }
336
451
 
337
452
  /** \internal Deletes objects constructed with conditional_aligned_new
338
- * The \a size parameters tells on how many objects to call the destructor of T.
339
- */
340
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
341
- {
453
+ * The \a size parameters tells on how many objects to call the destructor of T.
454
+ */
455
+ template <typename T, bool Align>
456
+ EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T* ptr, std::size_t size) {
342
457
  destruct_elements_of_array<T>(ptr, size);
343
458
  conditional_aligned_free<Align>(ptr);
344
459
  }
345
460
 
346
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
347
- {
461
+ template <typename T, bool Align>
462
+ EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size) {
348
463
  check_size_for_overflow<T>(new_size);
349
464
  check_size_for_overflow<T>(old_size);
350
- if(new_size < old_size)
351
- destruct_elements_of_array(pts+new_size, old_size-new_size);
352
- T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
353
- if(new_size > old_size)
354
- {
355
- EIGEN_TRY
356
- {
357
- construct_elements_of_array(result+old_size, new_size-old_size);
358
- }
359
- EIGEN_CATCH(...)
360
- {
361
- conditional_aligned_free<Align>(result);
362
- EIGEN_THROW;
465
+
466
+ // If elements need to be explicitly initialized, we cannot simply realloc
467
+ // (or memcpy) the memory block - each element needs to be reconstructed.
468
+ // Otherwise, objects that contain internal pointers like mpfr or
469
+ // AnnoyingScalar can be pointing to the wrong thing.
470
+ T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * new_size));
471
+ EIGEN_TRY {
472
+ // Move-construct initial elements.
473
+ std::size_t copy_size = (std::min)(old_size, new_size);
474
+ move_construct_elements_of_array(result, pts, copy_size);
475
+
476
+ // Default-construct remaining elements.
477
+ if (new_size > old_size) {
478
+ default_construct_elements_of_array(result + copy_size, new_size - old_size);
363
479
  }
480
+
481
+ // Delete old elements.
482
+ conditional_aligned_delete<T, Align>(pts, old_size);
483
+ }
484
+ EIGEN_CATCH(...) {
485
+ conditional_aligned_free<Align>(result);
486
+ EIGEN_THROW;
364
487
  }
488
+
365
489
  return result;
366
490
  }
367
491
 
368
-
369
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
370
- {
371
- if(size==0)
372
- return 0; // short-cut. Also fixes Bug 884
492
+ template <typename T, bool Align>
493
+ EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) {
494
+ if (size == 0) return nullptr; // short-cut. Also fixes Bug 884
373
495
  check_size_for_overflow<T>(size);
374
- T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
375
- if(NumTraits<T>::RequireInitialization)
376
- {
377
- EIGEN_TRY
378
- {
379
- construct_elements_of_array(result, size);
380
- }
381
- EIGEN_CATCH(...)
382
- {
496
+ T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
497
+ if (NumTraits<T>::RequireInitialization) {
498
+ EIGEN_TRY { default_construct_elements_of_array(result, size); }
499
+ EIGEN_CATCH(...) {
383
500
  conditional_aligned_free<Align>(result);
384
501
  EIGEN_THROW;
385
502
  }
@@ -387,154 +504,140 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
387
504
  return result;
388
505
  }
389
506
 
390
- template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
391
- {
507
+ template <typename T, bool Align>
508
+ EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) {
509
+ if (NumTraits<T>::RequireInitialization) {
510
+ return conditional_aligned_realloc_new<T, Align>(pts, new_size, old_size);
511
+ }
512
+
392
513
  check_size_for_overflow<T>(new_size);
393
514
  check_size_for_overflow<T>(old_size);
394
- if(NumTraits<T>::RequireInitialization && (new_size < old_size))
395
- destruct_elements_of_array(pts+new_size, old_size-new_size);
396
- T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
397
- if(NumTraits<T>::RequireInitialization && (new_size > old_size))
398
- {
399
- EIGEN_TRY
400
- {
401
- construct_elements_of_array(result+old_size, new_size-old_size);
402
- }
403
- EIGEN_CATCH(...)
404
- {
405
- conditional_aligned_free<Align>(result);
406
- EIGEN_THROW;
407
- }
408
- }
409
- return result;
515
+ return static_cast<T*>(
516
+ conditional_aligned_realloc<Align>(static_cast<void*>(pts), sizeof(T) * new_size, sizeof(T) * old_size));
410
517
  }
411
518
 
412
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
413
- {
414
- if(NumTraits<T>::RequireInitialization)
415
- destruct_elements_of_array<T>(ptr, size);
519
+ template <typename T, bool Align>
520
+ EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T* ptr, std::size_t size) {
521
+ if (NumTraits<T>::RequireInitialization) destruct_elements_of_array<T>(ptr, size);
416
522
  conditional_aligned_free<Align>(ptr);
417
523
  }
418
524
 
419
525
  /****************************************************************************/
420
526
 
421
- /** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment.
422
- *
423
- * \tparam Alignment requested alignment in Bytes.
424
- * \param array the address of the start of the array
425
- * \param size the size of the array
426
- *
427
- * \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
428
- * the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
429
- * packet size for the given scalar type is 1, then everything is considered well-aligned.
430
- *
431
- * \note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a
432
- * power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
433
- * example with Scalar=double on certain 32-bit platforms, see bug #79.
434
- *
435
- * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
436
- * \sa first_default_aligned()
437
- */
438
- template<int Alignment, typename Scalar, typename Index>
439
- EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
440
- {
527
+ /** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a
528
+ * Alignment.
529
+ *
530
+ * \tparam Alignment requested alignment in Bytes.
531
+ * \param array the address of the start of the array
532
+ * \param size the size of the array
533
+ *
534
+ * \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
535
+ * the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
536
+ * packet size for the given scalar type is 1, then everything is considered well-aligned.
537
+ *
538
+ * \note Otherwise, if the Alignment is larger than the scalar size, we rely on the assumption that sizeof(Scalar) is a
539
+ * power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails
540
+ * for example with Scalar=double on certain 32-bit platforms, see bug #79.
541
+ *
542
+ * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
543
+ * \sa first_default_aligned()
544
+ */
545
+ template <int Alignment, typename Scalar, typename Index>
546
+ EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) {
441
547
  const Index ScalarSize = sizeof(Scalar);
442
548
  const Index AlignmentSize = Alignment / ScalarSize;
443
- const Index AlignmentMask = AlignmentSize-1;
549
+ const Index AlignmentMask = AlignmentSize - 1;
444
550
 
445
- if(AlignmentSize<=1)
446
- {
551
+ if (AlignmentSize <= 1) {
447
552
  // Either the requested alignment is smaller than a scalar, or it exactly matches one scalar,
448
553
  // so that all elements of the array have the same alignment.
449
554
  return 0;
450
- }
451
- else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
452
- {
453
- // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
454
- // Consequently, no element of the array is well aligned.
555
+ } else if ((std::uintptr_t(array) & (sizeof(Scalar) - 1)) || (Alignment % ScalarSize) != 0) {
556
+ // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the
557
+ // scalar size. Consequently, no element of the array is well aligned.
455
558
  return size;
456
- }
457
- else
458
- {
459
- Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
559
+ } else {
560
+ Index first = (AlignmentSize - (Index((std::uintptr_t(array) / sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
460
561
  return (first < size) ? first : size;
461
562
  }
462
563
  }
463
564
 
464
- /** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.
465
- * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
466
- template<typename Scalar, typename Index>
467
- EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
468
- {
565
+ /** \internal Returns the index of the first element of the array that is well aligned with respect to the largest packet
566
+ * requirement. \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
567
+ template <typename Scalar, typename Index>
568
+ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size) {
469
569
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
470
570
  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
471
571
  }
472
572
 
473
573
  /** \internal Returns the smallest integer multiple of \a base that is greater than or equal to \a size
474
- */
475
- template<typename Index>
476
- inline Index first_multiple(Index size, Index base)
477
- {
478
- return ((size+base-1)/base)*base;
574
+ */
575
+ template <typename Index>
576
+ inline Index first_multiple(Index size, Index base) {
577
+ return ((size + base - 1) / base) * base;
479
578
  }
480
579
 
481
580
  // std::copy is much slower than memcpy, so let's introduce a smart_copy which
482
581
  // uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
483
- template<typename T, bool UseMemcpy> struct smart_copy_helper;
582
+ template <typename T, bool UseMemcpy>
583
+ struct smart_copy_helper;
484
584
 
485
- template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
486
- {
487
- smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
585
+ template <typename T>
586
+ EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target) {
587
+ smart_copy_helper<T, !NumTraits<T>::RequireInitialization>::run(start, end, target);
488
588
  }
489
589
 
490
- template<typename T> struct smart_copy_helper<T,true> {
491
- EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
492
- {
493
- IntPtr size = IntPtr(end)-IntPtr(start);
494
- if(size==0) return;
495
- eigen_internal_assert(start!=0 && end!=0 && target!=0);
496
- std::memcpy(target, start, size);
590
+ template <typename T>
591
+ struct smart_copy_helper<T, true> {
592
+ EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) {
593
+ std::intptr_t size = std::intptr_t(end) - std::intptr_t(start);
594
+ if (size == 0) return;
595
+ eigen_internal_assert(start != 0 && end != 0 && target != 0);
596
+ EIGEN_USING_STD(memcpy)
597
+ memcpy(target, start, size);
497
598
  }
498
599
  };
499
600
 
500
- template<typename T> struct smart_copy_helper<T,false> {
501
- EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
502
- { std::copy(start, end, target); }
601
+ template <typename T>
602
+ struct smart_copy_helper<T, false> {
603
+ EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { std::copy(start, end, target); }
503
604
  };
504
605
 
505
- // intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
506
- template<typename T, bool UseMemmove> struct smart_memmove_helper;
606
+ // intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
607
+ template <typename T, bool UseMemmove>
608
+ struct smart_memmove_helper;
507
609
 
508
- template<typename T> void smart_memmove(const T* start, const T* end, T* target)
509
- {
510
- smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
610
+ template <typename T>
611
+ void smart_memmove(const T* start, const T* end, T* target) {
612
+ smart_memmove_helper<T, !NumTraits<T>::RequireInitialization>::run(start, end, target);
511
613
  }
512
614
 
513
- template<typename T> struct smart_memmove_helper<T,true> {
514
- static inline void run(const T* start, const T* end, T* target)
515
- {
516
- IntPtr size = IntPtr(end)-IntPtr(start);
517
- if(size==0) return;
518
- eigen_internal_assert(start!=0 && end!=0 && target!=0);
615
+ template <typename T>
616
+ struct smart_memmove_helper<T, true> {
617
+ static inline void run(const T* start, const T* end, T* target) {
618
+ std::intptr_t size = std::intptr_t(end) - std::intptr_t(start);
619
+ if (size == 0) return;
620
+ eigen_internal_assert(start != 0 && end != 0 && target != 0);
519
621
  std::memmove(target, start, size);
520
622
  }
521
623
  };
522
624
 
523
- template<typename T> struct smart_memmove_helper<T,false> {
524
- static inline void run(const T* start, const T* end, T* target)
525
- {
526
- if (UIntPtr(target) < UIntPtr(start))
527
- {
625
+ template <typename T>
626
+ struct smart_memmove_helper<T, false> {
627
+ static inline void run(const T* start, const T* end, T* target) {
628
+ if (std::uintptr_t(target) < std::uintptr_t(start)) {
528
629
  std::copy(start, end, target);
529
- }
530
- else
531
- {
532
- std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
533
- std::copy_backward(start, end, target + count);
630
+ } else {
631
+ std::ptrdiff_t count = (std::ptrdiff_t(end) - std::ptrdiff_t(start)) / sizeof(T);
632
+ std::copy_backward(start, end, target + count);
534
633
  }
535
634
  }
536
635
  };
537
636
 
637
+ template <typename T>
638
+ EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) {
639
+ return std::move(start, end, target);
640
+ }
538
641
 
539
642
  /*****************************************************************************
540
643
  *** Implementation of runtime stack allocation (falling back to malloc) ***
@@ -542,452 +645,741 @@ template<typename T> struct smart_memmove_helper<T,false> {
542
645
 
543
646
  // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
544
647
  // to the appropriate stack allocation function
545
- #ifndef EIGEN_ALLOCA
546
- #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
547
- #define EIGEN_ALLOCA alloca
548
- #elif EIGEN_COMP_MSVC
549
- #define EIGEN_ALLOCA _alloca
550
- #endif
648
+ #if !defined EIGEN_ALLOCA && !defined EIGEN_GPU_COMPILE_PHASE
649
+ #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
650
+ #define EIGEN_ALLOCA alloca
651
+ #elif EIGEN_COMP_MSVC
652
+ #define EIGEN_ALLOCA _alloca
653
+ #endif
654
+ #endif
655
+
656
+ // With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
657
+ // not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
658
+ // the compiler still emits bad code because stack allocation checks use "<=".
659
+ // TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
660
+ // is fixed.
661
+ #if defined(__clang__) && defined(__thumb__)
662
+ #undef EIGEN_ALLOCA
551
663
  #endif
552
664
 
553
665
  // This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
554
666
  // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
555
- template<typename T> class aligned_stack_memory_handler : noncopyable
556
- {
557
- public:
558
- /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
559
- * Note that \a ptr can be 0 regardless of the other parameters.
560
- * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
561
- * In this case, the buffer elements will also be destructed when this handler will be destructed.
562
- * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
563
- **/
564
- aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
565
- : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
566
- {
567
- if(NumTraits<T>::RequireInitialization && m_ptr)
568
- Eigen::internal::construct_elements_of_array(m_ptr, size);
569
- }
570
- ~aligned_stack_memory_handler()
571
- {
572
- if(NumTraits<T>::RequireInitialization && m_ptr)
573
- Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
574
- if(m_deallocate)
575
- Eigen::internal::aligned_free(m_ptr);
576
- }
577
- protected:
578
- T* m_ptr;
579
- std::size_t m_size;
580
- bool m_deallocate;
581
- };
667
+ template <typename T>
668
+ class aligned_stack_memory_handler : noncopyable {
669
+ public:
670
+ /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
671
+ * Note that \a ptr can be 0 regardless of the other parameters.
672
+ * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type
673
+ *T (see NumTraits<T>::RequireInitialization). In this case, the buffer elements will also be destructed when this
674
+ *handler will be destructed. Finally, if \a dealloc is true, then the pointer \a ptr is freed.
675
+ **/
676
+ EIGEN_DEVICE_FUNC aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
677
+ : m_ptr(ptr), m_size(size), m_deallocate(dealloc) {
678
+ if (NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::default_construct_elements_of_array(m_ptr, size);
679
+ }
680
+ EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() {
681
+ if (NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
682
+ if (m_deallocate) Eigen::internal::aligned_free(m_ptr);
683
+ }
582
684
 
583
- template<typename T> class scoped_array : noncopyable
584
- {
685
+ protected:
585
686
  T* m_ptr;
586
- public:
587
- explicit scoped_array(std::ptrdiff_t size)
588
- {
589
- m_ptr = new T[size];
687
+ std::size_t m_size;
688
+ bool m_deallocate;
689
+ };
690
+
691
+ #ifdef EIGEN_ALLOCA
692
+
693
+ template <typename Xpr, int NbEvaluations,
694
+ bool MapExternalBuffer = nested_eval<Xpr, NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime == Dynamic>
695
+ struct local_nested_eval_wrapper {
696
+ static constexpr bool NeedExternalBuffer = false;
697
+ typedef typename Xpr::Scalar Scalar;
698
+ typedef typename nested_eval<Xpr, NbEvaluations>::type ObjectType;
699
+ ObjectType object;
700
+
701
+ EIGEN_DEVICE_FUNC local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr) {
702
+ EIGEN_UNUSED_VARIABLE(ptr);
703
+ eigen_internal_assert(ptr == 0);
590
704
  }
591
- ~scoped_array()
592
- {
593
- delete[] m_ptr;
705
+ };
706
+
707
+ template <typename Xpr, int NbEvaluations>
708
+ struct local_nested_eval_wrapper<Xpr, NbEvaluations, true> {
709
+ static constexpr bool NeedExternalBuffer = true;
710
+ typedef typename Xpr::Scalar Scalar;
711
+ typedef typename plain_object_eval<Xpr>::type PlainObject;
712
+ typedef Map<PlainObject, EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
713
+ ObjectType object;
714
+
715
+ EIGEN_DEVICE_FUNC local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
716
+ : object(ptr == 0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar) * xpr.size())) : ptr,
717
+ xpr.rows(), xpr.cols()),
718
+ m_deallocate(ptr == 0) {
719
+ if (NumTraits<Scalar>::RequireInitialization && object.data())
720
+ Eigen::internal::default_construct_elements_of_array(object.data(), object.size());
721
+ object = xpr;
722
+ }
723
+
724
+ EIGEN_DEVICE_FUNC ~local_nested_eval_wrapper() {
725
+ if (NumTraits<Scalar>::RequireInitialization && object.data())
726
+ Eigen::internal::destruct_elements_of_array(object.data(), object.size());
727
+ if (m_deallocate) Eigen::internal::aligned_free(object.data());
594
728
  }
729
+
730
+ private:
731
+ bool m_deallocate;
732
+ };
733
+
734
+ #endif // EIGEN_ALLOCA
735
+
736
+ template <typename T>
737
+ class scoped_array : noncopyable {
738
+ T* m_ptr;
739
+
740
+ public:
741
+ explicit scoped_array(std::ptrdiff_t size) { m_ptr = new T[size]; }
742
+ ~scoped_array() { delete[] m_ptr; }
595
743
  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
596
744
  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
597
- T* &ptr() { return m_ptr; }
745
+ T*& ptr() { return m_ptr; }
598
746
  const T* ptr() const { return m_ptr; }
599
747
  operator const T*() const { return m_ptr; }
600
748
  };
601
749
 
602
- template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
603
- {
604
- std::swap(a.ptr(),b.ptr());
750
+ template <typename T>
751
+ void swap(scoped_array<T>& a, scoped_array<T>& b) {
752
+ std::swap(a.ptr(), b.ptr());
605
753
  }
606
-
607
- } // end namespace internal
608
754
 
609
- /** \internal
610
- * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
611
- * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
612
- * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
613
- * The allocated buffer is automatically deleted when exiting the scope of this declaration.
614
- * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
615
- * Here is an example:
616
- * \code
617
- * {
618
- * ei_declare_aligned_stack_constructed_variable(float,data,size,0);
619
- * // use data[0] to data[size-1]
620
- * }
621
- * \endcode
622
- * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
623
- */
624
- #ifdef EIGEN_ALLOCA
625
-
626
- #if EIGEN_DEFAULT_ALIGN_BYTES>0
627
- // We always manually re-align the result of EIGEN_ALLOCA.
628
- // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
629
- #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
630
- #else
631
- #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
632
- #endif
633
-
634
- #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
635
- Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
636
- TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
637
- : reinterpret_cast<TYPE*>( \
638
- (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
639
- : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
640
- Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
755
+ } // end namespace internal
641
756
 
757
+ /** \internal
758
+ *
759
+ * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
760
+ * and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack
761
+ * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the
762
+ * platform (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap. The
763
+ * allocated buffer is automatically deleted when exiting the scope of this declaration. If BUFFER is non null, then the
764
+ * declared variable is simply an alias for BUFFER, and no allocation/deletion occurs. Here is an example: \code
765
+ * {
766
+ * ei_declare_aligned_stack_constructed_variable(float,data,size,0);
767
+ * // use data[0] to data[size-1]
768
+ * }
769
+ * \endcode
770
+ * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
771
+ *
772
+ * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogous to
773
+ * \code
774
+ * typename internal::nested_eval<XPR_T,N>::type NAME(XPR);
775
+ * \endcode
776
+ * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
777
+ * This is accomplished through alloca if the latter is supported and if the required number of bytes
778
+ * is below EIGEN_STACK_ALLOCATION_LIMIT.
779
+ */
780
+ #if defined(EIGEN_ALLOCA) && !defined(EIGEN_NO_ALLOCA)
781
+
782
+ #if EIGEN_DEFAULT_ALIGN_BYTES > 0
783
+ // We always manually re-align the result of EIGEN_ALLOCA.
784
+ // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
785
+
786
+ #if ((EIGEN_COMP_GNUC || EIGEN_COMP_CLANG) && !EIGEN_COMP_NVHPC)
787
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) __builtin_alloca_with_align(SIZE, CHAR_BIT* EIGEN_DEFAULT_ALIGN_BYTES)
642
788
  #else
789
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* ptr) {
790
+ constexpr std::uintptr_t mask = EIGEN_DEFAULT_ALIGN_BYTES - 1;
791
+ std::uintptr_t ptr_int = std::uintptr_t(ptr);
792
+ std::uintptr_t aligned_ptr_int = (ptr_int + mask) & ~mask;
793
+ std::uintptr_t offset = aligned_ptr_int - ptr_int;
794
+ return static_cast<void*>(static_cast<uint8_t*>(ptr) + offset);
795
+ }
796
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) eigen_aligned_alloca_helper(EIGEN_ALLOCA(SIZE + EIGEN_DEFAULT_ALIGN_BYTES - 1))
797
+ #endif
643
798
 
644
- #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
645
- Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
646
- TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
647
- Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
648
-
799
+ #else
800
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
649
801
  #endif
650
802
 
803
+ #define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
804
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
805
+ TYPE* NAME = (BUFFER) != 0 ? (BUFFER) \
806
+ : reinterpret_cast<TYPE*>((sizeof(TYPE) * (SIZE) <= EIGEN_STACK_ALLOCATION_LIMIT) \
807
+ ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE) * (SIZE)) \
808
+ : Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
809
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
810
+ (BUFFER) == 0 ? NAME : 0, SIZE, sizeof(TYPE) * (SIZE) > EIGEN_STACK_ALLOCATION_LIMIT)
811
+
812
+ #define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
813
+ Eigen::internal::local_nested_eval_wrapper<XPR_T, N> EIGEN_CAT(NAME, _wrapper)( \
814
+ XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
815
+ ((Eigen::internal::local_nested_eval_wrapper<XPR_T, N>::NeedExternalBuffer) && \
816
+ ((sizeof(typename XPR_T::Scalar) * XPR.size()) <= EIGEN_STACK_ALLOCATION_LIMIT)) \
817
+ ? EIGEN_ALIGNED_ALLOCA(sizeof(typename XPR_T::Scalar) * XPR.size()) \
818
+ : 0)); \
819
+ typename Eigen::internal::local_nested_eval_wrapper<XPR_T, N>::ObjectType NAME(EIGEN_CAT(NAME, _wrapper).object)
820
+
821
+ #else
822
+
823
+ #define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
824
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
825
+ TYPE* NAME = \
826
+ (BUFFER) != 0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
827
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
828
+ (BUFFER) == 0 ? NAME : 0, SIZE, true)
829
+
830
+ #define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
831
+ typename Eigen::internal::nested_eval<XPR_T, N>::type NAME(XPR)
832
+
833
+ #endif
651
834
 
652
835
  /*****************************************************************************
653
836
  *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
654
837
  *****************************************************************************/
655
838
 
656
- #if EIGEN_MAX_ALIGN_BYTES!=0
657
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
658
- void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
659
- EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
660
- EIGEN_CATCH (...) { return 0; } \
661
- }
662
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
663
- void *operator new(std::size_t size) { \
664
- return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
665
- } \
666
- void *operator new[](std::size_t size) { \
667
- return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
668
- } \
669
- void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
670
- void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
671
- void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
672
- void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
673
- /* in-place new and delete. since (at least afaik) there is no actual */ \
674
- /* memory allocated we can safely let the default implementation handle */ \
675
- /* this particular case. */ \
676
- static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
677
- static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
678
- void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
679
- void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
680
- /* nothrow-new (returns zero instead of std::bad_alloc) */ \
681
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
682
- void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
683
- Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
684
- } \
685
- typedef void eigen_aligned_operator_new_marker_type;
839
+ #if EIGEN_HAS_CXX17_OVERALIGN
840
+
841
+ // C++17 -> no need to bother about alignment anymore :)
842
+
843
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
844
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
845
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
846
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Size)
847
+
686
848
  #else
687
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
849
+
850
+ // HIP does not support new/delete on device.
851
+ #if EIGEN_MAX_ALIGN_BYTES != 0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
852
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
853
+ EIGEN_DEVICE_FUNC void* operator new(std::size_t size, const std::nothrow_t&) noexcept { \
854
+ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
855
+ EIGEN_CATCH(...) { return 0; } \
856
+ }
857
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
858
+ EIGEN_DEVICE_FUNC void* operator new(std::size_t size) { \
859
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
860
+ } \
861
+ EIGEN_DEVICE_FUNC void* operator new[](std::size_t size) { \
862
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
863
+ } \
864
+ EIGEN_DEVICE_FUNC void operator delete(void* ptr) noexcept { \
865
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
866
+ } \
867
+ EIGEN_DEVICE_FUNC void operator delete[](void* ptr) noexcept { \
868
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
869
+ } \
870
+ EIGEN_DEVICE_FUNC void operator delete(void* ptr, std::size_t /* sz */) noexcept { \
871
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
872
+ } \
873
+ EIGEN_DEVICE_FUNC void operator delete[](void* ptr, std::size_t /* sz */) noexcept { \
874
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
875
+ } \
876
+ /* in-place new and delete. since (at least afaik) there is no actual */ \
877
+ /* memory allocated we can safely let the default implementation handle */ \
878
+ /* this particular case. */ \
879
+ EIGEN_DEVICE_FUNC static void* operator new(std::size_t size, void* ptr) { return ::operator new(size, ptr); } \
880
+ EIGEN_DEVICE_FUNC static void* operator new[](std::size_t size, void* ptr) { return ::operator new[](size, ptr); } \
881
+ EIGEN_DEVICE_FUNC void operator delete(void* memory, void* ptr) noexcept { return ::operator delete(memory, ptr); } \
882
+ EIGEN_DEVICE_FUNC void operator delete[](void* memory, void* ptr) noexcept { \
883
+ return ::operator delete[](memory, ptr); \
884
+ } \
885
+ /* nothrow-new (returns zero instead of std::bad_alloc) */ \
886
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
887
+ EIGEN_DEVICE_FUNC void operator delete(void* ptr, const std::nothrow_t&) noexcept { \
888
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
889
+ } \
890
+ typedef void eigen_aligned_operator_new_marker_type;
891
+ #else
892
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
688
893
  #endif
689
894
 
690
895
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
691
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
692
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
896
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Size) \
897
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF( \
898
+ bool(((Size) != Eigen::Dynamic) && \
899
+ (((EIGEN_MAX_ALIGN_BYTES >= 16) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES) == 0)) || \
900
+ ((EIGEN_MAX_ALIGN_BYTES >= 32) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES / 2) == 0)) || \
901
+ ((EIGEN_MAX_ALIGN_BYTES >= 64) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES / 4) == 0)))))
902
+
903
+ #endif
693
904
 
694
905
  /****************************************************************************/
695
906
 
696
907
  /** \class aligned_allocator
697
- * \ingroup Core_Module
698
- *
699
- * \brief STL compatible allocator to use with types requiring a non standrad alignment.
700
- *
701
- * The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
702
- * By default, it will thus provide at least 16 bytes alignment and more in following cases:
703
- * - 32 bytes alignment if AVX is enabled.
704
- * - 64 bytes alignment if AVX512 is enabled.
705
- *
706
- * This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
707
- * \link TopicPreprocessorDirectivesPerformance there \endlink.
708
- *
709
- * Example:
710
- * \code
711
- * // Matrix4f requires 16 bytes alignment:
712
- * std::map< int, Matrix4f, std::less<int>,
713
- * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
714
- * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
715
- * std::map< int, Vector3f > my_map_vec3;
716
- * \endcode
717
- *
718
- * \sa \blank \ref TopicStlContainers.
719
- */
720
- template<class T>
721
- class aligned_allocator : public std::allocator<T>
722
- {
723
- public:
724
- typedef std::size_t size_type;
725
- typedef std::ptrdiff_t difference_type;
726
- typedef T* pointer;
727
- typedef const T* const_pointer;
728
- typedef T& reference;
729
- typedef const T& const_reference;
730
- typedef T value_type;
731
-
732
- template<class U>
733
- struct rebind
734
- {
908
+ * \ingroup Core_Module
909
+ *
910
+ * \brief STL compatible allocator to use with types requiring a non-standard alignment.
911
+ *
912
+ * The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
913
+ * By default, it will thus provide at least 16 bytes alignment and more in following cases:
914
+ * - 32 bytes alignment if AVX is enabled.
915
+ * - 64 bytes alignment if AVX512 is enabled.
916
+ *
917
+ * This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
918
+ * \link TopicPreprocessorDirectivesPerformance there \endlink.
919
+ *
920
+ * Example:
921
+ * \code
922
+ * // Matrix4f requires 16 bytes alignment:
923
+ * std::map< int, Matrix4f, std::less<int>,
924
+ * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
925
+ * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
926
+ * std::map< int, Vector3f > my_map_vec3;
927
+ * \endcode
928
+ *
929
+ * \sa \blank \ref TopicStlContainers.
930
+ */
931
+ template <class T>
932
+ class aligned_allocator {
933
+ public:
934
+ typedef std::size_t size_type;
935
+ typedef std::ptrdiff_t difference_type;
936
+ typedef T* pointer;
937
+ typedef const T* const_pointer;
938
+ typedef T& reference;
939
+ typedef const T& const_reference;
940
+ typedef T value_type;
941
+
942
+ template <class U>
943
+ struct rebind {
735
944
  typedef aligned_allocator<U> other;
736
945
  };
737
946
 
738
- aligned_allocator() : std::allocator<T>() {}
947
+ aligned_allocator() = default;
739
948
 
740
- aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
949
+ aligned_allocator(const aligned_allocator&) = default;
741
950
 
742
- template<class U>
743
- aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
951
+ template <class U>
952
+ aligned_allocator(const aligned_allocator<U>&) {}
744
953
 
745
- ~aligned_allocator() {}
954
+ template <class U>
955
+ constexpr bool operator==(const aligned_allocator<U>&) const noexcept {
956
+ return true;
957
+ }
958
+ template <class U>
959
+ constexpr bool operator!=(const aligned_allocator<U>&) const noexcept {
960
+ return false;
961
+ }
746
962
 
747
- pointer allocate(size_type num, const void* /*hint*/ = 0)
748
- {
749
- internal::check_size_for_overflow<T>(num);
750
- size_type size = num * sizeof(T);
751
- #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
752
- // workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
753
- // It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
754
- if(size>=std::size_t((std::numeric_limits<std::ptrdiff_t>::max)()))
755
- return 0;
756
- else
963
+ #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0)
964
+ // In gcc, std::allocator::max_size() is bugged, which makes gcc trigger a warning:
965
+ // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object
966
+ // size 9223372036854775807. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
967
+ size_type max_size() const { return (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T); }
757
968
  #endif
758
- return static_cast<pointer>( internal::aligned_malloc(size) );
759
- }
760
969
 
761
- void deallocate(pointer p, size_type /*num*/)
762
- {
763
- internal::aligned_free(p);
970
+ pointer allocate(size_type num, const void* /*hint*/ = 0) {
971
+ internal::check_size_for_overflow<T>(num);
972
+ return static_cast<pointer>(internal::aligned_malloc(num * sizeof(T)));
764
973
  }
974
+
975
+ void deallocate(pointer p, size_type /*num*/) { internal::aligned_free(p); }
765
976
  };
766
977
 
767
978
  //---------- Cache sizes ----------
768
979
 
769
980
  #if !defined(EIGEN_NO_CPUID)
770
- # if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
771
- # if defined(__PIC__) && EIGEN_ARCH_i386
772
- // Case for x86 with PIC
773
- # define EIGEN_CPUID(abcd,func,id) \
774
- __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
775
- # elif defined(__PIC__) && EIGEN_ARCH_x86_64
776
- // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
777
- // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
778
- # define EIGEN_CPUID(abcd,func,id) \
779
- __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
780
- # else
781
- // Case for x86_64 or x86 w/o PIC
782
- # define EIGEN_CPUID(abcd,func,id) \
783
- __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
784
- # endif
785
- # elif EIGEN_COMP_MSVC
786
- # if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
787
- # define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
788
- # endif
789
- # endif
981
+ #if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
982
+ #if defined(__PIC__) && EIGEN_ARCH_i386
983
+ // Case for x86 with PIC
984
+ #define EIGEN_CPUID(abcd, func, id) \
985
+ __asm__ __volatile__("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1" \
986
+ : "=a"(abcd[0]), "=&r"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) \
987
+ : "a"(func), "c"(id));
988
+ #elif defined(__PIC__) && EIGEN_ARCH_x86_64
989
+ // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with
990
+ // the default small code model. However, we cannot detect which code model is used, and the xchg overhead is negligible
991
+ // anyway.
992
+ #define EIGEN_CPUID(abcd, func, id) \
993
+ __asm__ __volatile__("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1" \
994
+ : "=a"(abcd[0]), "=&r"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) \
995
+ : "0"(func), "2"(id));
996
+ #else
997
+ // Case for x86_64 or x86 w/o PIC
998
+ #define EIGEN_CPUID(abcd, func, id) \
999
+ __asm__ __volatile__("cpuid" : "=a"(abcd[0]), "=b"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) : "0"(func), "2"(id));
1000
+ #endif
1001
+ #elif EIGEN_COMP_MSVC
1002
+ #if EIGEN_ARCH_i386_OR_x86_64
1003
+ #define EIGEN_CPUID(abcd, func, id) __cpuidex((int*)abcd, func, id)
1004
+ #endif
1005
+ #endif
790
1006
  #endif
791
1007
 
792
1008
  namespace internal {
793
1009
 
794
1010
  #ifdef EIGEN_CPUID
795
1011
 
796
- inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
797
- {
798
- return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
1012
+ inline bool cpuid_is_vendor(int abcd[4], const int vendor[3]) {
1013
+ return abcd[1] == vendor[0] && abcd[3] == vendor[1] && abcd[2] == vendor[2];
799
1014
  }
800
1015
 
801
- inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
802
- {
1016
+ inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3) {
803
1017
  int abcd[4];
804
1018
  l1 = l2 = l3 = 0;
805
1019
  int cache_id = 0;
806
1020
  int cache_type = 0;
807
1021
  do {
808
1022
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
809
- EIGEN_CPUID(abcd,0x4,cache_id);
810
- cache_type = (abcd[0] & 0x0F) >> 0;
811
- if(cache_type==1||cache_type==3) // data or unified cache
1023
+ EIGEN_CPUID(abcd, 0x4, cache_id);
1024
+ cache_type = (abcd[0] & 0x0F) >> 0;
1025
+ if (cache_type == 1 || cache_type == 3) // data or unified cache
812
1026
  {
813
- int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
814
- int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
815
- int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
816
- int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
817
- int sets = (abcd[2]); // C[31:0]
818
-
819
- int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
820
-
821
- switch(cache_level)
822
- {
823
- case 1: l1 = cache_size; break;
824
- case 2: l2 = cache_size; break;
825
- case 3: l3 = cache_size; break;
826
- default: break;
1027
+ int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
1028
+ int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
1029
+ int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
1030
+ int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
1031
+ int sets = (abcd[2]); // C[31:0]
1032
+
1033
+ int cache_size = (ways + 1) * (partitions + 1) * (line_size + 1) * (sets + 1);
1034
+
1035
+ switch (cache_level) {
1036
+ case 1:
1037
+ l1 = cache_size;
1038
+ break;
1039
+ case 2:
1040
+ l2 = cache_size;
1041
+ break;
1042
+ case 3:
1043
+ l3 = cache_size;
1044
+ break;
1045
+ default:
1046
+ break;
827
1047
  }
828
1048
  }
829
1049
  cache_id++;
830
- } while(cache_type>0 && cache_id<16);
1050
+ } while (cache_type > 0 && cache_id < 16);
831
1051
  }
832
1052
 
833
- inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
834
- {
1053
+ inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3) {
835
1054
  int abcd[4];
836
1055
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
837
1056
  l1 = l2 = l3 = 0;
838
- EIGEN_CPUID(abcd,0x00000002,0);
839
- unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
1057
+ EIGEN_CPUID(abcd, 0x00000002, 0);
1058
+ unsigned char* bytes = reinterpret_cast<unsigned char*>(abcd) + 2;
840
1059
  bool check_for_p2_core2 = false;
841
- for(int i=0; i<14; ++i)
842
- {
843
- switch(bytes[i])
844
- {
845
- case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
846
- case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
847
- case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
848
- case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
849
- case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
850
- case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
851
- case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
852
- case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
853
- case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
854
- case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
855
- case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
856
- case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
857
- case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
858
- case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
859
- case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
860
- case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
861
- case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
862
- case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
863
- case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
864
- case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
865
- case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
866
- case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
867
- case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
868
- case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
869
- case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
870
- case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
871
- case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
872
- case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
873
- case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
874
- case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
875
- case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
876
- case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
877
- case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
878
- case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
879
- case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
880
- case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
881
- case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
882
- case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
883
- case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
884
- case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
885
- case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
886
- case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
887
- case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
888
- case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
889
- case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
890
- case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
891
- case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
892
- case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
893
- case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
894
- case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
895
- case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
896
- case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
897
- case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
898
- case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
899
- case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
900
- case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
901
- case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
902
-
903
- default: break;
1060
+ for (int i = 0; i < 14; ++i) {
1061
+ switch (bytes[i]) {
1062
+ case 0x0A:
1063
+ l1 = 8;
1064
+ break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
1065
+ case 0x0C:
1066
+ l1 = 16;
1067
+ break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
1068
+ case 0x0E:
1069
+ l1 = 24;
1070
+ break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
1071
+ case 0x10:
1072
+ l1 = 16;
1073
+ break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
1074
+ case 0x15:
1075
+ l1 = 16;
1076
+ break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
1077
+ case 0x2C:
1078
+ l1 = 32;
1079
+ break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
1080
+ case 0x30:
1081
+ l1 = 32;
1082
+ break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
1083
+ case 0x60:
1084
+ l1 = 16;
1085
+ break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
1086
+ case 0x66:
1087
+ l1 = 8;
1088
+ break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
1089
+ case 0x67:
1090
+ l1 = 16;
1091
+ break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
1092
+ case 0x68:
1093
+ l1 = 32;
1094
+ break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
1095
+ case 0x1A:
1096
+ l2 = 96;
1097
+ break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
1098
+ case 0x22:
1099
+ l3 = 512;
1100
+ break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
1101
+ case 0x23:
1102
+ l3 = 1024;
1103
+ break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
1104
+ case 0x25:
1105
+ l3 = 2048;
1106
+ break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
1107
+ case 0x29:
1108
+ l3 = 4096;
1109
+ break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
1110
+ case 0x39:
1111
+ l2 = 128;
1112
+ break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
1113
+ case 0x3A:
1114
+ l2 = 192;
1115
+ break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
1116
+ case 0x3B:
1117
+ l2 = 128;
1118
+ break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
1119
+ case 0x3C:
1120
+ l2 = 256;
1121
+ break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
1122
+ case 0x3D:
1123
+ l2 = 384;
1124
+ break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
1125
+ case 0x3E:
1126
+ l2 = 512;
1127
+ break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
1128
+ case 0x40:
1129
+ l2 = 0;
1130
+ break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
1131
+ case 0x41:
1132
+ l2 = 128;
1133
+ break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
1134
+ case 0x42:
1135
+ l2 = 256;
1136
+ break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
1137
+ case 0x43:
1138
+ l2 = 512;
1139
+ break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
1140
+ case 0x44:
1141
+ l2 = 1024;
1142
+ break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
1143
+ case 0x45:
1144
+ l2 = 2048;
1145
+ break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
1146
+ case 0x46:
1147
+ l3 = 4096;
1148
+ break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
1149
+ case 0x47:
1150
+ l3 = 8192;
1151
+ break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
1152
+ case 0x48:
1153
+ l2 = 3072;
1154
+ break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
1155
+ case 0x49:
1156
+ if (l2 != 0)
1157
+ l3 = 4096;
1158
+ else {
1159
+ check_for_p2_core2 = true;
1160
+ l3 = l2 = 4096;
1161
+ }
1162
+ break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
1163
+ case 0x4A:
1164
+ l3 = 6144;
1165
+ break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
1166
+ case 0x4B:
1167
+ l3 = 8192;
1168
+ break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
1169
+ case 0x4C:
1170
+ l3 = 12288;
1171
+ break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
1172
+ case 0x4D:
1173
+ l3 = 16384;
1174
+ break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
1175
+ case 0x4E:
1176
+ l2 = 6144;
1177
+ break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
1178
+ case 0x78:
1179
+ l2 = 1024;
1180
+ break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
1181
+ case 0x79:
1182
+ l2 = 128;
1183
+ break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
1184
+ case 0x7A:
1185
+ l2 = 256;
1186
+ break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
1187
+ case 0x7B:
1188
+ l2 = 512;
1189
+ break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
1190
+ case 0x7C:
1191
+ l2 = 1024;
1192
+ break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
1193
+ case 0x7D:
1194
+ l2 = 2048;
1195
+ break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
1196
+ case 0x7E:
1197
+ l2 = 256;
1198
+ break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
1199
+ case 0x7F:
1200
+ l2 = 512;
1201
+ break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
1202
+ case 0x80:
1203
+ l2 = 512;
1204
+ break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
1205
+ case 0x81:
1206
+ l2 = 128;
1207
+ break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
1208
+ case 0x82:
1209
+ l2 = 256;
1210
+ break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
1211
+ case 0x83:
1212
+ l2 = 512;
1213
+ break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
1214
+ case 0x84:
1215
+ l2 = 1024;
1216
+ break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
1217
+ case 0x85:
1218
+ l2 = 2048;
1219
+ break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
1220
+ case 0x86:
1221
+ l2 = 512;
1222
+ break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
1223
+ case 0x87:
1224
+ l2 = 1024;
1225
+ break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
1226
+ case 0x88:
1227
+ l3 = 2048;
1228
+ break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
1229
+ case 0x89:
1230
+ l3 = 4096;
1231
+ break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
1232
+ case 0x8A:
1233
+ l3 = 8192;
1234
+ break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
1235
+ case 0x8D:
1236
+ l3 = 3072;
1237
+ break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
1238
+
1239
+ default:
1240
+ break;
904
1241
  }
905
1242
  }
906
- if(check_for_p2_core2 && l2 == l3)
907
- l3 = 0;
1243
+ if (check_for_p2_core2 && l2 == l3) l3 = 0;
908
1244
  l1 *= 1024;
909
1245
  l2 *= 1024;
910
1246
  l3 *= 1024;
911
1247
  }
912
1248
 
913
- inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
914
- {
915
- if(max_std_funcs>=4)
916
- queryCacheSizes_intel_direct(l1,l2,l3);
1249
+ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) {
1250
+ if (max_std_funcs >= 4)
1251
+ queryCacheSizes_intel_direct(l1, l2, l3);
1252
+ else if (max_std_funcs >= 2)
1253
+ queryCacheSizes_intel_codes(l1, l2, l3);
917
1254
  else
918
- queryCacheSizes_intel_codes(l1,l2,l3);
1255
+ l1 = l2 = l3 = 0;
919
1256
  }
920
1257
 
921
- inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
922
- {
1258
+ inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) {
923
1259
  int abcd[4];
924
1260
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
925
- EIGEN_CPUID(abcd,0x80000005,0);
926
- l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
927
- abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
928
- EIGEN_CPUID(abcd,0x80000006,0);
929
- l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
930
- l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
1261
+
1262
+ // First query the max supported function.
1263
+ EIGEN_CPUID(abcd, 0x80000000, 0);
1264
+ if (static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006)) {
1265
+ EIGEN_CPUID(abcd, 0x80000005, 0);
1266
+ l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
1267
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
1268
+ EIGEN_CPUID(abcd, 0x80000006, 0);
1269
+ l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
1270
+ l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
1271
+ } else {
1272
+ l1 = l2 = l3 = 0;
1273
+ }
931
1274
  }
932
1275
  #endif
933
1276
 
934
1277
  /** \internal
935
1278
  * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
936
- inline void queryCacheSizes(int& l1, int& l2, int& l3)
937
- {
938
- #ifdef EIGEN_CPUID
1279
+ inline void queryCacheSizes(int& l1, int& l2, int& l3) {
1280
+ #ifdef EIGEN_CPUID
939
1281
  int abcd[4];
940
1282
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
941
1283
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
942
- const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
1284
+ const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
943
1285
 
944
1286
  // identify the CPU vendor
945
- EIGEN_CPUID(abcd,0x0,0);
946
- int max_std_funcs = abcd[1];
947
- if(cpuid_is_vendor(abcd,GenuineIntel))
948
- queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
949
- else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
950
- queryCacheSizes_amd(l1,l2,l3);
1287
+ EIGEN_CPUID(abcd, 0x0, 0);
1288
+ int max_std_funcs = abcd[0];
1289
+ if (cpuid_is_vendor(abcd, GenuineIntel))
1290
+ queryCacheSizes_intel(l1, l2, l3, max_std_funcs);
1291
+ else if (cpuid_is_vendor(abcd, AuthenticAMD) || cpuid_is_vendor(abcd, AMDisbetter_))
1292
+ queryCacheSizes_amd(l1, l2, l3);
951
1293
  else
952
1294
  // by default let's use Intel's API
953
- queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
954
-
955
- // here is the list of other vendors:
956
- // ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
957
- // ||cpuid_is_vendor(abcd,"CyrixInstead")
958
- // ||cpuid_is_vendor(abcd,"CentaurHauls")
959
- // ||cpuid_is_vendor(abcd,"GenuineTMx86")
960
- // ||cpuid_is_vendor(abcd,"TransmetaCPU")
961
- // ||cpuid_is_vendor(abcd,"RiseRiseRise")
962
- // ||cpuid_is_vendor(abcd,"Geode by NSC")
963
- // ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
964
- // ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
965
- // ||cpuid_is_vendor(abcd,"NexGenDriven")
966
- #else
1295
+ queryCacheSizes_intel(l1, l2, l3, max_std_funcs);
1296
+
1297
+ // here is the list of other vendors:
1298
+ // ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
1299
+ // ||cpuid_is_vendor(abcd,"CyrixInstead")
1300
+ // ||cpuid_is_vendor(abcd,"CentaurHauls")
1301
+ // ||cpuid_is_vendor(abcd,"GenuineTMx86")
1302
+ // ||cpuid_is_vendor(abcd,"TransmetaCPU")
1303
+ // ||cpuid_is_vendor(abcd,"RiseRiseRise")
1304
+ // ||cpuid_is_vendor(abcd,"Geode by NSC")
1305
+ // ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
1306
+ // ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
1307
+ // ||cpuid_is_vendor(abcd,"NexGenDriven")
1308
+ #else
967
1309
  l1 = l2 = l3 = -1;
968
- #endif
1310
+ #endif
969
1311
  }
970
1312
 
971
1313
  /** \internal
972
1314
  * \returns the size in Bytes of the L1 data cache */
973
- inline int queryL1CacheSize()
974
- {
1315
+ inline int queryL1CacheSize() {
975
1316
  int l1(-1), l2, l3;
976
- queryCacheSizes(l1,l2,l3);
1317
+ queryCacheSizes(l1, l2, l3);
977
1318
  return l1;
978
1319
  }
979
1320
 
980
1321
  /** \internal
981
1322
  * \returns the size in Bytes of the L2 or L3 cache if the latter is present */
982
- inline int queryTopLevelCacheSize()
983
- {
1323
+ inline int queryTopLevelCacheSize() {
984
1324
  int l1, l2(-1), l3(-1);
985
- queryCacheSizes(l1,l2,l3);
986
- return (std::max)(l2,l3);
1325
+ queryCacheSizes(l1, l2, l3);
1326
+ return (std::max)(l2, l3);
987
1327
  }
988
1328
 
989
- } // end namespace internal
1329
+ /** \internal
1330
+ * This wraps C++20's std::construct_at, using placement new instead if it is not available.
1331
+ */
1332
+
1333
+ #if EIGEN_COMP_CXXVER >= 20 && defined(__cpp_lib_constexpr_dynamic_alloc) && \
1334
+ __cpp_lib_constexpr_dynamic_alloc >= 201907L
1335
+ using std::construct_at;
1336
+ #else
1337
+ template <class T, class... Args>
1338
+ EIGEN_DEVICE_FUNC T* construct_at(T* p, Args&&... args) {
1339
+ return ::new (const_cast<void*>(static_cast<const volatile void*>(p))) T(std::forward<Args>(args)...);
1340
+ }
1341
+ #endif
1342
+
1343
+ /** \internal
1344
+ * This wraps C++17's std::destroy_at. If it's not available it calls the destructor.
1345
+ * The wrapper is not a full replacement for C++20's std::destroy_at as it cannot
1346
+ * be applied to built-in array types.
1347
+ */
1348
+ #if EIGEN_COMP_CXXVER >= 17
1349
+ using std::destroy_at;
1350
+ #else
1351
+ template <class T>
1352
+ EIGEN_DEVICE_FUNC void destroy_at(T* p) {
1353
+ p->~T();
1354
+ }
1355
+ #endif
1356
+
1357
+ // FIXME(rmlarsen): Work around missing linker symbol with msan on ARM.
1358
+ #if !defined(EIGEN_DONT_ASSUME_ALIGNED) && __has_feature(memory_sanitizer) && \
1359
+ (EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64)
1360
+ #define EIGEN_DONT_ASSUME_ALIGNED
1361
+ #endif
1362
+
1363
+
1364
+ #if !defined(EIGEN_DONT_ASSUME_ALIGNED) && defined(__cpp_lib_assume_aligned) && (__cpp_lib_assume_aligned >= 201811L)
1365
+ template <std::size_t N, typename T>
1366
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
1367
+ return std::assume_aligned<N, T>(ptr);
1368
+ }
1369
+ #elif !defined(EIGEN_DONT_ASSUME_ALIGNED) && EIGEN_HAS_BUILTIN(__builtin_assume_aligned)
1370
+ template <std::size_t N, typename T>
1371
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC T* assume_aligned(T* ptr) {
1372
+ return static_cast<T*>(__builtin_assume_aligned(ptr, N));
1373
+ }
1374
+ #else
1375
+ template <std::size_t N, typename T>
1376
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
1377
+ return ptr;
1378
+ }
1379
+ #endif
1380
+
1381
+ } // end namespace internal
990
1382
 
991
- } // end namespace Eigen
1383
+ } // end namespace Eigen
992
1384
 
993
- #endif // EIGEN_MEMORY_H
1385
+ #endif // EIGEN_MEMORY_H