@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -12,7 +12,6 @@
12
12
  // Public License v. 2.0. If a copy of the MPL was not distributed
13
13
  // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
14
14
 
15
-
16
15
  /*****************************************************************************
17
16
  *** Platform checks for aligned malloc functions ***
18
17
  *****************************************************************************/
@@ -31,11 +30,11 @@
31
30
  // http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
32
31
  // page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
33
32
  // quite safe, at least within the context of glibc, to equate 64-bit with LP64.
34
- #if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
35
- && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
36
- #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
33
+ #if defined(__GLIBC__) && ((__GLIBC__ >= 2 && __GLIBC_MINOR__ >= 8) || __GLIBC__ > 2) && defined(__LP64__) && \
34
+ !defined(__SANITIZE_ADDRESS__) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
35
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
37
36
  #else
38
- #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
37
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
39
38
  #endif
40
39
 
41
40
  // FreeBSD 6 seems to have 16-byte aligned malloc
@@ -43,49 +42,107 @@
43
42
  // FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
44
43
  // See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
45
44
  #if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
46
- #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
45
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
46
+ #else
47
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
48
+ #endif
49
+
50
+ #if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) || \
51
+ EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
52
+ #define EIGEN_MALLOC_ALREADY_ALIGNED 1
47
53
  #else
48
- #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
54
+ #define EIGEN_MALLOC_ALREADY_ALIGNED 0
55
+ #endif
56
+
49
57
  #endif
50
58
 
51
- #if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
52
- || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
53
- || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
54
- || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
55
- #define EIGEN_MALLOC_ALREADY_ALIGNED 1
59
+ #ifndef EIGEN_MALLOC_CHECK_THREAD_LOCAL
60
+
61
+ // Check whether we can use the thread_local keyword to allow or disallow
62
+ // allocating memory with per-thread granularity, by means of the
63
+ // set_is_malloc_allowed() function.
64
+ #ifndef EIGEN_AVOID_THREAD_LOCAL
65
+
66
+ #if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC >= 1900) && \
67
+ !defined(EIGEN_GPU_COMPILE_PHASE)
68
+ #define EIGEN_MALLOC_CHECK_THREAD_LOCAL thread_local
56
69
  #else
57
- #define EIGEN_MALLOC_ALREADY_ALIGNED 0
70
+ #define EIGEN_MALLOC_CHECK_THREAD_LOCAL
58
71
  #endif
59
72
 
73
+ #else // EIGEN_AVOID_THREAD_LOCAL
74
+ #define EIGEN_MALLOC_CHECK_THREAD_LOCAL
75
+ #endif // EIGEN_AVOID_THREAD_LOCAL
76
+
60
77
  #endif
61
78
 
79
+ // IWYU pragma: private
80
+ #include "../InternalHeaderCheck.h"
81
+
62
82
  namespace Eigen {
63
83
 
64
84
  namespace internal {
65
85
 
66
- EIGEN_DEVICE_FUNC
67
- inline void throw_std_bad_alloc()
68
- {
69
- #ifdef EIGEN_EXCEPTIONS
70
- throw std::bad_alloc();
71
- #else
72
- std::size_t huge = static_cast<std::size_t>(-1);
73
- #if defined(EIGEN_HIPCC)
74
- //
75
- // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining),
76
- // and as a consequence the code in the #else block triggers the hipcc warning :
77
- // "no overloaded function has restriction specifiers that are compatible with the ambient context"
78
- //
79
- // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
80
- // the same on "operator new"
81
- // Reverting code back to the old version in this #if block for the hipcc compiler
82
- //
83
- new int[huge];
84
- #else
85
- void* unused = ::operator new(huge);
86
- EIGEN_UNUSED_VARIABLE(unused);
87
- #endif
88
- #endif
86
+ /*****************************************************************************
87
+ *** Implementation of portable aligned versions of malloc/free/realloc ***
88
+ *****************************************************************************/
89
+
90
+ #ifdef EIGEN_NO_MALLOC
91
+ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {
92
+ eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
93
+ }
94
+ EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {
95
+ eigen_assert(false && "heap deallocation is forbidden (EIGEN_NO_MALLOC is defined)");
96
+ }
97
+ #elif defined EIGEN_RUNTIME_NO_MALLOC
98
+ EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false) {
99
+ EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool value = true;
100
+ if (update == 1) value = new_value;
101
+ return value;
102
+ }
103
+ EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
104
+ EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
105
+ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {
106
+ eigen_assert(is_malloc_allowed() &&
107
+ "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and set_is_malloc_allowed is false)");
108
+ }
109
+ EIGEN_DEVICE_FUNC inline bool is_free_allowed_impl(bool update, bool new_value = false) {
110
+ EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool value = true;
111
+ if (update == 1) value = new_value;
112
+ return value;
113
+ }
114
+ EIGEN_DEVICE_FUNC inline bool is_free_allowed() { return is_free_allowed_impl(false); }
115
+ EIGEN_DEVICE_FUNC inline bool set_is_free_allowed(bool new_value) { return is_free_allowed_impl(true, new_value); }
116
+ EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {
117
+ eigen_assert(is_malloc_allowed() &&
118
+ "heap deallocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and set_is_free_allowed is false)");
119
+ }
120
+ #else
121
+ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {}
122
+ EIGEN_DEVICE_FUNC inline void check_that_free_is_allowed() {}
123
+ #endif
124
+
125
+ EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() {
126
+ #ifdef EIGEN_EXCEPTIONS
127
+ throw std::bad_alloc();
128
+ #else
129
+ std::size_t huge = static_cast<std::size_t>(-1);
130
+ #if defined(EIGEN_HIPCC)
131
+ //
132
+ // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining),
133
+ // and as a consequence the code in the #else block triggers the hipcc warning :
134
+ // "no overloaded function has restriction specifiers that are compatible with the ambient context"
135
+ //
136
+ // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
137
+ // the same on "operator new"
138
+ // Reverting code back to the old version in this #if block for the hipcc compiler
139
+ //
140
+ new int[huge];
141
+ #else
142
+ void* unused = ::operator new(huge);
143
+ EIGEN_UNUSED_VARIABLE(unused);
144
+ #endif
145
+ #endif
89
146
  }
90
147
 
91
148
  /*****************************************************************************
@@ -94,137 +151,132 @@ inline void throw_std_bad_alloc()
94
151
 
95
152
  /* ----- Hand made implementations of aligned malloc/free and realloc ----- */
96
153
 
97
- /** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
98
- * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
99
- */
100
- EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
101
- {
102
- eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
154
+ /** \internal Like malloc, but the returned pointer is guaranteed to be aligned to `alignment`.
155
+ * Fast, but wastes `alignment` additional bytes of memory. Does not throw any exception.
156
+ */
157
+ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size,
158
+ std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) {
159
+ eigen_assert(alignment >= sizeof(void*) && alignment <= 256 && (alignment & (alignment - 1)) == 0 &&
160
+ "Alignment must be at least sizeof(void*), less than or equal to 256, and a power of 2");
103
161
 
162
+ check_that_malloc_is_allowed();
104
163
  EIGEN_USING_STD(malloc)
105
- void *original = malloc(size+alignment);
106
-
107
- if (original == 0) return 0;
108
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
109
- *(reinterpret_cast<void**>(aligned) - 1) = original;
164
+ void* original = malloc(size + alignment);
165
+ if (original == nullptr) return nullptr;
166
+ std::size_t offset = alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1));
167
+ void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
168
+ // Store offset - 1, since it is guaranteed to be at least 1.
169
+ *(static_cast<uint8_t*>(aligned) - 1) = static_cast<uint8_t>(offset - 1);
110
170
  return aligned;
111
171
  }
112
172
 
113
173
  /** \internal Frees memory allocated with handmade_aligned_malloc */
114
- EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
115
- {
116
- if (ptr) {
174
+ EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void* ptr) {
175
+ if (ptr != nullptr) {
176
+ std::size_t offset = static_cast<std::size_t>(*(static_cast<uint8_t*>(ptr) - 1)) + 1;
177
+ void* original = static_cast<void*>(static_cast<uint8_t*>(ptr) - offset);
178
+
179
+ check_that_free_is_allowed();
117
180
  EIGEN_USING_STD(free)
118
- free(*(reinterpret_cast<void**>(ptr) - 1));
181
+ free(original);
119
182
  }
120
183
  }
121
184
 
122
185
  /** \internal
123
- * \brief Reallocates aligned memory.
124
- * Since we know that our handmade version is based on std::malloc
125
- * we can use std::realloc to implement efficient reallocation.
126
- */
127
- inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
128
- {
129
- if (ptr == 0) return handmade_aligned_malloc(size);
130
- void *original = *(reinterpret_cast<void**>(ptr) - 1);
131
- std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
132
- original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
133
- if (original == 0) return 0;
134
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
135
- void *previous_aligned = static_cast<char *>(original)+previous_offset;
136
- if(aligned!=previous_aligned)
137
- std::memmove(aligned, previous_aligned, size);
138
-
139
- *(reinterpret_cast<void**>(aligned) - 1) = original;
186
+ * \brief Reallocates aligned memory.
187
+ * Since we know that our handmade version is based on std::malloc
188
+ * we can use std::realloc to implement efficient reallocation.
189
+ */
190
+ EIGEN_DEVICE_FUNC inline void* handmade_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size,
191
+ std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) {
192
+ if (ptr == nullptr) return handmade_aligned_malloc(new_size, alignment);
193
+ std::size_t old_offset = static_cast<std::size_t>(*(static_cast<uint8_t*>(ptr) - 1)) + 1;
194
+ void* old_original = static_cast<uint8_t*>(ptr) - old_offset;
195
+
196
+ check_that_malloc_is_allowed();
197
+ EIGEN_USING_STD(realloc)
198
+ void* original = realloc(old_original, new_size + alignment);
199
+ if (original == nullptr) return nullptr;
200
+ if (original == old_original) return ptr;
201
+ std::size_t offset = alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1));
202
+ void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
203
+ if (offset != old_offset) {
204
+ const void* src = static_cast<const void*>(static_cast<uint8_t*>(original) + old_offset);
205
+ std::size_t count = (std::min)(new_size, old_size);
206
+ std::memmove(aligned, src, count);
207
+ }
208
+ // Store offset - 1, since it is guaranteed to be at least 1.
209
+ *(static_cast<uint8_t*>(aligned) - 1) = static_cast<uint8_t>(offset - 1);
140
210
  return aligned;
141
211
  }
142
212
 
143
- /*****************************************************************************
144
- *** Implementation of portable aligned versions of malloc/free/realloc ***
145
- *****************************************************************************/
213
+ /** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on
214
+ * the requirements. On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
215
+ */
216
+ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) {
217
+ if (size == 0) return nullptr;
146
218
 
147
- #ifdef EIGEN_NO_MALLOC
148
- EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
149
- {
150
- eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
151
- }
152
- #elif defined EIGEN_RUNTIME_NO_MALLOC
153
- EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
154
- {
155
- static bool value = true;
156
- if (update == 1)
157
- value = new_value;
158
- return value;
159
- }
160
- EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
161
- EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
162
- EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
163
- {
164
- eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
165
- }
166
- #else
167
- EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
168
- {}
169
- #endif
219
+ void* result;
220
+ #if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
170
221
 
171
- /** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
172
- * On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
173
- */
174
- EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
175
- {
176
222
  check_that_malloc_is_allowed();
223
+ EIGEN_USING_STD(malloc)
224
+ result = malloc(size);
177
225
 
178
- void *result;
179
- #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
180
-
181
- EIGEN_USING_STD(malloc)
182
- result = malloc(size);
183
-
184
- #if EIGEN_DEFAULT_ALIGN_BYTES==16
185
- eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
186
- #endif
187
- #else
188
- result = handmade_aligned_malloc(size);
189
- #endif
226
+ #if EIGEN_DEFAULT_ALIGN_BYTES == 16
227
+ eigen_assert((size < 16 || (std::size_t(result) % 16) == 0) &&
228
+ "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback "
229
+ "to handmade aligned memory allocator.");
230
+ #endif
231
+ #else
232
+ result = handmade_aligned_malloc(size);
233
+ #endif
190
234
 
191
- if(!result && size)
192
- throw_std_bad_alloc();
235
+ if (!result && size) throw_std_bad_alloc();
193
236
 
194
237
  return result;
195
238
  }
196
239
 
197
240
  /** \internal Frees memory allocated with aligned_malloc. */
198
- EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
199
- {
200
- #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
241
+ EIGEN_DEVICE_FUNC inline void aligned_free(void* ptr) {
242
+ #if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
201
243
 
244
+ if (ptr != nullptr) {
245
+ check_that_free_is_allowed();
202
246
  EIGEN_USING_STD(free)
203
247
  free(ptr);
248
+ }
204
249
 
205
- #else
206
- handmade_aligned_free(ptr);
207
- #endif
250
+ #else
251
+ handmade_aligned_free(ptr);
252
+ #endif
208
253
  }
209
254
 
210
255
  /**
211
- * \internal
212
- * \brief Reallocates an aligned block of memory.
213
- * \throws std::bad_alloc on allocation failure
214
- */
215
- inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
216
- {
256
+ * \internal
257
+ * \brief Reallocates an aligned block of memory.
258
+ * \throws std::bad_alloc on allocation failure
259
+ */
260
+ EIGEN_DEVICE_FUNC inline void* aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) {
261
+ if (ptr == nullptr) return aligned_malloc(new_size);
262
+ if (old_size == new_size) return ptr;
263
+ if (new_size == 0) {
264
+ aligned_free(ptr);
265
+ return nullptr;
266
+ }
267
+
268
+ void* result;
269
+ #if (EIGEN_DEFAULT_ALIGN_BYTES == 0) || EIGEN_MALLOC_ALREADY_ALIGNED
217
270
  EIGEN_UNUSED_VARIABLE(old_size)
218
271
 
219
- void *result;
220
- #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
221
- result = std::realloc(ptr,new_size);
272
+ check_that_malloc_is_allowed();
273
+ EIGEN_USING_STD(realloc)
274
+ result = realloc(ptr, new_size);
222
275
  #else
223
- result = handmade_aligned_realloc(ptr,new_size,old_size);
276
+ result = handmade_aligned_realloc(ptr, new_size, old_size);
224
277
  #endif
225
278
 
226
- if (!result && new_size)
227
- throw_std_bad_alloc();
279
+ if (!result && new_size) throw_std_bad_alloc();
228
280
 
229
281
  return result;
230
282
  }
@@ -234,45 +286,58 @@ inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_si
234
286
  *****************************************************************************/
235
287
 
236
288
  /** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
237
- * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
238
- */
239
- template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
240
- {
289
+ * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
290
+ */
291
+ template <bool Align>
292
+ EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) {
241
293
  return aligned_malloc(size);
242
294
  }
243
295
 
244
- template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
245
- {
246
- check_that_malloc_is_allowed();
296
+ template <>
297
+ EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size) {
298
+ if (size == 0) return nullptr;
247
299
 
300
+ check_that_malloc_is_allowed();
248
301
  EIGEN_USING_STD(malloc)
249
- void *result = malloc(size);
302
+ void* result = malloc(size);
250
303
 
251
- if(!result && size)
252
- throw_std_bad_alloc();
304
+ if (!result && size) throw_std_bad_alloc();
253
305
  return result;
254
306
  }
255
307
 
256
308
  /** \internal Frees memory allocated with conditional_aligned_malloc */
257
- template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
258
- {
309
+ template <bool Align>
310
+ EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void* ptr) {
259
311
  aligned_free(ptr);
260
312
  }
261
313
 
262
- template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
263
- {
264
- EIGEN_USING_STD(free)
265
- free(ptr);
314
+ template <>
315
+ EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void* ptr) {
316
+ if (ptr != nullptr) {
317
+ check_that_free_is_allowed();
318
+ EIGEN_USING_STD(free)
319
+ free(ptr);
320
+ }
266
321
  }
267
322
 
268
- template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
269
- {
323
+ template <bool Align>
324
+ EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) {
270
325
  return aligned_realloc(ptr, new_size, old_size);
271
326
  }
272
327
 
273
- template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
274
- {
275
- return std::realloc(ptr, new_size);
328
+ template <>
329
+ EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size,
330
+ std::size_t old_size) {
331
+ if (ptr == nullptr) return conditional_aligned_malloc<false>(new_size);
332
+ if (old_size == new_size) return ptr;
333
+ if (new_size == 0) {
334
+ conditional_aligned_free<false>(ptr);
335
+ return nullptr;
336
+ }
337
+
338
+ check_that_malloc_is_allowed();
339
+ EIGEN_USING_STD(realloc)
340
+ return realloc(ptr, new_size);
276
341
  }
277
342
 
278
343
  /*****************************************************************************
@@ -280,75 +345,95 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_
280
345
  *****************************************************************************/
281
346
 
282
347
  /** \internal Destructs the elements of an array.
283
- * The \a size parameters tells on how many objects to call the destructor of T.
284
- */
285
- template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
286
- {
348
+ * The \a size parameters tells on how many objects to call the destructor of T.
349
+ */
350
+ template <typename T>
351
+ EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T* ptr, std::size_t size) {
287
352
  // always destruct an array starting from the end.
288
- if(ptr)
289
- while(size) ptr[--size].~T();
353
+ if (ptr)
354
+ while (size) ptr[--size].~T();
290
355
  }
291
356
 
292
357
  /** \internal Constructs the elements of an array.
293
- * The \a size parameter tells on how many objects to call the constructor of T.
294
- */
295
- template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
296
- {
297
- std::size_t i;
298
- EIGEN_TRY
299
- {
300
- for (i = 0; i < size; ++i) ::new (ptr + i) T;
301
- return ptr;
358
+ * The \a size parameter tells on how many objects to call the constructor of T.
359
+ */
360
+ template <typename T>
361
+ EIGEN_DEVICE_FUNC inline T* default_construct_elements_of_array(T* ptr, std::size_t size) {
362
+ std::size_t i = 0;
363
+ EIGEN_TRY {
364
+ for (i = 0; i < size; ++i) ::new (ptr + i) T;
365
+ }
366
+ EIGEN_CATCH(...) {
367
+ destruct_elements_of_array(ptr, i);
368
+ EIGEN_THROW;
369
+ }
370
+ return ptr;
371
+ }
372
+
373
+ /** \internal Copy-constructs the elements of an array.
374
+ * The \a size parameter tells on how many objects to copy.
375
+ */
376
+ template <typename T>
377
+ EIGEN_DEVICE_FUNC inline T* copy_construct_elements_of_array(T* ptr, const T* src, std::size_t size) {
378
+ std::size_t i = 0;
379
+ EIGEN_TRY {
380
+ for (i = 0; i < size; ++i) ::new (ptr + i) T(*(src + i));
302
381
  }
303
- EIGEN_CATCH(...)
304
- {
382
+ EIGEN_CATCH(...) {
305
383
  destruct_elements_of_array(ptr, i);
306
384
  EIGEN_THROW;
307
385
  }
308
- return NULL;
386
+ return ptr;
387
+ }
388
+
389
+ /** \internal Move-constructs the elements of an array.
390
+ * The \a size parameter tells on how many objects to move.
391
+ */
392
+ template <typename T>
393
+ EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T* ptr, T* src, std::size_t size) {
394
+ std::size_t i = 0;
395
+ EIGEN_TRY {
396
+ for (i = 0; i < size; ++i) ::new (ptr + i) T(std::move(*(src + i)));
397
+ }
398
+ EIGEN_CATCH(...) {
399
+ destruct_elements_of_array(ptr, i);
400
+ EIGEN_THROW;
401
+ }
402
+ return ptr;
309
403
  }
310
404
 
311
405
  /*****************************************************************************
312
406
  *** Implementation of aligned new/delete-like functions ***
313
407
  *****************************************************************************/
314
408
 
315
- template<typename T>
316
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
317
- {
318
- if(size > std::size_t(-1) / sizeof(T))
319
- throw_std_bad_alloc();
409
+ template <typename T>
410
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) {
411
+ constexpr std::size_t max_elements = (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T);
412
+ if (size > max_elements) throw_std_bad_alloc();
320
413
  }
321
414
 
322
415
  /** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
323
- * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
324
- * The default constructor of T is called.
325
- */
326
- template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
327
- {
416
+ * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
417
+ * The default constructor of T is called.
418
+ */
419
+ template <typename T>
420
+ EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) {
328
421
  check_size_for_overflow<T>(size);
329
- T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
330
- EIGEN_TRY
331
- {
332
- return construct_elements_of_array(result, size);
333
- }
334
- EIGEN_CATCH(...)
335
- {
422
+ T* result = static_cast<T*>(aligned_malloc(sizeof(T) * size));
423
+ EIGEN_TRY { return default_construct_elements_of_array(result, size); }
424
+ EIGEN_CATCH(...) {
336
425
  aligned_free(result);
337
426
  EIGEN_THROW;
338
427
  }
339
428
  return result;
340
429
  }
341
430
 
342
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
343
- {
431
+ template <typename T, bool Align>
432
+ EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) {
344
433
  check_size_for_overflow<T>(size);
345
- T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
346
- EIGEN_TRY
347
- {
348
- return construct_elements_of_array(result, size);
349
- }
350
- EIGEN_CATCH(...)
351
- {
434
+ T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
435
+ EIGEN_TRY { return default_construct_elements_of_array(result, size); }
436
+ EIGEN_CATCH(...) {
352
437
  conditional_aligned_free<Align>(result);
353
438
  EIGEN_THROW;
354
439
  }
@@ -356,60 +441,62 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
356
441
  }
357
442
 
358
443
  /** \internal Deletes objects constructed with aligned_new
359
- * The \a size parameters tells on how many objects to call the destructor of T.
360
- */
361
- template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
362
- {
444
+ * The \a size parameters tells on how many objects to call the destructor of T.
445
+ */
446
+ template <typename T>
447
+ EIGEN_DEVICE_FUNC inline void aligned_delete(T* ptr, std::size_t size) {
363
448
  destruct_elements_of_array<T>(ptr, size);
364
- Eigen::internal::aligned_free(ptr);
449
+ aligned_free(ptr);
365
450
  }
366
451
 
367
452
  /** \internal Deletes objects constructed with conditional_aligned_new
368
- * The \a size parameters tells on how many objects to call the destructor of T.
369
- */
370
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
371
- {
453
+ * The \a size parameters tells on how many objects to call the destructor of T.
454
+ */
455
+ template <typename T, bool Align>
456
+ EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T* ptr, std::size_t size) {
372
457
  destruct_elements_of_array<T>(ptr, size);
373
458
  conditional_aligned_free<Align>(ptr);
374
459
  }
375
460
 
376
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
377
- {
461
+ template <typename T, bool Align>
462
+ EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size) {
378
463
  check_size_for_overflow<T>(new_size);
379
464
  check_size_for_overflow<T>(old_size);
380
- if(new_size < old_size)
381
- destruct_elements_of_array(pts+new_size, old_size-new_size);
382
- T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
383
- if(new_size > old_size)
384
- {
385
- EIGEN_TRY
386
- {
387
- construct_elements_of_array(result+old_size, new_size-old_size);
388
- }
389
- EIGEN_CATCH(...)
390
- {
391
- conditional_aligned_free<Align>(result);
392
- EIGEN_THROW;
465
+
466
+ // If elements need to be explicitly initialized, we cannot simply realloc
467
+ // (or memcpy) the memory block - each element needs to be reconstructed.
468
+ // Otherwise, objects that contain internal pointers like mpfr or
469
+ // AnnoyingScalar can be pointing to the wrong thing.
470
+ T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * new_size));
471
+ EIGEN_TRY {
472
+ // Move-construct initial elements.
473
+ std::size_t copy_size = (std::min)(old_size, new_size);
474
+ move_construct_elements_of_array(result, pts, copy_size);
475
+
476
+ // Default-construct remaining elements.
477
+ if (new_size > old_size) {
478
+ default_construct_elements_of_array(result + copy_size, new_size - old_size);
393
479
  }
480
+
481
+ // Delete old elements.
482
+ conditional_aligned_delete<T, Align>(pts, old_size);
483
+ }
484
+ EIGEN_CATCH(...) {
485
+ conditional_aligned_free<Align>(result);
486
+ EIGEN_THROW;
394
487
  }
488
+
395
489
  return result;
396
490
  }
397
491
 
398
-
399
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
400
- {
401
- if(size==0)
402
- return 0; // short-cut. Also fixes Bug 884
492
+ template <typename T, bool Align>
493
+ EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) {
494
+ if (size == 0) return nullptr; // short-cut. Also fixes Bug 884
403
495
  check_size_for_overflow<T>(size);
404
- T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
405
- if(NumTraits<T>::RequireInitialization)
406
- {
407
- EIGEN_TRY
408
- {
409
- construct_elements_of_array(result, size);
410
- }
411
- EIGEN_CATCH(...)
412
- {
496
+ T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T) * size));
497
+ if (NumTraits<T>::RequireInitialization) {
498
+ EIGEN_TRY { default_construct_elements_of_array(result, size); }
499
+ EIGEN_CATCH(...) {
413
500
  conditional_aligned_free<Align>(result);
414
501
  EIGEN_THROW;
415
502
  }
@@ -417,166 +504,140 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
417
504
  return result;
418
505
  }
419
506
 
420
- template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
421
- {
507
+ template <typename T, bool Align>
508
+ EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) {
509
+ if (NumTraits<T>::RequireInitialization) {
510
+ return conditional_aligned_realloc_new<T, Align>(pts, new_size, old_size);
511
+ }
512
+
422
513
  check_size_for_overflow<T>(new_size);
423
514
  check_size_for_overflow<T>(old_size);
424
- if(NumTraits<T>::RequireInitialization && (new_size < old_size))
425
- destruct_elements_of_array(pts+new_size, old_size-new_size);
426
- T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
427
- if(NumTraits<T>::RequireInitialization && (new_size > old_size))
428
- {
429
- EIGEN_TRY
430
- {
431
- construct_elements_of_array(result+old_size, new_size-old_size);
432
- }
433
- EIGEN_CATCH(...)
434
- {
435
- conditional_aligned_free<Align>(result);
436
- EIGEN_THROW;
437
- }
438
- }
439
- return result;
515
+ return static_cast<T*>(
516
+ conditional_aligned_realloc<Align>(static_cast<void*>(pts), sizeof(T) * new_size, sizeof(T) * old_size));
440
517
  }
441
518
 
442
- template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
443
- {
444
- if(NumTraits<T>::RequireInitialization)
445
- destruct_elements_of_array<T>(ptr, size);
519
+ template <typename T, bool Align>
520
+ EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T* ptr, std::size_t size) {
521
+ if (NumTraits<T>::RequireInitialization) destruct_elements_of_array<T>(ptr, size);
446
522
  conditional_aligned_free<Align>(ptr);
447
523
  }
448
524
 
449
525
  /****************************************************************************/
450
526
 
451
- /** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment.
452
- *
453
- * \tparam Alignment requested alignment in Bytes.
454
- * \param array the address of the start of the array
455
- * \param size the size of the array
456
- *
457
- * \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
458
- * the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
459
- * packet size for the given scalar type is 1, then everything is considered well-aligned.
460
- *
461
- * \note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a
462
- * power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
463
- * example with Scalar=double on certain 32-bit platforms, see bug #79.
464
- *
465
- * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
466
- * \sa first_default_aligned()
467
- */
468
- template<int Alignment, typename Scalar, typename Index>
469
- EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
470
- {
527
+ /** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a
528
+ * Alignment.
529
+ *
530
+ * \tparam Alignment requested alignment in Bytes.
531
+ * \param array the address of the start of the array
532
+ * \param size the size of the array
533
+ *
534
+ * \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
535
+ * the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
536
+ * packet size for the given scalar type is 1, then everything is considered well-aligned.
537
+ *
538
+ * \note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a
539
+ * power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails
540
+ * for example with Scalar=double on certain 32-bit platforms, see bug #79.
541
+ *
542
+ * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
543
+ * \sa first_default_aligned()
544
+ */
545
+ template <int Alignment, typename Scalar, typename Index>
546
+ EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) {
471
547
  const Index ScalarSize = sizeof(Scalar);
472
548
  const Index AlignmentSize = Alignment / ScalarSize;
473
- const Index AlignmentMask = AlignmentSize-1;
549
+ const Index AlignmentMask = AlignmentSize - 1;
474
550
 
475
- if(AlignmentSize<=1)
476
- {
551
+ if (AlignmentSize <= 1) {
477
552
  // Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar
478
553
  // so that all elements of the array have the same alignment.
479
554
  return 0;
480
- }
481
- else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
482
- {
483
- // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
484
- // Consequently, no element of the array is well aligned.
555
+ } else if ((std::uintptr_t(array) & (sizeof(Scalar) - 1)) || (Alignment % ScalarSize) != 0) {
556
+ // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the
557
+ // scalar size. Consequently, no element of the array is well aligned.
485
558
  return size;
486
- }
487
- else
488
- {
489
- Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
559
+ } else {
560
+ Index first = (AlignmentSize - (Index((std::uintptr_t(array) / sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
490
561
  return (first < size) ? first : size;
491
562
  }
492
563
  }
493
564
 
494
- /** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.
495
- * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
496
- template<typename Scalar, typename Index>
497
- EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
498
- {
565
+ /** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet
566
+ * requirement. \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
567
+ template <typename Scalar, typename Index>
568
+ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size) {
499
569
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
500
570
  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
501
571
  }
502
572
 
503
573
  /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
504
- */
505
- template<typename Index>
506
- inline Index first_multiple(Index size, Index base)
507
- {
508
- return ((size+base-1)/base)*base;
574
+ */
575
+ template <typename Index>
576
+ inline Index first_multiple(Index size, Index base) {
577
+ return ((size + base - 1) / base) * base;
509
578
  }
510
579
 
511
580
  // std::copy is much slower than memcpy, so let's introduce a smart_copy which
512
581
  // use memcpy on trivial types, i.e., on types that does not require an initialization ctor.
513
- template<typename T, bool UseMemcpy> struct smart_copy_helper;
582
+ template <typename T, bool UseMemcpy>
583
+ struct smart_copy_helper;
514
584
 
515
- template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
516
- {
517
- smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
585
+ template <typename T>
586
+ EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target) {
587
+ smart_copy_helper<T, !NumTraits<T>::RequireInitialization>::run(start, end, target);
518
588
  }
519
589
 
520
- template<typename T> struct smart_copy_helper<T,true> {
521
- EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
522
- {
523
- IntPtr size = IntPtr(end)-IntPtr(start);
524
- if(size==0) return;
525
- eigen_internal_assert(start!=0 && end!=0 && target!=0);
590
+ template <typename T>
591
+ struct smart_copy_helper<T, true> {
592
+ EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) {
593
+ std::intptr_t size = std::intptr_t(end) - std::intptr_t(start);
594
+ if (size == 0) return;
595
+ eigen_internal_assert(start != 0 && end != 0 && target != 0);
526
596
  EIGEN_USING_STD(memcpy)
527
597
  memcpy(target, start, size);
528
598
  }
529
599
  };
530
600
 
531
- template<typename T> struct smart_copy_helper<T,false> {
532
- EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
533
- { std::copy(start, end, target); }
601
+ template <typename T>
602
+ struct smart_copy_helper<T, false> {
603
+ EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { std::copy(start, end, target); }
534
604
  };
535
605
 
536
606
  // intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
537
- template<typename T, bool UseMemmove> struct smart_memmove_helper;
607
+ template <typename T, bool UseMemmove>
608
+ struct smart_memmove_helper;
538
609
 
539
- template<typename T> void smart_memmove(const T* start, const T* end, T* target)
540
- {
541
- smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
610
+ template <typename T>
611
+ void smart_memmove(const T* start, const T* end, T* target) {
612
+ smart_memmove_helper<T, !NumTraits<T>::RequireInitialization>::run(start, end, target);
542
613
  }
543
614
 
544
- template<typename T> struct smart_memmove_helper<T,true> {
545
- static inline void run(const T* start, const T* end, T* target)
546
- {
547
- IntPtr size = IntPtr(end)-IntPtr(start);
548
- if(size==0) return;
549
- eigen_internal_assert(start!=0 && end!=0 && target!=0);
615
+ template <typename T>
616
+ struct smart_memmove_helper<T, true> {
617
+ static inline void run(const T* start, const T* end, T* target) {
618
+ std::intptr_t size = std::intptr_t(end) - std::intptr_t(start);
619
+ if (size == 0) return;
620
+ eigen_internal_assert(start != 0 && end != 0 && target != 0);
550
621
  std::memmove(target, start, size);
551
622
  }
552
623
  };
553
624
 
554
- template<typename T> struct smart_memmove_helper<T,false> {
555
- static inline void run(const T* start, const T* end, T* target)
556
- {
557
- if (UIntPtr(target) < UIntPtr(start))
558
- {
625
+ template <typename T>
626
+ struct smart_memmove_helper<T, false> {
627
+ static inline void run(const T* start, const T* end, T* target) {
628
+ if (std::uintptr_t(target) < std::uintptr_t(start)) {
559
629
  std::copy(start, end, target);
560
- }
561
- else
562
- {
563
- std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
630
+ } else {
631
+ std::ptrdiff_t count = (std::ptrdiff_t(end) - std::ptrdiff_t(start)) / sizeof(T);
564
632
  std::copy_backward(start, end, target + count);
565
633
  }
566
634
  }
567
635
  };
568
636
 
569
- #if EIGEN_HAS_RVALUE_REFERENCES
570
- template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
571
- {
637
+ template <typename T>
638
+ EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) {
572
639
  return std::move(start, end, target);
573
640
  }
574
- #else
575
- template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
576
- {
577
- return std::copy(start, end, target);
578
- }
579
- #endif
580
641
 
581
642
  /*****************************************************************************
582
643
  *** Implementation of runtime stack allocation (falling back to malloc) ***
@@ -584,12 +645,12 @@ template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target
584
645
 
585
646
  // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
586
647
  // to the appropriate stack allocation function
587
- #if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
588
- #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
589
- #define EIGEN_ALLOCA alloca
590
- #elif EIGEN_COMP_MSVC
591
- #define EIGEN_ALLOCA _alloca
592
- #endif
648
+ #if !defined EIGEN_ALLOCA && !defined EIGEN_GPU_COMPILE_PHASE
649
+ #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
650
+ #define EIGEN_ALLOCA alloca
651
+ #elif EIGEN_COMP_MSVC
652
+ #define EIGEN_ALLOCA _alloca
653
+ #endif
593
654
  #endif
594
655
 
595
656
  // With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
@@ -598,183 +659,178 @@ template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target
598
659
  // TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
599
660
  // is fixed.
600
661
  #if defined(__clang__) && defined(__thumb__)
601
- #undef EIGEN_ALLOCA
662
+ #undef EIGEN_ALLOCA
602
663
  #endif
603
664
 
604
665
  // This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
605
666
  // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
606
- template<typename T> class aligned_stack_memory_handler : noncopyable
607
- {
608
- public:
609
- /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
610
- * Note that \a ptr can be 0 regardless of the other parameters.
611
- * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
612
- * In this case, the buffer elements will also be destructed when this handler will be destructed.
613
- * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
614
- **/
615
- EIGEN_DEVICE_FUNC
616
- aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
617
- : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
618
- {
619
- if(NumTraits<T>::RequireInitialization && m_ptr)
620
- Eigen::internal::construct_elements_of_array(m_ptr, size);
621
- }
622
- EIGEN_DEVICE_FUNC
623
- ~aligned_stack_memory_handler()
624
- {
625
- if(NumTraits<T>::RequireInitialization && m_ptr)
626
- Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
627
- if(m_deallocate)
628
- Eigen::internal::aligned_free(m_ptr);
629
- }
630
- protected:
631
- T* m_ptr;
632
- std::size_t m_size;
633
- bool m_deallocate;
667
+ template <typename T>
668
+ class aligned_stack_memory_handler : noncopyable {
669
+ public:
670
+ /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
671
+ * Note that \a ptr can be 0 regardless of the other parameters.
672
+ * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type
673
+ *T (see NumTraits<T>::RequireInitialization). In this case, the buffer elements will also be destructed when this
674
+ *handler will be destructed. Finally, if \a dealloc is true, then the pointer \a ptr is freed.
675
+ **/
676
+ EIGEN_DEVICE_FUNC aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
677
+ : m_ptr(ptr), m_size(size), m_deallocate(dealloc) {
678
+ if (NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::default_construct_elements_of_array(m_ptr, size);
679
+ }
680
+ EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() {
681
+ if (NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
682
+ if (m_deallocate) Eigen::internal::aligned_free(m_ptr);
683
+ }
684
+
685
+ protected:
686
+ T* m_ptr;
687
+ std::size_t m_size;
688
+ bool m_deallocate;
634
689
  };
635
690
 
636
691
  #ifdef EIGEN_ALLOCA
637
692
 
638
- template<typename Xpr, int NbEvaluations,
639
- bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
640
- >
641
- struct local_nested_eval_wrapper
642
- {
643
- static const bool NeedExternalBuffer = false;
693
+ template <typename Xpr, int NbEvaluations,
694
+ bool MapExternalBuffer = nested_eval<Xpr, NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime == Dynamic>
695
+ struct local_nested_eval_wrapper {
696
+ static constexpr bool NeedExternalBuffer = false;
644
697
  typedef typename Xpr::Scalar Scalar;
645
- typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
698
+ typedef typename nested_eval<Xpr, NbEvaluations>::type ObjectType;
646
699
  ObjectType object;
647
700
 
648
- EIGEN_DEVICE_FUNC
649
- local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
650
- {
701
+ EIGEN_DEVICE_FUNC local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr) {
651
702
  EIGEN_UNUSED_VARIABLE(ptr);
652
- eigen_internal_assert(ptr==0);
703
+ eigen_internal_assert(ptr == 0);
653
704
  }
654
705
  };
655
706
 
656
- template<typename Xpr, int NbEvaluations>
657
- struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
658
- {
659
- static const bool NeedExternalBuffer = true;
707
+ template <typename Xpr, int NbEvaluations>
708
+ struct local_nested_eval_wrapper<Xpr, NbEvaluations, true> {
709
+ static constexpr bool NeedExternalBuffer = true;
660
710
  typedef typename Xpr::Scalar Scalar;
661
711
  typedef typename plain_object_eval<Xpr>::type PlainObject;
662
- typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
712
+ typedef Map<PlainObject, EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
663
713
  ObjectType object;
664
714
 
665
- EIGEN_DEVICE_FUNC
666
- local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
667
- : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
668
- m_deallocate(ptr==0)
669
- {
670
- if(NumTraits<Scalar>::RequireInitialization && object.data())
671
- Eigen::internal::construct_elements_of_array(object.data(), object.size());
715
+ EIGEN_DEVICE_FUNC local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
716
+ : object(ptr == 0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar) * xpr.size())) : ptr,
717
+ xpr.rows(), xpr.cols()),
718
+ m_deallocate(ptr == 0) {
719
+ if (NumTraits<Scalar>::RequireInitialization && object.data())
720
+ Eigen::internal::default_construct_elements_of_array(object.data(), object.size());
672
721
  object = xpr;
673
722
  }
674
723
 
675
- EIGEN_DEVICE_FUNC
676
- ~local_nested_eval_wrapper()
677
- {
678
- if(NumTraits<Scalar>::RequireInitialization && object.data())
724
+ EIGEN_DEVICE_FUNC ~local_nested_eval_wrapper() {
725
+ if (NumTraits<Scalar>::RequireInitialization && object.data())
679
726
  Eigen::internal::destruct_elements_of_array(object.data(), object.size());
680
- if(m_deallocate)
681
- Eigen::internal::aligned_free(object.data());
727
+ if (m_deallocate) Eigen::internal::aligned_free(object.data());
682
728
  }
683
729
 
684
- private:
730
+ private:
685
731
  bool m_deallocate;
686
732
  };
687
733
 
688
- #endif // EIGEN_ALLOCA
734
+ #endif // EIGEN_ALLOCA
689
735
 
690
- template<typename T> class scoped_array : noncopyable
691
- {
736
+ template <typename T>
737
+ class scoped_array : noncopyable {
692
738
  T* m_ptr;
693
- public:
694
- explicit scoped_array(std::ptrdiff_t size)
695
- {
696
- m_ptr = new T[size];
697
- }
698
- ~scoped_array()
699
- {
700
- delete[] m_ptr;
701
- }
739
+
740
+ public:
741
+ explicit scoped_array(std::ptrdiff_t size) { m_ptr = new T[size]; }
742
+ ~scoped_array() { delete[] m_ptr; }
702
743
  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
703
744
  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
704
- T* &ptr() { return m_ptr; }
745
+ T*& ptr() { return m_ptr; }
705
746
  const T* ptr() const { return m_ptr; }
706
747
  operator const T*() const { return m_ptr; }
707
748
  };
708
749
 
709
- template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
710
- {
711
- std::swap(a.ptr(),b.ptr());
750
+ template <typename T>
751
+ void swap(scoped_array<T>& a, scoped_array<T>& b) {
752
+ std::swap(a.ptr(), b.ptr());
712
753
  }
713
754
 
714
- } // end namespace internal
755
+ } // end namespace internal
715
756
 
716
757
  /** \internal
717
- *
718
- * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
719
- * and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
720
- * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
721
- * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap.
722
- * The allocated buffer is automatically deleted when exiting the scope of this declaration.
723
- * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
724
- * Here is an example:
725
- * \code
726
- * {
727
- * ei_declare_aligned_stack_constructed_variable(float,data,size,0);
728
- * // use data[0] to data[size-1]
729
- * }
730
- * \endcode
731
- * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
732
- *
733
- * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to
734
- * \code
735
- * typename internal::nested_eval<XPRT_T,N>::type NAME(XPR);
736
- * \endcode
737
- * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
738
- * This is accomplished through alloca if this later is supported and if the required number of bytes
739
- * is below EIGEN_STACK_ALLOCATION_LIMIT.
740
- */
741
- #ifdef EIGEN_ALLOCA
742
-
743
- #if EIGEN_DEFAULT_ALIGN_BYTES>0
744
- // We always manually re-align the result of EIGEN_ALLOCA.
745
- // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
746
- #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
747
- #else
748
- #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
749
- #endif
750
-
751
- #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
752
- Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
753
- TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
754
- : reinterpret_cast<TYPE*>( \
755
- (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
756
- : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
757
- Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
758
-
759
-
760
- #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
761
- Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
762
- ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
763
- ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
764
- typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)
758
+ *
759
+ * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
760
+ * and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
761
+ * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the
762
+ * platform (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap. The
763
+ * allocated buffer is automatically deleted when exiting the scope of this declaration. If BUFFER is non null, then the
764
+ * declared variable is simply an alias for BUFFER, and no allocation/deletion occurs. Here is an example: \code
765
+ * {
766
+ * ei_declare_aligned_stack_constructed_variable(float,data,size,0);
767
+ * // use data[0] to data[size-1]
768
+ * }
769
+ * \endcode
770
+ * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
771
+ *
772
+ * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to
773
+ * \code
774
+ * typename internal::nested_eval<XPRT_T,N>::type NAME(XPR);
775
+ * \endcode
776
+ * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
777
+ * This is accomplished through alloca if this later is supported and if the required number of bytes
778
+ * is below EIGEN_STACK_ALLOCATION_LIMIT.
779
+ */
780
+ #if defined(EIGEN_ALLOCA) && !defined(EIGEN_NO_ALLOCA)
781
+
782
+ #if EIGEN_DEFAULT_ALIGN_BYTES > 0
783
+ // We always manually re-align the result of EIGEN_ALLOCA.
784
+ // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
785
+
786
+ #if ((EIGEN_COMP_GNUC || EIGEN_COMP_CLANG) && !EIGEN_COMP_NVHPC)
787
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) __builtin_alloca_with_align(SIZE, CHAR_BIT* EIGEN_DEFAULT_ALIGN_BYTES)
788
+ #else
789
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* ptr) {
790
+ constexpr std::uintptr_t mask = EIGEN_DEFAULT_ALIGN_BYTES - 1;
791
+ std::uintptr_t ptr_int = std::uintptr_t(ptr);
792
+ std::uintptr_t aligned_ptr_int = (ptr_int + mask) & ~mask;
793
+ std::uintptr_t offset = aligned_ptr_int - ptr_int;
794
+ return static_cast<void*>(static_cast<uint8_t*>(ptr) + offset);
795
+ }
796
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) eigen_aligned_alloca_helper(EIGEN_ALLOCA(SIZE + EIGEN_DEFAULT_ALIGN_BYTES - 1))
797
+ #endif
765
798
 
766
799
  #else
800
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
801
+ #endif
767
802
 
768
- #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
769
- Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
770
- TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
771
- Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
803
+ #define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
804
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
805
+ TYPE* NAME = (BUFFER) != 0 ? (BUFFER) \
806
+ : reinterpret_cast<TYPE*>((sizeof(TYPE) * (SIZE) <= EIGEN_STACK_ALLOCATION_LIMIT) \
807
+ ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE) * (SIZE)) \
808
+ : Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
809
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
810
+ (BUFFER) == 0 ? NAME : 0, SIZE, sizeof(TYPE) * (SIZE) > EIGEN_STACK_ALLOCATION_LIMIT)
811
+
812
+ #define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
813
+ Eigen::internal::local_nested_eval_wrapper<XPR_T, N> EIGEN_CAT(NAME, _wrapper)( \
814
+ XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
815
+ ((Eigen::internal::local_nested_eval_wrapper<XPR_T, N>::NeedExternalBuffer) && \
816
+ ((sizeof(typename XPR_T::Scalar) * XPR.size()) <= EIGEN_STACK_ALLOCATION_LIMIT)) \
817
+ ? EIGEN_ALIGNED_ALLOCA(sizeof(typename XPR_T::Scalar) * XPR.size()) \
818
+ : 0)); \
819
+ typename Eigen::internal::local_nested_eval_wrapper<XPR_T, N>::ObjectType NAME(EIGEN_CAT(NAME, _wrapper).object)
772
820
 
821
+ #else
773
822
 
774
- #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)
823
+ #define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
824
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
825
+ TYPE* NAME = \
826
+ (BUFFER) != 0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
827
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
828
+ (BUFFER) == 0 ? NAME : 0, SIZE, true)
775
829
 
776
- #endif
830
+ #define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
831
+ typename Eigen::internal::nested_eval<XPR_T, N>::type NAME(XPR)
777
832
 
833
+ #endif
778
834
 
779
835
  /*****************************************************************************
780
836
  *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
@@ -787,315 +843,432 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
787
843
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
788
844
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
789
845
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
790
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
846
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Size)
791
847
 
792
848
  #else
793
849
 
794
850
  // HIP does not support new/delete on device.
795
- #if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
796
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
797
- EIGEN_DEVICE_FUNC \
798
- void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
799
- EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
800
- EIGEN_CATCH (...) { return 0; } \
801
- }
802
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
803
- EIGEN_DEVICE_FUNC \
804
- void *operator new(std::size_t size) { \
805
- return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
806
- } \
807
- EIGEN_DEVICE_FUNC \
808
- void *operator new[](std::size_t size) { \
809
- return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
810
- } \
811
- EIGEN_DEVICE_FUNC \
812
- void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
813
- EIGEN_DEVICE_FUNC \
814
- void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
815
- EIGEN_DEVICE_FUNC \
816
- void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
817
- EIGEN_DEVICE_FUNC \
818
- void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
819
- /* in-place new and delete. since (at least afaik) there is no actual */ \
820
- /* memory allocated we can safely let the default implementation handle */ \
821
- /* this particular case. */ \
822
- EIGEN_DEVICE_FUNC \
823
- static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
824
- EIGEN_DEVICE_FUNC \
825
- static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
826
- EIGEN_DEVICE_FUNC \
827
- void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
828
- EIGEN_DEVICE_FUNC \
829
- void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
830
- /* nothrow-new (returns zero instead of std::bad_alloc) */ \
831
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
832
- EIGEN_DEVICE_FUNC \
833
- void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
834
- Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
835
- } \
836
- typedef void eigen_aligned_operator_new_marker_type;
851
+ #if EIGEN_MAX_ALIGN_BYTES != 0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
852
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
853
+ EIGEN_DEVICE_FUNC void* operator new(std::size_t size, const std::nothrow_t&) noexcept { \
854
+ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
855
+ EIGEN_CATCH(...) { return 0; } \
856
+ }
857
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
858
+ EIGEN_DEVICE_FUNC void* operator new(std::size_t size) { \
859
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
860
+ } \
861
+ EIGEN_DEVICE_FUNC void* operator new[](std::size_t size) { \
862
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
863
+ } \
864
+ EIGEN_DEVICE_FUNC void operator delete(void* ptr) noexcept { \
865
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
866
+ } \
867
+ EIGEN_DEVICE_FUNC void operator delete[](void* ptr) noexcept { \
868
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
869
+ } \
870
+ EIGEN_DEVICE_FUNC void operator delete(void* ptr, std::size_t /* sz */) noexcept { \
871
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
872
+ } \
873
+ EIGEN_DEVICE_FUNC void operator delete[](void* ptr, std::size_t /* sz */) noexcept { \
874
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
875
+ } \
876
+ /* in-place new and delete. since (at least afaik) there is no actual */ \
877
+ /* memory allocated we can safely let the default implementation handle */ \
878
+ /* this particular case. */ \
879
+ EIGEN_DEVICE_FUNC static void* operator new(std::size_t size, void* ptr) { return ::operator new(size, ptr); } \
880
+ EIGEN_DEVICE_FUNC static void* operator new[](std::size_t size, void* ptr) { return ::operator new[](size, ptr); } \
881
+ EIGEN_DEVICE_FUNC void operator delete(void* memory, void* ptr) noexcept { return ::operator delete(memory, ptr); } \
882
+ EIGEN_DEVICE_FUNC void operator delete[](void* memory, void* ptr) noexcept { \
883
+ return ::operator delete[](memory, ptr); \
884
+ } \
885
+ /* nothrow-new (returns zero instead of std::bad_alloc) */ \
886
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
887
+ EIGEN_DEVICE_FUNC void operator delete(void* ptr, const std::nothrow_t&) noexcept { \
888
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
889
+ } \
890
+ typedef void eigen_aligned_operator_new_marker_type;
837
891
  #else
838
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
892
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
839
893
  #endif
840
894
 
841
895
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
842
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
843
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
844
- ((Size)!=Eigen::Dynamic) && \
845
- (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \
846
- ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
847
- ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) )))
896
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Size) \
897
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF( \
898
+ bool(((Size) != Eigen::Dynamic) && \
899
+ (((EIGEN_MAX_ALIGN_BYTES >= 16) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES) == 0)) || \
900
+ ((EIGEN_MAX_ALIGN_BYTES >= 32) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES / 2) == 0)) || \
901
+ ((EIGEN_MAX_ALIGN_BYTES >= 64) && ((sizeof(Scalar) * (Size)) % (EIGEN_MAX_ALIGN_BYTES / 4) == 0)))))
848
902
 
849
903
  #endif
850
904
 
851
905
  /****************************************************************************/
852
906
 
853
907
  /** \class aligned_allocator
854
- * \ingroup Core_Module
855
- *
856
- * \brief STL compatible allocator to use with types requiring a non standrad alignment.
857
- *
858
- * The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
859
- * By default, it will thus provide at least 16 bytes alignment and more in following cases:
860
- * - 32 bytes alignment if AVX is enabled.
861
- * - 64 bytes alignment if AVX512 is enabled.
862
- *
863
- * This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
864
- * \link TopicPreprocessorDirectivesPerformance there \endlink.
865
- *
866
- * Example:
867
- * \code
868
- * // Matrix4f requires 16 bytes alignment:
869
- * std::map< int, Matrix4f, std::less<int>,
870
- * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
871
- * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
872
- * std::map< int, Vector3f > my_map_vec3;
873
- * \endcode
874
- *
875
- * \sa \blank \ref TopicStlContainers.
876
- */
877
- template<class T>
878
- class aligned_allocator : public std::allocator<T>
879
- {
880
- public:
881
- typedef std::size_t size_type;
882
- typedef std::ptrdiff_t difference_type;
883
- typedef T* pointer;
884
- typedef const T* const_pointer;
885
- typedef T& reference;
886
- typedef const T& const_reference;
887
- typedef T value_type;
888
-
889
- template<class U>
890
- struct rebind
891
- {
908
+ * \ingroup Core_Module
909
+ *
910
+ * \brief STL compatible allocator to use with types requiring a non-standard alignment.
911
+ *
912
+ * The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
913
+ * By default, it will thus provide at least 16 bytes alignment and more in following cases:
914
+ * - 32 bytes alignment if AVX is enabled.
915
+ * - 64 bytes alignment if AVX512 is enabled.
916
+ *
917
+ * This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
918
+ * \link TopicPreprocessorDirectivesPerformance there \endlink.
919
+ *
920
+ * Example:
921
+ * \code
922
+ * // Matrix4f requires 16 bytes alignment:
923
+ * std::map< int, Matrix4f, std::less<int>,
924
+ * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
925
+ * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
926
+ * std::map< int, Vector3f > my_map_vec3;
927
+ * \endcode
928
+ *
929
+ * \sa \blank \ref TopicStlContainers.
930
+ */
931
+ template <class T>
932
+ class aligned_allocator {
933
+ public:
934
+ typedef std::size_t size_type;
935
+ typedef std::ptrdiff_t difference_type;
936
+ typedef T* pointer;
937
+ typedef const T* const_pointer;
938
+ typedef T& reference;
939
+ typedef const T& const_reference;
940
+ typedef T value_type;
941
+
942
+ template <class U>
943
+ struct rebind {
892
944
  typedef aligned_allocator<U> other;
893
945
  };
894
946
 
895
- aligned_allocator() : std::allocator<T>() {}
947
+ aligned_allocator() = default;
896
948
 
897
- aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
949
+ aligned_allocator(const aligned_allocator&) = default;
898
950
 
899
- template<class U>
900
- aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
951
+ template <class U>
952
+ aligned_allocator(const aligned_allocator<U>&) {}
901
953
 
902
- ~aligned_allocator() {}
954
+ template <class U>
955
+ constexpr bool operator==(const aligned_allocator<U>&) const noexcept {
956
+ return true;
957
+ }
958
+ template <class U>
959
+ constexpr bool operator!=(const aligned_allocator<U>&) const noexcept {
960
+ return false;
961
+ }
903
962
 
904
- #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
963
+ #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0)
905
964
  // In gcc std::allocator::max_size() is bugged making gcc triggers a warning:
906
- // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
907
- // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
908
- size_type max_size() const {
909
- return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
910
- }
911
- #endif
965
+ // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object
966
+ // size 9223372036854775807 See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
967
+ size_type max_size() const { return (std::numeric_limits<std::ptrdiff_t>::max)() / sizeof(T); }
968
+ #endif
912
969
 
913
- pointer allocate(size_type num, const void* /*hint*/ = 0)
914
- {
970
+ pointer allocate(size_type num, const void* /*hint*/ = 0) {
915
971
  internal::check_size_for_overflow<T>(num);
916
- return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
972
+ return static_cast<pointer>(internal::aligned_malloc(num * sizeof(T)));
917
973
  }
918
974
 
919
- void deallocate(pointer p, size_type /*num*/)
920
- {
921
- internal::aligned_free(p);
922
- }
975
+ void deallocate(pointer p, size_type /*num*/) { internal::aligned_free(p); }
923
976
  };
924
977
 
925
978
  //---------- Cache sizes ----------
926
979
 
927
980
  #if !defined(EIGEN_NO_CPUID)
928
- # if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
929
- # if defined(__PIC__) && EIGEN_ARCH_i386
930
- // Case for x86 with PIC
931
- # define EIGEN_CPUID(abcd,func,id) \
932
- __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
933
- # elif defined(__PIC__) && EIGEN_ARCH_x86_64
934
- // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
935
- // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
936
- # define EIGEN_CPUID(abcd,func,id) \
937
- __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
938
- # else
939
- // Case for x86_64 or x86 w/o PIC
940
- # define EIGEN_CPUID(abcd,func,id) \
941
- __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
942
- # endif
943
- # elif EIGEN_COMP_MSVC
944
- # if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
945
- # define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
946
- # endif
947
- # endif
981
+ #if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
982
+ #if defined(__PIC__) && EIGEN_ARCH_i386
983
+ // Case for x86 with PIC
984
+ #define EIGEN_CPUID(abcd, func, id) \
985
+ __asm__ __volatile__("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1" \
986
+ : "=a"(abcd[0]), "=&r"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) \
987
+ : "a"(func), "c"(id));
988
+ #elif defined(__PIC__) && EIGEN_ARCH_x86_64
989
+ // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with
990
+ // the default small code model. However, we cannot detect which code model is used, and the xchg overhead is negligible
991
+ // anyway.
992
+ #define EIGEN_CPUID(abcd, func, id) \
993
+ __asm__ __volatile__("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1" \
994
+ : "=a"(abcd[0]), "=&r"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) \
995
+ : "0"(func), "2"(id));
996
+ #else
997
+ // Case for x86_64 or x86 w/o PIC
998
+ #define EIGEN_CPUID(abcd, func, id) \
999
+ __asm__ __volatile__("cpuid" : "=a"(abcd[0]), "=b"(abcd[1]), "=c"(abcd[2]), "=d"(abcd[3]) : "0"(func), "2"(id));
1000
+ #endif
1001
+ #elif EIGEN_COMP_MSVC
1002
+ #if EIGEN_ARCH_i386_OR_x86_64
1003
+ #define EIGEN_CPUID(abcd, func, id) __cpuidex((int*)abcd, func, id)
1004
+ #endif
1005
+ #endif
948
1006
  #endif
949
1007
 
950
1008
  namespace internal {
951
1009
 
952
1010
  #ifdef EIGEN_CPUID
953
1011
 
954
- inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
955
- {
956
- return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
1012
+ inline bool cpuid_is_vendor(int abcd[4], const int vendor[3]) {
1013
+ return abcd[1] == vendor[0] && abcd[3] == vendor[1] && abcd[2] == vendor[2];
957
1014
  }
958
1015
 
959
- inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
960
- {
1016
+ inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3) {
961
1017
  int abcd[4];
962
1018
  l1 = l2 = l3 = 0;
963
1019
  int cache_id = 0;
964
1020
  int cache_type = 0;
965
1021
  do {
966
1022
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
967
- EIGEN_CPUID(abcd,0x4,cache_id);
968
- cache_type = (abcd[0] & 0x0F) >> 0;
969
- if(cache_type==1||cache_type==3) // data or unified cache
1023
+ EIGEN_CPUID(abcd, 0x4, cache_id);
1024
+ cache_type = (abcd[0] & 0x0F) >> 0;
1025
+ if (cache_type == 1 || cache_type == 3) // data or unified cache
970
1026
  {
971
- int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
972
- int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
973
- int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
974
- int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
975
- int sets = (abcd[2]); // C[31:0]
976
-
977
- int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
978
-
979
- switch(cache_level)
980
- {
981
- case 1: l1 = cache_size; break;
982
- case 2: l2 = cache_size; break;
983
- case 3: l3 = cache_size; break;
984
- default: break;
1027
+ int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
1028
+ int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
1029
+ int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
1030
+ int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
1031
+ int sets = (abcd[2]); // C[31:0]
1032
+
1033
+ int cache_size = (ways + 1) * (partitions + 1) * (line_size + 1) * (sets + 1);
1034
+
1035
+ switch (cache_level) {
1036
+ case 1:
1037
+ l1 = cache_size;
1038
+ break;
1039
+ case 2:
1040
+ l2 = cache_size;
1041
+ break;
1042
+ case 3:
1043
+ l3 = cache_size;
1044
+ break;
1045
+ default:
1046
+ break;
985
1047
  }
986
1048
  }
987
1049
  cache_id++;
988
- } while(cache_type>0 && cache_id<16);
1050
+ } while (cache_type > 0 && cache_id < 16);
989
1051
  }
990
1052
 
991
- inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
992
- {
1053
+ inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3) {
993
1054
  int abcd[4];
994
1055
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
995
1056
  l1 = l2 = l3 = 0;
996
- EIGEN_CPUID(abcd,0x00000002,0);
997
- unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
1057
+ EIGEN_CPUID(abcd, 0x00000002, 0);
1058
+ unsigned char* bytes = reinterpret_cast<unsigned char*>(abcd) + 2;
998
1059
  bool check_for_p2_core2 = false;
999
- for(int i=0; i<14; ++i)
1000
- {
1001
- switch(bytes[i])
1002
- {
1003
- case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
1004
- case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
1005
- case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
1006
- case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
1007
- case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
1008
- case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
1009
- case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
1010
- case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
1011
- case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
1012
- case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
1013
- case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
1014
- case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
1015
- case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
1016
- case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
1017
- case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
1018
- case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
1019
- case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
1020
- case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
1021
- case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
1022
- case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
1023
- case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
1024
- case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
1025
- case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
1026
- case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
1027
- case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
1028
- case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
1029
- case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
1030
- case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
1031
- case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
1032
- case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
1033
- case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
1034
- case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
1035
- case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
1036
- case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
1037
- case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
1038
- case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
1039
- case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
1040
- case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
1041
- case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
1042
- case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
1043
- case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
1044
- case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
1045
- case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
1046
- case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
1047
- case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
1048
- case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
1049
- case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
1050
- case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
1051
- case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
1052
- case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
1053
- case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
1054
- case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
1055
- case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
1056
- case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
1057
- case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
1058
- case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
1059
- case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
1060
-
1061
- default: break;
1060
+ for (int i = 0; i < 14; ++i) {
1061
+ switch (bytes[i]) {
1062
+ case 0x0A:
1063
+ l1 = 8;
1064
+ break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
1065
+ case 0x0C:
1066
+ l1 = 16;
1067
+ break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
1068
+ case 0x0E:
1069
+ l1 = 24;
1070
+ break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
1071
+ case 0x10:
1072
+ l1 = 16;
1073
+ break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
1074
+ case 0x15:
1075
+ l1 = 16;
1076
+ break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
1077
+ case 0x2C:
1078
+ l1 = 32;
1079
+ break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
1080
+ case 0x30:
1081
+ l1 = 32;
1082
+ break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
1083
+ case 0x60:
1084
+ l1 = 16;
1085
+ break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
1086
+ case 0x66:
1087
+ l1 = 8;
1088
+ break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
1089
+ case 0x67:
1090
+ l1 = 16;
1091
+ break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
1092
+ case 0x68:
1093
+ l1 = 32;
1094
+ break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
1095
+ case 0x1A:
1096
+ l2 = 96;
1097
+ break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
1098
+ case 0x22:
1099
+ l3 = 512;
1100
+ break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
1101
+ case 0x23:
1102
+ l3 = 1024;
1103
+ break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
1104
+ case 0x25:
1105
+ l3 = 2048;
1106
+ break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
1107
+ case 0x29:
1108
+ l3 = 4096;
1109
+ break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
1110
+ case 0x39:
1111
+ l2 = 128;
1112
+ break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
1113
+ case 0x3A:
1114
+ l2 = 192;
1115
+ break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
1116
+ case 0x3B:
1117
+ l2 = 128;
1118
+ break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
1119
+ case 0x3C:
1120
+ l2 = 256;
1121
+ break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
1122
+ case 0x3D:
1123
+ l2 = 384;
1124
+ break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
1125
+ case 0x3E:
1126
+ l2 = 512;
1127
+ break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
1128
+ case 0x40:
1129
+ l2 = 0;
1130
+ break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
1131
+ case 0x41:
1132
+ l2 = 128;
1133
+ break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
1134
+ case 0x42:
1135
+ l2 = 256;
1136
+ break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
1137
+ case 0x43:
1138
+ l2 = 512;
1139
+ break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
1140
+ case 0x44:
1141
+ l2 = 1024;
1142
+ break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
1143
+ case 0x45:
1144
+ l2 = 2048;
1145
+ break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
1146
+ case 0x46:
1147
+ l3 = 4096;
1148
+ break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
1149
+ case 0x47:
1150
+ l3 = 8192;
1151
+ break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
1152
+ case 0x48:
1153
+ l2 = 3072;
1154
+ break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
1155
+ case 0x49:
1156
+ if (l2 != 0)
1157
+ l3 = 4096;
1158
+ else {
1159
+ check_for_p2_core2 = true;
1160
+ l3 = l2 = 4096;
1161
+ }
1162
+ break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
1163
+ case 0x4A:
1164
+ l3 = 6144;
1165
+ break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
1166
+ case 0x4B:
1167
+ l3 = 8192;
1168
+ break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
1169
+ case 0x4C:
1170
+ l3 = 12288;
1171
+ break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
1172
+ case 0x4D:
1173
+ l3 = 16384;
1174
+ break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
1175
+ case 0x4E:
1176
+ l2 = 6144;
1177
+ break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
1178
+ case 0x78:
1179
+ l2 = 1024;
1180
+ break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
1181
+ case 0x79:
1182
+ l2 = 128;
1183
+ break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
1184
+ case 0x7A:
1185
+ l2 = 256;
1186
+ break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
1187
+ case 0x7B:
1188
+ l2 = 512;
1189
+ break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
1190
+ case 0x7C:
1191
+ l2 = 1024;
1192
+ break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
1193
+ case 0x7D:
1194
+ l2 = 2048;
1195
+ break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
1196
+ case 0x7E:
1197
+ l2 = 256;
1198
+ break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
1199
+ case 0x7F:
1200
+ l2 = 512;
1201
+ break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
1202
+ case 0x80:
1203
+ l2 = 512;
1204
+ break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
1205
+ case 0x81:
1206
+ l2 = 128;
1207
+ break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
1208
+ case 0x82:
1209
+ l2 = 256;
1210
+ break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
1211
+ case 0x83:
1212
+ l2 = 512;
1213
+ break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
1214
+ case 0x84:
1215
+ l2 = 1024;
1216
+ break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
1217
+ case 0x85:
1218
+ l2 = 2048;
1219
+ break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
1220
+ case 0x86:
1221
+ l2 = 512;
1222
+ break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
1223
+ case 0x87:
1224
+ l2 = 1024;
1225
+ break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
1226
+ case 0x88:
1227
+ l3 = 2048;
1228
+ break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
1229
+ case 0x89:
1230
+ l3 = 4096;
1231
+ break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
1232
+ case 0x8A:
1233
+ l3 = 8192;
1234
+ break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
1235
+ case 0x8D:
1236
+ l3 = 3072;
1237
+ break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
1238
+
1239
+ default:
1240
+ break;
1062
1241
  }
1063
1242
  }
1064
- if(check_for_p2_core2 && l2 == l3)
1065
- l3 = 0;
1243
+ if (check_for_p2_core2 && l2 == l3) l3 = 0;
1066
1244
  l1 *= 1024;
1067
1245
  l2 *= 1024;
1068
1246
  l3 *= 1024;
1069
1247
  }
1070
1248
 
1071
- inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
1072
- {
1073
- if(max_std_funcs>=4)
1074
- queryCacheSizes_intel_direct(l1,l2,l3);
1075
- else if(max_std_funcs>=2)
1076
- queryCacheSizes_intel_codes(l1,l2,l3);
1249
+ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) {
1250
+ if (max_std_funcs >= 4)
1251
+ queryCacheSizes_intel_direct(l1, l2, l3);
1252
+ else if (max_std_funcs >= 2)
1253
+ queryCacheSizes_intel_codes(l1, l2, l3);
1077
1254
  else
1078
1255
  l1 = l2 = l3 = 0;
1079
1256
  }
1080
1257
 
1081
- inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
1082
- {
1258
+ inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) {
1083
1259
  int abcd[4];
1084
1260
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
1085
-
1261
+
1086
1262
  // First query the max supported function.
1087
- EIGEN_CPUID(abcd,0x80000000,0);
1088
- if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
1089
- {
1090
- EIGEN_CPUID(abcd,0x80000005,0);
1091
- l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
1263
+ EIGEN_CPUID(abcd, 0x80000000, 0);
1264
+ if (static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006)) {
1265
+ EIGEN_CPUID(abcd, 0x80000005, 0);
1266
+ l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
1092
1267
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
1093
- EIGEN_CPUID(abcd,0x80000006,0);
1094
- l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
1095
- l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
1096
- }
1097
- else
1098
- {
1268
+ EIGEN_CPUID(abcd, 0x80000006, 0);
1269
+ l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
1270
+ l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
1271
+ } else {
1099
1272
  l1 = l2 = l3 = 0;
1100
1273
  }
1101
1274
  }
@@ -1103,61 +1276,110 @@ inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
1103
1276
 
1104
1277
  /** \internal
1105
1278
  * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
1106
- inline void queryCacheSizes(int& l1, int& l2, int& l3)
1107
- {
1108
- #ifdef EIGEN_CPUID
1279
+ inline void queryCacheSizes(int& l1, int& l2, int& l3) {
1280
+ #ifdef EIGEN_CPUID
1109
1281
  int abcd[4];
1110
1282
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
1111
1283
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
1112
- const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
1284
+ const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
1113
1285
 
1114
1286
  // identify the CPU vendor
1115
- EIGEN_CPUID(abcd,0x0,0);
1287
+ EIGEN_CPUID(abcd, 0x0, 0);
1116
1288
  int max_std_funcs = abcd[0];
1117
- if(cpuid_is_vendor(abcd,GenuineIntel))
1118
- queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
1119
- else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
1120
- queryCacheSizes_amd(l1,l2,l3);
1289
+ if (cpuid_is_vendor(abcd, GenuineIntel))
1290
+ queryCacheSizes_intel(l1, l2, l3, max_std_funcs);
1291
+ else if (cpuid_is_vendor(abcd, AuthenticAMD) || cpuid_is_vendor(abcd, AMDisbetter_))
1292
+ queryCacheSizes_amd(l1, l2, l3);
1121
1293
  else
1122
1294
  // by default let's use Intel's API
1123
- queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
1124
-
1125
- // here is the list of other vendors:
1126
- // ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
1127
- // ||cpuid_is_vendor(abcd,"CyrixInstead")
1128
- // ||cpuid_is_vendor(abcd,"CentaurHauls")
1129
- // ||cpuid_is_vendor(abcd,"GenuineTMx86")
1130
- // ||cpuid_is_vendor(abcd,"TransmetaCPU")
1131
- // ||cpuid_is_vendor(abcd,"RiseRiseRise")
1132
- // ||cpuid_is_vendor(abcd,"Geode by NSC")
1133
- // ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
1134
- // ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
1135
- // ||cpuid_is_vendor(abcd,"NexGenDriven")
1136
- #else
1295
+ queryCacheSizes_intel(l1, l2, l3, max_std_funcs);
1296
+
1297
+ // here is the list of other vendors:
1298
+ // ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
1299
+ // ||cpuid_is_vendor(abcd,"CyrixInstead")
1300
+ // ||cpuid_is_vendor(abcd,"CentaurHauls")
1301
+ // ||cpuid_is_vendor(abcd,"GenuineTMx86")
1302
+ // ||cpuid_is_vendor(abcd,"TransmetaCPU")
1303
+ // ||cpuid_is_vendor(abcd,"RiseRiseRise")
1304
+ // ||cpuid_is_vendor(abcd,"Geode by NSC")
1305
+ // ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
1306
+ // ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
1307
+ // ||cpuid_is_vendor(abcd,"NexGenDriven")
1308
+ #else
1137
1309
  l1 = l2 = l3 = -1;
1138
- #endif
1310
+ #endif
1139
1311
  }
1140
1312
 
1141
1313
  /** \internal
1142
1314
  * \returns the size in Bytes of the L1 data cache */
1143
- inline int queryL1CacheSize()
1144
- {
1315
+ inline int queryL1CacheSize() {
1145
1316
  int l1(-1), l2, l3;
1146
- queryCacheSizes(l1,l2,l3);
1317
+ queryCacheSizes(l1, l2, l3);
1147
1318
  return l1;
1148
1319
  }
1149
1320
 
1150
1321
  /** \internal
1151
1322
  * \returns the size in Bytes of the L2 or L3 cache if this later is present */
1152
- inline int queryTopLevelCacheSize()
1153
- {
1323
+ inline int queryTopLevelCacheSize() {
1154
1324
  int l1, l2(-1), l3(-1);
1155
- queryCacheSizes(l1,l2,l3);
1156
- return (std::max)(l2,l3);
1325
+ queryCacheSizes(l1, l2, l3);
1326
+ return (std::max)(l2, l3);
1327
+ }
1328
+
1329
+ /** \internal
1330
+ * This wraps C++20's std::construct_at, using placement new instead if it is not available.
1331
+ */
1332
+
1333
+ #if EIGEN_COMP_CXXVER >= 20 && defined(__cpp_lib_constexpr_dynamic_alloc) && \
1334
+ __cpp_lib_constexpr_dynamic_alloc >= 201907L
1335
+ using std::construct_at;
1336
+ #else
1337
+ template <class T, class... Args>
1338
+ EIGEN_DEVICE_FUNC T* construct_at(T* p, Args&&... args) {
1339
+ return ::new (const_cast<void*>(static_cast<const volatile void*>(p))) T(std::forward<Args>(args)...);
1340
+ }
1341
+ #endif
1342
+
1343
+ /** \internal
1344
+ * This wraps C++17's std::destroy_at. If it's not available it calls the destructor.
1345
+ * The wrapper is not a full replacement for C++20's std::destroy_at as it cannot
1346
+ * be applied to std::array.
1347
+ */
1348
+ #if EIGEN_COMP_CXXVER >= 17
1349
+ using std::destroy_at;
1350
+ #else
1351
+ template <class T>
1352
+ EIGEN_DEVICE_FUNC void destroy_at(T* p) {
1353
+ p->~T();
1157
1354
  }
1355
+ #endif
1356
+
1357
+ // FIXME(rmlarsen): Work around missing linker symbol with msan on ARM.
1358
+ #if !defined(EIGEN_DONT_ASSUME_ALIGNED) && __has_feature(memory_sanitizer) && \
1359
+ (EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64)
1360
+ #define EIGEN_DONT_ASSUME_ALIGNED
1361
+ #endif
1362
+
1363
+
1364
+ #if !defined(EIGEN_DONT_ASSUME_ALIGNED) && defined(__cpp_lib_assume_aligned) && (__cpp_lib_assume_aligned >= 201811L)
1365
+ template <std::size_t N, typename T>
1366
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
1367
+ return std::assume_aligned<N, T>(ptr);
1368
+ }
1369
+ #elif !defined(EIGEN_DONT_ASSUME_ALIGNED) && EIGEN_HAS_BUILTIN(__builtin_assume_aligned)
1370
+ template <std::size_t N, typename T>
1371
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC T* assume_aligned(T* ptr) {
1372
+ return static_cast<T*>(__builtin_assume_aligned(ptr, N));
1373
+ }
1374
+ #else
1375
+ template <std::size_t N, typename T>
1376
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
1377
+ return ptr;
1378
+ }
1379
+ #endif
1158
1380
 
1159
- } // end namespace internal
1381
+ } // end namespace internal
1160
1382
 
1161
- } // end namespace Eigen
1383
+ } // end namespace Eigen
1162
1384
 
1163
- #endif // EIGEN_MEMORY_H
1385
+ #endif // EIGEN_MEMORY_H