@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -33,104 +33,107 @@
33
33
  #ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
34
34
  #define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
35
35
 
36
- namespace Eigen {
36
+ // IWYU pragma: private
37
+ #include "../InternalHeaderCheck.h"
38
+
39
+ namespace Eigen {
37
40
 
38
41
  namespace internal {
39
42
 
40
43
  /**********************************************************************
41
- * This file implements general matrix-vector multiplication using BLAS
42
- * gemv function via partial specialization of
43
- * general_matrix_vector_product::run(..) method for float, double,
44
- * std::complex<float> and std::complex<double> types
45
- **********************************************************************/
44
+ * This file implements general matrix-vector multiplication using BLAS
45
+ * gemv function via partial specialization of
46
+ * general_matrix_vector_product::run(..) method for float, double,
47
+ * std::complex<float> and std::complex<double> types
48
+ **********************************************************************/
46
49
 
47
50
  // gemv specialization
48
51
 
49
- template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
52
+ template <typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar,
53
+ bool ConjugateRhs>
50
54
  struct general_matrix_vector_product_gemv;
51
55
 
52
- #define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
53
- template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
54
- struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
55
- static void run( \
56
- Index rows, Index cols, \
57
- const const_blas_data_mapper<Scalar,Index,ColMajor> &lhs, \
58
- const const_blas_data_mapper<Scalar,Index,RowMajor> &rhs, \
59
- Scalar* res, Index resIncr, Scalar alpha) \
60
- { \
61
- if (ConjugateLhs) { \
62
- general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,BuiltIn>::run( \
63
- rows, cols, lhs, rhs, res, resIncr, alpha); \
64
- } else { \
65
- general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
66
- rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
67
- } \
68
- } \
69
- }; \
70
- template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
71
- struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ConjugateRhs,Specialized> { \
72
- static void run( \
73
- Index rows, Index cols, \
74
- const const_blas_data_mapper<Scalar,Index,RowMajor> &lhs, \
75
- const const_blas_data_mapper<Scalar,Index,ColMajor> &rhs, \
76
- Scalar* res, Index resIncr, Scalar alpha) \
77
- { \
78
- general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
79
- rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
80
- } \
81
- }; \
56
+ #define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
57
+ template <typename Index, bool ConjugateLhs, bool ConjugateRhs> \
58
+ struct general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, ColMajor, \
59
+ ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, \
60
+ ConjugateRhs, Specialized> { \
61
+ static void run(Index rows, Index cols, const const_blas_data_mapper<Scalar, Index, ColMajor>& lhs, \
62
+ const const_blas_data_mapper<Scalar, Index, RowMajor>& rhs, Scalar* res, Index resIncr, \
63
+ Scalar alpha) { \
64
+ if (ConjugateLhs) { \
65
+ general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, ColMajor, \
66
+ ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, \
67
+ ConjugateRhs, BuiltIn>::run(rows, cols, lhs, rhs, res, resIncr, alpha); \
68
+ } else { \
69
+ general_matrix_vector_product_gemv<Index, Scalar, ColMajor, ConjugateLhs, Scalar, ConjugateRhs>::run( \
70
+ rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
71
+ } \
72
+ } \
73
+ }; \
74
+ template <typename Index, bool ConjugateLhs, bool ConjugateRhs> \
75
+ struct general_matrix_vector_product<Index, Scalar, const_blas_data_mapper<Scalar, Index, RowMajor>, RowMajor, \
76
+ ConjugateLhs, Scalar, const_blas_data_mapper<Scalar, Index, ColMajor>, \
77
+ ConjugateRhs, Specialized> { \
78
+ static void run(Index rows, Index cols, const const_blas_data_mapper<Scalar, Index, RowMajor>& lhs, \
79
+ const const_blas_data_mapper<Scalar, Index, ColMajor>& rhs, Scalar* res, Index resIncr, \
80
+ Scalar alpha) { \
81
+ general_matrix_vector_product_gemv<Index, Scalar, RowMajor, ConjugateLhs, Scalar, ConjugateRhs>::run( \
82
+ rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
83
+ } \
84
+ };
82
85
 
83
86
  EIGEN_BLAS_GEMV_SPECIALIZE(double)
84
87
  EIGEN_BLAS_GEMV_SPECIALIZE(float)
85
88
  EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
86
89
  EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
87
90
 
88
- #define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
89
- template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
90
- struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
91
- { \
92
- typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
93
- \
94
- static void run( \
95
- Index rows, Index cols, \
96
- const EIGTYPE* lhs, Index lhsStride, \
97
- const EIGTYPE* rhs, Index rhsIncr, \
98
- EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
99
- { \
100
- BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \
101
- lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \
102
- const EIGTYPE beta(1); \
103
- const EIGTYPE *x_ptr; \
104
- char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
105
- if (LhsStorageOrder==RowMajor) { \
106
- m = convert_index<BlasIndex>(cols); \
107
- n = convert_index<BlasIndex>(rows); \
108
- }\
109
- GEMVVector x_tmp; \
110
- if (ConjugateRhs) { \
111
- Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
112
- x_tmp=map_x.conjugate(); \
113
- x_ptr=x_tmp.data(); \
114
- incx=1; \
115
- } else x_ptr=rhs; \
116
- BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
117
- }\
118
- };
91
+ #define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE, BLASTYPE, BLASFUNC) \
92
+ template <typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
93
+ struct general_matrix_vector_product_gemv<Index, EIGTYPE, LhsStorageOrder, ConjugateLhs, EIGTYPE, ConjugateRhs> { \
94
+ typedef Matrix<EIGTYPE, Dynamic, 1, ColMajor> GEMVVector; \
95
+ \
96
+ static void run(Index rows, Index cols, const EIGTYPE* lhs, Index lhsStride, const EIGTYPE* rhs, Index rhsIncr, \
97
+ EIGTYPE* res, Index resIncr, EIGTYPE alpha) { \
98
+ if (rows == 0 || cols == 0) return; \
99
+ BlasIndex m = convert_index<BlasIndex>(rows), n = convert_index<BlasIndex>(cols), \
100
+ lda = convert_index<BlasIndex>(lhsStride), incx = convert_index<BlasIndex>(rhsIncr), \
101
+ incy = convert_index<BlasIndex>(resIncr); \
102
+ const EIGTYPE beta(1); \
103
+ const EIGTYPE* x_ptr; \
104
+ char trans = (LhsStorageOrder == ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
105
+ if (LhsStorageOrder == RowMajor) { \
106
+ m = convert_index<BlasIndex>(cols); \
107
+ n = convert_index<BlasIndex>(rows); \
108
+ } \
109
+ GEMVVector x_tmp; \
110
+ if (ConjugateRhs) { \
111
+ Map<const GEMVVector, 0, InnerStride<> > map_x(rhs, cols, 1, InnerStride<>(incx)); \
112
+ x_tmp = map_x.conjugate(); \
113
+ x_ptr = x_tmp.data(); \
114
+ incx = 1; \
115
+ } else { \
116
+ x_ptr = rhs; \
117
+ } \
118
+ BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, \
119
+ (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
120
+ } \
121
+ };
119
122
 
120
123
  #ifdef EIGEN_USE_MKL
121
- EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
122
- EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
124
+ EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
125
+ EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
123
126
  EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
124
- EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
127
+ EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, cgemv)
125
128
  #else
126
- EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
127
- EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
129
+ EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
130
+ EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
128
131
  EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
129
- EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
132
+ EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
130
133
  #endif
131
134
 
132
- } // end namespase internal
135
+ } // namespace internal
133
136
 
134
- } // end namespace Eigen
137
+ } // end namespace Eigen
135
138
 
136
- #endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
139
+ #endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
@@ -10,171 +10,273 @@
10
10
  #ifndef EIGEN_PARALLELIZER_H
11
11
  #define EIGEN_PARALLELIZER_H
12
12
 
13
- #if EIGEN_HAS_CXX11_ATOMIC
14
- #include <atomic>
13
+ // IWYU pragma: private
14
+ #include "../InternalHeaderCheck.h"
15
+
16
+ // Note that in the following, there are 3 different uses of the concept
17
+ // "number of threads":
18
+ // 1. Max number of threads used by OpenMP or ThreadPool.
19
+ // * For OpenMP this is typically the value set by the OMP_NUM_THREADS
20
+ // environment variable, or by a call to omp_set_num_threads() prior to
21
+ // calling Eigen.
22
+ // * For ThreadPool, this is the number of threads in the ThreadPool.
23
+ // 2. Max number of threads currently allowed to be used by parallel Eigen
24
+ // operations. This is set by setNbThreads(), and cannot exceed the value
25
+ // in 1.
26
+ // 3. The actual number of threads used for a given parallel Eigen operation.
27
+ // This is typically computed on the fly using a cost model and cannot exceed
28
+ // the value in 2.
29
+ // * For OpenMP, this is typically the number of threads specified in individual
30
+ // "omp parallel" pragmas associated with an Eigen operation.
31
+ // * For ThreadPool, it is the number of concurrent tasks scheduled in the
32
+ // threadpool for a given Eigen operation. Notice that since the threadpool
33
+ // uses task stealing, there is no way to limit the number of concurrently
34
+ // executing tasks to below the number in 1. except by limiting the total
35
+ // number of tasks in flight.
36
+
37
+ #if defined(EIGEN_HAS_OPENMP) && defined(EIGEN_GEMM_THREADPOOL)
38
+ #error "EIGEN_HAS_OPENMP and EIGEN_GEMM_THREADPOOL may not both be defined."
15
39
  #endif
16
40
 
17
41
  namespace Eigen {
18
42
 
19
43
  namespace internal {
20
-
21
- /** \internal */
22
- inline void manage_multi_threading(Action action, int* v)
23
- {
24
- static int m_maxThreads = -1;
25
- EIGEN_UNUSED_VARIABLE(m_maxThreads)
26
-
27
- if(action==SetAction)
28
- {
29
- eigen_internal_assert(v!=0);
30
- m_maxThreads = *v;
31
- }
32
- else if(action==GetAction)
33
- {
34
- eigen_internal_assert(v!=0);
35
- #ifdef EIGEN_HAS_OPENMP
36
- if(m_maxThreads>0)
37
- *v = m_maxThreads;
38
- else
39
- *v = omp_get_max_threads();
40
- #else
41
- *v = 1;
42
- #endif
43
- }
44
- else
45
- {
46
- eigen_internal_assert(false);
47
- }
44
+ inline void manage_multi_threading(Action action, int* v);
48
45
  }
49
46
 
50
- }
47
+ // Public APIs.
51
48
 
52
49
  /** Must be called first when calling Eigen from multiple threads */
53
- inline void initParallel()
54
- {
55
- int nbt;
56
- internal::manage_multi_threading(GetAction, &nbt);
57
- std::ptrdiff_t l1, l2, l3;
58
- internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
59
- }
50
+ EIGEN_DEPRECATED_WITH_REASON("Initialization is no longer needed.") inline void initParallel() {}
60
51
 
61
52
  /** \returns the max number of threads reserved for Eigen
62
- * \sa setNbThreads */
63
- inline int nbThreads()
64
- {
53
+ * \sa setNbThreads */
54
+ inline int nbThreads() {
65
55
  int ret;
66
56
  internal::manage_multi_threading(GetAction, &ret);
67
57
  return ret;
68
58
  }
69
59
 
70
60
  /** Sets the max number of threads reserved for Eigen
71
- * \sa nbThreads */
72
- inline void setNbThreads(int v)
73
- {
74
- internal::manage_multi_threading(SetAction, &v);
61
+ * \sa nbThreads */
62
+ inline void setNbThreads(int v) { internal::manage_multi_threading(SetAction, &v); }
63
+
64
+ #ifdef EIGEN_GEMM_THREADPOOL
65
+ // Sets the ThreadPool used by Eigen parallel Gemm.
66
+ //
67
+ // NOTICE: This function has a known race condition with
68
+ // parallelize_gemm below, and should not be called while
69
+ // an instance of that function is running.
70
+ //
71
+ // TODO(rmlarsen): Make the device API available instead of
72
+ // storing a local static pointer variable to avoid this issue.
73
+ inline ThreadPool* setGemmThreadPool(ThreadPool* new_pool) {
74
+ static ThreadPool* pool = nullptr;
75
+ if (new_pool != nullptr) {
76
+ // Replace the stored pool pointer with new_pool. The previously stored
77
+ // pool is neither waited on nor destroyed by this function.
78
+ pool = new_pool;
79
+ // Reset the number of threads to the number of threads on the new pool.
80
+ setNbThreads(pool->NumThreads());
81
+ }
82
+ return pool;
75
83
  }
76
84
 
85
+ // Gets the ThreadPool used by Eigen parallel Gemm.
86
+ inline ThreadPool* getGemmThreadPool() { return setGemmThreadPool(nullptr); }
87
+ #endif
88
+
77
89
  namespace internal {
78
90
 
79
- template<typename Index> struct GemmParallelInfo
80
- {
81
- GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
91
+ // Implementation.
92
+
93
+ #if defined(EIGEN_USE_BLAS) || (!defined(EIGEN_HAS_OPENMP) && !defined(EIGEN_GEMM_THREADPOOL))
94
+
95
+ inline void manage_multi_threading(Action action, int* v) {
96
+ if (action == SetAction) {
97
+ eigen_internal_assert(v != nullptr);
98
+ } else if (action == GetAction) {
99
+ eigen_internal_assert(v != nullptr);
100
+ *v = 1;
101
+ } else {
102
+ eigen_internal_assert(false);
103
+ }
104
+ }
105
+ template <typename Index>
106
+ struct GemmParallelInfo {};
107
+ template <bool Condition, typename Functor, typename Index>
108
+ EIGEN_STRONG_INLINE void parallelize_gemm(const Functor& func, Index rows, Index cols, Index /*unused*/,
109
+ bool /*unused*/) {
110
+ func(0, rows, 0, cols);
111
+ }
82
112
 
83
- // volatile is not enough on all architectures (see bug 1572)
84
- // to guarantee that when thread A says to thread B that it is
85
- // done with packing a block, then all writes have been really
86
- // carried out... C++11 memory model+atomic guarantees this.
87
- #if EIGEN_HAS_CXX11_ATOMIC
88
- std::atomic<Index> sync;
89
- std::atomic<int> users;
90
113
  #else
91
- Index volatile sync;
92
- int volatile users;
93
- #endif
94
114
 
115
+ template <typename Index>
116
+ struct GemmParallelTaskInfo {
117
+ GemmParallelTaskInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
118
+ std::atomic<Index> sync;
119
+ std::atomic<int> users;
95
120
  Index lhs_start;
96
121
  Index lhs_length;
97
122
  };
98
123
 
99
- template<bool Condition, typename Functor, typename Index>
100
- void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
101
- {
102
- // TODO when EIGEN_USE_BLAS is defined,
103
- // we should still enable OMP for other scalar types
104
- // Without C++11, we have to disable GEMM's parallelization on
105
- // non x86 architectures because there volatile is not enough for our purpose.
106
- // See bug 1572.
107
- #if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
108
- // FIXME the transpose variable is only needed to properly split
109
- // the matrix product when multithreading is enabled. This is a temporary
110
- // fix to support row-major destination matrices. This whole
111
- // parallelizer mechanism has to be redesigned anyway.
112
- EIGEN_UNUSED_VARIABLE(depth);
113
- EIGEN_UNUSED_VARIABLE(transpose);
114
- func(0,rows, 0,cols);
124
+ template <typename Index>
125
+ struct GemmParallelInfo {
126
+ const int logical_thread_id;
127
+ const int num_threads;
128
+ GemmParallelTaskInfo<Index>* task_info;
129
+
130
+ GemmParallelInfo(int logical_thread_id_, int num_threads_, GemmParallelTaskInfo<Index>* task_info_)
131
+ : logical_thread_id(logical_thread_id_), num_threads(num_threads_), task_info(task_info_) {}
132
+ };
133
+
134
+ inline void manage_multi_threading(Action action, int* v) {
135
+ static int m_maxThreads = -1;
136
+ if (action == SetAction) {
137
+ eigen_internal_assert(v != nullptr);
138
+ #if defined(EIGEN_HAS_OPENMP)
139
+ // Calling action == SetAction and *v = 0 means
140
+ // restoring m_maxThreads to the maximum number of threads specified
141
+ // for OpenMP.
142
+ eigen_internal_assert(*v >= 0);
143
+ int omp_threads = omp_get_max_threads();
144
+ m_maxThreads = (*v == 0 ? omp_threads : std::min(*v, omp_threads));
145
+ #elif defined(EIGEN_GEMM_THREADPOOL)
146
+ // Calling action == SetAction and *v = 0 means
147
+ // restoring m_maxThreads to the number of threads in the ThreadPool,
148
+ // which defaults to 1 if no pool was provided.
149
+ eigen_internal_assert(*v >= 0);
150
+ ThreadPool* pool = getGemmThreadPool();
151
+ int pool_threads = pool != nullptr ? pool->NumThreads() : 1;
152
+ m_maxThreads = (*v == 0 ? pool_threads : numext::mini(pool_threads, *v));
153
+ #endif
154
+ } else if (action == GetAction) {
155
+ eigen_internal_assert(v != nullptr);
156
+ #if defined(EIGEN_HAS_OPENMP)
157
+ if (m_maxThreads > 0)
158
+ *v = m_maxThreads;
159
+ else
160
+ *v = omp_get_max_threads();
115
161
  #else
162
+ *v = m_maxThreads;
163
+ #endif
164
+ } else {
165
+ eigen_internal_assert(false);
166
+ }
167
+ }
116
168
 
117
- // Dynamically check whether we should enable or disable OpenMP.
169
+ template <bool Condition, typename Functor, typename Index>
170
+ EIGEN_STRONG_INLINE void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose) {
171
+ // Dynamically check whether we should even try to execute in parallel.
118
172
  // The conditions are:
119
173
  // - the max number of threads we can create is greater than 1
120
174
  // - we are not already in a parallel code
121
175
  // - the sizes are large enough
122
176
 
123
177
  // compute the maximal number of threads from the size of the product:
124
- // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
178
+ // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at
179
+ // once.
125
180
  Index size = transpose ? rows : cols;
126
- Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
181
+ Index pb_max_threads = std::max<Index>(1, size / Functor::Traits::nr);
127
182
 
128
183
  // compute the maximal number of threads from the total amount of work:
129
- double work = static_cast<double>(rows) * static_cast<double>(cols) *
130
- static_cast<double>(depth);
184
+ double work = static_cast<double>(rows) * static_cast<double>(cols) * static_cast<double>(depth);
131
185
  double kMinTaskSize = 50000; // FIXME improve this heuristic.
132
- pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));
186
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>(work / kMinTaskSize)));
133
187
 
134
188
  // compute the number of threads we are going to use
135
- Index threads = std::min<Index>(nbThreads(), pb_max_threads);
136
-
137
- // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
138
- // then abort multi-threading
139
- // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
140
- if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
141
- return func(0,rows, 0,cols);
189
+ int threads = std::min<int>(nbThreads(), static_cast<int>(pb_max_threads));
190
+
191
+ // if multi-threading is explicitly disabled, not useful, or if we already are
192
+ // inside a parallel session, then abort multi-threading
193
+ bool dont_parallelize = (!Condition) || (threads <= 1);
194
+ #if defined(EIGEN_HAS_OPENMP)
195
+ // don't parallelize if we are executing in a parallel context already.
196
+ dont_parallelize |= omp_get_num_threads() > 1;
197
+ #elif defined(EIGEN_GEMM_THREADPOOL)
198
+ // don't parallelize if we have a trivial threadpool or the current thread id
199
+ // is != -1, indicating that we are already executing on a thread inside the pool.
200
+ // In other words, we do not allow nested parallelism, since this would lead to
201
+ // deadlocks due to the workstealing nature of the threadpool.
202
+ ThreadPool* pool = getGemmThreadPool();
203
+ dont_parallelize |= (pool == nullptr || pool->CurrentThreadId() != -1);
204
+ #endif
205
+ if (dont_parallelize) return func(0, rows, 0, cols);
142
206
 
143
- Eigen::initParallel();
144
207
  func.initParallelSession(threads);
145
208
 
146
- if(transpose)
147
- std::swap(rows,cols);
209
+ if (transpose) std::swap(rows, cols);
148
210
 
149
- ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
211
+ ei_declare_aligned_stack_constructed_variable(GemmParallelTaskInfo<Index>, task_info, threads, 0);
150
212
 
151
- #pragma omp parallel num_threads(threads)
213
+ #if defined(EIGEN_HAS_OPENMP)
214
+ #pragma omp parallel num_threads(threads)
152
215
  {
153
216
  Index i = omp_get_thread_num();
154
- // Note that the actual number of threads might be lower than the number of request ones.
217
+ // Note that the actual number of threads might be lower than the number of
218
+ // requested ones
155
219
  Index actual_threads = omp_get_num_threads();
220
+ GemmParallelInfo<Index> info(static_cast<int>(i), static_cast<int>(actual_threads), task_info);
221
+
222
+ Index blockCols = (cols / actual_threads) & ~Index(0x3);
223
+ Index blockRows = (rows / actual_threads);
224
+ blockRows = (blockRows / Functor::Traits::mr) * Functor::Traits::mr;
225
+
226
+ Index r0 = i * blockRows;
227
+ Index actualBlockRows = (i + 1 == actual_threads) ? rows - r0 : blockRows;
156
228
 
229
+ Index c0 = i * blockCols;
230
+ Index actualBlockCols = (i + 1 == actual_threads) ? cols - c0 : blockCols;
231
+
232
+ info.task_info[i].lhs_start = r0;
233
+ info.task_info[i].lhs_length = actualBlockRows;
234
+
235
+ if (transpose)
236
+ func(c0, actualBlockCols, 0, rows, &info);
237
+ else
238
+ func(0, rows, c0, actualBlockCols, &info);
239
+ }
240
+
241
+ #elif defined(EIGEN_GEMM_THREADPOOL)
242
+ Barrier barrier(threads);
243
+ auto task = [=, &func, &barrier, &task_info](int i) {
244
+ Index actual_threads = threads;
245
+ GemmParallelInfo<Index> info(i, static_cast<int>(actual_threads), task_info);
157
246
  Index blockCols = (cols / actual_threads) & ~Index(0x3);
158
247
  Index blockRows = (rows / actual_threads);
159
- blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
248
+ blockRows = (blockRows / Functor::Traits::mr) * Functor::Traits::mr;
160
249
 
161
- Index r0 = i*blockRows;
162
- Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
250
+ Index r0 = i * blockRows;
251
+ Index actualBlockRows = (i + 1 == actual_threads) ? rows - r0 : blockRows;
163
252
 
164
- Index c0 = i*blockCols;
165
- Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
253
+ Index c0 = i * blockCols;
254
+ Index actualBlockCols = (i + 1 == actual_threads) ? cols - c0 : blockCols;
166
255
 
167
- info[i].lhs_start = r0;
168
- info[i].lhs_length = actualBlockRows;
256
+ info.task_info[i].lhs_start = r0;
257
+ info.task_info[i].lhs_length = actualBlockRows;
169
258
 
170
- if(transpose) func(c0, actualBlockCols, 0, rows, info);
171
- else func(0, rows, c0, actualBlockCols, info);
259
+ if (transpose)
260
+ func(c0, actualBlockCols, 0, rows, &info);
261
+ else
262
+ func(0, rows, c0, actualBlockCols, &info);
263
+
264
+ barrier.Notify();
265
+ };
266
+ // Notice that we do not schedule more than "threads" tasks, which allows us to
267
+ // limit the number of running threads, even if the threadpool itself was constructed
268
+ // with a larger number of threads.
269
+ for (int i = 0; i < threads - 1; ++i) {
270
+ pool->Schedule([=, task = std::move(task)] { task(i); });
172
271
  }
272
+ task(threads - 1);
273
+ barrier.Wait();
173
274
  #endif
174
275
  }
175
276
 
176
- } // end namespace internal
277
+ #endif
177
278
 
178
- } // end namespace Eigen
279
+ } // end namespace internal
280
+ } // end namespace Eigen
179
281
 
180
- #endif // EIGEN_PARALLELIZER_H
282
+ #endif // EIGEN_PARALLELIZER_H