@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431):
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -10,508 +10,450 @@
10
10
  #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
11
11
  #define EIGEN_GENERAL_MATRIX_MATRIX_H
12
12
 
13
+ // IWYU pragma: private
14
+ #include "../InternalHeaderCheck.h"
15
+
13
16
  namespace Eigen {
14
17
 
15
18
  namespace internal {
16
19
 
17
- template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
20
+ template <typename LhsScalar_, typename RhsScalar_>
21
+ class level3_blocking;
18
22
 
19
23
  /* Specialization for a row-major destination matrix => simple transposition of the product */
20
- template<
21
- typename Index,
22
- typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
23
- typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
24
- int ResInnerStride>
25
- struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride>
26
- {
27
- typedef gebp_traits<RhsScalar,LhsScalar> Traits;
24
+ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar,
25
+ int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride>
26
+ struct general_matrix_matrix_product<Index, LhsScalar, LhsStorageOrder, ConjugateLhs, RhsScalar, RhsStorageOrder,
27
+ ConjugateRhs, RowMajor, ResInnerStride> {
28
+ typedef gebp_traits<RhsScalar, LhsScalar> Traits;
28
29
 
29
30
  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
30
- static EIGEN_STRONG_INLINE void run(
31
- Index rows, Index cols, Index depth,
32
- const LhsScalar* lhs, Index lhsStride,
33
- const RhsScalar* rhs, Index rhsStride,
34
- ResScalar* res, Index resIncr, Index resStride,
35
- ResScalar alpha,
36
- level3_blocking<RhsScalar,LhsScalar>& blocking,
37
- GemmParallelInfo<Index>* info = 0)
38
- {
31
+ static EIGEN_STRONG_INLINE void run(Index rows, Index cols, Index depth, const LhsScalar* lhs, Index lhsStride,
32
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr,
33
+ Index resStride, ResScalar alpha, level3_blocking<RhsScalar, LhsScalar>& blocking,
34
+ GemmParallelInfo<Index>* info = 0) {
39
35
  // transpose the product such that the result is column major
40
- general_matrix_matrix_product<Index,
41
- RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
42
- LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
43
- ColMajor,ResInnerStride>
44
- ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking,info);
36
+ general_matrix_matrix_product<Index, RhsScalar, RhsStorageOrder == RowMajor ? ColMajor : RowMajor, ConjugateRhs,
37
+ LhsScalar, LhsStorageOrder == RowMajor ? ColMajor : RowMajor, ConjugateLhs, ColMajor,
38
+ ResInnerStride>::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr,
39
+ resStride, alpha, blocking, info);
45
40
  }
46
41
  };
47
42
 
48
43
  /* Specialization for a col-major destination matrix
49
44
  * => Blocking algorithm following Goto's paper */
50
- template<
51
- typename Index,
52
- typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
53
- typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
54
- int ResInnerStride>
55
- struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride>
56
- {
57
-
58
- typedef gebp_traits<LhsScalar,RhsScalar> Traits;
59
-
60
- typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
61
- static void run(Index rows, Index cols, Index depth,
62
- const LhsScalar* _lhs, Index lhsStride,
63
- const RhsScalar* _rhs, Index rhsStride,
64
- ResScalar* _res, Index resIncr, Index resStride,
65
- ResScalar alpha,
66
- level3_blocking<LhsScalar,RhsScalar>& blocking,
67
- GemmParallelInfo<Index>* info = 0)
68
- {
69
- typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
70
- typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
71
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor,Unaligned,ResInnerStride> ResMapper;
72
- LhsMapper lhs(_lhs, lhsStride);
73
- RhsMapper rhs(_rhs, rhsStride);
74
- ResMapper res(_res, resStride, resIncr);
75
-
76
- Index kc = blocking.kc(); // cache block size along the K direction
77
- Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
78
- Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction
79
-
80
- gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
81
- gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
82
- gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
83
-
84
- #ifdef EIGEN_HAS_OPENMP
85
- if(info)
86
- {
87
- // this is the parallel version!
88
- int tid = omp_get_thread_num();
89
- int threads = omp_get_num_threads();
90
-
91
- LhsScalar* blockA = blocking.blockA();
92
- eigen_internal_assert(blockA!=0);
93
-
94
- std::size_t sizeB = kc*nc;
95
- ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
96
-
97
- // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
98
- for(Index k=0; k<depth; k+=kc)
99
- {
100
- const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
45
+ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar,
46
+ int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride>
47
+ struct general_matrix_matrix_product<Index, LhsScalar, LhsStorageOrder, ConjugateLhs, RhsScalar, RhsStorageOrder,
48
+ ConjugateRhs, ColMajor, ResInnerStride> {
49
+ typedef gebp_traits<LhsScalar, RhsScalar> Traits;
101
50
 
102
- // In order to reduce the chance that a thread has to wait for the other,
103
- // let's start by packing B'.
104
- pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);
51
+ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
52
+ static void run(Index rows, Index cols, Index depth, const LhsScalar* lhs_, Index lhsStride, const RhsScalar* rhs_,
53
+ Index rhsStride, ResScalar* res_, Index resIncr, Index resStride, ResScalar alpha,
54
+ level3_blocking<LhsScalar, RhsScalar>& blocking, GemmParallelInfo<Index>* info = 0) {
55
+ typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
56
+ typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
57
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
58
+ LhsMapper lhs(lhs_, lhsStride);
59
+ RhsMapper rhs(rhs_, rhsStride);
60
+ ResMapper res(res_, resStride, resIncr);
61
+
62
+ Index kc = blocking.kc(); // cache block size along the K direction
63
+ Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
64
+ Index nc = (std::min)(cols, blocking.nc()); // cache block size along the N direction
65
+
66
+ gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
67
+ LhsStorageOrder>
68
+ pack_lhs;
69
+ gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
70
+ gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
71
+
72
+ #if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL))
73
+ if (info) {
74
+ // this is the parallel version!
75
+ int tid = info->logical_thread_id;
76
+ int threads = info->num_threads;
77
+
78
+ LhsScalar* blockA = blocking.blockA();
79
+ eigen_internal_assert(blockA != 0);
80
+
81
+ std::size_t sizeB = kc * nc;
82
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
83
+
84
+ // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
85
+ for (Index k = 0; k < depth; k += kc) {
86
+ const Index actual_kc = (std::min)(k + kc, depth) - k; // => rows of B', and cols of the A'
87
+
88
+ // In order to reduce the chance that a thread has to wait for the other,
89
+ // let's start by packing B'.
90
+ pack_rhs(blockB, rhs.getSubMapper(k, 0), actual_kc, nc);
91
+
92
+ // Pack A_k to A' in a parallel fashion:
93
+ // each thread packs the sub block A_k,i to A'_i where i is the thread id.
94
+
95
+ // However, before copying to A'_i, we have to make sure that no other thread is still using it,
96
+ // i.e., we test that info->task_info[tid].users equals 0.
97
+ // Then, we set info->task_info[tid].users to the number of threads to mark that all other threads are going to
98
+ // use it.
99
+ while (info->task_info[tid].users != 0) {
100
+ std::this_thread::yield();
101
+ }
102
+ info->task_info[tid].users = threads;
105
103
 
106
- // Pack A_k to A' in a parallel fashion:
107
- // each thread packs the sub block A_k,i to A'_i where i is the thread id.
104
+ pack_lhs(blockA + info->task_info[tid].lhs_start * actual_kc,
105
+ lhs.getSubMapper(info->task_info[tid].lhs_start, k), actual_kc, info->task_info[tid].lhs_length);
108
106
 
109
- // However, before copying to A'_i, we have to make sure that no other thread is still using it,
110
- // i.e., we test that info[tid].users equals 0.
111
- // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
112
- while(info[tid].users!=0) {}
113
- info[tid].users = threads;
107
+ // Notify the other threads that the part A'_i is ready to go.
108
+ info->task_info[tid].sync = k;
114
109
 
115
- pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
110
+ // Computes C_i += A' * B' per A'_i
111
+ for (int shift = 0; shift < threads; ++shift) {
112
+ int i = (tid + shift) % threads;
116
113
 
117
- // Notify the other threads that the part A'_i is ready to go.
118
- info[tid].sync = k;
114
+ // At this point we have to make sure that A'_i has been updated by the thread i,
115
+ // we use testAndSetOrdered to mimic a volatile access.
116
+ // However, no need to wait for the B' part which has been updated by the current thread!
117
+ if (shift > 0) {
118
+ while (info->task_info[i].sync != k) {
119
+ std::this_thread::yield();
120
+ }
121
+ }
119
122
 
120
- // Computes C_i += A' * B' per A'_i
121
- for(int shift=0; shift<threads; ++shift)
122
- {
123
- int i = (tid+shift)%threads;
123
+ gebp(res.getSubMapper(info->task_info[i].lhs_start, 0), blockA + info->task_info[i].lhs_start * actual_kc,
124
+ blockB, info->task_info[i].lhs_length, actual_kc, nc, alpha);
125
+ }
124
126
 
125
- // At this point we have to make sure that A'_i has been updated by the thread i,
126
- // we use testAndSetOrdered to mimic a volatile access.
127
- // However, no need to wait for the B' part which has been updated by the current thread!
128
- if (shift>0) {
129
- while(info[i].sync!=k) {
130
- }
127
+ // Then keep going as usual with the remaining B'
128
+ for (Index j = nc; j < cols; j += nc) {
129
+ const Index actual_nc = (std::min)(j + nc, cols) - j;
130
+
131
+ // pack B_k,j to B'
132
+ pack_rhs(blockB, rhs.getSubMapper(k, j), actual_kc, actual_nc);
133
+
134
+ // C_j += A' * B'
135
+ gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
131
136
  }
132
137
 
133
- gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
138
+ // Release all the sub blocks A'_i of A' for the current thread,
139
+ // i.e., we simply decrement the number of users by 1
140
+ for (Index i = 0; i < threads; ++i) info->task_info[i].users -= 1;
134
141
  }
142
+ } else
143
+ #endif // defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)
144
+ {
145
+ EIGEN_UNUSED_VARIABLE(info);
135
146
 
136
- // Then keep going as usual with the remaining B'
137
- for(Index j=nc; j<cols; j+=nc)
138
- {
139
- const Index actual_nc = (std::min)(j+nc,cols)-j;
147
+ // this is the sequential version!
148
+ std::size_t sizeA = kc * mc;
149
+ std::size_t sizeB = kc * nc;
140
150
 
141
- // pack B_k,j to B'
142
- pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);
151
+ ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
152
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
143
153
 
144
- // C_j += A' * B'
145
- gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
146
- }
154
+ const bool pack_rhs_once = mc != rows && kc == depth && nc == cols;
147
155
 
148
- // Release all the sub blocks A'_i of A' for the current thread,
149
- // i.e., we simply decrement the number of users by 1
150
- for(Index i=0; i<threads; ++i)
151
- #if !EIGEN_HAS_CXX11_ATOMIC
152
- #pragma omp atomic
153
- #endif
154
- info[i].users -= 1;
155
- }
156
- }
157
- else
158
- #endif // EIGEN_HAS_OPENMP
159
- {
160
- EIGEN_UNUSED_VARIABLE(info);
156
+ // For each horizontal panel of the rhs, and corresponding panel of the lhs...
157
+ for (Index i2 = 0; i2 < rows; i2 += mc) {
158
+ const Index actual_mc = (std::min)(i2 + mc, rows) - i2;
161
159
 
162
- // this is the sequential version!
163
- std::size_t sizeA = kc*mc;
164
- std::size_t sizeB = kc*nc;
160
+ for (Index k2 = 0; k2 < depth; k2 += kc) {
161
+ const Index actual_kc = (std::min)(k2 + kc, depth) - k2;
165
162
 
166
- ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
167
- ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
163
+ // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
164
+ // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
165
+ // Note that this panel will be read as many times as the number of blocks in the rhs's
166
+ // horizontal panel which is, in practice, a very low number.
167
+ pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
168
168
 
169
- const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
169
+ // For each kc x nc block of the rhs's horizontal panel...
170
+ for (Index j2 = 0; j2 < cols; j2 += nc) {
171
+ const Index actual_nc = (std::min)(j2 + nc, cols) - j2;
170
172
 
171
- // For each horizontal panel of the rhs, and corresponding panel of the lhs...
172
- for(Index i2=0; i2<rows; i2+=mc)
173
- {
174
- const Index actual_mc = (std::min)(i2+mc,rows)-i2;
175
-
176
- for(Index k2=0; k2<depth; k2+=kc)
177
- {
178
- const Index actual_kc = (std::min)(k2+kc,depth)-k2;
179
-
180
- // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
181
- // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
182
- // Note that this panel will be read as many times as the number of blocks in the rhs's
183
- // horizontal panel which is, in practice, a very low number.
184
- pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
185
-
186
- // For each kc x nc block of the rhs's horizontal panel...
187
- for(Index j2=0; j2<cols; j2+=nc)
188
- {
189
- const Index actual_nc = (std::min)(j2+nc,cols)-j2;
190
-
191
- // We pack the rhs's block into a sequential chunk of memory (L2 caching)
192
- // Note that this block will be read a very high number of times, which is equal to the number of
193
- // micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
194
- if((!pack_rhs_once) || i2==0)
195
- pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
196
-
197
- // Everything is packed, we can now call the panel * block kernel:
198
- gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
173
+ // We pack the rhs's block into a sequential chunk of memory (L2 caching)
174
+ // Note that this block will be read a very high number of times, which is equal to the number of
175
+ // micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
176
+ if ((!pack_rhs_once) || i2 == 0) pack_rhs(blockB, rhs.getSubMapper(k2, j2), actual_kc, actual_nc);
177
+
178
+ // Everything is packed, we can now call the panel * block kernel:
179
+ gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
180
+ }
199
181
  }
200
182
  }
201
183
  }
202
184
  }
203
- }
204
-
205
185
  };
206
186
 
207
187
  /*********************************************************************************
208
- * Specialization of generic_product_impl for "large" GEMM, i.e.,
209
- * implementation of the high level wrapper to general_matrix_matrix_product
210
- **********************************************************************************/
188
+ * Specialization of generic_product_impl for "large" GEMM, i.e.,
189
+ * implementation of the high level wrapper to general_matrix_matrix_product
190
+ **********************************************************************************/
211
191
 
212
- template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
213
- struct gemm_functor
214
- {
192
+ template <typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest,
193
+ typename BlockingType>
194
+ struct gemm_functor {
215
195
  gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
216
- : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
217
- {}
196
+ : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking) {}
218
197
 
219
- void initParallelSession(Index num_threads) const
220
- {
198
+ void initParallelSession(Index num_threads) const {
221
199
  m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
222
200
  m_blocking.allocateA();
223
201
  }
224
202
 
225
- void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
226
- {
227
- if(cols==-1)
228
- cols = m_rhs.cols();
203
+ void operator()(Index row, Index rows, Index col = 0, Index cols = -1, GemmParallelInfo<Index>* info = 0) const {
204
+ if (cols == -1) cols = m_rhs.cols();
229
205
 
230
- Gemm::run(rows, cols, m_lhs.cols(),
231
- &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
232
- &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
233
- (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.innerStride(), m_dest.outerStride(),
206
+ Gemm::run(rows, cols, m_lhs.cols(), &m_lhs.coeffRef(row, 0), m_lhs.outerStride(), &m_rhs.coeffRef(0, col),
207
+ m_rhs.outerStride(), (Scalar*)&(m_dest.coeffRef(row, col)), m_dest.innerStride(), m_dest.outerStride(),
234
208
  m_actualAlpha, m_blocking, info);
235
209
  }
236
210
 
237
211
  typedef typename Gemm::Traits Traits;
238
212
 
239
- protected:
240
- const Lhs& m_lhs;
241
- const Rhs& m_rhs;
242
- Dest& m_dest;
243
- Scalar m_actualAlpha;
244
- BlockingType& m_blocking;
213
+ protected:
214
+ const Lhs& m_lhs;
215
+ const Rhs& m_rhs;
216
+ Dest& m_dest;
217
+ Scalar m_actualAlpha;
218
+ BlockingType& m_blocking;
245
219
  };
246
220
 
247
- template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
248
- bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
221
+ template <int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
222
+ int KcFactor = 1, bool FiniteAtCompileTime = MaxRows != Dynamic && MaxCols != Dynamic && MaxDepth != Dynamic>
223
+ class gemm_blocking_space;
249
224
 
250
- template<typename _LhsScalar, typename _RhsScalar>
251
- class level3_blocking
252
- {
253
- typedef _LhsScalar LhsScalar;
254
- typedef _RhsScalar RhsScalar;
225
+ template <typename LhsScalar_, typename RhsScalar_>
226
+ class level3_blocking {
227
+ typedef LhsScalar_ LhsScalar;
228
+ typedef RhsScalar_ RhsScalar;
255
229
 
256
- protected:
257
- LhsScalar* m_blockA;
258
- RhsScalar* m_blockB;
230
+ protected:
231
+ LhsScalar* m_blockA;
232
+ RhsScalar* m_blockB;
259
233
 
260
- Index m_mc;
261
- Index m_nc;
262
- Index m_kc;
234
+ Index m_mc;
235
+ Index m_nc;
236
+ Index m_kc;
263
237
 
264
- public:
238
+ public:
239
+ level3_blocking() : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0) {}
265
240
 
266
- level3_blocking()
267
- : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
268
- {}
241
+ inline Index mc() const { return m_mc; }
242
+ inline Index nc() const { return m_nc; }
243
+ inline Index kc() const { return m_kc; }
269
244
 
270
- inline Index mc() const { return m_mc; }
271
- inline Index nc() const { return m_nc; }
272
- inline Index kc() const { return m_kc; }
273
-
274
- inline LhsScalar* blockA() { return m_blockA; }
275
- inline RhsScalar* blockB() { return m_blockB; }
245
+ inline LhsScalar* blockA() { return m_blockA; }
246
+ inline RhsScalar* blockB() { return m_blockB; }
276
247
  };
277
248
 
278
- template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
279
- class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
280
- : public level3_blocking<
281
- typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
282
- typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
283
- {
284
- enum {
285
- Transpose = StorageOrder==RowMajor,
286
- ActualRows = Transpose ? MaxCols : MaxRows,
287
- ActualCols = Transpose ? MaxRows : MaxCols
288
- };
289
- typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
290
- typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
291
- typedef gebp_traits<LhsScalar,RhsScalar> Traits;
292
- enum {
293
- SizeA = ActualRows * MaxDepth,
294
- SizeB = ActualCols * MaxDepth
295
- };
249
+ template <int StorageOrder, typename LhsScalar_, typename RhsScalar_, int MaxRows, int MaxCols, int MaxDepth,
250
+ int KcFactor>
251
+ class gemm_blocking_space<StorageOrder, LhsScalar_, RhsScalar_, MaxRows, MaxCols, MaxDepth, KcFactor,
252
+ true /* == FiniteAtCompileTime */>
253
+ : public level3_blocking<std::conditional_t<StorageOrder == RowMajor, RhsScalar_, LhsScalar_>,
254
+ std::conditional_t<StorageOrder == RowMajor, LhsScalar_, RhsScalar_>> {
255
+ enum {
256
+ Transpose = StorageOrder == RowMajor,
257
+ ActualRows = Transpose ? MaxCols : MaxRows,
258
+ ActualCols = Transpose ? MaxRows : MaxCols
259
+ };
260
+ typedef std::conditional_t<Transpose, RhsScalar_, LhsScalar_> LhsScalar;
261
+ typedef std::conditional_t<Transpose, LhsScalar_, RhsScalar_> RhsScalar;
262
+ enum { SizeA = ActualRows * MaxDepth, SizeB = ActualCols * MaxDepth };
296
263
 
297
264
  #if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
298
- EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
299
- EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
265
+ EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
266
+ EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
300
267
  #else
301
- EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
302
- EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
268
+ EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES - 1];
269
+ EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES - 1];
303
270
  #endif
304
271
 
305
- public:
306
-
307
- gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
308
- {
309
- this->m_mc = ActualRows;
310
- this->m_nc = ActualCols;
311
- this->m_kc = MaxDepth;
272
+ public:
273
+ gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/,
274
+ bool /*full_rows = false*/) {
275
+ this->m_mc = ActualRows;
276
+ this->m_nc = ActualCols;
277
+ this->m_kc = MaxDepth;
312
278
  #if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
313
- this->m_blockA = m_staticA;
314
- this->m_blockB = m_staticB;
279
+ this->m_blockA = m_staticA;
280
+ this->m_blockB = m_staticB;
315
281
  #else
316
- this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
317
- this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
282
+ this->m_blockA = reinterpret_cast<LhsScalar*>((std::uintptr_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES - 1)) &
283
+ ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES - 1));
284
+ this->m_blockB = reinterpret_cast<RhsScalar*>((std::uintptr_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES - 1)) &
285
+ ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES - 1));
318
286
  #endif
319
- }
287
+ }
320
288
 
321
- void initParallel(Index, Index, Index, Index)
322
- {}
289
+ void initParallel(Index, Index, Index, Index) {}
323
290
 
324
- inline void allocateA() {}
325
- inline void allocateB() {}
326
- inline void allocateAll() {}
291
+ inline void allocateA() {}
292
+ inline void allocateB() {}
293
+ inline void allocateAll() {}
327
294
  };
328
295
 
329
- template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
330
- class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
331
- : public level3_blocking<
332
- typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
333
- typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
334
- {
335
- enum {
336
- Transpose = StorageOrder==RowMajor
337
- };
338
- typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
339
- typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
340
- typedef gebp_traits<LhsScalar,RhsScalar> Traits;
341
-
342
- Index m_sizeA;
343
- Index m_sizeB;
344
-
345
- public:
346
-
347
- gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
296
+ template <int StorageOrder, typename LhsScalar_, typename RhsScalar_, int MaxRows, int MaxCols, int MaxDepth,
297
+ int KcFactor>
298
+ class gemm_blocking_space<StorageOrder, LhsScalar_, RhsScalar_, MaxRows, MaxCols, MaxDepth, KcFactor, false>
299
+ : public level3_blocking<std::conditional_t<StorageOrder == RowMajor, RhsScalar_, LhsScalar_>,
300
+ std::conditional_t<StorageOrder == RowMajor, LhsScalar_, RhsScalar_>> {
301
+ enum { Transpose = StorageOrder == RowMajor };
302
+ typedef std::conditional_t<Transpose, RhsScalar_, LhsScalar_> LhsScalar;
303
+ typedef std::conditional_t<Transpose, LhsScalar_, RhsScalar_> RhsScalar;
304
+
305
+ Index m_sizeA;
306
+ Index m_sizeB;
307
+
308
+ public:
309
+ gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking) {
310
+ this->m_mc = Transpose ? cols : rows;
311
+ this->m_nc = Transpose ? rows : cols;
312
+ this->m_kc = depth;
313
+
314
+ if (l3_blocking) {
315
+ computeProductBlockingSizes<LhsScalar, RhsScalar, KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
316
+ } else // no l3 blocking
348
317
  {
349
- this->m_mc = Transpose ? cols : rows;
350
- this->m_nc = Transpose ? rows : cols;
351
- this->m_kc = depth;
318
+ Index n = this->m_nc;
319
+ computeProductBlockingSizes<LhsScalar, RhsScalar, KcFactor>(this->m_kc, this->m_mc, n, num_threads);
320
+ }
352
321
 
353
- if(l3_blocking)
354
- {
355
- computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
356
- }
357
- else // no l3 blocking
358
- {
359
- Index n = this->m_nc;
360
- computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
361
- }
322
+ m_sizeA = this->m_mc * this->m_kc;
323
+ m_sizeB = this->m_kc * this->m_nc;
324
+ }
362
325
 
363
- m_sizeA = this->m_mc * this->m_kc;
364
- m_sizeB = this->m_kc * this->m_nc;
365
- }
326
+ void initParallel(Index rows, Index cols, Index depth, Index num_threads) {
327
+ this->m_mc = Transpose ? cols : rows;
328
+ this->m_nc = Transpose ? rows : cols;
329
+ this->m_kc = depth;
366
330
 
367
- void initParallel(Index rows, Index cols, Index depth, Index num_threads)
368
- {
369
- this->m_mc = Transpose ? cols : rows;
370
- this->m_nc = Transpose ? rows : cols;
371
- this->m_kc = depth;
372
-
373
- eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
374
- Index m = this->m_mc;
375
- computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
376
- m_sizeA = this->m_mc * this->m_kc;
377
- m_sizeB = this->m_kc * this->m_nc;
378
- }
331
+ eigen_internal_assert(this->m_blockA == 0 && this->m_blockB == 0);
332
+ Index m = this->m_mc;
333
+ computeProductBlockingSizes<LhsScalar, RhsScalar, KcFactor>(this->m_kc, m, this->m_nc, num_threads);
334
+ m_sizeA = this->m_mc * this->m_kc;
335
+ m_sizeB = this->m_kc * this->m_nc;
336
+ }
379
337
 
380
- void allocateA()
381
- {
382
- if(this->m_blockA==0)
383
- this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
384
- }
338
+ void allocateA() {
339
+ if (this->m_blockA == 0) this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
340
+ }
385
341
 
386
- void allocateB()
387
- {
388
- if(this->m_blockB==0)
389
- this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
390
- }
342
+ void allocateB() {
343
+ if (this->m_blockB == 0) this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
344
+ }
391
345
 
392
- void allocateAll()
393
- {
394
- allocateA();
395
- allocateB();
396
- }
346
+ void allocateAll() {
347
+ allocateA();
348
+ allocateB();
349
+ }
397
350
 
398
- ~gemm_blocking_space()
399
- {
400
- aligned_delete(this->m_blockA, m_sizeA);
401
- aligned_delete(this->m_blockB, m_sizeB);
402
- }
351
+ ~gemm_blocking_space() {
352
+ aligned_delete(this->m_blockA, m_sizeA);
353
+ aligned_delete(this->m_blockB, m_sizeB);
354
+ }
403
355
  };
404
356
 
405
- } // end namespace internal
357
+ } // end namespace internal
406
358
 
407
359
  namespace internal {
408
360
 
409
- template<typename Lhs, typename Rhs>
410
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
411
- : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
412
- {
413
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
361
+ template <typename Lhs, typename Rhs>
362
+ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemmProduct>
363
+ : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemmProduct>> {
364
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
414
365
  typedef typename Lhs::Scalar LhsScalar;
415
366
  typedef typename Rhs::Scalar RhsScalar;
416
367
 
417
368
  typedef internal::blas_traits<Lhs> LhsBlasTraits;
418
369
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
419
- typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
370
+ typedef internal::remove_all_t<ActualLhsType> ActualLhsTypeCleaned;
420
371
 
421
372
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
422
373
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
423
- typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
374
+ typedef internal::remove_all_t<ActualRhsType> ActualRhsTypeCleaned;
424
375
 
425
- enum {
426
- MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
427
- };
376
+ enum { MaxDepthAtCompileTime = min_size_prefer_fixed(Lhs::MaxColsAtCompileTime, Rhs::MaxRowsAtCompileTime) };
428
377
 
429
- typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
378
+ typedef generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> lazyproduct;
430
379
 
431
- template<typename Dst>
432
- static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
433
- {
380
+ template <typename Dst>
381
+ static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
434
382
  // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=404 for a discussion and helper program
435
383
  // to determine the following heuristic.
436
384
  // EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h,
437
385
  // unless it has been specialized by the user or for a given architecture.
438
386
  // Note that the condition rhs.rows()>0 was required because lazy product is (was?) not happy with empty inputs.
439
387
  // I'm not sure it is still required.
440
- if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
441
- lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar,Scalar>());
442
- else
443
- {
388
+ if ((rhs.rows() + dst.rows() + dst.cols()) < EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows() > 0)
389
+ lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar, Scalar>());
390
+ else {
444
391
  dst.setZero();
445
392
  scaleAndAddTo(dst, lhs, rhs, Scalar(1));
446
393
  }
447
394
  }
448
395
 
449
- template<typename Dst>
450
- static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
451
- {
452
- if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
453
- lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar,Scalar>());
396
+ template <typename Dst>
397
+ static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
398
+ if ((rhs.rows() + dst.rows() + dst.cols()) < EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows() > 0)
399
+ lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar, Scalar>());
454
400
  else
455
- scaleAndAddTo(dst,lhs, rhs, Scalar(1));
401
+ scaleAndAddTo(dst, lhs, rhs, Scalar(1));
456
402
  }
457
403
 
458
- template<typename Dst>
459
- static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
460
- {
461
- if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
462
- lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar,Scalar>());
404
+ template <typename Dst>
405
+ static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
406
+ if ((rhs.rows() + dst.rows() + dst.cols()) < EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows() > 0)
407
+ lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar, Scalar>());
463
408
  else
464
409
  scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
465
410
  }
466
411
 
467
- template<typename Dest>
468
- static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
469
- {
470
- eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
471
- if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
472
- return;
412
+ template <typename Dest>
413
+ static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {
414
+ eigen_assert(dst.rows() == a_lhs.rows() && dst.cols() == a_rhs.cols());
415
+ if (a_lhs.cols() == 0 || a_lhs.rows() == 0 || a_rhs.cols() == 0) return;
473
416
 
474
- if (dst.cols() == 1)
475
- {
417
+ if (dst.cols() == 1) {
476
418
  // Fallback to GEMV if either the lhs or rhs is a runtime vector
477
419
  typename Dest::ColXpr dst_vec(dst.col(0));
478
- return internal::generic_product_impl<Lhs,typename Rhs::ConstColXpr,DenseShape,DenseShape,GemvProduct>
479
- ::scaleAndAddTo(dst_vec, a_lhs, a_rhs.col(0), alpha);
480
- }
481
- else if (dst.rows() == 1)
482
- {
420
+ return internal::generic_product_impl<Lhs, typename Rhs::ConstColXpr, DenseShape, DenseShape,
421
+ GemvProduct>::scaleAndAddTo(dst_vec, a_lhs, a_rhs.col(0), alpha);
422
+ } else if (dst.rows() == 1) {
483
423
  // Fallback to GEMV if either the lhs or rhs is a runtime vector
484
424
  typename Dest::RowXpr dst_vec(dst.row(0));
485
- return internal::generic_product_impl<typename Lhs::ConstRowXpr,Rhs,DenseShape,DenseShape,GemvProduct>
486
- ::scaleAndAddTo(dst_vec, a_lhs.row(0), a_rhs, alpha);
425
+ return internal::generic_product_impl<typename Lhs::ConstRowXpr, Rhs, DenseShape, DenseShape,
426
+ GemvProduct>::scaleAndAddTo(dst_vec, a_lhs.row(0), a_rhs, alpha);
487
427
  }
488
428
 
489
- typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
490
- typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
429
+ add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
430
+ add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
491
431
 
492
432
  Scalar actualAlpha = combine_scalar_factors(alpha, a_lhs, a_rhs);
493
433
 
494
- typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
495
- Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
434
+ typedef internal::gemm_blocking_space<(Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, LhsScalar, RhsScalar,
435
+ Dest::MaxRowsAtCompileTime, Dest::MaxColsAtCompileTime, MaxDepthAtCompileTime>
436
+ BlockingType;
496
437
 
497
438
  typedef internal::gemm_functor<
498
- Scalar, Index,
499
- internal::general_matrix_matrix_product<
500
- Index,
501
- LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
502
- RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
503
- (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,
504
- Dest::InnerStrideAtCompileTime>,
505
- ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
439
+ Scalar, Index,
440
+ internal::general_matrix_matrix_product<
441
+ Index, LhsScalar, (ActualLhsTypeCleaned::Flags & RowMajorBit) ? RowMajor : ColMajor,
442
+ bool(LhsBlasTraits::NeedToConjugate), RhsScalar,
443
+ (ActualRhsTypeCleaned::Flags & RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
444
+ (Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>,
445
+ ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType>
446
+ GemmFunctor;
506
447
 
507
448
  BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
508
- internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
509
- (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
449
+ internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime > 32 || Dest::MaxRowsAtCompileTime == Dynamic)>(
450
+ GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(),
451
+ Dest::Flags & RowMajorBit);
510
452
  }
511
453
  };
512
454
 
513
- } // end namespace internal
455
+ } // end namespace internal
514
456
 
515
- } // end namespace Eigen
457
+ } // end namespace Eigen
516
458
 
517
- #endif // EIGEN_GENERAL_MATRIX_MATRIX_H
459
+ #endif // EIGEN_GENERAL_MATRIX_MATRIX_H