@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -10,486 +10,450 @@
10
10
  #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
11
11
  #define EIGEN_GENERAL_MATRIX_MATRIX_H
12
12
 
13
+ // IWYU pragma: private
14
+ #include "../InternalHeaderCheck.h"
15
+
13
16
  namespace Eigen {
14
17
 
15
18
  namespace internal {
16
19
 
17
- template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
20
+ template <typename LhsScalar_, typename RhsScalar_>
21
+ class level3_blocking;
18
22
 
19
23
  /* Specialization for a row-major destination matrix => simple transposition of the product */
20
- template<
21
- typename Index,
22
- typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
23
- typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
24
- int ResInnerStride>
25
- struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride>
26
- {
27
- typedef gebp_traits<RhsScalar,LhsScalar> Traits;
24
+ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar,
25
+ int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride>
26
+ struct general_matrix_matrix_product<Index, LhsScalar, LhsStorageOrder, ConjugateLhs, RhsScalar, RhsStorageOrder,
27
+ ConjugateRhs, RowMajor, ResInnerStride> {
28
+ typedef gebp_traits<RhsScalar, LhsScalar> Traits;
28
29
 
29
30
  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
30
- static EIGEN_STRONG_INLINE void run(
31
- Index rows, Index cols, Index depth,
32
- const LhsScalar* lhs, Index lhsStride,
33
- const RhsScalar* rhs, Index rhsStride,
34
- ResScalar* res, Index resIncr, Index resStride,
35
- ResScalar alpha,
36
- level3_blocking<RhsScalar,LhsScalar>& blocking,
37
- GemmParallelInfo<Index>* info = 0)
38
- {
31
+ static EIGEN_STRONG_INLINE void run(Index rows, Index cols, Index depth, const LhsScalar* lhs, Index lhsStride,
32
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr,
33
+ Index resStride, ResScalar alpha, level3_blocking<RhsScalar, LhsScalar>& blocking,
34
+ GemmParallelInfo<Index>* info = 0) {
39
35
  // transpose the product such that the result is column major
40
- general_matrix_matrix_product<Index,
41
- RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
42
- LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
43
- ColMajor,ResInnerStride>
44
- ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking,info);
36
+ general_matrix_matrix_product<Index, RhsScalar, RhsStorageOrder == RowMajor ? ColMajor : RowMajor, ConjugateRhs,
37
+ LhsScalar, LhsStorageOrder == RowMajor ? ColMajor : RowMajor, ConjugateLhs, ColMajor,
38
+ ResInnerStride>::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr,
39
+ resStride, alpha, blocking, info);
45
40
  }
46
41
  };
47
42
 
48
43
  /* Specialization for a col-major destination matrix
49
44
  * => Blocking algorithm following Goto's paper */
50
- template<
51
- typename Index,
52
- typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
53
- typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
54
- int ResInnerStride>
55
- struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride>
56
- {
57
-
58
- typedef gebp_traits<LhsScalar,RhsScalar> Traits;
59
-
60
- typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
61
- static void run(Index rows, Index cols, Index depth,
62
- const LhsScalar* _lhs, Index lhsStride,
63
- const RhsScalar* _rhs, Index rhsStride,
64
- ResScalar* _res, Index resIncr, Index resStride,
65
- ResScalar alpha,
66
- level3_blocking<LhsScalar,RhsScalar>& blocking,
67
- GemmParallelInfo<Index>* info = 0)
68
- {
69
- typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
70
- typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
71
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor,Unaligned,ResInnerStride> ResMapper;
72
- LhsMapper lhs(_lhs, lhsStride);
73
- RhsMapper rhs(_rhs, rhsStride);
74
- ResMapper res(_res, resStride, resIncr);
75
-
76
- Index kc = blocking.kc(); // cache block size along the K direction
77
- Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
78
- Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction
79
-
80
- gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
81
- gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
82
- gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
83
-
84
- #ifdef EIGEN_HAS_OPENMP
85
- if(info)
86
- {
87
- // this is the parallel version!
88
- int tid = omp_get_thread_num();
89
- int threads = omp_get_num_threads();
90
-
91
- LhsScalar* blockA = blocking.blockA();
92
- eigen_internal_assert(blockA!=0);
93
-
94
- std::size_t sizeB = kc*nc;
95
- ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
96
-
97
- // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
98
- for(Index k=0; k<depth; k+=kc)
99
- {
100
- const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
45
+ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar,
46
+ int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride>
47
+ struct general_matrix_matrix_product<Index, LhsScalar, LhsStorageOrder, ConjugateLhs, RhsScalar, RhsStorageOrder,
48
+ ConjugateRhs, ColMajor, ResInnerStride> {
49
+ typedef gebp_traits<LhsScalar, RhsScalar> Traits;
101
50
 
102
- // In order to reduce the chance that a thread has to wait for the other,
103
- // let's start by packing B'.
104
- pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);
51
+ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
52
+ static void run(Index rows, Index cols, Index depth, const LhsScalar* lhs_, Index lhsStride, const RhsScalar* rhs_,
53
+ Index rhsStride, ResScalar* res_, Index resIncr, Index resStride, ResScalar alpha,
54
+ level3_blocking<LhsScalar, RhsScalar>& blocking, GemmParallelInfo<Index>* info = 0) {
55
+ typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
56
+ typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
57
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
58
+ LhsMapper lhs(lhs_, lhsStride);
59
+ RhsMapper rhs(rhs_, rhsStride);
60
+ ResMapper res(res_, resStride, resIncr);
61
+
62
+ Index kc = blocking.kc(); // cache block size along the K direction
63
+ Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
64
+ Index nc = (std::min)(cols, blocking.nc()); // cache block size along the N direction
65
+
66
+ gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
67
+ LhsStorageOrder>
68
+ pack_lhs;
69
+ gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
70
+ gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
71
+
72
+ #if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL))
73
+ if (info) {
74
+ // this is the parallel version!
75
+ int tid = info->logical_thread_id;
76
+ int threads = info->num_threads;
77
+
78
+ LhsScalar* blockA = blocking.blockA();
79
+ eigen_internal_assert(blockA != 0);
80
+
81
+ std::size_t sizeB = kc * nc;
82
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
83
+
84
+ // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
85
+ for (Index k = 0; k < depth; k += kc) {
86
+ const Index actual_kc = (std::min)(k + kc, depth) - k; // => rows of B', and cols of the A'
87
+
88
+ // In order to reduce the chance that a thread has to wait for the other,
89
+ // let's start by packing B'.
90
+ pack_rhs(blockB, rhs.getSubMapper(k, 0), actual_kc, nc);
91
+
92
+ // Pack A_k to A' in a parallel fashion:
93
+ // each thread packs the sub block A_k,i to A'_i where i is the thread id.
94
+
95
+ // However, before copying to A'_i, we have to make sure that no other thread is still using it,
96
+ // i.e., we test that info->task_info[tid].users equals 0.
97
+ // Then, we set info->task_info[tid].users to the number of threads to mark that all other threads are going to
98
+ // use it.
99
+ while (info->task_info[tid].users != 0) {
100
+ std::this_thread::yield();
101
+ }
102
+ info->task_info[tid].users = threads;
105
103
 
106
- // Pack A_k to A' in a parallel fashion:
107
- // each thread packs the sub block A_k,i to A'_i where i is the thread id.
104
+ pack_lhs(blockA + info->task_info[tid].lhs_start * actual_kc,
105
+ lhs.getSubMapper(info->task_info[tid].lhs_start, k), actual_kc, info->task_info[tid].lhs_length);
108
106
 
109
- // However, before copying to A'_i, we have to make sure that no other thread is still using it,
110
- // i.e., we test that info[tid].users equals 0.
111
- // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
112
- while(info[tid].users!=0) {}
113
- info[tid].users += threads;
107
+ // Notify the other threads that the part A'_i is ready to go.
108
+ info->task_info[tid].sync = k;
114
109
 
115
- pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
110
+ // Computes C_i += A' * B' per A'_i
111
+ for (int shift = 0; shift < threads; ++shift) {
112
+ int i = (tid + shift) % threads;
116
113
 
117
- // Notify the other threads that the part A'_i is ready to go.
118
- info[tid].sync = k;
114
+ // At this point we have to make sure that A'_i has been updated by the thread i,
115
+ // we use testAndSetOrdered to mimic a volatile access.
116
+ // However, no need to wait for the B' part which has been updated by the current thread!
117
+ if (shift > 0) {
118
+ while (info->task_info[i].sync != k) {
119
+ std::this_thread::yield();
120
+ }
121
+ }
119
122
 
120
- // Computes C_i += A' * B' per A'_i
121
- for(int shift=0; shift<threads; ++shift)
122
- {
123
- int i = (tid+shift)%threads;
123
+ gebp(res.getSubMapper(info->task_info[i].lhs_start, 0), blockA + info->task_info[i].lhs_start * actual_kc,
124
+ blockB, info->task_info[i].lhs_length, actual_kc, nc, alpha);
125
+ }
124
126
 
125
- // At this point we have to make sure that A'_i has been updated by the thread i,
126
- // we use testAndSetOrdered to mimic a volatile access.
127
- // However, no need to wait for the B' part which has been updated by the current thread!
128
- if (shift>0) {
129
- while(info[i].sync!=k) {
130
- }
127
+ // Then keep going as usual with the remaining B'
128
+ for (Index j = nc; j < cols; j += nc) {
129
+ const Index actual_nc = (std::min)(j + nc, cols) - j;
130
+
131
+ // pack B_k,j to B'
132
+ pack_rhs(blockB, rhs.getSubMapper(k, j), actual_kc, actual_nc);
133
+
134
+ // C_j += A' * B'
135
+ gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
131
136
  }
132
137
 
133
- gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
138
+ // Release all the sub blocks A'_i of A' for the current thread,
139
+ // i.e., we simply decrement the number of users by 1
140
+ for (Index i = 0; i < threads; ++i) info->task_info[i].users -= 1;
134
141
  }
142
+ } else
143
+ #endif // defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)
144
+ {
145
+ EIGEN_UNUSED_VARIABLE(info);
135
146
 
136
- // Then keep going as usual with the remaining B'
137
- for(Index j=nc; j<cols; j+=nc)
138
- {
139
- const Index actual_nc = (std::min)(j+nc,cols)-j;
147
+ // this is the sequential version!
148
+ std::size_t sizeA = kc * mc;
149
+ std::size_t sizeB = kc * nc;
140
150
 
141
- // pack B_k,j to B'
142
- pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);
151
+ ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
152
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
143
153
 
144
- // C_j += A' * B'
145
- gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
146
- }
154
+ const bool pack_rhs_once = mc != rows && kc == depth && nc == cols;
147
155
 
148
- // Release all the sub blocks A'_i of A' for the current thread,
149
- // i.e., we simply decrement the number of users by 1
150
- for(Index i=0; i<threads; ++i)
151
- #pragma omp atomic
152
- info[i].users -= 1;
153
- }
154
- }
155
- else
156
- #endif // EIGEN_HAS_OPENMP
157
- {
158
- EIGEN_UNUSED_VARIABLE(info);
156
+ // For each horizontal panel of the rhs, and corresponding panel of the lhs...
157
+ for (Index i2 = 0; i2 < rows; i2 += mc) {
158
+ const Index actual_mc = (std::min)(i2 + mc, rows) - i2;
159
159
 
160
- // this is the sequential version!
161
- std::size_t sizeA = kc*mc;
162
- std::size_t sizeB = kc*nc;
160
+ for (Index k2 = 0; k2 < depth; k2 += kc) {
161
+ const Index actual_kc = (std::min)(k2 + kc, depth) - k2;
163
162
 
164
- ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
165
- ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
163
+ // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
164
+ // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
165
+ // Note that this panel will be read as many times as the number of blocks in the rhs's
166
+ // horizontal panel which is, in practice, a very low number.
167
+ pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
166
168
 
167
- const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
169
+ // For each kc x nc block of the rhs's horizontal panel...
170
+ for (Index j2 = 0; j2 < cols; j2 += nc) {
171
+ const Index actual_nc = (std::min)(j2 + nc, cols) - j2;
168
172
 
169
- // For each horizontal panel of the rhs, and corresponding panel of the lhs...
170
- for(Index i2=0; i2<rows; i2+=mc)
171
- {
172
- const Index actual_mc = (std::min)(i2+mc,rows)-i2;
173
-
174
- for(Index k2=0; k2<depth; k2+=kc)
175
- {
176
- const Index actual_kc = (std::min)(k2+kc,depth)-k2;
177
-
178
- // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
179
- // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
180
- // Note that this panel will be read as many times as the number of blocks in the rhs's
181
- // horizontal panel which is, in practice, a very low number.
182
- pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
183
-
184
- // For each kc x nc block of the rhs's horizontal panel...
185
- for(Index j2=0; j2<cols; j2+=nc)
186
- {
187
- const Index actual_nc = (std::min)(j2+nc,cols)-j2;
188
-
189
- // We pack the rhs's block into a sequential chunk of memory (L2 caching)
190
- // Note that this block will be read a very high number of times, which is equal to the number of
191
- // micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
192
- if((!pack_rhs_once) || i2==0)
193
- pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
194
-
195
- // Everything is packed, we can now call the panel * block kernel:
196
- gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
173
+ // We pack the rhs's block into a sequential chunk of memory (L2 caching)
174
+ // Note that this block will be read a very high number of times, which is equal to the number of
175
+ // micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
176
+ if ((!pack_rhs_once) || i2 == 0) pack_rhs(blockB, rhs.getSubMapper(k2, j2), actual_kc, actual_nc);
177
+
178
+ // Everything is packed, we can now call the panel * block kernel:
179
+ gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
180
+ }
197
181
  }
198
182
  }
199
183
  }
200
184
  }
201
- }
202
-
203
185
  };
204
186
 
205
187
  /*********************************************************************************
206
- * Specialization of generic_product_impl for "large" GEMM, i.e.,
207
- * implementation of the high level wrapper to general_matrix_matrix_product
208
- **********************************************************************************/
188
+ * Specialization of generic_product_impl for "large" GEMM, i.e.,
189
+ * implementation of the high level wrapper to general_matrix_matrix_product
190
+ **********************************************************************************/
209
191
 
210
- template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
211
- struct gemm_functor
212
- {
192
+ template <typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest,
193
+ typename BlockingType>
194
+ struct gemm_functor {
213
195
  gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
214
- : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
215
- {}
196
+ : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking) {}
216
197
 
217
- void initParallelSession(Index num_threads) const
218
- {
198
+ void initParallelSession(Index num_threads) const {
219
199
  m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
220
200
  m_blocking.allocateA();
221
201
  }
222
202
 
223
- void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
224
- {
225
- if(cols==-1)
226
- cols = m_rhs.cols();
203
+ void operator()(Index row, Index rows, Index col = 0, Index cols = -1, GemmParallelInfo<Index>* info = 0) const {
204
+ if (cols == -1) cols = m_rhs.cols();
227
205
 
228
- Gemm::run(rows, cols, m_lhs.cols(),
229
- &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
230
- &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
231
- (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.innerStride(), m_dest.outerStride(),
206
+ Gemm::run(rows, cols, m_lhs.cols(), &m_lhs.coeffRef(row, 0), m_lhs.outerStride(), &m_rhs.coeffRef(0, col),
207
+ m_rhs.outerStride(), (Scalar*)&(m_dest.coeffRef(row, col)), m_dest.innerStride(), m_dest.outerStride(),
232
208
  m_actualAlpha, m_blocking, info);
233
209
  }
234
210
 
235
211
  typedef typename Gemm::Traits Traits;
236
212
 
237
- protected:
238
- const Lhs& m_lhs;
239
- const Rhs& m_rhs;
240
- Dest& m_dest;
241
- Scalar m_actualAlpha;
242
- BlockingType& m_blocking;
213
+ protected:
214
+ const Lhs& m_lhs;
215
+ const Rhs& m_rhs;
216
+ Dest& m_dest;
217
+ Scalar m_actualAlpha;
218
+ BlockingType& m_blocking;
243
219
  };
244
220
 
245
- template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
246
- bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
247
-
248
- template<typename _LhsScalar, typename _RhsScalar>
249
- class level3_blocking
250
- {
251
- typedef _LhsScalar LhsScalar;
252
- typedef _RhsScalar RhsScalar;
221
+ template <int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
222
+ int KcFactor = 1, bool FiniteAtCompileTime = MaxRows != Dynamic && MaxCols != Dynamic && MaxDepth != Dynamic>
223
+ class gemm_blocking_space;
253
224
 
254
- protected:
255
- LhsScalar* m_blockA;
256
- RhsScalar* m_blockB;
225
+ template <typename LhsScalar_, typename RhsScalar_>
226
+ class level3_blocking {
227
+ typedef LhsScalar_ LhsScalar;
228
+ typedef RhsScalar_ RhsScalar;
257
229
 
258
- Index m_mc;
259
- Index m_nc;
260
- Index m_kc;
230
+ protected:
231
+ LhsScalar* m_blockA;
232
+ RhsScalar* m_blockB;
261
233
 
262
- public:
234
+ Index m_mc;
235
+ Index m_nc;
236
+ Index m_kc;
263
237
 
264
- level3_blocking()
265
- : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
266
- {}
238
+ public:
239
+ level3_blocking() : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0) {}
267
240
 
268
- inline Index mc() const { return m_mc; }
269
- inline Index nc() const { return m_nc; }
270
- inline Index kc() const { return m_kc; }
241
+ inline Index mc() const { return m_mc; }
242
+ inline Index nc() const { return m_nc; }
243
+ inline Index kc() const { return m_kc; }
271
244
 
272
- inline LhsScalar* blockA() { return m_blockA; }
273
- inline RhsScalar* blockB() { return m_blockB; }
245
+ inline LhsScalar* blockA() { return m_blockA; }
246
+ inline RhsScalar* blockB() { return m_blockB; }
274
247
  };
275
248
 
276
- template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
277
- class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
278
- : public level3_blocking<
279
- typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
280
- typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
281
- {
282
- enum {
283
- Transpose = StorageOrder==RowMajor,
284
- ActualRows = Transpose ? MaxCols : MaxRows,
285
- ActualCols = Transpose ? MaxRows : MaxCols
286
- };
287
- typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
288
- typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
289
- typedef gebp_traits<LhsScalar,RhsScalar> Traits;
290
- enum {
291
- SizeA = ActualRows * MaxDepth,
292
- SizeB = ActualCols * MaxDepth
293
- };
249
+ template <int StorageOrder, typename LhsScalar_, typename RhsScalar_, int MaxRows, int MaxCols, int MaxDepth,
250
+ int KcFactor>
251
+ class gemm_blocking_space<StorageOrder, LhsScalar_, RhsScalar_, MaxRows, MaxCols, MaxDepth, KcFactor,
252
+ true /* == FiniteAtCompileTime */>
253
+ : public level3_blocking<std::conditional_t<StorageOrder == RowMajor, RhsScalar_, LhsScalar_>,
254
+ std::conditional_t<StorageOrder == RowMajor, LhsScalar_, RhsScalar_>> {
255
+ enum {
256
+ Transpose = StorageOrder == RowMajor,
257
+ ActualRows = Transpose ? MaxCols : MaxRows,
258
+ ActualCols = Transpose ? MaxRows : MaxCols
259
+ };
260
+ typedef std::conditional_t<Transpose, RhsScalar_, LhsScalar_> LhsScalar;
261
+ typedef std::conditional_t<Transpose, LhsScalar_, RhsScalar_> RhsScalar;
262
+ enum { SizeA = ActualRows * MaxDepth, SizeB = ActualCols * MaxDepth };
294
263
 
295
264
  #if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
296
- EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
297
- EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
265
+ EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
266
+ EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
298
267
  #else
299
- EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
300
- EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
268
+ EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES - 1];
269
+ EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES - 1];
301
270
  #endif
302
271
 
303
- public:
304
-
305
- gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
306
- {
307
- this->m_mc = ActualRows;
308
- this->m_nc = ActualCols;
309
- this->m_kc = MaxDepth;
272
+ public:
273
+ gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/,
274
+ bool /*full_rows = false*/) {
275
+ this->m_mc = ActualRows;
276
+ this->m_nc = ActualCols;
277
+ this->m_kc = MaxDepth;
310
278
  #if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
311
- this->m_blockA = m_staticA;
312
- this->m_blockB = m_staticB;
279
+ this->m_blockA = m_staticA;
280
+ this->m_blockB = m_staticB;
313
281
  #else
314
- this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
315
- this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
282
+ this->m_blockA = reinterpret_cast<LhsScalar*>((std::uintptr_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES - 1)) &
283
+ ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES - 1));
284
+ this->m_blockB = reinterpret_cast<RhsScalar*>((std::uintptr_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES - 1)) &
285
+ ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES - 1));
316
286
  #endif
317
- }
287
+ }
318
288
 
319
- void initParallel(Index, Index, Index, Index)
320
- {}
289
+ void initParallel(Index, Index, Index, Index) {}
321
290
 
322
- inline void allocateA() {}
323
- inline void allocateB() {}
324
- inline void allocateAll() {}
291
+ inline void allocateA() {}
292
+ inline void allocateB() {}
293
+ inline void allocateAll() {}
325
294
  };
326
295
 
327
- template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
328
- class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
329
- : public level3_blocking<
330
- typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
331
- typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
332
- {
333
- enum {
334
- Transpose = StorageOrder==RowMajor
335
- };
336
- typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
337
- typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
338
- typedef gebp_traits<LhsScalar,RhsScalar> Traits;
339
-
340
- Index m_sizeA;
341
- Index m_sizeB;
342
-
343
- public:
344
-
345
- gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
296
+ template <int StorageOrder, typename LhsScalar_, typename RhsScalar_, int MaxRows, int MaxCols, int MaxDepth,
297
+ int KcFactor>
298
+ class gemm_blocking_space<StorageOrder, LhsScalar_, RhsScalar_, MaxRows, MaxCols, MaxDepth, KcFactor, false>
299
+ : public level3_blocking<std::conditional_t<StorageOrder == RowMajor, RhsScalar_, LhsScalar_>,
300
+ std::conditional_t<StorageOrder == RowMajor, LhsScalar_, RhsScalar_>> {
301
+ enum { Transpose = StorageOrder == RowMajor };
302
+ typedef std::conditional_t<Transpose, RhsScalar_, LhsScalar_> LhsScalar;
303
+ typedef std::conditional_t<Transpose, LhsScalar_, RhsScalar_> RhsScalar;
304
+
305
+ Index m_sizeA;
306
+ Index m_sizeB;
307
+
308
+ public:
309
+ gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking) {
310
+ this->m_mc = Transpose ? cols : rows;
311
+ this->m_nc = Transpose ? rows : cols;
312
+ this->m_kc = depth;
313
+
314
+ if (l3_blocking) {
315
+ computeProductBlockingSizes<LhsScalar, RhsScalar, KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
316
+ } else // no l3 blocking
346
317
  {
347
- this->m_mc = Transpose ? cols : rows;
348
- this->m_nc = Transpose ? rows : cols;
349
- this->m_kc = depth;
318
+ Index n = this->m_nc;
319
+ computeProductBlockingSizes<LhsScalar, RhsScalar, KcFactor>(this->m_kc, this->m_mc, n, num_threads);
320
+ }
350
321
 
351
- if(l3_blocking)
352
- {
353
- computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
354
- }
355
- else // no l3 blocking
356
- {
357
- Index n = this->m_nc;
358
- computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
359
- }
322
+ m_sizeA = this->m_mc * this->m_kc;
323
+ m_sizeB = this->m_kc * this->m_nc;
324
+ }
360
325
 
361
- m_sizeA = this->m_mc * this->m_kc;
362
- m_sizeB = this->m_kc * this->m_nc;
363
- }
326
+ void initParallel(Index rows, Index cols, Index depth, Index num_threads) {
327
+ this->m_mc = Transpose ? cols : rows;
328
+ this->m_nc = Transpose ? rows : cols;
329
+ this->m_kc = depth;
364
330
 
365
- void initParallel(Index rows, Index cols, Index depth, Index num_threads)
366
- {
367
- this->m_mc = Transpose ? cols : rows;
368
- this->m_nc = Transpose ? rows : cols;
369
- this->m_kc = depth;
370
-
371
- eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
372
- Index m = this->m_mc;
373
- computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
374
- m_sizeA = this->m_mc * this->m_kc;
375
- m_sizeB = this->m_kc * this->m_nc;
376
- }
331
+ eigen_internal_assert(this->m_blockA == 0 && this->m_blockB == 0);
332
+ Index m = this->m_mc;
333
+ computeProductBlockingSizes<LhsScalar, RhsScalar, KcFactor>(this->m_kc, m, this->m_nc, num_threads);
334
+ m_sizeA = this->m_mc * this->m_kc;
335
+ m_sizeB = this->m_kc * this->m_nc;
336
+ }
377
337
 
378
- void allocateA()
379
- {
380
- if(this->m_blockA==0)
381
- this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
382
- }
338
+ void allocateA() {
339
+ if (this->m_blockA == 0) this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
340
+ }
383
341
 
384
- void allocateB()
385
- {
386
- if(this->m_blockB==0)
387
- this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
388
- }
342
+ void allocateB() {
343
+ if (this->m_blockB == 0) this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
344
+ }
389
345
 
390
- void allocateAll()
391
- {
392
- allocateA();
393
- allocateB();
394
- }
346
+ void allocateAll() {
347
+ allocateA();
348
+ allocateB();
349
+ }
395
350
 
396
- ~gemm_blocking_space()
397
- {
398
- aligned_delete(this->m_blockA, m_sizeA);
399
- aligned_delete(this->m_blockB, m_sizeB);
400
- }
351
+ ~gemm_blocking_space() {
352
+ aligned_delete(this->m_blockA, m_sizeA);
353
+ aligned_delete(this->m_blockB, m_sizeB);
354
+ }
401
355
  };
402
356
 
403
- } // end namespace internal
357
+ } // end namespace internal
404
358
 
405
359
  namespace internal {
406
360
 
407
- template<typename Lhs, typename Rhs>
408
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
409
- : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
410
- {
411
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
361
+ template <typename Lhs, typename Rhs>
362
+ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemmProduct>
363
+ : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemmProduct>> {
364
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
412
365
  typedef typename Lhs::Scalar LhsScalar;
413
366
  typedef typename Rhs::Scalar RhsScalar;
414
367
 
415
368
  typedef internal::blas_traits<Lhs> LhsBlasTraits;
416
369
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
417
- typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
370
+ typedef internal::remove_all_t<ActualLhsType> ActualLhsTypeCleaned;
418
371
 
419
372
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
420
373
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
421
- typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
422
-
423
- enum {
424
- MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
425
- };
426
-
427
- typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
428
-
429
- template<typename Dst>
430
- static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
431
- {
432
- if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
433
- lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar,Scalar>());
434
- else
435
- {
374
+ typedef internal::remove_all_t<ActualRhsType> ActualRhsTypeCleaned;
375
+
376
+ enum { MaxDepthAtCompileTime = min_size_prefer_fixed(Lhs::MaxColsAtCompileTime, Rhs::MaxRowsAtCompileTime) };
377
+
378
+ typedef generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> lazyproduct;
379
+
380
+ template <typename Dst>
381
+ static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
382
+ // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=404 for a discussion and helper program
383
+ // to determine the following heuristic.
384
+ // EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h,
385
+ // unless it has been specialized by the user or for a given architecture.
386
+ // Note that the condition rhs.rows()>0 was required because lazy product is (was?) not happy with empty inputs.
387
+ // I'm not sure it is still required.
388
+ if ((rhs.rows() + dst.rows() + dst.cols()) < EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows() > 0)
389
+ lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar, Scalar>());
390
+ else {
436
391
  dst.setZero();
437
392
  scaleAndAddTo(dst, lhs, rhs, Scalar(1));
438
393
  }
439
394
  }
440
395
 
441
- template<typename Dst>
442
- static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
443
- {
444
- if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
445
- lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar,Scalar>());
396
+ template <typename Dst>
397
+ static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
398
+ if ((rhs.rows() + dst.rows() + dst.cols()) < EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows() > 0)
399
+ lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar, Scalar>());
446
400
  else
447
- scaleAndAddTo(dst,lhs, rhs, Scalar(1));
401
+ scaleAndAddTo(dst, lhs, rhs, Scalar(1));
448
402
  }
449
403
 
450
- template<typename Dst>
451
- static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
452
- {
453
- if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
454
- lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar,Scalar>());
404
+ template <typename Dst>
405
+ static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
406
+ if ((rhs.rows() + dst.rows() + dst.cols()) < EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows() > 0)
407
+ lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar, Scalar>());
455
408
  else
456
409
  scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
457
410
  }
458
411
 
459
- template<typename Dest>
460
- static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
461
- {
462
- eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
463
- if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
464
- return;
412
+ template <typename Dest>
413
+ static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {
414
+ eigen_assert(dst.rows() == a_lhs.rows() && dst.cols() == a_rhs.cols());
415
+ if (a_lhs.cols() == 0 || a_lhs.rows() == 0 || a_rhs.cols() == 0) return;
416
+
417
+ if (dst.cols() == 1) {
418
+ // Fallback to GEMV if either the lhs or rhs is a runtime vector
419
+ typename Dest::ColXpr dst_vec(dst.col(0));
420
+ return internal::generic_product_impl<Lhs, typename Rhs::ConstColXpr, DenseShape, DenseShape,
421
+ GemvProduct>::scaleAndAddTo(dst_vec, a_lhs, a_rhs.col(0), alpha);
422
+ } else if (dst.rows() == 1) {
423
+ // Fallback to GEMV if either the lhs or rhs is a runtime vector
424
+ typename Dest::RowXpr dst_vec(dst.row(0));
425
+ return internal::generic_product_impl<typename Lhs::ConstRowXpr, Rhs, DenseShape, DenseShape,
426
+ GemvProduct>::scaleAndAddTo(dst_vec, a_lhs.row(0), a_rhs, alpha);
427
+ }
465
428
 
466
- typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
467
- typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
429
+ add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
430
+ add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
468
431
 
469
- Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
470
- * RhsBlasTraits::extractScalarFactor(a_rhs);
432
+ Scalar actualAlpha = combine_scalar_factors(alpha, a_lhs, a_rhs);
471
433
 
472
- typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
473
- Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
434
+ typedef internal::gemm_blocking_space<(Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, LhsScalar, RhsScalar,
435
+ Dest::MaxRowsAtCompileTime, Dest::MaxColsAtCompileTime, MaxDepthAtCompileTime>
436
+ BlockingType;
474
437
 
475
438
  typedef internal::gemm_functor<
476
- Scalar, Index,
477
- internal::general_matrix_matrix_product<
478
- Index,
479
- LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
480
- RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
481
- (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,
482
- Dest::InnerStrideAtCompileTime>,
483
- ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
439
+ Scalar, Index,
440
+ internal::general_matrix_matrix_product<
441
+ Index, LhsScalar, (ActualLhsTypeCleaned::Flags & RowMajorBit) ? RowMajor : ColMajor,
442
+ bool(LhsBlasTraits::NeedToConjugate), RhsScalar,
443
+ (ActualRhsTypeCleaned::Flags & RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
444
+ (Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>,
445
+ ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType>
446
+ GemmFunctor;
484
447
 
485
448
  BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
486
- internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
487
- (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
449
+ internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime > 32 || Dest::MaxRowsAtCompileTime == Dynamic)>(
450
+ GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(),
451
+ Dest::Flags & RowMajorBit);
488
452
  }
489
453
  };
490
454
 
491
- } // end namespace internal
455
+ } // end namespace internal
492
456
 
493
- } // end namespace Eigen
457
+ } // end namespace Eigen
494
458
 
495
- #endif // EIGEN_GENERAL_MATRIX_MATRIX_H
459
+ #endif // EIGEN_GENERAL_MATRIX_MATRIX_H