@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -2,6 +2,7 @@
2
2
  // for linear algebra.
3
3
  //
4
4
  // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ // Modifications Copyright (C) 2022 Intel Corporation
5
6
  //
6
7
  // This Source Code Form is subject to the terms of the Mozilla
7
8
  // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -10,326 +11,378 @@
10
11
  #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
11
12
  #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
12
13
 
13
- namespace Eigen {
14
+ // IWYU pragma: private
15
+ #include "../InternalHeaderCheck.h"
16
+
17
+ namespace Eigen {
14
18
 
15
19
  namespace internal {
16
20
 
21
+ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
22
+ bool Specialized>
23
+ struct trsmKernelL {
24
+ // Generic Implementation of triangular solve for triangular matrix on left and multiple rhs.
25
+ // Handles non-packed matrices.
26
+ static void kernel(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherIncr,
27
+ Index otherStride);
28
+ };
29
+
30
+ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
31
+ bool Specialized>
32
+ struct trsmKernelR {
33
+ // Generic Implementation of triangular solve for triangular matrix on right and multiple lhs.
34
+ // Handles non-packed matrices.
35
+ static void kernel(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherIncr,
36
+ Index otherStride);
37
+ };
38
+
39
+ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
40
+ bool Specialized>
41
+ EIGEN_STRONG_INLINE void trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
42
+ Specialized>::kernel(Index size, Index otherSize, const Scalar* _tri,
43
+ Index triStride, Scalar* _other, Index otherIncr,
44
+ Index otherStride) {
45
+ typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
46
+ typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
47
+ TriMapper tri(_tri, triStride);
48
+ OtherMapper other(_other, otherStride, otherIncr);
49
+
50
+ enum { IsLower = (Mode & Lower) == Lower };
51
+ conj_if<Conjugate> conj;
52
+
53
+ // tr solve
54
+ for (Index k = 0; k < size; ++k) {
55
+ // TODO write a small kernel handling this (can be shared with trsv)
56
+ Index i = IsLower ? k : -k - 1;
57
+ Index rs = size - k - 1; // remaining size
58
+ Index s = TriStorageOrder == RowMajor ? (IsLower ? 0 : i + 1) : IsLower ? i + 1 : i - rs;
59
+
60
+ Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1) / conj(tri(i, i)));
61
+ for (Index j = 0; j < otherSize; ++j) {
62
+ if (TriStorageOrder == RowMajor) {
63
+ Scalar b(0);
64
+ const Scalar* l = &tri(i, s);
65
+ typename OtherMapper::LinearMapper r = other.getLinearMapper(s, j);
66
+ for (Index i3 = 0; i3 < k; ++i3) b += conj(l[i3]) * r(i3);
67
+
68
+ other(i, j) = (other(i, j) - b) * a;
69
+ } else {
70
+ Scalar& otherij = other(i, j);
71
+ otherij *= a;
72
+ Scalar b = otherij;
73
+ typename OtherMapper::LinearMapper r = other.getLinearMapper(s, j);
74
+ typename TriMapper::LinearMapper l = tri.getLinearMapper(s, i);
75
+ for (Index i3 = 0; i3 < rs; ++i3) r(i3) -= b * conj(l(i3));
76
+ }
77
+ }
78
+ }
79
+ }
80
+
81
+ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride,
82
+ bool Specialized>
83
+ EIGEN_STRONG_INLINE void trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
84
+ Specialized>::kernel(Index size, Index otherSize, const Scalar* _tri,
85
+ Index triStride, Scalar* _other, Index otherIncr,
86
+ Index otherStride) {
87
+ typedef typename NumTraits<Scalar>::Real RealScalar;
88
+ typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
89
+ typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
90
+ LhsMapper lhs(_other, otherStride, otherIncr);
91
+ RhsMapper rhs(_tri, triStride);
92
+
93
+ enum { RhsStorageOrder = TriStorageOrder, IsLower = (Mode & Lower) == Lower };
94
+ conj_if<Conjugate> conj;
95
+
96
+ for (Index k = 0; k < size; ++k) {
97
+ Index j = IsLower ? size - k - 1 : k;
98
+
99
+ typename LhsMapper::LinearMapper r = lhs.getLinearMapper(0, j);
100
+ for (Index k3 = 0; k3 < k; ++k3) {
101
+ Scalar b = conj(rhs(IsLower ? j + 1 + k3 : k3, j));
102
+ typename LhsMapper::LinearMapper a = lhs.getLinearMapper(0, IsLower ? j + 1 + k3 : k3);
103
+ for (Index i = 0; i < otherSize; ++i) r(i) -= a(i) * b;
104
+ }
105
+ if ((Mode & UnitDiag) == 0) {
106
+ Scalar inv_rjj = RealScalar(1) / conj(rhs(j, j));
107
+ for (Index i = 0; i < otherSize; ++i) r(i) *= inv_rjj;
108
+ }
109
+ }
110
+ }
111
+
17
112
  // if the rhs is row major, let's transpose the product
18
- template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
19
- struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride>
20
- {
21
- static void run(
22
- Index size, Index cols,
23
- const Scalar* tri, Index triStride,
24
- Scalar* _other, Index otherIncr, Index otherStride,
25
- level3_blocking<Scalar,Scalar>& blocking)
26
- {
113
+ template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder,
114
+ int OtherInnerStride>
115
+ struct triangular_solve_matrix<Scalar, Index, Side, Mode, Conjugate, TriStorageOrder, RowMajor, OtherInnerStride> {
116
+ static void run(Index size, Index cols, const Scalar* tri, Index triStride, Scalar* _other, Index otherIncr,
117
+ Index otherStride, level3_blocking<Scalar, Scalar>& blocking) {
27
118
  triangular_solve_matrix<
28
- Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
29
- (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
30
- NumTraits<Scalar>::IsComplex && Conjugate,
31
- TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
32
- ::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
119
+ Scalar, Index, Side == OnTheLeft ? OnTheRight : OnTheLeft, (Mode & UnitDiag) | ((Mode & Upper) ? Lower : Upper),
120
+ NumTraits<Scalar>::IsComplex && Conjugate, TriStorageOrder == RowMajor ? ColMajor : RowMajor, ColMajor,
121
+ OtherInnerStride>::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
33
122
  }
34
123
  };
35
124
 
36
125
  /* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
37
126
  */
38
- template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
39
- struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
40
- {
41
- static EIGEN_DONT_INLINE void run(
42
- Index size, Index otherSize,
43
- const Scalar* _tri, Index triStride,
44
- Scalar* _other, Index otherIncr, Index otherStride,
45
- level3_blocking<Scalar,Scalar>& blocking);
46
- };
47
127
  template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
48
- EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
49
- Index size, Index otherSize,
50
- const Scalar* _tri, Index triStride,
51
- Scalar* _other, Index otherIncr, Index otherStride,
52
- level3_blocking<Scalar,Scalar>& blocking)
53
- {
54
- Index cols = otherSize;
55
-
56
- typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
57
- typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
58
- TriMapper tri(_tri, triStride);
59
- OtherMapper other(_other, otherStride, otherIncr);
60
-
61
- typedef gebp_traits<Scalar,Scalar> Traits;
62
-
63
- enum {
64
- SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
65
- IsLower = (Mode&Lower) == Lower
66
- };
67
-
68
- Index kc = blocking.kc(); // cache block size along the K direction
69
- Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction
70
-
71
- std::size_t sizeA = kc*mc;
72
- std::size_t sizeB = kc*cols;
73
-
74
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
75
- ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
76
-
77
- conj_if<Conjugate> conj;
78
- gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
79
- gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
80
- gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
81
-
82
- // the goal here is to subdivise the Rhs panels such that we keep some cache
83
- // coherence when accessing the rhs elements
84
- std::ptrdiff_t l1, l2, l3;
85
- manage_caching_sizes(GetAction, &l1, &l2, &l3);
86
- Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max<Index>(otherStride,size)) : 0;
87
- subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
128
+ struct triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, Conjugate, TriStorageOrder, ColMajor, OtherInnerStride> {
129
+ static EIGEN_DONT_INLINE void run(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other,
130
+ Index otherIncr, Index otherStride, level3_blocking<Scalar, Scalar>& blocking);
131
+ };
88
132
 
89
- for(Index k2=IsLower ? 0 : size;
90
- IsLower ? k2<size : k2>0;
91
- IsLower ? k2+=kc : k2-=kc)
92
- {
93
- const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
94
-
95
- // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
96
- // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
97
- // A11 (the triangular part) and A21 the remaining rectangular part.
98
- // Then the high level algorithm is:
99
- // - B = R1 => general block copy (done during the next step)
100
- // - R1 = A11^-1 B => tricky part
101
- // - update B from the new R1 => actually this has to be performed continuously during the above step
102
- // - R2 -= A21 * B => GEPP
103
-
104
- // The tricky part: compute R1 = A11^-1 B while updating B from R1
105
- // The idea is to split A11 into multiple small vertical panels.
106
- // Each panel can be split into a small triangular part T1k which is processed without optimization,
107
- // and the remaining small part T2k which is processed using gebp with appropriate block strides
108
- for(Index j2=0; j2<cols; j2+=subcols)
109
- {
110
- Index actual_cols = (std::min)(cols-j2,subcols);
111
- // for each small vertical panels [T1k^T, T2k^T]^T of lhs
112
- for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
133
+ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
134
+ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, Conjugate, TriStorageOrder, ColMajor,
135
+ OtherInnerStride>::run(Index size, Index otherSize, const Scalar* _tri,
136
+ Index triStride, Scalar* _other, Index otherIncr,
137
+ Index otherStride,
138
+ level3_blocking<Scalar, Scalar>& blocking) {
139
+ Index cols = otherSize;
140
+
141
+ std::ptrdiff_t l1, l2, l3;
142
+ manage_caching_sizes(GetAction, &l1, &l2, &l3);
143
+
144
+ #if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS
145
+ EIGEN_IF_CONSTEXPR(
146
+ (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
147
+ // Very rough cutoffs to determine when to call trsm w/o packing
148
+ // For small problem sizes trsmKernel compiled with clang is generally faster.
149
+ // TODO: Investigate better heuristics for cutoffs.
150
+ double L2Cap = 0.5; // 50% of L2 size
151
+ if (size < avx512_trsm_cutoff<Scalar>(l2, cols, L2Cap)) {
152
+ trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, 1, /*Specialized=*/true>::kernel(
153
+ size, cols, _tri, triStride, _other, 1, otherStride);
154
+ return;
155
+ }
156
+ }
157
+ #endif
158
+
159
+ typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
160
+ typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
161
+ TriMapper tri(_tri, triStride);
162
+ OtherMapper other(_other, otherStride, otherIncr);
163
+
164
+ typedef gebp_traits<Scalar, Scalar> Traits;
165
+
166
+ enum { SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr), IsLower = (Mode & Lower) == Lower };
167
+
168
+ Index kc = blocking.kc(); // cache block size along the K direction
169
+ Index mc = (std::min)(size, blocking.mc()); // cache block size along the M direction
170
+
171
+ std::size_t sizeA = kc * mc;
172
+ std::size_t sizeB = kc * cols;
173
+
174
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
175
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
176
+
177
+ gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
178
+ gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
179
+ TriStorageOrder>
180
+ pack_lhs;
181
+ gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
182
+
183
+ // the goal here is to subdivise the Rhs panels such that we keep some cache
184
+ // coherence when accessing the rhs elements
185
+ Index subcols = cols > 0 ? l2 / (4 * sizeof(Scalar) * std::max<Index>(otherStride, size)) : 0;
186
+ subcols = std::max<Index>((subcols / Traits::nr) * Traits::nr, Traits::nr);
187
+
188
+ for (Index k2 = IsLower ? 0 : size; IsLower ? k2 < size : k2 > 0; IsLower ? k2 += kc : k2 -= kc) {
189
+ const Index actual_kc = (std::min)(IsLower ? size - k2 : k2, kc);
190
+
191
+ // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
192
+ // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
193
+ // A11 (the triangular part) and A21 the remaining rectangular part.
194
+ // Then the high level algorithm is:
195
+ // - B = R1 => general block copy (done during the next step)
196
+ // - R1 = A11^-1 B => tricky part
197
+ // - update B from the new R1 => actually this has to be performed continuously during the above step
198
+ // - R2 -= A21 * B => GEPP
199
+
200
+ // The tricky part: compute R1 = A11^-1 B while updating B from R1
201
+ // The idea is to split A11 into multiple small vertical panels.
202
+ // Each panel can be split into a small triangular part T1k which is processed without optimization,
203
+ // and the remaining small part T2k which is processed using gebp with appropriate block strides
204
+ for (Index j2 = 0; j2 < cols; j2 += subcols) {
205
+ Index actual_cols = (std::min)(cols - j2, subcols);
206
+ // for each small vertical panels [T1k^T, T2k^T]^T of lhs
207
+ for (Index k1 = 0; k1 < actual_kc; k1 += SmallPanelWidth) {
208
+ Index actualPanelWidth = std::min<Index>(actual_kc - k1, SmallPanelWidth);
209
+ // tr solve
113
210
  {
114
- Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
115
- // tr solve
116
- for (Index k=0; k<actualPanelWidth; ++k)
117
- {
118
- // TODO write a small kernel handling this (can be shared with trsv)
119
- Index i = IsLower ? k2+k1+k : k2-k1-k-1;
120
- Index rs = actualPanelWidth - k - 1; // remaining size
121
- Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
122
- : IsLower ? i+1 : i-rs;
123
-
124
- Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
125
- for (Index j=j2; j<j2+actual_cols; ++j)
126
- {
127
- if (TriStorageOrder==RowMajor)
128
- {
129
- Scalar b(0);
130
- const Scalar* l = &tri(i,s);
131
- typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
132
- for (Index i3=0; i3<k; ++i3)
133
- b += conj(l[i3]) * r(i3);
134
-
135
- other(i,j) = (other(i,j) - b)*a;
136
- }
137
- else
138
- {
139
- Scalar b = (other(i,j) *= a);
140
- typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
141
- typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
142
- for (Index i3=0;i3<rs;++i3)
143
- r(i3) -= b * conj(l(i3));
144
- }
145
- }
211
+ Index i = IsLower ? k2 + k1 : k2 - k1;
212
+ #if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS
213
+ EIGEN_IF_CONSTEXPR(
214
+ (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
215
+ i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
146
216
  }
217
+ #endif
218
+ trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::kernel(
219
+ actualPanelWidth, actual_cols, _tri + i + (i)*triStride, triStride,
220
+ _other + i * OtherInnerStride + j2 * otherStride, otherIncr, otherStride);
221
+ }
147
222
 
148
- Index lengthTarget = actual_kc-k1-actualPanelWidth;
149
- Index startBlock = IsLower ? k2+k1 : k2-k1-actualPanelWidth;
150
- Index blockBOffset = IsLower ? k1 : lengthTarget;
223
+ Index lengthTarget = actual_kc - k1 - actualPanelWidth;
224
+ Index startBlock = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
225
+ Index blockBOffset = IsLower ? k1 : lengthTarget;
151
226
 
152
- // update the respective rows of B from other
153
- pack_rhs(blockB+actual_kc*j2, other.getSubMapper(startBlock,j2), actualPanelWidth, actual_cols, actual_kc, blockBOffset);
227
+ // update the respective rows of B from other
228
+ pack_rhs(blockB + actual_kc * j2, other.getSubMapper(startBlock, j2), actualPanelWidth, actual_cols, actual_kc,
229
+ blockBOffset);
154
230
 
155
- // GEBP
156
- if (lengthTarget>0)
157
- {
158
- Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;
231
+ // GEBP
232
+ if (lengthTarget > 0) {
233
+ Index startTarget = IsLower ? k2 + k1 + actualPanelWidth : k2 - actual_kc;
159
234
 
160
- pack_lhs(blockA, tri.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
235
+ pack_lhs(blockA, tri.getSubMapper(startTarget, startBlock), actualPanelWidth, lengthTarget);
161
236
 
162
- gebp_kernel(other.getSubMapper(startTarget,j2), blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
163
- actualPanelWidth, actual_kc, 0, blockBOffset);
164
- }
237
+ gebp_kernel(other.getSubMapper(startTarget, j2), blockA, blockB + actual_kc * j2, lengthTarget,
238
+ actualPanelWidth, actual_cols, Scalar(-1), actualPanelWidth, actual_kc, 0, blockBOffset);
165
239
  }
166
240
  }
167
-
168
- // R2 -= A21 * B => GEPP
169
- {
170
- Index start = IsLower ? k2+kc : 0;
171
- Index end = IsLower ? size : k2-kc;
172
- for(Index i2=start; i2<end; i2+=mc)
173
- {
174
- const Index actual_mc = (std::min)(mc,end-i2);
175
- if (actual_mc>0)
176
- {
177
- pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2-kc), actual_kc, actual_mc);
241
+ }
178
242
 
179
- gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
180
- }
243
+ // R2 -= A21 * B => GEPP
244
+ {
245
+ Index start = IsLower ? k2 + kc : 0;
246
+ Index end = IsLower ? size : k2 - kc;
247
+ for (Index i2 = start; i2 < end; i2 += mc) {
248
+ const Index actual_mc = (std::min)(mc, end - i2);
249
+ if (actual_mc > 0) {
250
+ pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2 - kc), actual_kc, actual_mc);
251
+
252
+ gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
181
253
  }
182
254
  }
183
255
  }
184
256
  }
257
+ }
185
258
 
186
259
  /* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
187
260
  */
188
261
  template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
189
- struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
190
- {
191
- static EIGEN_DONT_INLINE void run(
192
- Index size, Index otherSize,
193
- const Scalar* _tri, Index triStride,
194
- Scalar* _other, Index otherIncr, Index otherStride,
195
- level3_blocking<Scalar,Scalar>& blocking);
262
+ struct triangular_solve_matrix<Scalar, Index, OnTheRight, Mode, Conjugate, TriStorageOrder, ColMajor,
263
+ OtherInnerStride> {
264
+ static EIGEN_DONT_INLINE void run(Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other,
265
+ Index otherIncr, Index otherStride, level3_blocking<Scalar, Scalar>& blocking);
196
266
  };
267
+
197
268
  template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
198
- EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
199
- Index size, Index otherSize,
200
- const Scalar* _tri, Index triStride,
201
- Scalar* _other, Index otherIncr, Index otherStride,
202
- level3_blocking<Scalar,Scalar>& blocking)
203
- {
204
- Index rows = otherSize;
205
- typedef typename NumTraits<Scalar>::Real RealScalar;
206
-
207
- typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
208
- typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
209
- LhsMapper lhs(_other, otherStride, otherIncr);
210
- RhsMapper rhs(_tri, triStride);
211
-
212
- typedef gebp_traits<Scalar,Scalar> Traits;
213
- enum {
214
- RhsStorageOrder = TriStorageOrder,
215
- SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
216
- IsLower = (Mode&Lower) == Lower
217
- };
218
-
219
- Index kc = blocking.kc(); // cache block size along the K direction
220
- Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
221
-
222
- std::size_t sizeA = kc*mc;
223
- std::size_t sizeB = kc*size;
224
-
225
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
226
- ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
227
-
228
- conj_if<Conjugate> conj;
229
- gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
230
- gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
231
- gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;
232
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
233
-
234
- for(Index k2=IsLower ? size : 0;
235
- IsLower ? k2>0 : k2<size;
236
- IsLower ? k2-=kc : k2+=kc)
237
- {
238
- const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
239
- Index actual_k2 = IsLower ? k2-actual_kc : k2 ;
269
+ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheRight, Mode, Conjugate, TriStorageOrder, ColMajor,
270
+ OtherInnerStride>::run(Index size, Index otherSize, const Scalar* _tri,
271
+ Index triStride, Scalar* _other, Index otherIncr,
272
+ Index otherStride,
273
+ level3_blocking<Scalar, Scalar>& blocking) {
274
+ Index rows = otherSize;
275
+
276
+ #if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_R_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS
277
+ EIGEN_IF_CONSTEXPR(
278
+ (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
279
+ // TODO: Investigate better heuristics for cutoffs.
280
+ std::ptrdiff_t l1, l2, l3;
281
+ manage_caching_sizes(GetAction, &l1, &l2, &l3);
282
+ double L2Cap = 0.5; // 50% of L2 size
283
+ if (size < avx512_trsm_cutoff<Scalar>(l2, rows, L2Cap)) {
284
+ trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::kernel(
285
+ size, rows, _tri, triStride, _other, 1, otherStride);
286
+ return;
287
+ }
288
+ }
289
+ #endif
240
290
 
241
- Index startPanel = IsLower ? 0 : k2+actual_kc;
242
- Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
243
- Scalar* geb = blockB+actual_kc*actual_kc;
291
+ typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
292
+ typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
293
+ LhsMapper lhs(_other, otherStride, otherIncr);
294
+ RhsMapper rhs(_tri, triStride);
244
295
 
245
- if (rs>0) pack_rhs(geb, rhs.getSubMapper(actual_k2,startPanel), actual_kc, rs);
296
+ typedef gebp_traits<Scalar, Scalar> Traits;
297
+ enum {
298
+ RhsStorageOrder = TriStorageOrder,
299
+ SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr),
300
+ IsLower = (Mode & Lower) == Lower
301
+ };
246
302
 
247
- // triangular packing (we only pack the panels off the diagonal,
248
- // neglecting the blocks overlapping the diagonal
249
- {
250
- for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
251
- {
252
- Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
253
- Index actual_j2 = actual_k2 + j2;
254
- Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
255
- Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
256
-
257
- if (panelLength>0)
258
- pack_rhs_panel(blockB+j2*actual_kc,
259
- rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
260
- panelLength, actualPanelWidth,
261
- actual_kc, panelOffset);
262
- }
303
+ Index kc = blocking.kc(); // cache block size along the K direction
304
+ Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
305
+
306
+ std::size_t sizeA = kc * mc;
307
+ std::size_t sizeB = kc * size;
308
+
309
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
310
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
311
+
312
+ gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
313
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
314
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder, false, true> pack_rhs_panel;
315
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor,
316
+ false, true>
317
+ pack_lhs_panel;
318
+
319
+ for (Index k2 = IsLower ? size : 0; IsLower ? k2 > 0 : k2 < size; IsLower ? k2 -= kc : k2 += kc) {
320
+ const Index actual_kc = (std::min)(IsLower ? k2 : size - k2, kc);
321
+ Index actual_k2 = IsLower ? k2 - actual_kc : k2;
322
+
323
+ Index startPanel = IsLower ? 0 : k2 + actual_kc;
324
+ Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
325
+ Scalar* geb = blockB + actual_kc * actual_kc;
326
+
327
+ if (rs > 0) pack_rhs(geb, rhs.getSubMapper(actual_k2, startPanel), actual_kc, rs);
328
+
329
+ // triangular packing (we only pack the panels off the diagonal,
330
+ // neglecting the blocks overlapping the diagonal
331
+ {
332
+ for (Index j2 = 0; j2 < actual_kc; j2 += SmallPanelWidth) {
333
+ Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
334
+ Index actual_j2 = actual_k2 + j2;
335
+ Index panelOffset = IsLower ? j2 + actualPanelWidth : 0;
336
+ Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
337
+
338
+ if (panelLength > 0)
339
+ pack_rhs_panel(blockB + j2 * actual_kc, rhs.getSubMapper(actual_k2 + panelOffset, actual_j2), panelLength,
340
+ actualPanelWidth, actual_kc, panelOffset);
263
341
  }
342
+ }
343
+
344
+ for (Index i2 = 0; i2 < rows; i2 += mc) {
345
+ const Index actual_mc = (std::min)(mc, rows - i2);
264
346
 
265
- for(Index i2=0; i2<rows; i2+=mc)
347
+ // triangular solver kernel
266
348
  {
267
- const Index actual_mc = (std::min)(mc,rows-i2);
349
+ // for each small block of the diagonal (=> vertical panels of rhs)
350
+ for (Index j2 = IsLower ? (actual_kc - ((actual_kc % SmallPanelWidth) ? Index(actual_kc % SmallPanelWidth)
351
+ : Index(SmallPanelWidth)))
352
+ : 0;
353
+ IsLower ? j2 >= 0 : j2 < actual_kc; IsLower ? j2 -= SmallPanelWidth : j2 += SmallPanelWidth) {
354
+ Index actualPanelWidth = std::min<Index>(actual_kc - j2, SmallPanelWidth);
355
+ Index absolute_j2 = actual_k2 + j2;
356
+ Index panelOffset = IsLower ? j2 + actualPanelWidth : 0;
357
+ Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
268
358
 
269
- // triangular solver kernel
270
- {
271
- // for each small block of the diagonal (=> vertical panels of rhs)
272
- for (Index j2 = IsLower
273
- ? (actual_kc - ((actual_kc%SmallPanelWidth) ? Index(actual_kc%SmallPanelWidth)
274
- : Index(SmallPanelWidth)))
275
- : 0;
276
- IsLower ? j2>=0 : j2<actual_kc;
277
- IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth)
278
- {
279
- Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
280
- Index absolute_j2 = actual_k2 + j2;
281
- Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
282
- Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
283
-
284
- // GEBP
285
- if(panelLength>0)
286
- {
287
- gebp_kernel(lhs.getSubMapper(i2,absolute_j2),
288
- blockA, blockB+j2*actual_kc,
289
- actual_mc, panelLength, actualPanelWidth,
290
- Scalar(-1),
291
- actual_kc, actual_kc, // strides
292
- panelOffset, panelOffset); // offsets
293
- }
359
+ // GEBP
360
+ if (panelLength > 0) {
361
+ gebp_kernel(lhs.getSubMapper(i2, absolute_j2), blockA, blockB + j2 * actual_kc, actual_mc, panelLength,
362
+ actualPanelWidth, Scalar(-1), actual_kc, actual_kc, // strides
363
+ panelOffset, panelOffset); // offsets
364
+ }
294
365
 
366
+ {
295
367
  // unblocked triangular solve
296
- for (Index k=0; k<actualPanelWidth; ++k)
297
- {
298
- Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
299
-
300
- typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j);
301
- for (Index k3=0; k3<k; ++k3)
302
- {
303
- Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
304
- typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
305
- for (Index i=0; i<actual_mc; ++i)
306
- r(i) -= a(i) * b;
307
- }
308
- if((Mode & UnitDiag)==0)
309
- {
310
- Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
311
- for (Index i=0; i<actual_mc; ++i)
312
- r(i) *= inv_rjj;
313
- }
314
- }
315
-
316
- // pack the just computed part of lhs to A
317
- pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
318
- actualPanelWidth, actual_mc,
319
- actual_kc, j2);
368
+ trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride,
369
+ /*Specialized=*/true>::kernel(actualPanelWidth, actual_mc,
370
+ _tri + absolute_j2 + absolute_j2 * triStride, triStride,
371
+ _other + i2 * OtherInnerStride + absolute_j2 * otherStride,
372
+ otherIncr, otherStride);
320
373
  }
374
+ // pack the just computed part of lhs to A
375
+ pack_lhs_panel(blockA, lhs.getSubMapper(i2, absolute_j2), actualPanelWidth, actual_mc, actual_kc, j2);
321
376
  }
322
-
323
- if (rs>0)
324
- gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb,
325
- actual_mc, actual_kc, rs, Scalar(-1),
326
- -1, -1, 0, 0);
327
377
  }
378
+
379
+ if (rs > 0)
380
+ gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb, actual_mc, actual_kc, rs, Scalar(-1), -1, -1, 0, 0);
328
381
  }
329
382
  }
383
+ }
384
+ } // end namespace internal
330
385
 
331
- } // end namespace internal
332
-
333
- } // end namespace Eigen
386
+ } // end namespace Eigen
334
387
 
335
- #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
388
+ #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H