@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -10,278 +10,246 @@
10
10
  #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
11
11
  #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
12
12
 
13
- namespace Eigen {
13
+ // IWYU pragma: private
14
+ #include "../InternalHeaderCheck.h"
15
+
16
+ namespace Eigen {
14
17
 
15
18
  namespace internal {
16
19
 
17
20
  // pack a selfadjoint block diagonal for use with the gebp_kernel
18
- template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
19
- struct symm_pack_lhs
20
- {
21
- template<int BlockRows> inline
22
- void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
23
- {
21
+ template <typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
22
+ struct symm_pack_lhs {
23
+ template <int BlockRows>
24
+ inline void pack(Scalar* blockA, const const_blas_data_mapper<Scalar, Index, StorageOrder>& lhs, Index cols, Index i,
25
+ Index& count) {
24
26
  // normal copy
25
- for(Index k=0; k<i; k++)
26
- for(Index w=0; w<BlockRows; w++)
27
- blockA[count++] = lhs(i+w,k); // normal
27
+ for (Index k = 0; k < i; k++)
28
+ for (Index w = 0; w < BlockRows; w++) blockA[count++] = lhs(i + w, k); // normal
28
29
  // symmetric copy
29
30
  Index h = 0;
30
- for(Index k=i; k<i+BlockRows; k++)
31
- {
32
- for(Index w=0; w<h; w++)
33
- blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
31
+ for (Index k = i; k < i + BlockRows; k++) {
32
+ for (Index w = 0; w < h; w++) blockA[count++] = numext::conj(lhs(k, i + w)); // transposed
34
33
 
35
- blockA[count++] = numext::real(lhs(k,k)); // real (diagonal)
34
+ blockA[count++] = numext::real(lhs(k, k)); // real (diagonal)
36
35
 
37
- for(Index w=h+1; w<BlockRows; w++)
38
- blockA[count++] = lhs(i+w, k); // normal
36
+ for (Index w = h + 1; w < BlockRows; w++) blockA[count++] = lhs(i + w, k); // normal
39
37
  ++h;
40
38
  }
41
39
  // transposed copy
42
- for(Index k=i+BlockRows; k<cols; k++)
43
- for(Index w=0; w<BlockRows; w++)
44
- blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
40
+ for (Index k = i + BlockRows; k < cols; k++)
41
+ for (Index w = 0; w < BlockRows; w++) blockA[count++] = numext::conj(lhs(k, i + w)); // transposed
45
42
  }
46
- void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
47
- {
43
+ void operator()(Scalar* blockA, const Scalar* lhs_, Index lhsStride, Index cols, Index rows) {
48
44
  typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
49
- typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half QuarterPacket;
50
- enum { PacketSize = packet_traits<Scalar>::size,
51
- HalfPacketSize = unpacket_traits<HalfPacket>::size,
52
- QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
53
- HasHalf = (int)HalfPacketSize < (int)PacketSize,
54
- HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};
55
-
56
- const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
45
+ typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half
46
+ QuarterPacket;
47
+ enum {
48
+ PacketSize = packet_traits<Scalar>::size,
49
+ HalfPacketSize = unpacket_traits<HalfPacket>::size,
50
+ QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
51
+ HasHalf = (int)HalfPacketSize < (int)PacketSize,
52
+ HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize
53
+ };
54
+
55
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(lhs_, lhsStride);
57
56
  Index count = 0;
58
- //Index peeled_mc3 = (rows/Pack1)*Pack1;
59
-
60
- const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
61
- const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
62
- const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;
63
- const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;
64
- const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? peeled_mc_half+((rows-peeled_mc_half)/(QuarterPacketSize))*(QuarterPacketSize) : 0;
65
-
66
- if(Pack1>=3*PacketSize)
67
- for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
68
- pack<3*PacketSize>(blockA, lhs, cols, i, count);
69
-
70
- if(Pack1>=2*PacketSize)
71
- for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
72
- pack<2*PacketSize>(blockA, lhs, cols, i, count);
73
-
74
- if(Pack1>=1*PacketSize)
75
- for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
76
- pack<1*PacketSize>(blockA, lhs, cols, i, count);
77
-
78
- if(HasHalf && Pack1>=HalfPacketSize)
79
- for(Index i=peeled_mc1; i<peeled_mc_half; i+=HalfPacketSize)
57
+ // Index peeled_mc3 = (rows/Pack1)*Pack1;
58
+
59
+ const Index peeled_mc3 = Pack1 >= 3 * PacketSize ? (rows / (3 * PacketSize)) * (3 * PacketSize) : 0;
60
+ const Index peeled_mc2 =
61
+ Pack1 >= 2 * PacketSize ? peeled_mc3 + ((rows - peeled_mc3) / (2 * PacketSize)) * (2 * PacketSize) : 0;
62
+ const Index peeled_mc1 =
63
+ Pack1 >= 1 * PacketSize ? peeled_mc2 + ((rows - peeled_mc2) / (1 * PacketSize)) * (1 * PacketSize) : 0;
64
+ const Index peeled_mc_half =
65
+ Pack1 >= HalfPacketSize ? peeled_mc1 + ((rows - peeled_mc1) / (HalfPacketSize)) * (HalfPacketSize) : 0;
66
+ const Index peeled_mc_quarter =
67
+ Pack1 >= QuarterPacketSize
68
+ ? peeled_mc_half + ((rows - peeled_mc_half) / (QuarterPacketSize)) * (QuarterPacketSize)
69
+ : 0;
70
+
71
+ if (Pack1 >= 3 * PacketSize)
72
+ for (Index i = 0; i < peeled_mc3; i += 3 * PacketSize) pack<3 * PacketSize>(blockA, lhs, cols, i, count);
73
+
74
+ if (Pack1 >= 2 * PacketSize)
75
+ for (Index i = peeled_mc3; i < peeled_mc2; i += 2 * PacketSize) pack<2 * PacketSize>(blockA, lhs, cols, i, count);
76
+
77
+ if (Pack1 >= 1 * PacketSize)
78
+ for (Index i = peeled_mc2; i < peeled_mc1; i += 1 * PacketSize) pack<1 * PacketSize>(blockA, lhs, cols, i, count);
79
+
80
+ if (HasHalf && Pack1 >= HalfPacketSize)
81
+ for (Index i = peeled_mc1; i < peeled_mc_half; i += HalfPacketSize)
80
82
  pack<HalfPacketSize>(blockA, lhs, cols, i, count);
81
83
 
82
- if(HasQuarter && Pack1>=QuarterPacketSize)
83
- for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)
84
+ if (HasQuarter && Pack1 >= QuarterPacketSize)
85
+ for (Index i = peeled_mc_half; i < peeled_mc_quarter; i += QuarterPacketSize)
84
86
  pack<QuarterPacketSize>(blockA, lhs, cols, i, count);
85
87
 
86
88
  // do the same with mr==1
87
- for(Index i=peeled_mc_quarter; i<rows; i++)
88
- {
89
- for(Index k=0; k<i; k++)
90
- blockA[count++] = lhs(i, k); // normal
89
+ for (Index i = peeled_mc_quarter; i < rows; i++) {
90
+ for (Index k = 0; k < i; k++) blockA[count++] = lhs(i, k); // normal
91
91
 
92
- blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
92
+ blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
93
93
 
94
- for(Index k=i+1; k<cols; k++)
95
- blockA[count++] = numext::conj(lhs(k, i)); // transposed
94
+ for (Index k = i + 1; k < cols; k++) blockA[count++] = numext::conj(lhs(k, i)); // transposed
96
95
  }
97
96
  }
98
97
  };
99
98
 
100
- template<typename Scalar, typename Index, int nr, int StorageOrder>
101
- struct symm_pack_rhs
102
- {
99
+ template <typename Scalar, typename Index, int nr, int StorageOrder>
100
+ struct symm_pack_rhs {
103
101
  enum { PacketSize = packet_traits<Scalar>::size };
104
- void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
105
- {
102
+ void operator()(Scalar* blockB, const Scalar* rhs_, Index rhsStride, Index rows, Index cols, Index k2) {
106
103
  Index end_k = k2 + rows;
107
104
  Index count = 0;
108
- const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
109
- Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
110
- Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
105
+ const_blas_data_mapper<Scalar, Index, StorageOrder> rhs(rhs_, rhsStride);
106
+ Index packet_cols8 = nr >= 8 ? (cols / 8) * 8 : 0;
107
+ Index packet_cols4 = nr >= 4 ? (cols / 4) * 4 : 0;
111
108
 
112
109
  // first part: normal case
113
- for(Index j2=0; j2<k2; j2+=nr)
114
- {
115
- for(Index k=k2; k<end_k; k++)
116
- {
117
- blockB[count+0] = rhs(k,j2+0);
118
- blockB[count+1] = rhs(k,j2+1);
119
- if (nr>=4)
120
- {
121
- blockB[count+2] = rhs(k,j2+2);
122
- blockB[count+3] = rhs(k,j2+3);
110
+ for (Index j2 = 0; j2 < k2; j2 += nr) {
111
+ for (Index k = k2; k < end_k; k++) {
112
+ blockB[count + 0] = rhs(k, j2 + 0);
113
+ blockB[count + 1] = rhs(k, j2 + 1);
114
+ if (nr >= 4) {
115
+ blockB[count + 2] = rhs(k, j2 + 2);
116
+ blockB[count + 3] = rhs(k, j2 + 3);
123
117
  }
124
- if (nr>=8)
125
- {
126
- blockB[count+4] = rhs(k,j2+4);
127
- blockB[count+5] = rhs(k,j2+5);
128
- blockB[count+6] = rhs(k,j2+6);
129
- blockB[count+7] = rhs(k,j2+7);
118
+ if (nr >= 8) {
119
+ blockB[count + 4] = rhs(k, j2 + 4);
120
+ blockB[count + 5] = rhs(k, j2 + 5);
121
+ blockB[count + 6] = rhs(k, j2 + 6);
122
+ blockB[count + 7] = rhs(k, j2 + 7);
130
123
  }
131
124
  count += nr;
132
125
  }
133
126
  }
134
127
 
135
128
  // second part: diagonal block
136
- Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
137
- if(nr>=8)
138
- {
139
- for(Index j2=k2; j2<end8; j2+=8)
140
- {
129
+ Index end8 = nr >= 8 ? (std::min)(k2 + rows, packet_cols8) : k2;
130
+ if (nr >= 8) {
131
+ for (Index j2 = k2; j2 < end8; j2 += 8) {
141
132
  // again we can split vertically in three different parts (transpose, symmetric, normal)
142
133
  // transpose
143
- for(Index k=k2; k<j2; k++)
144
- {
145
- blockB[count+0] = numext::conj(rhs(j2+0,k));
146
- blockB[count+1] = numext::conj(rhs(j2+1,k));
147
- blockB[count+2] = numext::conj(rhs(j2+2,k));
148
- blockB[count+3] = numext::conj(rhs(j2+3,k));
149
- blockB[count+4] = numext::conj(rhs(j2+4,k));
150
- blockB[count+5] = numext::conj(rhs(j2+5,k));
151
- blockB[count+6] = numext::conj(rhs(j2+6,k));
152
- blockB[count+7] = numext::conj(rhs(j2+7,k));
134
+ for (Index k = k2; k < j2; k++) {
135
+ blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
136
+ blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
137
+ blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
138
+ blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
139
+ blockB[count + 4] = numext::conj(rhs(j2 + 4, k));
140
+ blockB[count + 5] = numext::conj(rhs(j2 + 5, k));
141
+ blockB[count + 6] = numext::conj(rhs(j2 + 6, k));
142
+ blockB[count + 7] = numext::conj(rhs(j2 + 7, k));
153
143
  count += 8;
154
144
  }
155
145
  // symmetric
156
146
  Index h = 0;
157
- for(Index k=j2; k<j2+8; k++)
158
- {
147
+ for (Index k = j2; k < j2 + 8; k++) {
159
148
  // normal
160
- for (Index w=0 ; w<h; ++w)
161
- blockB[count+w] = rhs(k,j2+w);
149
+ for (Index w = 0; w < h; ++w) blockB[count + w] = rhs(k, j2 + w);
162
150
 
163
- blockB[count+h] = numext::real(rhs(k,k));
151
+ blockB[count + h] = numext::real(rhs(k, k));
164
152
 
165
153
  // transpose
166
- for (Index w=h+1 ; w<8; ++w)
167
- blockB[count+w] = numext::conj(rhs(j2+w,k));
154
+ for (Index w = h + 1; w < 8; ++w) blockB[count + w] = numext::conj(rhs(j2 + w, k));
168
155
  count += 8;
169
156
  ++h;
170
157
  }
171
158
  // normal
172
- for(Index k=j2+8; k<end_k; k++)
173
- {
174
- blockB[count+0] = rhs(k,j2+0);
175
- blockB[count+1] = rhs(k,j2+1);
176
- blockB[count+2] = rhs(k,j2+2);
177
- blockB[count+3] = rhs(k,j2+3);
178
- blockB[count+4] = rhs(k,j2+4);
179
- blockB[count+5] = rhs(k,j2+5);
180
- blockB[count+6] = rhs(k,j2+6);
181
- blockB[count+7] = rhs(k,j2+7);
159
+ for (Index k = j2 + 8; k < end_k; k++) {
160
+ blockB[count + 0] = rhs(k, j2 + 0);
161
+ blockB[count + 1] = rhs(k, j2 + 1);
162
+ blockB[count + 2] = rhs(k, j2 + 2);
163
+ blockB[count + 3] = rhs(k, j2 + 3);
164
+ blockB[count + 4] = rhs(k, j2 + 4);
165
+ blockB[count + 5] = rhs(k, j2 + 5);
166
+ blockB[count + 6] = rhs(k, j2 + 6);
167
+ blockB[count + 7] = rhs(k, j2 + 7);
182
168
  count += 8;
183
169
  }
184
170
  }
185
171
  }
186
- if(nr>=4)
187
- {
188
- for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
189
- {
172
+ if (nr >= 4) {
173
+ for (Index j2 = end8; j2 < (std::min)(k2 + rows, packet_cols4); j2 += 4) {
190
174
  // again we can split vertically in three different parts (transpose, symmetric, normal)
191
175
  // transpose
192
- for(Index k=k2; k<j2; k++)
193
- {
194
- blockB[count+0] = numext::conj(rhs(j2+0,k));
195
- blockB[count+1] = numext::conj(rhs(j2+1,k));
196
- blockB[count+2] = numext::conj(rhs(j2+2,k));
197
- blockB[count+3] = numext::conj(rhs(j2+3,k));
176
+ for (Index k = k2; k < j2; k++) {
177
+ blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
178
+ blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
179
+ blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
180
+ blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
198
181
  count += 4;
199
182
  }
200
183
  // symmetric
201
184
  Index h = 0;
202
- for(Index k=j2; k<j2+4; k++)
203
- {
185
+ for (Index k = j2; k < j2 + 4; k++) {
204
186
  // normal
205
- for (Index w=0 ; w<h; ++w)
206
- blockB[count+w] = rhs(k,j2+w);
187
+ for (Index w = 0; w < h; ++w) blockB[count + w] = rhs(k, j2 + w);
207
188
 
208
- blockB[count+h] = numext::real(rhs(k,k));
189
+ blockB[count + h] = numext::real(rhs(k, k));
209
190
 
210
191
  // transpose
211
- for (Index w=h+1 ; w<4; ++w)
212
- blockB[count+w] = numext::conj(rhs(j2+w,k));
192
+ for (Index w = h + 1; w < 4; ++w) blockB[count + w] = numext::conj(rhs(j2 + w, k));
213
193
  count += 4;
214
194
  ++h;
215
195
  }
216
196
  // normal
217
- for(Index k=j2+4; k<end_k; k++)
218
- {
219
- blockB[count+0] = rhs(k,j2+0);
220
- blockB[count+1] = rhs(k,j2+1);
221
- blockB[count+2] = rhs(k,j2+2);
222
- blockB[count+3] = rhs(k,j2+3);
197
+ for (Index k = j2 + 4; k < end_k; k++) {
198
+ blockB[count + 0] = rhs(k, j2 + 0);
199
+ blockB[count + 1] = rhs(k, j2 + 1);
200
+ blockB[count + 2] = rhs(k, j2 + 2);
201
+ blockB[count + 3] = rhs(k, j2 + 3);
223
202
  count += 4;
224
203
  }
225
204
  }
226
205
  }
227
206
 
228
207
  // third part: transposed
229
- if(nr>=8)
230
- {
231
- for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
232
- {
233
- for(Index k=k2; k<end_k; k++)
234
- {
235
- blockB[count+0] = numext::conj(rhs(j2+0,k));
236
- blockB[count+1] = numext::conj(rhs(j2+1,k));
237
- blockB[count+2] = numext::conj(rhs(j2+2,k));
238
- blockB[count+3] = numext::conj(rhs(j2+3,k));
239
- blockB[count+4] = numext::conj(rhs(j2+4,k));
240
- blockB[count+5] = numext::conj(rhs(j2+5,k));
241
- blockB[count+6] = numext::conj(rhs(j2+6,k));
242
- blockB[count+7] = numext::conj(rhs(j2+7,k));
208
+ if (nr >= 8) {
209
+ for (Index j2 = k2 + rows; j2 < packet_cols8; j2 += 8) {
210
+ for (Index k = k2; k < end_k; k++) {
211
+ blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
212
+ blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
213
+ blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
214
+ blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
215
+ blockB[count + 4] = numext::conj(rhs(j2 + 4, k));
216
+ blockB[count + 5] = numext::conj(rhs(j2 + 5, k));
217
+ blockB[count + 6] = numext::conj(rhs(j2 + 6, k));
218
+ blockB[count + 7] = numext::conj(rhs(j2 + 7, k));
243
219
  count += 8;
244
220
  }
245
221
  }
246
222
  }
247
- if(nr>=4)
248
- {
249
- for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
250
- {
251
- for(Index k=k2; k<end_k; k++)
252
- {
253
- blockB[count+0] = numext::conj(rhs(j2+0,k));
254
- blockB[count+1] = numext::conj(rhs(j2+1,k));
255
- blockB[count+2] = numext::conj(rhs(j2+2,k));
256
- blockB[count+3] = numext::conj(rhs(j2+3,k));
223
+ if (nr >= 4) {
224
+ for (Index j2 = (std::max)(packet_cols8, k2 + rows); j2 < packet_cols4; j2 += 4) {
225
+ for (Index k = k2; k < end_k; k++) {
226
+ blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
227
+ blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
228
+ blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
229
+ blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
257
230
  count += 4;
258
231
  }
259
232
  }
260
233
  }
261
234
 
262
235
  // copy the remaining columns one at a time (=> the same with nr==1)
263
- for(Index j2=packet_cols4; j2<cols; ++j2)
264
- {
236
+ for (Index j2 = packet_cols4; j2 < cols; ++j2) {
265
237
  // transpose
266
- Index half = (std::min)(end_k,j2);
267
- for(Index k=k2; k<half; k++)
268
- {
269
- blockB[count] = numext::conj(rhs(j2,k));
238
+ Index half = (std::min)(end_k, j2);
239
+ for (Index k = k2; k < half; k++) {
240
+ blockB[count] = numext::conj(rhs(j2, k));
270
241
  count += 1;
271
242
  }
272
243
 
273
- if(half==j2 && half<k2+rows)
274
- {
275
- blockB[count] = numext::real(rhs(j2,j2));
244
+ if (half == j2 && half < k2 + rows) {
245
+ blockB[count] = numext::real(rhs(j2, j2));
276
246
  count += 1;
277
- }
278
- else
247
+ } else
279
248
  half--;
280
249
 
281
250
  // normal
282
- for(Index k=half+1; k<k2+rows; k++)
283
- {
284
- blockB[count] = rhs(k,j2);
251
+ for (Index k = half + 1; k < k2 + rows; k++) {
252
+ blockB[count] = rhs(k, j2);
285
253
  count += 1;
286
254
  }
287
255
  }
@@ -291,254 +259,225 @@ struct symm_pack_rhs
291
259
  /* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
292
260
  * the general matrix matrix product.
293
261
  */
294
- template <typename Scalar, typename Index,
295
- int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
296
- int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
297
- int ResStorageOrder, int ResInnerStride>
262
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
263
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResStorageOrder, int ResInnerStride>
298
264
  struct product_selfadjoint_matrix;
299
265
 
300
- template <typename Scalar, typename Index,
301
- int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
302
- int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
303
- int ResInnerStride>
304
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
305
- {
306
-
307
- static EIGEN_STRONG_INLINE void run(
308
- Index rows, Index cols,
309
- const Scalar* lhs, Index lhsStride,
310
- const Scalar* rhs, Index rhsStride,
311
- Scalar* res, Index resIncr, Index resStride,
312
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
313
- {
314
- product_selfadjoint_matrix<Scalar, Index,
315
- EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
316
- RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
317
- EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
318
- LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
319
- ColMajor,ResInnerStride>
320
- ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
266
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
267
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResInnerStride>
268
+ struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, LhsSelfAdjoint, ConjugateLhs, RhsStorageOrder,
269
+ RhsSelfAdjoint, ConjugateRhs, RowMajor, ResInnerStride> {
270
+ static EIGEN_STRONG_INLINE void run(Index rows, Index cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs,
271
+ Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
272
+ level3_blocking<Scalar, Scalar>& blocking) {
273
+ product_selfadjoint_matrix<
274
+ Scalar, Index, logical_xor(RhsSelfAdjoint, RhsStorageOrder == RowMajor) ? ColMajor : RowMajor, RhsSelfAdjoint,
275
+ NumTraits<Scalar>::IsComplex && logical_xor(RhsSelfAdjoint, ConjugateRhs),
276
+ logical_xor(LhsSelfAdjoint, LhsStorageOrder == RowMajor) ? ColMajor : RowMajor, LhsSelfAdjoint,
277
+ NumTraits<Scalar>::IsComplex && logical_xor(LhsSelfAdjoint, ConjugateLhs), ColMajor,
278
+ ResInnerStride>::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
321
279
  }
322
280
  };
323
281
 
324
- template <typename Scalar, typename Index,
325
- int LhsStorageOrder, bool ConjugateLhs,
326
- int RhsStorageOrder, bool ConjugateRhs,
327
- int ResInnerStride>
328
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
329
- {
330
-
331
- static EIGEN_DONT_INLINE void run(
332
- Index rows, Index cols,
333
- const Scalar* _lhs, Index lhsStride,
334
- const Scalar* _rhs, Index rhsStride,
335
- Scalar* res, Index resIncr, Index resStride,
336
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
282
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
283
+ bool ConjugateRhs, int ResInnerStride>
284
+ struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, true, ConjugateLhs, RhsStorageOrder, false,
285
+ ConjugateRhs, ColMajor, ResInnerStride> {
286
+ static EIGEN_DONT_INLINE void run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
287
+ Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
288
+ level3_blocking<Scalar, Scalar>& blocking);
337
289
  };
338
290
 
339
- template <typename Scalar, typename Index,
340
- int LhsStorageOrder, bool ConjugateLhs,
341
- int RhsStorageOrder, bool ConjugateRhs,
342
- int ResInnerStride>
343
- EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
344
- Index rows, Index cols,
345
- const Scalar* _lhs, Index lhsStride,
346
- const Scalar* _rhs, Index rhsStride,
347
- Scalar* _res, Index resIncr, Index resStride,
348
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
349
- {
350
- Index size = rows;
351
-
352
- typedef gebp_traits<Scalar,Scalar> Traits;
353
-
354
- typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
355
- typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
356
- typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
357
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
358
- LhsMapper lhs(_lhs,lhsStride);
359
- LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
360
- RhsMapper rhs(_rhs,rhsStride);
361
- ResMapper res(_res, resStride, resIncr);
362
-
363
- Index kc = blocking.kc(); // cache block size along the K direction
364
- Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
365
- // kc must be smaller than mc
366
- kc = (std::min)(kc,mc);
367
- std::size_t sizeA = kc*mc;
368
- std::size_t sizeB = kc*cols;
369
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
370
- ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
371
-
372
- gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
373
- symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
374
- gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
375
- gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
376
-
377
- for(Index k2=0; k2<size; k2+=kc)
291
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
292
+ bool ConjugateRhs, int ResInnerStride>
293
+ EIGEN_DONT_INLINE void
294
+ product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, true, ConjugateLhs, RhsStorageOrder, false, ConjugateRhs,
295
+ ColMajor, ResInnerStride>::run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride,
296
+ const Scalar* rhs_, Index rhsStride, Scalar* res_,
297
+ Index resIncr, Index resStride, const Scalar& alpha,
298
+ level3_blocking<Scalar, Scalar>& blocking) {
299
+ Index size = rows;
300
+
301
+ typedef gebp_traits<Scalar, Scalar> Traits;
302
+
303
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
304
+ typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
305
+ typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
306
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
307
+ LhsMapper lhs(lhs_, lhsStride);
308
+ LhsTransposeMapper lhs_transpose(lhs_, lhsStride);
309
+ RhsMapper rhs(rhs_, rhsStride);
310
+ ResMapper res(res_, resStride, resIncr);
311
+
312
+ Index kc = blocking.kc(); // cache block size along the K direction
313
+ Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
314
+ // kc must be smaller than mc
315
+ kc = (std::min)(kc, mc);
316
+ std::size_t sizeA = kc * mc;
317
+ std::size_t sizeB = kc * cols;
318
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
319
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
320
+
321
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
322
+ symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
323
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
324
+ gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
325
+ LhsStorageOrder == RowMajor ? ColMajor : RowMajor, true>
326
+ pack_lhs_transposed;
327
+
328
+ for (Index k2 = 0; k2 < size; k2 += kc) {
329
+ const Index actual_kc = (std::min)(k2 + kc, size) - k2;
330
+
331
+ // we have selected one row panel of rhs and one column panel of lhs
332
+ // pack rhs's panel into a sequential chunk of memory
333
+ // and expand each coeff to a constant packet for further reuse
334
+ pack_rhs(blockB, rhs.getSubMapper(k2, 0), actual_kc, cols);
335
+
336
+ // the select lhs's panel has to be split in three different parts:
337
+ // 1 - the transposed panel above the diagonal block => transposed packed copy
338
+ // 2 - the diagonal block => special packed copy
339
+ // 3 - the panel below the diagonal block => generic packed copy
340
+ for (Index i2 = 0; i2 < k2; i2 += mc) {
341
+ const Index actual_mc = (std::min)(i2 + mc, k2) - i2;
342
+ // transposed packed copy
343
+ pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
344
+
345
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
346
+ }
347
+ // the block diagonal
378
348
  {
379
- const Index actual_kc = (std::min)(k2+kc,size)-k2;
380
-
381
- // we have selected one row panel of rhs and one column panel of lhs
382
- // pack rhs's panel into a sequential chunk of memory
383
- // and expand each coeff to a constant packet for further reuse
384
- pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
385
-
386
- // the select lhs's panel has to be split in three different parts:
387
- // 1 - the transposed panel above the diagonal block => transposed packed copy
388
- // 2 - the diagonal block => special packed copy
389
- // 3 - the panel below the diagonal block => generic packed copy
390
- for(Index i2=0; i2<k2; i2+=mc)
391
- {
392
- const Index actual_mc = (std::min)(i2+mc,k2)-i2;
393
- // transposed packed copy
394
- pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
395
-
396
- gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
397
- }
398
- // the block diagonal
399
- {
400
- const Index actual_mc = (std::min)(k2+kc,size)-k2;
401
- // symmetric packed copy
402
- pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
349
+ const Index actual_mc = (std::min)(k2 + kc, size) - k2;
350
+ // symmetric packed copy
351
+ pack_lhs(blockA, &lhs(k2, k2), lhsStride, actual_kc, actual_mc);
403
352
 
404
- gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
405
- }
353
+ gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
354
+ }
406
355
 
407
- for(Index i2=k2+kc; i2<size; i2+=mc)
408
- {
409
- const Index actual_mc = (std::min)(i2+mc,size)-i2;
410
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()
411
- (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
356
+ for (Index i2 = k2 + kc; i2 < size; i2 += mc) {
357
+ const Index actual_mc = (std::min)(i2 + mc, size) - i2;
358
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
359
+ LhsStorageOrder, false>()(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
412
360
 
413
- gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
414
- }
361
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
415
362
  }
416
363
  }
364
+ }
417
365
 
418
366
  // matrix * selfadjoint product
419
- template <typename Scalar, typename Index,
420
- int LhsStorageOrder, bool ConjugateLhs,
421
- int RhsStorageOrder, bool ConjugateRhs,
422
- int ResInnerStride>
423
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
424
- {
425
-
426
- static EIGEN_DONT_INLINE void run(
427
- Index rows, Index cols,
428
- const Scalar* _lhs, Index lhsStride,
429
- const Scalar* _rhs, Index rhsStride,
430
- Scalar* res, Index resIncr, Index resStride,
431
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
367
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
368
+ bool ConjugateRhs, int ResInnerStride>
369
+ struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, false, ConjugateLhs, RhsStorageOrder, true,
370
+ ConjugateRhs, ColMajor, ResInnerStride> {
371
+ static EIGEN_DONT_INLINE void run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
372
+ Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
373
+ level3_blocking<Scalar, Scalar>& blocking);
432
374
  };
433
375
 
434
- template <typename Scalar, typename Index,
435
- int LhsStorageOrder, bool ConjugateLhs,
436
- int RhsStorageOrder, bool ConjugateRhs,
437
- int ResInnerStride>
438
- EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
439
- Index rows, Index cols,
440
- const Scalar* _lhs, Index lhsStride,
441
- const Scalar* _rhs, Index rhsStride,
442
- Scalar* _res, Index resIncr, Index resStride,
443
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
444
- {
445
- Index size = cols;
446
-
447
- typedef gebp_traits<Scalar,Scalar> Traits;
448
-
449
- typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
450
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
451
- LhsMapper lhs(_lhs,lhsStride);
452
- ResMapper res(_res,resStride, resIncr);
453
-
454
- Index kc = blocking.kc(); // cache block size along the K direction
455
- Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
456
- std::size_t sizeA = kc*mc;
457
- std::size_t sizeB = kc*cols;
458
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
459
- ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
460
-
461
- gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
462
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
463
- symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
464
-
465
- for(Index k2=0; k2<size; k2+=kc)
466
- {
467
- const Index actual_kc = (std::min)(k2+kc,size)-k2;
468
-
469
- pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
470
-
471
- // => GEPP
472
- for(Index i2=0; i2<rows; i2+=mc)
473
- {
474
- const Index actual_mc = (std::min)(i2+mc,rows)-i2;
475
- pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
476
-
477
- gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
478
- }
376
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
377
+ bool ConjugateRhs, int ResInnerStride>
378
+ EIGEN_DONT_INLINE void
379
+ product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, false, ConjugateLhs, RhsStorageOrder, true, ConjugateRhs,
380
+ ColMajor, ResInnerStride>::run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride,
381
+ const Scalar* rhs_, Index rhsStride, Scalar* res_,
382
+ Index resIncr, Index resStride, const Scalar& alpha,
383
+ level3_blocking<Scalar, Scalar>& blocking) {
384
+ Index size = cols;
385
+
386
+ typedef gebp_traits<Scalar, Scalar> Traits;
387
+
388
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
389
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
390
+ LhsMapper lhs(lhs_, lhsStride);
391
+ ResMapper res(res_, resStride, resIncr);
392
+
393
+ Index kc = blocking.kc(); // cache block size along the K direction
394
+ Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
395
+ std::size_t sizeA = kc * mc;
396
+ std::size_t sizeB = kc * cols;
397
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
398
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
399
+
400
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
401
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
402
+ LhsStorageOrder>
403
+ pack_lhs;
404
+ symm_pack_rhs<Scalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
405
+
406
+ for (Index k2 = 0; k2 < size; k2 += kc) {
407
+ const Index actual_kc = (std::min)(k2 + kc, size) - k2;
408
+
409
+ pack_rhs(blockB, rhs_, rhsStride, actual_kc, cols, k2);
410
+
411
+ // => GEPP
412
+ for (Index i2 = 0; i2 < rows; i2 += mc) {
413
+ const Index actual_mc = (std::min)(i2 + mc, rows) - i2;
414
+ pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
415
+
416
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
479
417
  }
480
418
  }
419
+ }
481
420
 
482
- } // end namespace internal
421
+ } // end namespace internal
483
422
 
484
423
  /***************************************************************************
485
- * Wrapper to product_selfadjoint_matrix
486
- ***************************************************************************/
424
+ * Wrapper to product_selfadjoint_matrix
425
+ ***************************************************************************/
487
426
 
488
427
  namespace internal {
489
-
490
- template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
491
- struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
492
- {
493
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
494
-
428
+
429
+ template <typename Lhs, int LhsMode, typename Rhs, int RhsMode>
430
+ struct selfadjoint_product_impl<Lhs, LhsMode, false, Rhs, RhsMode, false> {
431
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
432
+
495
433
  typedef internal::blas_traits<Lhs> LhsBlasTraits;
496
434
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
497
435
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
498
436
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
499
-
437
+
500
438
  enum {
501
- LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
502
- LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
503
- RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
504
- RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
439
+ LhsIsUpper = (LhsMode & (Upper | Lower)) == Upper,
440
+ LhsIsSelfAdjoint = (LhsMode & SelfAdjoint) == SelfAdjoint,
441
+ RhsIsUpper = (RhsMode & (Upper | Lower)) == Upper,
442
+ RhsIsSelfAdjoint = (RhsMode & SelfAdjoint) == SelfAdjoint
505
443
  };
506
-
507
- template<typename Dest>
508
- static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
509
- {
510
- eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
511
444
 
512
- typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
513
- typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
445
+ template <typename Dest>
446
+ static void run(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {
447
+ eigen_assert(dst.rows() == a_lhs.rows() && dst.cols() == a_rhs.cols());
448
+
449
+ add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
450
+ add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
514
451
 
515
- Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
516
- * RhsBlasTraits::extractScalarFactor(a_rhs);
452
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs);
517
453
 
518
- typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
519
- Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
454
+ typedef internal::gemm_blocking_space<(Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, Scalar, Scalar,
455
+ Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime,
456
+ Lhs::MaxColsAtCompileTime, 1>
457
+ BlockingType;
520
458
 
521
459
  BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
522
460
 
523
- internal::product_selfadjoint_matrix<Scalar, Index,
524
- EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
525
- NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
526
- EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
527
- NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
528
- internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
529
- Dest::InnerStrideAtCompileTime>
530
- ::run(
531
- lhs.rows(), rhs.cols(), // sizes
532
- &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
533
- &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
534
- &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
535
- actualAlpha, blocking // alpha
536
- );
461
+ internal::product_selfadjoint_matrix<
462
+ Scalar, Index,
463
+ internal::logical_xor(LhsIsUpper, internal::traits<Lhs>::Flags & RowMajorBit) ? RowMajor : ColMajor,
464
+ LhsIsSelfAdjoint,
465
+ NumTraits<Scalar>::IsComplex && internal::logical_xor(LhsIsUpper, bool(LhsBlasTraits::NeedToConjugate)),
466
+ internal::logical_xor(RhsIsUpper, internal::traits<Rhs>::Flags & RowMajorBit) ? RowMajor : ColMajor,
467
+ RhsIsSelfAdjoint,
468
+ NumTraits<Scalar>::IsComplex && internal::logical_xor(RhsIsUpper, bool(RhsBlasTraits::NeedToConjugate)),
469
+ internal::traits<Dest>::Flags & RowMajorBit ? RowMajor : ColMajor,
470
+ Dest::InnerStrideAtCompileTime>::run(lhs.rows(), rhs.cols(), // sizes
471
+ &lhs.coeffRef(0, 0), lhs.outerStride(), // lhs info
472
+ &rhs.coeffRef(0, 0), rhs.outerStride(), // rhs info
473
+ &dst.coeffRef(0, 0), dst.innerStride(), dst.outerStride(), // result info
474
+ actualAlpha, blocking // alpha
475
+ );
537
476
  }
538
477
  };
539
478
 
540
- } // end namespace internal
479
+ } // end namespace internal
541
480
 
542
- } // end namespace Eigen
481
+ } // end namespace Eigen
543
482
 
544
- #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
483
+ #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H