@smake/eigen 1.0.2 → 1.1.1

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -10,261 +10,246 @@
10
10
  #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
11
11
  #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
12
12
 
13
- namespace Eigen {
13
+ // IWYU pragma: private
14
+ #include "../InternalHeaderCheck.h"
15
+
16
+ namespace Eigen {
14
17
 
15
18
  namespace internal {
16
19
 
17
20
  // pack a selfadjoint block diagonal for use with the gebp_kernel
18
- template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
19
- struct symm_pack_lhs
20
- {
21
- template<int BlockRows> inline
22
- void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
23
- {
21
+ template <typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
22
+ struct symm_pack_lhs {
23
+ template <int BlockRows>
24
+ inline void pack(Scalar* blockA, const const_blas_data_mapper<Scalar, Index, StorageOrder>& lhs, Index cols, Index i,
25
+ Index& count) {
24
26
  // normal copy
25
- for(Index k=0; k<i; k++)
26
- for(Index w=0; w<BlockRows; w++)
27
- blockA[count++] = lhs(i+w,k); // normal
27
+ for (Index k = 0; k < i; k++)
28
+ for (Index w = 0; w < BlockRows; w++) blockA[count++] = lhs(i + w, k); // normal
28
29
  // symmetric copy
29
30
  Index h = 0;
30
- for(Index k=i; k<i+BlockRows; k++)
31
- {
32
- for(Index w=0; w<h; w++)
33
- blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
31
+ for (Index k = i; k < i + BlockRows; k++) {
32
+ for (Index w = 0; w < h; w++) blockA[count++] = numext::conj(lhs(k, i + w)); // transposed
34
33
 
35
- blockA[count++] = numext::real(lhs(k,k)); // real (diagonal)
34
+ blockA[count++] = numext::real(lhs(k, k)); // real (diagonal)
36
35
 
37
- for(Index w=h+1; w<BlockRows; w++)
38
- blockA[count++] = lhs(i+w, k); // normal
36
+ for (Index w = h + 1; w < BlockRows; w++) blockA[count++] = lhs(i + w, k); // normal
39
37
  ++h;
40
38
  }
41
39
  // transposed copy
42
- for(Index k=i+BlockRows; k<cols; k++)
43
- for(Index w=0; w<BlockRows; w++)
44
- blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
40
+ for (Index k = i + BlockRows; k < cols; k++)
41
+ for (Index w = 0; w < BlockRows; w++) blockA[count++] = numext::conj(lhs(k, i + w)); // transposed
45
42
  }
46
- void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
47
- {
48
- enum { PacketSize = packet_traits<Scalar>::size };
49
- const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
43
+ void operator()(Scalar* blockA, const Scalar* lhs_, Index lhsStride, Index cols, Index rows) {
44
+ typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
45
+ typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half
46
+ QuarterPacket;
47
+ enum {
48
+ PacketSize = packet_traits<Scalar>::size,
49
+ HalfPacketSize = unpacket_traits<HalfPacket>::size,
50
+ QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
51
+ HasHalf = (int)HalfPacketSize < (int)PacketSize,
52
+ HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize
53
+ };
54
+
55
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(lhs_, lhsStride);
50
56
  Index count = 0;
51
- //Index peeled_mc3 = (rows/Pack1)*Pack1;
52
-
53
- const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
54
- const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
55
- const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
56
-
57
- if(Pack1>=3*PacketSize)
58
- for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
59
- pack<3*PacketSize>(blockA, lhs, cols, i, count);
60
-
61
- if(Pack1>=2*PacketSize)
62
- for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
63
- pack<2*PacketSize>(blockA, lhs, cols, i, count);
64
-
65
- if(Pack1>=1*PacketSize)
66
- for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
67
- pack<1*PacketSize>(blockA, lhs, cols, i, count);
57
+ // Index peeled_mc3 = (rows/Pack1)*Pack1;
58
+
59
+ const Index peeled_mc3 = Pack1 >= 3 * PacketSize ? (rows / (3 * PacketSize)) * (3 * PacketSize) : 0;
60
+ const Index peeled_mc2 =
61
+ Pack1 >= 2 * PacketSize ? peeled_mc3 + ((rows - peeled_mc3) / (2 * PacketSize)) * (2 * PacketSize) : 0;
62
+ const Index peeled_mc1 =
63
+ Pack1 >= 1 * PacketSize ? peeled_mc2 + ((rows - peeled_mc2) / (1 * PacketSize)) * (1 * PacketSize) : 0;
64
+ const Index peeled_mc_half =
65
+ Pack1 >= HalfPacketSize ? peeled_mc1 + ((rows - peeled_mc1) / (HalfPacketSize)) * (HalfPacketSize) : 0;
66
+ const Index peeled_mc_quarter =
67
+ Pack1 >= QuarterPacketSize
68
+ ? peeled_mc_half + ((rows - peeled_mc_half) / (QuarterPacketSize)) * (QuarterPacketSize)
69
+ : 0;
70
+
71
+ if (Pack1 >= 3 * PacketSize)
72
+ for (Index i = 0; i < peeled_mc3; i += 3 * PacketSize) pack<3 * PacketSize>(blockA, lhs, cols, i, count);
73
+
74
+ if (Pack1 >= 2 * PacketSize)
75
+ for (Index i = peeled_mc3; i < peeled_mc2; i += 2 * PacketSize) pack<2 * PacketSize>(blockA, lhs, cols, i, count);
76
+
77
+ if (Pack1 >= 1 * PacketSize)
78
+ for (Index i = peeled_mc2; i < peeled_mc1; i += 1 * PacketSize) pack<1 * PacketSize>(blockA, lhs, cols, i, count);
79
+
80
+ if (HasHalf && Pack1 >= HalfPacketSize)
81
+ for (Index i = peeled_mc1; i < peeled_mc_half; i += HalfPacketSize)
82
+ pack<HalfPacketSize>(blockA, lhs, cols, i, count);
83
+
84
+ if (HasQuarter && Pack1 >= QuarterPacketSize)
85
+ for (Index i = peeled_mc_half; i < peeled_mc_quarter; i += QuarterPacketSize)
86
+ pack<QuarterPacketSize>(blockA, lhs, cols, i, count);
68
87
 
69
88
  // do the same with mr==1
70
- for(Index i=peeled_mc1; i<rows; i++)
71
- {
72
- for(Index k=0; k<i; k++)
73
- blockA[count++] = lhs(i, k); // normal
89
+ for (Index i = peeled_mc_quarter; i < rows; i++) {
90
+ for (Index k = 0; k < i; k++) blockA[count++] = lhs(i, k); // normal
74
91
 
75
- blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
92
+ blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
76
93
 
77
- for(Index k=i+1; k<cols; k++)
78
- blockA[count++] = numext::conj(lhs(k, i)); // transposed
94
+ for (Index k = i + 1; k < cols; k++) blockA[count++] = numext::conj(lhs(k, i)); // transposed
79
95
  }
80
96
  }
81
97
  };
82
98
 
83
- template<typename Scalar, typename Index, int nr, int StorageOrder>
84
- struct symm_pack_rhs
85
- {
99
+ template <typename Scalar, typename Index, int nr, int StorageOrder>
100
+ struct symm_pack_rhs {
86
101
  enum { PacketSize = packet_traits<Scalar>::size };
87
- void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
88
- {
102
+ void operator()(Scalar* blockB, const Scalar* rhs_, Index rhsStride, Index rows, Index cols, Index k2) {
89
103
  Index end_k = k2 + rows;
90
104
  Index count = 0;
91
- const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
92
- Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
93
- Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
105
+ const_blas_data_mapper<Scalar, Index, StorageOrder> rhs(rhs_, rhsStride);
106
+ Index packet_cols8 = nr >= 8 ? (cols / 8) * 8 : 0;
107
+ Index packet_cols4 = nr >= 4 ? (cols / 4) * 4 : 0;
94
108
 
95
109
  // first part: normal case
96
- for(Index j2=0; j2<k2; j2+=nr)
97
- {
98
- for(Index k=k2; k<end_k; k++)
99
- {
100
- blockB[count+0] = rhs(k,j2+0);
101
- blockB[count+1] = rhs(k,j2+1);
102
- if (nr>=4)
103
- {
104
- blockB[count+2] = rhs(k,j2+2);
105
- blockB[count+3] = rhs(k,j2+3);
110
+ for (Index j2 = 0; j2 < k2; j2 += nr) {
111
+ for (Index k = k2; k < end_k; k++) {
112
+ blockB[count + 0] = rhs(k, j2 + 0);
113
+ blockB[count + 1] = rhs(k, j2 + 1);
114
+ if (nr >= 4) {
115
+ blockB[count + 2] = rhs(k, j2 + 2);
116
+ blockB[count + 3] = rhs(k, j2 + 3);
106
117
  }
107
- if (nr>=8)
108
- {
109
- blockB[count+4] = rhs(k,j2+4);
110
- blockB[count+5] = rhs(k,j2+5);
111
- blockB[count+6] = rhs(k,j2+6);
112
- blockB[count+7] = rhs(k,j2+7);
118
+ if (nr >= 8) {
119
+ blockB[count + 4] = rhs(k, j2 + 4);
120
+ blockB[count + 5] = rhs(k, j2 + 5);
121
+ blockB[count + 6] = rhs(k, j2 + 6);
122
+ blockB[count + 7] = rhs(k, j2 + 7);
113
123
  }
114
124
  count += nr;
115
125
  }
116
126
  }
117
127
 
118
128
  // second part: diagonal block
119
- Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
120
- if(nr>=8)
121
- {
122
- for(Index j2=k2; j2<end8; j2+=8)
123
- {
129
+ Index end8 = nr >= 8 ? (std::min)(k2 + rows, packet_cols8) : k2;
130
+ if (nr >= 8) {
131
+ for (Index j2 = k2; j2 < end8; j2 += 8) {
124
132
  // again we can split vertically in three different parts (transpose, symmetric, normal)
125
133
  // transpose
126
- for(Index k=k2; k<j2; k++)
127
- {
128
- blockB[count+0] = numext::conj(rhs(j2+0,k));
129
- blockB[count+1] = numext::conj(rhs(j2+1,k));
130
- blockB[count+2] = numext::conj(rhs(j2+2,k));
131
- blockB[count+3] = numext::conj(rhs(j2+3,k));
132
- blockB[count+4] = numext::conj(rhs(j2+4,k));
133
- blockB[count+5] = numext::conj(rhs(j2+5,k));
134
- blockB[count+6] = numext::conj(rhs(j2+6,k));
135
- blockB[count+7] = numext::conj(rhs(j2+7,k));
134
+ for (Index k = k2; k < j2; k++) {
135
+ blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
136
+ blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
137
+ blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
138
+ blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
139
+ blockB[count + 4] = numext::conj(rhs(j2 + 4, k));
140
+ blockB[count + 5] = numext::conj(rhs(j2 + 5, k));
141
+ blockB[count + 6] = numext::conj(rhs(j2 + 6, k));
142
+ blockB[count + 7] = numext::conj(rhs(j2 + 7, k));
136
143
  count += 8;
137
144
  }
138
145
  // symmetric
139
146
  Index h = 0;
140
- for(Index k=j2; k<j2+8; k++)
141
- {
147
+ for (Index k = j2; k < j2 + 8; k++) {
142
148
  // normal
143
- for (Index w=0 ; w<h; ++w)
144
- blockB[count+w] = rhs(k,j2+w);
149
+ for (Index w = 0; w < h; ++w) blockB[count + w] = rhs(k, j2 + w);
145
150
 
146
- blockB[count+h] = numext::real(rhs(k,k));
151
+ blockB[count + h] = numext::real(rhs(k, k));
147
152
 
148
153
  // transpose
149
- for (Index w=h+1 ; w<8; ++w)
150
- blockB[count+w] = numext::conj(rhs(j2+w,k));
154
+ for (Index w = h + 1; w < 8; ++w) blockB[count + w] = numext::conj(rhs(j2 + w, k));
151
155
  count += 8;
152
156
  ++h;
153
157
  }
154
158
  // normal
155
- for(Index k=j2+8; k<end_k; k++)
156
- {
157
- blockB[count+0] = rhs(k,j2+0);
158
- blockB[count+1] = rhs(k,j2+1);
159
- blockB[count+2] = rhs(k,j2+2);
160
- blockB[count+3] = rhs(k,j2+3);
161
- blockB[count+4] = rhs(k,j2+4);
162
- blockB[count+5] = rhs(k,j2+5);
163
- blockB[count+6] = rhs(k,j2+6);
164
- blockB[count+7] = rhs(k,j2+7);
159
+ for (Index k = j2 + 8; k < end_k; k++) {
160
+ blockB[count + 0] = rhs(k, j2 + 0);
161
+ blockB[count + 1] = rhs(k, j2 + 1);
162
+ blockB[count + 2] = rhs(k, j2 + 2);
163
+ blockB[count + 3] = rhs(k, j2 + 3);
164
+ blockB[count + 4] = rhs(k, j2 + 4);
165
+ blockB[count + 5] = rhs(k, j2 + 5);
166
+ blockB[count + 6] = rhs(k, j2 + 6);
167
+ blockB[count + 7] = rhs(k, j2 + 7);
165
168
  count += 8;
166
169
  }
167
170
  }
168
171
  }
169
- if(nr>=4)
170
- {
171
- for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
172
- {
172
+ if (nr >= 4) {
173
+ for (Index j2 = end8; j2 < (std::min)(k2 + rows, packet_cols4); j2 += 4) {
173
174
  // again we can split vertically in three different parts (transpose, symmetric, normal)
174
175
  // transpose
175
- for(Index k=k2; k<j2; k++)
176
- {
177
- blockB[count+0] = numext::conj(rhs(j2+0,k));
178
- blockB[count+1] = numext::conj(rhs(j2+1,k));
179
- blockB[count+2] = numext::conj(rhs(j2+2,k));
180
- blockB[count+3] = numext::conj(rhs(j2+3,k));
176
+ for (Index k = k2; k < j2; k++) {
177
+ blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
178
+ blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
179
+ blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
180
+ blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
181
181
  count += 4;
182
182
  }
183
183
  // symmetric
184
184
  Index h = 0;
185
- for(Index k=j2; k<j2+4; k++)
186
- {
185
+ for (Index k = j2; k < j2 + 4; k++) {
187
186
  // normal
188
- for (Index w=0 ; w<h; ++w)
189
- blockB[count+w] = rhs(k,j2+w);
187
+ for (Index w = 0; w < h; ++w) blockB[count + w] = rhs(k, j2 + w);
190
188
 
191
- blockB[count+h] = numext::real(rhs(k,k));
189
+ blockB[count + h] = numext::real(rhs(k, k));
192
190
 
193
191
  // transpose
194
- for (Index w=h+1 ; w<4; ++w)
195
- blockB[count+w] = numext::conj(rhs(j2+w,k));
192
+ for (Index w = h + 1; w < 4; ++w) blockB[count + w] = numext::conj(rhs(j2 + w, k));
196
193
  count += 4;
197
194
  ++h;
198
195
  }
199
196
  // normal
200
- for(Index k=j2+4; k<end_k; k++)
201
- {
202
- blockB[count+0] = rhs(k,j2+0);
203
- blockB[count+1] = rhs(k,j2+1);
204
- blockB[count+2] = rhs(k,j2+2);
205
- blockB[count+3] = rhs(k,j2+3);
197
+ for (Index k = j2 + 4; k < end_k; k++) {
198
+ blockB[count + 0] = rhs(k, j2 + 0);
199
+ blockB[count + 1] = rhs(k, j2 + 1);
200
+ blockB[count + 2] = rhs(k, j2 + 2);
201
+ blockB[count + 3] = rhs(k, j2 + 3);
206
202
  count += 4;
207
203
  }
208
204
  }
209
205
  }
210
206
 
211
207
  // third part: transposed
212
- if(nr>=8)
213
- {
214
- for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
215
- {
216
- for(Index k=k2; k<end_k; k++)
217
- {
218
- blockB[count+0] = numext::conj(rhs(j2+0,k));
219
- blockB[count+1] = numext::conj(rhs(j2+1,k));
220
- blockB[count+2] = numext::conj(rhs(j2+2,k));
221
- blockB[count+3] = numext::conj(rhs(j2+3,k));
222
- blockB[count+4] = numext::conj(rhs(j2+4,k));
223
- blockB[count+5] = numext::conj(rhs(j2+5,k));
224
- blockB[count+6] = numext::conj(rhs(j2+6,k));
225
- blockB[count+7] = numext::conj(rhs(j2+7,k));
208
+ if (nr >= 8) {
209
+ for (Index j2 = k2 + rows; j2 < packet_cols8; j2 += 8) {
210
+ for (Index k = k2; k < end_k; k++) {
211
+ blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
212
+ blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
213
+ blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
214
+ blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
215
+ blockB[count + 4] = numext::conj(rhs(j2 + 4, k));
216
+ blockB[count + 5] = numext::conj(rhs(j2 + 5, k));
217
+ blockB[count + 6] = numext::conj(rhs(j2 + 6, k));
218
+ blockB[count + 7] = numext::conj(rhs(j2 + 7, k));
226
219
  count += 8;
227
220
  }
228
221
  }
229
222
  }
230
- if(nr>=4)
231
- {
232
- for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
233
- {
234
- for(Index k=k2; k<end_k; k++)
235
- {
236
- blockB[count+0] = numext::conj(rhs(j2+0,k));
237
- blockB[count+1] = numext::conj(rhs(j2+1,k));
238
- blockB[count+2] = numext::conj(rhs(j2+2,k));
239
- blockB[count+3] = numext::conj(rhs(j2+3,k));
223
+ if (nr >= 4) {
224
+ for (Index j2 = (std::max)(packet_cols8, k2 + rows); j2 < packet_cols4; j2 += 4) {
225
+ for (Index k = k2; k < end_k; k++) {
226
+ blockB[count + 0] = numext::conj(rhs(j2 + 0, k));
227
+ blockB[count + 1] = numext::conj(rhs(j2 + 1, k));
228
+ blockB[count + 2] = numext::conj(rhs(j2 + 2, k));
229
+ blockB[count + 3] = numext::conj(rhs(j2 + 3, k));
240
230
  count += 4;
241
231
  }
242
232
  }
243
233
  }
244
234
 
245
235
  // copy the remaining columns one at a time (=> the same with nr==1)
246
- for(Index j2=packet_cols4; j2<cols; ++j2)
247
- {
236
+ for (Index j2 = packet_cols4; j2 < cols; ++j2) {
248
237
  // transpose
249
- Index half = (std::min)(end_k,j2);
250
- for(Index k=k2; k<half; k++)
251
- {
252
- blockB[count] = numext::conj(rhs(j2,k));
238
+ Index half = (std::min)(end_k, j2);
239
+ for (Index k = k2; k < half; k++) {
240
+ blockB[count] = numext::conj(rhs(j2, k));
253
241
  count += 1;
254
242
  }
255
243
 
256
- if(half==j2 && half<k2+rows)
257
- {
258
- blockB[count] = numext::real(rhs(j2,j2));
244
+ if (half == j2 && half < k2 + rows) {
245
+ blockB[count] = numext::real(rhs(j2, j2));
259
246
  count += 1;
260
- }
261
- else
247
+ } else
262
248
  half--;
263
249
 
264
250
  // normal
265
- for(Index k=half+1; k<k2+rows; k++)
266
- {
267
- blockB[count] = rhs(k,j2);
251
+ for (Index k = half + 1; k < k2 + rows; k++) {
252
+ blockB[count] = rhs(k, j2);
268
253
  count += 1;
269
254
  }
270
255
  }
@@ -274,254 +259,225 @@ struct symm_pack_rhs
274
259
  /* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
275
260
  * the general matrix matrix product.
276
261
  */
277
- template <typename Scalar, typename Index,
278
- int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
279
- int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
280
- int ResStorageOrder, int ResInnerStride>
262
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
263
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResStorageOrder, int ResInnerStride>
281
264
  struct product_selfadjoint_matrix;
282
265
 
283
- template <typename Scalar, typename Index,
284
- int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
285
- int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
286
- int ResInnerStride>
287
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
288
- {
289
-
290
- static EIGEN_STRONG_INLINE void run(
291
- Index rows, Index cols,
292
- const Scalar* lhs, Index lhsStride,
293
- const Scalar* rhs, Index rhsStride,
294
- Scalar* res, Index resIncr, Index resStride,
295
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
296
- {
297
- product_selfadjoint_matrix<Scalar, Index,
298
- EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
299
- RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
300
- EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
301
- LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
302
- ColMajor,ResInnerStride>
303
- ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
266
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
267
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, int ResInnerStride>
268
+ struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, LhsSelfAdjoint, ConjugateLhs, RhsStorageOrder,
269
+ RhsSelfAdjoint, ConjugateRhs, RowMajor, ResInnerStride> {
270
+ static EIGEN_STRONG_INLINE void run(Index rows, Index cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs,
271
+ Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
272
+ level3_blocking<Scalar, Scalar>& blocking) {
273
+ product_selfadjoint_matrix<
274
+ Scalar, Index, logical_xor(RhsSelfAdjoint, RhsStorageOrder == RowMajor) ? ColMajor : RowMajor, RhsSelfAdjoint,
275
+ NumTraits<Scalar>::IsComplex && logical_xor(RhsSelfAdjoint, ConjugateRhs),
276
+ logical_xor(LhsSelfAdjoint, LhsStorageOrder == RowMajor) ? ColMajor : RowMajor, LhsSelfAdjoint,
277
+ NumTraits<Scalar>::IsComplex && logical_xor(LhsSelfAdjoint, ConjugateLhs), ColMajor,
278
+ ResInnerStride>::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
304
279
  }
305
280
  };
306
281
 
307
- template <typename Scalar, typename Index,
308
- int LhsStorageOrder, bool ConjugateLhs,
309
- int RhsStorageOrder, bool ConjugateRhs,
310
- int ResInnerStride>
311
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
312
- {
313
-
314
- static EIGEN_DONT_INLINE void run(
315
- Index rows, Index cols,
316
- const Scalar* _lhs, Index lhsStride,
317
- const Scalar* _rhs, Index rhsStride,
318
- Scalar* res, Index resIncr, Index resStride,
319
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
282
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
283
+ bool ConjugateRhs, int ResInnerStride>
284
+ struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, true, ConjugateLhs, RhsStorageOrder, false,
285
+ ConjugateRhs, ColMajor, ResInnerStride> {
286
+ static EIGEN_DONT_INLINE void run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
287
+ Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
288
+ level3_blocking<Scalar, Scalar>& blocking);
320
289
  };
321
290
 
322
- template <typename Scalar, typename Index,
323
- int LhsStorageOrder, bool ConjugateLhs,
324
- int RhsStorageOrder, bool ConjugateRhs,
325
- int ResInnerStride>
326
- EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
327
- Index rows, Index cols,
328
- const Scalar* _lhs, Index lhsStride,
329
- const Scalar* _rhs, Index rhsStride,
330
- Scalar* _res, Index resIncr, Index resStride,
331
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
332
- {
333
- Index size = rows;
334
-
335
- typedef gebp_traits<Scalar,Scalar> Traits;
336
-
337
- typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
338
- typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
339
- typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
340
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
341
- LhsMapper lhs(_lhs,lhsStride);
342
- LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
343
- RhsMapper rhs(_rhs,rhsStride);
344
- ResMapper res(_res, resStride, resIncr);
345
-
346
- Index kc = blocking.kc(); // cache block size along the K direction
347
- Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
348
- // kc must be smaller than mc
349
- kc = (std::min)(kc,mc);
350
- std::size_t sizeA = kc*mc;
351
- std::size_t sizeB = kc*cols;
352
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
353
- ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
354
-
355
- gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
356
- symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
357
- gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
358
- gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
359
-
360
- for(Index k2=0; k2<size; k2+=kc)
291
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
292
+ bool ConjugateRhs, int ResInnerStride>
293
+ EIGEN_DONT_INLINE void
294
+ product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, true, ConjugateLhs, RhsStorageOrder, false, ConjugateRhs,
295
+ ColMajor, ResInnerStride>::run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride,
296
+ const Scalar* rhs_, Index rhsStride, Scalar* res_,
297
+ Index resIncr, Index resStride, const Scalar& alpha,
298
+ level3_blocking<Scalar, Scalar>& blocking) {
299
+ Index size = rows;
300
+
301
+ typedef gebp_traits<Scalar, Scalar> Traits;
302
+
303
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
304
+ typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
305
+ typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
306
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
307
+ LhsMapper lhs(lhs_, lhsStride);
308
+ LhsTransposeMapper lhs_transpose(lhs_, lhsStride);
309
+ RhsMapper rhs(rhs_, rhsStride);
310
+ ResMapper res(res_, resStride, resIncr);
311
+
312
+ Index kc = blocking.kc(); // cache block size along the K direction
313
+ Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
314
+ // kc must be smaller than mc
315
+ kc = (std::min)(kc, mc);
316
+ std::size_t sizeA = kc * mc;
317
+ std::size_t sizeB = kc * cols;
318
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
319
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
320
+
321
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
322
+ symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
323
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
324
+ gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
325
+ LhsStorageOrder == RowMajor ? ColMajor : RowMajor, true>
326
+ pack_lhs_transposed;
327
+
328
+ for (Index k2 = 0; k2 < size; k2 += kc) {
329
+ const Index actual_kc = (std::min)(k2 + kc, size) - k2;
330
+
331
+ // we have selected one row panel of rhs and one column panel of lhs
332
+ // pack rhs's panel into a sequential chunk of memory
333
+ // and expand each coeff to a constant packet for further reuse
334
+ pack_rhs(blockB, rhs.getSubMapper(k2, 0), actual_kc, cols);
335
+
336
+ // the select lhs's panel has to be split in three different parts:
337
+ // 1 - the transposed panel above the diagonal block => transposed packed copy
338
+ // 2 - the diagonal block => special packed copy
339
+ // 3 - the panel below the diagonal block => generic packed copy
340
+ for (Index i2 = 0; i2 < k2; i2 += mc) {
341
+ const Index actual_mc = (std::min)(i2 + mc, k2) - i2;
342
+ // transposed packed copy
343
+ pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
344
+
345
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
346
+ }
347
+ // the block diagonal
361
348
  {
362
- const Index actual_kc = (std::min)(k2+kc,size)-k2;
363
-
364
- // we have selected one row panel of rhs and one column panel of lhs
365
- // pack rhs's panel into a sequential chunk of memory
366
- // and expand each coeff to a constant packet for further reuse
367
- pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
368
-
369
- // the select lhs's panel has to be split in three different parts:
370
- // 1 - the transposed panel above the diagonal block => transposed packed copy
371
- // 2 - the diagonal block => special packed copy
372
- // 3 - the panel below the diagonal block => generic packed copy
373
- for(Index i2=0; i2<k2; i2+=mc)
374
- {
375
- const Index actual_mc = (std::min)(i2+mc,k2)-i2;
376
- // transposed packed copy
377
- pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
378
-
379
- gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
380
- }
381
- // the block diagonal
382
- {
383
- const Index actual_mc = (std::min)(k2+kc,size)-k2;
384
- // symmetric packed copy
385
- pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
349
+ const Index actual_mc = (std::min)(k2 + kc, size) - k2;
350
+ // symmetric packed copy
351
+ pack_lhs(blockA, &lhs(k2, k2), lhsStride, actual_kc, actual_mc);
386
352
 
387
- gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
388
- }
353
+ gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
354
+ }
389
355
 
390
- for(Index i2=k2+kc; i2<size; i2+=mc)
391
- {
392
- const Index actual_mc = (std::min)(i2+mc,size)-i2;
393
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
394
- (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
356
+ for (Index i2 = k2 + kc; i2 < size; i2 += mc) {
357
+ const Index actual_mc = (std::min)(i2 + mc, size) - i2;
358
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
359
+ LhsStorageOrder, false>()(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
395
360
 
396
- gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
397
- }
361
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
398
362
  }
399
363
  }
364
+ }
400
365
 
401
366
  // matrix * selfadjoint product
402
- template <typename Scalar, typename Index,
403
- int LhsStorageOrder, bool ConjugateLhs,
404
- int RhsStorageOrder, bool ConjugateRhs,
405
- int ResInnerStride>
406
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
407
- {
408
-
409
- static EIGEN_DONT_INLINE void run(
410
- Index rows, Index cols,
411
- const Scalar* _lhs, Index lhsStride,
412
- const Scalar* _rhs, Index rhsStride,
413
- Scalar* res, Index resIncr, Index resStride,
414
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
367
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
368
+ bool ConjugateRhs, int ResInnerStride>
369
+ struct product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, false, ConjugateLhs, RhsStorageOrder, true,
370
+ ConjugateRhs, ColMajor, ResInnerStride> {
371
+ static EIGEN_DONT_INLINE void run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride, const Scalar* rhs_,
372
+ Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha,
373
+ level3_blocking<Scalar, Scalar>& blocking);
415
374
  };
416
375
 
417
- template <typename Scalar, typename Index,
418
- int LhsStorageOrder, bool ConjugateLhs,
419
- int RhsStorageOrder, bool ConjugateRhs,
420
- int ResInnerStride>
421
- EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
422
- Index rows, Index cols,
423
- const Scalar* _lhs, Index lhsStride,
424
- const Scalar* _rhs, Index rhsStride,
425
- Scalar* _res, Index resIncr, Index resStride,
426
- const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
427
- {
428
- Index size = cols;
429
-
430
- typedef gebp_traits<Scalar,Scalar> Traits;
431
-
432
- typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
433
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
434
- LhsMapper lhs(_lhs,lhsStride);
435
- ResMapper res(_res,resStride, resIncr);
436
-
437
- Index kc = blocking.kc(); // cache block size along the K direction
438
- Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
439
- std::size_t sizeA = kc*mc;
440
- std::size_t sizeB = kc*cols;
441
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
442
- ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
443
-
444
- gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
445
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
446
- symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
447
-
448
- for(Index k2=0; k2<size; k2+=kc)
449
- {
450
- const Index actual_kc = (std::min)(k2+kc,size)-k2;
451
-
452
- pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
453
-
454
- // => GEPP
455
- for(Index i2=0; i2<rows; i2+=mc)
456
- {
457
- const Index actual_mc = (std::min)(i2+mc,rows)-i2;
458
- pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
459
-
460
- gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
461
- }
376
+ template <typename Scalar, typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder,
377
+ bool ConjugateRhs, int ResInnerStride>
378
+ EIGEN_DONT_INLINE void
379
+ product_selfadjoint_matrix<Scalar, Index, LhsStorageOrder, false, ConjugateLhs, RhsStorageOrder, true, ConjugateRhs,
380
+ ColMajor, ResInnerStride>::run(Index rows, Index cols, const Scalar* lhs_, Index lhsStride,
381
+ const Scalar* rhs_, Index rhsStride, Scalar* res_,
382
+ Index resIncr, Index resStride, const Scalar& alpha,
383
+ level3_blocking<Scalar, Scalar>& blocking) {
384
+ Index size = cols;
385
+
386
+ typedef gebp_traits<Scalar, Scalar> Traits;
387
+
388
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
389
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
390
+ LhsMapper lhs(lhs_, lhsStride);
391
+ ResMapper res(res_, resStride, resIncr);
392
+
393
+ Index kc = blocking.kc(); // cache block size along the K direction
394
+ Index mc = (std::min)(rows, blocking.mc()); // cache block size along the M direction
395
+ std::size_t sizeA = kc * mc;
396
+ std::size_t sizeB = kc * cols;
397
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
398
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
399
+
400
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
401
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing,
402
+ LhsStorageOrder>
403
+ pack_lhs;
404
+ symm_pack_rhs<Scalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
405
+
406
+ for (Index k2 = 0; k2 < size; k2 += kc) {
407
+ const Index actual_kc = (std::min)(k2 + kc, size) - k2;
408
+
409
+ pack_rhs(blockB, rhs_, rhsStride, actual_kc, cols, k2);
410
+
411
+ // => GEPP
412
+ for (Index i2 = 0; i2 < rows; i2 += mc) {
413
+ const Index actual_mc = (std::min)(i2 + mc, rows) - i2;
414
+ pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
415
+
416
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
462
417
  }
463
418
  }
419
+ }
464
420
 
465
- } // end namespace internal
421
+ } // end namespace internal
466
422
 
467
423
  /***************************************************************************
468
- * Wrapper to product_selfadjoint_matrix
469
- ***************************************************************************/
424
+ * Wrapper to product_selfadjoint_matrix
425
+ ***************************************************************************/
470
426
 
471
427
  namespace internal {
472
-
473
- template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
474
- struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
475
- {
476
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
477
-
428
+
429
+ template <typename Lhs, int LhsMode, typename Rhs, int RhsMode>
430
+ struct selfadjoint_product_impl<Lhs, LhsMode, false, Rhs, RhsMode, false> {
431
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
432
+
478
433
  typedef internal::blas_traits<Lhs> LhsBlasTraits;
479
434
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
480
435
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
481
436
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
482
-
437
+
483
438
  enum {
484
- LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
485
- LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
486
- RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
487
- RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
439
+ LhsIsUpper = (LhsMode & (Upper | Lower)) == Upper,
440
+ LhsIsSelfAdjoint = (LhsMode & SelfAdjoint) == SelfAdjoint,
441
+ RhsIsUpper = (RhsMode & (Upper | Lower)) == Upper,
442
+ RhsIsSelfAdjoint = (RhsMode & SelfAdjoint) == SelfAdjoint
488
443
  };
489
-
490
- template<typename Dest>
491
- static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
492
- {
493
- eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
494
444
 
495
- typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
496
- typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
445
+ template <typename Dest>
446
+ static void run(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {
447
+ eigen_assert(dst.rows() == a_lhs.rows() && dst.cols() == a_rhs.cols());
448
+
449
+ add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
450
+ add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
497
451
 
498
- Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
499
- * RhsBlasTraits::extractScalarFactor(a_rhs);
452
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs);
500
453
 
501
- typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
502
- Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
454
+ typedef internal::gemm_blocking_space<(Dest::Flags & RowMajorBit) ? RowMajor : ColMajor, Scalar, Scalar,
455
+ Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime,
456
+ Lhs::MaxColsAtCompileTime, 1>
457
+ BlockingType;
503
458
 
504
459
  BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
505
460
 
506
- internal::product_selfadjoint_matrix<Scalar, Index,
507
- EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
508
- NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
509
- EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
510
- NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
511
- internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
512
- Dest::InnerStrideAtCompileTime>
513
- ::run(
514
- lhs.rows(), rhs.cols(), // sizes
515
- &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
516
- &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
517
- &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
518
- actualAlpha, blocking // alpha
519
- );
461
+ internal::product_selfadjoint_matrix<
462
+ Scalar, Index,
463
+ internal::logical_xor(LhsIsUpper, internal::traits<Lhs>::Flags & RowMajorBit) ? RowMajor : ColMajor,
464
+ LhsIsSelfAdjoint,
465
+ NumTraits<Scalar>::IsComplex && internal::logical_xor(LhsIsUpper, bool(LhsBlasTraits::NeedToConjugate)),
466
+ internal::logical_xor(RhsIsUpper, internal::traits<Rhs>::Flags & RowMajorBit) ? RowMajor : ColMajor,
467
+ RhsIsSelfAdjoint,
468
+ NumTraits<Scalar>::IsComplex && internal::logical_xor(RhsIsUpper, bool(RhsBlasTraits::NeedToConjugate)),
469
+ internal::traits<Dest>::Flags & RowMajorBit ? RowMajor : ColMajor,
470
+ Dest::InnerStrideAtCompileTime>::run(lhs.rows(), rhs.cols(), // sizes
471
+ &lhs.coeffRef(0, 0), lhs.outerStride(), // lhs info
472
+ &rhs.coeffRef(0, 0), rhs.outerStride(), // rhs info
473
+ &dst.coeffRef(0, 0), dst.innerStride(), dst.outerStride(), // result info
474
+ actualAlpha, blocking // alpha
475
+ );
520
476
  }
521
477
  };
522
478
 
523
- } // end namespace internal
479
+ } // end namespace internal
524
480
 
525
- } // end namespace Eigen
481
+ } // end namespace Eigen
526
482
 
527
- #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
483
+ #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H