@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -1,338 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2001 Intel Corporation
5
- // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6
- // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
7
- //
8
- // This Source Code Form is subject to the terms of the Mozilla
9
- // Public License v. 2.0. If a copy of the MPL was not distributed
10
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11
-
12
- // The SSE code for the 4x4 float and double matrix inverse in this file
13
- // comes from the following Intel's library:
14
- // http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/
15
- //
16
- // Here is the respective copyright and license statement:
17
- //
18
- // Copyright (c) 2001 Intel Corporation.
19
- //
20
- // Permition is granted to use, copy, distribute and prepare derivative works
21
- // of this library for any purpose and without fee, provided, that the above
22
- // copyright notice and this statement appear in all copies.
23
- // Intel makes no representations about the suitability of this software for
24
- // any purpose, and specifically disclaims all warranties.
25
- // See LEGAL.TXT for all the legal information.
26
-
27
- #ifndef EIGEN_INVERSE_SSE_H
28
- #define EIGEN_INVERSE_SSE_H
29
-
30
- namespace Eigen {
31
-
32
- namespace internal {
33
-
34
- template<typename MatrixType, typename ResultType>
35
- struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
36
- {
37
- enum {
38
- MatrixAlignment = traits<MatrixType>::Alignment,
39
- ResultAlignment = traits<ResultType>::Alignment,
40
- StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
41
- };
42
- typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
43
-
44
- static void run(const MatrixType& mat, ResultType& result)
45
- {
46
- ActualMatrixType matrix(mat);
47
- const Packet4f p4f_sign_PNNP = _mm_castsi128_ps(_mm_set_epi32(0x00000000, 0x80000000, 0x80000000, 0x00000000));
48
-
49
- // Load the full matrix into registers
50
- __m128 _L1 = matrix.template packet<MatrixAlignment>( 0);
51
- __m128 _L2 = matrix.template packet<MatrixAlignment>( 4);
52
- __m128 _L3 = matrix.template packet<MatrixAlignment>( 8);
53
- __m128 _L4 = matrix.template packet<MatrixAlignment>(12);
54
-
55
- // The inverse is calculated using "Divide and Conquer" technique. The
56
- // original matrix is divide into four 2x2 sub-matrices. Since each
57
- // register holds four matrix element, the smaller matrices are
58
- // represented as a registers. Hence we get a better locality of the
59
- // calculations.
60
-
61
- __m128 A, B, C, D; // the four sub-matrices
62
- if(!StorageOrdersMatch)
63
- {
64
- A = _mm_unpacklo_ps(_L1, _L2);
65
- B = _mm_unpacklo_ps(_L3, _L4);
66
- C = _mm_unpackhi_ps(_L1, _L2);
67
- D = _mm_unpackhi_ps(_L3, _L4);
68
- }
69
- else
70
- {
71
- A = _mm_movelh_ps(_L1, _L2);
72
- B = _mm_movehl_ps(_L2, _L1);
73
- C = _mm_movelh_ps(_L3, _L4);
74
- D = _mm_movehl_ps(_L4, _L3);
75
- }
76
-
77
- __m128 iA, iB, iC, iD, // partial inverse of the sub-matrices
78
- DC, AB;
79
- __m128 dA, dB, dC, dD; // determinant of the sub-matrices
80
- __m128 det, d, d1, d2;
81
- __m128 rd; // reciprocal of the determinant
82
-
83
- // AB = A# * B
84
- AB = _mm_mul_ps(_mm_shuffle_ps(A,A,0x0F), B);
85
- AB = _mm_sub_ps(AB,_mm_mul_ps(_mm_shuffle_ps(A,A,0xA5), _mm_shuffle_ps(B,B,0x4E)));
86
- // DC = D# * C
87
- DC = _mm_mul_ps(_mm_shuffle_ps(D,D,0x0F), C);
88
- DC = _mm_sub_ps(DC,_mm_mul_ps(_mm_shuffle_ps(D,D,0xA5), _mm_shuffle_ps(C,C,0x4E)));
89
-
90
- // dA = |A|
91
- dA = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x5F),A);
92
- dA = _mm_sub_ss(dA, _mm_movehl_ps(dA,dA));
93
- // dB = |B|
94
- dB = _mm_mul_ps(_mm_shuffle_ps(B, B, 0x5F),B);
95
- dB = _mm_sub_ss(dB, _mm_movehl_ps(dB,dB));
96
-
97
- // dC = |C|
98
- dC = _mm_mul_ps(_mm_shuffle_ps(C, C, 0x5F),C);
99
- dC = _mm_sub_ss(dC, _mm_movehl_ps(dC,dC));
100
- // dD = |D|
101
- dD = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x5F),D);
102
- dD = _mm_sub_ss(dD, _mm_movehl_ps(dD,dD));
103
-
104
- // d = trace(AB*DC) = trace(A#*B*D#*C)
105
- d = _mm_mul_ps(_mm_shuffle_ps(DC,DC,0xD8),AB);
106
-
107
- // iD = C*A#*B
108
- iD = _mm_mul_ps(_mm_shuffle_ps(C,C,0xA0), _mm_movelh_ps(AB,AB));
109
- iD = _mm_add_ps(iD,_mm_mul_ps(_mm_shuffle_ps(C,C,0xF5), _mm_movehl_ps(AB,AB)));
110
- // iA = B*D#*C
111
- iA = _mm_mul_ps(_mm_shuffle_ps(B,B,0xA0), _mm_movelh_ps(DC,DC));
112
- iA = _mm_add_ps(iA,_mm_mul_ps(_mm_shuffle_ps(B,B,0xF5), _mm_movehl_ps(DC,DC)));
113
-
114
- // d = trace(AB*DC) = trace(A#*B*D#*C) [continue]
115
- d = _mm_add_ps(d, _mm_movehl_ps(d, d));
116
- d = _mm_add_ss(d, _mm_shuffle_ps(d, d, 1));
117
- d1 = _mm_mul_ss(dA,dD);
118
- d2 = _mm_mul_ss(dB,dC);
119
-
120
- // iD = D*|A| - C*A#*B
121
- iD = _mm_sub_ps(_mm_mul_ps(D,_mm_shuffle_ps(dA,dA,0)), iD);
122
-
123
- // iA = A*|D| - B*D#*C;
124
- iA = _mm_sub_ps(_mm_mul_ps(A,_mm_shuffle_ps(dD,dD,0)), iA);
125
-
126
- // det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C)
127
- det = _mm_sub_ss(_mm_add_ss(d1,d2),d);
128
- rd = _mm_div_ss(_mm_set_ss(1.0f), det);
129
-
130
- // #ifdef ZERO_SINGULAR
131
- // rd = _mm_and_ps(_mm_cmpneq_ss(det,_mm_setzero_ps()), rd);
132
- // #endif
133
-
134
- // iB = D * (A#B)# = D*B#*A
135
- iB = _mm_mul_ps(D, _mm_shuffle_ps(AB,AB,0x33));
136
- iB = _mm_sub_ps(iB, _mm_mul_ps(_mm_shuffle_ps(D,D,0xB1), _mm_shuffle_ps(AB,AB,0x66)));
137
- // iC = A * (D#C)# = A*C#*D
138
- iC = _mm_mul_ps(A, _mm_shuffle_ps(DC,DC,0x33));
139
- iC = _mm_sub_ps(iC, _mm_mul_ps(_mm_shuffle_ps(A,A,0xB1), _mm_shuffle_ps(DC,DC,0x66)));
140
-
141
- rd = _mm_shuffle_ps(rd,rd,0);
142
- rd = _mm_xor_ps(rd, p4f_sign_PNNP);
143
-
144
- // iB = C*|B| - D*B#*A
145
- iB = _mm_sub_ps(_mm_mul_ps(C,_mm_shuffle_ps(dB,dB,0)), iB);
146
-
147
- // iC = B*|C| - A*C#*D;
148
- iC = _mm_sub_ps(_mm_mul_ps(B,_mm_shuffle_ps(dC,dC,0)), iC);
149
-
150
- // iX = iX / det
151
- iA = _mm_mul_ps(rd,iA);
152
- iB = _mm_mul_ps(rd,iB);
153
- iC = _mm_mul_ps(rd,iC);
154
- iD = _mm_mul_ps(rd,iD);
155
-
156
- Index res_stride = result.outerStride();
157
- float* res = result.data();
158
- pstoret<float, Packet4f, ResultAlignment>(res+0, _mm_shuffle_ps(iA,iB,0x77));
159
- pstoret<float, Packet4f, ResultAlignment>(res+res_stride, _mm_shuffle_ps(iA,iB,0x22));
160
- pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77));
161
- pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22));
162
- }
163
-
164
- };
165
-
166
- template<typename MatrixType, typename ResultType>
167
- struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
168
- {
169
- enum {
170
- MatrixAlignment = traits<MatrixType>::Alignment,
171
- ResultAlignment = traits<ResultType>::Alignment,
172
- StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
173
- };
174
- typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
175
-
176
- static void run(const MatrixType& mat, ResultType& result)
177
- {
178
- ActualMatrixType matrix(mat);
179
- const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
180
- const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
181
-
182
- // The inverse is calculated using "Divide and Conquer" technique. The
183
- // original matrix is divide into four 2x2 sub-matrices. Since each
184
- // register of the matrix holds two elements, the smaller matrices are
185
- // consisted of two registers. Hence we get a better locality of the
186
- // calculations.
187
-
188
- // the four sub-matrices
189
- __m128d A1, A2, B1, B2, C1, C2, D1, D2;
190
-
191
- if(StorageOrdersMatch)
192
- {
193
- A1 = matrix.template packet<MatrixAlignment>( 0); B1 = matrix.template packet<MatrixAlignment>( 2);
194
- A2 = matrix.template packet<MatrixAlignment>( 4); B2 = matrix.template packet<MatrixAlignment>( 6);
195
- C1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10);
196
- C2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14);
197
- }
198
- else
199
- {
200
- __m128d tmp;
201
- A1 = matrix.template packet<MatrixAlignment>( 0); C1 = matrix.template packet<MatrixAlignment>( 2);
202
- A2 = matrix.template packet<MatrixAlignment>( 4); C2 = matrix.template packet<MatrixAlignment>( 6);
203
- tmp = A1;
204
- A1 = _mm_unpacklo_pd(A1,A2);
205
- A2 = _mm_unpackhi_pd(tmp,A2);
206
- tmp = C1;
207
- C1 = _mm_unpacklo_pd(C1,C2);
208
- C2 = _mm_unpackhi_pd(tmp,C2);
209
-
210
- B1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10);
211
- B2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14);
212
- tmp = B1;
213
- B1 = _mm_unpacklo_pd(B1,B2);
214
- B2 = _mm_unpackhi_pd(tmp,B2);
215
- tmp = D1;
216
- D1 = _mm_unpacklo_pd(D1,D2);
217
- D2 = _mm_unpackhi_pd(tmp,D2);
218
- }
219
-
220
- __m128d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2, // partial invese of the sub-matrices
221
- DC1, DC2, AB1, AB2;
222
- __m128d dA, dB, dC, dD; // determinant of the sub-matrices
223
- __m128d det, d1, d2, rd;
224
-
225
- // dA = |A|
226
- dA = _mm_shuffle_pd(A2, A2, 1);
227
- dA = _mm_mul_pd(A1, dA);
228
- dA = _mm_sub_sd(dA, _mm_shuffle_pd(dA,dA,3));
229
- // dB = |B|
230
- dB = _mm_shuffle_pd(B2, B2, 1);
231
- dB = _mm_mul_pd(B1, dB);
232
- dB = _mm_sub_sd(dB, _mm_shuffle_pd(dB,dB,3));
233
-
234
- // AB = A# * B
235
- AB1 = _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,3));
236
- AB2 = _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,0));
237
- AB1 = _mm_sub_pd(AB1, _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,3)));
238
- AB2 = _mm_sub_pd(AB2, _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,0)));
239
-
240
- // dC = |C|
241
- dC = _mm_shuffle_pd(C2, C2, 1);
242
- dC = _mm_mul_pd(C1, dC);
243
- dC = _mm_sub_sd(dC, _mm_shuffle_pd(dC,dC,3));
244
- // dD = |D|
245
- dD = _mm_shuffle_pd(D2, D2, 1);
246
- dD = _mm_mul_pd(D1, dD);
247
- dD = _mm_sub_sd(dD, _mm_shuffle_pd(dD,dD,3));
248
-
249
- // DC = D# * C
250
- DC1 = _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,3));
251
- DC2 = _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,0));
252
- DC1 = _mm_sub_pd(DC1, _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,3)));
253
- DC2 = _mm_sub_pd(DC2, _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,0)));
254
-
255
- // rd = trace(AB*DC) = trace(A#*B*D#*C)
256
- d1 = _mm_mul_pd(AB1, _mm_shuffle_pd(DC1, DC2, 0));
257
- d2 = _mm_mul_pd(AB2, _mm_shuffle_pd(DC1, DC2, 3));
258
- rd = _mm_add_pd(d1, d2);
259
- rd = _mm_add_sd(rd, _mm_shuffle_pd(rd, rd,3));
260
-
261
- // iD = C*A#*B
262
- iD1 = _mm_mul_pd(AB1, _mm_shuffle_pd(C1,C1,0));
263
- iD2 = _mm_mul_pd(AB1, _mm_shuffle_pd(C2,C2,0));
264
- iD1 = _mm_add_pd(iD1, _mm_mul_pd(AB2, _mm_shuffle_pd(C1,C1,3)));
265
- iD2 = _mm_add_pd(iD2, _mm_mul_pd(AB2, _mm_shuffle_pd(C2,C2,3)));
266
-
267
- // iA = B*D#*C
268
- iA1 = _mm_mul_pd(DC1, _mm_shuffle_pd(B1,B1,0));
269
- iA2 = _mm_mul_pd(DC1, _mm_shuffle_pd(B2,B2,0));
270
- iA1 = _mm_add_pd(iA1, _mm_mul_pd(DC2, _mm_shuffle_pd(B1,B1,3)));
271
- iA2 = _mm_add_pd(iA2, _mm_mul_pd(DC2, _mm_shuffle_pd(B2,B2,3)));
272
-
273
- // iD = D*|A| - C*A#*B
274
- dA = _mm_shuffle_pd(dA,dA,0);
275
- iD1 = _mm_sub_pd(_mm_mul_pd(D1, dA), iD1);
276
- iD2 = _mm_sub_pd(_mm_mul_pd(D2, dA), iD2);
277
-
278
- // iA = A*|D| - B*D#*C;
279
- dD = _mm_shuffle_pd(dD,dD,0);
280
- iA1 = _mm_sub_pd(_mm_mul_pd(A1, dD), iA1);
281
- iA2 = _mm_sub_pd(_mm_mul_pd(A2, dD), iA2);
282
-
283
- d1 = _mm_mul_sd(dA, dD);
284
- d2 = _mm_mul_sd(dB, dC);
285
-
286
- // iB = D * (A#B)# = D*B#*A
287
- iB1 = _mm_mul_pd(D1, _mm_shuffle_pd(AB2,AB1,1));
288
- iB2 = _mm_mul_pd(D2, _mm_shuffle_pd(AB2,AB1,1));
289
- iB1 = _mm_sub_pd(iB1, _mm_mul_pd(_mm_shuffle_pd(D1,D1,1), _mm_shuffle_pd(AB2,AB1,2)));
290
- iB2 = _mm_sub_pd(iB2, _mm_mul_pd(_mm_shuffle_pd(D2,D2,1), _mm_shuffle_pd(AB2,AB1,2)));
291
-
292
- // det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C)
293
- det = _mm_add_sd(d1, d2);
294
- det = _mm_sub_sd(det, rd);
295
-
296
- // iC = A * (D#C)# = A*C#*D
297
- iC1 = _mm_mul_pd(A1, _mm_shuffle_pd(DC2,DC1,1));
298
- iC2 = _mm_mul_pd(A2, _mm_shuffle_pd(DC2,DC1,1));
299
- iC1 = _mm_sub_pd(iC1, _mm_mul_pd(_mm_shuffle_pd(A1,A1,1), _mm_shuffle_pd(DC2,DC1,2)));
300
- iC2 = _mm_sub_pd(iC2, _mm_mul_pd(_mm_shuffle_pd(A2,A2,1), _mm_shuffle_pd(DC2,DC1,2)));
301
-
302
- rd = _mm_div_sd(_mm_set_sd(1.0), det);
303
- // #ifdef ZERO_SINGULAR
304
- // rd = _mm_and_pd(_mm_cmpneq_sd(det,_mm_setzero_pd()), rd);
305
- // #endif
306
- rd = _mm_shuffle_pd(rd,rd,0);
307
-
308
- // iB = C*|B| - D*B#*A
309
- dB = _mm_shuffle_pd(dB,dB,0);
310
- iB1 = _mm_sub_pd(_mm_mul_pd(C1, dB), iB1);
311
- iB2 = _mm_sub_pd(_mm_mul_pd(C2, dB), iB2);
312
-
313
- d1 = _mm_xor_pd(rd, _Sign_PN);
314
- d2 = _mm_xor_pd(rd, _Sign_NP);
315
-
316
- // iC = B*|C| - A*C#*D;
317
- dC = _mm_shuffle_pd(dC,dC,0);
318
- iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
319
- iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);
320
-
321
- Index res_stride = result.outerStride();
322
- double* res = result.data();
323
- pstoret<double, Packet2d, ResultAlignment>(res+0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));
324
- pstoret<double, Packet2d, ResultAlignment>(res+res_stride, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
325
- pstoret<double, Packet2d, ResultAlignment>(res+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));
326
- pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
327
- pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));
328
- pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
329
- pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));
330
- pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
331
- }
332
- };
333
-
334
- } // end namespace internal
335
-
336
- } // end namespace Eigen
337
-
338
- #endif // EIGEN_INVERSE_SSE_H
@@ -1,67 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_MAPPED_SPARSEMATRIX_H
11
- #define EIGEN_MAPPED_SPARSEMATRIX_H
12
-
13
- namespace Eigen {
14
-
15
- /** \deprecated Use Map<SparseMatrix<> >
16
- * \class MappedSparseMatrix
17
- *
18
- * \brief Sparse matrix
19
- *
20
- * \param _Scalar the scalar type, i.e. the type of the coefficients
21
- *
22
- * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme.
23
- *
24
- */
25
- namespace internal {
26
- template<typename _Scalar, int _Flags, typename _StorageIndex>
27
- struct traits<MappedSparseMatrix<_Scalar, _Flags, _StorageIndex> > : traits<SparseMatrix<_Scalar, _Flags, _StorageIndex> >
28
- {};
29
- } // end namespace internal
30
-
31
- template<typename _Scalar, int _Flags, typename _StorageIndex>
32
- class MappedSparseMatrix
33
- : public Map<SparseMatrix<_Scalar, _Flags, _StorageIndex> >
34
- {
35
- typedef Map<SparseMatrix<_Scalar, _Flags, _StorageIndex> > Base;
36
-
37
- public:
38
-
39
- typedef typename Base::StorageIndex StorageIndex;
40
- typedef typename Base::Scalar Scalar;
41
-
42
- inline MappedSparseMatrix(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr, StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZeroPtr = 0)
43
- : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZeroPtr)
44
- {}
45
-
46
- /** Empty destructor */
47
- inline ~MappedSparseMatrix() {}
48
- };
49
-
50
- namespace internal {
51
-
52
- template<typename _Scalar, int _Options, typename _StorageIndex>
53
- struct evaluator<MappedSparseMatrix<_Scalar,_Options,_StorageIndex> >
54
- : evaluator<SparseCompressedBase<MappedSparseMatrix<_Scalar,_Options,_StorageIndex> > >
55
- {
56
- typedef MappedSparseMatrix<_Scalar,_Options,_StorageIndex> XprType;
57
- typedef evaluator<SparseCompressedBase<XprType> > Base;
58
-
59
- evaluator() : Base() {}
60
- explicit evaluator(const XprType &mat) : Base(mat) {}
61
- };
62
-
63
- }
64
-
65
- } // end namespace Eigen
66
-
67
- #endif // EIGEN_MAPPED_SPARSEMATRIX_H
@@ -1,280 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_SPARSELU_GEMM_KERNEL_H
11
- #define EIGEN_SPARSELU_GEMM_KERNEL_H
12
-
13
- namespace Eigen {
14
-
15
- namespace internal {
16
-
17
-
18
- /** \internal
19
- * A general matrix-matrix product kernel optimized for the SparseLU factorization.
20
- * - A, B, and C must be column major
21
- * - lda and ldc must be multiples of the respective packet size
22
- * - C must have the same alignment as A
23
- */
24
- template<typename Scalar>
25
- EIGEN_DONT_INLINE
26
- void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
27
- {
28
- using namespace Eigen::internal;
29
-
30
- typedef typename packet_traits<Scalar>::type Packet;
31
- enum {
32
- NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
33
- PacketSize = packet_traits<Scalar>::size,
34
- PM = 8, // peeling in M
35
- RN = 2, // register blocking
36
- RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking
37
- BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk
38
- SM = PM*PacketSize // step along M
39
- };
40
- Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
41
- Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
42
- Index i0 = internal::first_default_aligned(A,m);
43
-
44
- eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));
45
-
46
- // handle the non aligned rows of A and C without any optimization:
47
- for(Index i=0; i<i0; ++i)
48
- {
49
- for(Index j=0; j<n; ++j)
50
- {
51
- Scalar c = C[i+j*ldc];
52
- for(Index k=0; k<d; ++k)
53
- c += B[k+j*ldb] * A[i+k*lda];
54
- C[i+j*ldc] = c;
55
- }
56
- }
57
- // process the remaining rows per chunk of BM rows
58
- for(Index ib=i0; ib<m; ib+=BM)
59
- {
60
- Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows
61
- Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
62
- Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
63
-
64
- // Let's process two columns of B-C at once
65
- for(Index j=0; j<n_end; j+=RN)
66
- {
67
- const Scalar* Bc0 = B+(j+0)*ldb;
68
- const Scalar* Bc1 = B+(j+1)*ldb;
69
-
70
- for(Index k=0; k<d_end; k+=RK)
71
- {
72
-
73
- // load and expand a RN x RK block of B
74
- Packet b00, b10, b20, b30, b01, b11, b21, b31;
75
- { b00 = pset1<Packet>(Bc0[0]); }
76
- { b10 = pset1<Packet>(Bc0[1]); }
77
- if(RK==4) { b20 = pset1<Packet>(Bc0[2]); }
78
- if(RK==4) { b30 = pset1<Packet>(Bc0[3]); }
79
- { b01 = pset1<Packet>(Bc1[0]); }
80
- { b11 = pset1<Packet>(Bc1[1]); }
81
- if(RK==4) { b21 = pset1<Packet>(Bc1[2]); }
82
- if(RK==4) { b31 = pset1<Packet>(Bc1[3]); }
83
-
84
- Packet a0, a1, a2, a3, c0, c1, t0, t1;
85
-
86
- const Scalar* A0 = A+ib+(k+0)*lda;
87
- const Scalar* A1 = A+ib+(k+1)*lda;
88
- const Scalar* A2 = A+ib+(k+2)*lda;
89
- const Scalar* A3 = A+ib+(k+3)*lda;
90
-
91
- Scalar* C0 = C+ib+(j+0)*ldc;
92
- Scalar* C1 = C+ib+(j+1)*ldc;
93
-
94
- a0 = pload<Packet>(A0);
95
- a1 = pload<Packet>(A1);
96
- if(RK==4)
97
- {
98
- a2 = pload<Packet>(A2);
99
- a3 = pload<Packet>(A3);
100
- }
101
- else
102
- {
103
- // workaround "may be used uninitialized in this function" warning
104
- a2 = a3 = a0;
105
- }
106
-
107
- #define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
108
- #define WORK(I) \
109
- c0 = pload<Packet>(C0+i+(I)*PacketSize); \
110
- c1 = pload<Packet>(C1+i+(I)*PacketSize); \
111
- KMADD(c0, a0, b00, t0) \
112
- KMADD(c1, a0, b01, t1) \
113
- a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
114
- KMADD(c0, a1, b10, t0) \
115
- KMADD(c1, a1, b11, t1) \
116
- a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
117
- if(RK==4){ KMADD(c0, a2, b20, t0) }\
118
- if(RK==4){ KMADD(c1, a2, b21, t1) }\
119
- if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
120
- if(RK==4){ KMADD(c0, a3, b30, t0) }\
121
- if(RK==4){ KMADD(c1, a3, b31, t1) }\
122
- if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
123
- pstore(C0+i+(I)*PacketSize, c0); \
124
- pstore(C1+i+(I)*PacketSize, c1)
125
-
126
- // process rows of A' - C' with aggressive vectorization and peeling
127
- for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
128
- {
129
- EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1");
130
- prefetch((A0+i+(5)*PacketSize));
131
- prefetch((A1+i+(5)*PacketSize));
132
- if(RK==4) prefetch((A2+i+(5)*PacketSize));
133
- if(RK==4) prefetch((A3+i+(5)*PacketSize));
134
-
135
- WORK(0);
136
- WORK(1);
137
- WORK(2);
138
- WORK(3);
139
- WORK(4);
140
- WORK(5);
141
- WORK(6);
142
- WORK(7);
143
- }
144
- // process the remaining rows with vectorization only
145
- for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
146
- {
147
- WORK(0);
148
- }
149
- #undef WORK
150
- // process the remaining rows without vectorization
151
- for(Index i=actual_b_end2; i<actual_b; ++i)
152
- {
153
- if(RK==4)
154
- {
155
- C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
156
- C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3];
157
- }
158
- else
159
- {
160
- C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
161
- C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1];
162
- }
163
- }
164
-
165
- Bc0 += RK;
166
- Bc1 += RK;
167
- } // peeled loop on k
168
- } // peeled loop on the columns j
169
- // process the last column (we now perform a matrix-vector product)
170
- if((n-n_end)>0)
171
- {
172
- const Scalar* Bc0 = B+(n-1)*ldb;
173
-
174
- for(Index k=0; k<d_end; k+=RK)
175
- {
176
-
177
- // load and expand a 1 x RK block of B
178
- Packet b00, b10, b20, b30;
179
- b00 = pset1<Packet>(Bc0[0]);
180
- b10 = pset1<Packet>(Bc0[1]);
181
- if(RK==4) b20 = pset1<Packet>(Bc0[2]);
182
- if(RK==4) b30 = pset1<Packet>(Bc0[3]);
183
-
184
- Packet a0, a1, a2, a3, c0, t0/*, t1*/;
185
-
186
- const Scalar* A0 = A+ib+(k+0)*lda;
187
- const Scalar* A1 = A+ib+(k+1)*lda;
188
- const Scalar* A2 = A+ib+(k+2)*lda;
189
- const Scalar* A3 = A+ib+(k+3)*lda;
190
-
191
- Scalar* C0 = C+ib+(n_end)*ldc;
192
-
193
- a0 = pload<Packet>(A0);
194
- a1 = pload<Packet>(A1);
195
- if(RK==4)
196
- {
197
- a2 = pload<Packet>(A2);
198
- a3 = pload<Packet>(A3);
199
- }
200
- else
201
- {
202
- // workaround "may be used uninitialized in this function" warning
203
- a2 = a3 = a0;
204
- }
205
-
206
- #define WORK(I) \
207
- c0 = pload<Packet>(C0+i+(I)*PacketSize); \
208
- KMADD(c0, a0, b00, t0) \
209
- a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
210
- KMADD(c0, a1, b10, t0) \
211
- a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
212
- if(RK==4){ KMADD(c0, a2, b20, t0) }\
213
- if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
214
- if(RK==4){ KMADD(c0, a3, b30, t0) }\
215
- if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
216
- pstore(C0+i+(I)*PacketSize, c0);
217
-
218
- // agressive vectorization and peeling
219
- for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
220
- {
221
- EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2");
222
- WORK(0);
223
- WORK(1);
224
- WORK(2);
225
- WORK(3);
226
- WORK(4);
227
- WORK(5);
228
- WORK(6);
229
- WORK(7);
230
- }
231
- // vectorization only
232
- for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
233
- {
234
- WORK(0);
235
- }
236
- // remaining scalars
237
- for(Index i=actual_b_end2; i<actual_b; ++i)
238
- {
239
- if(RK==4)
240
- C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
241
- else
242
- C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
243
- }
244
-
245
- Bc0 += RK;
246
- #undef WORK
247
- }
248
- }
249
-
250
- // process the last columns of A, corresponding to the last rows of B
251
- Index rd = d-d_end;
252
- if(rd>0)
253
- {
254
- for(Index j=0; j<n; ++j)
255
- {
256
- enum {
257
- Alignment = PacketSize>1 ? Aligned : 0
258
- };
259
- typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector;
260
- typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector;
261
- if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b);
262
-
263
- else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
264
- + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b);
265
-
266
- else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
267
- + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b)
268
- + B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b);
269
- }
270
- }
271
-
272
- } // blocking on the rows of A and C
273
- }
274
- #undef KMADD
275
-
276
- } // namespace internal
277
-
278
- } // namespace Eigen
279
-
280
- #endif // EIGEN_SPARSELU_GEMM_KERNEL_H