ruby-eigen 0.0.9 → 0.0.10.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (293) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +22 -0
  3. data/README.md +21 -0
  4. data/ext/eigen/eigen3/COPYING.BSD +26 -0
  5. data/ext/eigen/eigen3/COPYING.MPL2 +373 -0
  6. data/ext/eigen/eigen3/COPYING.README +18 -0
  7. data/ext/eigen/eigen3/Eigen/Array +11 -0
  8. data/ext/eigen/eigen3/Eigen/Cholesky +32 -0
  9. data/ext/eigen/eigen3/Eigen/CholmodSupport +45 -0
  10. data/ext/eigen/eigen3/Eigen/Core +376 -0
  11. data/ext/eigen/eigen3/Eigen/Dense +7 -0
  12. data/ext/eigen/eigen3/Eigen/Eigen +2 -0
  13. data/ext/eigen/eigen3/Eigen/Eigen2Support +95 -0
  14. data/ext/eigen/eigen3/Eigen/Eigenvalues +48 -0
  15. data/ext/eigen/eigen3/Eigen/Geometry +63 -0
  16. data/ext/eigen/eigen3/Eigen/Householder +23 -0
  17. data/ext/eigen/eigen3/Eigen/IterativeLinearSolvers +40 -0
  18. data/ext/eigen/eigen3/Eigen/Jacobi +26 -0
  19. data/ext/eigen/eigen3/Eigen/LU +41 -0
  20. data/ext/eigen/eigen3/Eigen/LeastSquares +32 -0
  21. data/ext/eigen/eigen3/Eigen/MetisSupport +28 -0
  22. data/ext/eigen/eigen3/Eigen/PaStiXSupport +46 -0
  23. data/ext/eigen/eigen3/Eigen/PardisoSupport +30 -0
  24. data/ext/eigen/eigen3/Eigen/QR +45 -0
  25. data/ext/eigen/eigen3/Eigen/QtAlignedMalloc +34 -0
  26. data/ext/eigen/eigen3/Eigen/SPQRSupport +29 -0
  27. data/ext/eigen/eigen3/Eigen/SVD +37 -0
  28. data/ext/eigen/eigen3/Eigen/Sparse +27 -0
  29. data/ext/eigen/eigen3/Eigen/SparseCore +64 -0
  30. data/ext/eigen/eigen3/Eigen/SparseLU +49 -0
  31. data/ext/eigen/eigen3/Eigen/SparseQR +33 -0
  32. data/ext/eigen/eigen3/Eigen/StdDeque +27 -0
  33. data/ext/eigen/eigen3/Eigen/StdList +26 -0
  34. data/ext/eigen/eigen3/Eigen/StdVector +27 -0
  35. data/ext/eigen/eigen3/Eigen/SuperLUSupport +59 -0
  36. data/ext/eigen/eigen3/Eigen/UmfPackSupport +36 -0
  37. data/ext/eigen/eigen3/Eigen/src/Cholesky/LDLT.h +611 -0
  38. data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT.h +498 -0
  39. data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT_MKL.h +102 -0
  40. data/ext/eigen/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +607 -0
  41. data/ext/eigen/eigen3/Eigen/src/Core/Array.h +323 -0
  42. data/ext/eigen/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
  43. data/ext/eigen/eigen3/Eigen/src/Core/ArrayWrapper.h +264 -0
  44. data/ext/eigen/eigen3/Eigen/src/Core/Assign.h +590 -0
  45. data/ext/eigen/eigen3/Eigen/src/Core/Assign_MKL.h +224 -0
  46. data/ext/eigen/eigen3/Eigen/src/Core/BandMatrix.h +334 -0
  47. data/ext/eigen/eigen3/Eigen/src/Core/Block.h +406 -0
  48. data/ext/eigen/eigen3/Eigen/src/Core/BooleanRedux.h +154 -0
  49. data/ext/eigen/eigen3/Eigen/src/Core/CommaInitializer.h +154 -0
  50. data/ext/eigen/eigen3/Eigen/src/Core/CoreIterators.h +61 -0
  51. data/ext/eigen/eigen3/Eigen/src/Core/CwiseBinaryOp.h +230 -0
  52. data/ext/eigen/eigen3/Eigen/src/Core/CwiseNullaryOp.h +864 -0
  53. data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryOp.h +126 -0
  54. data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryView.h +139 -0
  55. data/ext/eigen/eigen3/Eigen/src/Core/DenseBase.h +521 -0
  56. data/ext/eigen/eigen3/Eigen/src/Core/DenseCoeffsBase.h +754 -0
  57. data/ext/eigen/eigen3/Eigen/src/Core/DenseStorage.h +434 -0
  58. data/ext/eigen/eigen3/Eigen/src/Core/Diagonal.h +237 -0
  59. data/ext/eigen/eigen3/Eigen/src/Core/DiagonalMatrix.h +313 -0
  60. data/ext/eigen/eigen3/Eigen/src/Core/DiagonalProduct.h +131 -0
  61. data/ext/eigen/eigen3/Eigen/src/Core/Dot.h +263 -0
  62. data/ext/eigen/eigen3/Eigen/src/Core/EigenBase.h +131 -0
  63. data/ext/eigen/eigen3/Eigen/src/Core/Flagged.h +140 -0
  64. data/ext/eigen/eigen3/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  65. data/ext/eigen/eigen3/Eigen/src/Core/Functors.h +1026 -0
  66. data/ext/eigen/eigen3/Eigen/src/Core/Fuzzy.h +150 -0
  67. data/ext/eigen/eigen3/Eigen/src/Core/GeneralProduct.h +635 -0
  68. data/ext/eigen/eigen3/Eigen/src/Core/GenericPacketMath.h +350 -0
  69. data/ext/eigen/eigen3/Eigen/src/Core/GlobalFunctions.h +92 -0
  70. data/ext/eigen/eigen3/Eigen/src/Core/IO.h +250 -0
  71. data/ext/eigen/eigen3/Eigen/src/Core/Map.h +192 -0
  72. data/ext/eigen/eigen3/Eigen/src/Core/MapBase.h +247 -0
  73. data/ext/eigen/eigen3/Eigen/src/Core/MathFunctions.h +768 -0
  74. data/ext/eigen/eigen3/Eigen/src/Core/Matrix.h +420 -0
  75. data/ext/eigen/eigen3/Eigen/src/Core/MatrixBase.h +563 -0
  76. data/ext/eigen/eigen3/Eigen/src/Core/NestByValue.h +111 -0
  77. data/ext/eigen/eigen3/Eigen/src/Core/NoAlias.h +134 -0
  78. data/ext/eigen/eigen3/Eigen/src/Core/NumTraits.h +150 -0
  79. data/ext/eigen/eigen3/Eigen/src/Core/PermutationMatrix.h +721 -0
  80. data/ext/eigen/eigen3/Eigen/src/Core/PlainObjectBase.h +822 -0
  81. data/ext/eigen/eigen3/Eigen/src/Core/ProductBase.h +290 -0
  82. data/ext/eigen/eigen3/Eigen/src/Core/Random.h +152 -0
  83. data/ext/eigen/eigen3/Eigen/src/Core/Redux.h +409 -0
  84. data/ext/eigen/eigen3/Eigen/src/Core/Ref.h +278 -0
  85. data/ext/eigen/eigen3/Eigen/src/Core/Replicate.h +177 -0
  86. data/ext/eigen/eigen3/Eigen/src/Core/ReturnByValue.h +99 -0
  87. data/ext/eigen/eigen3/Eigen/src/Core/Reverse.h +224 -0
  88. data/ext/eigen/eigen3/Eigen/src/Core/Select.h +162 -0
  89. data/ext/eigen/eigen3/Eigen/src/Core/SelfAdjointView.h +314 -0
  90. data/ext/eigen/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +191 -0
  91. data/ext/eigen/eigen3/Eigen/src/Core/SolveTriangular.h +260 -0
  92. data/ext/eigen/eigen3/Eigen/src/Core/StableNorm.h +203 -0
  93. data/ext/eigen/eigen3/Eigen/src/Core/Stride.h +108 -0
  94. data/ext/eigen/eigen3/Eigen/src/Core/Swap.h +126 -0
  95. data/ext/eigen/eigen3/Eigen/src/Core/Transpose.h +419 -0
  96. data/ext/eigen/eigen3/Eigen/src/Core/Transpositions.h +436 -0
  97. data/ext/eigen/eigen3/Eigen/src/Core/TriangularMatrix.h +839 -0
  98. data/ext/eigen/eigen3/Eigen/src/Core/VectorBlock.h +95 -0
  99. data/ext/eigen/eigen3/Eigen/src/Core/VectorwiseOp.h +642 -0
  100. data/ext/eigen/eigen3/Eigen/src/Core/Visitor.h +237 -0
  101. data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +217 -0
  102. data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +501 -0
  103. data/ext/eigen/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
  104. data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/Complex.h +253 -0
  105. data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +420 -0
  106. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/Complex.h +442 -0
  107. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +475 -0
  108. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +649 -0
  109. data/ext/eigen/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h +476 -0
  110. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1341 -0
  111. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +427 -0
  112. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +278 -0
  113. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +146 -0
  114. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +118 -0
  115. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +566 -0
  116. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +131 -0
  117. data/ext/eigen/eigen3/Eigen/src/Core/products/Parallelizer.h +162 -0
  118. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +436 -0
  119. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +295 -0
  120. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +281 -0
  121. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +114 -0
  122. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +123 -0
  123. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  124. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +427 -0
  125. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +309 -0
  126. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +348 -0
  127. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h +247 -0
  128. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +332 -0
  129. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h +155 -0
  130. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +139 -0
  131. data/ext/eigen/eigen3/Eigen/src/Core/util/BlasUtil.h +264 -0
  132. data/ext/eigen/eigen3/Eigen/src/Core/util/Constants.h +451 -0
  133. data/ext/eigen/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +40 -0
  134. data/ext/eigen/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  135. data/ext/eigen/eigen3/Eigen/src/Core/util/MKL_support.h +158 -0
  136. data/ext/eigen/eigen3/Eigen/src/Core/util/Macros.h +451 -0
  137. data/ext/eigen/eigen3/Eigen/src/Core/util/Memory.h +977 -0
  138. data/ext/eigen/eigen3/Eigen/src/Core/util/Meta.h +243 -0
  139. data/ext/eigen/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
  140. data/ext/eigen/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +14 -0
  141. data/ext/eigen/eigen3/Eigen/src/Core/util/StaticAssert.h +208 -0
  142. data/ext/eigen/eigen3/Eigen/src/Core/util/XprHelper.h +469 -0
  143. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Block.h +126 -0
  144. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Cwise.h +192 -0
  145. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/CwiseOperators.h +298 -0
  146. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AlignedBox.h +159 -0
  147. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/All.h +115 -0
  148. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AngleAxis.h +214 -0
  149. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Hyperplane.h +254 -0
  150. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h +141 -0
  151. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Quaternion.h +495 -0
  152. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Rotation2D.h +145 -0
  153. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/RotationBase.h +123 -0
  154. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Scaling.h +167 -0
  155. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Transform.h +786 -0
  156. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Translation.h +184 -0
  157. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LU.h +120 -0
  158. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Lazy.h +71 -0
  159. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LeastSquares.h +169 -0
  160. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Macros.h +20 -0
  161. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/MathFunctions.h +57 -0
  162. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Memory.h +45 -0
  163. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Meta.h +75 -0
  164. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Minor.h +117 -0
  165. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/QR.h +67 -0
  166. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/SVD.h +637 -0
  167. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/TriangularSolver.h +42 -0
  168. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/VectorBlock.h +94 -0
  169. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +341 -0
  170. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +456 -0
  171. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +94 -0
  172. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +607 -0
  173. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +350 -0
  174. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +227 -0
  175. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +373 -0
  176. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +160 -0
  177. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealQZ.h +624 -0
  178. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur.h +525 -0
  179. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur_MKL.h +83 -0
  180. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +801 -0
  181. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +92 -0
  182. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +557 -0
  183. data/ext/eigen/eigen3/Eigen/src/Geometry/AlignedBox.h +392 -0
  184. data/ext/eigen/eigen3/Eigen/src/Geometry/AngleAxis.h +233 -0
  185. data/ext/eigen/eigen3/Eigen/src/Geometry/EulerAngles.h +104 -0
  186. data/ext/eigen/eigen3/Eigen/src/Geometry/Homogeneous.h +307 -0
  187. data/ext/eigen/eigen3/Eigen/src/Geometry/Hyperplane.h +280 -0
  188. data/ext/eigen/eigen3/Eigen/src/Geometry/OrthoMethods.h +218 -0
  189. data/ext/eigen/eigen3/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  190. data/ext/eigen/eigen3/Eigen/src/Geometry/Quaternion.h +776 -0
  191. data/ext/eigen/eigen3/Eigen/src/Geometry/Rotation2D.h +160 -0
  192. data/ext/eigen/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
  193. data/ext/eigen/eigen3/Eigen/src/Geometry/Scaling.h +166 -0
  194. data/ext/eigen/eigen3/Eigen/src/Geometry/Transform.h +1455 -0
  195. data/ext/eigen/eigen3/Eigen/src/Geometry/Translation.h +206 -0
  196. data/ext/eigen/eigen3/Eigen/src/Geometry/Umeyama.h +177 -0
  197. data/ext/eigen/eigen3/Eigen/src/Geometry/arch/Geometry_SSE.h +115 -0
  198. data/ext/eigen/eigen3/Eigen/src/Householder/BlockHouseholder.h +68 -0
  199. data/ext/eigen/eigen3/Eigen/src/Householder/Householder.h +171 -0
  200. data/ext/eigen/eigen3/Eigen/src/Householder/HouseholderSequence.h +441 -0
  201. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -0
  202. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +263 -0
  203. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +256 -0
  204. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +282 -0
  205. data/ext/eigen/eigen3/Eigen/src/Jacobi/Jacobi.h +433 -0
  206. data/ext/eigen/eigen3/Eigen/src/LU/Determinant.h +101 -0
  207. data/ext/eigen/eigen3/Eigen/src/LU/FullPivLU.h +751 -0
  208. data/ext/eigen/eigen3/Eigen/src/LU/Inverse.h +400 -0
  209. data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU.h +509 -0
  210. data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU_MKL.h +85 -0
  211. data/ext/eigen/eigen3/Eigen/src/LU/arch/Inverse_SSE.h +329 -0
  212. data/ext/eigen/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  213. data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Amd.h +444 -0
  214. data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1850 -0
  215. data/ext/eigen/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +721 -0
  216. data/ext/eigen/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +592 -0
  217. data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +580 -0
  218. data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR_MKL.h +99 -0
  219. data/ext/eigen/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +622 -0
  220. data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR.h +388 -0
  221. data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR_MKL.h +71 -0
  222. data/ext/eigen/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +338 -0
  223. data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD.h +976 -0
  224. data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD_MKL.h +92 -0
  225. data/ext/eigen/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +148 -0
  226. data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +671 -0
  227. data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  228. data/ext/eigen/eigen3/Eigen/src/SparseCore/AmbiVector.h +373 -0
  229. data/ext/eigen/eigen3/Eigen/src/SparseCore/CompressedStorage.h +233 -0
  230. data/ext/eigen/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +245 -0
  231. data/ext/eigen/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +181 -0
  232. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseBlock.h +537 -0
  233. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  234. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +325 -0
  235. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +163 -0
  236. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +311 -0
  237. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +196 -0
  238. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDot.h +101 -0
  239. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +26 -0
  240. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1262 -0
  241. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +461 -0
  242. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparsePermutation.h +148 -0
  243. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseProduct.h +188 -0
  244. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseRedux.h +45 -0
  245. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +507 -0
  246. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +150 -0
  247. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTranspose.h +63 -0
  248. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +179 -0
  249. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseUtil.h +172 -0
  250. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseVector.h +448 -0
  251. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseView.h +99 -0
  252. data/ext/eigen/eigen3/Eigen/src/SparseCore/TriangularSolver.h +334 -0
  253. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU.h +806 -0
  254. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  255. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +227 -0
  256. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +111 -0
  257. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +298 -0
  258. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  259. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +180 -0
  260. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +177 -0
  261. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +106 -0
  262. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +279 -0
  263. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +127 -0
  264. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  265. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  266. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  267. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  268. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +135 -0
  269. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  270. data/ext/eigen/eigen3/Eigen/src/SparseQR/SparseQR.h +714 -0
  271. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdDeque.h +134 -0
  272. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdList.h +114 -0
  273. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdVector.h +126 -0
  274. data/ext/eigen/eigen3/Eigen/src/StlSupport/details.h +84 -0
  275. data/ext/eigen/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1026 -0
  276. data/ext/eigen/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +474 -0
  277. data/ext/eigen/eigen3/Eigen/src/misc/Image.h +84 -0
  278. data/ext/eigen/eigen3/Eigen/src/misc/Kernel.h +81 -0
  279. data/ext/eigen/eigen3/Eigen/src/misc/Solve.h +76 -0
  280. data/ext/eigen/eigen3/Eigen/src/misc/SparseSolve.h +128 -0
  281. data/ext/eigen/eigen3/Eigen/src/misc/blas.h +658 -0
  282. data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +253 -0
  283. data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +187 -0
  284. data/ext/eigen/eigen3/Eigen/src/plugins/BlockMethods.h +935 -0
  285. data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +46 -0
  286. data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +172 -0
  287. data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +143 -0
  288. data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +52 -0
  289. data/ext/eigen/eigen3/signature_of_eigen3_matrix_library +1 -0
  290. data/ext/eigen/eigen_wrap.cxx +19420 -10396
  291. data/ext/eigen/extconf.rb +37 -2
  292. data/lib/eigen.rb +146 -3
  293. metadata +294 -7
@@ -0,0 +1,146 @@
1
+ /*
2
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ * Neither the name of Intel Corporation nor the names of its contributors may
13
+ be used to endorse or promote products derived from this software without
14
+ specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+ ********************************************************************************
28
+ * Content : Eigen bindings to Intel(R) MKL
29
+ * Level 3 BLAS SYRK/HERK implementation.
30
+ ********************************************************************************
31
+ */
32
+
33
+ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
34
+ #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
35
+
36
+ namespace Eigen {
37
+
38
+ namespace internal {
39
+
40
+ template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>
41
+ struct general_matrix_matrix_rankupdate :
42
+ general_matrix_matrix_triangular_product<
43
+ Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
44
+
45
+
46
+ // try to go to BLAS specialization
47
+ #define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
48
+ template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
49
+ int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
50
+ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
51
+ Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
52
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
53
+ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
54
+ { \
55
+ if (lhs==rhs) { \
56
+ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
57
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
58
+ } else { \
59
+ general_matrix_matrix_triangular_product<Index, \
60
+ Scalar, LhsStorageOrder, ConjugateLhs, \
61
+ Scalar, RhsStorageOrder, ConjugateRhs, \
62
+ ColMajor, UpLo, BuiltIn> \
63
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
64
+ } \
65
+ } \
66
+ };
67
+
68
+ EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
69
+ //EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
70
+ EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
71
+ //EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
72
+
73
+ // SYRK for float/double
74
+ #define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
75
+ template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
76
+ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
77
+ enum { \
78
+ IsLower = (UpLo&Lower) == Lower, \
79
+ LowUp = IsLower ? Lower : Upper, \
80
+ conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
81
+ }; \
82
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
83
+ const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
84
+ { \
85
+ /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
86
+ \
87
+ MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
88
+ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
89
+ MKLTYPE alpha_, beta_; \
90
+ \
91
+ /* Set alpha_ & beta_ */ \
92
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
93
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
94
+ MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
95
+ } \
96
+ };
97
+
98
+ // HERK for complex data
99
+ #define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
100
+ template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
101
+ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
102
+ enum { \
103
+ IsLower = (UpLo&Lower) == Lower, \
104
+ LowUp = IsLower ? Lower : Upper, \
105
+ conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
106
+ }; \
107
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
108
+ const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
109
+ { \
110
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
111
+ \
112
+ MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
113
+ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
114
+ RTYPE alpha_, beta_; \
115
+ const EIGTYPE* a_ptr; \
116
+ \
117
+ /* Set alpha_ & beta_ */ \
118
+ /* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
119
+ /* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
120
+ alpha_ = alpha.real(); \
121
+ beta_ = 1.0; \
122
+ /* Copy with conjugation in some cases*/ \
123
+ MatrixType a; \
124
+ if (conjA) { \
125
+ Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
126
+ a = mapA.conjugate(); \
127
+ lda = a.outerStride(); \
128
+ a_ptr = a.data(); \
129
+ } else a_ptr=lhs; \
130
+ MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
131
+ } \
132
+ };
133
+
134
+
135
+ EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
136
+ EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
137
+
138
+ //EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
139
+ //EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, float, cherk)
140
+
141
+
142
+ } // end namespace internal
143
+
144
+ } // end namespace Eigen
145
+
146
+ #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
@@ -0,0 +1,118 @@
1
+ /*
2
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ * Neither the name of Intel Corporation nor the names of its contributors may
13
+ be used to endorse or promote products derived from this software without
14
+ specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+ ********************************************************************************
28
+ * Content : Eigen bindings to Intel(R) MKL
29
+ * General matrix-matrix product functionality based on ?GEMM.
30
+ ********************************************************************************
31
+ */
32
+
33
+ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
34
+ #define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
35
+
36
+ namespace Eigen {
37
+
38
+ namespace internal {
39
+
40
+ /**********************************************************************
41
+ * This file implements general matrix-matrix multiplication using BLAS
42
+ * gemm function via partial specialization of
43
+ * general_matrix_matrix_product::run(..) method for float, double,
44
+ * std::complex<float> and std::complex<double> types
45
+ **********************************************************************/
46
+
47
+ // gemm specialization
48
+
49
+ #define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
50
+ template< \
51
+ typename Index, \
52
+ int LhsStorageOrder, bool ConjugateLhs, \
53
+ int RhsStorageOrder, bool ConjugateRhs> \
54
+ struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
55
+ { \
56
+ static void run(Index rows, Index cols, Index depth, \
57
+ const EIGTYPE* _lhs, Index lhsStride, \
58
+ const EIGTYPE* _rhs, Index rhsStride, \
59
+ EIGTYPE* res, Index resStride, \
60
+ EIGTYPE alpha, \
61
+ level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \
62
+ GemmParallelInfo<Index>* /*info = 0*/) \
63
+ { \
64
+ using std::conj; \
65
+ \
66
+ char transa, transb; \
67
+ MKL_INT m, n, k, lda, ldb, ldc; \
68
+ const EIGTYPE *a, *b; \
69
+ MKLTYPE alpha_, beta_; \
70
+ MatrixX##EIGPREFIX a_tmp, b_tmp; \
71
+ EIGTYPE myone(1);\
72
+ \
73
+ /* Set transpose options */ \
74
+ transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
75
+ transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
76
+ \
77
+ /* Set m, n, k */ \
78
+ m = (MKL_INT)rows; \
79
+ n = (MKL_INT)cols; \
80
+ k = (MKL_INT)depth; \
81
+ \
82
+ /* Set alpha_ & beta_ */ \
83
+ assign_scalar_eig2mkl(alpha_, alpha); \
84
+ assign_scalar_eig2mkl(beta_, myone); \
85
+ \
86
+ /* Set lda, ldb, ldc */ \
87
+ lda = (MKL_INT)lhsStride; \
88
+ ldb = (MKL_INT)rhsStride; \
89
+ ldc = (MKL_INT)resStride; \
90
+ \
91
+ /* Set a, b, c */ \
92
+ if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
93
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
94
+ a_tmp = lhs.conjugate(); \
95
+ a = a_tmp.data(); \
96
+ lda = a_tmp.outerStride(); \
97
+ } else a = _lhs; \
98
+ \
99
+ if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
100
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
101
+ b_tmp = rhs.conjugate(); \
102
+ b = b_tmp.data(); \
103
+ ldb = b_tmp.outerStride(); \
104
+ } else b = _rhs; \
105
+ \
106
+ MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
107
+ }};
108
+
109
+ GEMM_SPECIALIZATION(double, d, double, d)
110
+ GEMM_SPECIALIZATION(float, f, float, s)
111
+ GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
112
+ GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
113
+
114
+ } // end namespace internal
115
+
116
+ } // end namespace Eigen
117
+
118
+ #endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
@@ -0,0 +1,566 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
11
+ #define EIGEN_GENERAL_MATRIX_VECTOR_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ /* Optimized col-major matrix * vector product:
18
+ * This algorithm processes 4 columns at once, which makes it possible both to reduce
19
+ * the number of load/stores of the result by a factor 4 and to reduce
20
+ * the instruction dependency. Moreover, we know that all bands have the
21
+ * same alignment pattern.
22
+ *
23
+ * Mixing type logic: C += alpha * A * B
24
+ * | A | B |alpha| comments
25
+ * |real |cplx |cplx | no vectorization
26
+ * |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
27
+ * |cplx |real |cplx | invalid, the caller has to do tmp = A * B; C += alpha*tmp
28
+ * |cplx |real |real | optimal case, vectorization possible via real-cplx mul
29
+ */
30
+ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
31
+ struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
32
+ {
33
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
34
+
35
+ enum {
36
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
37
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
38
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
39
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
40
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
41
+ };
42
+
43
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
44
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
45
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
46
+
47
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
48
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
49
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
50
+
51
+ EIGEN_DONT_INLINE static void run(
52
+ Index rows, Index cols,
53
+ const LhsScalar* lhs, Index lhsStride,
54
+ const RhsScalar* rhs, Index rhsIncr,
55
+ ResScalar* res, Index resIncr, RhsScalar alpha);
56
+ };
57
+
58
+ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
59
+ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
60
+ Index rows, Index cols,
61
+ const LhsScalar* lhs, Index lhsStride,
62
+ const RhsScalar* rhs, Index rhsIncr,
63
+ ResScalar* res, Index resIncr, RhsScalar alpha)
64
+ {
65
+ EIGEN_UNUSED_VARIABLE(resIncr)
66
+ eigen_internal_assert(resIncr==1);
67
+ #ifdef _EIGEN_ACCUMULATE_PACKETS
68
+ #error _EIGEN_ACCUMULATE_PACKETS has already been defined
69
+ #endif
70
+ #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \
71
+ pstore(&res[j], \
72
+ padd(pload<ResPacket>(&res[j]), \
73
+ padd( \
74
+ padd(pcj.pmul(EIGEN_CAT(ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
75
+ pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
76
+ padd(pcj.pmul(EIGEN_CAT(ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
77
+ pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
78
+
79
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
80
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
81
+ if(ConjugateRhs)
82
+ alpha = numext::conj(alpha);
83
+
84
+ enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
85
+ const Index columnsAtOnce = 4;
86
+ const Index peels = 2;
87
+ const Index LhsPacketAlignedMask = LhsPacketSize-1;
88
+ const Index ResPacketAlignedMask = ResPacketSize-1;
89
+ // const Index PeelAlignedMask = ResPacketSize*peels-1;
90
+ const Index size = rows;
91
+
92
+ // How many coeffs of the result do we have to skip to be aligned.
93
+ // Here we assume data are at least aligned on the base scalar type.
94
+ Index alignedStart = internal::first_aligned(res,size);
95
+ Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
96
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
97
+
98
+ const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
99
+ Index alignmentPattern = alignmentStep==0 ? AllAligned
100
+ : alignmentStep==(LhsPacketSize/2) ? EvenAligned
101
+ : FirstAligned;
102
+
103
+ // we cannot assume the first element is aligned because of sub-matrices
104
+ const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
105
+
106
+ // find how many columns do we have to skip to be aligned with the result (if possible)
107
+ Index skipColumns = 0;
108
+ // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
109
+ if( (size_t(lhs)%sizeof(LhsScalar)) || (size_t(res)%sizeof(ResScalar)) )
110
+ {
111
+ alignedSize = 0;
112
+ alignedStart = 0;
113
+ }
114
+ else if (LhsPacketSize>1)
115
+ {
116
+ eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
117
+
118
+ while (skipColumns<LhsPacketSize &&
119
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
120
+ ++skipColumns;
121
+ if (skipColumns==LhsPacketSize)
122
+ {
123
+ // nothing can be aligned, no need to skip any column
124
+ alignmentPattern = NoneAligned;
125
+ skipColumns = 0;
126
+ }
127
+ else
128
+ {
129
+ skipColumns = (std::min)(skipColumns,cols);
130
+ // note that the skipped columns are processed later.
131
+ }
132
+
133
+ eigen_internal_assert( (alignmentPattern==NoneAligned)
134
+ || (skipColumns + columnsAtOnce >= cols)
135
+ || LhsPacketSize > size
136
+ || (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);
137
+ }
138
+ else if(Vectorizable)
139
+ {
140
+ alignedStart = 0;
141
+ alignedSize = size;
142
+ alignmentPattern = AllAligned;
143
+ }
144
+
145
+ Index offset1 = (FirstAligned && alignmentStep==1?3:1);
146
+ Index offset3 = (FirstAligned && alignmentStep==1?1:3);
147
+
148
+ Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
149
+ for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
150
+ {
151
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
152
+ ptmp1 = pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
153
+ ptmp2 = pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
154
+ ptmp3 = pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
155
+
156
+ // this helps a lot generating better binary code
157
+ const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
158
+ *lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
159
+
160
+ if (Vectorizable)
161
+ {
162
+ /* explicit vectorization */
163
+ // process initial unaligned coeffs
164
+ for (Index j=0; j<alignedStart; ++j)
165
+ {
166
+ res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
167
+ res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
168
+ res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
169
+ res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
170
+ }
171
+
172
+ if (alignedSize>alignedStart)
173
+ {
174
+ switch(alignmentPattern)
175
+ {
176
+ case AllAligned:
177
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
178
+ _EIGEN_ACCUMULATE_PACKETS(d,d,d);
179
+ break;
180
+ case EvenAligned:
181
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
182
+ _EIGEN_ACCUMULATE_PACKETS(d,du,d);
183
+ break;
184
+ case FirstAligned:
185
+ {
186
+ Index j = alignedStart;
187
+ if(peels>1)
188
+ {
189
+ LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
190
+ ResPacket T0, T1;
191
+
192
+ A01 = pload<LhsPacket>(&lhs1[alignedStart-1]);
193
+ A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
194
+ A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
195
+
196
+ for (; j<peeledSize; j+=peels*ResPacketSize)
197
+ {
198
+ A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
199
+ A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
200
+ A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
201
+
202
+ A00 = pload<LhsPacket>(&lhs0[j]);
203
+ A10 = pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
204
+ T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
205
+ T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
206
+
207
+ T0 = pcj.pmadd(A01, ptmp1, T0);
208
+ A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
209
+ T0 = pcj.pmadd(A02, ptmp2, T0);
210
+ A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
211
+ T0 = pcj.pmadd(A03, ptmp3, T0);
212
+ pstore(&res[j],T0);
213
+ A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
214
+ T1 = pcj.pmadd(A11, ptmp1, T1);
215
+ T1 = pcj.pmadd(A12, ptmp2, T1);
216
+ T1 = pcj.pmadd(A13, ptmp3, T1);
217
+ pstore(&res[j+ResPacketSize],T1);
218
+ }
219
+ }
220
+ for (; j<alignedSize; j+=ResPacketSize)
221
+ _EIGEN_ACCUMULATE_PACKETS(d,du,du);
222
+ break;
223
+ }
224
+ default:
225
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
226
+ _EIGEN_ACCUMULATE_PACKETS(du,du,du);
227
+ break;
228
+ }
229
+ }
230
+ } // end explicit vectorization
231
+
232
+ /* process remaining coeffs (or all if there is no explicit vectorization) */
233
+ for (Index j=alignedSize; j<size; ++j)
234
+ {
235
+ res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
236
+ res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
237
+ res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
238
+ res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
239
+ }
240
+ }
241
+
242
+ // process remaining first and last columns (at most columnsAtOnce-1)
243
+ Index end = cols;
244
+ Index start = columnBound;
245
+ do
246
+ {
247
+ for (Index k=start; k<end; ++k)
248
+ {
249
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
250
+ const LhsScalar* lhs0 = lhs + k*lhsStride;
251
+
252
+ if (Vectorizable)
253
+ {
254
+ /* explicit vectorization */
255
+ // process first unaligned result's coeffs
256
+ for (Index j=0; j<alignedStart; ++j)
257
+ res[j] += cj.pmul(lhs0[j], pfirst(ptmp0));
258
+ // process aligned result's coeffs
259
+ if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
260
+ for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
261
+ pstore(&res[i], pcj.pmadd(pload<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
262
+ else
263
+ for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
264
+ pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
265
+ }
266
+
267
+ // process remaining scalars (or all if no explicit vectorization)
268
+ for (Index i=alignedSize; i<size; ++i)
269
+ res[i] += cj.pmul(lhs0[i], pfirst(ptmp0));
270
+ }
271
+ if (skipColumns)
272
+ {
273
+ start = 0;
274
+ end = skipColumns;
275
+ skipColumns = 0;
276
+ }
277
+ else
278
+ break;
279
+ } while(Vectorizable);
280
+ #undef _EIGEN_ACCUMULATE_PACKETS
281
+ }
282
+
283
+ /* Optimized row-major matrix * vector product:
284
+ * This algorithm processes 4 rows at once, which makes it possible both to reduce
285
+ * the number of load/stores of the result by a factor 4 and to reduce
286
+ * the instruction dependency. Moreover, we know that all bands have the
287
+ * same alignment pattern.
288
+ *
289
+ * Mixing type logic:
290
+ * - alpha is always a complex (or converted to a complex)
291
+ * - no vectorization
292
+ */
293
+ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
294
+ struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
295
+ {
296
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
297
+
298
+ enum {
299
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
300
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
301
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
302
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
303
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
304
+ };
305
+
306
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
307
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
308
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
309
+
310
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
311
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
312
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
313
+
314
+ EIGEN_DONT_INLINE static void run(
315
+ Index rows, Index cols,
316
+ const LhsScalar* lhs, Index lhsStride,
317
+ const RhsScalar* rhs, Index rhsIncr,
318
+ ResScalar* res, Index resIncr,
319
+ ResScalar alpha);
320
+ };
321
+
322
+ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
323
+ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
324
+ Index rows, Index cols,
325
+ const LhsScalar* lhs, Index lhsStride,
326
+ const RhsScalar* rhs, Index rhsIncr,
327
+ ResScalar* res, Index resIncr,
328
+ ResScalar alpha)
329
+ {
330
+ EIGEN_UNUSED_VARIABLE(rhsIncr);
331
+ eigen_internal_assert(rhsIncr==1);
332
+ #ifdef _EIGEN_ACCUMULATE_PACKETS
333
+ #error _EIGEN_ACCUMULATE_PACKETS has already been defined
334
+ #endif
335
+
336
+ #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\
337
+ RhsPacket b = pload<RhsPacket>(&rhs[j]); \
338
+ ptmp0 = pcj.pmadd(EIGEN_CAT(ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
339
+ ptmp1 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
340
+ ptmp2 = pcj.pmadd(EIGEN_CAT(ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
341
+ ptmp3 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
342
+
343
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
344
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
345
+
346
+ enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
347
+ const Index rowsAtOnce = 4;
348
+ const Index peels = 2;
349
+ const Index RhsPacketAlignedMask = RhsPacketSize-1;
350
+ const Index LhsPacketAlignedMask = LhsPacketSize-1;
351
+ // const Index PeelAlignedMask = RhsPacketSize*peels-1;
352
+ const Index depth = cols;
353
+
354
+ // How many coeffs of the rhs do we have to skip to be aligned.
355
+ // Here we assume data are at least aligned on the base scalar type
356
+ // if that's not the case then vectorization is discarded, see below.
357
+ Index alignedStart = internal::first_aligned(rhs, depth);
358
+ Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
359
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
360
+
361
+ const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
362
+ Index alignmentPattern = alignmentStep==0 ? AllAligned
363
+ : alignmentStep==(LhsPacketSize/2) ? EvenAligned
364
+ : FirstAligned;
365
+
366
+ // we cannot assume the first element is aligned because of sub-matrices
367
+ const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
368
+
369
+ // find how many rows do we have to skip to be aligned with rhs (if possible)
370
+ Index skipRows = 0;
371
+ // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
372
+ if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || (size_t(lhs)%sizeof(LhsScalar)) || (size_t(rhs)%sizeof(RhsScalar)) )
373
+ {
374
+ alignedSize = 0;
375
+ alignedStart = 0;
376
+ }
377
+ else if (LhsPacketSize>1)
378
+ {
379
+ eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
380
+
381
+ while (skipRows<LhsPacketSize &&
382
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
383
+ ++skipRows;
384
+ if (skipRows==LhsPacketSize)
385
+ {
386
+ // nothing can be aligned, no need to skip any row
387
+ alignmentPattern = NoneAligned;
388
+ skipRows = 0;
389
+ }
390
+ else
391
+ {
392
+ skipRows = (std::min)(skipRows,Index(rows));
393
+ // note that the skipped rows are processed later.
394
+ }
395
+ eigen_internal_assert( alignmentPattern==NoneAligned
396
+ || LhsPacketSize==1
397
+ || (skipRows + rowsAtOnce >= rows)
398
+ || LhsPacketSize > depth
399
+ || (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);
400
+ }
401
+ else if(Vectorizable)
402
+ {
403
+ alignedStart = 0;
404
+ alignedSize = depth;
405
+ alignmentPattern = AllAligned;
406
+ }
407
+
408
+ Index offset1 = (FirstAligned && alignmentStep==1?3:1);
409
+ Index offset3 = (FirstAligned && alignmentStep==1?1:3);
410
+
411
+ Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
412
+ for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
413
+ {
414
+ EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
415
+ ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
416
+
417
+ // this helps the compiler generating good binary code
418
+ const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
419
+ *lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
420
+
421
+ if (Vectorizable)
422
+ {
423
+ /* explicit vectorization */
424
+ ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
425
+ ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
426
+
427
+ // process initial unaligned coeffs
428
+ // FIXME this loop gets vectorized by the compiler!
429
+ for (Index j=0; j<alignedStart; ++j)
430
+ {
431
+ RhsScalar b = rhs[j];
432
+ tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
433
+ tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
434
+ }
435
+
436
+ if (alignedSize>alignedStart)
437
+ {
438
+ switch(alignmentPattern)
439
+ {
440
+ case AllAligned:
441
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
442
+ _EIGEN_ACCUMULATE_PACKETS(d,d,d);
443
+ break;
444
+ case EvenAligned:
445
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
446
+ _EIGEN_ACCUMULATE_PACKETS(d,du,d);
447
+ break;
448
+ case FirstAligned:
449
+ {
450
+ Index j = alignedStart;
451
+ if (peels>1)
452
+ {
453
+ /* Here we process 4 rows with two peeled iterations to hide
454
+ * the overhead of unaligned loads. Moreover unaligned loads are handled
455
+ * using special shift/move operations between the two aligned packets
456
+ * overlapping the desired unaligned packet. This is *much* more efficient
457
+ * than basic unaligned loads.
458
+ */
459
+ LhsPacket A01, A02, A03, A11, A12, A13;
460
+ A01 = pload<LhsPacket>(&lhs1[alignedStart-1]);
461
+ A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
462
+ A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
463
+
464
+ for (; j<peeledSize; j+=peels*RhsPacketSize)
465
+ {
466
+ RhsPacket b = pload<RhsPacket>(&rhs[j]);
467
+ A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
468
+ A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
469
+ A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
470
+
471
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), b, ptmp0);
472
+ ptmp1 = pcj.pmadd(A01, b, ptmp1);
473
+ A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
474
+ ptmp2 = pcj.pmadd(A02, b, ptmp2);
475
+ A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
476
+ ptmp3 = pcj.pmadd(A03, b, ptmp3);
477
+ A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
478
+
479
+ b = pload<RhsPacket>(&rhs[j+RhsPacketSize]);
480
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
481
+ ptmp1 = pcj.pmadd(A11, b, ptmp1);
482
+ ptmp2 = pcj.pmadd(A12, b, ptmp2);
483
+ ptmp3 = pcj.pmadd(A13, b, ptmp3);
484
+ }
485
+ }
486
+ for (; j<alignedSize; j+=RhsPacketSize)
487
+ _EIGEN_ACCUMULATE_PACKETS(d,du,du);
488
+ break;
489
+ }
490
+ default:
491
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
492
+ _EIGEN_ACCUMULATE_PACKETS(du,du,du);
493
+ break;
494
+ }
495
+ tmp0 += predux(ptmp0);
496
+ tmp1 += predux(ptmp1);
497
+ tmp2 += predux(ptmp2);
498
+ tmp3 += predux(ptmp3);
499
+ }
500
+ } // end explicit vectorization
501
+
502
+ // process remaining coeffs (or all if no explicit vectorization)
503
+ // FIXME this loop gets vectorized by the compiler!
504
+ for (Index j=alignedSize; j<depth; ++j)
505
+ {
506
+ RhsScalar b = rhs[j];
507
+ tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
508
+ tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
509
+ }
510
+ res[i*resIncr] += alpha*tmp0;
511
+ res[(i+offset1)*resIncr] += alpha*tmp1;
512
+ res[(i+2)*resIncr] += alpha*tmp2;
513
+ res[(i+offset3)*resIncr] += alpha*tmp3;
514
+ }
515
+
516
+ // process remaining first and last rows (at most rowsAtOnce-1)
517
+ Index end = rows;
518
+ Index start = rowBound;
519
+ do
520
+ {
521
+ for (Index i=start; i<end; ++i)
522
+ {
523
+ EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
524
+ ResPacket ptmp0 = pset1<ResPacket>(tmp0);
525
+ const LhsScalar* lhs0 = lhs + i*lhsStride;
526
+ // process first unaligned result's coeffs
527
+ // FIXME this loop gets vectorized by the compiler!
528
+ for (Index j=0; j<alignedStart; ++j)
529
+ tmp0 += cj.pmul(lhs0[j], rhs[j]);
530
+
531
+ if (alignedSize>alignedStart)
532
+ {
533
+ // process aligned rhs coeffs
534
+ if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
535
+ for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
536
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
537
+ else
538
+ for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
539
+ ptmp0 = pcj.pmadd(ploadu<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
540
+ tmp0 += predux(ptmp0);
541
+ }
542
+
543
+ // process remaining scalars
544
+ // FIXME this loop gets vectorized by the compiler!
545
+ for (Index j=alignedSize; j<depth; ++j)
546
+ tmp0 += cj.pmul(lhs0[j], rhs[j]);
547
+ res[i*resIncr] += alpha*tmp0;
548
+ }
549
+ if (skipRows)
550
+ {
551
+ start = 0;
552
+ end = skipRows;
553
+ skipRows = 0;
554
+ }
555
+ else
556
+ break;
557
+ } while(Vectorizable);
558
+
559
+ #undef _EIGEN_ACCUMULATE_PACKETS
560
+ }
561
+
562
+ } // end namespace internal
563
+
564
+ } // end namespace Eigen
565
+
566
+ #endif // EIGEN_GENERAL_MATRIX_VECTOR_H