ruby-eigen 0.0.9 → 0.0.10.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +22 -0
  3. data/README.md +21 -0
  4. data/ext/eigen/eigen3/COPYING.BSD +26 -0
  5. data/ext/eigen/eigen3/COPYING.MPL2 +373 -0
  6. data/ext/eigen/eigen3/COPYING.README +18 -0
  7. data/ext/eigen/eigen3/Eigen/Array +11 -0
  8. data/ext/eigen/eigen3/Eigen/Cholesky +32 -0
  9. data/ext/eigen/eigen3/Eigen/CholmodSupport +45 -0
  10. data/ext/eigen/eigen3/Eigen/Core +376 -0
  11. data/ext/eigen/eigen3/Eigen/Dense +7 -0
  12. data/ext/eigen/eigen3/Eigen/Eigen +2 -0
  13. data/ext/eigen/eigen3/Eigen/Eigen2Support +95 -0
  14. data/ext/eigen/eigen3/Eigen/Eigenvalues +48 -0
  15. data/ext/eigen/eigen3/Eigen/Geometry +63 -0
  16. data/ext/eigen/eigen3/Eigen/Householder +23 -0
  17. data/ext/eigen/eigen3/Eigen/IterativeLinearSolvers +40 -0
  18. data/ext/eigen/eigen3/Eigen/Jacobi +26 -0
  19. data/ext/eigen/eigen3/Eigen/LU +41 -0
  20. data/ext/eigen/eigen3/Eigen/LeastSquares +32 -0
  21. data/ext/eigen/eigen3/Eigen/MetisSupport +28 -0
  22. data/ext/eigen/eigen3/Eigen/PaStiXSupport +46 -0
  23. data/ext/eigen/eigen3/Eigen/PardisoSupport +30 -0
  24. data/ext/eigen/eigen3/Eigen/QR +45 -0
  25. data/ext/eigen/eigen3/Eigen/QtAlignedMalloc +34 -0
  26. data/ext/eigen/eigen3/Eigen/SPQRSupport +29 -0
  27. data/ext/eigen/eigen3/Eigen/SVD +37 -0
  28. data/ext/eigen/eigen3/Eigen/Sparse +27 -0
  29. data/ext/eigen/eigen3/Eigen/SparseCore +64 -0
  30. data/ext/eigen/eigen3/Eigen/SparseLU +49 -0
  31. data/ext/eigen/eigen3/Eigen/SparseQR +33 -0
  32. data/ext/eigen/eigen3/Eigen/StdDeque +27 -0
  33. data/ext/eigen/eigen3/Eigen/StdList +26 -0
  34. data/ext/eigen/eigen3/Eigen/StdVector +27 -0
  35. data/ext/eigen/eigen3/Eigen/SuperLUSupport +59 -0
  36. data/ext/eigen/eigen3/Eigen/UmfPackSupport +36 -0
  37. data/ext/eigen/eigen3/Eigen/src/Cholesky/LDLT.h +611 -0
  38. data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT.h +498 -0
  39. data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT_MKL.h +102 -0
  40. data/ext/eigen/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +607 -0
  41. data/ext/eigen/eigen3/Eigen/src/Core/Array.h +323 -0
  42. data/ext/eigen/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
  43. data/ext/eigen/eigen3/Eigen/src/Core/ArrayWrapper.h +264 -0
  44. data/ext/eigen/eigen3/Eigen/src/Core/Assign.h +590 -0
  45. data/ext/eigen/eigen3/Eigen/src/Core/Assign_MKL.h +224 -0
  46. data/ext/eigen/eigen3/Eigen/src/Core/BandMatrix.h +334 -0
  47. data/ext/eigen/eigen3/Eigen/src/Core/Block.h +406 -0
  48. data/ext/eigen/eigen3/Eigen/src/Core/BooleanRedux.h +154 -0
  49. data/ext/eigen/eigen3/Eigen/src/Core/CommaInitializer.h +154 -0
  50. data/ext/eigen/eigen3/Eigen/src/Core/CoreIterators.h +61 -0
  51. data/ext/eigen/eigen3/Eigen/src/Core/CwiseBinaryOp.h +230 -0
  52. data/ext/eigen/eigen3/Eigen/src/Core/CwiseNullaryOp.h +864 -0
  53. data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryOp.h +126 -0
  54. data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryView.h +139 -0
  55. data/ext/eigen/eigen3/Eigen/src/Core/DenseBase.h +521 -0
  56. data/ext/eigen/eigen3/Eigen/src/Core/DenseCoeffsBase.h +754 -0
  57. data/ext/eigen/eigen3/Eigen/src/Core/DenseStorage.h +434 -0
  58. data/ext/eigen/eigen3/Eigen/src/Core/Diagonal.h +237 -0
  59. data/ext/eigen/eigen3/Eigen/src/Core/DiagonalMatrix.h +313 -0
  60. data/ext/eigen/eigen3/Eigen/src/Core/DiagonalProduct.h +131 -0
  61. data/ext/eigen/eigen3/Eigen/src/Core/Dot.h +263 -0
  62. data/ext/eigen/eigen3/Eigen/src/Core/EigenBase.h +131 -0
  63. data/ext/eigen/eigen3/Eigen/src/Core/Flagged.h +140 -0
  64. data/ext/eigen/eigen3/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  65. data/ext/eigen/eigen3/Eigen/src/Core/Functors.h +1026 -0
  66. data/ext/eigen/eigen3/Eigen/src/Core/Fuzzy.h +150 -0
  67. data/ext/eigen/eigen3/Eigen/src/Core/GeneralProduct.h +635 -0
  68. data/ext/eigen/eigen3/Eigen/src/Core/GenericPacketMath.h +350 -0
  69. data/ext/eigen/eigen3/Eigen/src/Core/GlobalFunctions.h +92 -0
  70. data/ext/eigen/eigen3/Eigen/src/Core/IO.h +250 -0
  71. data/ext/eigen/eigen3/Eigen/src/Core/Map.h +192 -0
  72. data/ext/eigen/eigen3/Eigen/src/Core/MapBase.h +247 -0
  73. data/ext/eigen/eigen3/Eigen/src/Core/MathFunctions.h +768 -0
  74. data/ext/eigen/eigen3/Eigen/src/Core/Matrix.h +420 -0
  75. data/ext/eigen/eigen3/Eigen/src/Core/MatrixBase.h +563 -0
  76. data/ext/eigen/eigen3/Eigen/src/Core/NestByValue.h +111 -0
  77. data/ext/eigen/eigen3/Eigen/src/Core/NoAlias.h +134 -0
  78. data/ext/eigen/eigen3/Eigen/src/Core/NumTraits.h +150 -0
  79. data/ext/eigen/eigen3/Eigen/src/Core/PermutationMatrix.h +721 -0
  80. data/ext/eigen/eigen3/Eigen/src/Core/PlainObjectBase.h +822 -0
  81. data/ext/eigen/eigen3/Eigen/src/Core/ProductBase.h +290 -0
  82. data/ext/eigen/eigen3/Eigen/src/Core/Random.h +152 -0
  83. data/ext/eigen/eigen3/Eigen/src/Core/Redux.h +409 -0
  84. data/ext/eigen/eigen3/Eigen/src/Core/Ref.h +278 -0
  85. data/ext/eigen/eigen3/Eigen/src/Core/Replicate.h +177 -0
  86. data/ext/eigen/eigen3/Eigen/src/Core/ReturnByValue.h +99 -0
  87. data/ext/eigen/eigen3/Eigen/src/Core/Reverse.h +224 -0
  88. data/ext/eigen/eigen3/Eigen/src/Core/Select.h +162 -0
  89. data/ext/eigen/eigen3/Eigen/src/Core/SelfAdjointView.h +314 -0
  90. data/ext/eigen/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +191 -0
  91. data/ext/eigen/eigen3/Eigen/src/Core/SolveTriangular.h +260 -0
  92. data/ext/eigen/eigen3/Eigen/src/Core/StableNorm.h +203 -0
  93. data/ext/eigen/eigen3/Eigen/src/Core/Stride.h +108 -0
  94. data/ext/eigen/eigen3/Eigen/src/Core/Swap.h +126 -0
  95. data/ext/eigen/eigen3/Eigen/src/Core/Transpose.h +419 -0
  96. data/ext/eigen/eigen3/Eigen/src/Core/Transpositions.h +436 -0
  97. data/ext/eigen/eigen3/Eigen/src/Core/TriangularMatrix.h +839 -0
  98. data/ext/eigen/eigen3/Eigen/src/Core/VectorBlock.h +95 -0
  99. data/ext/eigen/eigen3/Eigen/src/Core/VectorwiseOp.h +642 -0
  100. data/ext/eigen/eigen3/Eigen/src/Core/Visitor.h +237 -0
  101. data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +217 -0
  102. data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +501 -0
  103. data/ext/eigen/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
  104. data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/Complex.h +253 -0
  105. data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +420 -0
  106. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/Complex.h +442 -0
  107. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +475 -0
  108. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +649 -0
  109. data/ext/eigen/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h +476 -0
  110. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1341 -0
  111. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +427 -0
  112. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +278 -0
  113. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +146 -0
  114. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +118 -0
  115. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +566 -0
  116. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +131 -0
  117. data/ext/eigen/eigen3/Eigen/src/Core/products/Parallelizer.h +162 -0
  118. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +436 -0
  119. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +295 -0
  120. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +281 -0
  121. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +114 -0
  122. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +123 -0
  123. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  124. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +427 -0
  125. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +309 -0
  126. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +348 -0
  127. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h +247 -0
  128. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +332 -0
  129. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h +155 -0
  130. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +139 -0
  131. data/ext/eigen/eigen3/Eigen/src/Core/util/BlasUtil.h +264 -0
  132. data/ext/eigen/eigen3/Eigen/src/Core/util/Constants.h +451 -0
  133. data/ext/eigen/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +40 -0
  134. data/ext/eigen/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  135. data/ext/eigen/eigen3/Eigen/src/Core/util/MKL_support.h +158 -0
  136. data/ext/eigen/eigen3/Eigen/src/Core/util/Macros.h +451 -0
  137. data/ext/eigen/eigen3/Eigen/src/Core/util/Memory.h +977 -0
  138. data/ext/eigen/eigen3/Eigen/src/Core/util/Meta.h +243 -0
  139. data/ext/eigen/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
  140. data/ext/eigen/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +14 -0
  141. data/ext/eigen/eigen3/Eigen/src/Core/util/StaticAssert.h +208 -0
  142. data/ext/eigen/eigen3/Eigen/src/Core/util/XprHelper.h +469 -0
  143. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Block.h +126 -0
  144. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Cwise.h +192 -0
  145. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/CwiseOperators.h +298 -0
  146. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AlignedBox.h +159 -0
  147. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/All.h +115 -0
  148. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AngleAxis.h +214 -0
  149. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Hyperplane.h +254 -0
  150. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h +141 -0
  151. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Quaternion.h +495 -0
  152. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Rotation2D.h +145 -0
  153. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/RotationBase.h +123 -0
  154. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Scaling.h +167 -0
  155. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Transform.h +786 -0
  156. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Translation.h +184 -0
  157. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LU.h +120 -0
  158. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Lazy.h +71 -0
  159. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LeastSquares.h +169 -0
  160. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Macros.h +20 -0
  161. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/MathFunctions.h +57 -0
  162. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Memory.h +45 -0
  163. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Meta.h +75 -0
  164. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Minor.h +117 -0
  165. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/QR.h +67 -0
  166. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/SVD.h +637 -0
  167. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/TriangularSolver.h +42 -0
  168. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/VectorBlock.h +94 -0
  169. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +341 -0
  170. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +456 -0
  171. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +94 -0
  172. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +607 -0
  173. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +350 -0
  174. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +227 -0
  175. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +373 -0
  176. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +160 -0
  177. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealQZ.h +624 -0
  178. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur.h +525 -0
  179. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur_MKL.h +83 -0
  180. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +801 -0
  181. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +92 -0
  182. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +557 -0
  183. data/ext/eigen/eigen3/Eigen/src/Geometry/AlignedBox.h +392 -0
  184. data/ext/eigen/eigen3/Eigen/src/Geometry/AngleAxis.h +233 -0
  185. data/ext/eigen/eigen3/Eigen/src/Geometry/EulerAngles.h +104 -0
  186. data/ext/eigen/eigen3/Eigen/src/Geometry/Homogeneous.h +307 -0
  187. data/ext/eigen/eigen3/Eigen/src/Geometry/Hyperplane.h +280 -0
  188. data/ext/eigen/eigen3/Eigen/src/Geometry/OrthoMethods.h +218 -0
  189. data/ext/eigen/eigen3/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  190. data/ext/eigen/eigen3/Eigen/src/Geometry/Quaternion.h +776 -0
  191. data/ext/eigen/eigen3/Eigen/src/Geometry/Rotation2D.h +160 -0
  192. data/ext/eigen/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
  193. data/ext/eigen/eigen3/Eigen/src/Geometry/Scaling.h +166 -0
  194. data/ext/eigen/eigen3/Eigen/src/Geometry/Transform.h +1455 -0
  195. data/ext/eigen/eigen3/Eigen/src/Geometry/Translation.h +206 -0
  196. data/ext/eigen/eigen3/Eigen/src/Geometry/Umeyama.h +177 -0
  197. data/ext/eigen/eigen3/Eigen/src/Geometry/arch/Geometry_SSE.h +115 -0
  198. data/ext/eigen/eigen3/Eigen/src/Householder/BlockHouseholder.h +68 -0
  199. data/ext/eigen/eigen3/Eigen/src/Householder/Householder.h +171 -0
  200. data/ext/eigen/eigen3/Eigen/src/Householder/HouseholderSequence.h +441 -0
  201. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -0
  202. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +263 -0
  203. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +256 -0
  204. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +282 -0
  205. data/ext/eigen/eigen3/Eigen/src/Jacobi/Jacobi.h +433 -0
  206. data/ext/eigen/eigen3/Eigen/src/LU/Determinant.h +101 -0
  207. data/ext/eigen/eigen3/Eigen/src/LU/FullPivLU.h +751 -0
  208. data/ext/eigen/eigen3/Eigen/src/LU/Inverse.h +400 -0
  209. data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU.h +509 -0
  210. data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU_MKL.h +85 -0
  211. data/ext/eigen/eigen3/Eigen/src/LU/arch/Inverse_SSE.h +329 -0
  212. data/ext/eigen/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  213. data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Amd.h +444 -0
  214. data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1850 -0
  215. data/ext/eigen/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +721 -0
  216. data/ext/eigen/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +592 -0
  217. data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +580 -0
  218. data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR_MKL.h +99 -0
  219. data/ext/eigen/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +622 -0
  220. data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR.h +388 -0
  221. data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR_MKL.h +71 -0
  222. data/ext/eigen/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +338 -0
  223. data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD.h +976 -0
  224. data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD_MKL.h +92 -0
  225. data/ext/eigen/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +148 -0
  226. data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +671 -0
  227. data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  228. data/ext/eigen/eigen3/Eigen/src/SparseCore/AmbiVector.h +373 -0
  229. data/ext/eigen/eigen3/Eigen/src/SparseCore/CompressedStorage.h +233 -0
  230. data/ext/eigen/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +245 -0
  231. data/ext/eigen/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +181 -0
  232. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseBlock.h +537 -0
  233. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  234. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +325 -0
  235. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +163 -0
  236. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +311 -0
  237. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +196 -0
  238. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDot.h +101 -0
  239. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +26 -0
  240. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1262 -0
  241. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +461 -0
  242. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparsePermutation.h +148 -0
  243. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseProduct.h +188 -0
  244. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseRedux.h +45 -0
  245. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +507 -0
  246. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +150 -0
  247. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTranspose.h +63 -0
  248. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +179 -0
  249. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseUtil.h +172 -0
  250. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseVector.h +448 -0
  251. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseView.h +99 -0
  252. data/ext/eigen/eigen3/Eigen/src/SparseCore/TriangularSolver.h +334 -0
  253. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU.h +806 -0
  254. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  255. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +227 -0
  256. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +111 -0
  257. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +298 -0
  258. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  259. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +180 -0
  260. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +177 -0
  261. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +106 -0
  262. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +279 -0
  263. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +127 -0
  264. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  265. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  266. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  267. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  268. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +135 -0
  269. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  270. data/ext/eigen/eigen3/Eigen/src/SparseQR/SparseQR.h +714 -0
  271. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdDeque.h +134 -0
  272. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdList.h +114 -0
  273. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdVector.h +126 -0
  274. data/ext/eigen/eigen3/Eigen/src/StlSupport/details.h +84 -0
  275. data/ext/eigen/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1026 -0
  276. data/ext/eigen/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +474 -0
  277. data/ext/eigen/eigen3/Eigen/src/misc/Image.h +84 -0
  278. data/ext/eigen/eigen3/Eigen/src/misc/Kernel.h +81 -0
  279. data/ext/eigen/eigen3/Eigen/src/misc/Solve.h +76 -0
  280. data/ext/eigen/eigen3/Eigen/src/misc/SparseSolve.h +128 -0
  281. data/ext/eigen/eigen3/Eigen/src/misc/blas.h +658 -0
  282. data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +253 -0
  283. data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +187 -0
  284. data/ext/eigen/eigen3/Eigen/src/plugins/BlockMethods.h +935 -0
  285. data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +46 -0
  286. data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +172 -0
  287. data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +143 -0
  288. data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +52 -0
  289. data/ext/eigen/eigen3/signature_of_eigen3_matrix_library +1 -0
  290. data/ext/eigen/eigen_wrap.cxx +19420 -10396
  291. data/ext/eigen/extconf.rb +37 -2
  292. data/lib/eigen.rb +146 -3
  293. metadata +294 -7
@@ -0,0 +1,146 @@
1
+ /*
2
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ * Neither the name of Intel Corporation nor the names of its contributors may
13
+ be used to endorse or promote products derived from this software without
14
+ specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+ ********************************************************************************
28
+ * Content : Eigen bindings to Intel(R) MKL
29
+ * Level 3 BLAS SYRK/HERK implementation.
30
+ ********************************************************************************
31
+ */
32
+
33
+ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
34
+ #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
35
+
36
+ namespace Eigen {
37
+
38
+ namespace internal {
39
+ // Default rank-update implementation: inherits the BuiltIn general_matrix_matrix_triangular_product, passing A's scalar type, storage order and conjugation flag for both operands. The specializations defined by the macros below replace it for ColMajor results.
40
+ template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>
41
+ struct general_matrix_matrix_rankupdate :
42
+ general_matrix_matrix_triangular_product<
43
+ Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
44
+
45
+
46
+ // Try to go to the BLAS specialization: for a ColMajor result, run() forwards to general_matrix_matrix_rankupdate when lhs and rhs are the same pointer, and to the BuiltIn triangular product otherwise. NOTE(review): only pointer equality is tested; RhsStorageOrder, ConjugateRhs and rhsStride are not compared -- confirm callers guarantee that rhs is the (conjugate) transpose of lhs whenever the pointers match.
47
+ #define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
48
+ template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
49
+ int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
50
+ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
51
+ Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
52
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
53
+ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
54
+ { \
55
+ if (lhs==rhs) { /* same buffer on both sides: use the rank-update kernel */ \
56
+ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
57
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
58
+ } else { /* distinct buffers: fall back to the BuiltIn triangular product */ \
59
+ general_matrix_matrix_triangular_product<Index, \
60
+ Scalar, LhsStorageOrder, ConjugateLhs, \
61
+ Scalar, RhsStorageOrder, ConjugateRhs, \
62
+ ColMajor, UpLo, BuiltIn> \
63
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
64
+ } \
65
+ } \
66
+ };
67
+ // Only the real scalar types are instantiated here; the complex instantiations are commented out.
68
+ EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
69
+ //EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
70
+ EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
71
+ //EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
72
+
73
+ // SYRK for float/double: defines the ColMajor-result specialization of general_matrix_matrix_rankupdate for a real EIGTYPE. run() converts alpha, fixes beta to 1 and calls MKLFUNC on lhs only, so rhs and rhsStride are unused.
74
+ #define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
75
+ template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
76
+ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
77
+ enum { \
78
+ IsLower = (UpLo&Lower) == Lower, \
79
+ LowUp = IsLower ? Lower : Upper, \
80
+ conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
81
+ }; \
82
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
83
+ const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
84
+ { \
85
+ /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
86
+ \
87
+ MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
88
+ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
89
+ MKLTYPE alpha_, beta_; \
90
+ \
91
+ /* Set alpha_ & beta_ (beta_ is always 1) */ \
92
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
93
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
94
+ MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
95
+ } \
96
+ };
97
+
98
+ // HERK for complex data: defines the ColMajor-result specialization of general_matrix_matrix_rankupdate for a complex EIGTYPE. run() uses only the real part of alpha, fixes beta to 1, copies lhs into a conjugated temporary when conjA is set, and calls MKLFUNC; rhs and rhsStride are unused.
99
+ #define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
100
+ template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
101
+ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
102
+ enum { \
103
+ IsLower = (UpLo&Lower) == Lower, \
104
+ LowUp = IsLower ? Lower : Upper, \
105
+ conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
106
+ }; \
107
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
108
+ const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
109
+ { \
110
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
111
+ \
112
+ MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
113
+ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
114
+ RTYPE alpha_, beta_; \
115
+ const EIGTYPE* a_ptr; \
116
+ \
117
+ /* Set alpha_ & beta_ (real-valued: the imaginary part of alpha is dropped) */ \
118
+ /* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
119
+ /* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
120
+ alpha_ = alpha.real(); \
121
+ beta_ = 1.0; \
122
+ /* Copy with conjugation in some cases: when conjA is set, a owns the conjugated copy and lda is taken from it */ \
123
+ MatrixType a; \
124
+ if (conjA) { \
125
+ Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
126
+ a = mapA.conjugate(); \
127
+ lda = a.outerStride(); \
128
+ a_ptr = a.data(); \
129
+ } else a_ptr=lhs; \
130
+ MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
131
+ } \
132
+ };
133
+
134
+
135
+ EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
136
+ EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
137
+ // The HERK (complex) instantiations below are disabled. NOTE(review): the scomplex line passes RTYPE=double, whereas single-precision cherk presumably takes float alpha/beta -- confirm against the BLAS/MKL reference before re-enabling.
138
+ //EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
139
+ //EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk)
140
+
141
+
142
+ } // end namespace internal
143
+
144
+ } // end namespace Eigen
145
+
146
+ #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
@@ -0,0 +1,118 @@
1
+ /*
2
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ * Neither the name of Intel Corporation nor the names of its contributors may
13
+ be used to endorse or promote products derived from this software without
14
+ specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+ ********************************************************************************
28
+ * Content : Eigen bindings to Intel(R) MKL
29
+ * General matrix-matrix product functionality based on ?GEMM.
30
+ ********************************************************************************
31
+ */
32
+
33
+ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
34
+ #define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
35
+
36
+ namespace Eigen {
37
+
38
+ namespace internal {
39
+
40
+ /**********************************************************************
41
+ * This file implements general matrix-matrix multiplication using BLAS
42
+ * gemm function via partial specialization of
43
+ * general_matrix_matrix_product::run(..) method for float, double,
44
+ * std::complex<float> and std::complex<double> types
45
+ **********************************************************************/
46
+
47
+ // gemm specialization: GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) defines general_matrix_matrix_product for a ColMajor result with both operands of type EIGTYPE. run() maps the storage-order/conjugation flags onto transpose options and calls MKLPREFIX##gemm with beta = 1; the blocking and info arguments are ignored.
48
+
49
+ #define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
50
+ template< \
51
+ typename Index, \
52
+ int LhsStorageOrder, bool ConjugateLhs, \
53
+ int RhsStorageOrder, bool ConjugateRhs> \
54
+ struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
55
+ { \
56
+ static void run(Index rows, Index cols, Index depth, \
57
+ const EIGTYPE* _lhs, Index lhsStride, \
58
+ const EIGTYPE* _rhs, Index rhsStride, \
59
+ EIGTYPE* res, Index resStride, \
60
+ EIGTYPE alpha, \
61
+ level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \
62
+ GemmParallelInfo<Index>* /*info = 0*/) \
63
+ { \
64
+ using std::conj; \
65
+ \
66
+ char transa, transb; \
67
+ MKL_INT m, n, k, lda, ldb, ldc; \
68
+ const EIGTYPE *a, *b; \
69
+ MKLTYPE alpha_, beta_; \
70
+ MatrixX##EIGPREFIX a_tmp, b_tmp; \
71
+ EIGTYPE myone(1);\
72
+ \
73
+ /* Set transpose options: RowMajor operands are passed as 'T' (or 'C' when conjugated), ColMajor operands as 'N' */ \
74
+ transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
75
+ transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
76
+ \
77
+ /* Set m, n, k */ \
78
+ m = (MKL_INT)rows; \
79
+ n = (MKL_INT)cols; \
80
+ k = (MKL_INT)depth; \
81
+ \
82
+ /* Set alpha_ & beta_ (beta_ is always 1) */ \
83
+ assign_scalar_eig2mkl(alpha_, alpha); \
84
+ assign_scalar_eig2mkl(beta_, myone); \
85
+ \
86
+ /* Set lda, ldb, ldc */ \
87
+ lda = (MKL_INT)lhsStride; \
88
+ ldb = (MKL_INT)rhsStride; \
89
+ ldc = (MKL_INT)resStride; \
90
+ \
91
+ /* Set a, b: a ColMajor operand that must be conjugated is first copied into a conjugated temporary, whose data pointer and outer stride are used instead */ \
92
+ if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
93
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
94
+ a_tmp = lhs.conjugate(); \
95
+ a = a_tmp.data(); \
96
+ lda = a_tmp.outerStride(); \
97
+ } else a = _lhs; \
98
+ \
99
+ if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
100
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
101
+ b_tmp = rhs.conjugate(); \
102
+ b = b_tmp.data(); \
103
+ ldb = b_tmp.outerStride(); \
104
+ } else b = _rhs; \
105
+ \
106
+ MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
107
+ }};
108
+ // Instantiate for each supported scalar: (Eigen scalar, MatrixX suffix, MKL scalar type, gemm prefix).
109
+ GEMM_SPECIALIZATION(double, d, double, d)
110
+ GEMM_SPECIALIZATION(float, f, float, s)
111
+ GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
112
+ GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
113
+
114
+ } // end namespace internal
115
+
116
+ } // end namespace Eigen
117
+
118
+ #endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
@@ -0,0 +1,566 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
11
+ #define EIGEN_GENERAL_MATRIX_VECTOR_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ /* Optimized col-major matrix * vector product:
18
+ * This algorithm processes 4 columns at once, which both reduces
19
+ * the number of loads/stores of the result by a factor of 4 and reduces
20
+ * the instruction dependency. Moreover, we know that all bands have the
21
+ * same alignment pattern.
22
+ *
23
+ * Mixing type logic: C += alpha * A * B
24
+ * | A | B |alpha| comments
25
+ * |real |cplx |cplx | no vectorization
26
+ * |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
27
+ * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
28
+ * |cplx |real |real | optimal case, vectorization possible via real-cplx mul
+ *
+ * The struct below only declares the compile-time traits (packet sizes and
+ * packet types) and the static run() entry point; run() is defined out of
+ * line right after the struct.
29
+ */
30
+ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
31
+ struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
32
+ {
33
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; // scalar type of the result vector
34
+
35
+ enum {
36
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable // both scalar types must be vectorizable...
37
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size), // ...and share the same packet size
38
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1, // each *PacketSize falls back to 1 when Vectorizable is false
39
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
40
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
41
+ };
42
+
43
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket; // raw packet types, before the Vectorizable check
44
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
45
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
46
+
47
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket; // selected on Vectorizable between the packet type and the plain scalar (presumably scalar when false, as with std::conditional)
48
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
49
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
50
+
51
+ EIGEN_DONT_INLINE static void run( // accumulates into res; note that alpha is a RhsScalar in this specialization
52
+ Index rows, Index cols,
53
+ const LhsScalar* lhs, Index lhsStride,
54
+ const RhsScalar* rhs, Index rhsIncr,
55
+ ResScalar* res, Index resIncr, RhsScalar alpha);
56
+ };
57
+
58
+ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
59
+ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
60
+ Index rows, Index cols,
61
+ const LhsScalar* lhs, Index lhsStride,
62
+ const RhsScalar* rhs, Index rhsIncr,
63
+ ResScalar* res, Index resIncr, RhsScalar alpha)
64
+ { /* Accumulates the product of a column-major lhs with the rhs vector into res,
+ * i.e. res[j] += lhs(j,k) * (alpha * rhs[k]) for every column k, with the
+ * products going through conj_helper (cj / pcj) so that ConjugateLhs and
+ * ConjugateRhs are honoured.
+ *
+ * rows, cols : dimensions of lhs; res has `rows` coefficients.
+ * lhs, lhsStride : column k of lhs starts at lhs + k*lhsStride.
+ * rhs, rhsIncr : coefficient k of the rhs is rhs[k*rhsIncr].
+ * res, resIncr : result vector; resIncr is asserted to be 1.
+ * alpha : scaling factor, multiplied into each rhs coefficient.
+ *
+ * Columns are processed four at a time; the columns left over (and the
+ * leading columns skipped for alignment) are handled one by one at the end.
+ */
65
+ EIGEN_UNUSED_VARIABLE(resIncr) // resIncr is otherwise only read by the assertion below
66
+ eigen_internal_assert(resIncr==1);
67
+ #ifdef _EIGEN_ACCUMULATE_PACKETS
68
+ #error _EIGEN_ACCUMULATE_PACKETS has already been defined
69
+ #endif
70
+ #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \
71
+ pstore(&res[j], \
72
+ padd(pload<ResPacket>(&res[j]), \
73
+ padd( \
74
+ padd(pcj.pmul(EIGEN_CAT(ploa , A0)<LhsPacket>(&lhs0[j]), ptmp0), \
75
+ pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs1[j]), ptmp1)), \
76
+ padd(pcj.pmul(EIGEN_CAT(ploa , A2)<LhsPacket>(&lhs2[j]), ptmp2), \
77
+ pcj.pmul(EIGEN_CAT(ploa , A13)<LhsPacket>(&lhs3[j]), ptmp3)) )))
78
+
79
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; // scalar multiply helper
80
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; // packet multiply helper
81
+ if(ConjugateRhs)
82
+ alpha = numext::conj(alpha); // presumably pre-conjugated so that the conjugation cj/pcj apply to alpha*rhs[k] leaves alpha itself unconjugated -- TODO confirm
83
+
84
+ enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; // lhs column alignment patterns, selected by the switch below
85
+ const Index columnsAtOnce = 4; // number of lhs columns consumed per iteration of the main loop
86
+ const Index peels = 2; // number of result packets handled per iteration of the peeled loop
87
+ const Index LhsPacketAlignedMask = LhsPacketSize-1;
88
+ const Index ResPacketAlignedMask = ResPacketSize-1;
89
+ // const Index PeelAlignedMask = ResPacketSize*peels-1;
90
+ const Index size = rows;
91
+
92
+ // How many coeffs of the result do we have to skip to be aligned.
93
+ // Here we assume data are at least aligned on the base scalar type.
94
+ Index alignedStart = internal::first_aligned(res,size);
95
+ Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; // end of the range [alignedStart, alignedSize) processed packet by packet
96
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; // upper bound of the peeled loop in the FirstAligned case
97
+
98
+ const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; // derived from lhsStride modulo the packet size; used below to step the alignment offset from one column to the next
99
+ Index alignmentPattern = alignmentStep==0 ? AllAligned
100
+ : alignmentStep==(LhsPacketSize/2) ? EvenAligned
101
+ : FirstAligned;
102
+
103
+ // we cannot assume the first element is aligned because of sub-matrices
104
+ const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
105
+
106
+ // find how many columns do we have to skip to be aligned with the result (if possible)
107
+ Index skipColumns = 0;
108
+ // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
109
+ if( (size_t(lhs)%sizeof(LhsScalar)) || (size_t(res)%sizeof(ResScalar)) )
110
+ {
111
+ alignedSize = 0; // empty packet range: every coefficient goes through the scalar loops
112
+ alignedStart = 0;
113
+ }
114
+ else if (LhsPacketSize>1)
115
+ {
116
+ eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
117
+
118
+ while (skipColumns<LhsPacketSize &&
119
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
120
+ ++skipColumns; // look for the first column whose alignment offset equals alignedStart
121
+ if (skipColumns==LhsPacketSize)
122
+ {
123
+ // nothing can be aligned, no need to skip any column
124
+ alignmentPattern = NoneAligned;
125
+ skipColumns = 0;
126
+ }
127
+ else
128
+ {
129
+ skipColumns = (std::min)(skipColumns,cols);
130
+ // note that the skipped columns are processed later.
131
+ }
132
+
133
+ eigen_internal_assert( (alignmentPattern==NoneAligned)
134
+ || (skipColumns + columnsAtOnce >= cols)
135
+ || LhsPacketSize > size
136
+ || (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);
137
+ }
138
+ else if(Vectorizable)
139
+ {
140
+ alignedStart = 0; // packet size is 1 here, so the whole range is treated as aligned
141
+ alignedSize = size;
142
+ alignmentPattern = AllAligned;
143
+ }
144
+
145
+ Index offset1 = (FirstAligned && alignmentStep==1?3:1); // NOTE(review): FirstAligned is a non-zero enum constant, so this condition reduces to alignmentStep==1; possibly alignmentPattern==FirstAligned was intended -- confirm
146
+ Index offset3 = (FirstAligned && alignmentStep==1?1:3); // offset1/offset3 swap which of columns i+1 and i+3 is bound to lhs1 and lhs3
147
+
148
+ Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; // end of the columns handled four at a time
149
+ for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
150
+ {
151
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[i*rhsIncr]), // alpha-scaled rhs coefficient of each of the four columns, passed through pset1
152
+ ptmp1 = pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
153
+ ptmp2 = pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
154
+ ptmp3 = pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
155
+
156
+ // this helps a lot generating better binary code
157
+ const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
158
+ *lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
159
+
160
+ if (Vectorizable)
161
+ {
162
+ /* explicit vectorization */
163
+ // process initial unaligned coeffs
164
+ for (Index j=0; j<alignedStart; ++j)
165
+ {
166
+ res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
167
+ res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
168
+ res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
169
+ res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
170
+ }
171
+
172
+ if (alignedSize>alignedStart)
173
+ {
174
+ switch(alignmentPattern) // the macro arguments select pload (d) or ploadu (du) for lhs0, lhs1/lhs3 and lhs2 respectively
175
+ {
176
+ case AllAligned:
177
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
178
+ _EIGEN_ACCUMULATE_PACKETS(d,d,d);
179
+ break;
180
+ case EvenAligned:
181
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
182
+ _EIGEN_ACCUMULATE_PACKETS(d,du,d);
183
+ break;
184
+ case FirstAligned:
185
+ {
186
+ Index j = alignedStart;
187
+ if(peels>1)
188
+ {
189
+ LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
190
+ ResPacket T0, T1;
191
+
192
+ A01 = pload<LhsPacket>(&lhs1[alignedStart-1]); // initial loads for lhs1..lhs3, taken 1, 2 and 3 coefficients before alignedStart
193
+ A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
194
+ A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
195
+
196
+ for (; j<peeledSize; j+=peels*ResPacketSize) // two result packets (res[j] and res[j+ResPacketSize]) per iteration
197
+ {
198
+ A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11); // pload + palign<N> are used for lhs1..lhs3 here instead of ploadu
199
+ A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
200
+ A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
201
+
202
+ A00 = pload<LhsPacket>(&lhs0[j]);
203
+ A10 = pload<LhsPacket>(&lhs0[j+LhsPacketSize]);
204
+ T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
205
+ T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
206
+
207
+ T0 = pcj.pmadd(A01, ptmp1, T0);
208
+ A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
209
+ T0 = pcj.pmadd(A02, ptmp2, T0);
210
+ A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
211
+ T0 = pcj.pmadd(A03, ptmp3, T0);
212
+ pstore(&res[j],T0);
213
+ A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
214
+ T1 = pcj.pmadd(A11, ptmp1, T1);
215
+ T1 = pcj.pmadd(A12, ptmp2, T1);
216
+ T1 = pcj.pmadd(A13, ptmp3, T1);
217
+ pstore(&res[j+ResPacketSize],T1);
218
+ }
219
+ }
220
+ for (; j<alignedSize; j+=ResPacketSize) // packets not covered by the peeled loop
221
+ _EIGEN_ACCUMULATE_PACKETS(d,du,du);
222
+ break;
223
+ }
224
+ default:
225
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
226
+ _EIGEN_ACCUMULATE_PACKETS(du,du,du);
227
+ break;
228
+ }
229
+ }
230
+ } // end explicit vectorization
231
+
232
+ /* process remaining coeffs (or all if there is no explicit vectorization) */
233
+ for (Index j=alignedSize; j<size; ++j)
234
+ {
235
+ res[j] = cj.pmadd(lhs0[j], pfirst(ptmp0), res[j]);
236
+ res[j] = cj.pmadd(lhs1[j], pfirst(ptmp1), res[j]);
237
+ res[j] = cj.pmadd(lhs2[j], pfirst(ptmp2), res[j]);
238
+ res[j] = cj.pmadd(lhs3[j], pfirst(ptmp3), res[j]);
239
+ }
240
+ }
241
+
242
+ // process the remaining columns one at a time: the trailing ones (at most columnsAtOnce-1) and then the skipped leading ones
243
+ Index end = cols;
244
+ Index start = columnBound;
245
+ do
246
+ {
247
+ for (Index k=start; k<end; ++k)
248
+ {
249
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
250
+ const LhsScalar* lhs0 = lhs + k*lhsStride;
251
+
252
+ if (Vectorizable)
253
+ {
254
+ /* explicit vectorization */
255
+ // process first unaligned result's coeffs
256
+ for (Index j=0; j<alignedStart; ++j)
257
+ res[j] += cj.pmul(lhs0[j], pfirst(ptmp0));
258
+ // process aligned result's coeffs
259
+ if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0) // aligned or unaligned lhs loads are chosen per column at run time
260
+ for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
261
+ pstore(&res[i], pcj.pmadd(pload<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
262
+ else
263
+ for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
264
+ pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
265
+ }
266
+
267
+ // process remaining scalars (or all if no explicit vectorization)
268
+ for (Index i=alignedSize; i<size; ++i)
269
+ res[i] += cj.pmul(lhs0[i], pfirst(ptmp0));
270
+ }
271
+ if (skipColumns)
272
+ {
273
+ start = 0; // second pass: the leading columns [0, skipColumns) that the main loop skipped
274
+ end = skipColumns;
275
+ skipColumns = 0; // cleared so that the next iteration takes the break below
276
+ }
277
+ else
278
+ break;
279
+ } while(Vectorizable);
280
+ #undef _EIGEN_ACCUMULATE_PACKETS
281
+ }
282
+
283
+ /* Optimized row-major matrix * vector product:
284
+ * This algorithm processes 4 rows at once, which both reduces
285
+ * the number of loads/stores of the result by a factor of 4 and reduces
286
+ * the instruction dependency. Moreover, we know that all bands have the
287
+ * same alignment pattern.
288
+ *
289
+ * Mixing type logic:
290
+ * - alpha is always a complex (or converted to a complex)
291
+ * - no vectorization
+ *
+ * The struct below only declares the compile-time traits (packet sizes and
+ * packet types) and the static run() entry point; run() is defined out of
+ * line right after the struct. Unlike the col-major specialization, run()
+ * takes alpha as a ResScalar.
292
+ */
293
+ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
294
+ struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
295
+ {
296
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; // scalar type of the result vector
297
+
298
+ enum {
299
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable // both scalar types must be vectorizable...
300
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size), // ...and share the same packet size
301
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1, // each *PacketSize falls back to 1 when Vectorizable is false
302
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
303
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
304
+ };
305
+
306
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket; // raw packet types, before the Vectorizable check
307
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
308
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
309
+
310
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket; // selected on Vectorizable between the packet type and the plain scalar (presumably scalar when false, as with std::conditional)
311
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
312
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
313
+
314
+ EIGEN_DONT_INLINE static void run( // accumulates into res; alpha is a ResScalar here
315
+ Index rows, Index cols,
316
+ const LhsScalar* lhs, Index lhsStride,
317
+ const RhsScalar* rhs, Index rhsIncr,
318
+ ResScalar* res, Index resIncr,
319
+ ResScalar alpha);
320
+ };
321
+
322
+ template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
323
+ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
324
+ Index rows, Index cols,
325
+ const LhsScalar* lhs, Index lhsStride,
326
+ const RhsScalar* rhs, Index rhsIncr,
327
+ ResScalar* res, Index resIncr,
328
+ ResScalar alpha)
329
+ { /* Accumulates the product of a row-major lhs with the rhs vector into res,
+ * i.e. res[i*resIncr] += alpha * sum_j lhs(i,j)*rhs[j], with the products
+ * going through conj_helper (cj / pcj) so that ConjugateLhs and
+ * ConjugateRhs are honoured.
+ *
+ * rows, cols : dimensions of lhs; each row is a dot product of length cols.
+ * lhs, lhsStride : row i of lhs starts at lhs + i*lhsStride.
+ * rhs, rhsIncr : rhs vector; rhsIncr is asserted to be 1.
+ * res, resIncr : coefficient i of the result is res[i*resIncr].
+ * alpha : scaling factor applied to each dot product.
+ *
+ * Rows are processed four at a time; the rows left over (and the leading
+ * rows skipped for alignment) are handled one by one at the end.
+ */
330
+ EIGEN_UNUSED_VARIABLE(rhsIncr); // rhsIncr is otherwise only read by the assertion below
331
+ eigen_internal_assert(rhsIncr==1);
332
+ #ifdef _EIGEN_ACCUMULATE_PACKETS
333
+ #error _EIGEN_ACCUMULATE_PACKETS has already been defined
334
+ #endif
335
+
336
+ #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\
337
+ RhsPacket b = pload<RhsPacket>(&rhs[j]); \
338
+ ptmp0 = pcj.pmadd(EIGEN_CAT(ploa,A0) <LhsPacket>(&lhs0[j]), b, ptmp0); \
339
+ ptmp1 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs1[j]), b, ptmp1); \
340
+ ptmp2 = pcj.pmadd(EIGEN_CAT(ploa,A2) <LhsPacket>(&lhs2[j]), b, ptmp2); \
341
+ ptmp3 = pcj.pmadd(EIGEN_CAT(ploa,A13)<LhsPacket>(&lhs3[j]), b, ptmp3); }
342
+
343
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; // scalar multiply helper
344
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; // packet multiply helper
345
+
346
+ enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; // lhs row alignment patterns, selected by the switch below
347
+ const Index rowsAtOnce = 4; // number of lhs rows consumed per iteration of the main loop
348
+ const Index peels = 2; // number of rhs packets handled per iteration of the peeled loop
349
+ const Index RhsPacketAlignedMask = RhsPacketSize-1;
350
+ const Index LhsPacketAlignedMask = LhsPacketSize-1;
351
+ // const Index PeelAlignedMask = RhsPacketSize*peels-1;
352
+ const Index depth = cols; // length of each dot product
353
+
354
+ // How many coeffs of the rhs do we have to skip to be aligned.
355
+ // Here we assume data are at least aligned on the base scalar type
356
+ // if that's not the case then vectorization is discarded, see below.
357
+ Index alignedStart = internal::first_aligned(rhs, depth);
358
+ Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; // end of the range [alignedStart, alignedSize) processed packet by packet
359
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; // upper bound of the peeled loop in the FirstAligned case
360
+
361
+ const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; // derived from lhsStride modulo the packet size; used below to step the alignment offset from one row to the next
362
+ Index alignmentPattern = alignmentStep==0 ? AllAligned
363
+ : alignmentStep==(LhsPacketSize/2) ? EvenAligned
364
+ : FirstAligned;
365
+
366
+ // we cannot assume the first element is aligned because of sub-matrices
367
+ const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
368
+
369
+ // find how many rows do we have to skip to be aligned with rhs (if possible)
370
+ Index skipRows = 0;
371
+ // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
372
+ if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || (size_t(lhs)%sizeof(LhsScalar)) || (size_t(rhs)%sizeof(RhsScalar)) )
373
+ {
374
+ alignedSize = 0; // empty packet range: every coefficient goes through the scalar loops
375
+ alignedStart = 0;
376
+ }
377
+ else if (LhsPacketSize>1)
378
+ {
379
+ eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
380
+
381
+ while (skipRows<LhsPacketSize &&
382
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
383
+ ++skipRows; // look for the first row whose alignment offset equals alignedStart
384
+ if (skipRows==LhsPacketSize)
385
+ {
386
+ // nothing can be aligned, no need to skip any row
387
+ alignmentPattern = NoneAligned;
388
+ skipRows = 0;
389
+ }
390
+ else
391
+ {
392
+ skipRows = (std::min)(skipRows,Index(rows));
393
+ // note that the skipped rows are processed later.
394
+ }
395
+ eigen_internal_assert( alignmentPattern==NoneAligned
396
+ || LhsPacketSize==1
397
+ || (skipRows + rowsAtOnce >= rows)
398
+ || LhsPacketSize > depth
399
+ || (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);
400
+ }
401
+ else if(Vectorizable)
402
+ {
403
+ alignedStart = 0; // packet size is 1 here, so the whole range is treated as aligned
404
+ alignedSize = depth;
405
+ alignmentPattern = AllAligned;
406
+ }
407
+
408
+ Index offset1 = (FirstAligned && alignmentStep==1?3:1); // NOTE(review): FirstAligned is a non-zero enum constant, so this condition reduces to alignmentStep==1; possibly alignmentPattern==FirstAligned was intended -- confirm
409
+ Index offset3 = (FirstAligned && alignmentStep==1?1:3); // offset1/offset3 swap which of rows i+1 and i+3 is bound to lhs1 and lhs3
410
+
411
+ Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; // end of the rows handled four at a time
412
+ for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
413
+ {
414
+ EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0); // scalar accumulators of the four dot products
415
+ ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
416
+
417
+ // this helps the compiler generating good binary code
418
+ const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
419
+ *lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
420
+
421
+ if (Vectorizable)
422
+ {
423
+ /* explicit vectorization */
424
+ ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)), // packet accumulators, reduced into tmp0..tmp3 with predux below
425
+ ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
426
+
427
+ // process initial unaligned coeffs
428
+ // FIXME this loop get vectorized by the compiler !
429
+ for (Index j=0; j<alignedStart; ++j)
430
+ {
431
+ RhsScalar b = rhs[j];
432
+ tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
433
+ tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
434
+ }
435
+
436
+ if (alignedSize>alignedStart)
437
+ {
438
+ switch(alignmentPattern) // the macro arguments select pload (d) or ploadu (du) for lhs0, lhs1/lhs3 and lhs2 respectively
439
+ {
440
+ case AllAligned:
441
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
442
+ _EIGEN_ACCUMULATE_PACKETS(d,d,d);
443
+ break;
444
+ case EvenAligned:
445
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
446
+ _EIGEN_ACCUMULATE_PACKETS(d,du,d);
447
+ break;
448
+ case FirstAligned:
449
+ {
450
+ Index j = alignedStart;
451
+ if (peels>1)
452
+ {
453
+ /* Here we process 4 rows with two peeled iterations to hide
454
+ * the overhead of unaligned loads. Moreover unaligned loads are handled
455
+ * using special shift/move operations between the two aligned packets
456
+ * overlapping the desired unaligned packet. This is *much* more efficient
457
+ * than basic unaligned loads.
458
+ */
459
+ LhsPacket A01, A02, A03, A11, A12, A13;
460
+ A01 = pload<LhsPacket>(&lhs1[alignedStart-1]); // initial loads for lhs1..lhs3, taken 1, 2 and 3 coefficients before alignedStart
461
+ A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
462
+ A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
463
+
464
+ for (; j<peeledSize; j+=peels*RhsPacketSize) // two rhs packets (rhs[j] and rhs[j+RhsPacketSize]) per iteration
465
+ {
466
+ RhsPacket b = pload<RhsPacket>(&rhs[j]);
467
+ A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
468
+ A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
469
+ A13 = pload<LhsPacket>(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13);
470
+
471
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), b, ptmp0);
472
+ ptmp1 = pcj.pmadd(A01, b, ptmp1);
473
+ A01 = pload<LhsPacket>(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01);
474
+ ptmp2 = pcj.pmadd(A02, b, ptmp2);
475
+ A02 = pload<LhsPacket>(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02);
476
+ ptmp3 = pcj.pmadd(A03, b, ptmp3);
477
+ A03 = pload<LhsPacket>(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03);
478
+
479
+ b = pload<RhsPacket>(&rhs[j+RhsPacketSize]);
480
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j+LhsPacketSize]), b, ptmp0);
481
+ ptmp1 = pcj.pmadd(A11, b, ptmp1);
482
+ ptmp2 = pcj.pmadd(A12, b, ptmp2);
483
+ ptmp3 = pcj.pmadd(A13, b, ptmp3);
484
+ }
485
+ }
486
+ for (; j<alignedSize; j+=RhsPacketSize) // packets not covered by the peeled loop
487
+ _EIGEN_ACCUMULATE_PACKETS(d,du,du);
488
+ break;
489
+ }
490
+ default:
491
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
492
+ _EIGEN_ACCUMULATE_PACKETS(du,du,du);
493
+ break;
494
+ }
495
+ tmp0 += predux(ptmp0); // fold each packet accumulator into its scalar accumulator
496
+ tmp1 += predux(ptmp1);
497
+ tmp2 += predux(ptmp2);
498
+ tmp3 += predux(ptmp3);
499
+ }
500
+ } // end explicit vectorization
501
+
502
+ // process remaining coeffs (or all if no explicit vectorization)
503
+ // FIXME this loop get vectorized by the compiler !
504
+ for (Index j=alignedSize; j<depth; ++j)
505
+ {
506
+ RhsScalar b = rhs[j];
507
+ tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b);
508
+ tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b);
509
+ }
510
+ res[i*resIncr] += alpha*tmp0; // result indices mirror the row order used for lhs0..lhs3 above
511
+ res[(i+offset1)*resIncr] += alpha*tmp1;
512
+ res[(i+2)*resIncr] += alpha*tmp2;
513
+ res[(i+offset3)*resIncr] += alpha*tmp3;
514
+ }
515
+
516
+ // process the remaining rows one at a time: the trailing ones (at most rowsAtOnce-1) and then the skipped leading ones
517
+ Index end = rows;
518
+ Index start = rowBound;
519
+ do
520
+ {
521
+ for (Index i=start; i<end; ++i)
522
+ {
523
+ EIGEN_ALIGN16 ResScalar tmp0 = ResScalar(0);
524
+ ResPacket ptmp0 = pset1<ResPacket>(tmp0);
525
+ const LhsScalar* lhs0 = lhs + i*lhsStride;
526
+ // process first unaligned result's coeffs
527
+ // FIXME this loop get vectorized by the compiler !
528
+ for (Index j=0; j<alignedStart; ++j)
529
+ tmp0 += cj.pmul(lhs0[j], rhs[j]);
530
+
531
+ if (alignedSize>alignedStart)
532
+ {
533
+ // process aligned rhs coeffs
534
+ if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0) // aligned or unaligned lhs loads are chosen per row at run time
535
+ for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
536
+ ptmp0 = pcj.pmadd(pload<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
537
+ else
538
+ for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
539
+ ptmp0 = pcj.pmadd(ploadu<LhsPacket>(&lhs0[j]), pload<RhsPacket>(&rhs[j]), ptmp0);
540
+ tmp0 += predux(ptmp0);
541
+ }
542
+
543
+ // process remaining scalars
544
+ // FIXME this loop get vectorized by the compiler !
545
+ for (Index j=alignedSize; j<depth; ++j)
546
+ tmp0 += cj.pmul(lhs0[j], rhs[j]);
547
+ res[i*resIncr] += alpha*tmp0;
548
+ }
549
+ if (skipRows)
550
+ {
551
+ start = 0; // second pass: the leading rows [0, skipRows) that the main loop skipped
552
+ end = skipRows;
553
+ skipRows = 0; // cleared so that the next iteration takes the break below
554
+ }
555
+ else
556
+ break;
557
+ } while(Vectorizable);
558
+
559
+ #undef _EIGEN_ACCUMULATE_PACKETS
560
+ }
561
+
562
+ } // end namespace internal
563
+
564
+ } // end namespace Eigen
565
+
566
+ #endif // EIGEN_GENERAL_MATRIX_VECTOR_H