ruby-eigen 0.0.9 → 0.0.10.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +22 -0
  3. data/README.md +21 -0
  4. data/ext/eigen/eigen3/COPYING.BSD +26 -0
  5. data/ext/eigen/eigen3/COPYING.MPL2 +373 -0
  6. data/ext/eigen/eigen3/COPYING.README +18 -0
  7. data/ext/eigen/eigen3/Eigen/Array +11 -0
  8. data/ext/eigen/eigen3/Eigen/Cholesky +32 -0
  9. data/ext/eigen/eigen3/Eigen/CholmodSupport +45 -0
  10. data/ext/eigen/eigen3/Eigen/Core +376 -0
  11. data/ext/eigen/eigen3/Eigen/Dense +7 -0
  12. data/ext/eigen/eigen3/Eigen/Eigen +2 -0
  13. data/ext/eigen/eigen3/Eigen/Eigen2Support +95 -0
  14. data/ext/eigen/eigen3/Eigen/Eigenvalues +48 -0
  15. data/ext/eigen/eigen3/Eigen/Geometry +63 -0
  16. data/ext/eigen/eigen3/Eigen/Householder +23 -0
  17. data/ext/eigen/eigen3/Eigen/IterativeLinearSolvers +40 -0
  18. data/ext/eigen/eigen3/Eigen/Jacobi +26 -0
  19. data/ext/eigen/eigen3/Eigen/LU +41 -0
  20. data/ext/eigen/eigen3/Eigen/LeastSquares +32 -0
  21. data/ext/eigen/eigen3/Eigen/MetisSupport +28 -0
  22. data/ext/eigen/eigen3/Eigen/PaStiXSupport +46 -0
  23. data/ext/eigen/eigen3/Eigen/PardisoSupport +30 -0
  24. data/ext/eigen/eigen3/Eigen/QR +45 -0
  25. data/ext/eigen/eigen3/Eigen/QtAlignedMalloc +34 -0
  26. data/ext/eigen/eigen3/Eigen/SPQRSupport +29 -0
  27. data/ext/eigen/eigen3/Eigen/SVD +37 -0
  28. data/ext/eigen/eigen3/Eigen/Sparse +27 -0
  29. data/ext/eigen/eigen3/Eigen/SparseCore +64 -0
  30. data/ext/eigen/eigen3/Eigen/SparseLU +49 -0
  31. data/ext/eigen/eigen3/Eigen/SparseQR +33 -0
  32. data/ext/eigen/eigen3/Eigen/StdDeque +27 -0
  33. data/ext/eigen/eigen3/Eigen/StdList +26 -0
  34. data/ext/eigen/eigen3/Eigen/StdVector +27 -0
  35. data/ext/eigen/eigen3/Eigen/SuperLUSupport +59 -0
  36. data/ext/eigen/eigen3/Eigen/UmfPackSupport +36 -0
  37. data/ext/eigen/eigen3/Eigen/src/Cholesky/LDLT.h +611 -0
  38. data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT.h +498 -0
  39. data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT_MKL.h +102 -0
  40. data/ext/eigen/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +607 -0
  41. data/ext/eigen/eigen3/Eigen/src/Core/Array.h +323 -0
  42. data/ext/eigen/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
  43. data/ext/eigen/eigen3/Eigen/src/Core/ArrayWrapper.h +264 -0
  44. data/ext/eigen/eigen3/Eigen/src/Core/Assign.h +590 -0
  45. data/ext/eigen/eigen3/Eigen/src/Core/Assign_MKL.h +224 -0
  46. data/ext/eigen/eigen3/Eigen/src/Core/BandMatrix.h +334 -0
  47. data/ext/eigen/eigen3/Eigen/src/Core/Block.h +406 -0
  48. data/ext/eigen/eigen3/Eigen/src/Core/BooleanRedux.h +154 -0
  49. data/ext/eigen/eigen3/Eigen/src/Core/CommaInitializer.h +154 -0
  50. data/ext/eigen/eigen3/Eigen/src/Core/CoreIterators.h +61 -0
  51. data/ext/eigen/eigen3/Eigen/src/Core/CwiseBinaryOp.h +230 -0
  52. data/ext/eigen/eigen3/Eigen/src/Core/CwiseNullaryOp.h +864 -0
  53. data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryOp.h +126 -0
  54. data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryView.h +139 -0
  55. data/ext/eigen/eigen3/Eigen/src/Core/DenseBase.h +521 -0
  56. data/ext/eigen/eigen3/Eigen/src/Core/DenseCoeffsBase.h +754 -0
  57. data/ext/eigen/eigen3/Eigen/src/Core/DenseStorage.h +434 -0
  58. data/ext/eigen/eigen3/Eigen/src/Core/Diagonal.h +237 -0
  59. data/ext/eigen/eigen3/Eigen/src/Core/DiagonalMatrix.h +313 -0
  60. data/ext/eigen/eigen3/Eigen/src/Core/DiagonalProduct.h +131 -0
  61. data/ext/eigen/eigen3/Eigen/src/Core/Dot.h +263 -0
  62. data/ext/eigen/eigen3/Eigen/src/Core/EigenBase.h +131 -0
  63. data/ext/eigen/eigen3/Eigen/src/Core/Flagged.h +140 -0
  64. data/ext/eigen/eigen3/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  65. data/ext/eigen/eigen3/Eigen/src/Core/Functors.h +1026 -0
  66. data/ext/eigen/eigen3/Eigen/src/Core/Fuzzy.h +150 -0
  67. data/ext/eigen/eigen3/Eigen/src/Core/GeneralProduct.h +635 -0
  68. data/ext/eigen/eigen3/Eigen/src/Core/GenericPacketMath.h +350 -0
  69. data/ext/eigen/eigen3/Eigen/src/Core/GlobalFunctions.h +92 -0
  70. data/ext/eigen/eigen3/Eigen/src/Core/IO.h +250 -0
  71. data/ext/eigen/eigen3/Eigen/src/Core/Map.h +192 -0
  72. data/ext/eigen/eigen3/Eigen/src/Core/MapBase.h +247 -0
  73. data/ext/eigen/eigen3/Eigen/src/Core/MathFunctions.h +768 -0
  74. data/ext/eigen/eigen3/Eigen/src/Core/Matrix.h +420 -0
  75. data/ext/eigen/eigen3/Eigen/src/Core/MatrixBase.h +563 -0
  76. data/ext/eigen/eigen3/Eigen/src/Core/NestByValue.h +111 -0
  77. data/ext/eigen/eigen3/Eigen/src/Core/NoAlias.h +134 -0
  78. data/ext/eigen/eigen3/Eigen/src/Core/NumTraits.h +150 -0
  79. data/ext/eigen/eigen3/Eigen/src/Core/PermutationMatrix.h +721 -0
  80. data/ext/eigen/eigen3/Eigen/src/Core/PlainObjectBase.h +822 -0
  81. data/ext/eigen/eigen3/Eigen/src/Core/ProductBase.h +290 -0
  82. data/ext/eigen/eigen3/Eigen/src/Core/Random.h +152 -0
  83. data/ext/eigen/eigen3/Eigen/src/Core/Redux.h +409 -0
  84. data/ext/eigen/eigen3/Eigen/src/Core/Ref.h +278 -0
  85. data/ext/eigen/eigen3/Eigen/src/Core/Replicate.h +177 -0
  86. data/ext/eigen/eigen3/Eigen/src/Core/ReturnByValue.h +99 -0
  87. data/ext/eigen/eigen3/Eigen/src/Core/Reverse.h +224 -0
  88. data/ext/eigen/eigen3/Eigen/src/Core/Select.h +162 -0
  89. data/ext/eigen/eigen3/Eigen/src/Core/SelfAdjointView.h +314 -0
  90. data/ext/eigen/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +191 -0
  91. data/ext/eigen/eigen3/Eigen/src/Core/SolveTriangular.h +260 -0
  92. data/ext/eigen/eigen3/Eigen/src/Core/StableNorm.h +203 -0
  93. data/ext/eigen/eigen3/Eigen/src/Core/Stride.h +108 -0
  94. data/ext/eigen/eigen3/Eigen/src/Core/Swap.h +126 -0
  95. data/ext/eigen/eigen3/Eigen/src/Core/Transpose.h +419 -0
  96. data/ext/eigen/eigen3/Eigen/src/Core/Transpositions.h +436 -0
  97. data/ext/eigen/eigen3/Eigen/src/Core/TriangularMatrix.h +839 -0
  98. data/ext/eigen/eigen3/Eigen/src/Core/VectorBlock.h +95 -0
  99. data/ext/eigen/eigen3/Eigen/src/Core/VectorwiseOp.h +642 -0
  100. data/ext/eigen/eigen3/Eigen/src/Core/Visitor.h +237 -0
  101. data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +217 -0
  102. data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +501 -0
  103. data/ext/eigen/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
  104. data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/Complex.h +253 -0
  105. data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +420 -0
  106. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/Complex.h +442 -0
  107. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +475 -0
  108. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +649 -0
  109. data/ext/eigen/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h +476 -0
  110. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1341 -0
  111. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +427 -0
  112. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +278 -0
  113. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +146 -0
  114. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +118 -0
  115. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +566 -0
  116. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +131 -0
  117. data/ext/eigen/eigen3/Eigen/src/Core/products/Parallelizer.h +162 -0
  118. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +436 -0
  119. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +295 -0
  120. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +281 -0
  121. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +114 -0
  122. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +123 -0
  123. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  124. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +427 -0
  125. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +309 -0
  126. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +348 -0
  127. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h +247 -0
  128. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +332 -0
  129. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h +155 -0
  130. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +139 -0
  131. data/ext/eigen/eigen3/Eigen/src/Core/util/BlasUtil.h +264 -0
  132. data/ext/eigen/eigen3/Eigen/src/Core/util/Constants.h +451 -0
  133. data/ext/eigen/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +40 -0
  134. data/ext/eigen/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  135. data/ext/eigen/eigen3/Eigen/src/Core/util/MKL_support.h +158 -0
  136. data/ext/eigen/eigen3/Eigen/src/Core/util/Macros.h +451 -0
  137. data/ext/eigen/eigen3/Eigen/src/Core/util/Memory.h +977 -0
  138. data/ext/eigen/eigen3/Eigen/src/Core/util/Meta.h +243 -0
  139. data/ext/eigen/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
  140. data/ext/eigen/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +14 -0
  141. data/ext/eigen/eigen3/Eigen/src/Core/util/StaticAssert.h +208 -0
  142. data/ext/eigen/eigen3/Eigen/src/Core/util/XprHelper.h +469 -0
  143. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Block.h +126 -0
  144. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Cwise.h +192 -0
  145. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/CwiseOperators.h +298 -0
  146. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AlignedBox.h +159 -0
  147. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/All.h +115 -0
  148. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AngleAxis.h +214 -0
  149. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Hyperplane.h +254 -0
  150. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h +141 -0
  151. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Quaternion.h +495 -0
  152. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Rotation2D.h +145 -0
  153. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/RotationBase.h +123 -0
  154. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Scaling.h +167 -0
  155. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Transform.h +786 -0
  156. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Translation.h +184 -0
  157. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LU.h +120 -0
  158. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Lazy.h +71 -0
  159. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LeastSquares.h +169 -0
  160. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Macros.h +20 -0
  161. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/MathFunctions.h +57 -0
  162. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Memory.h +45 -0
  163. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Meta.h +75 -0
  164. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Minor.h +117 -0
  165. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/QR.h +67 -0
  166. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/SVD.h +637 -0
  167. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/TriangularSolver.h +42 -0
  168. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/VectorBlock.h +94 -0
  169. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +341 -0
  170. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +456 -0
  171. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +94 -0
  172. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +607 -0
  173. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +350 -0
  174. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +227 -0
  175. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +373 -0
  176. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +160 -0
  177. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealQZ.h +624 -0
  178. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur.h +525 -0
  179. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur_MKL.h +83 -0
  180. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +801 -0
  181. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +92 -0
  182. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +557 -0
  183. data/ext/eigen/eigen3/Eigen/src/Geometry/AlignedBox.h +392 -0
  184. data/ext/eigen/eigen3/Eigen/src/Geometry/AngleAxis.h +233 -0
  185. data/ext/eigen/eigen3/Eigen/src/Geometry/EulerAngles.h +104 -0
  186. data/ext/eigen/eigen3/Eigen/src/Geometry/Homogeneous.h +307 -0
  187. data/ext/eigen/eigen3/Eigen/src/Geometry/Hyperplane.h +280 -0
  188. data/ext/eigen/eigen3/Eigen/src/Geometry/OrthoMethods.h +218 -0
  189. data/ext/eigen/eigen3/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  190. data/ext/eigen/eigen3/Eigen/src/Geometry/Quaternion.h +776 -0
  191. data/ext/eigen/eigen3/Eigen/src/Geometry/Rotation2D.h +160 -0
  192. data/ext/eigen/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
  193. data/ext/eigen/eigen3/Eigen/src/Geometry/Scaling.h +166 -0
  194. data/ext/eigen/eigen3/Eigen/src/Geometry/Transform.h +1455 -0
  195. data/ext/eigen/eigen3/Eigen/src/Geometry/Translation.h +206 -0
  196. data/ext/eigen/eigen3/Eigen/src/Geometry/Umeyama.h +177 -0
  197. data/ext/eigen/eigen3/Eigen/src/Geometry/arch/Geometry_SSE.h +115 -0
  198. data/ext/eigen/eigen3/Eigen/src/Householder/BlockHouseholder.h +68 -0
  199. data/ext/eigen/eigen3/Eigen/src/Householder/Householder.h +171 -0
  200. data/ext/eigen/eigen3/Eigen/src/Householder/HouseholderSequence.h +441 -0
  201. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -0
  202. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +263 -0
  203. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +256 -0
  204. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +282 -0
  205. data/ext/eigen/eigen3/Eigen/src/Jacobi/Jacobi.h +433 -0
  206. data/ext/eigen/eigen3/Eigen/src/LU/Determinant.h +101 -0
  207. data/ext/eigen/eigen3/Eigen/src/LU/FullPivLU.h +751 -0
  208. data/ext/eigen/eigen3/Eigen/src/LU/Inverse.h +400 -0
  209. data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU.h +509 -0
  210. data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU_MKL.h +85 -0
  211. data/ext/eigen/eigen3/Eigen/src/LU/arch/Inverse_SSE.h +329 -0
  212. data/ext/eigen/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  213. data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Amd.h +444 -0
  214. data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1850 -0
  215. data/ext/eigen/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +721 -0
  216. data/ext/eigen/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +592 -0
  217. data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +580 -0
  218. data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR_MKL.h +99 -0
  219. data/ext/eigen/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +622 -0
  220. data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR.h +388 -0
  221. data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR_MKL.h +71 -0
  222. data/ext/eigen/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +338 -0
  223. data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD.h +976 -0
  224. data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD_MKL.h +92 -0
  225. data/ext/eigen/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +148 -0
  226. data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +671 -0
  227. data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  228. data/ext/eigen/eigen3/Eigen/src/SparseCore/AmbiVector.h +373 -0
  229. data/ext/eigen/eigen3/Eigen/src/SparseCore/CompressedStorage.h +233 -0
  230. data/ext/eigen/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +245 -0
  231. data/ext/eigen/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +181 -0
  232. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseBlock.h +537 -0
  233. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  234. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +325 -0
  235. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +163 -0
  236. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +311 -0
  237. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +196 -0
  238. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDot.h +101 -0
  239. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +26 -0
  240. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1262 -0
  241. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +461 -0
  242. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparsePermutation.h +148 -0
  243. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseProduct.h +188 -0
  244. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseRedux.h +45 -0
  245. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +507 -0
  246. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +150 -0
  247. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTranspose.h +63 -0
  248. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +179 -0
  249. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseUtil.h +172 -0
  250. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseVector.h +448 -0
  251. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseView.h +99 -0
  252. data/ext/eigen/eigen3/Eigen/src/SparseCore/TriangularSolver.h +334 -0
  253. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU.h +806 -0
  254. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  255. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +227 -0
  256. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +111 -0
  257. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +298 -0
  258. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  259. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +180 -0
  260. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +177 -0
  261. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +106 -0
  262. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +279 -0
  263. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +127 -0
  264. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  265. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  266. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  267. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  268. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +135 -0
  269. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  270. data/ext/eigen/eigen3/Eigen/src/SparseQR/SparseQR.h +714 -0
  271. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdDeque.h +134 -0
  272. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdList.h +114 -0
  273. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdVector.h +126 -0
  274. data/ext/eigen/eigen3/Eigen/src/StlSupport/details.h +84 -0
  275. data/ext/eigen/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1026 -0
  276. data/ext/eigen/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +474 -0
  277. data/ext/eigen/eigen3/Eigen/src/misc/Image.h +84 -0
  278. data/ext/eigen/eigen3/Eigen/src/misc/Kernel.h +81 -0
  279. data/ext/eigen/eigen3/Eigen/src/misc/Solve.h +76 -0
  280. data/ext/eigen/eigen3/Eigen/src/misc/SparseSolve.h +128 -0
  281. data/ext/eigen/eigen3/Eigen/src/misc/blas.h +658 -0
  282. data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +253 -0
  283. data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +187 -0
  284. data/ext/eigen/eigen3/Eigen/src/plugins/BlockMethods.h +935 -0
  285. data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +46 -0
  286. data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +172 -0
  287. data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +143 -0
  288. data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +52 -0
  289. data/ext/eigen/eigen3/signature_of_eigen3_matrix_library +1 -0
  290. data/ext/eigen/eigen_wrap.cxx +19420 -10396
  291. data/ext/eigen/extconf.rb +37 -2
  292. data/lib/eigen.rb +146 -3
  293. metadata +294 -7
@@ -0,0 +1,1341 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_GENERAL_BLOCK_PANEL_H
11
+ #define EIGEN_GENERAL_BLOCK_PANEL_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
18
+ class gebp_traits;
19
+
20
+
21
/** \internal
  * Selects the effective cache size.
  *
  * \param a candidate size (for instance the result of a hardware query)
  * \param b fallback size
  * \returns \a a when it is strictly positive, \a b otherwise.
  */
inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
{
  if(a > 0)
    return a;
  return b;
}
26
+
27
+ /** \internal */
28
+ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
29
+ {
30
+ static std::ptrdiff_t m_l1CacheSize = 0;
31
+ static std::ptrdiff_t m_l2CacheSize = 0;
32
+ if(m_l2CacheSize==0)
33
+ {
34
+ m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
35
+ m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
36
+ }
37
+
38
+ if(action==SetAction)
39
+ {
40
+ // set the cpu cache size and cache all block sizes from a global cache size in byte
41
+ eigen_internal_assert(l1!=0 && l2!=0);
42
+ m_l1CacheSize = *l1;
43
+ m_l2CacheSize = *l2;
44
+ }
45
+ else if(action==GetAction)
46
+ {
47
+ eigen_internal_assert(l1!=0 && l2!=0);
48
+ *l1 = m_l1CacheSize;
49
+ *l2 = m_l2CacheSize;
50
+ }
51
+ else
52
+ {
53
+ eigen_internal_assert(false);
54
+ }
55
+ }
56
+
57
+ /** \brief Computes the blocking parameters for a m x k times k x n matrix product
58
+ *
59
+ * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
60
+ * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
61
+ * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
62
+ *
63
+ * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
64
+ * this function computes the blocking size parameters along the respective dimensions
65
+ * for matrix products and related algorithms. The blocking sizes depends on various
66
+ * parameters:
67
+ * - the L1 and L2 cache sizes,
68
+ * - the register level blocking sizes defined by gebp_traits,
69
+ * - the number of scalars that fit into a packet (when vectorization is enabled).
70
+ *
71
+ * \sa setCpuCacheSizes */
72
+ template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
73
+ void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
74
+ {
75
+ EIGEN_UNUSED_VARIABLE(n);
76
+ // Explanations:
77
+ // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
78
+ // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
79
+ // per kc x nr vertical small panels where nr is the blocking size along the n dimension
80
+ // at the register level. For vectorization purpose, these small vertical panels are unpacked,
81
+ // e.g., each coefficient is replicated to fit a packet. This small vertical panel has to
82
+ // stay in L1 cache.
83
+ std::ptrdiff_t l1, l2;
84
+
85
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
86
+ enum {
87
+ kdiv = KcFactor * 2 * Traits::nr
88
+ * Traits::RhsProgress * sizeof(RhsScalar),
89
+ mr = gebp_traits<LhsScalar,RhsScalar>::mr,
90
+ mr_mask = (0xffffffff/mr)*mr
91
+ };
92
+
93
+ manage_caching_sizes(GetAction, &l1, &l2);
94
+ k = std::min<SizeType>(k, l1/kdiv);
95
+ SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
96
+ if(_m<m) m = _m & mr_mask;
97
+ }
98
+
99
/** Convenience overload: forwards to the KcFactor-parameterized
  * computeProductBlockingSizes with a KcFactor of 1.
  */
template<typename LhsScalar, typename RhsScalar, typename SizeType>
inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
  computeProductBlockingSizes<LhsScalar,RhsScalar,1,SizeType>(k, m, n);
}
104
+
105
+ #ifdef EIGEN_HAS_FUSE_CJMADD
106
+ #define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
107
+ #else
108
+
109
+ // FIXME (a bit overkill maybe ?)
110
+
111
+ template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
112
+ EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
113
+ {
114
+ c = cj.pmadd(a,b,c);
115
+ }
116
+ };
117
+
118
+ template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
119
+ EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
120
+ {
121
+ t = b; t = cj.pmul(a,t); c = padd(c,t);
122
+ }
123
+ };
124
+
125
+ template<typename CJ, typename A, typename B, typename C, typename T>
126
+ EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
127
+ {
128
+ gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
129
+ }
130
+
131
+ #define MADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
132
+ // #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
133
+ #endif
134
+
135
+ /* Vectorization logic
136
+ * real*real: unpack rhs to constant packets, ...
137
+ *
138
+ * cd*cd : unpack rhs to (b_r,b_r), (b_i,b_i), mul to get (a_r b_r,a_i b_r) (a_r b_i,a_i b_i),
139
+ * storing each res packet into two packets (2x2),
140
+ * at the end combine them: swap the second and addsub them
141
+ * cf*cf : same but with 2x4 blocks
142
+ * cplx*real : unpack rhs to constant packets, ...
143
+ * real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual
144
+ */
145
+ template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs>
146
+ class gebp_traits
147
+ {
148
+ public:
149
+ typedef _LhsScalar LhsScalar;
150
+ typedef _RhsScalar RhsScalar;
151
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
152
+
153
+ enum {
154
+ ConjLhs = _ConjLhs,
155
+ ConjRhs = _ConjRhs,
156
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
157
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
158
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
159
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
160
+
161
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
162
+
163
+ // register block size along the N direction (must be either 2 or 4)
164
+ nr = NumberOfRegisters/4,
165
+
166
+ // register block size along the M direction (currently, this one cannot be modified)
167
+ mr = 2 * LhsPacketSize,
168
+
169
+ WorkSpaceFactor = nr * RhsPacketSize,
170
+
171
+ LhsProgress = LhsPacketSize,
172
+ RhsProgress = RhsPacketSize
173
+ };
174
+
175
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
176
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
177
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
178
+
179
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
180
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
181
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
182
+
183
+ typedef ResPacket AccPacket;
184
+
185
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
186
+ {
187
+ p = pset1<ResPacket>(ResScalar(0));
188
+ }
189
+
190
+ EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
191
+ {
192
+ for(DenseIndex k=0; k<n; k++)
193
+ pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
194
+ }
195
+
196
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
197
+ {
198
+ dest = pload<RhsPacket>(b);
199
+ }
200
+
201
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
202
+ {
203
+ dest = pload<LhsPacket>(a);
204
+ }
205
+
206
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, AccPacket& tmp) const
207
+ {
208
+ tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
209
+ }
210
+
211
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
212
+ {
213
+ r = pmadd(c,alpha,r);
214
+ }
215
+
216
+ protected:
217
+ // conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
218
+ // conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj;
219
+ };
220
+
221
+ template<typename RealScalar, bool _ConjLhs>
222
+ class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
223
+ {
224
+ public:
225
+ typedef std::complex<RealScalar> LhsScalar;
226
+ typedef RealScalar RhsScalar;
227
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
228
+
229
+ enum {
230
+ ConjLhs = _ConjLhs,
231
+ ConjRhs = false,
232
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
233
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
234
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
235
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
236
+
237
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
238
+ nr = NumberOfRegisters/4,
239
+ mr = 2 * LhsPacketSize,
240
+ WorkSpaceFactor = nr*RhsPacketSize,
241
+
242
+ LhsProgress = LhsPacketSize,
243
+ RhsProgress = RhsPacketSize
244
+ };
245
+
246
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
247
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
248
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
249
+
250
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
251
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
252
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
253
+
254
+ typedef ResPacket AccPacket;
255
+
256
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
257
+ {
258
+ p = pset1<ResPacket>(ResScalar(0));
259
+ }
260
+
261
+ EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
262
+ {
263
+ for(DenseIndex k=0; k<n; k++)
264
+ pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
265
+ }
266
+
267
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
268
+ {
269
+ dest = pload<RhsPacket>(b);
270
+ }
271
+
272
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
273
+ {
274
+ dest = pload<LhsPacket>(a);
275
+ }
276
+
277
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
278
+ {
279
+ madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
280
+ }
281
+
282
+ EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
283
+ {
284
+ tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
285
+ }
286
+
287
+ EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
288
+ {
289
+ c += a * b;
290
+ }
291
+
292
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
293
+ {
294
+ r = cj.pmadd(c,alpha,r);
295
+ }
296
+
297
+ protected:
298
+ conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
299
+ };
300
+
301
+ template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
302
+ class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
303
+ {
304
+ public:
305
+ typedef std::complex<RealScalar> Scalar;
306
+ typedef std::complex<RealScalar> LhsScalar;
307
+ typedef std::complex<RealScalar> RhsScalar;
308
+ typedef std::complex<RealScalar> ResScalar;
309
+
310
+ enum {
311
+ ConjLhs = _ConjLhs,
312
+ ConjRhs = _ConjRhs,
313
+ Vectorizable = packet_traits<RealScalar>::Vectorizable
314
+ && packet_traits<Scalar>::Vectorizable,
315
+ RealPacketSize = Vectorizable ? packet_traits<RealScalar>::size : 1,
316
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
317
+
318
+ nr = 2,
319
+ mr = 2 * ResPacketSize,
320
+ WorkSpaceFactor = Vectorizable ? 2*nr*RealPacketSize : nr,
321
+
322
+ LhsProgress = ResPacketSize,
323
+ RhsProgress = Vectorizable ? 2*ResPacketSize : 1
324
+ };
325
+
326
+ typedef typename packet_traits<RealScalar>::type RealPacket;
327
+ typedef typename packet_traits<Scalar>::type ScalarPacket;
328
+ struct DoublePacket
329
+ {
330
+ RealPacket first;
331
+ RealPacket second;
332
+ };
333
+
334
+ typedef typename conditional<Vectorizable,RealPacket, Scalar>::type LhsPacket;
335
+ typedef typename conditional<Vectorizable,DoublePacket,Scalar>::type RhsPacket;
336
+ typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type ResPacket;
337
+ typedef typename conditional<Vectorizable,DoublePacket,Scalar>::type AccPacket;
338
+
339
+ EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
340
+
341
+ EIGEN_STRONG_INLINE void initAcc(DoublePacket& p)
342
+ {
343
+ p.first = pset1<RealPacket>(RealScalar(0));
344
+ p.second = pset1<RealPacket>(RealScalar(0));
345
+ }
346
+
347
+ /* Unpack the rhs coeff such that each complex coefficient is spread into
348
+ * two packects containing respectively the real and imaginary coefficient
349
+ * duplicated as many time as needed: (x+iy) => [x, ..., x] [y, ..., y]
350
+ */
351
+ EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const Scalar* rhs, Scalar* b)
352
+ {
353
+ for(DenseIndex k=0; k<n; k++)
354
+ {
355
+ if(Vectorizable)
356
+ {
357
+ pstore1<RealPacket>((RealScalar*)&b[k*ResPacketSize*2+0], real(rhs[k]));
358
+ pstore1<RealPacket>((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], imag(rhs[k]));
359
+ }
360
+ else
361
+ b[k] = rhs[k];
362
+ }
363
+ }
364
+
365
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ResPacket& dest) const { dest = *b; }
366
+
367
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket& dest) const
368
+ {
369
+ dest.first = pload<RealPacket>((const RealScalar*)b);
370
+ dest.second = pload<RealPacket>((const RealScalar*)(b+ResPacketSize));
371
+ }
372
+
373
+ // nothing special here
374
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
375
+ {
376
+ dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
377
+ }
378
+
379
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacket& c, RhsPacket& /*tmp*/) const
380
+ {
381
+ c.first = padd(pmul(a,b.first), c.first);
382
+ c.second = padd(pmul(a,b.second),c.second);
383
+ }
384
+
385
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/) const
386
+ {
387
+ c = cj.pmadd(a,b,c);
388
+ }
389
+
390
+ EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; }
391
+
392
+ EIGEN_STRONG_INLINE void acc(const DoublePacket& c, const ResPacket& alpha, ResPacket& r) const
393
+ {
394
+ // assemble c
395
+ ResPacket tmp;
396
+ if((!ConjLhs)&&(!ConjRhs))
397
+ {
398
+ tmp = pcplxflip(pconj(ResPacket(c.second)));
399
+ tmp = padd(ResPacket(c.first),tmp);
400
+ }
401
+ else if((!ConjLhs)&&(ConjRhs))
402
+ {
403
+ tmp = pconj(pcplxflip(ResPacket(c.second)));
404
+ tmp = padd(ResPacket(c.first),tmp);
405
+ }
406
+ else if((ConjLhs)&&(!ConjRhs))
407
+ {
408
+ tmp = pcplxflip(ResPacket(c.second));
409
+ tmp = padd(pconj(ResPacket(c.first)),tmp);
410
+ }
411
+ else if((ConjLhs)&&(ConjRhs))
412
+ {
413
+ tmp = pcplxflip(ResPacket(c.second));
414
+ tmp = psub(pconj(ResPacket(c.first)),tmp);
415
+ }
416
+
417
+ r = pmadd(tmp,alpha,r);
418
+ }
419
+
420
+ protected:
421
+ conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
422
+ };
423
+
424
+ template<typename RealScalar, bool _ConjRhs>
425
+ class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
426
+ {
427
+ public:
428
+ typedef std::complex<RealScalar> Scalar;
429
+ typedef RealScalar LhsScalar;
430
+ typedef Scalar RhsScalar;
431
+ typedef Scalar ResScalar;
432
+
433
+ enum {
434
+ ConjLhs = false,
435
+ ConjRhs = _ConjRhs,
436
+ Vectorizable = packet_traits<RealScalar>::Vectorizable
437
+ && packet_traits<Scalar>::Vectorizable,
438
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
439
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
440
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
441
+
442
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
443
+ nr = 4,
444
+ mr = 2*ResPacketSize,
445
+ WorkSpaceFactor = nr*RhsPacketSize,
446
+
447
+ LhsProgress = ResPacketSize,
448
+ RhsProgress = ResPacketSize
449
+ };
450
+
451
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
452
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
453
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
454
+
455
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
456
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
457
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
458
+
459
+ typedef ResPacket AccPacket;
460
+
461
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
462
+ {
463
+ p = pset1<ResPacket>(ResScalar(0));
464
+ }
465
+
466
+ EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b)
467
+ {
468
+ for(DenseIndex k=0; k<n; k++)
469
+ pstore1<RhsPacket>(&b[k*RhsPacketSize], rhs[k]);
470
+ }
471
+
472
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
473
+ {
474
+ dest = pload<RhsPacket>(b);
475
+ }
476
+
477
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
478
+ {
479
+ dest = ploaddup<LhsPacket>(a);
480
+ }
481
+
482
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
483
+ {
484
+ madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
485
+ }
486
+
487
+ EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
488
+ {
489
+ tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
490
+ }
491
+
492
+ EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
493
+ {
494
+ c += a * b;
495
+ }
496
+
497
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
498
+ {
499
+ r = cj.pmadd(alpha,c,r);
500
+ }
501
+
502
+ protected:
503
+ conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
504
+ };
505
+
506
+ /* optimized GEneral packed Block * packed Panel product kernel
507
+ *
508
+ * Mixing type logic: C += A * B
509
+ * | A | B | comments
510
+ * |real |cplx | no vectorization yet, would require to pack A with duplication
511
+ * |cplx |real | easy vectorization
512
+ */
513
+ template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
514
+ struct gebp_kernel
515
+ {
516
+ typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
517
+ typedef typename Traits::ResScalar ResScalar;
518
+ typedef typename Traits::LhsPacket LhsPacket;
519
+ typedef typename Traits::RhsPacket RhsPacket;
520
+ typedef typename Traits::ResPacket ResPacket;
521
+ typedef typename Traits::AccPacket AccPacket;
522
+
523
+ enum {
524
+ Vectorizable = Traits::Vectorizable,
525
+ LhsProgress = Traits::LhsProgress,
526
+ RhsProgress = Traits::RhsProgress,
527
+ ResPacketSize = Traits::ResPacketSize
528
+ };
529
+
530
+ EIGEN_DONT_INLINE
531
+ void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
532
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0);
533
+ };
534
+ /* gebp_kernel::operator(): adds alpha times the product of the packed lhs
   * block (blockA) and the packed rhs panel (blockB) to res; every store below
   * goes through traits.acc or "+=" so existing values of res are accumulated.
   *
   *  res, resStride    destination; coefficient (i,j) is addressed as res[j*resStride + i]
   *  blockA, blockB    packed operands (NOTE(review): presumably produced by
   *                    gemm_pack_lhs / gemm_pack_rhs - confirm against callers)
   *  rows, depth, cols sizes of the product
   *  alpha             scaling factor applied to the product
   *  strideA, strideB  strides of the packed operands; -1 is replaced by depth
   *  offsetA, offsetB  offsets into the packed operands (scaled by the width of
   *                    the panel being addressed)
   *  unpackedB         workspace receiving the rhs panel after traits.unpackRhs;
   *                    when 0, the memory located right before blockB is used
   *                    (assumes the caller reserved it - TODO confirm)
   */
535
+ template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
536
+ EIGEN_DONT_INLINE
537
+ void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
538
+ ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
539
+ Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB)
540
+ {
541
+ Traits traits;
542
+
543
+ if(strideA==-1) strideA = depth;
544
+ if(strideB==-1) strideB = depth;
545
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
546
+ // conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
547
+ Index packet_cols = (cols/nr) * nr;
548
+ const Index peeled_mc = (rows/mr)*mr;
549
+ // FIXME:
550
+ const Index peeled_mc2 = peeled_mc + (rows-peeled_mc >= LhsProgress ? LhsProgress : 0);
551
+ const Index peeled_kc = (depth/4)*4;
552
+ // packet_cols: columns processed nr at a time; peeled_mc: rows processed mr at a time; peeled_mc2: also counts one extra LhsProgress-wide block when it fits; peeled_kc: depth covered by the 4x unrolled loops
553
+ if(unpackedB==0)
554
+ unpackedB = const_cast<RhsScalar*>(blockB - strideB * nr * RhsProgress);
555
+ // (default workspace: starts strideB*nr*RhsProgress coefficients before blockB)
556
+ // loops on each micro vertical panel of rhs (depth x nr)
557
+ for(Index j2=0; j2<packet_cols; j2+=nr)
558
+ {
559
+ traits.unpackRhs(depth*nr,&blockB[j2*strideB+offsetB*nr],unpackedB);
560
+ // from here on the vectorized paths read the rhs from unpackedB; only the scalar row loop reads blockB directly
561
+ // loops on each largest micro horizontal panel of lhs (mr x depth)
562
+ // => we select a mr x nr micro block of res which is entirely
563
+ // stored into mr/packet_size x nr registers.
564
+ for(Index i=0; i<peeled_mc; i+=mr)
565
+ {
566
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*mr];
567
+ prefetch(&blA[0]);
568
+
569
+ // gets res block as register
570
+ AccPacket C0, C1, C2, C3, C4, C5, C6, C7;
571
+ traits.initAcc(C0);
572
+ traits.initAcc(C1);
573
+ if(nr==4) traits.initAcc(C2);
574
+ if(nr==4) traits.initAcc(C3);
575
+ traits.initAcc(C4);
576
+ traits.initAcc(C5);
577
+ if(nr==4) traits.initAcc(C6);
578
+ if(nr==4) traits.initAcc(C7);
579
+ // C0..C3 accumulate the products of lhs packet A0 and end up in r0..r3; C4..C7 accumulate those of A1 and end up in r0..r3 + ResPacketSize
580
+ ResScalar* r0 = &res[(j2+0)*resStride + i];
581
+ ResScalar* r1 = r0 + resStride;
582
+ ResScalar* r2 = r1 + resStride;
583
+ ResScalar* r3 = r2 + resStride;
584
+ // NOTE(review): r2/r3 are computed and prefetched even when nr==2, where they are not otherwise used
585
+ prefetch(r0+16);
586
+ prefetch(r1+16);
587
+ prefetch(r2+16);
588
+ prefetch(r3+16);
589
+
590
+ // performs "inner" product
591
+ // TODO let's check wether the folowing peeled loop could not be
592
+ // optimized via optimal prefetching from one loop to the other
593
+ const RhsScalar* blB = unpackedB;
594
+ for(Index k=0; k<peeled_kc; k+=4)
595
+ {
596
+ if(nr==2)
597
+ {
598
+ LhsPacket A0, A1;
599
+ RhsPacket B_0;
600
+ RhsPacket T0;
601
+ // the last argument of madd is a temporary: T0 is used while B_0 is still needed, B_0 itself is passed once it may be overwritten
602
+ EIGEN_ASM_COMMENT("mybegin2");
603
+ traits.loadLhs(&blA[0*LhsProgress], A0);
604
+ traits.loadLhs(&blA[1*LhsProgress], A1);
605
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
606
+ traits.madd(A0,B_0,C0,T0);
607
+ traits.madd(A1,B_0,C4,B_0);
608
+ traits.loadRhs(&blB[1*RhsProgress], B_0);
609
+ traits.madd(A0,B_0,C1,T0);
610
+ traits.madd(A1,B_0,C5,B_0);
611
+
612
+ traits.loadLhs(&blA[2*LhsProgress], A0);
613
+ traits.loadLhs(&blA[3*LhsProgress], A1);
614
+ traits.loadRhs(&blB[2*RhsProgress], B_0);
615
+ traits.madd(A0,B_0,C0,T0);
616
+ traits.madd(A1,B_0,C4,B_0);
617
+ traits.loadRhs(&blB[3*RhsProgress], B_0);
618
+ traits.madd(A0,B_0,C1,T0);
619
+ traits.madd(A1,B_0,C5,B_0);
620
+
621
+ traits.loadLhs(&blA[4*LhsProgress], A0);
622
+ traits.loadLhs(&blA[5*LhsProgress], A1);
623
+ traits.loadRhs(&blB[4*RhsProgress], B_0);
624
+ traits.madd(A0,B_0,C0,T0);
625
+ traits.madd(A1,B_0,C4,B_0);
626
+ traits.loadRhs(&blB[5*RhsProgress], B_0);
627
+ traits.madd(A0,B_0,C1,T0);
628
+ traits.madd(A1,B_0,C5,B_0);
629
+
630
+ traits.loadLhs(&blA[6*LhsProgress], A0);
631
+ traits.loadLhs(&blA[7*LhsProgress], A1);
632
+ traits.loadRhs(&blB[6*RhsProgress], B_0);
633
+ traits.madd(A0,B_0,C0,T0);
634
+ traits.madd(A1,B_0,C4,B_0);
635
+ traits.loadRhs(&blB[7*RhsProgress], B_0);
636
+ traits.madd(A0,B_0,C1,T0);
637
+ traits.madd(A1,B_0,C5,B_0);
638
+ EIGEN_ASM_COMMENT("myend");
639
+ }
640
+ else
641
+ {
642
+ EIGEN_ASM_COMMENT("mybegin4");
643
+ LhsPacket A0, A1;
644
+ RhsPacket B_0, B1, B2, B3;
645
+ RhsPacket T0;
646
+ // same scheme with four rhs packets per depth step; loads of the next step are interleaved with the madds of the current one
647
+ traits.loadLhs(&blA[0*LhsProgress], A0);
648
+ traits.loadLhs(&blA[1*LhsProgress], A1);
649
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
650
+ traits.loadRhs(&blB[1*RhsProgress], B1);
651
+
652
+ traits.madd(A0,B_0,C0,T0);
653
+ traits.loadRhs(&blB[2*RhsProgress], B2);
654
+ traits.madd(A1,B_0,C4,B_0);
655
+ traits.loadRhs(&blB[3*RhsProgress], B3);
656
+ traits.loadRhs(&blB[4*RhsProgress], B_0);
657
+ traits.madd(A0,B1,C1,T0);
658
+ traits.madd(A1,B1,C5,B1);
659
+ traits.loadRhs(&blB[5*RhsProgress], B1);
660
+ traits.madd(A0,B2,C2,T0);
661
+ traits.madd(A1,B2,C6,B2);
662
+ traits.loadRhs(&blB[6*RhsProgress], B2);
663
+ traits.madd(A0,B3,C3,T0);
664
+ traits.loadLhs(&blA[2*LhsProgress], A0);
665
+ traits.madd(A1,B3,C7,B3);
666
+ traits.loadLhs(&blA[3*LhsProgress], A1);
667
+ traits.loadRhs(&blB[7*RhsProgress], B3);
668
+ traits.madd(A0,B_0,C0,T0);
669
+ traits.madd(A1,B_0,C4,B_0);
670
+ traits.loadRhs(&blB[8*RhsProgress], B_0);
671
+ traits.madd(A0,B1,C1,T0);
672
+ traits.madd(A1,B1,C5,B1);
673
+ traits.loadRhs(&blB[9*RhsProgress], B1);
674
+ traits.madd(A0,B2,C2,T0);
675
+ traits.madd(A1,B2,C6,B2);
676
+ traits.loadRhs(&blB[10*RhsProgress], B2);
677
+ traits.madd(A0,B3,C3,T0);
678
+ traits.loadLhs(&blA[4*LhsProgress], A0);
679
+ traits.madd(A1,B3,C7,B3);
680
+ traits.loadLhs(&blA[5*LhsProgress], A1);
681
+ traits.loadRhs(&blB[11*RhsProgress], B3);
682
+
683
+ traits.madd(A0,B_0,C0,T0);
684
+ traits.madd(A1,B_0,C4,B_0);
685
+ traits.loadRhs(&blB[12*RhsProgress], B_0);
686
+ traits.madd(A0,B1,C1,T0);
687
+ traits.madd(A1,B1,C5,B1);
688
+ traits.loadRhs(&blB[13*RhsProgress], B1);
689
+ traits.madd(A0,B2,C2,T0);
690
+ traits.madd(A1,B2,C6,B2);
691
+ traits.loadRhs(&blB[14*RhsProgress], B2);
692
+ traits.madd(A0,B3,C3,T0);
693
+ traits.loadLhs(&blA[6*LhsProgress], A0);
694
+ traits.madd(A1,B3,C7,B3);
695
+ traits.loadLhs(&blA[7*LhsProgress], A1);
696
+ traits.loadRhs(&blB[15*RhsProgress], B3);
697
+ traits.madd(A0,B_0,C0,T0);
698
+ traits.madd(A1,B_0,C4,B_0);
699
+ traits.madd(A0,B1,C1,T0);
700
+ traits.madd(A1,B1,C5,B1);
701
+ traits.madd(A0,B2,C2,T0);
702
+ traits.madd(A1,B2,C6,B2);
703
+ traits.madd(A0,B3,C3,T0);
704
+ traits.madd(A1,B3,C7,B3);
705
+ }
706
+ // four depth steps have been consumed
707
+ blB += 4*nr*RhsProgress;
708
+ blA += 4*mr;
709
+ }
710
+ // process remaining peeled loop
711
+ for(Index k=peeled_kc; k<depth; k++)
712
+ {
713
+ if(nr==2)
714
+ {
715
+ LhsPacket A0, A1;
716
+ RhsPacket B_0;
717
+ RhsPacket T0;
718
+
719
+ traits.loadLhs(&blA[0*LhsProgress], A0);
720
+ traits.loadLhs(&blA[1*LhsProgress], A1);
721
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
722
+ traits.madd(A0,B_0,C0,T0);
723
+ traits.madd(A1,B_0,C4,B_0);
724
+ traits.loadRhs(&blB[1*RhsProgress], B_0);
725
+ traits.madd(A0,B_0,C1,T0);
726
+ traits.madd(A1,B_0,C5,B_0);
727
+ }
728
+ else
729
+ {
730
+ LhsPacket A0, A1;
731
+ RhsPacket B_0, B1, B2, B3;
732
+ RhsPacket T0;
733
+
734
+ traits.loadLhs(&blA[0*LhsProgress], A0);
735
+ traits.loadLhs(&blA[1*LhsProgress], A1);
736
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
737
+ traits.loadRhs(&blB[1*RhsProgress], B1);
738
+
739
+ traits.madd(A0,B_0,C0,T0);
740
+ traits.loadRhs(&blB[2*RhsProgress], B2);
741
+ traits.madd(A1,B_0,C4,B_0);
742
+ traits.loadRhs(&blB[3*RhsProgress], B3);
743
+ traits.madd(A0,B1,C1,T0);
744
+ traits.madd(A1,B1,C5,B1);
745
+ traits.madd(A0,B2,C2,T0);
746
+ traits.madd(A1,B2,C6,B2);
747
+ traits.madd(A0,B3,C3,T0);
748
+ traits.madd(A1,B3,C7,B3);
749
+ }
750
+
751
+ blB += nr*RhsProgress;
752
+ blA += mr;
753
+ }
754
+ // write-back: each res packet is loaded unaligned, updated by traits.acc with alpha and its accumulator, then stored
755
+ if(nr==4)
756
+ {
757
+ ResPacket R0, R1, R2, R3, R4, R5, R6;
758
+ ResPacket alphav = pset1<ResPacket>(alpha);
759
+
760
+ R0 = ploadu<ResPacket>(r0);
761
+ R1 = ploadu<ResPacket>(r1);
762
+ R2 = ploadu<ResPacket>(r2);
763
+ R3 = ploadu<ResPacket>(r3);
764
+ R4 = ploadu<ResPacket>(r0 + ResPacketSize);
765
+ R5 = ploadu<ResPacket>(r1 + ResPacketSize);
766
+ R6 = ploadu<ResPacket>(r2 + ResPacketSize);
767
+ traits.acc(C0, alphav, R0);
768
+ pstoreu(r0, R0);
769
+ R0 = ploadu<ResPacket>(r3 + ResPacketSize);
770
+ // R0 has already been stored, so it is reused for the packet at r3 + ResPacketSize
771
+ traits.acc(C1, alphav, R1);
772
+ traits.acc(C2, alphav, R2);
773
+ traits.acc(C3, alphav, R3);
774
+ traits.acc(C4, alphav, R4);
775
+ traits.acc(C5, alphav, R5);
776
+ traits.acc(C6, alphav, R6);
777
+ traits.acc(C7, alphav, R0);
778
+
779
+ pstoreu(r1, R1);
780
+ pstoreu(r2, R2);
781
+ pstoreu(r3, R3);
782
+ pstoreu(r0 + ResPacketSize, R4);
783
+ pstoreu(r1 + ResPacketSize, R5);
784
+ pstoreu(r2 + ResPacketSize, R6);
785
+ pstoreu(r3 + ResPacketSize, R0);
786
+ }
787
+ else
788
+ {
789
+ ResPacket R0, R1, R4;
790
+ ResPacket alphav = pset1<ResPacket>(alpha);
791
+
792
+ R0 = ploadu<ResPacket>(r0);
793
+ R1 = ploadu<ResPacket>(r1);
794
+ R4 = ploadu<ResPacket>(r0 + ResPacketSize);
795
+ traits.acc(C0, alphav, R0);
796
+ pstoreu(r0, R0);
797
+ R0 = ploadu<ResPacket>(r1 + ResPacketSize);
798
+ traits.acc(C1, alphav, R1);
799
+ traits.acc(C4, alphav, R4);
800
+ traits.acc(C5, alphav, R0);
801
+ pstoreu(r1, R1);
802
+ pstoreu(r0 + ResPacketSize, R4);
803
+ pstoreu(r1 + ResPacketSize, R0);
804
+ }
805
+
806
+ }
807
+ // at most one additional block of LhsProgress rows: one lhs packet per depth step, the rhs packet itself serves as madd temporary
808
+ if(rows-peeled_mc>=LhsProgress)
809
+ {
810
+ Index i = peeled_mc;
811
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress];
812
+ prefetch(&blA[0]);
813
+
814
+ // gets res block as register
815
+ AccPacket C0, C1, C2, C3;
816
+ traits.initAcc(C0);
817
+ traits.initAcc(C1);
818
+ if(nr==4) traits.initAcc(C2);
819
+ if(nr==4) traits.initAcc(C3);
820
+
821
+ // performs "inner" product
822
+ const RhsScalar* blB = unpackedB;
823
+ for(Index k=0; k<peeled_kc; k+=4)
824
+ {
825
+ if(nr==2)
826
+ {
827
+ LhsPacket A0;
828
+ RhsPacket B_0, B1;
829
+
830
+ traits.loadLhs(&blA[0*LhsProgress], A0);
831
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
832
+ traits.loadRhs(&blB[1*RhsProgress], B1);
833
+ traits.madd(A0,B_0,C0,B_0);
834
+ traits.loadRhs(&blB[2*RhsProgress], B_0);
835
+ traits.madd(A0,B1,C1,B1);
836
+ traits.loadLhs(&blA[1*LhsProgress], A0);
837
+ traits.loadRhs(&blB[3*RhsProgress], B1);
838
+ traits.madd(A0,B_0,C0,B_0);
839
+ traits.loadRhs(&blB[4*RhsProgress], B_0);
840
+ traits.madd(A0,B1,C1,B1);
841
+ traits.loadLhs(&blA[2*LhsProgress], A0);
842
+ traits.loadRhs(&blB[5*RhsProgress], B1);
843
+ traits.madd(A0,B_0,C0,B_0);
844
+ traits.loadRhs(&blB[6*RhsProgress], B_0);
845
+ traits.madd(A0,B1,C1,B1);
846
+ traits.loadLhs(&blA[3*LhsProgress], A0);
847
+ traits.loadRhs(&blB[7*RhsProgress], B1);
848
+ traits.madd(A0,B_0,C0,B_0);
849
+ traits.madd(A0,B1,C1,B1);
850
+ }
851
+ else
852
+ {
853
+ LhsPacket A0;
854
+ RhsPacket B_0, B1, B2, B3;
855
+
856
+ traits.loadLhs(&blA[0*LhsProgress], A0);
857
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
858
+ traits.loadRhs(&blB[1*RhsProgress], B1);
859
+
860
+ traits.madd(A0,B_0,C0,B_0);
861
+ traits.loadRhs(&blB[2*RhsProgress], B2);
862
+ traits.loadRhs(&blB[3*RhsProgress], B3);
863
+ traits.loadRhs(&blB[4*RhsProgress], B_0);
864
+ traits.madd(A0,B1,C1,B1);
865
+ traits.loadRhs(&blB[5*RhsProgress], B1);
866
+ traits.madd(A0,B2,C2,B2);
867
+ traits.loadRhs(&blB[6*RhsProgress], B2);
868
+ traits.madd(A0,B3,C3,B3);
869
+ traits.loadLhs(&blA[1*LhsProgress], A0);
870
+ traits.loadRhs(&blB[7*RhsProgress], B3);
871
+ traits.madd(A0,B_0,C0,B_0);
872
+ traits.loadRhs(&blB[8*RhsProgress], B_0);
873
+ traits.madd(A0,B1,C1,B1);
874
+ traits.loadRhs(&blB[9*RhsProgress], B1);
875
+ traits.madd(A0,B2,C2,B2);
876
+ traits.loadRhs(&blB[10*RhsProgress], B2);
877
+ traits.madd(A0,B3,C3,B3);
878
+ traits.loadLhs(&blA[2*LhsProgress], A0);
879
+ traits.loadRhs(&blB[11*RhsProgress], B3);
880
+
881
+ traits.madd(A0,B_0,C0,B_0);
882
+ traits.loadRhs(&blB[12*RhsProgress], B_0);
883
+ traits.madd(A0,B1,C1,B1);
884
+ traits.loadRhs(&blB[13*RhsProgress], B1);
885
+ traits.madd(A0,B2,C2,B2);
886
+ traits.loadRhs(&blB[14*RhsProgress], B2);
887
+ traits.madd(A0,B3,C3,B3);
888
+
889
+ traits.loadLhs(&blA[3*LhsProgress], A0);
890
+ traits.loadRhs(&blB[15*RhsProgress], B3);
891
+ traits.madd(A0,B_0,C0,B_0);
892
+ traits.madd(A0,B1,C1,B1);
893
+ traits.madd(A0,B2,C2,B2);
894
+ traits.madd(A0,B3,C3,B3);
895
+ }
896
+
897
+ blB += nr*4*RhsProgress;
898
+ blA += 4*LhsProgress;
899
+ }
900
+ // process remaining peeled loop
901
+ for(Index k=peeled_kc; k<depth; k++)
902
+ {
903
+ if(nr==2)
904
+ {
905
+ LhsPacket A0;
906
+ RhsPacket B_0, B1;
907
+
908
+ traits.loadLhs(&blA[0*LhsProgress], A0);
909
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
910
+ traits.loadRhs(&blB[1*RhsProgress], B1);
911
+ traits.madd(A0,B_0,C0,B_0);
912
+ traits.madd(A0,B1,C1,B1);
913
+ }
914
+ else
915
+ {
916
+ LhsPacket A0;
917
+ RhsPacket B_0, B1, B2, B3;
918
+
919
+ traits.loadLhs(&blA[0*LhsProgress], A0);
920
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
921
+ traits.loadRhs(&blB[1*RhsProgress], B1);
922
+ traits.loadRhs(&blB[2*RhsProgress], B2);
923
+ traits.loadRhs(&blB[3*RhsProgress], B3);
924
+
925
+ traits.madd(A0,B_0,C0,B_0);
926
+ traits.madd(A0,B1,C1,B1);
927
+ traits.madd(A0,B2,C2,B2);
928
+ traits.madd(A0,B3,C3,B3);
929
+ }
930
+
931
+ blB += nr*RhsProgress;
932
+ blA += LhsProgress;
933
+ }
934
+ // write-back of the nr result packets of this block (R2/R3 only when nr==4)
935
+ ResPacket R0, R1, R2, R3;
936
+ ResPacket alphav = pset1<ResPacket>(alpha);
937
+
938
+ ResScalar* r0 = &res[(j2+0)*resStride + i];
939
+ ResScalar* r1 = r0 + resStride;
940
+ ResScalar* r2 = r1 + resStride;
941
+ ResScalar* r3 = r2 + resStride;
942
+
943
+ R0 = ploadu<ResPacket>(r0);
944
+ R1 = ploadu<ResPacket>(r1);
945
+ if(nr==4) R2 = ploadu<ResPacket>(r2);
946
+ if(nr==4) R3 = ploadu<ResPacket>(r3);
947
+
948
+ traits.acc(C0, alphav, R0);
949
+ traits.acc(C1, alphav, R1);
950
+ if(nr==4) traits.acc(C2, alphav, R2);
951
+ if(nr==4) traits.acc(C3, alphav, R3);
952
+
953
+ pstoreu(r0, R0);
954
+ pstoreu(r1, R1);
955
+ if(nr==4) pstoreu(r2, R2);
956
+ if(nr==4) pstoreu(r3, R3);
957
+ }
958
+ for(Index i=peeled_mc2; i<rows; i++) // remaining rows, one at a time, with scalar arithmetic
959
+ {
960
+ const LhsScalar* blA = &blockA[i*strideA+offsetA];
961
+ prefetch(&blA[0]);
962
+
963
+ // gets a 1 x nr res block as registers
964
+ ResScalar C0(0), C1(0), C2(0), C3(0);
965
+ // TODO directly use blockB ???
966
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
967
+ for(Index k=0; k<depth; k++)
968
+ {
969
+ if(nr==2)
970
+ {
971
+ LhsScalar A0;
972
+ RhsScalar B_0, B1;
973
+
974
+ A0 = blA[k];
975
+ B_0 = blB[0];
976
+ B1 = blB[1];
977
+ MADD(cj,A0,B_0,C0,B_0);
978
+ MADD(cj,A0,B1,C1,B1);
979
+ }
980
+ else
981
+ {
982
+ LhsScalar A0;
983
+ RhsScalar B_0, B1, B2, B3;
984
+
985
+ A0 = blA[k];
986
+ B_0 = blB[0];
987
+ B1 = blB[1];
988
+ B2 = blB[2];
989
+ B3 = blB[3];
990
+
991
+ MADD(cj,A0,B_0,C0,B_0);
992
+ MADD(cj,A0,B1,C1,B1);
993
+ MADD(cj,A0,B2,C2,B2);
994
+ MADD(cj,A0,B3,C3,B3);
995
+ }
996
+
997
+ blB += nr;
998
+ }
999
+ res[(j2+0)*resStride + i] += alpha*C0;
1000
+ res[(j2+1)*resStride + i] += alpha*C1;
1001
+ if(nr==4) res[(j2+2)*resStride + i] += alpha*C2;
1002
+ if(nr==4) res[(j2+3)*resStride + i] += alpha*C3;
1003
+ }
1004
+ }
1005
+ // process remaining rhs/res columns one at a time
1006
+ // => do the same but with nr==1
1007
+ for(Index j2=packet_cols; j2<cols; j2++)
1008
+ {
1009
+ // unpack B
1010
+ traits.unpackRhs(depth, &blockB[j2*strideB+offsetB], unpackedB);
1011
+ // same three-stage row traversal as above (mr rows, then LhsProgress rows, then single rows), without depth unrolling
1012
+ for(Index i=0; i<peeled_mc; i+=mr)
1013
+ {
1014
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*mr];
1015
+ prefetch(&blA[0]);
1016
+
1017
+ // TODO move the res loads to the stores
1018
+
1019
+ // get res block as registers
1020
+ AccPacket C0, C4;
1021
+ traits.initAcc(C0);
1022
+ traits.initAcc(C4);
1023
+
1024
+ const RhsScalar* blB = unpackedB;
1025
+ for(Index k=0; k<depth; k++)
1026
+ {
1027
+ LhsPacket A0, A1;
1028
+ RhsPacket B_0;
1029
+ RhsPacket T0;
1030
+
1031
+ traits.loadLhs(&blA[0*LhsProgress], A0);
1032
+ traits.loadLhs(&blA[1*LhsProgress], A1);
1033
+ traits.loadRhs(&blB[0*RhsProgress], B_0);
1034
+ traits.madd(A0,B_0,C0,T0);
1035
+ traits.madd(A1,B_0,C4,B_0);
1036
+
1037
+ blB += RhsProgress;
1038
+ blA += 2*LhsProgress;
1039
+ }
1040
+ ResPacket R0, R4;
1041
+ ResPacket alphav = pset1<ResPacket>(alpha);
1042
+
1043
+ ResScalar* r0 = &res[(j2+0)*resStride + i];
1044
+
1045
+ R0 = ploadu<ResPacket>(r0);
1046
+ R4 = ploadu<ResPacket>(r0+ResPacketSize);
1047
+
1048
+ traits.acc(C0, alphav, R0);
1049
+ traits.acc(C4, alphav, R4);
1050
+
1051
+ pstoreu(r0, R0);
1052
+ pstoreu(r0+ResPacketSize, R4);
1053
+ }
1054
+ if(rows-peeled_mc>=LhsProgress)
1055
+ {
1056
+ Index i = peeled_mc;
1057
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress];
1058
+ prefetch(&blA[0]);
1059
+
1060
+ AccPacket C0;
1061
+ traits.initAcc(C0);
1062
+
1063
+ const RhsScalar* blB = unpackedB;
1064
+ for(Index k=0; k<depth; k++)
1065
+ {
1066
+ LhsPacket A0;
1067
+ RhsPacket B_0;
1068
+ traits.loadLhs(blA, A0);
1069
+ traits.loadRhs(blB, B_0);
1070
+ traits.madd(A0, B_0, C0, B_0);
1071
+ blB += RhsProgress;
1072
+ blA += LhsProgress;
1073
+ }
1074
+
1075
+ ResPacket alphav = pset1<ResPacket>(alpha);
1076
+ ResPacket R0 = ploadu<ResPacket>(&res[(j2+0)*resStride + i]);
1077
+ traits.acc(C0, alphav, R0);
1078
+ pstoreu(&res[(j2+0)*resStride + i], R0);
1079
+ }
1080
+ for(Index i=peeled_mc2; i<rows; i++)
1081
+ {
1082
+ const LhsScalar* blA = &blockA[i*strideA+offsetA];
1083
+ prefetch(&blA[0]);
1084
+
1085
+ // gets a 1 x 1 res block as registers
1086
+ ResScalar C0(0);
1087
+ // FIXME directly use blockB ??
1088
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
1089
+ for(Index k=0; k<depth; k++)
1090
+ {
1091
+ LhsScalar A0 = blA[k];
1092
+ RhsScalar B_0 = blB[k];
1093
+ MADD(cj, A0, B_0, C0, B_0);
1094
+ }
1095
+ res[(j2+0)*resStride + i] += alpha*C0;
1096
+ }
1097
+ }
1098
+ }
1099
+
1100
+
1101
+ #undef CJMADD
1102
+
1103
+ // pack a block of the lhs
1104
+ // The traversal is as follow (mr==4):
1105
+ // 0 4 8 12 ...
1106
+ // 1 5 9 13 ...
1107
+ // 2 6 10 14 ...
1108
+ // 3 7 11 15 ...
1109
+ //
1110
+ // 16 20 24 28 ...
1111
+ // 17 21 25 29 ...
1112
+ // 18 22 26 30 ...
1113
+ // 19 23 27 31 ...
1114
+ //
1115
+ // 32 33 34 35 ...
1116
+ // 36 36 38 39 ...
1117
+ template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
1118
+ struct gemm_pack_lhs
1119
+ {
1120
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0);
1121
+ };
1122
+
1123
+ template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
1124
+ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder, Conjugate, PanelMode>
1125
+ ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset)
1126
+ {
1127
+ typedef typename packet_traits<Scalar>::type Packet;
1128
+ enum { PacketSize = packet_traits<Scalar>::size };
1129
+
1130
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
1131
+ EIGEN_UNUSED_VARIABLE(stride)
1132
+ EIGEN_UNUSED_VARIABLE(offset)
1133
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
1134
+ eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
1135
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
1136
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
1137
+ Index count = 0;
1138
+ Index peeled_mc = (rows/Pack1)*Pack1;
1139
+ for(Index i=0; i<peeled_mc; i+=Pack1)
1140
+ {
1141
+ if(PanelMode) count += Pack1 * offset;
1142
+
1143
+ if(StorageOrder==ColMajor)
1144
+ {
1145
+ for(Index k=0; k<depth; k++)
1146
+ {
1147
+ Packet A, B, C, D;
1148
+ if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
1149
+ if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
1150
+ if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
1151
+ if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
1152
+ if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
1153
+ if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
1154
+ if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
1155
+ if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
1156
+ }
1157
+ }
1158
+ else
1159
+ {
1160
+ for(Index k=0; k<depth; k++)
1161
+ {
1162
+ // TODO add a vectorized transpose here
1163
+ Index w=0;
1164
+ for(; w<Pack1-3; w+=4)
1165
+ {
1166
+ Scalar a(cj(lhs(i+w+0, k))),
1167
+ b(cj(lhs(i+w+1, k))),
1168
+ c(cj(lhs(i+w+2, k))),
1169
+ d(cj(lhs(i+w+3, k)));
1170
+ blockA[count++] = a;
1171
+ blockA[count++] = b;
1172
+ blockA[count++] = c;
1173
+ blockA[count++] = d;
1174
+ }
1175
+ if(Pack1%4)
1176
+ for(;w<Pack1;++w)
1177
+ blockA[count++] = cj(lhs(i+w, k));
1178
+ }
1179
+ }
1180
+ if(PanelMode) count += Pack1 * (stride-offset-depth);
1181
+ }
1182
+ if(rows-peeled_mc>=Pack2)
1183
+ {
1184
+ if(PanelMode) count += Pack2*offset;
1185
+ for(Index k=0; k<depth; k++)
1186
+ for(Index w=0; w<Pack2; w++)
1187
+ blockA[count++] = cj(lhs(peeled_mc+w, k));
1188
+ if(PanelMode) count += Pack2 * (stride-offset-depth);
1189
+ peeled_mc += Pack2;
1190
+ }
1191
+ for(Index i=peeled_mc; i<rows; i++)
1192
+ {
1193
+ if(PanelMode) count += offset;
1194
+ for(Index k=0; k<depth; k++)
1195
+ blockA[count++] = cj(lhs(i, k));
1196
+ if(PanelMode) count += (stride-offset-depth);
1197
+ }
1198
+ }
1199
+
1200
+ // copy a complete panel of the rhs
1201
+ // this version is optimized for column major matrices
1202
+ // The traversal order is as follow: (nr==4):
1203
+ // 0 1 2 3 12 13 14 15 24 27
1204
+ // 4 5 6 7 16 17 18 19 25 28
1205
+ // 8 9 10 11 20 21 22 23 26 29
1206
+ // . . . . . . . . . .
1207
+ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
1208
+ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
1209
+ {
1210
+ typedef typename packet_traits<Scalar>::type Packet;
1211
+ enum { PacketSize = packet_traits<Scalar>::size };
1212
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
1213
+ };
1214
+
1215
+ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
1216
+ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
1217
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
1218
+ {
1219
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
1220
+ EIGEN_UNUSED_VARIABLE(stride)
1221
+ EIGEN_UNUSED_VARIABLE(offset)
1222
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
1223
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
1224
+ Index packet_cols = (cols/nr) * nr;
1225
+ Index count = 0;
1226
+ for(Index j2=0; j2<packet_cols; j2+=nr)
1227
+ {
1228
+ // skip what we have before
1229
+ if(PanelMode) count += nr * offset;
1230
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
1231
+ const Scalar* b1 = &rhs[(j2+1)*rhsStride];
1232
+ const Scalar* b2 = &rhs[(j2+2)*rhsStride];
1233
+ const Scalar* b3 = &rhs[(j2+3)*rhsStride];
1234
+ for(Index k=0; k<depth; k++)
1235
+ {
1236
+ blockB[count+0] = cj(b0[k]);
1237
+ blockB[count+1] = cj(b1[k]);
1238
+ if(nr==4) blockB[count+2] = cj(b2[k]);
1239
+ if(nr==4) blockB[count+3] = cj(b3[k]);
1240
+ count += nr;
1241
+ }
1242
+ // skip what we have after
1243
+ if(PanelMode) count += nr * (stride-offset-depth);
1244
+ }
1245
+
1246
+ // copy the remaining columns one at a time (nr==1)
1247
+ for(Index j2=packet_cols; j2<cols; ++j2)
1248
+ {
1249
+ if(PanelMode) count += offset;
1250
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
1251
+ for(Index k=0; k<depth; k++)
1252
+ {
1253
+ blockB[count] = cj(b0[k]);
1254
+ count += 1;
1255
+ }
1256
+ if(PanelMode) count += (stride-offset-depth);
1257
+ }
1258
+ }
1259
+
1260
+ // this version is optimized for row major matrices
1261
+ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
1262
+ struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
1263
+ {
1264
+ enum { PacketSize = packet_traits<Scalar>::size };
1265
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
1266
+ };
1267
+
1268
+ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
1269
+ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
1270
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
1271
+ {
1272
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
1273
+ EIGEN_UNUSED_VARIABLE(stride)
1274
+ EIGEN_UNUSED_VARIABLE(offset)
1275
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
1276
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
1277
+ Index packet_cols = (cols/nr) * nr;
1278
+ Index count = 0;
1279
+ for(Index j2=0; j2<packet_cols; j2+=nr)
1280
+ {
1281
+ // skip what we have before
1282
+ if(PanelMode) count += nr * offset;
1283
+ for(Index k=0; k<depth; k++)
1284
+ {
1285
+ const Scalar* b0 = &rhs[k*rhsStride + j2];
1286
+ blockB[count+0] = cj(b0[0]);
1287
+ blockB[count+1] = cj(b0[1]);
1288
+ if(nr==4) blockB[count+2] = cj(b0[2]);
1289
+ if(nr==4) blockB[count+3] = cj(b0[3]);
1290
+ count += nr;
1291
+ }
1292
+ // skip what we have after
1293
+ if(PanelMode) count += nr * (stride-offset-depth);
1294
+ }
1295
+ // copy the remaining columns one at a time (nr==1)
1296
+ for(Index j2=packet_cols; j2<cols; ++j2)
1297
+ {
1298
+ if(PanelMode) count += offset;
1299
+ const Scalar* b0 = &rhs[j2];
1300
+ for(Index k=0; k<depth; k++)
1301
+ {
1302
+ blockB[count] = cj(b0[k*rhsStride]);
1303
+ count += 1;
1304
+ }
1305
+ if(PanelMode) count += stride-offset-depth;
1306
+ }
1307
+ }
1308
+
1309
+ } // end namespace internal
1310
+
1311
+ /** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
1312
+ * \sa setCpuCacheSize */
1313
+ inline std::ptrdiff_t l1CacheSize()
1314
+ {
1315
+ std::ptrdiff_t l1, l2;
1316
+ internal::manage_caching_sizes(GetAction, &l1, &l2);
1317
+ return l1;
1318
+ }
1319
+
1320
+ /** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
1321
+ * \sa setCpuCacheSize */
1322
+ inline std::ptrdiff_t l2CacheSize()
1323
+ {
1324
+ std::ptrdiff_t l1, l2;
1325
+ internal::manage_caching_sizes(GetAction, &l1, &l2);
1326
+ return l2;
1327
+ }
1328
+
1329
+ /** Set the cpu L1 and L2 cache sizes (in bytes).
1330
+ * These values are use to adjust the size of the blocks
1331
+ * for the algorithms working per blocks.
1332
+ *
1333
+ * \sa computeProductBlockingSizes */
1334
+ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
1335
+ {
1336
+ internal::manage_caching_sizes(SetAction, &l1, &l2);
1337
+ }
1338
+
1339
+ } // end namespace Eigen
1340
+
1341
+ #endif // EIGEN_GENERAL_BLOCK_PANEL_H