tomoto 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (420)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +123 -0
  5. data/ext/tomoto/ext.cpp +245 -0
  6. data/ext/tomoto/extconf.rb +28 -0
  7. data/lib/tomoto.rb +12 -0
  8. data/lib/tomoto/ct.rb +11 -0
  9. data/lib/tomoto/hdp.rb +11 -0
  10. data/lib/tomoto/lda.rb +67 -0
  11. data/lib/tomoto/version.rb +3 -0
  12. data/vendor/EigenRand/EigenRand/Core.h +1139 -0
  13. data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
  14. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
  15. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
  16. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
  17. data/vendor/EigenRand/EigenRand/EigenRand +19 -0
  18. data/vendor/EigenRand/EigenRand/Macro.h +24 -0
  19. data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
  20. data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
  21. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
  22. data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
  23. data/vendor/EigenRand/EigenRand/doc.h +220 -0
  24. data/vendor/EigenRand/LICENSE +21 -0
  25. data/vendor/EigenRand/README.md +288 -0
  26. data/vendor/eigen/COPYING.BSD +26 -0
  27. data/vendor/eigen/COPYING.GPL +674 -0
  28. data/vendor/eigen/COPYING.LGPL +502 -0
  29. data/vendor/eigen/COPYING.MINPACK +52 -0
  30. data/vendor/eigen/COPYING.MPL2 +373 -0
  31. data/vendor/eigen/COPYING.README +18 -0
  32. data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
  33. data/vendor/eigen/Eigen/Cholesky +46 -0
  34. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  35. data/vendor/eigen/Eigen/Core +537 -0
  36. data/vendor/eigen/Eigen/Dense +7 -0
  37. data/vendor/eigen/Eigen/Eigen +2 -0
  38. data/vendor/eigen/Eigen/Eigenvalues +61 -0
  39. data/vendor/eigen/Eigen/Geometry +62 -0
  40. data/vendor/eigen/Eigen/Householder +30 -0
  41. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  42. data/vendor/eigen/Eigen/Jacobi +33 -0
  43. data/vendor/eigen/Eigen/LU +50 -0
  44. data/vendor/eigen/Eigen/MetisSupport +35 -0
  45. data/vendor/eigen/Eigen/OrderingMethods +73 -0
  46. data/vendor/eigen/Eigen/PaStiXSupport +48 -0
  47. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  48. data/vendor/eigen/Eigen/QR +51 -0
  49. data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
  50. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  51. data/vendor/eigen/Eigen/SVD +51 -0
  52. data/vendor/eigen/Eigen/Sparse +36 -0
  53. data/vendor/eigen/Eigen/SparseCholesky +45 -0
  54. data/vendor/eigen/Eigen/SparseCore +69 -0
  55. data/vendor/eigen/Eigen/SparseLU +46 -0
  56. data/vendor/eigen/Eigen/SparseQR +37 -0
  57. data/vendor/eigen/Eigen/StdDeque +27 -0
  58. data/vendor/eigen/Eigen/StdList +26 -0
  59. data/vendor/eigen/Eigen/StdVector +27 -0
  60. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  61. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  62. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
  63. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
  64. data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  65. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
  66. data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
  67. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
  68. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
  69. data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
  70. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
  71. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
  72. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
  73. data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
  74. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
  75. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
  76. data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
  77. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
  78. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
  79. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
  80. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
  81. data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  82. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
  84. data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
  85. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
  86. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
  87. data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
  88. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
  89. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
  90. data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
  91. data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
  92. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  93. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
  94. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
  95. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
  96. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
  97. data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
  98. data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
  99. data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
  100. data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
  101. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
  102. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
  103. data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
  104. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
  105. data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
  106. data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
  107. data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
  108. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
  109. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
  110. data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
  111. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
  112. data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
  113. data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
  114. data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
  115. data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
  116. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
  117. data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
  118. data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
  119. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
  120. data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  121. data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
  122. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
  123. data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
  124. data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
  125. data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
  126. data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
  127. data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
  128. data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
  129. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
  130. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
  131. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
  132. data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
  134. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
  135. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
  139. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
  140. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
  142. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
  146. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
  148. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
  160. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
  161. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
  162. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
  163. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
  164. data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  165. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
  166. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
  167. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
  168. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
  169. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  170. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
  171. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
  172. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  173. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
  174. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
  175. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
  176. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
  177. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  178. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  179. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  180. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
  181. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
  182. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  183. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  184. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
  185. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
  186. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
  187. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
  188. data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
  189. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
  190. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  191. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
  192. data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
  193. data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
  194. data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
  195. data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
  196. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
  197. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
  198. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
  199. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  200. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
  201. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  202. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  203. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  204. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  205. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  206. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  207. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
  208. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
  209. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  210. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
  211. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  212. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
  213. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
  214. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
  215. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
  216. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
  217. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
  218. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
  219. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  220. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
  221. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
  222. data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
  223. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
  224. data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
  225. data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
  226. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
  227. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
  228. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
  229. data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
  230. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
  231. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  232. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
  233. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
  234. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
  235. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
  236. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
  237. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
  238. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
  239. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
  240. data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
  241. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
  242. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
  243. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
  244. data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  245. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
  246. data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  247. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
  248. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
  249. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
  250. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  251. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
  252. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
  253. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  254. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
  255. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
  256. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
  257. data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  258. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
  259. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
  260. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
  261. data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  262. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
  263. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  264. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
  265. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  266. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
  267. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
  268. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  269. data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  270. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
  271. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
  283. data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
  295. data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  296. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
  297. data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  298. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  299. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  300. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  307. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  308. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  309. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  310. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  311. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  312. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  313. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
  314. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
  315. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
  316. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
  317. data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
  318. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
  319. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
  320. data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
  321. data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
  322. data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
  323. data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
  324. data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
  325. data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
  326. data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
  327. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
  328. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
  329. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
  330. data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  331. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
  332. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  333. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
  334. data/vendor/eigen/README.md +3 -0
  335. data/vendor/eigen/bench/README.txt +55 -0
  336. data/vendor/eigen/bench/btl/COPYING +340 -0
  337. data/vendor/eigen/bench/btl/README +154 -0
  338. data/vendor/eigen/bench/tensors/README +21 -0
  339. data/vendor/eigen/blas/README.txt +6 -0
  340. data/vendor/eigen/demos/mandelbrot/README +10 -0
  341. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  342. data/vendor/eigen/demos/opengl/README +13 -0
  343. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
  344. data/vendor/eigen/unsupported/README.txt +50 -0
  345. data/vendor/tomotopy/LICENSE +21 -0
  346. data/vendor/tomotopy/README.kr.rst +375 -0
  347. data/vendor/tomotopy/README.rst +382 -0
  348. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
  349. data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
  350. data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
  351. data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
  352. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
  353. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
  354. data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
  355. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
  356. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
  357. data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
  358. data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
  359. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
  360. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
  361. data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
  362. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
  363. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
  364. data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
  365. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
  366. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
  367. data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
  368. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
  369. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
  370. data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
  371. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
  372. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
  373. data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
  374. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
  375. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
  376. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
  377. data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
  378. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
  379. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
  380. data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
  381. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
  382. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
  383. data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
  384. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
  385. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
  386. data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
  387. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
  388. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
  389. data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
  390. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
  391. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
  392. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
  393. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
  394. data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
  395. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
  396. data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
  397. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
  398. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
  399. data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
  400. data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
  401. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
  402. data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
  403. data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
  404. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
  405. data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
  406. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
  407. data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
  408. data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
  409. data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
  410. data/vendor/tomotopy/src/Utils/exception.h +28 -0
  411. data/vendor/tomotopy/src/Utils/math.h +281 -0
  412. data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
  413. data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
  414. data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
  415. data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
  416. data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
  417. data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
  418. data/vendor/tomotopy/src/Utils/text.hpp +49 -0
  419. data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
  420. metadata +531 -0
@@ -0,0 +1,136 @@
1
+ /*
2
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ * Neither the name of Intel Corporation nor the names of its contributors may
13
+ be used to endorse or promote products derived from this software without
14
+ specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+ ********************************************************************************
28
+ * Content : Eigen bindings to BLAS F77
29
+ * General matrix-vector product functionality based on ?GEMV.
30
+ ********************************************************************************
31
+ */
32
+
33
+ #ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
34
+ #define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
35
+
36
+ namespace Eigen {
37
+
38
+ namespace internal {
39
+
40
+ /**********************************************************************
41
+ * This file implements general matrix-vector multiplication using BLAS
42
+ * gemv function via partial specialization of
43
+ * general_matrix_vector_product::run(..) method for float, double,
44
+ * std::complex<float> and std::complex<double> types
45
+ **********************************************************************/
46
+
47
+ // gemv specialization
48
+
49
+ template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
50
+ struct general_matrix_vector_product_gemv;
51
+
52
+ #define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
53
+ template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
54
+ struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
55
+ static void run( \
56
+ Index rows, Index cols, \
57
+ const const_blas_data_mapper<Scalar,Index,ColMajor> &lhs, \
58
+ const const_blas_data_mapper<Scalar,Index,RowMajor> &rhs, \
59
+ Scalar* res, Index resIncr, Scalar alpha) \
60
+ { \
61
+ if (ConjugateLhs) { \
62
+ general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,BuiltIn>::run( \
63
+ rows, cols, lhs, rhs, res, resIncr, alpha); \
64
+ } else { \
65
+ general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
66
+ rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
67
+ } \
68
+ } \
69
+ }; \
70
+ template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
71
+ struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ConjugateRhs,Specialized> { \
72
+ static void run( \
73
+ Index rows, Index cols, \
74
+ const const_blas_data_mapper<Scalar,Index,RowMajor> &lhs, \
75
+ const const_blas_data_mapper<Scalar,Index,ColMajor> &rhs, \
76
+ Scalar* res, Index resIncr, Scalar alpha) \
77
+ { \
78
+ general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
79
+ rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
80
+ } \
81
+ }; \
82
+
83
+ EIGEN_BLAS_GEMV_SPECIALIZE(double)
84
+ EIGEN_BLAS_GEMV_SPECIALIZE(float)
85
+ EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
86
+ EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
87
+
88
+ #define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
89
+ template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
90
+ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
91
+ { \
92
+ typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
93
+ \
94
+ static void run( \
95
+ Index rows, Index cols, \
96
+ const EIGTYPE* lhs, Index lhsStride, \
97
+ const EIGTYPE* rhs, Index rhsIncr, \
98
+ EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
99
+ { \
100
+ BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \
101
+ lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \
102
+ const EIGTYPE beta(1); \
103
+ const EIGTYPE *x_ptr; \
104
+ char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
105
+ if (LhsStorageOrder==RowMajor) { \
106
+ m = convert_index<BlasIndex>(cols); \
107
+ n = convert_index<BlasIndex>(rows); \
108
+ }\
109
+ GEMVVector x_tmp; \
110
+ if (ConjugateRhs) { \
111
+ Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
112
+ x_tmp=map_x.conjugate(); \
113
+ x_ptr=x_tmp.data(); \
114
+ incx=1; \
115
+ } else x_ptr=rhs; \
116
+ BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
117
+ }\
118
+ };
119
+
120
+ #ifdef EIGEN_USE_MKL
121
+ EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
122
+ EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
123
+ EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
124
+ EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
125
+ #else
126
+ EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
127
+ EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
128
+ EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
129
+ EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
130
+ #endif
131
+
132
+ } // end namespase internal
133
+
134
+ } // end namespace Eigen
135
+
136
+ #endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
@@ -0,0 +1,163 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_PARALLELIZER_H
11
+ #define EIGEN_PARALLELIZER_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ /** \internal Stores (SetAction) or retrieves (GetAction) Eigen's max-thread setting via *v. */
18
+ inline void manage_multi_threading(Action action, int* v)
19
+ {
20
+ static EIGEN_UNUSED int m_maxThreads = -1; // -1 means "unset": GetAction then falls back to OpenMP's default
21
+
22
+ if(action==SetAction)
23
+ {
24
+ eigen_internal_assert(v!=0);
25
+ m_maxThreads = *v;
26
+ }
27
+ else if(action==GetAction)
28
+ {
29
+ eigen_internal_assert(v!=0);
30
+ #ifdef EIGEN_HAS_OPENMP
31
+ if(m_maxThreads>0)
32
+ *v = m_maxThreads;
33
+ else
34
+ *v = omp_get_max_threads();
35
+ #else
36
+ *v = 1; // without OpenMP, always report a single thread
37
+ #endif
38
+ }
39
+ else
40
+ {
41
+ eigen_internal_assert(false); // unknown Action value
42
+ }
43
+ }
44
+
45
+ }
46
+
47
+ /** Must be called first when calling Eigen from multiple threads */
48
+ inline void initParallel()
49
+ {
50
+ int nbt;
51
+ internal::manage_multi_threading(GetAction, &nbt); // query once so the underlying static is initialized before concurrent use
52
+ std::ptrdiff_t l1, l2, l3;
53
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3); // same for the cache-size statics; results are discarded
54
+ }
55
+
56
+ /** \returns the max number of threads reserved for Eigen (always 1 when OpenMP support is disabled)
57
+ * \sa setNbThreads */
58
+ inline int nbThreads()
59
+ {
60
+ int ret;
61
+ internal::manage_multi_threading(GetAction, &ret);
62
+ return ret;
63
+ }
64
+
65
+ /** Sets the max number of threads reserved for Eigen; a value <= 0 restores the automatic (OpenMP) default
66
+ * \sa nbThreads */
67
+ inline void setNbThreads(int v)
68
+ {
69
+ internal::manage_multi_threading(SetAction, &v);
70
+ }
71
+
72
+ namespace internal {
73
+
74
+ template<typename Index> struct GemmParallelInfo // per-thread bookkeeping for a parallel GEMM (one entry per worker, see parallelize_gemm)
75
+ {
76
+ GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
77
+
78
+ Index volatile sync; // NOTE(review): volatile, apparently read/written across OpenMP threads -- confirm intended synchronization
79
+ int volatile users;
80
+
81
+ Index lhs_start; // first row of this thread's lhs block (set in parallelize_gemm)
82
+ Index lhs_length; // number of rows in this thread's lhs block
83
+ };
84
+
85
+ template<bool Condition, typename Functor, typename Index>
86
+ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
87
+ {
88
+ // TODO when EIGEN_USE_BLAS is defined,
89
+ // we should still enable OMP for other scalar types
90
+ #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
91
+ // FIXME the transpose variable is only needed to properly split
92
+ // the matrix product when multithreading is enabled. This is a temporary
93
+ // fix to support row-major destination matrices. This whole
94
+ // parallelizer mechanism has to be redesigned anyway.
95
+ EIGEN_UNUSED_VARIABLE(depth);
96
+ EIGEN_UNUSED_VARIABLE(transpose);
97
+ func(0,rows, 0,cols);
98
+ #else
99
+
100
+ // Dynamically check whether we should enable or disable OpenMP.
101
+ // The conditions are:
102
+ // - the max number of threads we can create is greater than 1
103
+ // - we are not already in a parallel code
104
+ // - the sizes are large enough
105
+
106
+ // compute the maximal number of threads from the size of the product:
107
+ // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
108
+ Index size = transpose ? rows : cols;
109
+ Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
110
+
111
+ // compute the maximal number of threads from the total amount of work:
112
+ double work = static_cast<double>(rows) * static_cast<double>(cols) *
113
+ static_cast<double>(depth);
114
+ double kMinTaskSize = 50000; // FIXME improve this heuristic.
115
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
116
+
117
+ // compute the number of threads we are going to use
118
+ Index threads = std::min<Index>(nbThreads(), pb_max_threads);
119
+
120
+ // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
121
+ // then abort multi-threading
122
+ // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
123
+ if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
124
+ return func(0,rows, 0,cols);
125
+
126
+ Eigen::initParallel();
127
+ func.initParallelSession(threads);
128
+
129
+ if(transpose)
130
+ std::swap(rows,cols);
131
+
132
+ ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
133
+
134
+ #pragma omp parallel num_threads(threads)
135
+ {
136
+ Index i = omp_get_thread_num();
137
+ // Note that the actual number of threads might be lower than the number of requested ones.
138
+ Index actual_threads = omp_get_num_threads();
139
+
140
+ Index blockCols = (cols / actual_threads) & ~Index(0x3); // round the per-thread column count down to a multiple of 4
141
+ Index blockRows = (rows / actual_threads);
142
+ blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr; // round down to a multiple of the kernel's row progress
143
+
144
+ Index r0 = i*blockRows;
145
+ Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows; // the last thread takes the remainder
146
+
147
+ Index c0 = i*blockCols;
148
+ Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
149
+
150
+ info[i].lhs_start = r0; // publish this thread's lhs block for the other workers
151
+ info[i].lhs_length = actualBlockRows;
152
+
153
+ if(transpose) func(c0, actualBlockCols, 0, rows, info);
154
+ else func(0, rows, c0, actualBlockCols, info);
155
+ }
156
+ #endif
157
+ }
158
+
159
+ } // end namespace internal
160
+
161
+ } // end namespace Eigen
162
+
163
+ #endif // EIGEN_PARALLELIZER_H
@@ -0,0 +1,521 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
11
+ #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ // pack a selfadjoint block diagonal for use with the gebp_kernel
18
+ template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
19
+ struct symm_pack_lhs
20
+ {
21
+ template<int BlockRows> inline
22
+ void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
23
+ {
24
+ // normal copy
25
+ for(Index k=0; k<i; k++)
26
+ for(Index w=0; w<BlockRows; w++)
27
+ blockA[count++] = lhs(i+w,k); // normal
28
+ // symmetric copy
29
+ Index h = 0;
30
+ for(Index k=i; k<i+BlockRows; k++)
31
+ {
32
+ for(Index w=0; w<h; w++)
33
+ blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
34
+
35
+ blockA[count++] = numext::real(lhs(k,k)); // real (diagonal)
36
+
37
+ for(Index w=h+1; w<BlockRows; w++)
38
+ blockA[count++] = lhs(i+w, k); // normal
39
+ ++h;
40
+ }
41
+ // transposed copy
42
+ for(Index k=i+BlockRows; k<cols; k++)
43
+ for(Index w=0; w<BlockRows; w++)
44
+ blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
45
+ }
46
+ void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
47
+ {
48
+ enum { PacketSize = packet_traits<Scalar>::size };
49
+ const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
50
+ Index count = 0;
51
+ //Index peeled_mc3 = (rows/Pack1)*Pack1;
52
+
53
+ const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
54
+ const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
55
+ const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
56
+
57
+ if(Pack1>=3*PacketSize)
58
+ for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
59
+ pack<3*PacketSize>(blockA, lhs, cols, i, count);
60
+
61
+ if(Pack1>=2*PacketSize)
62
+ for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
63
+ pack<2*PacketSize>(blockA, lhs, cols, i, count);
64
+
65
+ if(Pack1>=1*PacketSize)
66
+ for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
67
+ pack<1*PacketSize>(blockA, lhs, cols, i, count);
68
+
69
+ // do the same with mr==1
70
+ for(Index i=peeled_mc1; i<rows; i++)
71
+ {
72
+ for(Index k=0; k<i; k++)
73
+ blockA[count++] = lhs(i, k); // normal
74
+
75
+ blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
76
+
77
+ for(Index k=i+1; k<cols; k++)
78
+ blockA[count++] = numext::conj(lhs(k, i)); // transposed
79
+ }
80
+ }
81
+ };
82
+
83
+ template<typename Scalar, typename Index, int nr, int StorageOrder>
84
+ struct symm_pack_rhs
85
+ {
86
+ enum { PacketSize = packet_traits<Scalar>::size };
87
+ void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
88
+ {
89
+ Index end_k = k2 + rows;
90
+ Index count = 0;
91
+ const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
92
+ Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
93
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
94
+
95
+ // first part: normal case
96
+ for(Index j2=0; j2<k2; j2+=nr)
97
+ {
98
+ for(Index k=k2; k<end_k; k++)
99
+ {
100
+ blockB[count+0] = rhs(k,j2+0);
101
+ blockB[count+1] = rhs(k,j2+1);
102
+ if (nr>=4)
103
+ {
104
+ blockB[count+2] = rhs(k,j2+2);
105
+ blockB[count+3] = rhs(k,j2+3);
106
+ }
107
+ if (nr>=8)
108
+ {
109
+ blockB[count+4] = rhs(k,j2+4);
110
+ blockB[count+5] = rhs(k,j2+5);
111
+ blockB[count+6] = rhs(k,j2+6);
112
+ blockB[count+7] = rhs(k,j2+7);
113
+ }
114
+ count += nr;
115
+ }
116
+ }
117
+
118
+ // second part: diagonal block
119
+ Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
120
+ if(nr>=8)
121
+ {
122
+ for(Index j2=k2; j2<end8; j2+=8)
123
+ {
124
+ // again we can split vertically in three different parts (transpose, symmetric, normal)
125
+ // transpose
126
+ for(Index k=k2; k<j2; k++)
127
+ {
128
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
129
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
130
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
131
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
132
+ blockB[count+4] = numext::conj(rhs(j2+4,k));
133
+ blockB[count+5] = numext::conj(rhs(j2+5,k));
134
+ blockB[count+6] = numext::conj(rhs(j2+6,k));
135
+ blockB[count+7] = numext::conj(rhs(j2+7,k));
136
+ count += 8;
137
+ }
138
+ // symmetric
139
+ Index h = 0;
140
+ for(Index k=j2; k<j2+8; k++)
141
+ {
142
+ // normal
143
+ for (Index w=0 ; w<h; ++w)
144
+ blockB[count+w] = rhs(k,j2+w);
145
+
146
+ blockB[count+h] = numext::real(rhs(k,k));
147
+
148
+ // transpose
149
+ for (Index w=h+1 ; w<8; ++w)
150
+ blockB[count+w] = numext::conj(rhs(j2+w,k));
151
+ count += 8;
152
+ ++h;
153
+ }
154
+ // normal
155
+ for(Index k=j2+8; k<end_k; k++)
156
+ {
157
+ blockB[count+0] = rhs(k,j2+0);
158
+ blockB[count+1] = rhs(k,j2+1);
159
+ blockB[count+2] = rhs(k,j2+2);
160
+ blockB[count+3] = rhs(k,j2+3);
161
+ blockB[count+4] = rhs(k,j2+4);
162
+ blockB[count+5] = rhs(k,j2+5);
163
+ blockB[count+6] = rhs(k,j2+6);
164
+ blockB[count+7] = rhs(k,j2+7);
165
+ count += 8;
166
+ }
167
+ }
168
+ }
169
+ if(nr>=4)
170
+ {
171
+ for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
172
+ {
173
+ // again we can split vertically in three different parts (transpose, symmetric, normal)
174
+ // transpose
175
+ for(Index k=k2; k<j2; k++)
176
+ {
177
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
178
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
179
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
180
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
181
+ count += 4;
182
+ }
183
+ // symmetric
184
+ Index h = 0;
185
+ for(Index k=j2; k<j2+4; k++)
186
+ {
187
+ // normal
188
+ for (Index w=0 ; w<h; ++w)
189
+ blockB[count+w] = rhs(k,j2+w);
190
+
191
+ blockB[count+h] = numext::real(rhs(k,k));
192
+
193
+ // transpose
194
+ for (Index w=h+1 ; w<4; ++w)
195
+ blockB[count+w] = numext::conj(rhs(j2+w,k));
196
+ count += 4;
197
+ ++h;
198
+ }
199
+ // normal
200
+ for(Index k=j2+4; k<end_k; k++)
201
+ {
202
+ blockB[count+0] = rhs(k,j2+0);
203
+ blockB[count+1] = rhs(k,j2+1);
204
+ blockB[count+2] = rhs(k,j2+2);
205
+ blockB[count+3] = rhs(k,j2+3);
206
+ count += 4;
207
+ }
208
+ }
209
+ }
210
+
211
+ // third part: transposed
212
+ if(nr>=8)
213
+ {
214
+ for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
215
+ {
216
+ for(Index k=k2; k<end_k; k++)
217
+ {
218
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
219
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
220
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
221
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
222
+ blockB[count+4] = numext::conj(rhs(j2+4,k));
223
+ blockB[count+5] = numext::conj(rhs(j2+5,k));
224
+ blockB[count+6] = numext::conj(rhs(j2+6,k));
225
+ blockB[count+7] = numext::conj(rhs(j2+7,k));
226
+ count += 8;
227
+ }
228
+ }
229
+ }
230
+ if(nr>=4)
231
+ {
232
+ for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
233
+ {
234
+ for(Index k=k2; k<end_k; k++)
235
+ {
236
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
237
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
238
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
239
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
240
+ count += 4;
241
+ }
242
+ }
243
+ }
244
+
245
+ // copy the remaining columns one at a time (=> the same with nr==1)
246
+ for(Index j2=packet_cols4; j2<cols; ++j2)
247
+ {
248
+ // transpose
249
+ Index half = (std::min)(end_k,j2);
250
+ for(Index k=k2; k<half; k++)
251
+ {
252
+ blockB[count] = numext::conj(rhs(j2,k));
253
+ count += 1;
254
+ }
255
+
256
+ if(half==j2 && half<k2+rows)
257
+ {
258
+ blockB[count] = numext::real(rhs(j2,j2));
259
+ count += 1;
260
+ }
261
+ else
262
+ half--;
263
+
264
+ // normal
265
+ for(Index k=half+1; k<k2+rows; k++)
266
+ {
267
+ blockB[count] = rhs(k,j2);
268
+ count += 1;
269
+ }
270
+ }
271
+ }
272
+ };
273
+
274
+ /* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
275
+ * the general matrix matrix product.
276
+ */
277
+ template <typename Scalar, typename Index,
278
+ int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
279
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
280
+ int ResStorageOrder>
281
+ struct product_selfadjoint_matrix;
282
+
283
+ template <typename Scalar, typename Index,
284
+ int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
285
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
286
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor> // row-major result: reduce to the col-major case
287
+ {
288
+
289
+ static EIGEN_STRONG_INLINE void run(
290
+ Index rows, Index cols,
291
+ const Scalar* lhs, Index lhsStride,
292
+ const Scalar* rhs, Index rhsStride,
293
+ Scalar* res, Index resStride,
294
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
295
+ {
296
+ product_selfadjoint_matrix<Scalar, Index, // forward to the ColMajor specialization with lhs/rhs (and rows/cols) swapped, i.e. compute the transposed product
297
+ EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
298
+ RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
299
+ EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
300
+ LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
301
+ ColMajor>
302
+ ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
303
+ }
304
+ };
305
+
306
+ template <typename Scalar, typename Index,
307
+ int LhsStorageOrder, bool ConjugateLhs,
308
+ int RhsStorageOrder, bool ConjugateRhs>
309
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor> // lhs is the selfadjoint operand; run() is defined out of line below
310
+ {
311
+
312
+ static EIGEN_DONT_INLINE void run(
313
+ Index rows, Index cols,
314
+ const Scalar* _lhs, Index lhsStride,
315
+ const Scalar* _rhs, Index rhsStride,
316
+ Scalar* res, Index resStride,
317
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
318
+ };
319
+
320
+ template <typename Scalar, typename Index,
321
+ int LhsStorageOrder, bool ConjugateLhs,
322
+ int RhsStorageOrder, bool ConjugateRhs>
323
+ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
324
+ Index rows, Index cols,
325
+ const Scalar* _lhs, Index lhsStride,
326
+ const Scalar* _rhs, Index rhsStride,
327
+ Scalar* _res, Index resStride,
328
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
329
+ {
330
+ Index size = rows;
331
+
332
+ typedef gebp_traits<Scalar,Scalar> Traits;
333
+
334
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
335
+ typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
336
+ typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
337
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
338
+ LhsMapper lhs(_lhs,lhsStride);
339
+ LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
340
+ RhsMapper rhs(_rhs,rhsStride);
341
+ ResMapper res(_res, resStride);
342
+
343
+ Index kc = blocking.kc(); // cache block size along the K direction
344
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
345
+ // kc must be smaller than mc
346
+ kc = (std::min)(kc,mc);
347
+ std::size_t sizeA = kc*mc;
348
+ std::size_t sizeB = kc*cols;
349
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
350
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
351
+
352
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
353
+ symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
354
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
355
+ gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
356
+
357
+ for(Index k2=0; k2<size; k2+=kc)
358
+ {
359
+ const Index actual_kc = (std::min)(k2+kc,size)-k2;
360
+
361
+ // we have selected one row panel of rhs and one column panel of lhs
362
+ // pack rhs's panel into a sequential chunk of memory
363
+ // and expand each coeff to a constant packet for further reuse
364
+ pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
365
+
366
+ // the selected lhs panel has to be split in three different parts:
367
+ // 1 - the transposed panel above the diagonal block => transposed packed copy
368
+ // 2 - the diagonal block => special packed copy
369
+ // 3 - the panel below the diagonal block => generic packed copy
370
+ for(Index i2=0; i2<k2; i2+=mc)
371
+ {
372
+ const Index actual_mc = (std::min)(i2+mc,k2)-i2;
373
+ // transposed packed copy
374
+ pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
375
+
376
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
377
+ }
378
+ // the block diagonal
379
+ {
380
+ const Index actual_mc = (std::min)(k2+kc,size)-k2;
381
+ // symmetric packed copy
382
+ pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
383
+
384
+ gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
385
+ }
386
+
387
+ for(Index i2=k2+kc; i2<size; i2+=mc)
388
+ {
389
+ const Index actual_mc = (std::min)(i2+mc,size)-i2;
390
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
391
+ (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
392
+
393
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
394
+ }
395
+ }
396
+ }
397
+
398
+ // matrix * selfadjoint product (rhs is the selfadjoint operand); run() is defined out of line below
399
+ template <typename Scalar, typename Index,
400
+ int LhsStorageOrder, bool ConjugateLhs,
401
+ int RhsStorageOrder, bool ConjugateRhs>
402
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
403
+ {
404
+
405
+ static EIGEN_DONT_INLINE void run(
406
+ Index rows, Index cols,
407
+ const Scalar* _lhs, Index lhsStride,
408
+ const Scalar* _rhs, Index rhsStride,
409
+ Scalar* res, Index resStride,
410
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
411
+ };
412
+
413
+ template <typename Scalar, typename Index,
414
+ int LhsStorageOrder, bool ConjugateLhs,
415
+ int RhsStorageOrder, bool ConjugateRhs>
416
+ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
417
+ Index rows, Index cols,
418
+ const Scalar* _lhs, Index lhsStride,
419
+ const Scalar* _rhs, Index rhsStride,
420
+ Scalar* _res, Index resStride,
421
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
422
+ {
423
+ Index size = cols;
424
+
425
+ typedef gebp_traits<Scalar,Scalar> Traits;
426
+
427
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
428
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
429
+ LhsMapper lhs(_lhs,lhsStride);
430
+ ResMapper res(_res,resStride);
431
+
432
+ Index kc = blocking.kc(); // cache block size along the K direction
433
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
434
+ std::size_t sizeA = kc*mc;
435
+ std::size_t sizeB = kc*cols;
436
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
437
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
438
+
439
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
440
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
441
+ symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
442
+
443
+ for(Index k2=0; k2<size; k2+=kc)
444
+ {
445
+ const Index actual_kc = (std::min)(k2+kc,size)-k2;
446
+
447
+ pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
448
+
449
+ // => GEPP
450
+ for(Index i2=0; i2<rows; i2+=mc)
451
+ {
452
+ const Index actual_mc = (std::min)(i2+mc,rows)-i2;
453
+ pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
454
+
455
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
456
+ }
457
+ }
458
+ }
459
+
460
+ } // end namespace internal
461
+
462
+ /***************************************************************************
463
+ * Wrapper to product_selfadjoint_matrix
464
+ ***************************************************************************/
465
+
466
+ namespace internal {
467
+
468
+ template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
469
+ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false> // wrapper: maps the expression-level product onto product_selfadjoint_matrix
470
+ {
471
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
472
+
473
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
474
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
475
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
476
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
477
+
478
+ enum {
479
+ LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
480
+ LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
481
+ RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
482
+ RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
483
+ };
484
+
485
+ template<typename Dest>
486
+ static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
487
+ {
488
+ eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
489
+
490
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
491
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
492
+
493
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) // fold the scalar factors of both operands into alpha
494
+ * RhsBlasTraits::extractScalarFactor(a_rhs);
495
+
496
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
497
+ Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
498
+
499
+ BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
500
+
501
+ internal::product_selfadjoint_matrix<Scalar, Index, // storage order / conjugation flags derived from triangular mode and expression traits
502
+ EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
503
+ NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
504
+ EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
505
+ NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
506
+ internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
507
+ ::run(
508
+ lhs.rows(), rhs.cols(), // sizes
509
+ &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
510
+ &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
511
+ &dst.coeffRef(0,0), dst.outerStride(), // result info
512
+ actualAlpha, blocking // alpha
513
+ );
514
+ }
515
+ };
516
+
517
+ } // end namespace internal
518
+
519
+ } // end namespace Eigen
520
+
521
+ #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H