tomoto 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (420) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +123 -0
  5. data/ext/tomoto/ext.cpp +245 -0
  6. data/ext/tomoto/extconf.rb +28 -0
  7. data/lib/tomoto.rb +12 -0
  8. data/lib/tomoto/ct.rb +11 -0
  9. data/lib/tomoto/hdp.rb +11 -0
  10. data/lib/tomoto/lda.rb +67 -0
  11. data/lib/tomoto/version.rb +3 -0
  12. data/vendor/EigenRand/EigenRand/Core.h +1139 -0
  13. data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
  14. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
  15. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
  16. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
  17. data/vendor/EigenRand/EigenRand/EigenRand +19 -0
  18. data/vendor/EigenRand/EigenRand/Macro.h +24 -0
  19. data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
  20. data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
  21. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
  22. data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
  23. data/vendor/EigenRand/EigenRand/doc.h +220 -0
  24. data/vendor/EigenRand/LICENSE +21 -0
  25. data/vendor/EigenRand/README.md +288 -0
  26. data/vendor/eigen/COPYING.BSD +26 -0
  27. data/vendor/eigen/COPYING.GPL +674 -0
  28. data/vendor/eigen/COPYING.LGPL +502 -0
  29. data/vendor/eigen/COPYING.MINPACK +52 -0
  30. data/vendor/eigen/COPYING.MPL2 +373 -0
  31. data/vendor/eigen/COPYING.README +18 -0
  32. data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
  33. data/vendor/eigen/Eigen/Cholesky +46 -0
  34. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  35. data/vendor/eigen/Eigen/Core +537 -0
  36. data/vendor/eigen/Eigen/Dense +7 -0
  37. data/vendor/eigen/Eigen/Eigen +2 -0
  38. data/vendor/eigen/Eigen/Eigenvalues +61 -0
  39. data/vendor/eigen/Eigen/Geometry +62 -0
  40. data/vendor/eigen/Eigen/Householder +30 -0
  41. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  42. data/vendor/eigen/Eigen/Jacobi +33 -0
  43. data/vendor/eigen/Eigen/LU +50 -0
  44. data/vendor/eigen/Eigen/MetisSupport +35 -0
  45. data/vendor/eigen/Eigen/OrderingMethods +73 -0
  46. data/vendor/eigen/Eigen/PaStiXSupport +48 -0
  47. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  48. data/vendor/eigen/Eigen/QR +51 -0
  49. data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
  50. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  51. data/vendor/eigen/Eigen/SVD +51 -0
  52. data/vendor/eigen/Eigen/Sparse +36 -0
  53. data/vendor/eigen/Eigen/SparseCholesky +45 -0
  54. data/vendor/eigen/Eigen/SparseCore +69 -0
  55. data/vendor/eigen/Eigen/SparseLU +46 -0
  56. data/vendor/eigen/Eigen/SparseQR +37 -0
  57. data/vendor/eigen/Eigen/StdDeque +27 -0
  58. data/vendor/eigen/Eigen/StdList +26 -0
  59. data/vendor/eigen/Eigen/StdVector +27 -0
  60. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  61. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  62. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
  63. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
  64. data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  65. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
  66. data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
  67. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
  68. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
  69. data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
  70. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
  71. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
  72. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
  73. data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
  74. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
  75. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
  76. data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
  77. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
  78. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
  79. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
  80. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
  81. data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  82. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
  84. data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
  85. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
  86. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
  87. data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
  88. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
  89. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
  90. data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
  91. data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
  92. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  93. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
  94. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
  95. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
  96. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
  97. data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
  98. data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
  99. data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
  100. data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
  101. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
  102. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
  103. data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
  104. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
  105. data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
  106. data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
  107. data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
  108. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
  109. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
  110. data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
  111. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
  112. data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
  113. data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
  114. data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
  115. data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
  116. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
  117. data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
  118. data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
  119. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
  120. data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  121. data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
  122. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
  123. data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
  124. data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
  125. data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
  126. data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
  127. data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
  128. data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
  129. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
  130. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
  131. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
  132. data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
  134. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
  135. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
  139. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
  140. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
  142. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
  146. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
  148. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
  160. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
  161. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
  162. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
  163. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
  164. data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  165. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
  166. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
  167. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
  168. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
  169. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  170. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
  171. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
  172. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  173. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
  174. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
  175. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
  176. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
  177. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  178. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  179. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  180. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
  181. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
  182. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  183. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  184. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
  185. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
  186. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
  187. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
  188. data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
  189. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
  190. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  191. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
  192. data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
  193. data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
  194. data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
  195. data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
  196. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
  197. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
  198. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
  199. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  200. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
  201. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  202. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  203. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  204. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  205. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  206. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  207. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
  208. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
  209. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  210. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
  211. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  212. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
  213. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
  214. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
  215. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
  216. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
  217. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
  218. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
  219. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  220. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
  221. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
  222. data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
  223. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
  224. data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
  225. data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
  226. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
  227. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
  228. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
  229. data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
  230. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
  231. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  232. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
  233. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
  234. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
  235. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
  236. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
  237. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
  238. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
  239. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
  240. data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
  241. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
  242. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
  243. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
  244. data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  245. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
  246. data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  247. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
  248. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
  249. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
  250. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  251. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
  252. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
  253. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  254. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
  255. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
  256. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
  257. data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  258. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
  259. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
  260. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
  261. data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  262. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
  263. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  264. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
  265. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  266. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
  267. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
  268. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  269. data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  270. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
  271. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
  283. data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
  295. data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  296. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
  297. data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  298. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  299. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  300. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  307. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  308. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  309. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  310. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  311. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  312. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  313. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
  314. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
  315. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
  316. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
  317. data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
  318. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
  319. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
  320. data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
  321. data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
  322. data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
  323. data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
  324. data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
  325. data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
  326. data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
  327. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
  328. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
  329. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
  330. data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  331. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
  332. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  333. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
  334. data/vendor/eigen/README.md +3 -0
  335. data/vendor/eigen/bench/README.txt +55 -0
  336. data/vendor/eigen/bench/btl/COPYING +340 -0
  337. data/vendor/eigen/bench/btl/README +154 -0
  338. data/vendor/eigen/bench/tensors/README +21 -0
  339. data/vendor/eigen/blas/README.txt +6 -0
  340. data/vendor/eigen/demos/mandelbrot/README +10 -0
  341. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  342. data/vendor/eigen/demos/opengl/README +13 -0
  343. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
  344. data/vendor/eigen/unsupported/README.txt +50 -0
  345. data/vendor/tomotopy/LICENSE +21 -0
  346. data/vendor/tomotopy/README.kr.rst +375 -0
  347. data/vendor/tomotopy/README.rst +382 -0
  348. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
  349. data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
  350. data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
  351. data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
  352. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
  353. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
  354. data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
  355. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
  356. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
  357. data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
  358. data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
  359. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
  360. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
  361. data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
  362. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
  363. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
  364. data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
  365. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
  366. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
  367. data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
  368. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
  369. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
  370. data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
  371. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
  372. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
  373. data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
  374. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
  375. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
  376. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
  377. data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
  378. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
  379. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
  380. data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
  381. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
  382. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
  383. data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
  384. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
  385. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
  386. data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
  387. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
  388. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
  389. data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
  390. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
  391. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
  392. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
  393. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
  394. data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
  395. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
  396. data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
  397. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
  398. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
  399. data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
  400. data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
  401. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
  402. data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
  403. data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
  404. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
  405. data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
  406. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
  407. data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
  408. data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
  409. data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
  410. data/vendor/tomotopy/src/Utils/exception.h +28 -0
  411. data/vendor/tomotopy/src/Utils/math.h +281 -0
  412. data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
  413. data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
  414. data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
  415. data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
  416. data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
  417. data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
  418. data/vendor/tomotopy/src/Utils/text.hpp +49 -0
  419. data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
  420. metadata +531 -0
@@ -0,0 +1,280 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_SPARSELU_GEMM_KERNEL_H
11
+ #define EIGEN_SPARSELU_GEMM_KERNEL_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+
18
+ /** \internal
19
+ * A general matrix-matrix product kernel optimized for the SparseLU factorization.
20
+ * - A, B, and C must be column major
21
+ * - lda and ldc must be multiples of the respective packet size
22
+ * - C must have the same alignment as A
23
+ */
24
+ template<typename Scalar>
25
+ EIGEN_DONT_INLINE
26
+ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
27
+ {
28
+ using namespace Eigen::internal;
29
+
30
+ typedef typename packet_traits<Scalar>::type Packet;
31
+ enum {
32
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
33
+ PacketSize = packet_traits<Scalar>::size,
34
+ PM = 8, // peeling in M
35
+ RN = 2, // register blocking
36
+ RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking
37
+ BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk
38
+ SM = PM*PacketSize // step along M
39
+ };
40
+ Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
41
+ Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
42
+ Index i0 = internal::first_default_aligned(A,m);
43
+
44
+ eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));
45
+
46
+ // handle the non aligned rows of A and C without any optimization:
47
+ for(Index i=0; i<i0; ++i)
48
+ {
49
+ for(Index j=0; j<n; ++j)
50
+ {
51
+ Scalar c = C[i+j*ldc];
52
+ for(Index k=0; k<d; ++k)
53
+ c += B[k+j*ldb] * A[i+k*lda];
54
+ C[i+j*ldc] = c;
55
+ }
56
+ }
57
+ // process the remaining rows per chunk of BM rows
58
+ for(Index ib=i0; ib<m; ib+=BM)
59
+ {
60
+ Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows
61
+ Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
62
+ Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
63
+
64
+ // Let's process two columns of B-C at once
65
+ for(Index j=0; j<n_end; j+=RN)
66
+ {
67
+ const Scalar* Bc0 = B+(j+0)*ldb;
68
+ const Scalar* Bc1 = B+(j+1)*ldb;
69
+
70
+ for(Index k=0; k<d_end; k+=RK)
71
+ {
72
+
73
+ // load and expand a RN x RK block of B
74
+ Packet b00, b10, b20, b30, b01, b11, b21, b31;
75
+ { b00 = pset1<Packet>(Bc0[0]); }
76
+ { b10 = pset1<Packet>(Bc0[1]); }
77
+ if(RK==4) { b20 = pset1<Packet>(Bc0[2]); }
78
+ if(RK==4) { b30 = pset1<Packet>(Bc0[3]); }
79
+ { b01 = pset1<Packet>(Bc1[0]); }
80
+ { b11 = pset1<Packet>(Bc1[1]); }
81
+ if(RK==4) { b21 = pset1<Packet>(Bc1[2]); }
82
+ if(RK==4) { b31 = pset1<Packet>(Bc1[3]); }
83
+
84
+ Packet a0, a1, a2, a3, c0, c1, t0, t1;
85
+
86
+ const Scalar* A0 = A+ib+(k+0)*lda;
87
+ const Scalar* A1 = A+ib+(k+1)*lda;
88
+ const Scalar* A2 = A+ib+(k+2)*lda;
89
+ const Scalar* A3 = A+ib+(k+3)*lda;
90
+
91
+ Scalar* C0 = C+ib+(j+0)*ldc;
92
+ Scalar* C1 = C+ib+(j+1)*ldc;
93
+
94
+ a0 = pload<Packet>(A0);
95
+ a1 = pload<Packet>(A1);
96
+ if(RK==4)
97
+ {
98
+ a2 = pload<Packet>(A2);
99
+ a3 = pload<Packet>(A3);
100
+ }
101
+ else
102
+ {
103
+ // workaround "may be used uninitialized in this function" warning
104
+ a2 = a3 = a0;
105
+ }
106
+
107
+ #define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
108
+ #define WORK(I) \
109
+ c0 = pload<Packet>(C0+i+(I)*PacketSize); \
110
+ c1 = pload<Packet>(C1+i+(I)*PacketSize); \
111
+ KMADD(c0, a0, b00, t0) \
112
+ KMADD(c1, a0, b01, t1) \
113
+ a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
114
+ KMADD(c0, a1, b10, t0) \
115
+ KMADD(c1, a1, b11, t1) \
116
+ a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
117
+ if(RK==4){ KMADD(c0, a2, b20, t0) }\
118
+ if(RK==4){ KMADD(c1, a2, b21, t1) }\
119
+ if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
120
+ if(RK==4){ KMADD(c0, a3, b30, t0) }\
121
+ if(RK==4){ KMADD(c1, a3, b31, t1) }\
122
+ if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
123
+ pstore(C0+i+(I)*PacketSize, c0); \
124
+ pstore(C1+i+(I)*PacketSize, c1)
125
+
126
+ // process rows of A' - C' with aggressive vectorization and peeling
127
+ for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
128
+ {
129
+ EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1");
130
+ prefetch((A0+i+(5)*PacketSize));
131
+ prefetch((A1+i+(5)*PacketSize));
132
+ if(RK==4) prefetch((A2+i+(5)*PacketSize));
133
+ if(RK==4) prefetch((A3+i+(5)*PacketSize));
134
+
135
+ WORK(0);
136
+ WORK(1);
137
+ WORK(2);
138
+ WORK(3);
139
+ WORK(4);
140
+ WORK(5);
141
+ WORK(6);
142
+ WORK(7);
143
+ }
144
+ // process the remaining rows with vectorization only
145
+ for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
146
+ {
147
+ WORK(0);
148
+ }
149
+ #undef WORK
150
+ // process the remaining rows without vectorization
151
+ for(Index i=actual_b_end2; i<actual_b; ++i)
152
+ {
153
+ if(RK==4)
154
+ {
155
+ C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
156
+ C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3];
157
+ }
158
+ else
159
+ {
160
+ C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
161
+ C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1];
162
+ }
163
+ }
164
+
165
+ Bc0 += RK;
166
+ Bc1 += RK;
167
+ } // peeled loop on k
168
+ } // peeled loop on the columns j
169
+ // process the last column (we now perform a matrix-vector product)
170
+ if((n-n_end)>0)
171
+ {
172
+ const Scalar* Bc0 = B+(n-1)*ldb;
173
+
174
+ for(Index k=0; k<d_end; k+=RK)
175
+ {
176
+
177
+ // load and expand a 1 x RK block of B
178
+ Packet b00, b10, b20, b30;
179
+ b00 = pset1<Packet>(Bc0[0]);
180
+ b10 = pset1<Packet>(Bc0[1]);
181
+ if(RK==4) b20 = pset1<Packet>(Bc0[2]);
182
+ if(RK==4) b30 = pset1<Packet>(Bc0[3]);
183
+
184
+ Packet a0, a1, a2, a3, c0, t0/*, t1*/;
185
+
186
+ const Scalar* A0 = A+ib+(k+0)*lda;
187
+ const Scalar* A1 = A+ib+(k+1)*lda;
188
+ const Scalar* A2 = A+ib+(k+2)*lda;
189
+ const Scalar* A3 = A+ib+(k+3)*lda;
190
+
191
+ Scalar* C0 = C+ib+(n_end)*ldc;
192
+
193
+ a0 = pload<Packet>(A0);
194
+ a1 = pload<Packet>(A1);
195
+ if(RK==4)
196
+ {
197
+ a2 = pload<Packet>(A2);
198
+ a3 = pload<Packet>(A3);
199
+ }
200
+ else
201
+ {
202
+ // workaround "may be used uninitialized in this function" warning
203
+ a2 = a3 = a0;
204
+ }
205
+
206
+ #define WORK(I) \
207
+ c0 = pload<Packet>(C0+i+(I)*PacketSize); \
208
+ KMADD(c0, a0, b00, t0) \
209
+ a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
210
+ KMADD(c0, a1, b10, t0) \
211
+ a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
212
+ if(RK==4){ KMADD(c0, a2, b20, t0) }\
213
+ if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
214
+ if(RK==4){ KMADD(c0, a3, b30, t0) }\
215
+ if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
216
+ pstore(C0+i+(I)*PacketSize, c0);
217
+
218
+ // aggressive vectorization and peeling
219
+ for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
220
+ {
221
+ EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2");
222
+ WORK(0);
223
+ WORK(1);
224
+ WORK(2);
225
+ WORK(3);
226
+ WORK(4);
227
+ WORK(5);
228
+ WORK(6);
229
+ WORK(7);
230
+ }
231
+ // vectorization only
232
+ for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
233
+ {
234
+ WORK(0);
235
+ }
236
+ // remaining scalars
237
+ for(Index i=actual_b_end2; i<actual_b; ++i)
238
+ {
239
+ if(RK==4)
240
+ C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
241
+ else
242
+ C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
243
+ }
244
+
245
+ Bc0 += RK;
246
+ #undef WORK
247
+ }
248
+ }
249
+
250
+ // process the last columns of A, corresponding to the last rows of B
251
+ Index rd = d-d_end;
252
+ if(rd>0)
253
+ {
254
+ for(Index j=0; j<n; ++j)
255
+ {
256
+ enum {
257
+ Alignment = PacketSize>1 ? Aligned : 0
258
+ };
259
+ typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector;
260
+ typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector;
261
+ if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b);
262
+
263
+ else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
264
+ + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b);
265
+
266
+ else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
267
+ + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b)
268
+ + B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b);
269
+ }
270
+ }
271
+
272
+ } // blocking on the rows of A and C
273
+ }
274
+ #undef KMADD
275
+
276
+ } // namespace internal
277
+
278
+ } // namespace Eigen
279
+
280
+ #endif // EIGEN_SPARSELU_GEMM_KERNEL_H
@@ -0,0 +1,126 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ /* This file is a modified version of heap_relax_snode.c file in SuperLU
11
+ * -- SuperLU routine (version 3.0) --
12
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
13
+ * and Lawrence Berkeley National Lab.
14
+ * October 15, 2003
15
+ *
16
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
17
+ *
18
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
19
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
20
+ *
21
+ * Permission is hereby granted to use or copy this program for any
22
+ * purpose, provided the above notices are retained on all copies.
23
+ * Permission to modify the code and to distribute modified code is
24
+ * granted, provided the above notices are retained, and a notice that
25
+ * the code was modified is included with the above copyright notice.
26
+ */
27
+
28
+ #ifndef SPARSELU_HEAP_RELAX_SNODE_H
29
+ #define SPARSELU_HEAP_RELAX_SNODE_H
30
+
31
+ namespace Eigen {
32
+ namespace internal {
33
+
34
+ /**
35
+ * \brief Identify the initial relaxed supernodes
36
+ *
37
+ * This routine applied to a symmetric elimination tree.
38
+ * It assumes that the matrix has been reordered according to the postorder of the etree
39
+ * \param n The number of columns
40
+ * \param et elimination tree
41
+ * \param relax_columns Maximum number of columns allowed in a relaxed snode
42
+ * \param descendants Number of descendants of each node in the etree
43
+ * \param relax_end last column in a supernode
44
+ */
45
+ template <typename Scalar, typename StorageIndex>
46
+ void SparseLUImpl<Scalar,StorageIndex>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
47
+ {
48
+
49
+ // The etree may not be postordered, but its heap ordered
50
+ IndexVector post;
51
+ internal::treePostorder(StorageIndex(n), et, post); // Post order etree
52
+ IndexVector inv_post(n+1);
53
+ for (StorageIndex i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
54
+
55
+ // Renumber etree in postorder
56
+ IndexVector iwork(n);
57
+ IndexVector et_save(n+1);
58
+ for (Index i = 0; i < n; ++i)
59
+ {
60
+ iwork(post(i)) = post(et(i));
61
+ }
62
+ et_save = et; // Save the original etree
63
+ et = iwork;
64
+
65
+ // compute the number of descendants of each node in the etree
66
+ relax_end.setConstant(emptyIdxLU);
67
+ Index j, parent;
68
+ descendants.setZero();
69
+ for (j = 0; j < n; j++)
70
+ {
71
+ parent = et(j);
72
+ if (parent != n) // not the dummy root
73
+ descendants(parent) += descendants(j) + 1;
74
+ }
75
+ // Identify the relaxed supernodes by postorder traversal of the etree
76
+ Index snode_start; // beginning of a snode
77
+ StorageIndex k;
78
+ Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
79
+ Index nsuper_et = 0; // Number of relaxed snodes in the original etree
80
+ StorageIndex l;
81
+ for (j = 0; j < n; )
82
+ {
83
+ parent = et(j);
84
+ snode_start = j;
85
+ while ( parent != n && descendants(parent) < relax_columns )
86
+ {
87
+ j = parent;
88
+ parent = et(j);
89
+ }
90
+ // Found a supernode in postordered etree, j is the last column
91
+ ++nsuper_et_post;
92
+ k = StorageIndex(n);
93
+ for (Index i = snode_start; i <= j; ++i)
94
+ k = (std::min)(k, inv_post(i));
95
+ l = inv_post(j);
96
+ if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode
97
+ {
98
+ // This is also a supernode in the original etree
99
+ relax_end(k) = l; // Record last column
100
+ ++nsuper_et;
101
+ }
102
+ else
103
+ {
104
+ for (Index i = snode_start; i <= j; ++i)
105
+ {
106
+ l = inv_post(i);
107
+ if (descendants(i) == 0)
108
+ {
109
+ relax_end(l) = l;
110
+ ++nsuper_et;
111
+ }
112
+ }
113
+ }
114
+ j++;
115
+ // Search for a new leaf
116
+ while (descendants(j) != 0 && j < n) j++;
117
+ } // End postorder traversal of the etree
118
+
119
+ // Recover the original etree
120
+ et = et_save;
121
+ }
122
+
123
+ } // end namespace internal
124
+
125
+ } // end namespace Eigen
126
+ #endif // SPARSELU_HEAP_RELAX_SNODE_H
@@ -0,0 +1,130 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
5
+ // Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
6
+ //
7
+ // This Source Code Form is subject to the terms of the Mozilla
8
+ // Public License v. 2.0. If a copy of the MPL was not distributed
9
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
+
11
+ #ifndef SPARSELU_KERNEL_BMOD_H
12
+ #define SPARSELU_KERNEL_BMOD_H
13
+
14
+ namespace Eigen {
15
+ namespace internal {
16
+
17
+ template <int SegSizeAtCompileTime> struct LU_kernel_bmod
18
+ {
19
+ /** \internal
20
+ * \brief Performs numeric block updates from a given supernode to a single column
21
+ *
22
+ * \param segsize Size of the segment (and blocks ) to use for updates
23
+ * \param[in,out] dense Packed values of the original matrix
24
+ * \param tempv temporary vector to use for updates
25
+ * \param lusup array containing the supernodes
26
+ * \param lda Leading dimension in the supernode
27
+ * \param nrow Number of rows in the rectangular part of the supernode
28
+ * \param lsub compressed row subscripts of supernodes
29
+ * \param lptr pointer to the first column of the current supernode in lsub
30
+ * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
31
+ */
32
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
33
+ static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
34
+ const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
35
+ };
36
+
37
+ template <int SegSizeAtCompileTime>
38
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
39
+ EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
40
+ const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
41
+ {
42
+ typedef typename ScalarVector::Scalar Scalar;
43
+ // First, copy U[*,j] segment from dense(*) to tempv(*)
44
+ // The result of triangular solve is in tempv[*];
45
+ // The result of matric-vector update is in dense[*]
46
+ Index isub = lptr + no_zeros;
47
+ Index i;
48
+ Index irow;
49
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
50
+ {
51
+ irow = lsub(isub);
52
+ tempv(i) = dense(irow);
53
+ ++isub;
54
+ }
55
+ // Dense triangular solve -- start effective triangle
56
+ luptr += lda * no_zeros + no_zeros;
57
+ // Form Eigen matrix and vector
58
+ Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
59
+ Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
60
+
61
+ u = A.template triangularView<UnitLower>().solve(u);
62
+
63
+ // Dense matrix-vector product y <-- B*x
64
+ luptr += segsize;
65
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
66
+ Index ldl = internal::first_multiple(nrow, PacketSize);
67
+ Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
68
+ Index aligned_offset = internal::first_default_aligned(tempv.data()+segsize, PacketSize);
69
+ Index aligned_with_B_offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize))%PacketSize;
70
+ Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
71
+
72
+ l.setZero();
73
+ internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
74
+
75
+ // Scatter tempv[] into SPA dense[] as a temporary storage
76
+ isub = lptr + no_zeros;
77
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
78
+ {
79
+ irow = lsub(isub++);
80
+ dense(irow) = tempv(i);
81
+ }
82
+
83
+ // Scatter l into SPA dense[]
84
+ for (i = 0; i < nrow; i++)
85
+ {
86
+ irow = lsub(isub++);
87
+ dense(irow) -= l(i);
88
+ }
89
+ }
90
+
91
+ template <> struct LU_kernel_bmod<1>
92
+ {
93
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
94
+ static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
95
+ const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
96
+ };
97
+
98
+
99
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
100
+ EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
101
+ const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
102
+ {
103
+ typedef typename ScalarVector::Scalar Scalar;
104
+ typedef typename IndexVector::Scalar StorageIndex;
105
+ Scalar f = dense(lsub(lptr + no_zeros));
106
+ luptr += lda * no_zeros + no_zeros + 1;
107
+ const Scalar* a(lusup.data() + luptr);
108
+ const StorageIndex* irow(lsub.data()+lptr + no_zeros + 1);
109
+ Index i = 0;
110
+ for (; i+1 < nrow; i+=2)
111
+ {
112
+ Index i0 = *(irow++);
113
+ Index i1 = *(irow++);
114
+ Scalar a0 = *(a++);
115
+ Scalar a1 = *(a++);
116
+ Scalar d0 = dense.coeff(i0);
117
+ Scalar d1 = dense.coeff(i1);
118
+ d0 -= f*a0;
119
+ d1 -= f*a1;
120
+ dense.coeffRef(i0) = d0;
121
+ dense.coeffRef(i1) = d1;
122
+ }
123
+ if(i<nrow)
124
+ dense.coeffRef(*(irow++)) -= f * *(a++);
125
+ }
126
+
127
+ } // end namespace internal
128
+
129
+ } // end namespace Eigen
130
+ #endif // SPARSELU_KERNEL_BMOD_H