tomoto 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (420) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +123 -0
  5. data/ext/tomoto/ext.cpp +245 -0
  6. data/ext/tomoto/extconf.rb +28 -0
  7. data/lib/tomoto.rb +12 -0
  8. data/lib/tomoto/ct.rb +11 -0
  9. data/lib/tomoto/hdp.rb +11 -0
  10. data/lib/tomoto/lda.rb +67 -0
  11. data/lib/tomoto/version.rb +3 -0
  12. data/vendor/EigenRand/EigenRand/Core.h +1139 -0
  13. data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
  14. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
  15. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
  16. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
  17. data/vendor/EigenRand/EigenRand/EigenRand +19 -0
  18. data/vendor/EigenRand/EigenRand/Macro.h +24 -0
  19. data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
  20. data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
  21. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
  22. data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
  23. data/vendor/EigenRand/EigenRand/doc.h +220 -0
  24. data/vendor/EigenRand/LICENSE +21 -0
  25. data/vendor/EigenRand/README.md +288 -0
  26. data/vendor/eigen/COPYING.BSD +26 -0
  27. data/vendor/eigen/COPYING.GPL +674 -0
  28. data/vendor/eigen/COPYING.LGPL +502 -0
  29. data/vendor/eigen/COPYING.MINPACK +52 -0
  30. data/vendor/eigen/COPYING.MPL2 +373 -0
  31. data/vendor/eigen/COPYING.README +18 -0
  32. data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
  33. data/vendor/eigen/Eigen/Cholesky +46 -0
  34. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  35. data/vendor/eigen/Eigen/Core +537 -0
  36. data/vendor/eigen/Eigen/Dense +7 -0
  37. data/vendor/eigen/Eigen/Eigen +2 -0
  38. data/vendor/eigen/Eigen/Eigenvalues +61 -0
  39. data/vendor/eigen/Eigen/Geometry +62 -0
  40. data/vendor/eigen/Eigen/Householder +30 -0
  41. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  42. data/vendor/eigen/Eigen/Jacobi +33 -0
  43. data/vendor/eigen/Eigen/LU +50 -0
  44. data/vendor/eigen/Eigen/MetisSupport +35 -0
  45. data/vendor/eigen/Eigen/OrderingMethods +73 -0
  46. data/vendor/eigen/Eigen/PaStiXSupport +48 -0
  47. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  48. data/vendor/eigen/Eigen/QR +51 -0
  49. data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
  50. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  51. data/vendor/eigen/Eigen/SVD +51 -0
  52. data/vendor/eigen/Eigen/Sparse +36 -0
  53. data/vendor/eigen/Eigen/SparseCholesky +45 -0
  54. data/vendor/eigen/Eigen/SparseCore +69 -0
  55. data/vendor/eigen/Eigen/SparseLU +46 -0
  56. data/vendor/eigen/Eigen/SparseQR +37 -0
  57. data/vendor/eigen/Eigen/StdDeque +27 -0
  58. data/vendor/eigen/Eigen/StdList +26 -0
  59. data/vendor/eigen/Eigen/StdVector +27 -0
  60. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  61. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  62. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
  63. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
  64. data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  65. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
  66. data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
  67. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
  68. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
  69. data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
  70. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
  71. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
  72. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
  73. data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
  74. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
  75. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
  76. data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
  77. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
  78. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
  79. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
  80. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
  81. data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  82. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
  84. data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
  85. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
  86. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
  87. data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
  88. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
  89. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
  90. data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
  91. data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
  92. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  93. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
  94. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
  95. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
  96. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
  97. data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
  98. data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
  99. data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
  100. data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
  101. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
  102. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
  103. data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
  104. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
  105. data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
  106. data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
  107. data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
  108. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
  109. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
  110. data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
  111. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
  112. data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
  113. data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
  114. data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
  115. data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
  116. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
  117. data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
  118. data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
  119. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
  120. data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  121. data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
  122. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
  123. data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
  124. data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
  125. data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
  126. data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
  127. data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
  128. data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
  129. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
  130. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
  131. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
  132. data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
  134. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
  135. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
  139. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
  140. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
  142. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
  146. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
  148. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
  160. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
  161. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
  162. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
  163. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
  164. data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  165. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
  166. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
  167. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
  168. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
  169. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  170. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
  171. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
  172. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  173. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
  174. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
  175. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
  176. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
  177. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  178. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  179. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  180. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
  181. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
  182. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  183. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  184. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
  185. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
  186. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
  187. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
  188. data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
  189. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
  190. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  191. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
  192. data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
  193. data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
  194. data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
  195. data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
  196. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
  197. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
  198. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
  199. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  200. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
  201. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  202. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  203. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  204. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  205. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  206. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  207. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
  208. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
  209. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  210. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
  211. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  212. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
  213. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
  214. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
  215. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
  216. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
  217. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
  218. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
  219. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  220. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
  221. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
  222. data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
  223. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
  224. data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
  225. data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
  226. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
  227. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
  228. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
  229. data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
  230. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
  231. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  232. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
  233. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
  234. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
  235. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
  236. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
  237. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
  238. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
  239. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
  240. data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
  241. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
  242. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
  243. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
  244. data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  245. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
  246. data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  247. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
  248. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
  249. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
  250. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  251. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
  252. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
  253. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  254. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
  255. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
  256. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
  257. data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  258. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
  259. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
  260. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
  261. data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  262. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
  263. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  264. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
  265. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  266. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
  267. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
  268. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  269. data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  270. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
  271. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
  283. data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
  295. data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  296. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
  297. data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  298. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  299. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  300. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  307. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  308. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  309. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  310. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  311. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  312. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  313. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
  314. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
  315. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
  316. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
  317. data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
  318. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
  319. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
  320. data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
  321. data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
  322. data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
  323. data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
  324. data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
  325. data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
  326. data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
  327. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
  328. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
  329. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
  330. data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  331. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
  332. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  333. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
  334. data/vendor/eigen/README.md +3 -0
  335. data/vendor/eigen/bench/README.txt +55 -0
  336. data/vendor/eigen/bench/btl/COPYING +340 -0
  337. data/vendor/eigen/bench/btl/README +154 -0
  338. data/vendor/eigen/bench/tensors/README +21 -0
  339. data/vendor/eigen/blas/README.txt +6 -0
  340. data/vendor/eigen/demos/mandelbrot/README +10 -0
  341. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  342. data/vendor/eigen/demos/opengl/README +13 -0
  343. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
  344. data/vendor/eigen/unsupported/README.txt +50 -0
  345. data/vendor/tomotopy/LICENSE +21 -0
  346. data/vendor/tomotopy/README.kr.rst +375 -0
  347. data/vendor/tomotopy/README.rst +382 -0
  348. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
  349. data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
  350. data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
  351. data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
  352. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
  353. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
  354. data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
  355. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
  356. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
  357. data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
  358. data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
  359. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
  360. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
  361. data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
  362. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
  363. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
  364. data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
  365. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
  366. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
  367. data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
  368. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
  369. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
  370. data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
  371. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
  372. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
  373. data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
  374. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
  375. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
  376. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
  377. data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
  378. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
  379. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
  380. data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
  381. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
  382. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
  383. data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
  384. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
  385. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
  386. data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
  387. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
  388. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
  389. data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
  390. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
  391. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
  392. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
  393. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
  394. data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
  395. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
  396. data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
  397. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
  398. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
  399. data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
  400. data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
  401. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
  402. data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
  403. data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
  404. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
  405. data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
  406. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
  407. data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
  408. data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
  409. data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
  410. data/vendor/tomotopy/src/Utils/exception.h +28 -0
  411. data/vendor/tomotopy/src/Utils/math.h +281 -0
  412. data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
  413. data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
  414. data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
  415. data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
  416. data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
  417. data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
  418. data/vendor/tomotopy/src/Utils/text.hpp +49 -0
  419. data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
  420. metadata +531 -0
@@ -0,0 +1,144 @@
1
+ #pragma once
2
+ #include "TopicModel.hpp"
3
+
4
+ namespace tomoto
5
+ {
6
/**
 * Term weighting schemes a topic model can be instantiated with.
 *
 * - one : every token has weight 1 (see DocumentLDA::getWordWeight).
 * - idf : presumably inverse-document-frequency weighting -- confirm in the model code.
 * - pmi : presumably pointwise-mutual-information weighting -- confirm in the model code.
 * - size: not a scheme; presumably a sentinel equal to the number of schemes.
 */
enum class TermWeight
{
    one,
    idf,
    pmi,
    size
};
7
+
8
+ template<typename _Scalar>
9
+ struct ShareableVector : Eigen::Map<Eigen::Matrix<_Scalar, -1, 1>>
10
+ {
11
+ Eigen::Matrix<_Scalar, -1, 1> ownData;
12
+ ShareableVector(_Scalar* ptr = nullptr, Eigen::Index len = 0)
13
+ : Eigen::Map<Eigen::Matrix<_Scalar, -1, 1>>(nullptr, 0)
14
+ {
15
+ init(ptr, len);
16
+ }
17
+
18
+ void init(_Scalar* ptr, Eigen::Index len)
19
+ {
20
+ if (!ptr && len)
21
+ {
22
+ ownData = Eigen::Matrix<_Scalar, -1, 1>::Zero(len);
23
+ ptr = ownData.data();
24
+ }
25
+ // is this the best way??
26
+ this->m_data = ptr;
27
+ ((Eigen::internal::variable_if_dynamic<Eigen::Index, -1>*)&this->m_rows)->setValue(len);
28
+ }
29
+
30
+ void conservativeResize(size_t newSize)
31
+ {
32
+ ownData.conservativeResize(newSize);
33
+ init(ownData.data(), ownData.size());
34
+ }
35
+
36
+ void becomeOwner()
37
+ {
38
+ if (ownData.data() != this->m_data)
39
+ {
40
+ ownData = *this;
41
+ init(ownData.data(), ownData.size());
42
+ }
43
+ }
44
+ };
45
+
46
+ template<typename _Base, TermWeight _tw>
47
+ struct SumWordWeight
48
+ {
49
+ Float sumWordWeight = 0;
50
+ Float getSumWordWeight() const
51
+ {
52
+ return sumWordWeight;
53
+ }
54
+
55
+ void updateSumWordWeight(size_t realV)
56
+ {
57
+ sumWordWeight = std::accumulate(static_cast<_Base*>(this)->wordWeights.begin(), static_cast<_Base*>(this)->wordWeights.end(), 0.f);
58
+ }
59
+ };
60
+
61
+ template<typename _Base>
62
+ struct SumWordWeight<_Base, TermWeight::one>
63
+ {
64
+ int32_t sumWordWeight = 0;
65
+ int32_t getSumWordWeight() const
66
+ {
67
+ return sumWordWeight;
68
+ }
69
+
70
+ void updateSumWordWeight(size_t realV)
71
+ {
72
+ sumWordWeight = std::count_if(static_cast<_Base*>(this)->words.begin(), static_cast<_Base*>(this)->words.end(), [realV](Vid w)
73
+ {
74
+ return w < realV;
75
+ });
76
+ }
77
+ };
78
+
79
+ template<TermWeight _tw>
80
+ struct DocumentLDA : public DocumentBase, SumWordWeight<DocumentLDA<_tw>, _tw>
81
+ {
82
+ public:
83
+ using DocumentBase::DocumentBase;
84
+ using WeightType = typename std::conditional<_tw == TermWeight::one, int32_t, float>::type;
85
+
86
+ tvector<Tid> Zs;
87
+ tvector<Float> wordWeights;
88
+ ShareableVector<WeightType> numByTopic;
89
+
90
+ DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(DocumentBase, 0, Zs, wordWeights);
91
+ DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(DocumentBase, 1, 0x00010001, Zs, wordWeights);
92
+
93
+ template<typename _TopicModel> void update(WeightType* ptr, const _TopicModel& mdl);
94
+
95
+ WeightType getWordWeight(size_t idx) const
96
+ {
97
+ return _tw == TermWeight::one ? 1 : wordWeights[idx];
98
+ }
99
+
100
+ std::vector<Float> getCountVector(size_t V) const
101
+ {
102
+ std::vector<Float> vs(V);
103
+ for (size_t i = 0; i < words.size(); ++i)
104
+ {
105
+ if (words[i] >= V) continue;
106
+ vs[words[i]] += wordWeights.empty() ? 1.f : wordWeights[i];
107
+ }
108
+ return vs;
109
+ }
110
+ };
111
+
112
+ class ILDAModel : public ITopicModel
113
+ {
114
+ public:
115
+ using DefaultDocType = DocumentLDA<TermWeight::one>;
116
+ static ILDAModel* create(TermWeight _weight, size_t _K = 1,
117
+ Float _alpha = 0.1, Float _eta = 0.01, size_t seed = std::random_device{}(),
118
+ bool scalarRng = false);
119
+
120
+ virtual size_t addDoc(const std::vector<std::string>& words) = 0;
121
+ virtual std::unique_ptr<DocumentBase> makeDoc(const std::vector<std::string>& words) const = 0;
122
+
123
+ virtual size_t addDoc(const std::string& rawStr, const RawDocTokenizer::Factory& tokenizer) = 0;
124
+ virtual std::unique_ptr<DocumentBase> makeDoc(const std::string& rawStr, const RawDocTokenizer::Factory& tokenizer) const = 0;
125
+
126
+ virtual size_t addDoc(const std::string& rawStr, const std::vector<Vid>& words,
127
+ const std::vector<uint32_t>& pos, const std::vector<uint16_t>& len) = 0;
128
+ virtual std::unique_ptr<DocumentBase> makeDoc(const std::string& rawStr, const std::vector<Vid>& words,
129
+ const std::vector<uint32_t>& pos, const std::vector<uint16_t>& len) const = 0;
130
+
131
+ virtual TermWeight getTermWeight() const = 0;
132
+ virtual size_t getOptimInterval() const = 0;
133
+ virtual void setOptimInterval(size_t) = 0;
134
+ virtual size_t getBurnInIteration() const = 0;
135
+ virtual void setBurnInIteration(size_t) = 0;
136
+ virtual std::vector<uint64_t> getCountByTopic() const = 0;
137
+ virtual Float getAlpha() const = 0;
138
+ virtual Float getAlpha(size_t k) const = 0;
139
+ virtual Float getEta() const = 0;
140
+
141
+ virtual std::vector<Float> getWordPrior(const std::string& word) const = 0;
142
+ virtual void setWordPrior(const std::string& word, const std::vector<Float>& priors) = 0;
143
+ };
144
+ }
@@ -0,0 +1,442 @@
1
+ #pragma once
2
+ #include <unordered_set>
3
+ #include <numeric>
4
+ #include "TopicModel.hpp"
5
+ #include <Eigen/Dense>
6
+ #include "../Utils/Utils.hpp"
7
+ #include "../Utils/math.h"
8
+ #include "../Utils/sample.hpp"
9
+
10
+ /*
11
+ Implementation of LDA using Collapsed Variational Bayes zero-order estimation by bab2min
12
+
13
+ * Blei, D. M., Ng, A. Y., & Jordan, M. I. (2003). Latent Dirichlet allocation. Journal of Machine Learning Research, 3(Jan), 993-1022.
14
+
15
+ The term weighting scheme is based on the following paper:
16
+ * Wilson, A. T., & Chew, P. A. (2010, June). Term weighting schemes for latent Dirichlet allocation. In Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics (pp. 465-473). Association for Computational Linguistics.
17
+
18
+ */
19
+
20
// GETTER(name, type, field) expands to an overriding const accessor
//   type get<name>() const override { return field; }
#define GETTER(name, type, field) \
    type get##name() const override { return field; }
21
+ namespace tomoto
22
+ {
23
+ struct DocumentLDACVB0 : public DocumentBase
24
+ {
25
+ public:
26
+ using DocumentBase::DocumentBase;
27
+
28
+ Eigen::MatrixXf Zs;
29
+ Eigen::VectorXf numByTopic;
30
+
31
+ DEFINE_SERIALIZER_AFTER_BASE(DocumentBase, Zs);
32
+
33
+ template<typename _TopicModel> void update(Float* ptr, const _TopicModel& mdl);
34
+
35
+ int32_t getSumWordWeight() const
36
+ {
37
+ return this->words.size();
38
+ }
39
+ };
40
+
41
+ struct ModelStateLDACVB0
42
+ {
43
+ Eigen::VectorXf zLikelihood;
44
+ Eigen::VectorXf numByTopic;
45
+ Eigen::MatrixXf numByTopicWord;
46
+
47
+ DEFINE_SERIALIZER(numByTopic, numByTopicWord);
48
+ };
49
+
50
+ class ILDACVB0Model : public ITopicModel
51
+ {
52
+ public:
53
+ using DefaultDocType = DocumentLDACVB0;
54
+ static ILDACVB0Model* create(size_t _K = 1, Float _alpha = 0.1, Float _eta = 0.01, size_t _rg = std::random_device{}());
55
+
56
+ virtual size_t addDoc(const std::vector<std::string>& words) = 0;
57
+ virtual std::unique_ptr<DocumentBase> makeDoc(const std::vector<std::string>& words) const = 0;
58
+ TermWeight getTermWeight() const { return TermWeight::one; };
59
+ virtual size_t getOptimInterval() const = 0;
60
+ virtual void setOptimInterval(size_t) = 0;
61
+ virtual void setBurnInIteration(size_t) {}
62
+ virtual std::vector<size_t> getCountByTopic() const = 0;
63
+ virtual size_t getK() const = 0;
64
+ virtual Float getAlpha() const = 0;
65
+ virtual Float getEta() const = 0;
66
+
67
+ virtual std::vector<Float> getWordPrior(const std::string& word) const { return {}; }
68
+ virtual void setWordPrior(const std::string& word, const std::vector<Float>& priors) {}
69
+ };
70
+
71
+ template<typename _Interface = ILDACVB0Model,
72
+ typename _Derived = void,
73
+ typename _DocType = DocumentLDACVB0,
74
+ typename _ModelState = ModelStateLDACVB0>
75
+ class LDACVB0Model : public TopicModel<0, _Interface,
76
+ typename std::conditional<std::is_same<_Derived, void>::value, LDACVB0Model<>, _Derived>::type,
77
+ _DocType, _ModelState>
78
+ {
79
+ protected:
80
+ using DerivedClass = typename std::conditional<std::is_same<_Derived, void>::value, LDACVB0Model, _Derived>::type;
81
+ using BaseClass = TopicModel<0, _Interface, DerivedClass, _DocType, _ModelState>;
82
+ friend BaseClass;
83
+
84
// String identifiers attached to this model type: TWID names the term-weighting
// scheme, TMID the topic model. (Presumably consumed by the TopicModel base for
// model files -- confirm there.)
// Each declaration carries `static constexpr` exactly once; repeating the
// specifiers is ill-formed.
static constexpr const char TWID[] = "one\0";
static constexpr char TMID[] = "LDA\0";
86
+
87
+ Float alpha;
88
+ Eigen::Matrix<Float, -1, 1> alphas;
89
+ Float eta;
90
+ Tid K;
91
+ size_t optimInterval = 50;
92
+
93
+ template<typename _List>
94
+ static Float calcDigammaSum(_List list, size_t len, Float alpha)
95
+ {
96
+ auto listExpr = Eigen::Matrix<Float, -1, 1>::NullaryExpr(len, list);
97
+ auto dAlpha = math::digammaT(alpha);
98
+ return (math::digammaApprox(listExpr.array() + alpha) - dAlpha).sum();
99
+ }
100
+
101
+ void optimizeParameters(ThreadPool& pool, _ModelState* localData)
102
+ {
103
+ const auto K = this->K;
104
+ for (size_t i = 0; i < 5; ++i)
105
+ {
106
+ Float denom = calcDigammaSum([&](size_t i) { return this->docs[i].getSumWordWeight(); }, this->docs.size(), alphas.sum());
107
+ for (size_t k = 0; k < K; ++k)
108
+ {
109
+ Float nom = calcDigammaSum([&](size_t i) { return this->docs[i].numByTopic[k]; }, this->docs.size(), alphas(k));
110
+ alphas(k) = std::max(nom / denom * alphas(k), 1e-5f);
111
+ }
112
+ }
113
+ }
114
+
115
+ const Eigen::VectorXf& getZLikelihoods(_ModelState& ld, const _DocType& doc, size_t docId, size_t vid) const
116
+ {
117
+ const size_t V = this->realV;
118
+ assert(vid < V);
119
+ auto& zLikelihood = ld.zLikelihood;
120
+ zLikelihood = (doc.numByTopic.array().template cast<Float>() + alphas.array())
121
+ * (ld.numByTopicWord.col(vid).array().template cast<Float>() + eta)
122
+ / (ld.numByTopic.array().template cast<Float>() + V * eta);
123
+ zLikelihood /= zLikelihood.sum() + 1e-10;
124
+ return zLikelihood;
125
+ }
126
+
127
+ template<int _Inc, typename _Vec>
128
+ inline void addWordTo(_ModelState& ld, _DocType& doc, uint32_t pid, Vid vid, _Vec tDist) const
129
+ {
130
+ assert(vid < this->realV);
131
+ constexpr bool _dec = _Inc < 0;
132
+ doc.numByTopic += _Inc * tDist;
133
+ if (_dec) doc.numByTopic = doc.numByTopic.cwiseMax(0);
134
+ ld.numByTopic += _Inc * tDist;
135
+ if (_dec) ld.numByTopic = ld.numByTopic.cwiseMax(0);
136
+ ld.numByTopicWord.col(vid) += _Inc * tDist;
137
+ if (_dec) ld.numByTopicWord.col(vid) = ld.numByTopicWord.col(vid).cwiseMax(0);
138
+ }
139
+
140
+ template<ParallelScheme _ps, bool _infer, typename _ExtraDocData>
141
+ void sampleDocument(_DocType& doc, const _ExtraDocData& edd, size_t docId, _ModelState& ld, _RandGen& rgs, size_t iterationCnt, size_t partitionId = 0) const
142
+ {
143
+ for (size_t w = 0; w < doc.words.size(); ++w)
144
+ {
145
+ if (doc.words[w] >= this->realV) continue;
146
+ addWordTo<-1>(ld, doc, w, doc.words[w], doc.Zs.col(w));
147
+ doc.Zs.col(w) = static_cast<const DerivedClass*>(this)->getZLikelihoods(ld, doc, docId, doc.words[w]);
148
+ addWordTo<1>(ld, doc, w, doc.words[w], doc.Zs.col(w));
149
+ }
150
+ }
151
+
152
+ template<typename _DocIter, typename _ExtraDocData>
153
+ void updatePartition(ThreadPool& pool, _ModelState* localData, _DocIter first, _DocIter last, _ExtraDocData& edd)
154
+ {
155
+ }
156
+
157
+ template<ParallelScheme _ps>
158
+ void trainOne(ThreadPool& pool, _ModelState* localData, _RandGen* rgs)
159
+ {
160
+ std::vector<std::future<void>> res;
161
+ const size_t chStride = std::min(pool.getNumWorkers() * 8, this->docs.size());
162
+ for (size_t ch = 0; ch < chStride; ++ch)
163
+ {
164
+ res.emplace_back(pool.enqueue([&, this, ch, chStride](size_t threadId)
165
+ {
166
+ forRandom((this->docs.size() - 1 - ch) / chStride + 1, rgs[threadId](), [&, this](size_t id)
167
+ {
168
+ static_cast<DerivedClass*>(this)->template sampleDocument<ParallelScheme::copy_merge>(
169
+ this->docs[id * chStride + ch], 0, id * chStride + ch,
170
+ localData[threadId], rgs[threadId], this->globalStep);
171
+ });
172
+ }));
173
+ }
174
+ for (auto& r : res) r.get();
175
+ static_cast<DerivedClass*>(this)->updateGlobalInfo(pool, localData);
176
+ static_cast<DerivedClass*>(this)->mergeState(pool, this->globalState, this->tState, localData);
177
+ if (this->globalStep >= 250 && optimInterval && (this->globalStep + 1) % optimInterval == 0)
178
+ {
179
+ static_cast<DerivedClass*>(this)->optimizeParameters(pool, localData);
180
+ }
181
+ }
182
+
183
+ void updateGlobalInfo(ThreadPool& pool, _ModelState* localData)
184
+ {
185
+ std::vector<std::future<void>> res;
186
+
187
+ this->globalState.numByTopic.setZero();
188
+ this->globalState.numByTopicWord.setZero();
189
+ for (auto& doc : this->docs)
190
+ {
191
+ doc.numByTopic = doc.Zs.rowwise().sum();
192
+ this->globalState.numByTopic += doc.numByTopic;
193
+ for (size_t i = 0; i < doc.words.size(); ++i)
194
+ {
195
+ this->globalState.numByTopicWord.col(doc.words[i]) += doc.Zs.col(i);
196
+ }
197
+ }
198
+
199
+ for (size_t i = 0; i < pool.getNumWorkers(); ++i)
200
+ {
201
+ res.emplace_back(pool.enqueue([&, i](size_t threadId)
202
+ {
203
+ localData[i] = this->globalState;
204
+ }));
205
+ }
206
+ for (auto& r : res) r.get();
207
+ }
208
+
209
+ void mergeState(ThreadPool& pool, _ModelState& globalState, _ModelState& tState, _ModelState* localData) const
210
+ {
211
+ }
212
+
213
+ template<typename _DocIter>
214
+ double getLLDocs(_DocIter _first, _DocIter _last) const
215
+ {
216
+ double ll = 0;
217
+ // doc-topic distribution
218
+ ll += (math::lgammaT(K*alpha) - math::lgammaT(alpha)*K) * std::distance(_first, _last);
219
+ for (; _first != _last; ++_first)
220
+ {
221
+ auto& doc = *_first;
222
+ ll -= math::lgammaT(doc.getSumWordWeight() + K * alpha);
223
+ for (Tid k = 0; k < K; ++k)
224
+ {
225
+ ll += math::lgammaT(doc.numByTopic[k] + alpha);
226
+ }
227
+ }
228
+ return ll;
229
+ }
230
+
231
+ double getLLRest(const _ModelState& ld) const
232
+ {
233
+ double ll = 0;
234
+ const size_t V = this->realV;
235
+ // topic-word distribution
236
+ // it has the very-small-value problem
237
+ ll += (math::lgammaT(V*eta) - math::lgammaT(eta)*V) * K;
238
+ for (Tid k = 0; k < K; ++k)
239
+ {
240
+ ll -= math::lgammaT(ld.numByTopic[k] + V * eta);
241
+ for (Vid v = 0; v < V; ++v)
242
+ {
243
+ ll += math::lgammaT(ld.numByTopicWord(k, v) + eta);
244
+ }
245
+ }
246
+ return ll;
247
+ }
248
+
249
+ double getLL() const
250
+ {
251
+ return static_cast<const DerivedClass*>(this)->template getLLDocs<>(this->docs.begin(), this->docs.end())
252
+ + static_cast<const DerivedClass*>(this)->getLLRest(this->globalState);
253
+ }
254
+
255
/// No-op in this model; prepare() calls it as its last step.
void prepareShared()
{
    // nothing to do
}
258
+
259
+ void prepareDoc(_DocType& doc, Float* topicDocPtr, size_t wordSize) const
260
+ {
261
+ doc.numByTopic = Eigen::VectorXf::Zero(K);
262
+ doc.Zs = Eigen::MatrixXf::Zero(K, wordSize);
263
+ }
264
+
265
+ void initGlobalState(bool initDocs)
266
+ {
267
+ const size_t V = this->realV;
268
+ this->globalState.zLikelihood = Eigen::Matrix<Float, -1, 1>::Zero(K);
269
+ if (initDocs)
270
+ {
271
+ this->globalState.numByTopic = Eigen::Matrix<Float, -1, 1>::Zero(K);
272
+ this->globalState.numByTopicWord = Eigen::Matrix<Float, -1, -1>::Zero(K, V);
273
+ }
274
+ }
275
+
276
+ struct Generator
277
+ {
278
+ std::uniform_int_distribution<Tid> theta;
279
+ };
280
+
281
+ Generator makeGeneratorForInit(const _DocType*) const
282
+ {
283
+ return Generator{ std::uniform_int_distribution<Tid>{0, (Tid)(K - 1)} };
284
+ }
285
+
286
+ template<bool _Infer>
287
+ void updateStateWithDoc(Generator& g, _ModelState& ld, _RandGen& rgs, _DocType& doc, size_t i) const
288
+ {
289
+ doc.Zs.col(i).setZero();
290
+ doc.Zs(g.theta(rgs), i) = 1;
291
+ addWordTo<1>(ld, doc, i, doc.words[i], doc.Zs.col(i));
292
+ }
293
+
294
+ template<bool _Infer, typename _Generator>
295
+ void initializeDocState(_DocType& doc, Float* topicDocPtr, _Generator& g, _ModelState& ld, _RandGen& rgs) const
296
+ {
297
+ std::vector<uint32_t> tf(this->realV);
298
+ static_cast<const DerivedClass*>(this)->prepareDoc(doc, topicDocPtr, doc.words.size());
299
+
300
+ for (size_t i = 0; i < doc.words.size(); ++i)
301
+ {
302
+ if (doc.words[i] >= this->realV) continue;
303
+ static_cast<const DerivedClass*>(this)->template updateStateWithDoc<_Infer>(g, ld, rgs, doc, i);
304
+ }
305
+ }
306
+
307
+ std::vector<uint64_t> _getTopicsCount() const
308
+ {
309
+ Eigen::VectorXf cnt = Eigen::VectorXf::Zero(K);
310
+ for (auto& doc : this->docs)
311
+ {
312
+ cnt += doc.Zs.rowwise().sum();
313
+ }
314
+
315
+ return { cnt.data(), cnt.data() + K };
316
+ }
317
+
318
+ template<ParallelScheme _ps>
319
+ size_t estimateMaxThreads() const
320
+ {
321
+ if (_ps == ParallelScheme::partition)
322
+ {
323
+ return this->realV / 4;
324
+ }
325
+ if (_ps == ParallelScheme::copy_merge)
326
+ {
327
+ return this->docs.size() / 2;
328
+ }
329
+ return (size_t)-1;
330
+ }
331
+
332
+ DEFINE_SERIALIZER(alpha, eta, K);
333
+
334
+ public:
335
+ LDACVB0Model(size_t _K = 1, Float _alpha = 0.1, Float _eta = 0.01, size_t _rg = std::random_device{}())
336
+ : BaseClass(_rg), K(_K), alpha(_alpha), eta(_eta)
337
+ {
338
+ alphas = Eigen::Matrix<Float, -1, 1>::Constant(K, alpha);
339
+ }
340
+ GETTER(K, size_t, K);
341
+ GETTER(Alpha, Float, alpha);
342
+ GETTER(Eta, Float, eta);
343
+ GETTER(OptimInterval, size_t, optimInterval);
344
+
345
+
346
+ void setOptimInterval(size_t _optimInterval) override
347
+ {
348
+ optimInterval = _optimInterval;
349
+ }
350
+
351
+ size_t addDoc(const std::vector<std::string>& words) override
352
+ {
353
+ return this->_addDoc(this->_makeDoc(words));
354
+ }
355
+
356
+ std::unique_ptr<DocumentBase> makeDoc(const std::vector<std::string>& words) const override
357
+ {
358
+ return make_unique<_DocType>(as_mutable(this)->template _makeDoc<true>(words));
359
+ }
360
+
361
+ void updateDocs()
362
+ {
363
+ for (auto& doc : this->docs)
364
+ {
365
+ doc.template update<>(nullptr, *static_cast<DerivedClass*>(this));
366
+ }
367
+ }
368
+
369
+ void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) override
370
+ {
371
+ if (initDocs) this->removeStopwords(minWordCnt, minWordDf, removeTopN);
372
+ static_cast<DerivedClass*>(this)->updateWeakArray();
373
+ static_cast<DerivedClass*>(this)->initGlobalState(initDocs);
374
+
375
+ if (initDocs)
376
+ {
377
+ auto generator = static_cast<DerivedClass*>(this)->makeGeneratorForInit(nullptr);
378
+ for (auto& doc : this->docs)
379
+ {
380
+ initializeDocState<false>(doc, nullptr, generator, this->globalState, this->rg);
381
+ }
382
+ }
383
+ else
384
+ {
385
+ static_cast<DerivedClass*>(this)->updateDocs();
386
+ }
387
+ static_cast<DerivedClass*>(this)->prepareShared();
388
+ }
389
+
390
+ std::vector<size_t> getCountByTopic() const override
391
+ {
392
+ return static_cast<const DerivedClass*>(this)->_getTopicsCount();
393
+ }
394
+
395
+ std::vector<Float> getTopicsByDoc(const _DocType& doc) const
396
+ {
397
+ std::vector<Float> ret(K);
398
+ Float sum = doc.getSumWordWeight() + K * alpha;
399
+ transform(doc.numByTopic.data(), doc.numByTopic.data() + K, ret.begin(), [sum, this](size_t n)
400
+ {
401
+ return (n + alpha) / sum;
402
+ });
403
+ return ret;
404
+ }
405
+
406
+ std::vector<Float> _getWidsByTopic(Tid tid) const
407
+ {
408
+ assert(tid < K);
409
+ const size_t V = this->realV;
410
+ std::vector<Float> ret(V);
411
+ Float sum = this->globalState.numByTopic[tid] + V * eta;
412
+ auto r = this->globalState.numByTopicWord.row(tid);
413
+ for (size_t v = 0; v < V; ++v)
414
+ {
415
+ ret[v] = (r[v] + eta) / sum;
416
+ }
417
+ return ret;
418
+ }
419
+
420
+ template<bool _Together, ParallelScheme _ps, typename _Iter>
421
+ std::vector<double> _infer(_Iter docFirst, _Iter docLast, size_t maxIter, Float tolerance, size_t numWorkers) const
422
+ {
423
+ return {};
424
+ }
425
+ };
426
+
427
+ template<typename _TopicModel>
428
+ void DocumentLDACVB0::update(Float * ptr, const _TopicModel & mdl)
429
+ {
430
+ numByTopic = Eigen::VectorXf::Zero(mdl.getK());
431
+ for (size_t i = 0; i < Zs.cols(); ++i)
432
+ {
433
+ numByTopic += Zs.col(i);
434
+ }
435
+ }
436
+
437
+ inline ILDACVB0Model* ILDACVB0Model::create(size_t _K, Float _alpha, Float _eta, const _RandGen& _rg)
438
+ {
439
+ return new LDACVB0Model<>(_K, _alpha, _eta, _rg);
440
+ }
441
+
442
+ }