tomoto 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (420) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +123 -0
  5. data/ext/tomoto/ext.cpp +245 -0
  6. data/ext/tomoto/extconf.rb +28 -0
  7. data/lib/tomoto.rb +12 -0
  8. data/lib/tomoto/ct.rb +11 -0
  9. data/lib/tomoto/hdp.rb +11 -0
  10. data/lib/tomoto/lda.rb +67 -0
  11. data/lib/tomoto/version.rb +3 -0
  12. data/vendor/EigenRand/EigenRand/Core.h +1139 -0
  13. data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
  14. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
  15. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
  16. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
  17. data/vendor/EigenRand/EigenRand/EigenRand +19 -0
  18. data/vendor/EigenRand/EigenRand/Macro.h +24 -0
  19. data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
  20. data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
  21. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
  22. data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
  23. data/vendor/EigenRand/EigenRand/doc.h +220 -0
  24. data/vendor/EigenRand/LICENSE +21 -0
  25. data/vendor/EigenRand/README.md +288 -0
  26. data/vendor/eigen/COPYING.BSD +26 -0
  27. data/vendor/eigen/COPYING.GPL +674 -0
  28. data/vendor/eigen/COPYING.LGPL +502 -0
  29. data/vendor/eigen/COPYING.MINPACK +52 -0
  30. data/vendor/eigen/COPYING.MPL2 +373 -0
  31. data/vendor/eigen/COPYING.README +18 -0
  32. data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
  33. data/vendor/eigen/Eigen/Cholesky +46 -0
  34. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  35. data/vendor/eigen/Eigen/Core +537 -0
  36. data/vendor/eigen/Eigen/Dense +7 -0
  37. data/vendor/eigen/Eigen/Eigen +2 -0
  38. data/vendor/eigen/Eigen/Eigenvalues +61 -0
  39. data/vendor/eigen/Eigen/Geometry +62 -0
  40. data/vendor/eigen/Eigen/Householder +30 -0
  41. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  42. data/vendor/eigen/Eigen/Jacobi +33 -0
  43. data/vendor/eigen/Eigen/LU +50 -0
  44. data/vendor/eigen/Eigen/MetisSupport +35 -0
  45. data/vendor/eigen/Eigen/OrderingMethods +73 -0
  46. data/vendor/eigen/Eigen/PaStiXSupport +48 -0
  47. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  48. data/vendor/eigen/Eigen/QR +51 -0
  49. data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
  50. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  51. data/vendor/eigen/Eigen/SVD +51 -0
  52. data/vendor/eigen/Eigen/Sparse +36 -0
  53. data/vendor/eigen/Eigen/SparseCholesky +45 -0
  54. data/vendor/eigen/Eigen/SparseCore +69 -0
  55. data/vendor/eigen/Eigen/SparseLU +46 -0
  56. data/vendor/eigen/Eigen/SparseQR +37 -0
  57. data/vendor/eigen/Eigen/StdDeque +27 -0
  58. data/vendor/eigen/Eigen/StdList +26 -0
  59. data/vendor/eigen/Eigen/StdVector +27 -0
  60. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  61. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  62. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
  63. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
  64. data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  65. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
  66. data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
  67. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
  68. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
  69. data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
  70. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
  71. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
  72. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
  73. data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
  74. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
  75. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
  76. data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
  77. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
  78. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
  79. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
  80. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
  81. data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  82. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
  84. data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
  85. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
  86. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
  87. data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
  88. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
  89. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
  90. data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
  91. data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
  92. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  93. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
  94. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
  95. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
  96. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
  97. data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
  98. data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
  99. data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
  100. data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
  101. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
  102. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
  103. data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
  104. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
  105. data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
  106. data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
  107. data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
  108. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
  109. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
  110. data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
  111. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
  112. data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
  113. data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
  114. data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
  115. data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
  116. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
  117. data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
  118. data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
  119. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
  120. data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  121. data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
  122. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
  123. data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
  124. data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
  125. data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
  126. data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
  127. data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
  128. data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
  129. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
  130. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
  131. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
  132. data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
  134. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
  135. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
  139. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
  140. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
  142. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
  146. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
  148. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
  160. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
  161. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
  162. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
  163. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
  164. data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  165. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
  166. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
  167. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
  168. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
  169. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  170. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
  171. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
  172. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  173. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
  174. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
  175. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
  176. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
  177. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  178. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  179. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  180. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
  181. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
  182. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  183. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  184. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
  185. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
  186. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
  187. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
  188. data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
  189. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
  190. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  191. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
  192. data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
  193. data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
  194. data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
  195. data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
  196. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
  197. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
  198. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
  199. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  200. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
  201. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  202. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  203. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  204. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  205. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  206. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  207. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
  208. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
  209. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  210. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
  211. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  212. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
  213. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
  214. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
  215. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
  216. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
  217. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
  218. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
  219. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  220. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
  221. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
  222. data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
  223. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
  224. data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
  225. data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
  226. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
  227. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
  228. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
  229. data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
  230. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
  231. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  232. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
  233. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
  234. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
  235. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
  236. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
  237. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
  238. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
  239. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
  240. data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
  241. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
  242. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
  243. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
  244. data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  245. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
  246. data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  247. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
  248. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
  249. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
  250. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  251. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
  252. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
  253. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  254. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
  255. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
  256. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
  257. data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  258. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
  259. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
  260. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
  261. data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  262. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
  263. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  264. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
  265. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  266. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
  267. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
  268. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  269. data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  270. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
  271. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
  283. data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
  295. data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  296. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
  297. data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  298. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  299. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  300. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  307. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  308. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  309. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  310. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  311. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  312. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  313. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
  314. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
  315. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
  316. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
  317. data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
  318. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
  319. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
  320. data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
  321. data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
  322. data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
  323. data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
  324. data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
  325. data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
  326. data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
  327. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
  328. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
  329. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
  330. data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  331. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
  332. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  333. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
  334. data/vendor/eigen/README.md +3 -0
  335. data/vendor/eigen/bench/README.txt +55 -0
  336. data/vendor/eigen/bench/btl/COPYING +340 -0
  337. data/vendor/eigen/bench/btl/README +154 -0
  338. data/vendor/eigen/bench/tensors/README +21 -0
  339. data/vendor/eigen/blas/README.txt +6 -0
  340. data/vendor/eigen/demos/mandelbrot/README +10 -0
  341. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  342. data/vendor/eigen/demos/opengl/README +13 -0
  343. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
  344. data/vendor/eigen/unsupported/README.txt +50 -0
  345. data/vendor/tomotopy/LICENSE +21 -0
  346. data/vendor/tomotopy/README.kr.rst +375 -0
  347. data/vendor/tomotopy/README.rst +382 -0
  348. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
  349. data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
  350. data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
  351. data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
  352. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
  353. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
  354. data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
  355. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
  356. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
  357. data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
  358. data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
  359. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
  360. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
  361. data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
  362. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
  363. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
  364. data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
  365. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
  366. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
  367. data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
  368. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
  369. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
  370. data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
  371. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
  372. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
  373. data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
  374. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
  375. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
  376. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
  377. data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
  378. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
  379. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
  380. data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
  381. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
  382. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
  383. data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
  384. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
  385. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
  386. data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
  387. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
  388. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
  389. data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
  390. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
  391. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
  392. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
  393. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
  394. data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
  395. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
  396. data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
  397. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
  398. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
  399. data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
  400. data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
  401. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
  402. data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
  403. data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
  404. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
  405. data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
  406. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
  407. data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
  408. data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
  409. data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
  410. data/vendor/tomotopy/src/Utils/exception.h +28 -0
  411. data/vendor/tomotopy/src/Utils/math.h +281 -0
  412. data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
  413. data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
  414. data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
  415. data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
  416. data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
  417. data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
  418. data/vendor/tomotopy/src/Utils/text.hpp +49 -0
  419. data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
  420. metadata +531 -0
@@ -0,0 +1,692 @@
1
+ #pragma once
2
#include <numeric>
#include <type_traits>
#include <unordered_set>
#include "../Utils/Utils.hpp"
#include "../Utils/Dictionary.h"
#include "../Utils/tvector.hpp"
#include "../Utils/ThreadPool.hpp"
#include "../Utils/serializer.hpp"
#include "../Utils/exception.h"
#include <EigenRand/EigenRand>
11
+
12
+ namespace tomoto
13
+ {
14
+ using RandGen = Eigen::Rand::P8_mt19937_64<uint32_t>;
15
+ using ScalarRandGen = Eigen::Rand::UniversalRandomEngine<uint32_t, std::mt19937_64>;
16
+
17
+ class DocumentBase
18
+ {
19
+ public:
20
+ Float weight = 1;
21
+ tvector<Vid> words; // word id of each word
22
+ std::vector<uint32_t> wOrder; // original word order (optional)
23
+
24
+ std::string docUid;
25
+ std::string rawStr;
26
+ std::vector<uint32_t> origWordPos;
27
+ std::vector<uint16_t> origWordLen;
28
+ DocumentBase(Float _weight = 1) : weight(_weight) {}
29
+ virtual ~DocumentBase() {}
30
+
31
+ DEFINE_SERIALIZER_WITH_VERSION(0, serializer::to_key("Docu"), weight, words, wOrder);
32
+ DEFINE_TAGGED_SERIALIZER_WITH_VERSION(1, 0x00010001, weight, words, wOrder,
33
+ rawStr, origWordPos, origWordLen,
34
+ docUid
35
+ );
36
+ };
37
+
38
/**
 * Strategy used to parallelize work across threads.
 */
enum class ParallelScheme
{
	default_,
	none,
	copy_merge,
	partition,
	size // number of schemes above; a count, not a scheme of its own
};

/**
 * Returns the textual name of a parallel scheme.
 *
 * @param ps scheme to describe
 * @return a pointer to a string literal: "default", "none", "copy_merge" or
 *         "partition"; any other value (including ParallelScheme::size)
 *         yields "unknown".
 */
inline const char* toString(ParallelScheme ps)
{
	if (ps == ParallelScheme::default_) return "default";
	if (ps == ParallelScheme::none) return "none";
	if (ps == ParallelScheme::copy_merge) return "copy_merge";
	if (ps == ParallelScheme::partition) return "partition";
	return "unknown";
}
51
+
52
/**
 * Adapts a "give me the next token" callback into a range that can be walked
 * with a range-based for loop.
 *
 * The callback is invoked once per step. Iteration stops the first time the
 * callback returns a token whose last (bool) field is true.
 */
class RawDocTokenizer
{
public:
	/// (word, number, number, isEnd). When isEnd is true the other fields are ignored.
	/// NOTE(review): the two numbers are presumably the word's position and
	/// length in the raw text - confirm against the code consuming them.
	using Token = std::tuple<std::string, uint32_t, uint32_t, bool>;
	/// Builds a tokenizer for one raw document string.
	using Factory = std::function<RawDocTokenizer(const std::string&)>;
private:
	std::function<Token()> fnNext;
public:
	/**
	 * Single-pass iterator over the tokens produced by a RawDocTokenizer.
	 * A default-constructed iterator is the end sentinel.
	 */
	class Iterator
	{
		RawDocTokenizer* p = nullptr;
		bool end = true;
		std::tuple<std::string, uint32_t, uint32_t> value;
	public:
		/// Creates the end sentinel.
		Iterator()
		{
		}

		/// Creates an iterator on `_p` and immediately fetches the first token.
		Iterator(RawDocTokenizer* _p)
			: p{ _p }, end{ false }
		{
			operator++();
		}

		/// Returns the most recently fetched token, without the end marker.
		std::tuple<std::string, uint32_t, uint32_t>& operator*()
		{
			return value;
		}

		/**
		 * Fetches the next token from the tokenizer's callback.
		 * Advancing an iterator that is already at the end is a no-op; this
		 * also protects the default-constructed sentinel, whose tokenizer
		 * pointer is null.
		 */
		Iterator& operator++()
		{
			if (end) return *this;

			auto v = p->fnNext();
			if (std::get<3>(v))
			{
				end = true;
			}
			else
			{
				// `v` is a local about to die: move the string instead of copying it.
				value = std::make_tuple(std::move(std::get<0>(v)), std::get<1>(v), std::get<2>(v));
			}
			return *this;
		}

		/// Sentinel comparison: two iterators are equal only when both are at the end.
		bool operator==(const Iterator& o) const
		{
			return o.end && end;
		}

		bool operator!=(const Iterator& o) const
		{
			return !operator==(o);
		}
	};

	/**
	 * Wraps any callable convertible to std::function<Token()>.
	 *
	 * The enable_if guard removes this overload when the argument is itself a
	 * RawDocTokenizer. Without it, copy-constructing from a non-const lvalue
	 * would select this template over the copy constructor and fail to
	 * compile while trying to store a tokenizer inside fnNext.
	 */
	template<typename _Fn,
		typename = typename std::enable_if<
			!std::is_same<typename std::decay<_Fn>::type, RawDocTokenizer>::value
		>::type
	>
	RawDocTokenizer(_Fn&& fn) : fnNext{ std::forward<_Fn>(fn) }
	{
	}

	/// Starts iteration; fetches the first token right away.
	Iterator begin()
	{
		return Iterator{ this };
	}

	/// Returns the end sentinel.
	Iterator end()
	{
		return Iterator{};
	}
};
121
+
122
+ class ITopicModel
123
+ {
124
+ public:
125
+ virtual void saveModel(std::ostream& writer, bool fullModel,
126
+ const std::vector<uint8_t>* extra_data = nullptr) const = 0;
127
+ virtual void loadModel(std::istream& reader,
128
+ std::vector<uint8_t>* extra_data = nullptr) = 0;
129
+ virtual const DocumentBase* getDoc(size_t docId) const = 0;
130
+
131
+ virtual void updateVocab(const std::vector<std::string>& words) = 0;
132
+
133
+ virtual double getLLPerWord() const = 0;
134
+ virtual double getPerplexity() const = 0;
135
+ virtual uint64_t getV() const = 0;
136
+ virtual uint64_t getN() const = 0;
137
+ virtual size_t getNumDocs() const = 0;
138
+ virtual const Dictionary& getVocabDict() const = 0;
139
+ virtual const std::vector<uint64_t>& getVocabCf() const = 0;
140
+ virtual const std::vector<uint64_t>& getVocabDf() const = 0;
141
+
142
+ virtual int train(size_t iteration, size_t numWorkers, ParallelScheme ps = ParallelScheme::default_) = 0;
143
+ virtual size_t getGlobalStep() const = 0;
144
+ virtual void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) = 0;
145
+
146
+ virtual size_t getK() const = 0;
147
+ virtual std::vector<Float> getWidsByTopic(size_t tid) const = 0;
148
+ virtual std::vector<std::pair<std::string, Float>> getWordsByTopicSorted(size_t tid, size_t topN) const = 0;
149
+
150
+ virtual std::vector<std::pair<std::string, Float>> getWordsByDocSorted(const DocumentBase* doc, size_t topN) const = 0;
151
+
152
+ virtual std::vector<Float> getTopicsByDoc(const DocumentBase* doc) const = 0;
153
+ virtual std::vector<std::pair<Tid, Float>> getTopicsByDocSorted(const DocumentBase* doc, size_t topN) const = 0;
154
+ virtual std::vector<double> infer(const std::vector<DocumentBase*>& docs, size_t maxIter, Float tolerance, size_t numWorkers, ParallelScheme ps, bool together) const = 0;
155
+ virtual ~ITopicModel() {}
156
+ };
157
+
158
// Returns the `topN` largest elements of `vec` as (index, value) pairs, ordered
// by descending value. The index is the element's position in `vec`, converted
// to `_TyKey`. If `topN` is at least `vec.size()`, every element is returned.
// The relative order of equal values is unspecified.
template<class _TyKey, class _TyValue>
static std::vector<std::pair<_TyKey, _TyValue>> extractTopN(const std::vector<_TyValue>& vec, size_t topN)
{
	typedef std::pair<_TyKey, _TyValue> pair_t;
	std::vector<pair_t> ret;
	ret.reserve(vec.size());
	_TyKey k = 0;
	for (auto& t : vec)
	{
		ret.emplace_back(k++, t);
	}
	// Only the leading `numKept` entries have to be ordered, so a partial sort
	// is enough; the unordered remainder is discarded right afterwards.
	const size_t numKept = std::min(topN, ret.size());
	std::partial_sort(ret.begin(), ret.begin() + numKept, ret.end(), [](const pair_t& a, const pair_t& b)
	{
		return a.second > b.second;
	});
	ret.erase(ret.begin() + numKept, ret.end());
	return ret;
}
175
+
176
// Bit flags combined into the `_Flags` template argument of TopicModel.
namespace flags
{
	enum
	{
		// NOTE(review): not consulted anywhere in this part of the file — confirm meaning at the use site.
		continuous_doc_data = 0x1,
		// When set, getRealScheme() maps the default scheme to `none` and rejects copy_merge.
		shared_state = 0x2,
		// When set, getRealScheme() allows (and defaults to) ParallelScheme::partition.
		partitioned_multisampling = 0x4,
		// Presumably the first bit left free for flags of derived models — TODO confirm.
		end_flag_of_TopicModel = 0x8,
	};
}
186
+
187
+ template<typename _RandGen, size_t _Flags, typename _Interface, typename _Derived,
188
+ typename _DocType, typename _ModelState
189
+ >
190
+ class TopicModel : public _Interface
191
+ {
192
+ friend class Document;
193
+ public:
194
+ using DocType = _DocType;
195
+ protected:
196
+ _RandGen rg;
197
+ std::vector<_RandGen> localRG;
198
+ std::vector<Vid> words;
199
+ std::vector<uint32_t> wOffsetByDoc;
200
+
201
+ std::vector<DocType> docs;
202
+ std::vector<uint64_t> vocabCf;
203
+ std::vector<uint64_t> vocabDf;
204
+ size_t globalStep = 0;
205
+ _ModelState globalState, tState;
206
+ Dictionary dict;
207
+ uint64_t realV = 0; // vocab size after removing stopwords
208
+ uint64_t realN = 0; // total word size after removing stopwords
209
+ size_t maxThreads[(size_t)ParallelScheme::size] = { 0, };
210
+ size_t minWordCf = 0, minWordDf = 0, removeTopN = 0;
211
+
212
+ std::unique_ptr<ThreadPool> cachedPool;
213
+
214
+ void _saveModel(std::ostream& writer, bool fullModel, const std::vector<uint8_t>* extra_data) const
215
+ {
216
+ serializer::writeMany(writer,
217
+ serializer::to_keyz(static_cast<const _Derived*>(this)->TMID),
218
+ serializer::to_keyz(static_cast<const _Derived*>(this)->TWID));
219
+ serializer::writeTaggedMany(writer, 0x00010001,
220
+ serializer::to_keyz("dict"), dict,
221
+ serializer::to_keyz("vocabCf"), vocabCf,
222
+ serializer::to_keyz("vocabDf"), vocabDf,
223
+ serializer::to_keyz("realV"), realV,
224
+ serializer::to_keyz("globalStep"), globalStep,
225
+ serializer::to_keyz("extra"), extra_data ? *extra_data : std::vector<uint8_t>(0));
226
+ serializer::writeMany(writer, *static_cast<const _Derived*>(this));
227
+ globalState.serializerWrite(writer);
228
+ if (fullModel)
229
+ {
230
+ serializer::writeMany(writer, docs);
231
+ }
232
+ else
233
+ {
234
+ serializer::writeMany(writer, std::vector<size_t>{});
235
+ }
236
+ }
237
+
238
+ void _loadModel(std::istream& reader, std::vector<uint8_t>* extra_data)
239
+ {
240
+ auto start_pos = reader.tellg();
241
+ try
242
+ {
243
+ std::vector<uint8_t> extra;
244
+ serializer::readMany(reader,
245
+ serializer::to_keyz(static_cast<_Derived*>(this)->TMID),
246
+ serializer::to_keyz(static_cast<_Derived*>(this)->TWID));
247
+ serializer::readTaggedMany(reader, 0x00010001,
248
+ serializer::to_keyz("dict"), dict,
249
+ serializer::to_keyz("vocabCf"), vocabCf,
250
+ serializer::to_keyz("vocabDf"), vocabDf,
251
+ serializer::to_keyz("realV"), realV,
252
+ serializer::to_keyz("globalStep"), globalStep,
253
+ serializer::to_keyz("extra"), extra);
254
+ if (extra_data) *extra_data = std::move(extra);
255
+ }
256
+ catch (const std::ios_base::failure&)
257
+ {
258
+ reader.seekg(start_pos);
259
+ serializer::readMany(reader,
260
+ serializer::to_key(static_cast<_Derived*>(this)->TMID),
261
+ serializer::to_key(static_cast<_Derived*>(this)->TWID),
262
+ dict, vocabCf, realV);
263
+ }
264
+ serializer::readMany(reader, *static_cast<_Derived*>(this));
265
+ globalState.serializerRead(reader);
266
+ serializer::readMany(reader, docs);
267
+ realN = countRealN();
268
+ }
269
+
270
+ template<typename _DocTy>
271
+ typename std::enable_if<std::is_same<DocType,
272
+ typename std::remove_reference<typename std::remove_cv<_DocTy>::type>::type
273
+ >::value, size_t>::type _addDoc(_DocTy&& doc)
274
+ {
275
+ if (doc.words.empty()) return -1;
276
+ size_t maxWid = *std::max_element(doc.words.begin(), doc.words.end());
277
+ if (vocabCf.size() <= maxWid)
278
+ {
279
+ vocabCf.resize(maxWid + 1);
280
+ vocabDf.resize(maxWid + 1);
281
+ }
282
+ for (auto w : doc.words) ++vocabCf[w];
283
+ std::unordered_set<Vid> uniq{ doc.words.begin(), doc.words.end() };
284
+ for (auto w : uniq) ++vocabDf[w];
285
+ docs.emplace_back(std::forward<_DocTy>(doc));
286
+ return docs.size() - 1;
287
+ }
288
+
289
+ template<bool _const = false>
290
+ DocType _makeDoc(const std::vector<std::string>& words, Float weight = 1)
291
+ {
292
+ DocType doc{ weight };
293
+ for (auto& w : words)
294
+ {
295
+ Vid id;
296
+ if (_const)
297
+ {
298
+ id = dict.toWid(w);
299
+ if (id == (Vid)-1) continue;
300
+ }
301
+ else
302
+ {
303
+ id = dict.add(w);
304
+ }
305
+ doc.words.emplace_back(id);
306
+ }
307
+ return doc;
308
+ }
309
+
310
+ DocType _makeRawDoc(const std::string& rawStr, const std::vector<Vid>& words,
311
+ const std::vector<uint32_t>& pos, const std::vector<uint16_t>& len, Float weight = 1) const
312
+ {
313
+ DocType doc{ weight };
314
+ doc.rawStr = rawStr;
315
+ for (auto& w : words) doc.words.emplace_back(w);
316
+ doc.origWordPos = pos;
317
+ doc.origWordLen = len;
318
+ return doc;
319
+ }
320
+
321
+ template<bool _const, typename _FnTokenizer>
322
+ DocType _makeRawDoc(const std::string& rawStr, _FnTokenizer&& tokenizer, Float weight = 1)
323
+ {
324
+ DocType doc{ weight };
325
+ doc.rawStr = rawStr;
326
+ for (auto& p : tokenizer(doc.rawStr))
327
+ {
328
+ Vid wid;
329
+ if (_const)
330
+ {
331
+ wid = dict.toWid(std::get<0>(p));
332
+ if (wid == (Vid)-1) continue;
333
+ }
334
+ else
335
+ {
336
+ wid = dict.add(std::get<0>(p));
337
+ }
338
+ auto pos = std::get<1>(p);
339
+ auto len = std::get<2>(p);
340
+ doc.words.emplace_back(wid);
341
+ doc.origWordPos.emplace_back(pos);
342
+ doc.origWordLen.emplace_back(len);
343
+ }
344
+ return doc;
345
+ }
346
+
347
+ const DocType& _getDoc(size_t docId) const
348
+ {
349
+ return docs[docId];
350
+ }
351
+
352
+ void updateWeakArray()
353
+ {
354
+ wOffsetByDoc.emplace_back(0);
355
+ for (auto& doc : docs)
356
+ {
357
+ wOffsetByDoc.emplace_back(wOffsetByDoc.back() + doc.words.size());
358
+ }
359
+ auto tx = [](_DocType& doc) { return &doc.words; };
360
+ tvector<Vid>::trade(words,
361
+ makeTransformIter(docs.begin(), tx),
362
+ makeTransformIter(docs.end(), tx));
363
+ }
364
+
365
+ size_t countRealN() const
366
+ {
367
+ size_t n = 0;
368
+ for (auto& doc : docs)
369
+ {
370
+ for (auto& w : doc.words)
371
+ {
372
+ if (w < realV) ++n;
373
+ }
374
+ }
375
+ return n;
376
+ }
377
+
378
+ void removeStopwords(size_t minWordCnt, size_t minWordDf, size_t removeTopN)
379
+ {
380
+ if (minWordCnt <= 1 && minWordDf <= 1 && removeTopN == 0) realV = dict.size();
381
+ this->minWordCf = minWordCnt;
382
+ this->minWordDf = minWordDf;
383
+ this->removeTopN = removeTopN;
384
+ std::vector<std::pair<size_t, size_t>> vocabCfDf;
385
+ for (size_t i = 0; i < vocabCf.size(); ++i)
386
+ {
387
+ vocabCfDf.emplace_back(vocabCf[i], vocabDf[i]);
388
+ }
389
+
390
+ std::vector<Vid> order;
391
+ sortAndWriteOrder(vocabCfDf, order, removeTopN, [&](const std::pair<size_t, size_t>& a, const std::pair<size_t, size_t>& b)
392
+ {
393
+ if (a.first < minWordCnt || a.second < minWordDf)
394
+ {
395
+ if (b.first < minWordCnt || b.second < minWordDf)
396
+ {
397
+ return a > b;
398
+ }
399
+ return false;
400
+ }
401
+ if (b.first < minWordCnt || b.second < minWordDf)
402
+ {
403
+ return true;
404
+ }
405
+ return a > b;
406
+ });
407
+ realV = std::find_if(vocabCfDf.begin(), vocabCfDf.end() - std::min(removeTopN, vocabCfDf.size()), [&](const std::pair<size_t, size_t>& a)
408
+ {
409
+ return a.first < minWordCnt || a.second < minWordDf;
410
+ }) - vocabCfDf.begin();
411
+
412
+ for (size_t i = 0; i < vocabCfDf.size(); ++i)
413
+ {
414
+ vocabCf[i] = vocabCfDf[i].first;
415
+ vocabDf[i] = vocabCfDf[i].second;
416
+ }
417
+
418
+ dict.reorder(order);
419
+ realN = 0;
420
+ for (auto& doc : docs)
421
+ {
422
+ for (auto& w : doc.words)
423
+ {
424
+ w = order[w];
425
+ if (w < realV) ++realN;
426
+ }
427
+ }
428
+ }
429
+
430
+ int restoreFromTrainingError(const exception::TrainingError& e, ThreadPool& pool, _ModelState* localData, _RandGen* rgs)
431
+ {
432
+ throw e;
433
+ }
434
+
435
+ public:
436
+ TopicModel(size_t _rg) : rg(_rg)
437
+ {
438
+ }
439
+
440
+ size_t getNumDocs() const override
441
+ {
442
+ return docs.size();
443
+ }
444
+
445
+ uint64_t getN() const override
446
+ {
447
+ return realN;
448
+ }
449
+
450
+ uint64_t getV() const override
451
+ {
452
+ return realV;
453
+ }
454
+
455
+ void updateVocab(const std::vector<std::string>& words) override
456
+ {
457
+ if(dict.size()) THROW_ERROR_WITH_INFO(exception::InvalidArgument, "updateVocab after addDoc");
458
+ for(auto& w : words) dict.add(w);
459
+ }
460
+
461
+ void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) override
462
+ {
463
+ maxThreads[(size_t)ParallelScheme::default_] = -1;
464
+ maxThreads[(size_t)ParallelScheme::none] = -1;
465
+ maxThreads[(size_t)ParallelScheme::copy_merge] = static_cast<_Derived*>(this)->template estimateMaxThreads<ParallelScheme::copy_merge>();
466
+ maxThreads[(size_t)ParallelScheme::partition] = static_cast<_Derived*>(this)->template estimateMaxThreads<ParallelScheme::partition>();
467
+ }
468
+
469
+ static ParallelScheme getRealScheme(ParallelScheme ps)
470
+ {
471
+ switch (ps)
472
+ {
473
+ case ParallelScheme::default_:
474
+ if ((_Flags & flags::partitioned_multisampling)) return ParallelScheme::partition;
475
+ if ((_Flags & flags::shared_state)) return ParallelScheme::none;
476
+ return ParallelScheme::copy_merge;
477
+ case ParallelScheme::copy_merge:
478
+ if ((_Flags & flags::shared_state)) THROW_ERROR_WITH_INFO(exception::InvalidArgument,
479
+ std::string{ "This model doesn't provide ParallelScheme::" } + toString(ps));
480
+ break;
481
+ case ParallelScheme::partition:
482
+ if (!(_Flags & flags::partitioned_multisampling)) THROW_ERROR_WITH_INFO(exception::InvalidArgument,
483
+ std::string{ "This model doesn't provide ParallelScheme::" } + toString(ps));
484
+ break;
485
+ }
486
+ return ps;
487
+ }
488
+
489
+ int train(size_t iteration, size_t numWorkers, ParallelScheme ps) override
490
+ {
491
+ if (!numWorkers) numWorkers = std::thread::hardware_concurrency();
492
+ ps = getRealScheme(ps);
493
+ numWorkers = std::min(numWorkers, maxThreads[(size_t)ps]);
494
+ if (numWorkers == 1 || (_Flags & flags::shared_state)) ps = ParallelScheme::none;
495
+ if (!cachedPool || cachedPool->getNumWorkers() != numWorkers)
496
+ {
497
+ cachedPool = make_unique<ThreadPool>(numWorkers);
498
+ }
499
+
500
+ std::vector<_ModelState> localData;
501
+
502
+ while(localRG.size() < numWorkers)
503
+ {
504
+ localRG.emplace_back(rg());
505
+ }
506
+
507
+ for (size_t i = 0; i < numWorkers; ++i)
508
+ {
509
+ if(ps == ParallelScheme::copy_merge) localData.emplace_back(static_cast<_Derived*>(this)->globalState);
510
+ }
511
+
512
+ if (ps == ParallelScheme::partition)
513
+ {
514
+ localData.resize(numWorkers);
515
+ static_cast<_Derived*>(this)->updatePartition(*cachedPool, globalState, localData.data(), docs.begin(), docs.end(),
516
+ static_cast<_Derived*>(this)->eddTrain);
517
+ }
518
+
519
+ auto state = ps == ParallelScheme::none ? &globalState : localData.data();
520
+ for (size_t i = 0; i < iteration; ++i)
521
+ {
522
+ while (1)
523
+ {
524
+ try
525
+ {
526
+ switch (ps)
527
+ {
528
+ case ParallelScheme::none:
529
+ static_cast<_Derived*>(this)->template trainOne<ParallelScheme::none>(
530
+ *cachedPool, state, localRG.data());
531
+ break;
532
+ case ParallelScheme::copy_merge:
533
+ static_cast<_Derived*>(this)->template trainOne<ParallelScheme::copy_merge>(
534
+ *cachedPool, state, localRG.data());
535
+ break;
536
+ case ParallelScheme::partition:
537
+ static_cast<_Derived*>(this)->template trainOne<ParallelScheme::partition>(
538
+ *cachedPool, state, localRG.data());
539
+ break;
540
+ }
541
+ break;
542
+ }
543
+ catch (const exception::TrainingError& e)
544
+ {
545
+ std::cerr << e.what() << std::endl;
546
+ int ret = static_cast<_Derived*>(this)->restoreFromTrainingError(
547
+ e, *cachedPool, state, localRG.data());
548
+ if(ret < 0) return ret;
549
+ }
550
+ }
551
+ ++globalStep;
552
+ }
553
+ return 0;
554
+ }
555
+
556
+ double getLLPerWord() const override
557
+ {
558
+ return words.empty() ? 0 : static_cast<const _Derived*>(this)->getLL() / realN;
559
+ }
560
+
561
+ double getPerplexity() const override
562
+ {
563
+ return exp(-getLLPerWord());
564
+ }
565
+
566
+ size_t getK() const override
567
+ {
568
+ return 0;
569
+ }
570
+
571
+ std::vector<Float> getWidsByTopic(size_t tid) const override
572
+ {
573
+ return static_cast<const _Derived*>(this)->_getWidsByTopic(tid);
574
+ }
575
+
576
+ std::vector<std::pair<Vid, Float>> getWidsByTopicSorted(size_t tid, size_t topN) const
577
+ {
578
+ return extractTopN<Vid>(static_cast<const _Derived*>(this)->_getWidsByTopic(tid), topN);
579
+ }
580
+
581
+ std::vector<std::pair<std::string, Float>> vid2String(const std::vector<std::pair<Vid, Float>>& vids) const
582
+ {
583
+ std::vector<std::pair<std::string, Float>> ret(vids.size());
584
+ for (size_t i = 0; i < vids.size(); ++i)
585
+ {
586
+ ret[i] = std::make_pair(dict.toWord(vids[i].first), vids[i].second);
587
+ }
588
+ return ret;
589
+ }
590
+
591
+ std::vector<std::pair<std::string, Float>> getWordsByTopicSorted(size_t tid, size_t topN) const override
592
+ {
593
+ return vid2String(getWidsByTopicSorted(tid, topN));
594
+ }
595
+
596
+ std::vector<std::pair<Vid, Float>> getWidsByDocSorted(const DocumentBase* doc, size_t topN) const
597
+ {
598
+ std::vector<Float> cnt(dict.size());
599
+ for (auto w : doc->words) cnt[w] += 1;
600
+ for (auto& c : cnt) c /= doc->words.size();
601
+ return extractTopN<Vid>(cnt, topN);
602
+ }
603
+
604
+ std::vector<std::pair<std::string, Float>> getWordsByDocSorted(const DocumentBase* doc, size_t topN) const override
605
+ {
606
+ return vid2String(getWidsByDocSorted(doc, topN));
607
+ }
608
+
609
+ std::vector<double> infer(const std::vector<DocumentBase*>& docs, size_t maxIter, Float tolerance, size_t numWorkers, ParallelScheme ps, bool together) const override
610
+ {
611
+ if (!numWorkers) numWorkers = std::thread::hardware_concurrency();
612
+ ps = getRealScheme(ps);
613
+ if (numWorkers == 1) ps = ParallelScheme::none;
614
+ auto tx = [](DocumentBase* p)->DocType& { return *static_cast<DocType*>(p); };
615
+ auto b = makeTransformIter(docs.begin(), tx), e = makeTransformIter(docs.end(), tx);
616
+
617
+ if (together)
618
+ {
619
+ switch (ps)
620
+ {
621
+ case ParallelScheme::none:
622
+ return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::none>(b, e, maxIter, tolerance, numWorkers);
623
+ case ParallelScheme::copy_merge:
624
+ return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::copy_merge>(b, e, maxIter, tolerance, numWorkers);
625
+ case ParallelScheme::partition:
626
+ return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::partition>(b, e, maxIter, tolerance, numWorkers);
627
+ }
628
+ }
629
+ else
630
+ {
631
+ switch (ps)
632
+ {
633
+ case ParallelScheme::none:
634
+ return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::none>(b, e, maxIter, tolerance, numWorkers);
635
+ case ParallelScheme::copy_merge:
636
+ return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::copy_merge>(b, e, maxIter, tolerance, numWorkers);
637
+ case ParallelScheme::partition:
638
+ return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::partition>(b, e, maxIter, tolerance, numWorkers);
639
+ }
640
+ }
641
+ THROW_ERROR_WITH_INFO(exception::InvalidArgument, "invalid ParallelScheme");
642
+ }
643
+
644
+ std::vector<Float> getTopicsByDoc(const DocumentBase* doc) const override
645
+ {
646
+ return static_cast<const _Derived*>(this)->getTopicsByDoc(*static_cast<const DocType*>(doc));
647
+ }
648
+
649
+ std::vector<std::pair<Tid, Float>> getTopicsByDocSorted(const DocumentBase* doc, size_t topN) const override
650
+ {
651
+ return extractTopN<Tid>(getTopicsByDoc(doc), topN);
652
+ }
653
+
654
+
655
+ const DocumentBase* getDoc(size_t docId) const override
656
+ {
657
+ return &_getDoc(docId);
658
+ }
659
+
660
+ size_t getGlobalStep() const override
661
+ {
662
+ return globalStep;
663
+ }
664
+
665
+ const Dictionary& getVocabDict() const override
666
+ {
667
+ return dict;
668
+ }
669
+
670
+ const std::vector<uint64_t>& getVocabCf() const override
671
+ {
672
+ return vocabCf;
673
+ }
674
+
675
+ const std::vector<uint64_t>& getVocabDf() const override
676
+ {
677
+ return vocabDf;
678
+ }
679
+
680
+ void saveModel(std::ostream& writer, bool fullModel, const std::vector<uint8_t>* extra_data) const override
681
+ {
682
+ static_cast<const _Derived*>(this)->_saveModel(writer, fullModel, extra_data);
683
+ }
684
+
685
+ void loadModel(std::istream& reader, std::vector<uint8_t>* extra_data) override
686
+ {
687
+ static_cast<_Derived*>(this)->_loadModel(reader, extra_data);
688
+ static_cast<_Derived*>(this)->prepare(false);
689
+ }
690
+ };
691
+
692
+ }