tomoto 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (420) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +123 -0
  5. data/ext/tomoto/ext.cpp +245 -0
  6. data/ext/tomoto/extconf.rb +28 -0
  7. data/lib/tomoto.rb +12 -0
  8. data/lib/tomoto/ct.rb +11 -0
  9. data/lib/tomoto/hdp.rb +11 -0
  10. data/lib/tomoto/lda.rb +67 -0
  11. data/lib/tomoto/version.rb +3 -0
  12. data/vendor/EigenRand/EigenRand/Core.h +1139 -0
  13. data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
  14. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
  15. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
  16. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
  17. data/vendor/EigenRand/EigenRand/EigenRand +19 -0
  18. data/vendor/EigenRand/EigenRand/Macro.h +24 -0
  19. data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
  20. data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
  21. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
  22. data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
  23. data/vendor/EigenRand/EigenRand/doc.h +220 -0
  24. data/vendor/EigenRand/LICENSE +21 -0
  25. data/vendor/EigenRand/README.md +288 -0
  26. data/vendor/eigen/COPYING.BSD +26 -0
  27. data/vendor/eigen/COPYING.GPL +674 -0
  28. data/vendor/eigen/COPYING.LGPL +502 -0
  29. data/vendor/eigen/COPYING.MINPACK +52 -0
  30. data/vendor/eigen/COPYING.MPL2 +373 -0
  31. data/vendor/eigen/COPYING.README +18 -0
  32. data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
  33. data/vendor/eigen/Eigen/Cholesky +46 -0
  34. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  35. data/vendor/eigen/Eigen/Core +537 -0
  36. data/vendor/eigen/Eigen/Dense +7 -0
  37. data/vendor/eigen/Eigen/Eigen +2 -0
  38. data/vendor/eigen/Eigen/Eigenvalues +61 -0
  39. data/vendor/eigen/Eigen/Geometry +62 -0
  40. data/vendor/eigen/Eigen/Householder +30 -0
  41. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  42. data/vendor/eigen/Eigen/Jacobi +33 -0
  43. data/vendor/eigen/Eigen/LU +50 -0
  44. data/vendor/eigen/Eigen/MetisSupport +35 -0
  45. data/vendor/eigen/Eigen/OrderingMethods +73 -0
  46. data/vendor/eigen/Eigen/PaStiXSupport +48 -0
  47. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  48. data/vendor/eigen/Eigen/QR +51 -0
  49. data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
  50. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  51. data/vendor/eigen/Eigen/SVD +51 -0
  52. data/vendor/eigen/Eigen/Sparse +36 -0
  53. data/vendor/eigen/Eigen/SparseCholesky +45 -0
  54. data/vendor/eigen/Eigen/SparseCore +69 -0
  55. data/vendor/eigen/Eigen/SparseLU +46 -0
  56. data/vendor/eigen/Eigen/SparseQR +37 -0
  57. data/vendor/eigen/Eigen/StdDeque +27 -0
  58. data/vendor/eigen/Eigen/StdList +26 -0
  59. data/vendor/eigen/Eigen/StdVector +27 -0
  60. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  61. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  62. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
  63. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
  64. data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  65. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
  66. data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
  67. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
  68. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
  69. data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
  70. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
  71. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
  72. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
  73. data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
  74. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
  75. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
  76. data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
  77. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
  78. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
  79. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
  80. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
  81. data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  82. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
  84. data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
  85. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
  86. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
  87. data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
  88. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
  89. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
  90. data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
  91. data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
  92. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  93. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
  94. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
  95. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
  96. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
  97. data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
  98. data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
  99. data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
  100. data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
  101. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
  102. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
  103. data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
  104. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
  105. data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
  106. data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
  107. data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
  108. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
  109. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
  110. data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
  111. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
  112. data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
  113. data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
  114. data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
  115. data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
  116. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
  117. data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
  118. data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
  119. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
  120. data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  121. data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
  122. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
  123. data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
  124. data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
  125. data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
  126. data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
  127. data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
  128. data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
  129. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
  130. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
  131. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
  132. data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
  134. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
  135. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
  139. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
  140. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
  142. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
  146. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
  148. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
  160. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
  161. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
  162. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
  163. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
  164. data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  165. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
  166. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
  167. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
  168. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
  169. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  170. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
  171. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
  172. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  173. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
  174. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
  175. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
  176. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
  177. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  178. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  179. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  180. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
  181. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
  182. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  183. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  184. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
  185. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
  186. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
  187. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
  188. data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
  189. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
  190. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  191. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
  192. data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
  193. data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
  194. data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
  195. data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
  196. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
  197. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
  198. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
  199. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  200. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
  201. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  202. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  203. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  204. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  205. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  206. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  207. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
  208. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
  209. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  210. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
  211. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  212. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
  213. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
  214. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
  215. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
  216. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
  217. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
  218. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
  219. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  220. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
  221. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
  222. data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
  223. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
  224. data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
  225. data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
  226. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
  227. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
  228. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
  229. data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
  230. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
  231. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  232. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
  233. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
  234. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
  235. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
  236. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
  237. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
  238. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
  239. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
  240. data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
  241. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
  242. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
  243. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
  244. data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  245. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
  246. data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  247. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
  248. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
  249. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
  250. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  251. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
  252. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
  253. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  254. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
  255. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
  256. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
  257. data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  258. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
  259. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
  260. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
  261. data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  262. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
  263. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  264. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
  265. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  266. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
  267. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
  268. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  269. data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  270. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
  271. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
  283. data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
  295. data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  296. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
  297. data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  298. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  299. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  300. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  307. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  308. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  309. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  310. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  311. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  312. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  313. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
  314. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
  315. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
  316. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
  317. data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
  318. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
  319. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
  320. data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
  321. data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
  322. data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
  323. data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
  324. data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
  325. data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
  326. data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
  327. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
  328. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
  329. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
  330. data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  331. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
  332. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  333. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
  334. data/vendor/eigen/README.md +3 -0
  335. data/vendor/eigen/bench/README.txt +55 -0
  336. data/vendor/eigen/bench/btl/COPYING +340 -0
  337. data/vendor/eigen/bench/btl/README +154 -0
  338. data/vendor/eigen/bench/tensors/README +21 -0
  339. data/vendor/eigen/blas/README.txt +6 -0
  340. data/vendor/eigen/demos/mandelbrot/README +10 -0
  341. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  342. data/vendor/eigen/demos/opengl/README +13 -0
  343. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
  344. data/vendor/eigen/unsupported/README.txt +50 -0
  345. data/vendor/tomotopy/LICENSE +21 -0
  346. data/vendor/tomotopy/README.kr.rst +375 -0
  347. data/vendor/tomotopy/README.rst +382 -0
  348. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
  349. data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
  350. data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
  351. data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
  352. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
  353. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
  354. data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
  355. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
  356. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
  357. data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
  358. data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
  359. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
  360. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
  361. data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
  362. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
  363. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
  364. data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
  365. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
  366. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
  367. data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
  368. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
  369. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
  370. data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
  371. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
  372. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
  373. data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
  374. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
  375. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
  376. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
  377. data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
  378. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
  379. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
  380. data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
  381. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
  382. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
  383. data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
  384. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
  385. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
  386. data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
  387. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
  388. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
  389. data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
  390. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
  391. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
  392. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
  393. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
  394. data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
  395. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
  396. data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
  397. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
  398. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
  399. data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
  400. data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
  401. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
  402. data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
  403. data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
  404. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
  405. data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
  406. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
  407. data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
  408. data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
  409. data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
  410. data/vendor/tomotopy/src/Utils/exception.h +28 -0
  411. data/vendor/tomotopy/src/Utils/math.h +281 -0
  412. data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
  413. data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
  414. data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
  415. data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
  416. data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
  417. data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
  418. data/vendor/tomotopy/src/Utils/text.hpp +49 -0
  419. data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
  420. metadata +531 -0
@@ -0,0 +1,692 @@
1
+ #pragma once
2
+ #include <numeric>
3
+ #include <unordered_set>
4
+ #include "../Utils/Utils.hpp"
5
+ #include "../Utils/Dictionary.h"
6
+ #include "../Utils/tvector.hpp"
7
+ #include "../Utils/ThreadPool.hpp"
8
+ #include "../Utils/serializer.hpp"
9
+ #include "../Utils/exception.h"
10
+ #include <EigenRand/EigenRand>
11
+
12
+ namespace tomoto
13
+ {
14
+ using RandGen = Eigen::Rand::P8_mt19937_64<uint32_t>;
15
+ using ScalarRandGen = Eigen::Rand::UniversalRandomEngine<uint32_t, std::mt19937_64>;
16
+
17
+ class DocumentBase
18
+ {
19
+ public:
20
+ Float weight = 1;
21
+ tvector<Vid> words; // word id of each word
22
+ std::vector<uint32_t> wOrder; // original word order (optional)
23
+
24
+ std::string docUid;
25
+ std::string rawStr;
26
+ std::vector<uint32_t> origWordPos;
27
+ std::vector<uint16_t> origWordLen;
28
+ DocumentBase(Float _weight = 1) : weight(_weight) {}
29
+ virtual ~DocumentBase() {}
30
+
31
+ DEFINE_SERIALIZER_WITH_VERSION(0, serializer::to_key("Docu"), weight, words, wOrder);
32
+ DEFINE_TAGGED_SERIALIZER_WITH_VERSION(1, 0x00010001, weight, words, wOrder,
33
+ rawStr, origWordPos, origWordLen,
34
+ docUid
35
+ );
36
+ };
37
+
38
// Parallelization strategies selectable for training/inference.
// `size` is not a strategy: it is the enumerator count (used as an array bound).
enum class ParallelScheme
{
	default_,
	none,
	copy_merge,
	partition,
	size,
};

/**
 * Returns the textual name of a ParallelScheme value.
 *
 * @param ps scheme to describe
 * @return a string literal naming the scheme, or "unknown" for `size`
 *         and any other value that is not one of the four schemes
 */
inline const char* toString(ParallelScheme ps)
{
	if (ps == ParallelScheme::default_) return "default";
	if (ps == ParallelScheme::none) return "none";
	if (ps == ParallelScheme::copy_merge) return "copy_merge";
	if (ps == ParallelScheme::partition) return "partition";
	return "unknown";
}
51
+
52
/**
 * Adapts a "give me the next token" callable into a single-pass range that
 * can be consumed with a range-based for loop.
 *
 * The callable is invoked once per step and returns a Token, i.e. a
 * (string, uint32_t, uint32_t, bool) tuple. A Token whose last element is
 * true marks the end of the stream; its other elements are ignored.
 * NOTE(review): the two uint32_t elements look like the token's position and
 * length in the raw text -- confirm against the callers.
 *
 * All iterators obtained from one tokenizer pull from the same callable, so
 * the sequence can only be traversed once.
 */
class RawDocTokenizer
{
public:
	using Token = std::tuple<std::string, uint32_t, uint32_t, bool>;
	using Factory = std::function<RawDocTokenizer(const std::string&)>;
private:
	std::function<Token()> fnNext;
public:
	/**
	 * Minimal input-style iterator over the tokens produced by a tokenizer.
	 * Two iterators compare equal only when both have reached the end.
	 */
	class Iterator
	{
		RawDocTokenizer* p = nullptr;
		bool end = true;
		std::tuple<std::string, uint32_t, uint32_t> value;
	public:
		// Constructs the end iterator.
		Iterator()
		{
		}

		// Constructs a begin iterator; eagerly pulls the first token from `_p`.
		Iterator(RawDocTokenizer* _p)
			: p{ _p }, end{ false }
		{
			operator++();
		}

		// Returns the most recently pulled token (without the end flag).
		std::tuple<std::string, uint32_t, uint32_t>& operator*()
		{
			return value;
		}

		// Pulls the next token. Incrementing an end iterator is a no-op.
		Iterator& operator++()
		{
			// Guard: a default-constructed iterator has no tokenizer to pull
			// from, and an exhausted one must not invoke the callable again.
			if (end || !p)
			{
				end = true;
				return *this;
			}

			auto v = p->fnNext();
			if (std::get<3>(v))
			{
				end = true;
			}
			else
			{
				// `v` is a local temporary: move the string instead of copying it.
				value = std::make_tuple(std::move(std::get<0>(v)), std::get<1>(v), std::get<2>(v));
			}
			return *this;
		}

		bool operator==(const Iterator& o) const
		{
			return o.end && end;
		}

		bool operator!=(const Iterator& o) const
		{
			return !operator==(o);
		}
	};

	// Wraps any callable convertible to std::function<Token()>.
	template<typename _Fn>
	RawDocTokenizer(_Fn&& fn) : fnNext{ std::forward<_Fn>(fn) }
	{
	}

	// Copying a non-const lvalue would otherwise pick the forwarding
	// constructor above (it is an exact match for RawDocTokenizer&) and fail
	// to compile; this overload routes such copies to the real copy constructor.
	RawDocTokenizer(RawDocTokenizer& o)
		: RawDocTokenizer(static_cast<const RawDocTokenizer&>(o))
	{
	}

	// Declaring the overload above suppresses the implicit special members,
	// so they are restored explicitly.
	RawDocTokenizer(const RawDocTokenizer&) = default;
	RawDocTokenizer(RawDocTokenizer&&) = default;
	RawDocTokenizer& operator=(const RawDocTokenizer&) = default;
	RawDocTokenizer& operator=(RawDocTokenizer&&) = default;

	// Starts the traversal; pulls the first token immediately.
	Iterator begin()
	{
		return Iterator{ this };
	}

	Iterator end()
	{
		return Iterator{};
	}
};
121
+
122
+ class ITopicModel
123
+ {
124
+ public:
125
+ virtual void saveModel(std::ostream& writer, bool fullModel,
126
+ const std::vector<uint8_t>* extra_data = nullptr) const = 0;
127
+ virtual void loadModel(std::istream& reader,
128
+ std::vector<uint8_t>* extra_data = nullptr) = 0;
129
+ virtual const DocumentBase* getDoc(size_t docId) const = 0;
130
+
131
+ virtual void updateVocab(const std::vector<std::string>& words) = 0;
132
+
133
+ virtual double getLLPerWord() const = 0;
134
+ virtual double getPerplexity() const = 0;
135
+ virtual uint64_t getV() const = 0;
136
+ virtual uint64_t getN() const = 0;
137
+ virtual size_t getNumDocs() const = 0;
138
+ virtual const Dictionary& getVocabDict() const = 0;
139
+ virtual const std::vector<uint64_t>& getVocabCf() const = 0;
140
+ virtual const std::vector<uint64_t>& getVocabDf() const = 0;
141
+
142
+ virtual int train(size_t iteration, size_t numWorkers, ParallelScheme ps = ParallelScheme::default_) = 0;
143
+ virtual size_t getGlobalStep() const = 0;
144
+ virtual void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) = 0;
145
+
146
+ virtual size_t getK() const = 0;
147
+ virtual std::vector<Float> getWidsByTopic(size_t tid) const = 0;
148
+ virtual std::vector<std::pair<std::string, Float>> getWordsByTopicSorted(size_t tid, size_t topN) const = 0;
149
+
150
+ virtual std::vector<std::pair<std::string, Float>> getWordsByDocSorted(const DocumentBase* doc, size_t topN) const = 0;
151
+
152
+ virtual std::vector<Float> getTopicsByDoc(const DocumentBase* doc) const = 0;
153
+ virtual std::vector<std::pair<Tid, Float>> getTopicsByDocSorted(const DocumentBase* doc, size_t topN) const = 0;
154
+ virtual std::vector<double> infer(const std::vector<DocumentBase*>& docs, size_t maxIter, Float tolerance, size_t numWorkers, ParallelScheme ps, bool together) const = 0;
155
+ virtual ~ITopicModel() {}
156
+ };
157
+
158
/**
 * Returns the `topN` largest entries of `vec` as (index, value) pairs,
 * ordered by value from largest to smallest.
 *
 * @tparam _TyKey   integer-like type used for the index of each entry
 * @tparam _TyValue element type of `vec`; must be comparable with `>`
 * @param vec   values to rank; the index of an element becomes its key
 * @param topN  maximum number of pairs to return
 * @return at most `topN` pairs (all of them when `topN >= vec.size()`);
 *         the relative order of equal values is unspecified
 */
template<class _TyKey, class _TyValue>
static std::vector<std::pair<_TyKey, _TyValue>> extractTopN(const std::vector<_TyValue>& vec, size_t topN)
{
	typedef std::pair<_TyKey, _TyValue> pair_t;
	const auto byValueDesc = [](const pair_t& a, const pair_t& b)
	{
		return a.second > b.second;
	};

	std::vector<pair_t> ret;
	ret.reserve(vec.size());
	_TyKey k = 0;
	for (auto& t : vec)
	{
		ret.emplace_back(k++, t);
	}

	if (topN < ret.size())
	{
		// Only the leading topN entries are kept, so ordering the rest is wasted work.
		std::partial_sort(ret.begin(), ret.begin() + topN, ret.end(), byValueDesc);
		ret.erase(ret.begin() + topN, ret.end());
	}
	else
	{
		std::sort(ret.begin(), ret.end(), byValueDesc);
	}
	return ret;
}
175
+
176
// Bit flags that are OR-ed together into the `_Flags` template argument of
// TopicModel and tested there with `_Flags & flags::...`.
namespace flags
{
	enum
	{
		continuous_doc_data = 0x1,
		shared_state = 0x2,
		partitioned_multisampling = 0x4,
		// NOTE(review): presumably the first bit free for derived models - confirm.
		end_flag_of_TopicModel = 0x8,
	};
}
186
+
187
+ template<typename _RandGen, size_t _Flags, typename _Interface, typename _Derived,
188
+ typename _DocType, typename _ModelState
189
+ >
190
+ class TopicModel : public _Interface
191
+ {
192
+ friend class Document;
193
+ public:
194
+ using DocType = _DocType;
195
+ protected:
196
+ _RandGen rg;
197
+ std::vector<_RandGen> localRG;
198
+ std::vector<Vid> words;
199
+ std::vector<uint32_t> wOffsetByDoc;
200
+
201
+ std::vector<DocType> docs;
202
+ std::vector<uint64_t> vocabCf;
203
+ std::vector<uint64_t> vocabDf;
204
+ size_t globalStep = 0;
205
+ _ModelState globalState, tState;
206
+ Dictionary dict;
207
+ uint64_t realV = 0; // vocab size after removing stopwords
208
+ uint64_t realN = 0; // total word size after removing stopwords
209
+ size_t maxThreads[(size_t)ParallelScheme::size] = { 0, };
210
+ size_t minWordCf = 0, minWordDf = 0, removeTopN = 0;
211
+
212
+ std::unique_ptr<ThreadPool> cachedPool;
213
+
214
+ void _saveModel(std::ostream& writer, bool fullModel, const std::vector<uint8_t>* extra_data) const
215
+ {
216
+ serializer::writeMany(writer,
217
+ serializer::to_keyz(static_cast<const _Derived*>(this)->TMID),
218
+ serializer::to_keyz(static_cast<const _Derived*>(this)->TWID));
219
+ serializer::writeTaggedMany(writer, 0x00010001,
220
+ serializer::to_keyz("dict"), dict,
221
+ serializer::to_keyz("vocabCf"), vocabCf,
222
+ serializer::to_keyz("vocabDf"), vocabDf,
223
+ serializer::to_keyz("realV"), realV,
224
+ serializer::to_keyz("globalStep"), globalStep,
225
+ serializer::to_keyz("extra"), extra_data ? *extra_data : std::vector<uint8_t>(0));
226
+ serializer::writeMany(writer, *static_cast<const _Derived*>(this));
227
+ globalState.serializerWrite(writer);
228
+ if (fullModel)
229
+ {
230
+ serializer::writeMany(writer, docs);
231
+ }
232
+ else
233
+ {
234
+ serializer::writeMany(writer, std::vector<size_t>{});
235
+ }
236
+ }
237
+
238
+ void _loadModel(std::istream& reader, std::vector<uint8_t>* extra_data)
239
+ {
240
+ auto start_pos = reader.tellg();
241
+ try
242
+ {
243
+ std::vector<uint8_t> extra;
244
+ serializer::readMany(reader,
245
+ serializer::to_keyz(static_cast<_Derived*>(this)->TMID),
246
+ serializer::to_keyz(static_cast<_Derived*>(this)->TWID));
247
+ serializer::readTaggedMany(reader, 0x00010001,
248
+ serializer::to_keyz("dict"), dict,
249
+ serializer::to_keyz("vocabCf"), vocabCf,
250
+ serializer::to_keyz("vocabDf"), vocabDf,
251
+ serializer::to_keyz("realV"), realV,
252
+ serializer::to_keyz("globalStep"), globalStep,
253
+ serializer::to_keyz("extra"), extra);
254
+ if (extra_data) *extra_data = std::move(extra);
255
+ }
256
+ catch (const std::ios_base::failure&)
257
+ {
258
+ reader.seekg(start_pos);
259
+ serializer::readMany(reader,
260
+ serializer::to_key(static_cast<_Derived*>(this)->TMID),
261
+ serializer::to_key(static_cast<_Derived*>(this)->TWID),
262
+ dict, vocabCf, realV);
263
+ }
264
+ serializer::readMany(reader, *static_cast<_Derived*>(this));
265
+ globalState.serializerRead(reader);
266
+ serializer::readMany(reader, docs);
267
+ realN = countRealN();
268
+ }
269
+
270
+ template<typename _DocTy>
271
+ typename std::enable_if<std::is_same<DocType,
272
+ typename std::remove_reference<typename std::remove_cv<_DocTy>::type>::type
273
+ >::value, size_t>::type _addDoc(_DocTy&& doc)
274
+ {
275
+ if (doc.words.empty()) return -1;
276
+ size_t maxWid = *std::max_element(doc.words.begin(), doc.words.end());
277
+ if (vocabCf.size() <= maxWid)
278
+ {
279
+ vocabCf.resize(maxWid + 1);
280
+ vocabDf.resize(maxWid + 1);
281
+ }
282
+ for (auto w : doc.words) ++vocabCf[w];
283
+ std::unordered_set<Vid> uniq{ doc.words.begin(), doc.words.end() };
284
+ for (auto w : uniq) ++vocabDf[w];
285
+ docs.emplace_back(std::forward<_DocTy>(doc));
286
+ return docs.size() - 1;
287
+ }
288
+
289
+ template<bool _const = false>
290
+ DocType _makeDoc(const std::vector<std::string>& words, Float weight = 1)
291
+ {
292
+ DocType doc{ weight };
293
+ for (auto& w : words)
294
+ {
295
+ Vid id;
296
+ if (_const)
297
+ {
298
+ id = dict.toWid(w);
299
+ if (id == (Vid)-1) continue;
300
+ }
301
+ else
302
+ {
303
+ id = dict.add(w);
304
+ }
305
+ doc.words.emplace_back(id);
306
+ }
307
+ return doc;
308
+ }
309
+
310
+ DocType _makeRawDoc(const std::string& rawStr, const std::vector<Vid>& words,
311
+ const std::vector<uint32_t>& pos, const std::vector<uint16_t>& len, Float weight = 1) const
312
+ {
313
+ DocType doc{ weight };
314
+ doc.rawStr = rawStr;
315
+ for (auto& w : words) doc.words.emplace_back(w);
316
+ doc.origWordPos = pos;
317
+ doc.origWordLen = len;
318
+ return doc;
319
+ }
320
+
321
+ template<bool _const, typename _FnTokenizer>
322
+ DocType _makeRawDoc(const std::string& rawStr, _FnTokenizer&& tokenizer, Float weight = 1)
323
+ {
324
+ DocType doc{ weight };
325
+ doc.rawStr = rawStr;
326
+ for (auto& p : tokenizer(doc.rawStr))
327
+ {
328
+ Vid wid;
329
+ if (_const)
330
+ {
331
+ wid = dict.toWid(std::get<0>(p));
332
+ if (wid == (Vid)-1) continue;
333
+ }
334
+ else
335
+ {
336
+ wid = dict.add(std::get<0>(p));
337
+ }
338
+ auto pos = std::get<1>(p);
339
+ auto len = std::get<2>(p);
340
+ doc.words.emplace_back(wid);
341
+ doc.origWordPos.emplace_back(pos);
342
+ doc.origWordLen.emplace_back(len);
343
+ }
344
+ return doc;
345
+ }
346
+
347
+ const DocType& _getDoc(size_t docId) const
348
+ {
349
+ return docs[docId];
350
+ }
351
+
352
+ void updateWeakArray()
353
+ {
354
+ wOffsetByDoc.emplace_back(0);
355
+ for (auto& doc : docs)
356
+ {
357
+ wOffsetByDoc.emplace_back(wOffsetByDoc.back() + doc.words.size());
358
+ }
359
+ auto tx = [](_DocType& doc) { return &doc.words; };
360
+ tvector<Vid>::trade(words,
361
+ makeTransformIter(docs.begin(), tx),
362
+ makeTransformIter(docs.end(), tx));
363
+ }
364
+
365
+ size_t countRealN() const
366
+ {
367
+ size_t n = 0;
368
+ for (auto& doc : docs)
369
+ {
370
+ for (auto& w : doc.words)
371
+ {
372
+ if (w < realV) ++n;
373
+ }
374
+ }
375
+ return n;
376
+ }
377
+
378
+ void removeStopwords(size_t minWordCnt, size_t minWordDf, size_t removeTopN)
379
+ {
380
+ if (minWordCnt <= 1 && minWordDf <= 1 && removeTopN == 0) realV = dict.size();
381
+ this->minWordCf = minWordCnt;
382
+ this->minWordDf = minWordDf;
383
+ this->removeTopN = removeTopN;
384
+ std::vector<std::pair<size_t, size_t>> vocabCfDf;
385
+ for (size_t i = 0; i < vocabCf.size(); ++i)
386
+ {
387
+ vocabCfDf.emplace_back(vocabCf[i], vocabDf[i]);
388
+ }
389
+
390
+ std::vector<Vid> order;
391
+ sortAndWriteOrder(vocabCfDf, order, removeTopN, [&](const std::pair<size_t, size_t>& a, const std::pair<size_t, size_t>& b)
392
+ {
393
+ if (a.first < minWordCnt || a.second < minWordDf)
394
+ {
395
+ if (b.first < minWordCnt || b.second < minWordDf)
396
+ {
397
+ return a > b;
398
+ }
399
+ return false;
400
+ }
401
+ if (b.first < minWordCnt || b.second < minWordDf)
402
+ {
403
+ return true;
404
+ }
405
+ return a > b;
406
+ });
407
+ realV = std::find_if(vocabCfDf.begin(), vocabCfDf.end() - std::min(removeTopN, vocabCfDf.size()), [&](const std::pair<size_t, size_t>& a)
408
+ {
409
+ return a.first < minWordCnt || a.second < minWordDf;
410
+ }) - vocabCfDf.begin();
411
+
412
+ for (size_t i = 0; i < vocabCfDf.size(); ++i)
413
+ {
414
+ vocabCf[i] = vocabCfDf[i].first;
415
+ vocabDf[i] = vocabCfDf[i].second;
416
+ }
417
+
418
+ dict.reorder(order);
419
+ realN = 0;
420
+ for (auto& doc : docs)
421
+ {
422
+ for (auto& w : doc.words)
423
+ {
424
+ w = order[w];
425
+ if (w < realV) ++realN;
426
+ }
427
+ }
428
+ }
429
+
430
+ int restoreFromTrainingError(const exception::TrainingError& e, ThreadPool& pool, _ModelState* localData, _RandGen* rgs)
431
+ {
432
+ throw e;
433
+ }
434
+
435
+ public:
436
+ TopicModel(size_t _rg) : rg(_rg)
437
+ {
438
+ }
439
+
440
+ size_t getNumDocs() const override
441
+ {
442
+ return docs.size();
443
+ }
444
+
445
+ uint64_t getN() const override
446
+ {
447
+ return realN;
448
+ }
449
+
450
+ uint64_t getV() const override
451
+ {
452
+ return realV;
453
+ }
454
+
455
+ void updateVocab(const std::vector<std::string>& words) override
456
+ {
457
+ if(dict.size()) THROW_ERROR_WITH_INFO(exception::InvalidArgument, "updateVocab after addDoc");
458
+ for(auto& w : words) dict.add(w);
459
+ }
460
+
461
+ void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) override
462
+ {
463
+ maxThreads[(size_t)ParallelScheme::default_] = -1;
464
+ maxThreads[(size_t)ParallelScheme::none] = -1;
465
+ maxThreads[(size_t)ParallelScheme::copy_merge] = static_cast<_Derived*>(this)->template estimateMaxThreads<ParallelScheme::copy_merge>();
466
+ maxThreads[(size_t)ParallelScheme::partition] = static_cast<_Derived*>(this)->template estimateMaxThreads<ParallelScheme::partition>();
467
+ }
468
+
469
+ static ParallelScheme getRealScheme(ParallelScheme ps)
470
+ {
471
+ switch (ps)
472
+ {
473
+ case ParallelScheme::default_:
474
+ if ((_Flags & flags::partitioned_multisampling)) return ParallelScheme::partition;
475
+ if ((_Flags & flags::shared_state)) return ParallelScheme::none;
476
+ return ParallelScheme::copy_merge;
477
+ case ParallelScheme::copy_merge:
478
+ if ((_Flags & flags::shared_state)) THROW_ERROR_WITH_INFO(exception::InvalidArgument,
479
+ std::string{ "This model doesn't provide ParallelScheme::" } + toString(ps));
480
+ break;
481
+ case ParallelScheme::partition:
482
+ if (!(_Flags & flags::partitioned_multisampling)) THROW_ERROR_WITH_INFO(exception::InvalidArgument,
483
+ std::string{ "This model doesn't provide ParallelScheme::" } + toString(ps));
484
+ break;
485
+ }
486
+ return ps;
487
+ }
488
+
489
+ int train(size_t iteration, size_t numWorkers, ParallelScheme ps) override
490
+ {
491
+ if (!numWorkers) numWorkers = std::thread::hardware_concurrency();
492
+ ps = getRealScheme(ps);
493
+ numWorkers = std::min(numWorkers, maxThreads[(size_t)ps]);
494
+ if (numWorkers == 1 || (_Flags & flags::shared_state)) ps = ParallelScheme::none;
495
+ if (!cachedPool || cachedPool->getNumWorkers() != numWorkers)
496
+ {
497
+ cachedPool = make_unique<ThreadPool>(numWorkers);
498
+ }
499
+
500
+ std::vector<_ModelState> localData;
501
+
502
+ while(localRG.size() < numWorkers)
503
+ {
504
+ localRG.emplace_back(rg());
505
+ }
506
+
507
+ for (size_t i = 0; i < numWorkers; ++i)
508
+ {
509
+ if(ps == ParallelScheme::copy_merge) localData.emplace_back(static_cast<_Derived*>(this)->globalState);
510
+ }
511
+
512
+ if (ps == ParallelScheme::partition)
513
+ {
514
+ localData.resize(numWorkers);
515
+ static_cast<_Derived*>(this)->updatePartition(*cachedPool, globalState, localData.data(), docs.begin(), docs.end(),
516
+ static_cast<_Derived*>(this)->eddTrain);
517
+ }
518
+
519
+ auto state = ps == ParallelScheme::none ? &globalState : localData.data();
520
+ for (size_t i = 0; i < iteration; ++i)
521
+ {
522
+ while (1)
523
+ {
524
+ try
525
+ {
526
+ switch (ps)
527
+ {
528
+ case ParallelScheme::none:
529
+ static_cast<_Derived*>(this)->template trainOne<ParallelScheme::none>(
530
+ *cachedPool, state, localRG.data());
531
+ break;
532
+ case ParallelScheme::copy_merge:
533
+ static_cast<_Derived*>(this)->template trainOne<ParallelScheme::copy_merge>(
534
+ *cachedPool, state, localRG.data());
535
+ break;
536
+ case ParallelScheme::partition:
537
+ static_cast<_Derived*>(this)->template trainOne<ParallelScheme::partition>(
538
+ *cachedPool, state, localRG.data());
539
+ break;
540
+ }
541
+ break;
542
+ }
543
+ catch (const exception::TrainingError& e)
544
+ {
545
+ std::cerr << e.what() << std::endl;
546
+ int ret = static_cast<_Derived*>(this)->restoreFromTrainingError(
547
+ e, *cachedPool, state, localRG.data());
548
+ if(ret < 0) return ret;
549
+ }
550
+ }
551
+ ++globalStep;
552
+ }
553
+ return 0;
554
+ }
555
+
556
+ double getLLPerWord() const override
557
+ {
558
+ return words.empty() ? 0 : static_cast<const _Derived*>(this)->getLL() / realN;
559
+ }
560
+
561
+ double getPerplexity() const override
562
+ {
563
+ return exp(-getLLPerWord());
564
+ }
565
+
566
+ size_t getK() const override
567
+ {
568
+ return 0;
569
+ }
570
+
571
+ std::vector<Float> getWidsByTopic(size_t tid) const override
572
+ {
573
+ return static_cast<const _Derived*>(this)->_getWidsByTopic(tid);
574
+ }
575
+
576
+ std::vector<std::pair<Vid, Float>> getWidsByTopicSorted(size_t tid, size_t topN) const
577
+ {
578
+ return extractTopN<Vid>(static_cast<const _Derived*>(this)->_getWidsByTopic(tid), topN);
579
+ }
580
+
581
+ std::vector<std::pair<std::string, Float>> vid2String(const std::vector<std::pair<Vid, Float>>& vids) const
582
+ {
583
+ std::vector<std::pair<std::string, Float>> ret(vids.size());
584
+ for (size_t i = 0; i < vids.size(); ++i)
585
+ {
586
+ ret[i] = std::make_pair(dict.toWord(vids[i].first), vids[i].second);
587
+ }
588
+ return ret;
589
+ }
590
+
591
+ std::vector<std::pair<std::string, Float>> getWordsByTopicSorted(size_t tid, size_t topN) const override
592
+ {
593
+ return vid2String(getWidsByTopicSorted(tid, topN));
594
+ }
595
+
596
+ std::vector<std::pair<Vid, Float>> getWidsByDocSorted(const DocumentBase* doc, size_t topN) const
597
+ {
598
+ std::vector<Float> cnt(dict.size());
599
+ for (auto w : doc->words) cnt[w] += 1;
600
+ for (auto& c : cnt) c /= doc->words.size();
601
+ return extractTopN<Vid>(cnt, topN);
602
+ }
603
+
604
+ std::vector<std::pair<std::string, Float>> getWordsByDocSorted(const DocumentBase* doc, size_t topN) const override
605
+ {
606
+ return vid2String(getWidsByDocSorted(doc, topN));
607
+ }
608
+
609
+ std::vector<double> infer(const std::vector<DocumentBase*>& docs, size_t maxIter, Float tolerance, size_t numWorkers, ParallelScheme ps, bool together) const override
610
+ {
611
+ if (!numWorkers) numWorkers = std::thread::hardware_concurrency();
612
+ ps = getRealScheme(ps);
613
+ if (numWorkers == 1) ps = ParallelScheme::none;
614
+ auto tx = [](DocumentBase* p)->DocType& { return *static_cast<DocType*>(p); };
615
+ auto b = makeTransformIter(docs.begin(), tx), e = makeTransformIter(docs.end(), tx);
616
+
617
+ if (together)
618
+ {
619
+ switch (ps)
620
+ {
621
+ case ParallelScheme::none:
622
+ return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::none>(b, e, maxIter, tolerance, numWorkers);
623
+ case ParallelScheme::copy_merge:
624
+ return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::copy_merge>(b, e, maxIter, tolerance, numWorkers);
625
+ case ParallelScheme::partition:
626
+ return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::partition>(b, e, maxIter, tolerance, numWorkers);
627
+ }
628
+ }
629
+ else
630
+ {
631
+ switch (ps)
632
+ {
633
+ case ParallelScheme::none:
634
+ return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::none>(b, e, maxIter, tolerance, numWorkers);
635
+ case ParallelScheme::copy_merge:
636
+ return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::copy_merge>(b, e, maxIter, tolerance, numWorkers);
637
+ case ParallelScheme::partition:
638
+ return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::partition>(b, e, maxIter, tolerance, numWorkers);
639
+ }
640
+ }
641
+ THROW_ERROR_WITH_INFO(exception::InvalidArgument, "invalid ParallelScheme");
642
+ }
643
+
644
+ std::vector<Float> getTopicsByDoc(const DocumentBase* doc) const override
645
+ {
646
+ return static_cast<const _Derived*>(this)->getTopicsByDoc(*static_cast<const DocType*>(doc));
647
+ }
648
+
649
+ std::vector<std::pair<Tid, Float>> getTopicsByDocSorted(const DocumentBase* doc, size_t topN) const override
650
+ {
651
+ return extractTopN<Tid>(getTopicsByDoc(doc), topN);
652
+ }
653
+
654
+
655
+ const DocumentBase* getDoc(size_t docId) const override
656
+ {
657
+ return &_getDoc(docId);
658
+ }
659
+
660
+ size_t getGlobalStep() const override
661
+ {
662
+ return globalStep;
663
+ }
664
+
665
+ const Dictionary& getVocabDict() const override
666
+ {
667
+ return dict;
668
+ }
669
+
670
+ const std::vector<uint64_t>& getVocabCf() const override
671
+ {
672
+ return vocabCf;
673
+ }
674
+
675
+ const std::vector<uint64_t>& getVocabDf() const override
676
+ {
677
+ return vocabDf;
678
+ }
679
+
680
+ void saveModel(std::ostream& writer, bool fullModel, const std::vector<uint8_t>* extra_data) const override
681
+ {
682
+ static_cast<const _Derived*>(this)->_saveModel(writer, fullModel, extra_data);
683
+ }
684
+
685
+ void loadModel(std::istream& reader, std::vector<uint8_t>* extra_data) override
686
+ {
687
+ static_cast<_Derived*>(this)->_loadModel(reader, extra_data);
688
+ static_cast<_Derived*>(this)->prepare(false);
689
+ }
690
+ };
691
+
692
+ }