tomoto 0.3.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +45 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +162 -0
  5. data/ext/tomoto/ct.cpp +58 -0
  6. data/ext/tomoto/dmr.cpp +69 -0
  7. data/ext/tomoto/dt.cpp +91 -0
  8. data/ext/tomoto/extconf.rb +34 -0
  9. data/ext/tomoto/gdmr.cpp +42 -0
  10. data/ext/tomoto/hdp.cpp +47 -0
  11. data/ext/tomoto/hlda.cpp +71 -0
  12. data/ext/tomoto/hpa.cpp +32 -0
  13. data/ext/tomoto/lda.cpp +281 -0
  14. data/ext/tomoto/llda.cpp +33 -0
  15. data/ext/tomoto/mglda.cpp +81 -0
  16. data/ext/tomoto/pa.cpp +32 -0
  17. data/ext/tomoto/plda.cpp +33 -0
  18. data/ext/tomoto/slda.cpp +48 -0
  19. data/ext/tomoto/tomoto.cpp +48 -0
  20. data/ext/tomoto/utils.h +30 -0
  21. data/lib/tomoto/2.7/tomoto.so +0 -0
  22. data/lib/tomoto/3.0/tomoto.so +0 -0
  23. data/lib/tomoto/3.1/tomoto.so +0 -0
  24. data/lib/tomoto/ct.rb +24 -0
  25. data/lib/tomoto/dmr.rb +27 -0
  26. data/lib/tomoto/dt.rb +15 -0
  27. data/lib/tomoto/gdmr.rb +15 -0
  28. data/lib/tomoto/hdp.rb +11 -0
  29. data/lib/tomoto/hlda.rb +56 -0
  30. data/lib/tomoto/hpa.rb +11 -0
  31. data/lib/tomoto/lda.rb +181 -0
  32. data/lib/tomoto/llda.rb +15 -0
  33. data/lib/tomoto/mglda.rb +15 -0
  34. data/lib/tomoto/pa.rb +11 -0
  35. data/lib/tomoto/plda.rb +15 -0
  36. data/lib/tomoto/slda.rb +37 -0
  37. data/lib/tomoto/version.rb +3 -0
  38. data/lib/tomoto.rb +27 -0
  39. data/vendor/EigenRand/EigenRand/EigenRand +24 -0
  40. data/vendor/EigenRand/LICENSE +21 -0
  41. data/vendor/EigenRand/README.md +426 -0
  42. data/vendor/eigen/COPYING.APACHE +203 -0
  43. data/vendor/eigen/COPYING.BSD +26 -0
  44. data/vendor/eigen/COPYING.GPL +674 -0
  45. data/vendor/eigen/COPYING.LGPL +502 -0
  46. data/vendor/eigen/COPYING.MINPACK +51 -0
  47. data/vendor/eigen/COPYING.MPL2 +373 -0
  48. data/vendor/eigen/COPYING.README +18 -0
  49. data/vendor/eigen/Eigen/Cholesky +45 -0
  50. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  51. data/vendor/eigen/Eigen/Core +384 -0
  52. data/vendor/eigen/Eigen/Dense +7 -0
  53. data/vendor/eigen/Eigen/Eigen +2 -0
  54. data/vendor/eigen/Eigen/Eigenvalues +60 -0
  55. data/vendor/eigen/Eigen/Geometry +59 -0
  56. data/vendor/eigen/Eigen/Householder +29 -0
  57. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  58. data/vendor/eigen/Eigen/Jacobi +32 -0
  59. data/vendor/eigen/Eigen/KLUSupport +41 -0
  60. data/vendor/eigen/Eigen/LU +47 -0
  61. data/vendor/eigen/Eigen/MetisSupport +35 -0
  62. data/vendor/eigen/Eigen/OrderingMethods +70 -0
  63. data/vendor/eigen/Eigen/PaStiXSupport +49 -0
  64. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  65. data/vendor/eigen/Eigen/QR +50 -0
  66. data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
  67. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  68. data/vendor/eigen/Eigen/SVD +50 -0
  69. data/vendor/eigen/Eigen/Sparse +34 -0
  70. data/vendor/eigen/Eigen/SparseCholesky +37 -0
  71. data/vendor/eigen/Eigen/SparseCore +69 -0
  72. data/vendor/eigen/Eigen/SparseLU +50 -0
  73. data/vendor/eigen/Eigen/SparseQR +36 -0
  74. data/vendor/eigen/Eigen/StdDeque +27 -0
  75. data/vendor/eigen/Eigen/StdList +26 -0
  76. data/vendor/eigen/Eigen/StdVector +27 -0
  77. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  78. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  79. data/vendor/eigen/README.md +5 -0
  80. data/vendor/eigen/bench/README.txt +55 -0
  81. data/vendor/eigen/bench/btl/COPYING +340 -0
  82. data/vendor/eigen/bench/btl/README +154 -0
  83. data/vendor/eigen/bench/tensors/README +20 -0
  84. data/vendor/eigen/blas/README.txt +6 -0
  85. data/vendor/eigen/ci/README.md +56 -0
  86. data/vendor/eigen/demos/mandelbrot/README +10 -0
  87. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  88. data/vendor/eigen/demos/opengl/README +13 -0
  89. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
  90. data/vendor/eigen/unsupported/README.txt +50 -0
  91. data/vendor/tomotopy/LICENSE +21 -0
  92. data/vendor/tomotopy/README.kr.rst +512 -0
  93. data/vendor/tomotopy/README.rst +516 -0
  94. data/vendor/variant/LICENSE +25 -0
  95. data/vendor/variant/LICENSE_1_0.txt +23 -0
  96. data/vendor/variant/README.md +102 -0
  97. metadata +140 -0
@@ -0,0 +1,281 @@
1
+ #include <fstream>
2
+ #include <iostream>
3
+
4
+ #include <LDA.h>
5
+
6
+ #include <rice/rice.hpp>
7
+
8
+ #include "utils.h"
9
+
10
+ class DocumentObject
11
+ {
12
+ public:
13
+ DocumentObject(const tomoto::DocumentBase* _doc, const tomoto::ITopicModel* _tm) : doc{ _doc }, tm{ _tm } {}
14
+
15
+ const tomoto::DocumentBase* doc;
16
+ const tomoto::ITopicModel* tm;
17
+ };
18
+
19
+ void init_lda(Rice::Module& m) {
20
+ Rice::define_class_under<DocumentObject>(m, "Document")
21
+ .define_method(
22
+ "topics",
23
+ [](DocumentObject& self) {
24
+ Rice::Hash res;
25
+ auto topics = self.tm->getTopicsByDoc(self.doc);
26
+ for (size_t i = 0; i < topics.size(); i++) {
27
+ res[i] = topics[i];
28
+ }
29
+ return res;
30
+ });
31
+
32
+ Rice::define_class_under<tomoto::ILDAModel>(m, "LDA")
33
+ .define_singleton_function(
34
+ "_new",
35
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
36
+ tomoto::LDAArgs args;
37
+ args.k = k;
38
+ args.alpha = {alpha};
39
+ args.eta = eta;
40
+ if (seed >= 0) {
41
+ args.seed = seed;
42
+ }
43
+ return tomoto::ILDAModel::create((tomoto::TermWeight)tw, args);
44
+ }, Rice::Return().takeOwnership())
45
+ .define_method(
46
+ "_add_doc",
47
+ [](tomoto::ILDAModel& self, std::vector<std::string> words) {
48
+ return self.addDoc(buildDoc(words));
49
+ })
50
+ .define_method(
51
+ "_make_doc",
52
+ *[](tomoto::ILDAModel& self, std::vector<std::string> words) {
53
+ return DocumentObject(self.makeDoc(buildDoc(words)).release(), &self);
54
+ })
55
+ .define_method(
56
+ "_infer",
57
+ *[](tomoto::ILDAModel& self, DocumentObject& doc_object, size_t iteration, float tolerance, size_t workers, size_t ps, size_t together) {
58
+ std::vector<tomoto::DocumentBase*> docs;
59
+ auto doc = doc_object.doc;
60
+ docs.emplace_back(const_cast<tomoto::DocumentBase*>(doc));
61
+ float ll = self.infer(docs, iteration, tolerance, workers, (tomoto::ParallelScheme)ps, !!together)[0];
62
+
63
+ auto topic_dist = self.getTopicsByDoc(doc);
64
+ auto topic_res = Array();
65
+ for (size_t i = 0; i < topic_dist.size(); i++) {
66
+ topic_res.push(topic_dist[i]);
67
+ }
68
+
69
+ auto res = Array();
70
+ res.push(topic_res);
71
+ res.push(ll);
72
+ return res;
73
+ })
74
+ .define_method(
75
+ "alpha",
76
+ [](tomoto::ILDAModel& self) {
77
+ Array res;
78
+ for (size_t i = 0; i < self.getK(); i++) {
79
+ res.push(self.getAlpha(i));
80
+ }
81
+ return res;
82
+ })
83
+ .define_method(
84
+ "burn_in",
85
+ [](tomoto::ILDAModel& self) {
86
+ return self.getBurnInIteration();
87
+ })
88
+ .define_method(
89
+ "burn_in=",
90
+ [](tomoto::ILDAModel& self, size_t iteration) {
91
+ self.setBurnInIteration(iteration);
92
+ return iteration;
93
+ })
94
+ .define_method(
95
+ "_count_by_topics",
96
+ [](tomoto::ILDAModel& self) {
97
+ Array res;
98
+ for (auto const& v : self.getCountByTopic()) {
99
+ res.push(v);
100
+ }
101
+ return res;
102
+ })
103
+ .define_method(
104
+ "docs",
105
+ [](tomoto::ILDAModel& self) {
106
+ Array res;
107
+ auto n = self.getNumDocs();
108
+ for (size_t i = 0; i < n; i++) {
109
+ auto v = DocumentObject(self.getDoc(i), &self);
110
+ res.push(Object(Rice::detail::To_Ruby<DocumentObject>().convert(v)));
111
+ }
112
+ return res;
113
+ })
114
+ .define_method(
115
+ "eta",
116
+ [](tomoto::ILDAModel& self) {
117
+ return self.getEta();
118
+ })
119
+ .define_method(
120
+ "global_step",
121
+ [](tomoto::ILDAModel& self) {
122
+ return self.getGlobalStep();
123
+ })
124
+ .define_method(
125
+ "k",
126
+ [](tomoto::ILDAModel& self) {
127
+ return self.getK();
128
+ })
129
+ .define_method(
130
+ "_load",
131
+ [](tomoto::ILDAModel& self, const char* filename) {
132
+ std::ifstream str{ filename, std::ios_base::binary };
133
+ if (!str) throw std::runtime_error{ std::string("cannot open file '") + filename + std::string("'") };
134
+ std::vector<uint8_t> extra_data;
135
+ self.loadModel(str, &extra_data);
136
+ })
137
+ .define_method(
138
+ "ll_per_word",
139
+ [](tomoto::ILDAModel& self) {
140
+ return self.getLLPerWord();
141
+ })
142
+ .define_method(
143
+ "num_docs",
144
+ [](tomoto::ILDAModel& self) {
145
+ return self.getNumDocs();
146
+ })
147
+ .define_method(
148
+ "num_vocabs",
149
+ [](tomoto::ILDAModel& self) {
150
+ return self.getV();
151
+ })
152
+ .define_method(
153
+ "num_words",
154
+ [](tomoto::ILDAModel& self) {
155
+ return self.getN();
156
+ })
157
+ .define_method(
158
+ "optim_interval",
159
+ [](tomoto::ILDAModel& self) {
160
+ return self.getOptimInterval();
161
+ })
162
+ .define_method(
163
+ "optim_interval=",
164
+ [](tomoto::ILDAModel& self, size_t value) {
165
+ self.setOptimInterval(value);
166
+ return value;
167
+ })
168
+ .define_method(
169
+ "perplexity",
170
+ [](tomoto::ILDAModel& self) {
171
+ return self.getPerplexity();
172
+ })
173
+ .define_method(
174
+ "_prepare",
175
+ [](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
176
+ self.prepare(true, minCnt, minDf, rmTop);
177
+ })
178
+ .define_method(
179
+ "_removed_top_words",
180
+ [](tomoto::ILDAModel& self, size_t rmTop) {
181
+ Array res;
182
+ auto dict = self.getVocabDict();
183
+ size_t size = dict.size();
184
+ for (size_t i = rmTop; i > 0; i--) {
185
+ res.push(dict.toWord(size - i));
186
+ }
187
+ return res;
188
+ })
189
+ .define_method(
190
+ "_save",
191
+ [](tomoto::ILDAModel& self, const char* filename, bool full) {
192
+ std::ofstream str{ filename, std::ios_base::binary };
193
+ std::vector<uint8_t> extra_data;
194
+ self.saveModel(str, full, &extra_data);
195
+ })
196
+ .define_method(
197
+ "_topic_words",
198
+ [](tomoto::ILDAModel& self, size_t topicId, size_t topN) {
199
+ Rice::Hash res;
200
+ for (auto const& v : self.getWordsByTopicSorted(topicId, topN)) {
201
+ res[v.first] = v.second;
202
+ }
203
+ return res;
204
+ })
205
+ .define_method(
206
+ "_train",
207
+ [](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
208
+ self.train(iteration, workers, (tomoto::ParallelScheme)ps);
209
+ })
210
+ .define_method(
211
+ "_tw",
212
+ [](tomoto::ILDAModel& self) {
213
+ return (int)self.getTermWeight();
214
+ })
215
+ .define_method(
216
+ "used_vocab_df",
217
+ [](tomoto::ILDAModel& self) {
218
+ auto vocab = self.getVocabDf();
219
+ Array res;
220
+ for (size_t i = 0; i < self.getV(); i++) {
221
+ res.push(vocab[i]);
222
+ }
223
+ return res;
224
+ })
225
+ .define_method(
226
+ "used_vocab_freq",
227
+ [](tomoto::ILDAModel& self) {
228
+ auto vocab = self.getVocabCf();
229
+ Array res;
230
+ for (size_t i = 0; i < self.getV(); i++) {
231
+ res.push(vocab[i]);
232
+ }
233
+ return res;
234
+ })
235
+ .define_method(
236
+ "used_vocabs",
237
+ [](tomoto::ILDAModel& self) {
238
+ auto dict = self.getVocabDict();
239
+ Array res;
240
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
241
+ for (size_t i = 0; i < self.getV(); i++) {
242
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
243
+ Object obj(value);
244
+ res.push(obj.call("force_encoding", utf8));
245
+ }
246
+ return res;
247
+ })
248
+ .define_method(
249
+ "vocab_df",
250
+ [](tomoto::ILDAModel& self) {
251
+ auto vocab = self.getVocabDf();
252
+ Array res;
253
+ for (size_t i = 0; i < vocab.size(); i++) {
254
+ res.push(vocab[i]);
255
+ }
256
+ return res;
257
+ })
258
+ .define_method(
259
+ "vocab_freq",
260
+ [](tomoto::ILDAModel& self) {
261
+ auto vocab = self.getVocabCf();
262
+ Array res;
263
+ for (size_t i = 0; i < vocab.size(); i++) {
264
+ res.push(vocab[i]);
265
+ }
266
+ return res;
267
+ })
268
+ .define_method(
269
+ "vocabs",
270
+ [](tomoto::ILDAModel& self) {
271
+ auto dict = self.getVocabDict();
272
+ Array res;
273
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
274
+ for (size_t i = 0; i < dict.size(); i++) {
275
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
276
+ Object obj(value);
277
+ res.push(obj.call("force_encoding", utf8));
278
+ }
279
+ return res;
280
+ });
281
+ }
@@ -0,0 +1,33 @@
1
+ #include <LLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_llda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(m, "LLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::LDAArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::ILLDAModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_add_doc",
23
+ [](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
24
+ auto doc = buildDoc(words);
25
+ doc.misc["labels"] = labels;
26
+ return self.addDoc(doc);
27
+ })
28
+ .define_method(
29
+ "topics_per_label",
30
+ [](tomoto::ILLDAModel& self) {
31
+ return self.getNumTopicsPerLabel();
32
+ });
33
+ }
@@ -0,0 +1,81 @@
1
+ #include <MGLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_mglda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(m, "MGLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k_g, size_t k_l, size_t t, tomoto::Float alpha_g, tomoto::Float alpha_l, tomoto::Float alpha_mg, tomoto::Float alpha_ml, tomoto::Float eta_g) {
12
+ tomoto::MGLDAArgs args;
13
+ args.k = k_g;
14
+ args.kL = k_l;
15
+ args.t = t;
16
+ args.alpha = {alpha_g};
17
+ args.alphaL = {alpha_l};
18
+ args.alphaMG = alpha_mg;
19
+ args.alphaML = alpha_ml;
20
+ args.eta = eta_g;
21
+ // TODO more args
22
+ return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, args);
23
+ }, Rice::Return().takeOwnership())
24
+ .define_method(
25
+ "_add_doc",
26
+ [](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
27
+ auto doc = buildDoc(words);
28
+ doc.misc["delimiter"] = delimiter;
29
+ return self.addDoc(doc);
30
+ })
31
+ .define_method(
32
+ "alpha_g",
33
+ [](tomoto::IMGLDAModel& self) {
34
+ return self.getAlpha();
35
+ })
36
+ .define_method(
37
+ "alpha_l",
38
+ [](tomoto::IMGLDAModel& self) {
39
+ return self.getAlphaL();
40
+ })
41
+ .define_method(
42
+ "alpha_mg",
43
+ [](tomoto::IMGLDAModel& self) {
44
+ return self.getAlphaM();
45
+ })
46
+ .define_method(
47
+ "alpha_ml",
48
+ [](tomoto::IMGLDAModel& self) {
49
+ return self.getAlphaML();
50
+ })
51
+ .define_method(
52
+ "eta_g",
53
+ [](tomoto::IMGLDAModel& self) {
54
+ return self.getEta();
55
+ })
56
+ .define_method(
57
+ "eta_l",
58
+ [](tomoto::IMGLDAModel& self) {
59
+ return self.getEtaL();
60
+ })
61
+ .define_method(
62
+ "gamma",
63
+ [](tomoto::IMGLDAModel& self) {
64
+ return self.getGamma();
65
+ })
66
+ .define_method(
67
+ "k_g",
68
+ [](tomoto::IMGLDAModel& self) {
69
+ return self.getK();
70
+ })
71
+ .define_method(
72
+ "k_l",
73
+ [](tomoto::IMGLDAModel& self) {
74
+ return self.getKL();
75
+ })
76
+ .define_method(
77
+ "t",
78
+ [](tomoto::IMGLDAModel& self) {
79
+ return self.getT();
80
+ });
81
+ }
data/ext/tomoto/pa.cpp ADDED
@@ -0,0 +1,32 @@
1
+ #include <PA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_pa(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(m, "PA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::PAArgs args;
13
+ args.k = k1;
14
+ args.k2 = k2;
15
+ args.alpha = {alpha};
16
+ args.eta = eta;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IPAModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "k1",
24
+ [](tomoto::IPAModel& self) {
25
+ return self.getK();
26
+ })
27
+ .define_method(
28
+ "k2",
29
+ [](tomoto::IPAModel& self) {
30
+ return self.getK2();
31
+ });
32
+ }
@@ -0,0 +1,33 @@
1
+ #include <PLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_plda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(m, "PLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t latent_topics, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::PLDAArgs args;
13
+ args.numLatentTopics = latent_topics;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::IPLDAModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_add_doc",
23
+ [](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
24
+ auto doc = buildDoc(words);
25
+ doc.misc["labels"] = labels;
26
+ return self.addDoc(doc);
27
+ })
28
+ .define_method(
29
+ "latent_topics",
30
+ [](tomoto::IPLDAModel& self) {
31
+ return self.getNumLatentTopics();
32
+ });
33
+ }
@@ -0,0 +1,48 @@
1
+ #include <SLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_slda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(m, "SLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, Array rb_vars, tomoto::Float alpha, tomoto::Float eta, std::vector<tomoto::Float> mu, std::vector<tomoto::Float> nu_sq, std::vector<tomoto::Float> glm_param, size_t seed) {
12
+ std::vector<tomoto::ISLDAModel::GLM> vars;
13
+ vars.reserve(rb_vars.size());
14
+ for (auto const& v : rb_vars) {
15
+ vars.push_back((tomoto::ISLDAModel::GLM) Rice::detail::From_Ruby<int>().convert(v.value()));
16
+ }
17
+ tomoto::SLDAArgs args;
18
+ args.k = k;
19
+ args.vars = vars;
20
+ args.alpha = {alpha};
21
+ args.eta = eta;
22
+ args.mu = mu;
23
+ args.nuSq = nu_sq;
24
+ args.glmParam = glm_param;
25
+ if (seed >= 0) {
26
+ args.seed = seed;
27
+ }
28
+ return tomoto::ISLDAModel::create((tomoto::TermWeight)tw, args);
29
+ }, Rice::Return().takeOwnership())
30
+ .define_method(
31
+ "_add_doc",
32
+ [](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<tomoto::Float> y) {
33
+ auto doc = buildDoc(words);
34
+ doc.misc["y"] = y;
35
+ return self.addDoc(doc);
36
+ })
37
+ .define_method(
38
+ "f",
39
+ [](tomoto::ISLDAModel& self) {
40
+ return self.getF();
41
+ })
42
+ .define_method(
43
+ "_var_type",
44
+ [](tomoto::ISLDAModel& self, size_t var_id) {
45
+ if (var_id >= self.getF()) throw std::runtime_error{ "'var_id' must be < 'f'" };
46
+ return self.getTypeOfVar(var_id) == tomoto::ISLDAModel::GLM::linear ? "l" : "b";
47
+ });
48
+ }
@@ -0,0 +1,48 @@
1
+ #include <rice/rice.hpp>
2
+
3
+ void init_lda(Rice::Module& m);
4
+ void init_ct(Rice::Module& m);
5
+ void init_dmr(Rice::Module& m);
6
+ void init_dt(Rice::Module& m);
7
+ void init_gdmr(Rice::Module& m);
8
+ void init_hdp(Rice::Module& m);
9
+ void init_hlda(Rice::Module& m);
10
+ void init_pa(Rice::Module& m);
11
+ void init_hpa(Rice::Module& m);
12
+ void init_mglda(Rice::Module& m);
13
+ void init_llda(Rice::Module& m);
14
+ void init_plda(Rice::Module& m);
15
+ void init_slda(Rice::Module& m);
16
+
17
+ extern "C"
18
+ void Init_tomoto()
19
+ {
20
+ auto m = Rice::define_module("Tomoto")
21
+ .define_singleton_function(
22
+ "isa",
23
+ []() {
24
+ #ifdef __AVX2__
25
+ return Rice::String("avx2");
26
+ #elif defined(__AVX__)
27
+ return Rice::String("avx");
28
+ #elif defined(__SSE2__) || defined(__x86_64__) || defined(_WIN64)
29
+ return Rice::String("sse2");
30
+ #else
31
+ return Rice::String("none");
32
+ #endif
33
+ });
34
+
35
+ init_lda(m);
36
+ init_ct(m);
37
+ init_dmr(m);
38
+ init_dt(m);
39
+ init_gdmr(m);
40
+ init_hdp(m);
41
+ init_hlda(m);
42
+ init_pa(m);
43
+ init_hpa(m);
44
+ init_mglda(m);
45
+ init_llda(m);
46
+ init_plda(m);
47
+ init_slda(m);
48
+ }
@@ -0,0 +1,30 @@
1
+ #pragma once
2
+
3
+ #include <rice/rice.hpp>
4
+ #include <rice/stl.hpp>
5
+
6
+ using Rice::Array;
7
+ using Rice::Object;
8
+
9
+ namespace Rice::detail
10
+ {
11
+ template<typename T>
12
+ class To_Ruby<std::vector<T>>
13
+ {
14
+ public:
15
+ VALUE convert(std::vector<T> const & x)
16
+ {
17
+ auto a = rb_ary_new2(x.size());
18
+ for (const auto& v : x) {
19
+ detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
20
+ }
21
+ return a;
22
+ }
23
+ };
24
+ }
25
+
26
+ inline tomoto::RawDoc buildDoc(std::vector<std::string>& words) {
27
+ tomoto::RawDoc doc;
28
+ doc.rawWords = words;
29
+ return doc;
30
+ }
Binary file
Binary file
Binary file
data/lib/tomoto/ct.rb ADDED
@@ -0,0 +1,24 @@
1
module Tomoto
  # Ruby-side additions to the CT model class.
  class CT
    # Creates a model through the native `_new`, stores the preprocessing
    # options as instance variables on it and hands it to `init_params`.
    # A nil seed is passed to the native constructor as -1.
    #
    # No extra method-level locals are introduced here because the method's
    # binding is passed on to `init_params`.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
      model = _new(to_tw(tw), k, alpha, eta, seed || -1)
      { :@min_cf => min_cf, :@min_df => min_df, :@rm_top => rm_top }.each do |ivar, value|
        model.instance_variable_set(ivar, value)
      end
      init_params(model, binding)
    end

    # Returns `_correlations` for one topic when `topic_id` is given,
    # otherwise an array with one entry per topic (0...k).
    # `prepare` is always called first.
    def correlations(topic_id = nil)
      prepare
      return _correlations(topic_id) if topic_id

      k.times.map { |topic| _correlations(topic) }
    end

    # Returns the flat `_prior_cov` values regrouped into rows of `k` entries.
    def prior_cov
      rows = _prior_cov.each_slice(k)
      rows.to_a
    end
  end
end
data/lib/tomoto/dmr.rb ADDED
@@ -0,0 +1,27 @@
1
module Tomoto
  # Ruby-side additions to the DMR model class.
  class DMR
    # Creates a model through the native `_new`, stores the preprocessing
    # options as instance variables on it and hands it to `init_params`.
    # A nil seed is passed to the native constructor as -1.
    #
    # No extra method-level locals are introduced here because the method's
    # binding is passed on to `init_params`.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, sigma: 1.0, alpha_epsilon: 1e-10, seed: nil)
      model = _new(to_tw(tw), k, alpha, sigma, eta, alpha_epsilon, seed || -1)
      { :@min_cf => min_cf, :@min_df => min_df, :@rm_top => rm_top }.each do |ivar, value|
        model.instance_variable_set(ivar, value)
      end
      init_params(model, binding)
    end

    # Adds a document together with its metadata string (empty by default).
    def add_doc(doc, metadata: "")
      _add_doc(prepare_doc(doc), metadata)
    end

    # Returns `_lambdas` for every topic, or an empty array when `f` is 0.
    def lambdas
      return [] if f == 0

      k.times.map { |topic| _lambdas(topic) }
    end

    # Returns the element-wise exponential of `lambdas`.
    def alpha
      lambdas.map do |row|
        row.map { |value| Math.exp(value) }
      end
    end
  end
end
data/lib/tomoto/dt.rb ADDED
@@ -0,0 +1,15 @@
1
module Tomoto
  # Ruby-side additions to the DT model class.
  class DT
    # Creates a model through the native `_new`, stores the preprocessing
    # options as instance variables on it and hands it to `init_params`.
    # This constructor takes no seed (the original signature carries a
    # commented-out `seed: nil`).
    #
    # No extra method-level locals are introduced here because the method's
    # binding is passed on to `init_params`.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, t: 1, alpha_var: 0.1, eta_var: 0.1, phi_var: 0.1, lr_a: 0.01, lr_b: 0.1, lr_c: 0.55) #, seed: nil)
      model = _new(to_tw(tw), k, t, alpha_var, eta_var, phi_var, lr_a, lr_b, lr_c)
      { :@min_cf => min_cf, :@min_df => min_df, :@rm_top => rm_top }.each do |ivar, value|
        model.instance_variable_set(ivar, value)
      end
      init_params(model, binding)
    end

    # Adds a document assigned to the given timepoint (0 by default).
    def add_doc(doc, timepoint: 0)
      prepared = prepare_doc(doc)
      _add_doc(prepared, timepoint)
    end
  end
end
@@ -0,0 +1,15 @@
1
module Tomoto
  # Ruby-side additions to the GDMR model class.
  class GDMR
    # Creates a model through the native `_new`, stores the preprocessing
    # options as instance variables on it and hands it to `init_params`.
    # A nil seed is passed to the native constructor as -1.
    #
    # No extra method-level locals are introduced here because the method's
    # binding is passed on to `init_params`.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, degrees: [], alpha: 0.1, eta: 0.01, sigma: 1.0, sigma0: 3.0, alpha_epsilon: 1e-10, seed: nil)
      model = _new(to_tw(tw), k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed || -1)
      { :@min_cf => min_cf, :@min_df => min_df, :@rm_top => rm_top }.each do |ivar, value|
        model.instance_variable_set(ivar, value)
      end
      init_params(model, binding)
    end

    # Adds a document together with its numeric metadata (empty by default).
    def add_doc(doc, numeric_metadata: [])
      prepared = prepare_doc(doc)
      _add_doc(prepared, numeric_metadata)
    end
  end
end
data/lib/tomoto/hdp.rb ADDED
@@ -0,0 +1,11 @@
1
module Tomoto
  # Ruby-side additions to the HDP model class.
  class HDP
    # Creates a model through the native `_new`, stores the preprocessing
    # options as instance variables on it and hands it to `init_params`.
    # A nil seed is passed to the native constructor as -1.
    #
    # No extra method-level locals are introduced here because the method's
    # binding is passed on to `init_params`.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, initial_k: 2, alpha: 0.1, eta: 0.01, gamma: 0.1, seed: nil)
      model = _new(to_tw(tw), initial_k, alpha, eta, gamma, seed || -1)
      { :@min_cf => min_cf, :@min_df => min_df, :@rm_top => rm_top }.each do |ivar, value|
        model.instance_variable_set(ivar, value)
      end
      init_params(model, binding)
    end
  end
end