tomoto 0.3.2-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +54 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +164 -0
  5. data/ext/tomoto/ct.cpp +58 -0
  6. data/ext/tomoto/dmr.cpp +69 -0
  7. data/ext/tomoto/dt.cpp +91 -0
  8. data/ext/tomoto/extconf.rb +42 -0
  9. data/ext/tomoto/gdmr.cpp +42 -0
  10. data/ext/tomoto/hdp.cpp +47 -0
  11. data/ext/tomoto/hlda.cpp +71 -0
  12. data/ext/tomoto/hpa.cpp +32 -0
  13. data/ext/tomoto/lda.cpp +281 -0
  14. data/ext/tomoto/llda.cpp +33 -0
  15. data/ext/tomoto/mglda.cpp +81 -0
  16. data/ext/tomoto/pa.cpp +32 -0
  17. data/ext/tomoto/plda.cpp +33 -0
  18. data/ext/tomoto/slda.cpp +48 -0
  19. data/ext/tomoto/tomoto.cpp +48 -0
  20. data/ext/tomoto/utils.h +30 -0
  21. data/lib/tomoto/2.7/tomoto.bundle +0 -0
  22. data/lib/tomoto/3.0/tomoto.bundle +0 -0
  23. data/lib/tomoto/3.1/tomoto.bundle +0 -0
  24. data/lib/tomoto/3.2/tomoto.bundle +0 -0
  25. data/lib/tomoto/ct.rb +24 -0
  26. data/lib/tomoto/dmr.rb +27 -0
  27. data/lib/tomoto/dt.rb +15 -0
  28. data/lib/tomoto/gdmr.rb +15 -0
  29. data/lib/tomoto/hdp.rb +11 -0
  30. data/lib/tomoto/hlda.rb +56 -0
  31. data/lib/tomoto/hpa.rb +11 -0
  32. data/lib/tomoto/lda.rb +181 -0
  33. data/lib/tomoto/llda.rb +15 -0
  34. data/lib/tomoto/mglda.rb +15 -0
  35. data/lib/tomoto/pa.rb +11 -0
  36. data/lib/tomoto/plda.rb +15 -0
  37. data/lib/tomoto/slda.rb +37 -0
  38. data/lib/tomoto/version.rb +3 -0
  39. data/lib/tomoto.rb +27 -0
  40. data/vendor/EigenRand/EigenRand/EigenRand +24 -0
  41. data/vendor/EigenRand/LICENSE +21 -0
  42. data/vendor/EigenRand/README.md +430 -0
  43. data/vendor/eigen/COPYING.APACHE +203 -0
  44. data/vendor/eigen/COPYING.BSD +26 -0
  45. data/vendor/eigen/COPYING.GPL +674 -0
  46. data/vendor/eigen/COPYING.LGPL +502 -0
  47. data/vendor/eigen/COPYING.MINPACK +51 -0
  48. data/vendor/eigen/COPYING.MPL2 +373 -0
  49. data/vendor/eigen/COPYING.README +18 -0
  50. data/vendor/eigen/Eigen/Cholesky +45 -0
  51. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  52. data/vendor/eigen/Eigen/Core +384 -0
  53. data/vendor/eigen/Eigen/Dense +7 -0
  54. data/vendor/eigen/Eigen/Eigen +2 -0
  55. data/vendor/eigen/Eigen/Eigenvalues +60 -0
  56. data/vendor/eigen/Eigen/Geometry +59 -0
  57. data/vendor/eigen/Eigen/Householder +29 -0
  58. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  59. data/vendor/eigen/Eigen/Jacobi +32 -0
  60. data/vendor/eigen/Eigen/KLUSupport +41 -0
  61. data/vendor/eigen/Eigen/LU +47 -0
  62. data/vendor/eigen/Eigen/MetisSupport +35 -0
  63. data/vendor/eigen/Eigen/OrderingMethods +70 -0
  64. data/vendor/eigen/Eigen/PaStiXSupport +49 -0
  65. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  66. data/vendor/eigen/Eigen/QR +50 -0
  67. data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
  68. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  69. data/vendor/eigen/Eigen/SVD +50 -0
  70. data/vendor/eigen/Eigen/Sparse +34 -0
  71. data/vendor/eigen/Eigen/SparseCholesky +37 -0
  72. data/vendor/eigen/Eigen/SparseCore +69 -0
  73. data/vendor/eigen/Eigen/SparseLU +50 -0
  74. data/vendor/eigen/Eigen/SparseQR +36 -0
  75. data/vendor/eigen/Eigen/StdDeque +27 -0
  76. data/vendor/eigen/Eigen/StdList +26 -0
  77. data/vendor/eigen/Eigen/StdVector +27 -0
  78. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  79. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  80. data/vendor/eigen/README.md +5 -0
  81. data/vendor/eigen/bench/README.txt +55 -0
  82. data/vendor/eigen/bench/btl/COPYING +340 -0
  83. data/vendor/eigen/bench/btl/README +154 -0
  84. data/vendor/eigen/bench/tensors/README +20 -0
  85. data/vendor/eigen/blas/README.txt +6 -0
  86. data/vendor/eigen/ci/README.md +56 -0
  87. data/vendor/eigen/demos/mandelbrot/README +10 -0
  88. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  89. data/vendor/eigen/demos/opengl/README +13 -0
  90. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
  91. data/vendor/eigen/unsupported/README.txt +50 -0
  92. data/vendor/tomotopy/LICENSE +21 -0
  93. data/vendor/tomotopy/README.kr.rst +519 -0
  94. data/vendor/tomotopy/README.rst +538 -0
  95. data/vendor/variant/LICENSE +25 -0
  96. data/vendor/variant/LICENSE_1_0.txt +23 -0
  97. data/vendor/variant/README.md +102 -0
  98. metadata +141 -0
@@ -0,0 +1,32 @@
1
+ #include <HPA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hpa(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(m, "HPA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::HPAArgs args;
13
+ args.k = k1;
14
+ args.k2 = k2;
15
+ args.alpha = {alpha};
16
+ args.eta = eta;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHPAModel& self) {
25
+ Array res;
26
+ // use <= to return k+1 elements
27
+ for (size_t i = 0; i <= self.getK(); i++) {
28
+ res.push(self.getAlpha(i));
29
+ }
30
+ return res;
31
+ });
32
+ }
@@ -0,0 +1,281 @@
1
+ #include <fstream>
2
+ #include <iostream>
3
+
4
+ #include <LDA.h>
5
+
6
+ #include <rice/rice.hpp>
7
+
8
+ #include "utils.h"
9
+
10
+ class DocumentObject
11
+ {
12
+ public:
13
+ DocumentObject(const tomoto::DocumentBase* _doc, const tomoto::ITopicModel* _tm) : doc{ _doc }, tm{ _tm } {}
14
+
15
+ const tomoto::DocumentBase* doc;
16
+ const tomoto::ITopicModel* tm;
17
+ };
18
+
19
+ void init_lda(Rice::Module& m) {
20
+ Rice::define_class_under<DocumentObject>(m, "Document")
21
+ .define_method(
22
+ "topics",
23
+ [](DocumentObject& self) {
24
+ Rice::Hash res;
25
+ auto topics = self.tm->getTopicsByDoc(self.doc);
26
+ for (size_t i = 0; i < topics.size(); i++) {
27
+ res[i] = topics[i];
28
+ }
29
+ return res;
30
+ });
31
+
32
+ Rice::define_class_under<tomoto::ILDAModel>(m, "LDA")
33
+ .define_singleton_function(
34
+ "_new",
35
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
36
+ tomoto::LDAArgs args;
37
+ args.k = k;
38
+ args.alpha = {alpha};
39
+ args.eta = eta;
40
+ if (seed >= 0) {
41
+ args.seed = seed;
42
+ }
43
+ return tomoto::ILDAModel::create((tomoto::TermWeight)tw, args);
44
+ }, Rice::Return().takeOwnership())
45
+ .define_method(
46
+ "_add_doc",
47
+ [](tomoto::ILDAModel& self, std::vector<std::string> words) {
48
+ return self.addDoc(buildDoc(words));
49
+ })
50
+ .define_method(
51
+ "_make_doc",
52
+ [](tomoto::ILDAModel& self, std::vector<std::string> words) {
53
+ return DocumentObject(self.makeDoc(buildDoc(words)).release(), &self);
54
+ })
55
+ .define_method(
56
+ "_infer",
57
+ [](tomoto::ILDAModel& self, DocumentObject& doc_object, size_t iteration, float tolerance, size_t workers, size_t ps, size_t together) {
58
+ std::vector<tomoto::DocumentBase*> docs;
59
+ auto doc = doc_object.doc;
60
+ docs.emplace_back(const_cast<tomoto::DocumentBase*>(doc));
61
+ float ll = self.infer(docs, iteration, tolerance, workers, (tomoto::ParallelScheme)ps, !!together)[0];
62
+
63
+ auto topic_dist = self.getTopicsByDoc(doc);
64
+ auto topic_res = Array();
65
+ for (size_t i = 0; i < topic_dist.size(); i++) {
66
+ topic_res.push(topic_dist[i]);
67
+ }
68
+
69
+ auto res = Array();
70
+ res.push(topic_res);
71
+ res.push(ll);
72
+ return res;
73
+ })
74
+ .define_method(
75
+ "alpha",
76
+ [](tomoto::ILDAModel& self) {
77
+ Array res;
78
+ for (size_t i = 0; i < self.getK(); i++) {
79
+ res.push(self.getAlpha(i));
80
+ }
81
+ return res;
82
+ })
83
+ .define_method(
84
+ "burn_in",
85
+ [](tomoto::ILDAModel& self) {
86
+ return self.getBurnInIteration();
87
+ })
88
+ .define_method(
89
+ "burn_in=",
90
+ [](tomoto::ILDAModel& self, size_t iteration) {
91
+ self.setBurnInIteration(iteration);
92
+ return iteration;
93
+ })
94
+ .define_method(
95
+ "_count_by_topics",
96
+ [](tomoto::ILDAModel& self) {
97
+ Array res;
98
+ for (auto const& v : self.getCountByTopic()) {
99
+ res.push(v);
100
+ }
101
+ return res;
102
+ })
103
+ .define_method(
104
+ "docs",
105
+ [](tomoto::ILDAModel& self) {
106
+ Array res;
107
+ auto n = self.getNumDocs();
108
+ for (size_t i = 0; i < n; i++) {
109
+ auto v = DocumentObject(self.getDoc(i), &self);
110
+ res.push(Object(Rice::detail::To_Ruby<DocumentObject>().convert(v)));
111
+ }
112
+ return res;
113
+ })
114
+ .define_method(
115
+ "eta",
116
+ [](tomoto::ILDAModel& self) {
117
+ return self.getEta();
118
+ })
119
+ .define_method(
120
+ "global_step",
121
+ [](tomoto::ILDAModel& self) {
122
+ return self.getGlobalStep();
123
+ })
124
+ .define_method(
125
+ "k",
126
+ [](tomoto::ILDAModel& self) {
127
+ return self.getK();
128
+ })
129
+ .define_method(
130
+ "_load",
131
+ [](tomoto::ILDAModel& self, const char* filename) {
132
+ std::ifstream str{ filename, std::ios_base::binary };
133
+ if (!str) throw std::runtime_error{ std::string("cannot open file '") + filename + std::string("'") };
134
+ std::vector<uint8_t> extra_data;
135
+ self.loadModel(str, &extra_data);
136
+ })
137
+ .define_method(
138
+ "ll_per_word",
139
+ [](tomoto::ILDAModel& self) {
140
+ return self.getLLPerWord();
141
+ })
142
+ .define_method(
143
+ "num_docs",
144
+ [](tomoto::ILDAModel& self) {
145
+ return self.getNumDocs();
146
+ })
147
+ .define_method(
148
+ "num_vocabs",
149
+ [](tomoto::ILDAModel& self) {
150
+ return self.getV();
151
+ })
152
+ .define_method(
153
+ "num_words",
154
+ [](tomoto::ILDAModel& self) {
155
+ return self.getN();
156
+ })
157
+ .define_method(
158
+ "optim_interval",
159
+ [](tomoto::ILDAModel& self) {
160
+ return self.getOptimInterval();
161
+ })
162
+ .define_method(
163
+ "optim_interval=",
164
+ [](tomoto::ILDAModel& self, size_t value) {
165
+ self.setOptimInterval(value);
166
+ return value;
167
+ })
168
+ .define_method(
169
+ "perplexity",
170
+ [](tomoto::ILDAModel& self) {
171
+ return self.getPerplexity();
172
+ })
173
+ .define_method(
174
+ "_prepare",
175
+ [](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
176
+ self.prepare(true, minCnt, minDf, rmTop);
177
+ })
178
+ .define_method(
179
+ "_removed_top_words",
180
+ [](tomoto::ILDAModel& self, size_t rmTop) {
181
+ Array res;
182
+ auto dict = self.getVocabDict();
183
+ size_t size = dict.size();
184
+ for (size_t i = rmTop; i > 0; i--) {
185
+ res.push(dict.toWord(size - i));
186
+ }
187
+ return res;
188
+ })
189
+ .define_method(
190
+ "_save",
191
+ [](tomoto::ILDAModel& self, const char* filename, bool full) {
192
+ std::ofstream str{ filename, std::ios_base::binary };
193
+ std::vector<uint8_t> extra_data;
194
+ self.saveModel(str, full, &extra_data);
195
+ })
196
+ .define_method(
197
+ "_topic_words",
198
+ [](tomoto::ILDAModel& self, size_t topicId, size_t topN) {
199
+ Rice::Hash res;
200
+ for (auto const& v : self.getWordsByTopicSorted(topicId, topN)) {
201
+ res[v.first] = v.second;
202
+ }
203
+ return res;
204
+ })
205
+ .define_method(
206
+ "_train",
207
+ [](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
208
+ self.train(iteration, workers, (tomoto::ParallelScheme)ps);
209
+ })
210
+ .define_method(
211
+ "_tw",
212
+ [](tomoto::ILDAModel& self) {
213
+ return (int)self.getTermWeight();
214
+ })
215
+ .define_method(
216
+ "used_vocab_df",
217
+ [](tomoto::ILDAModel& self) {
218
+ auto vocab = self.getVocabDf();
219
+ Array res;
220
+ for (size_t i = 0; i < self.getV(); i++) {
221
+ res.push(vocab[i]);
222
+ }
223
+ return res;
224
+ })
225
+ .define_method(
226
+ "used_vocab_freq",
227
+ [](tomoto::ILDAModel& self) {
228
+ auto vocab = self.getVocabCf();
229
+ Array res;
230
+ for (size_t i = 0; i < self.getV(); i++) {
231
+ res.push(vocab[i]);
232
+ }
233
+ return res;
234
+ })
235
+ .define_method(
236
+ "used_vocabs",
237
+ [](tomoto::ILDAModel& self) {
238
+ auto dict = self.getVocabDict();
239
+ Array res;
240
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
241
+ for (size_t i = 0; i < self.getV(); i++) {
242
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
243
+ Object obj(value);
244
+ res.push(obj.call("force_encoding", utf8));
245
+ }
246
+ return res;
247
+ })
248
+ .define_method(
249
+ "vocab_df",
250
+ [](tomoto::ILDAModel& self) {
251
+ auto vocab = self.getVocabDf();
252
+ Array res;
253
+ for (size_t i = 0; i < vocab.size(); i++) {
254
+ res.push(vocab[i]);
255
+ }
256
+ return res;
257
+ })
258
+ .define_method(
259
+ "vocab_freq",
260
+ [](tomoto::ILDAModel& self) {
261
+ auto vocab = self.getVocabCf();
262
+ Array res;
263
+ for (size_t i = 0; i < vocab.size(); i++) {
264
+ res.push(vocab[i]);
265
+ }
266
+ return res;
267
+ })
268
+ .define_method(
269
+ "vocabs",
270
+ [](tomoto::ILDAModel& self) {
271
+ auto dict = self.getVocabDict();
272
+ Array res;
273
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
274
+ for (size_t i = 0; i < dict.size(); i++) {
275
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
276
+ Object obj(value);
277
+ res.push(obj.call("force_encoding", utf8));
278
+ }
279
+ return res;
280
+ });
281
+ }
@@ -0,0 +1,33 @@
1
+ #include <LLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_llda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(m, "LLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::LDAArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::ILLDAModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_add_doc",
23
+ [](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
24
+ auto doc = buildDoc(words);
25
+ doc.misc["labels"] = labels;
26
+ return self.addDoc(doc);
27
+ })
28
+ .define_method(
29
+ "topics_per_label",
30
+ [](tomoto::ILLDAModel& self) {
31
+ return self.getNumTopicsPerLabel();
32
+ });
33
+ }
@@ -0,0 +1,81 @@
1
+ #include <MGLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_mglda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(m, "MGLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k_g, size_t k_l, size_t t, tomoto::Float alpha_g, tomoto::Float alpha_l, tomoto::Float alpha_mg, tomoto::Float alpha_ml, tomoto::Float eta_g) {
12
+ tomoto::MGLDAArgs args;
13
+ args.k = k_g;
14
+ args.kL = k_l;
15
+ args.t = t;
16
+ args.alpha = {alpha_g};
17
+ args.alphaL = {alpha_l};
18
+ args.alphaMG = alpha_mg;
19
+ args.alphaML = alpha_ml;
20
+ args.eta = eta_g;
21
+ // TODO more args
22
+ return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, args);
23
+ }, Rice::Return().takeOwnership())
24
+ .define_method(
25
+ "_add_doc",
26
+ [](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
27
+ auto doc = buildDoc(words);
28
+ doc.misc["delimiter"] = delimiter;
29
+ return self.addDoc(doc);
30
+ })
31
+ .define_method(
32
+ "alpha_g",
33
+ [](tomoto::IMGLDAModel& self) {
34
+ return self.getAlpha();
35
+ })
36
+ .define_method(
37
+ "alpha_l",
38
+ [](tomoto::IMGLDAModel& self) {
39
+ return self.getAlphaL();
40
+ })
41
+ .define_method(
42
+ "alpha_mg",
43
+ [](tomoto::IMGLDAModel& self) {
44
+ return self.getAlphaM();
45
+ })
46
+ .define_method(
47
+ "alpha_ml",
48
+ [](tomoto::IMGLDAModel& self) {
49
+ return self.getAlphaML();
50
+ })
51
+ .define_method(
52
+ "eta_g",
53
+ [](tomoto::IMGLDAModel& self) {
54
+ return self.getEta();
55
+ })
56
+ .define_method(
57
+ "eta_l",
58
+ [](tomoto::IMGLDAModel& self) {
59
+ return self.getEtaL();
60
+ })
61
+ .define_method(
62
+ "gamma",
63
+ [](tomoto::IMGLDAModel& self) {
64
+ return self.getGamma();
65
+ })
66
+ .define_method(
67
+ "k_g",
68
+ [](tomoto::IMGLDAModel& self) {
69
+ return self.getK();
70
+ })
71
+ .define_method(
72
+ "k_l",
73
+ [](tomoto::IMGLDAModel& self) {
74
+ return self.getKL();
75
+ })
76
+ .define_method(
77
+ "t",
78
+ [](tomoto::IMGLDAModel& self) {
79
+ return self.getT();
80
+ });
81
+ }
data/ext/tomoto/pa.cpp ADDED
@@ -0,0 +1,32 @@
1
+ #include <PA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_pa(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(m, "PA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::PAArgs args;
13
+ args.k = k1;
14
+ args.k2 = k2;
15
+ args.alpha = {alpha};
16
+ args.eta = eta;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IPAModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "k1",
24
+ [](tomoto::IPAModel& self) {
25
+ return self.getK();
26
+ })
27
+ .define_method(
28
+ "k2",
29
+ [](tomoto::IPAModel& self) {
30
+ return self.getK2();
31
+ });
32
+ }
@@ -0,0 +1,33 @@
1
+ #include <PLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_plda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(m, "PLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t latent_topics, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::PLDAArgs args;
13
+ args.numLatentTopics = latent_topics;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::IPLDAModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_add_doc",
23
+ [](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
24
+ auto doc = buildDoc(words);
25
+ doc.misc["labels"] = labels;
26
+ return self.addDoc(doc);
27
+ })
28
+ .define_method(
29
+ "latent_topics",
30
+ [](tomoto::IPLDAModel& self) {
31
+ return self.getNumLatentTopics();
32
+ });
33
+ }
@@ -0,0 +1,48 @@
1
+ #include <SLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_slda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(m, "SLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, Array rb_vars, tomoto::Float alpha, tomoto::Float eta, std::vector<tomoto::Float> mu, std::vector<tomoto::Float> nu_sq, std::vector<tomoto::Float> glm_param, size_t seed) {
12
+ std::vector<tomoto::ISLDAModel::GLM> vars;
13
+ vars.reserve(rb_vars.size());
14
+ for (auto const& v : rb_vars) {
15
+ vars.push_back((tomoto::ISLDAModel::GLM) Rice::detail::From_Ruby<int>().convert(v.value()));
16
+ }
17
+ tomoto::SLDAArgs args;
18
+ args.k = k;
19
+ args.vars = vars;
20
+ args.alpha = {alpha};
21
+ args.eta = eta;
22
+ args.mu = mu;
23
+ args.nuSq = nu_sq;
24
+ args.glmParam = glm_param;
25
+ if (seed >= 0) {
26
+ args.seed = seed;
27
+ }
28
+ return tomoto::ISLDAModel::create((tomoto::TermWeight)tw, args);
29
+ }, Rice::Return().takeOwnership())
30
+ .define_method(
31
+ "_add_doc",
32
+ [](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<tomoto::Float> y) {
33
+ auto doc = buildDoc(words);
34
+ doc.misc["y"] = y;
35
+ return self.addDoc(doc);
36
+ })
37
+ .define_method(
38
+ "f",
39
+ [](tomoto::ISLDAModel& self) {
40
+ return self.getF();
41
+ })
42
+ .define_method(
43
+ "_var_type",
44
+ [](tomoto::ISLDAModel& self, size_t var_id) {
45
+ if (var_id >= self.getF()) throw std::runtime_error{ "'var_id' must be < 'f'" };
46
+ return self.getTypeOfVar(var_id) == tomoto::ISLDAModel::GLM::linear ? "l" : "b";
47
+ });
48
+ }
@@ -0,0 +1,48 @@
1
+ #include <rice/rice.hpp>
2
+
3
+ void init_lda(Rice::Module& m);
4
+ void init_ct(Rice::Module& m);
5
+ void init_dmr(Rice::Module& m);
6
+ void init_dt(Rice::Module& m);
7
+ void init_gdmr(Rice::Module& m);
8
+ void init_hdp(Rice::Module& m);
9
+ void init_hlda(Rice::Module& m);
10
+ void init_pa(Rice::Module& m);
11
+ void init_hpa(Rice::Module& m);
12
+ void init_mglda(Rice::Module& m);
13
+ void init_llda(Rice::Module& m);
14
+ void init_plda(Rice::Module& m);
15
+ void init_slda(Rice::Module& m);
16
+
17
+ extern "C"
18
+ void Init_tomoto()
19
+ {
20
+ auto m = Rice::define_module("Tomoto")
21
+ .define_singleton_function(
22
+ "isa",
23
+ []() {
24
+ #ifdef __AVX2__
25
+ return Rice::String("avx2");
26
+ #elif defined(__AVX__)
27
+ return Rice::String("avx");
28
+ #elif defined(__SSE2__) || defined(__x86_64__) || defined(_WIN64)
29
+ return Rice::String("sse2");
30
+ #else
31
+ return Rice::String("none");
32
+ #endif
33
+ });
34
+
35
+ init_lda(m);
36
+ init_ct(m);
37
+ init_dmr(m);
38
+ init_dt(m);
39
+ init_gdmr(m);
40
+ init_hdp(m);
41
+ init_hlda(m);
42
+ init_pa(m);
43
+ init_hpa(m);
44
+ init_mglda(m);
45
+ init_llda(m);
46
+ init_plda(m);
47
+ init_slda(m);
48
+ }
@@ -0,0 +1,30 @@
1
+ #pragma once
2
+
3
+ #include <rice/rice.hpp>
4
+ #include <rice/stl.hpp>
5
+
6
+ using Rice::Array;
7
+ using Rice::Object;
8
+
9
+ namespace Rice::detail
10
+ {
11
+ template<typename T>
12
+ class To_Ruby<std::vector<T>>
13
+ {
14
+ public:
15
+ VALUE convert(std::vector<T> const & x)
16
+ {
17
+ auto a = rb_ary_new2(x.size());
18
+ for (const auto& v : x) {
19
+ detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
20
+ }
21
+ return a;
22
+ }
23
+ };
24
+ }
25
+
26
+ inline tomoto::RawDoc buildDoc(std::vector<std::string>& words) {
27
+ tomoto::RawDoc doc;
28
+ doc.rawWords = words;
29
+ return doc;
30
+ }
Binary file
Binary file
Binary file
Binary file
data/lib/tomoto/ct.rb ADDED
@@ -0,0 +1,24 @@
1
module Tomoto
  # Ruby-side helpers layered on top of the native CT binding.
  class CT
    # Creates the native model via `_new`, records the vocabulary filter
    # settings as instance variables on it, and hands it to `init_params`.
    # A nil seed is passed to the native side as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
      model = _new(to_tw(tw), k, alpha, eta, seed || -1)
      model.instance_variable_set(:@rm_top, rm_top)
      model.instance_variable_set(:@min_df, min_df)
      model.instance_variable_set(:@min_cf, min_cf)
      # `binding` is passed along, so no extra locals are introduced in this method.
      init_params(model, binding)
    end

    # Calls `prepare`, then returns `_correlations` for one topic when
    # `topic_id` is given, or an array with one entry per topic otherwise.
    def correlations(topic_id = nil)
      prepare
      return _correlations(topic_id) if topic_id

      Array.new(k) { |i| _correlations(i) }
    end

    # Splits the flat `_prior_cov` result into consecutive slices of `k` elements.
    def prior_cov
      rows = []
      _prior_cov.each_slice(k) { |row| rows << row }
      rows
    end
  end
end
data/lib/tomoto/dmr.rb ADDED
@@ -0,0 +1,27 @@
1
module Tomoto
  # Ruby-side helpers layered on top of the native DMR binding.
  class DMR
    # Creates the native model via `_new`, records the vocabulary filter
    # settings as instance variables on it, and hands it to `init_params`.
    # A nil seed is passed to the native side as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, sigma: 1.0, alpha_epsilon: 1e-10, seed: nil)
      model = _new(to_tw(tw), k, alpha, sigma, eta, alpha_epsilon, seed || -1)
      model.instance_variable_set(:@rm_top, rm_top)
      model.instance_variable_set(:@min_df, min_df)
      model.instance_variable_set(:@min_cf, min_cf)
      # `binding` is passed along, so no extra locals are introduced in this method.
      init_params(model, binding)
    end

    # Adds a document (run through `prepare_doc`) together with its metadata string.
    def add_doc(doc, metadata: "")
      words = prepare_doc(doc)
      _add_doc(words, metadata)
    end

    # One `_lambdas` entry per topic, or an empty array when `f` is zero.
    def lambdas
      return [] if f == 0

      Array.new(k) { |i| _lambdas(i) }
    end

    # Element-wise exponential of `lambdas`.
    def alpha
      lambdas.map do |row|
        row.map { |value| Math.exp(value) }
      end
    end
  end
end