tomoto 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +23 -17
- data/ext/tomoto/ext.cpp +512 -15
- data/ext/tomoto/extconf.rb +3 -0
- data/lib/tomoto.rb +10 -0
- data/lib/tomoto/ct.rb +13 -0
- data/lib/tomoto/dmr.rb +23 -0
- data/lib/tomoto/dt.rb +15 -0
- data/lib/tomoto/gdmr.rb +15 -0
- data/lib/tomoto/hlda.rb +43 -0
- data/lib/tomoto/hpa.rb +11 -0
- data/lib/tomoto/lda.rb +11 -3
- data/lib/tomoto/llda.rb +15 -0
- data/lib/tomoto/mglda.rb +15 -0
- data/lib/tomoto/pa.rb +11 -0
- data/lib/tomoto/plda.rb +15 -0
- data/lib/tomoto/slda.rb +37 -0
- data/lib/tomoto/version.rb +1 -1
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b40c9adf2f0162eb6174b17395ea37b9294e14b22609e9f51951e9904125ff9
|
4
|
+
data.tar.gz: be3f68438f60a7e4fc11033921636f8d03bf411bd3d3eb6aa3b4fb448faac41a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a74747ae372d030c42562d4e2b99ab167ccc28533468ed08819f4bd34d42b340349870712c12e565388eb7833f993349432e77baee8618c34c265676ca181072
|
7
|
+
data.tar.gz: da9e833bb98726278108a68a7dd6bed0e54b3979c25d49d8db01aa613e6205a6a3512881688209baf2589c4dc5514ed2663fb251a5e273c42b678bb1daa06d74
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
-
#
|
1
|
+
# tomoto
|
2
2
|
|
3
|
-
[
|
3
|
+
:tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
|
4
|
+
|
5
|
+
[![Build Status](https://travis-ci.org/ankane/tomoto.svg?branch=master)](https://travis-ci.org/ankane/tomoto)
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -10,7 +12,7 @@ Add this line to your application’s Gemfile:
|
|
10
12
|
gem 'tomoto'
|
11
13
|
```
|
12
14
|
|
13
|
-
It can take
|
15
|
+
It can take 10-20 minutes to compile the extension.
|
14
16
|
|
15
17
|
## Getting Started
|
16
18
|
|
@@ -65,23 +67,27 @@ model.ll_per_word
|
|
65
67
|
Supports:
|
66
68
|
|
67
69
|
- Latent Dirichlet Allocation (`LDA`)
|
70
|
+
- Labeled LDA (`LLDA`)
|
71
|
+
- Partially Labeled LDA (`PLDA`)
|
72
|
+
- Supervised LDA (`SLDA`)
|
73
|
+
- Dirichlet Multinomial Regression (`DMR`)
|
74
|
+
- Generalized Dirichlet Multinomial Regression (`GDMR`)
|
68
75
|
- Hierarchical Dirichlet Process (`HDP`)
|
76
|
+
- Hierarchical LDA (`HLDA`)
|
77
|
+
- Multi Grain LDA (`MGLDA`)
|
78
|
+
- Pachinko Allocation (`PA`)
|
79
|
+
- Hierarchical PA (`HPA`)
|
69
80
|
- Correlated Topic Model (`CT`)
|
81
|
+
- Dynamic Topic Model (`DT`)
|
70
82
|
|
71
|
-
##
|
83
|
+
## API
|
72
84
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
k: 1,
|
80
|
-
alpha: 0.1,
|
81
|
-
eta: 0.01,
|
82
|
-
seed: nil
|
83
|
-
)
|
84
|
-
```
|
85
|
+
This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
|
86
|
+
|
87
|
+
- The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
|
88
|
+
- Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
|
89
|
+
|
90
|
+
If a method or option you need isn’t supported, feel free to open an issue.
|
85
91
|
|
86
92
|
## Tokenization
|
87
93
|
|
@@ -93,7 +99,7 @@ model.add_doc(["tokens", "from", "document", "one"])
|
|
93
99
|
|
94
100
|
## Performance
|
95
101
|
|
96
|
-
|
102
|
+
tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check what it’s using with:
|
97
103
|
|
98
104
|
```ruby
|
99
105
|
Tomoto.isa
|
data/ext/tomoto/ext.cpp
CHANGED
@@ -1,7 +1,21 @@
|
|
1
|
+
// stdlib
|
2
|
+
#include <fstream>
|
3
|
+
#include <iostream>
|
4
|
+
|
1
5
|
// tomoto
|
2
6
|
#include <CT.h>
|
7
|
+
#include <DMR.h>
|
8
|
+
#include <DT.h>
|
9
|
+
#include <GDMR.h>
|
3
10
|
#include <HDP.h>
|
11
|
+
#include <HLDA.h>
|
12
|
+
#include <HPA.h>
|
4
13
|
#include <LDA.h>
|
14
|
+
#include <LLDA.h>
|
15
|
+
#include <MGLDA.h>
|
16
|
+
#include <PA.h>
|
17
|
+
#include <PLDA.h>
|
18
|
+
#include <SLDA.h>
|
5
19
|
|
6
20
|
// rice
|
7
21
|
#include <rice/Array.hpp>
|
@@ -26,6 +40,62 @@ Object to_ruby<std::vector<float>>(std::vector<float> const & x)
|
|
26
40
|
return res;
|
27
41
|
}
|
28
42
|
|
43
|
+
template<>
|
44
|
+
Object to_ruby<std::vector<uint32_t>>(std::vector<uint32_t> const & x)
|
45
|
+
{
|
46
|
+
Array res;
|
47
|
+
for (auto const& v : x) {
|
48
|
+
res.push(v);
|
49
|
+
}
|
50
|
+
return res;
|
51
|
+
}
|
52
|
+
|
53
|
+
template<>
|
54
|
+
Object to_ruby<std::vector<uint64_t>>(std::vector<uint64_t> const & x)
|
55
|
+
{
|
56
|
+
Array res;
|
57
|
+
for (auto const& v : x) {
|
58
|
+
res.push(v);
|
59
|
+
}
|
60
|
+
return res;
|
61
|
+
}
|
62
|
+
|
63
|
+
template<>
|
64
|
+
std::vector<std::string> from_ruby<std::vector<std::string>>(Object x)
|
65
|
+
{
|
66
|
+
Array a = Array(x);
|
67
|
+
std::vector<std::string> res;
|
68
|
+
res.reserve(a.size());
|
69
|
+
for (auto const& v : a) {
|
70
|
+
res.push_back(from_ruby<std::string>(v));
|
71
|
+
}
|
72
|
+
return res;
|
73
|
+
}
|
74
|
+
|
75
|
+
template<>
|
76
|
+
std::vector<float> from_ruby<std::vector<float>>(Object x)
|
77
|
+
{
|
78
|
+
Array a = Array(x);
|
79
|
+
std::vector<float> res;
|
80
|
+
res.reserve(a.size());
|
81
|
+
for (auto const& v : a) {
|
82
|
+
res.push_back(from_ruby<float>(v));
|
83
|
+
}
|
84
|
+
return res;
|
85
|
+
}
|
86
|
+
|
87
|
+
template<>
|
88
|
+
std::vector<uint64_t> from_ruby<std::vector<uint64_t>>(Object x)
|
89
|
+
{
|
90
|
+
Array a = Array(x);
|
91
|
+
std::vector<uint64_t> res;
|
92
|
+
res.reserve(a.size());
|
93
|
+
for (auto const& v : a) {
|
94
|
+
res.push_back(from_ruby<uint64_t>(v));
|
95
|
+
}
|
96
|
+
return res;
|
97
|
+
}
|
98
|
+
|
29
99
|
extern "C"
|
30
100
|
void Init_ext()
|
31
101
|
{
|
@@ -55,12 +125,7 @@ void Init_ext()
|
|
55
125
|
})
|
56
126
|
.define_method(
|
57
127
|
"_add_doc",
|
58
|
-
*[](tomoto::ILDAModel& self,
|
59
|
-
std::vector<std::string> words;
|
60
|
-
words.reserve(rb_words.size());
|
61
|
-
for (auto const& v : rb_words) {
|
62
|
-
words.push_back(from_ruby<std::string>(v));
|
63
|
-
}
|
128
|
+
*[](tomoto::ILDAModel& self, std::vector<std::string> words) {
|
64
129
|
self.addDoc(words);
|
65
130
|
})
|
66
131
|
.define_method(
|
@@ -93,6 +158,11 @@ void Init_ext()
|
|
93
158
|
*[](tomoto::ILDAModel& self) {
|
94
159
|
return self.getEta();
|
95
160
|
})
|
161
|
+
.define_method(
|
162
|
+
"global_step",
|
163
|
+
*[](tomoto::ILDAModel& self) {
|
164
|
+
return self.getGlobalStep();
|
165
|
+
})
|
96
166
|
.define_method(
|
97
167
|
"k",
|
98
168
|
*[](tomoto::ILDAModel& self) {
|
@@ -112,15 +182,36 @@ void Init_ext()
|
|
112
182
|
return self.getLLPerWord();
|
113
183
|
})
|
114
184
|
.define_method(
|
115
|
-
"
|
185
|
+
"num_docs",
|
116
186
|
*[](tomoto::ILDAModel& self) {
|
117
|
-
return self.
|
187
|
+
return self.getNumDocs();
|
118
188
|
})
|
119
189
|
.define_method(
|
120
190
|
"num_vocabs",
|
121
191
|
*[](tomoto::ILDAModel& self) {
|
122
192
|
return self.getV();
|
123
193
|
})
|
194
|
+
.define_method(
|
195
|
+
"num_words",
|
196
|
+
*[](tomoto::ILDAModel& self) {
|
197
|
+
return self.getN();
|
198
|
+
})
|
199
|
+
.define_method(
|
200
|
+
"optim_interval",
|
201
|
+
*[](tomoto::ILDAModel& self) {
|
202
|
+
return self.getOptimInterval();
|
203
|
+
})
|
204
|
+
.define_method(
|
205
|
+
"optim_interval=",
|
206
|
+
*[](tomoto::ILDAModel& self, size_t value) {
|
207
|
+
self.setOptimInterval(value);
|
208
|
+
return value;
|
209
|
+
})
|
210
|
+
.define_method(
|
211
|
+
"perplexity",
|
212
|
+
*[](tomoto::ILDAModel& self) {
|
213
|
+
return self.getPerplexity();
|
214
|
+
})
|
124
215
|
.define_method(
|
125
216
|
"_prepare",
|
126
217
|
*[](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
|
@@ -159,6 +250,62 @@ void Init_ext()
|
|
159
250
|
size_t ps = 0;
|
160
251
|
self.train(iteration, workers, (tomoto::ParallelScheme)ps);
|
161
252
|
})
|
253
|
+
.define_method(
|
254
|
+
"_tw",
|
255
|
+
*[](tomoto::ILDAModel& self) {
|
256
|
+
return (int)self.getTermWeight();
|
257
|
+
})
|
258
|
+
.define_method(
|
259
|
+
"used_vocab_df",
|
260
|
+
*[](tomoto::ILDAModel& self) {
|
261
|
+
auto vocab = self.getVocabDf();
|
262
|
+
Array res;
|
263
|
+
for (size_t i = 0; i < self.getV(); i++) {
|
264
|
+
res.push(vocab[i]);
|
265
|
+
}
|
266
|
+
return res;
|
267
|
+
})
|
268
|
+
.define_method(
|
269
|
+
"used_vocab_freq",
|
270
|
+
*[](tomoto::ILDAModel& self) {
|
271
|
+
auto vocab = self.getVocabCf();
|
272
|
+
Array res;
|
273
|
+
for (size_t i = 0; i < self.getV(); i++) {
|
274
|
+
res.push(vocab[i]);
|
275
|
+
}
|
276
|
+
return res;
|
277
|
+
})
|
278
|
+
.define_method(
|
279
|
+
"used_vocabs",
|
280
|
+
*[](tomoto::ILDAModel& self) {
|
281
|
+
auto dict = self.getVocabDict();
|
282
|
+
Array res;
|
283
|
+
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
284
|
+
for (size_t i = 0; i < self.getV(); i++) {
|
285
|
+
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
286
|
+
}
|
287
|
+
return res;
|
288
|
+
})
|
289
|
+
.define_method(
|
290
|
+
"vocab_df",
|
291
|
+
*[](tomoto::ILDAModel& self) {
|
292
|
+
auto vocab = self.getVocabDf();
|
293
|
+
Array res;
|
294
|
+
for (size_t i = 0; i < vocab.size(); i++) {
|
295
|
+
res.push(vocab[i]);
|
296
|
+
}
|
297
|
+
return res;
|
298
|
+
})
|
299
|
+
.define_method(
|
300
|
+
"vocab_freq",
|
301
|
+
*[](tomoto::ILDAModel& self) {
|
302
|
+
auto vocab = self.getVocabCf();
|
303
|
+
Array res;
|
304
|
+
for (size_t i = 0; i < vocab.size(); i++) {
|
305
|
+
res.push(vocab[i]);
|
306
|
+
}
|
307
|
+
return res;
|
308
|
+
})
|
162
309
|
.define_method(
|
163
310
|
"vocabs",
|
164
311
|
*[](tomoto::ILDAModel& self) {
|
@@ -180,6 +327,11 @@ void Init_ext()
|
|
180
327
|
}
|
181
328
|
return tomoto::ICTModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
182
329
|
})
|
330
|
+
.define_method(
|
331
|
+
"_correlations",
|
332
|
+
*[](tomoto::ICTModel& self, tomoto::Tid topic_id) {
|
333
|
+
return self.getCorrelationTopic(topic_id);
|
334
|
+
})
|
183
335
|
.define_method(
|
184
336
|
"num_beta_sample",
|
185
337
|
*[](tomoto::ICTModel& self) {
|
@@ -187,9 +339,9 @@ void Init_ext()
|
|
187
339
|
})
|
188
340
|
.define_method(
|
189
341
|
"num_beta_sample=",
|
190
|
-
*[](tomoto::ICTModel& self, size_t
|
191
|
-
self.setNumBetaSample(
|
192
|
-
return
|
342
|
+
*[](tomoto::ICTModel& self, size_t value) {
|
343
|
+
self.setNumBetaSample(value);
|
344
|
+
return value;
|
193
345
|
})
|
194
346
|
.define_method(
|
195
347
|
"num_tmn_sample",
|
@@ -198,12 +350,12 @@ void Init_ext()
|
|
198
350
|
})
|
199
351
|
.define_method(
|
200
352
|
"num_tmn_sample=",
|
201
|
-
*[](tomoto::ICTModel& self, size_t
|
202
|
-
self.setNumTMNSample(
|
203
|
-
return
|
353
|
+
*[](tomoto::ICTModel& self, size_t value) {
|
354
|
+
self.setNumTMNSample(value);
|
355
|
+
return value;
|
204
356
|
})
|
205
357
|
.define_method(
|
206
|
-
"
|
358
|
+
"_prior_cov",
|
207
359
|
*[](tomoto::ICTModel& self) {
|
208
360
|
return self.getPriorCov();
|
209
361
|
})
|
@@ -213,6 +365,138 @@ void Init_ext()
|
|
213
365
|
return self.getPriorMean();
|
214
366
|
});
|
215
367
|
|
368
|
+
Class rb_cDMR = define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(rb_mTomoto, "DMR")
|
369
|
+
.define_singleton_method(
|
370
|
+
"_new",
|
371
|
+
*[](size_t tw, size_t k, float alpha, float sigma, float eta, float alpha_epsilon, int seed) {
|
372
|
+
if (seed < 0) {
|
373
|
+
seed = std::random_device{}();
|
374
|
+
}
|
375
|
+
return tomoto::IDMRModel::create((tomoto::TermWeight)tw, k, alpha, sigma, eta, alpha_epsilon, seed);
|
376
|
+
})
|
377
|
+
.define_method(
|
378
|
+
"_add_doc",
|
379
|
+
*[](tomoto::IDMRModel& self, std::vector<std::string> words, std::vector<std::string> metadata) {
|
380
|
+
self.addDoc(words, metadata);
|
381
|
+
})
|
382
|
+
.define_method(
|
383
|
+
"alpha_epsilon",
|
384
|
+
*[](tomoto::IDMRModel& self) {
|
385
|
+
return self.getAlphaEps();
|
386
|
+
})
|
387
|
+
.define_method(
|
388
|
+
"alpha_epsilon=",
|
389
|
+
*[](tomoto::IDMRModel& self, float value) {
|
390
|
+
self.setAlphaEps(value);
|
391
|
+
return value;
|
392
|
+
})
|
393
|
+
.define_method(
|
394
|
+
"f",
|
395
|
+
*[](tomoto::IDMRModel& self) {
|
396
|
+
return self.getF();
|
397
|
+
})
|
398
|
+
.define_method(
|
399
|
+
"_lambdas",
|
400
|
+
*[](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
|
401
|
+
return self.getLambdaByTopic(topic_id);
|
402
|
+
})
|
403
|
+
.define_method(
|
404
|
+
"metadata_dict",
|
405
|
+
*[](tomoto::IDMRModel& self) {
|
406
|
+
auto dict = self.getMetadataDict();
|
407
|
+
Array res;
|
408
|
+
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
409
|
+
for (size_t i = 0; i < dict.size(); i++) {
|
410
|
+
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
411
|
+
}
|
412
|
+
return res;
|
413
|
+
})
|
414
|
+
.define_method(
|
415
|
+
"sigma",
|
416
|
+
*[](tomoto::IDMRModel& self) {
|
417
|
+
return self.getSigma();
|
418
|
+
});
|
419
|
+
|
420
|
+
Class rb_cDT = define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(rb_mTomoto, "DT")
|
421
|
+
.define_singleton_method(
|
422
|
+
"_new",
|
423
|
+
*[](size_t tw, size_t k, size_t t, float alphaVar, float etaVar, float phiVar, float shapeA, float shapeB, float shapeC) {
|
424
|
+
// Rice only supports 10 arguments
|
425
|
+
int seed = -1;
|
426
|
+
if (seed < 0) {
|
427
|
+
seed = std::random_device{}();
|
428
|
+
}
|
429
|
+
return tomoto::IDTModel::create((tomoto::TermWeight)tw, k, t, alphaVar, etaVar, phiVar, shapeA, shapeB, shapeC, 0, seed);
|
430
|
+
})
|
431
|
+
.define_method(
|
432
|
+
"_add_doc",
|
433
|
+
*[](tomoto::IDTModel& self, std::vector<std::string> words, size_t timepoint) {
|
434
|
+
self.addDoc(words, timepoint);
|
435
|
+
})
|
436
|
+
.define_method(
|
437
|
+
"lr_a",
|
438
|
+
*[](tomoto::IDTModel& self) {
|
439
|
+
return self.getShapeA();
|
440
|
+
})
|
441
|
+
.define_method(
|
442
|
+
"lr_a=",
|
443
|
+
*[](tomoto::IDTModel& self, float value) {
|
444
|
+
self.setShapeA(value);
|
445
|
+
return value;
|
446
|
+
})
|
447
|
+
.define_method(
|
448
|
+
"lr_b",
|
449
|
+
*[](tomoto::IDTModel& self) {
|
450
|
+
return self.getShapeB();
|
451
|
+
})
|
452
|
+
.define_method(
|
453
|
+
"lr_b=",
|
454
|
+
*[](tomoto::IDTModel& self, float value) {
|
455
|
+
self.setShapeB(value);
|
456
|
+
return value;
|
457
|
+
})
|
458
|
+
.define_method(
|
459
|
+
"lr_c",
|
460
|
+
*[](tomoto::IDTModel& self) {
|
461
|
+
return self.getShapeC();
|
462
|
+
})
|
463
|
+
.define_method(
|
464
|
+
"lr_c=",
|
465
|
+
*[](tomoto::IDTModel& self, float value) {
|
466
|
+
self.setShapeC(value);
|
467
|
+
return value;
|
468
|
+
})
|
469
|
+
.define_method(
|
470
|
+
"num_docs_by_timepoint",
|
471
|
+
*[](tomoto::IDTModel& self) {
|
472
|
+
return self.getNumDocsByT();
|
473
|
+
})
|
474
|
+
.define_method(
|
475
|
+
"num_timepoints",
|
476
|
+
*[](tomoto::IDTModel& self) {
|
477
|
+
return self.getT();
|
478
|
+
});
|
479
|
+
|
480
|
+
Class rb_cGDMR = define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(rb_mTomoto, "GDMR")
|
481
|
+
.define_singleton_method(
|
482
|
+
"_new",
|
483
|
+
*[](size_t tw, size_t k, std::vector<uint64_t> degrees, float alpha, float sigma, float sigma0, float eta, float alpha_epsilon, int seed) {
|
484
|
+
if (seed < 0) {
|
485
|
+
seed = std::random_device{}();
|
486
|
+
}
|
487
|
+
return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed);
|
488
|
+
})
|
489
|
+
.define_method(
|
490
|
+
"degrees",
|
491
|
+
*[](tomoto::IGDMRModel& self) {
|
492
|
+
return self.getFs();
|
493
|
+
})
|
494
|
+
.define_method(
|
495
|
+
"sigma0",
|
496
|
+
*[](tomoto::IGDMRModel& self) {
|
497
|
+
return self.getSigma0();
|
498
|
+
});
|
499
|
+
|
216
500
|
Class rb_cHDP = define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(rb_mTomoto, "HDP")
|
217
501
|
.define_singleton_method(
|
218
502
|
"_new",
|
@@ -242,4 +526,217 @@ void Init_ext()
|
|
242
526
|
*[](tomoto::IHDPModel& self) {
|
243
527
|
return self.getTotalTables();
|
244
528
|
});
|
529
|
+
|
530
|
+
Class rb_cHLDA = define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(rb_mTomoto, "HLDA")
|
531
|
+
.define_singleton_method(
|
532
|
+
"_new",
|
533
|
+
*[](size_t tw, size_t levelDepth, float alpha, float eta, float gamma, int seed) {
|
534
|
+
if (seed < 0) {
|
535
|
+
seed = std::random_device{}();
|
536
|
+
}
|
537
|
+
return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, levelDepth, alpha, eta, gamma, seed);
|
538
|
+
})
|
539
|
+
.define_method(
|
540
|
+
"_children_topics",
|
541
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
542
|
+
return self.getChildTopicId(topic_id);
|
543
|
+
})
|
544
|
+
.define_method(
|
545
|
+
"depth",
|
546
|
+
*[](tomoto::IHLDAModel& self) {
|
547
|
+
return self.getLevelDepth();
|
548
|
+
})
|
549
|
+
.define_method(
|
550
|
+
"gamma",
|
551
|
+
*[](tomoto::IHLDAModel& self) {
|
552
|
+
return self.getGamma();
|
553
|
+
})
|
554
|
+
.define_method(
|
555
|
+
"_level",
|
556
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
557
|
+
return self.getLevelOfTopic(topic_id);
|
558
|
+
})
|
559
|
+
.define_method(
|
560
|
+
"live_k",
|
561
|
+
*[](tomoto::IHLDAModel& self) {
|
562
|
+
return self.getLiveK();
|
563
|
+
})
|
564
|
+
.define_method(
|
565
|
+
"_live_topic?",
|
566
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
567
|
+
return self.isLiveTopic(topic_id);
|
568
|
+
})
|
569
|
+
.define_method(
|
570
|
+
"_num_docs_of_topic",
|
571
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
572
|
+
return self.getNumDocsOfTopic(topic_id);
|
573
|
+
})
|
574
|
+
.define_method(
|
575
|
+
"_parent_topic",
|
576
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
577
|
+
return self.getParentTopicId(topic_id);
|
578
|
+
});
|
579
|
+
|
580
|
+
Class rb_cPA = define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(rb_mTomoto, "PA")
|
581
|
+
.define_singleton_method(
|
582
|
+
"_new",
|
583
|
+
*[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
|
584
|
+
if (seed < 0) {
|
585
|
+
seed = std::random_device{}();
|
586
|
+
}
|
587
|
+
return tomoto::IPAModel::create((tomoto::TermWeight)tw, k1, k2, alpha, eta, seed);
|
588
|
+
})
|
589
|
+
.define_method(
|
590
|
+
"k1",
|
591
|
+
*[](tomoto::IPAModel& self) {
|
592
|
+
return self.getK();
|
593
|
+
})
|
594
|
+
.define_method(
|
595
|
+
"k2",
|
596
|
+
*[](tomoto::IPAModel& self) {
|
597
|
+
return self.getK2();
|
598
|
+
});
|
599
|
+
|
600
|
+
Class rb_cHPA = define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(rb_mTomoto, "HPA")
|
601
|
+
.define_singleton_method(
|
602
|
+
"_new",
|
603
|
+
*[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
|
604
|
+
if (seed < 0) {
|
605
|
+
seed = std::random_device{}();
|
606
|
+
}
|
607
|
+
return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, k1, k2, alpha, eta, seed);
|
608
|
+
});
|
609
|
+
|
610
|
+
Class rb_cMGLDA = define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(rb_mTomoto, "MGLDA")
|
611
|
+
.define_singleton_method(
|
612
|
+
"_new",
|
613
|
+
*[](size_t tw, size_t k_g, size_t k_l, size_t t, float alpha_g, float alpha_l, float alpha_mg, float alpha_ml, float eta_g) {
|
614
|
+
return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g);
|
615
|
+
})
|
616
|
+
.define_method(
|
617
|
+
"_add_doc",
|
618
|
+
*[](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
|
619
|
+
self.addDoc(words, delimiter);
|
620
|
+
})
|
621
|
+
.define_method(
|
622
|
+
"alpha_g",
|
623
|
+
*[](tomoto::IMGLDAModel& self) {
|
624
|
+
return self.getAlpha();
|
625
|
+
})
|
626
|
+
.define_method(
|
627
|
+
"alpha_l",
|
628
|
+
*[](tomoto::IMGLDAModel& self) {
|
629
|
+
return self.getAlphaL();
|
630
|
+
})
|
631
|
+
.define_method(
|
632
|
+
"alpha_mg",
|
633
|
+
*[](tomoto::IMGLDAModel& self) {
|
634
|
+
return self.getAlphaM();
|
635
|
+
})
|
636
|
+
.define_method(
|
637
|
+
"alpha_ml",
|
638
|
+
*[](tomoto::IMGLDAModel& self) {
|
639
|
+
return self.getAlphaML();
|
640
|
+
})
|
641
|
+
.define_method(
|
642
|
+
"eta_g",
|
643
|
+
*[](tomoto::IMGLDAModel& self) {
|
644
|
+
return self.getEta();
|
645
|
+
})
|
646
|
+
.define_method(
|
647
|
+
"eta_l",
|
648
|
+
*[](tomoto::IMGLDAModel& self) {
|
649
|
+
return self.getEtaL();
|
650
|
+
})
|
651
|
+
.define_method(
|
652
|
+
"gamma",
|
653
|
+
*[](tomoto::IMGLDAModel& self) {
|
654
|
+
return self.getGamma();
|
655
|
+
})
|
656
|
+
.define_method(
|
657
|
+
"k_g",
|
658
|
+
*[](tomoto::IMGLDAModel& self) {
|
659
|
+
return self.getK();
|
660
|
+
})
|
661
|
+
.define_method(
|
662
|
+
"k_l",
|
663
|
+
*[](tomoto::IMGLDAModel& self) {
|
664
|
+
return self.getKL();
|
665
|
+
})
|
666
|
+
.define_method(
|
667
|
+
"t",
|
668
|
+
*[](tomoto::IMGLDAModel& self) {
|
669
|
+
return self.getT();
|
670
|
+
});
|
671
|
+
|
672
|
+
Class rb_cLLDA = define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(rb_mTomoto, "LLDA")
|
673
|
+
.define_singleton_method(
|
674
|
+
"_new",
|
675
|
+
*[](size_t tw, size_t k, float alpha, float eta, int seed) {
|
676
|
+
if (seed < 0) {
|
677
|
+
seed = std::random_device{}();
|
678
|
+
}
|
679
|
+
return tomoto::ILLDAModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
680
|
+
})
|
681
|
+
.define_method(
|
682
|
+
"_add_doc",
|
683
|
+
*[](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
|
684
|
+
self.addDoc(words, labels);
|
685
|
+
})
|
686
|
+
.define_method(
|
687
|
+
"topics_per_label",
|
688
|
+
*[](tomoto::ILLDAModel& self) {
|
689
|
+
return self.getNumTopicsPerLabel();
|
690
|
+
});
|
691
|
+
|
692
|
+
Class rb_cPLDA = define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(rb_mTomoto, "PLDA")
|
693
|
+
.define_singleton_method(
|
694
|
+
"_new",
|
695
|
+
*[](size_t tw, size_t latent_topics, float alpha, float eta, int seed) {
|
696
|
+
if (seed < 0) {
|
697
|
+
seed = std::random_device{}();
|
698
|
+
}
|
699
|
+
return tomoto::IPLDAModel::create((tomoto::TermWeight)tw, latent_topics, 1, alpha, eta, seed);
|
700
|
+
})
|
701
|
+
.define_method(
|
702
|
+
"_add_doc",
|
703
|
+
*[](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
|
704
|
+
self.addDoc(words, labels);
|
705
|
+
})
|
706
|
+
.define_method(
|
707
|
+
"latent_topics",
|
708
|
+
*[](tomoto::IPLDAModel& self) {
|
709
|
+
return self.getNumLatentTopics();
|
710
|
+
});
|
711
|
+
|
712
|
+
Class rb_cSLDA = define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(rb_mTomoto, "SLDA")
|
713
|
+
.define_singleton_method(
|
714
|
+
"_new",
|
715
|
+
*[](size_t tw, size_t k, Array rb_vars, float alpha, float eta, std::vector<float> mu, std::vector<float> nu_sq, std::vector<float> glm_param, int seed) {
|
716
|
+
if (seed < 0) {
|
717
|
+
seed = std::random_device{}();
|
718
|
+
}
|
719
|
+
std::vector<tomoto::ISLDAModel::GLM> vars;
|
720
|
+
vars.reserve(rb_vars.size());
|
721
|
+
for (auto const& v : rb_vars) {
|
722
|
+
vars.push_back((tomoto::ISLDAModel::GLM) from_ruby<int>(v));
|
723
|
+
}
|
724
|
+
return tomoto::ISLDAModel::create((tomoto::TermWeight)tw, k, vars, alpha, eta, mu, nu_sq, glm_param, seed);
|
725
|
+
})
|
726
|
+
.define_method(
|
727
|
+
"_add_doc",
|
728
|
+
*[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<float> y) {
|
729
|
+
self.addDoc(words, y);
|
730
|
+
})
|
731
|
+
.define_method(
|
732
|
+
"f",
|
733
|
+
*[](tomoto::ISLDAModel& self) {
|
734
|
+
return self.getF();
|
735
|
+
})
|
736
|
+
.define_method(
|
737
|
+
"_var_type",
|
738
|
+
*[](tomoto::ISLDAModel& self, size_t var_id) {
|
739
|
+
if (var_id >= self.getF()) throw std::runtime_error{ "'var_id' must be < 'f'" };
|
740
|
+
return self.getTypeOfVar(var_id) == tomoto::ISLDAModel::GLM::linear ? "l" : "b";
|
741
|
+
});
|
245
742
|
}
|
data/ext/tomoto/extconf.rb
CHANGED
@@ -11,6 +11,9 @@ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
|
|
11
11
|
if apple_clang
|
12
12
|
# silence rice warnings
|
13
13
|
$CXXFLAGS += " -Wno-deprecated-declarations"
|
14
|
+
else
|
15
|
+
# silence eigen warnings
|
16
|
+
$CXXFLAGS += " -Wno-ignored-attributes -Wno-deprecated-copy"
|
14
17
|
end
|
15
18
|
|
16
19
|
# silence tomoto warnings
|
data/lib/tomoto.rb
CHANGED
@@ -3,8 +3,18 @@ require "tomoto/ext"
|
|
3
3
|
|
4
4
|
# modules
|
5
5
|
require "tomoto/ct"
|
6
|
+
require "tomoto/dmr"
|
7
|
+
require "tomoto/dt"
|
8
|
+
require "tomoto/gdmr"
|
6
9
|
require "tomoto/hdp"
|
10
|
+
require "tomoto/hlda"
|
11
|
+
require "tomoto/hpa"
|
7
12
|
require "tomoto/lda"
|
13
|
+
require "tomoto/llda"
|
14
|
+
require "tomoto/mglda"
|
15
|
+
require "tomoto/pa"
|
16
|
+
require "tomoto/plda"
|
17
|
+
require "tomoto/slda"
|
8
18
|
require "tomoto/version"
|
9
19
|
|
10
20
|
module Tomoto
|
data/lib/tomoto/ct.rb
CHANGED
@@ -7,5 +7,18 @@ module Tomoto
|
|
7
7
|
model.instance_variable_set(:@rm_top, rm_top)
|
8
8
|
model
|
9
9
|
end
|
10
|
+
|
11
|
+
# Calls `prepare`, then returns `_correlations(topic_id)` for the given
# topic. When no topic is given, returns one `_correlations(i)` result for
# each index in 0...k.
def correlations(topic_id = nil)
  prepare
  return _correlations(topic_id) if topic_id

  (0...k).map { |index| _correlations(index) }
end
|
19
|
+
|
20
|
+
# Regroups the flat `_prior_cov` result into consecutive rows of `k` values.
def prior_cov
  rows = []
  _prior_cov.each_slice(k) { |row| rows << row }
  rows
end
|
10
23
|
end
|
11
24
|
end
|
data/lib/tomoto/dmr.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the DMR model class.
  class DMR
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # When `seed` is nil, -1 is handed to the native constructor instead.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, sigma: 1.0, alpha_epsilon: 1e-10, seed: nil)
      _new(to_tw(tw), k, alpha, sigma, eta, alpha_epsilon, seed || -1).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end

    # Adds a document. `doc` goes through `prepare_doc`; the single
    # `metadata` value is wrapped in a one-element array for the native call.
    def add_doc(doc, metadata: "")
      tokens = prepare_doc(doc)
      _add_doc(tokens, [metadata])
    end

    # Returns one `_lambdas(i)` result per index in 0...k, or an empty array
    # when `f` is 0.
    def lambdas
      return [] if f == 0

      (0...k).map { |topic_id| _lambdas(topic_id) }
    end
  end
end
|
data/lib/tomoto/dt.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the DT model class.
  class DT
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # No `seed:` option is accepted here; it is left commented out below.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, t: 1, alpha_var: 0.1, eta_var: 0.1, phi_var: 0.1, lr_a: 0.01, lr_b: 0.1, lr_c: 0.55) #, seed: nil)
      _new(to_tw(tw), k, t, alpha_var, eta_var, phi_var, lr_a, lr_b, lr_c).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end

    # Adds a document (run through `prepare_doc`) at the given timepoint.
    def add_doc(doc, timepoint: 0)
      tokens = prepare_doc(doc)
      _add_doc(tokens, timepoint)
    end
  end
end
|
data/lib/tomoto/gdmr.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the GDMR model class.
  class GDMR
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # When `seed` is nil, -1 is handed to the native constructor instead.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, degrees: [], alpha: 0.1, eta: 0.01, sigma: 1.0, sigma0: 3.0, alpha_epsilon: 1e-10, seed: nil)
      _new(to_tw(tw), k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed || -1).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end

    # Adds a document (run through `prepare_doc`). Every metadata entry is
    # converted with `to_s` before being passed to the native call.
    def add_doc(doc, metadata: [])
      tokens = prepare_doc(doc)
      metadata_strings = metadata.map { |entry| entry.to_s }
      _add_doc(tokens, metadata_strings)
    end
  end
end
|
data/lib/tomoto/hlda.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the HLDA model class.
  class HLDA
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # When `seed` is nil, -1 is handed to the native constructor instead.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, depth: 2, alpha: 0.1, eta: 0.01, gamma: 0.1, seed: nil)
      model = _new(to_tw(tw), depth, alpha, eta, gamma, seed || -1)
      model.instance_variable_set(:@min_cf, min_cf)
      model.instance_variable_set(:@min_df, min_df)
      model.instance_variable_set(:@rm_top, rm_top)
      model
    end

    # Returns the native `_children_topics` result for a validated topic id.
    def children_topics(topic_id)
      check_topic(topic_id)
      _children_topics(topic_id)
    end

    # Returns the native `_level` result for a live topic, -1 otherwise.
    def level(topic_id)
      check_topic(topic_id)
      _live_topic?(topic_id) ? _level(topic_id) : -1
    end

    # Returns whether the validated topic id is reported live by the native layer.
    def live_topic?(topic_id)
      check_topic(topic_id)
      _live_topic?(topic_id)
    end

    # Returns the native `_num_docs_of_topic` result for a validated topic id.
    def num_docs_of_topic(topic_id)
      check_topic(topic_id)
      _num_docs_of_topic(topic_id)
    end

    # Returns the native `_parent_topic` result for a live topic, -1 otherwise.
    def parent_topic(topic_id)
      check_topic(topic_id)
      _live_topic?(topic_id) ? _parent_topic(topic_id) : -1
    end

    private

    # Validates a topic id before it is handed to the native methods.
    #
    # Raises RuntimeError when the id is negative, when it is >= k, or when
    # @prepared is not set.
    def check_topic(topic_id)
      # The native methods take an unsigned topic id, so a negative value must
      # be rejected here rather than passed through to the conversion.
      raise "topic_id must be >= 0" if topic_id < 0
      raise "topic_id must be < K" if topic_id >= k
      raise "train() should be called first" unless @prepared
    end
  end
end
|
data/lib/tomoto/hpa.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the HPA model class.
  class HPA
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # When `seed` is nil, -1 is handed to the native constructor instead.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
      _new(to_tw(tw), k1, k2, alpha, eta, seed || -1).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end
  end
end
|
data/lib/tomoto/lda.rb
CHANGED
@@ -15,9 +15,7 @@ module Tomoto
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def add_doc(doc)
|
18
|
-
|
19
|
-
doc = doc.split(/[[:space:]]+/) unless doc.is_a?(Array)
|
20
|
-
_add_doc(doc)
|
18
|
+
_add_doc(prepare_doc(doc))
|
21
19
|
end
|
22
20
|
|
23
21
|
def count_by_topics
|
@@ -47,6 +45,10 @@ module Tomoto
|
|
47
45
|
_train(iterations, workers)
|
48
46
|
end
|
49
47
|
|
48
|
+
# Looks up the native `_tw` value in TERM_WEIGHT and returns the entry.
def tw
  weight_index = _tw
  TERM_WEIGHT[weight_index]
end
|
51
|
+
|
50
52
|
private
|
51
53
|
|
52
54
|
def prepare
|
@@ -56,6 +58,12 @@ module Tomoto
|
|
56
58
|
end
|
57
59
|
end
|
58
60
|
|
61
|
+
# Normalises a document before it is added.
#
# Raises RuntimeError once @prepared has been defined. Arrays are returned
# unchanged; anything else is split on runs of whitespace.
def prepare_doc(doc)
  if defined?(@prepared)
    raise "cannot add_doc() after train()"
  end
  return doc if doc.is_a?(Array)

  doc.split(/[[:space:]]+/)
end
|
66
|
+
|
59
67
|
class << self
|
60
68
|
private
|
61
69
|
|
data/lib/tomoto/llda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the LLDA model class.
  class LLDA
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # When `seed` is nil, -1 is handed to the native constructor instead.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
      _new(to_tw(tw), k, alpha, eta, seed || -1).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end

    # Adds a document (run through `prepare_doc`) together with its labels.
    def add_doc(doc, labels: [])
      tokens = prepare_doc(doc)
      _add_doc(tokens, labels)
    end
  end
end
|
data/lib/tomoto/mglda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the MGLDA model class.
  class MGLDA
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # `eta_l:`, `gamma:` and `seed:` are not accepted; they are left
    # commented out below.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k_g: 1, k_l: 1, t: 3, alpha_g: 0.1, alpha_l: 0.1, alpha_mg: 0.1, alpha_ml: 0.1, eta_g: 0.01) #, eta_l: 0.01, gamma: 0.1, seed: nil)
      _new(to_tw(tw), k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end

    # Adds a document (run through `prepare_doc`) with the given delimiter.
    def add_doc(doc, delimiter: ".")
      tokens = prepare_doc(doc)
      _add_doc(tokens, delimiter)
    end
  end
end
|
data/lib/tomoto/pa.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the PA model class.
  class PA
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # When `seed` is nil, -1 is handed to the native constructor instead.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
      _new(to_tw(tw), k1, k2, alpha, eta, seed || -1).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end
  end
end
|
data/lib/tomoto/plda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the PLDA model class.
  class PLDA
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # When `seed` is nil, -1 is handed to the native constructor instead.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, latent_topics: 1, alpha: 0.1, eta: 0.01, seed: nil)
      _new(to_tw(tw), latent_topics, alpha, eta, seed || -1).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end

    # Adds a document (run through `prepare_doc`) together with its labels.
    def add_doc(doc, labels: [])
      tokens = prepare_doc(doc)
      _add_doc(tokens, labels)
    end
  end
end
|
data/lib/tomoto/slda.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module Tomoto
  # Ruby-side additions to the SLDA model class.
  class SLDA
    # Creates a model through the native `_new` and stores the pruning
    # options (min_cf, min_df, rm_top) as instance variables on it.
    # `vars` is a string with one character per response variable; each
    # character is translated by `to_glm`. When `seed` is nil, -1 is handed
    # to the native constructor instead.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, vars: "", alpha: 0.1, eta: 0.01, mu: [], nu_sq: [], glm_param: [], seed: nil)
      # Term weight is resolved before the variable codes, as in the
      # single-expression form, so the first error raised stays the same.
      term_weight = to_tw(tw)
      glm_codes = vars.split("").map { |code| to_glm(code) }
      _new(term_weight, k, glm_codes, alpha, eta, mu, nu_sq, glm_param, seed || -1).tap do |instance|
        instance.instance_variable_set(:@min_cf, min_cf)
        instance.instance_variable_set(:@min_df, min_df)
        instance.instance_variable_set(:@rm_top, rm_top)
      end
    end

    # Adds a document (run through `prepare_doc`) with response values `y`.
    def add_doc(doc, y: [])
      tokens = prepare_doc(doc)
      _add_doc(tokens, y)
    end

    # Returns the native `_var_type` result for `var_id`.
    # Raises RuntimeError when @prepared is not set.
    def var_type(var_id)
      raise "train() should be called first" unless @prepared

      _var_type(var_id)
    end

    class << self
      private

      # Maps a one-character variable code to the integer passed to the
      # native constructor: "l" => 0, "b" => 1. Anything else raises.
      def to_glm(v)
        if "l" == v
          0
        elsif "b" == v
          1
        else
          raise "Invalid var: #{v}"
        end
      end
    end
  end
end
|
data/lib/tomoto/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomoto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -94,8 +94,18 @@ files:
|
|
94
94
|
- ext/tomoto/extconf.rb
|
95
95
|
- lib/tomoto.rb
|
96
96
|
- lib/tomoto/ct.rb
|
97
|
+
- lib/tomoto/dmr.rb
|
98
|
+
- lib/tomoto/dt.rb
|
99
|
+
- lib/tomoto/gdmr.rb
|
97
100
|
- lib/tomoto/hdp.rb
|
101
|
+
- lib/tomoto/hlda.rb
|
102
|
+
- lib/tomoto/hpa.rb
|
98
103
|
- lib/tomoto/lda.rb
|
104
|
+
- lib/tomoto/llda.rb
|
105
|
+
- lib/tomoto/mglda.rb
|
106
|
+
- lib/tomoto/pa.rb
|
107
|
+
- lib/tomoto/plda.rb
|
108
|
+
- lib/tomoto/slda.rb
|
99
109
|
- lib/tomoto/version.rb
|
100
110
|
- vendor/EigenRand/EigenRand/Core.h
|
101
111
|
- vendor/EigenRand/EigenRand/Dists/Basic.h
|