tomoto 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +23 -17
- data/ext/tomoto/ext.cpp +512 -15
- data/ext/tomoto/extconf.rb +3 -0
- data/lib/tomoto.rb +10 -0
- data/lib/tomoto/ct.rb +13 -0
- data/lib/tomoto/dmr.rb +23 -0
- data/lib/tomoto/dt.rb +15 -0
- data/lib/tomoto/gdmr.rb +15 -0
- data/lib/tomoto/hlda.rb +43 -0
- data/lib/tomoto/hpa.rb +11 -0
- data/lib/tomoto/lda.rb +11 -3
- data/lib/tomoto/llda.rb +15 -0
- data/lib/tomoto/mglda.rb +15 -0
- data/lib/tomoto/pa.rb +11 -0
- data/lib/tomoto/plda.rb +15 -0
- data/lib/tomoto/slda.rb +37 -0
- data/lib/tomoto/version.rb +1 -1
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b40c9adf2f0162eb6174b17395ea37b9294e14b22609e9f51951e9904125ff9
|
4
|
+
data.tar.gz: be3f68438f60a7e4fc11033921636f8d03bf411bd3d3eb6aa3b4fb448faac41a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a74747ae372d030c42562d4e2b99ab167ccc28533468ed08819f4bd34d42b340349870712c12e565388eb7833f993349432e77baee8618c34c265676ca181072
|
7
|
+
data.tar.gz: da9e833bb98726278108a68a7dd6bed0e54b3979c25d49d8db01aa613e6205a6a3512881688209baf2589c4dc5514ed2663fb251a5e273c42b678bb1daa06d74
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
-
#
|
1
|
+
# tomoto
|
2
2
|
|
3
|
-
[
|
3
|
+
:tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
|
4
|
+
|
5
|
+
[![Build Status](https://travis-ci.org/ankane/tomoto.svg?branch=master)](https://travis-ci.org/ankane/tomoto)
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -10,7 +12,7 @@ Add this line to your application’s Gemfile:
|
|
10
12
|
gem 'tomoto'
|
11
13
|
```
|
12
14
|
|
13
|
-
It can take
|
15
|
+
It can take 10-20 minutes to compile the extension.
|
14
16
|
|
15
17
|
## Getting Started
|
16
18
|
|
@@ -65,23 +67,27 @@ model.ll_per_word
|
|
65
67
|
Supports:
|
66
68
|
|
67
69
|
- Latent Dirichlet Allocation (`LDA`)
|
70
|
+
- Labeled LDA (`LLDA`)
|
71
|
+
- Partially Labeled LDA (`PLDA`)
|
72
|
+
- Supervised LDA (`SLDA`)
|
73
|
+
- Dirichlet Multinomial Regression (`DMR`)
|
74
|
+
- Generalized Dirichlet Multinomial Regression (`GDMR`)
|
68
75
|
- Hierarchical Dirichlet Process (`HDP`)
|
76
|
+
- Hierarchical LDA (`HLDA`)
|
77
|
+
- Multi Grain LDA (`MGLDA`)
|
78
|
+
- Pachinko Allocation (`PA`)
|
79
|
+
- Hierarchical PA (`HPA`)
|
69
80
|
- Correlated Topic Model (`CT`)
|
81
|
+
- Dynamic Topic Model (`DT`)
|
70
82
|
|
71
|
-
##
|
83
|
+
## API
|
72
84
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
k: 1,
|
80
|
-
alpha: 0.1,
|
81
|
-
eta: 0.01,
|
82
|
-
seed: nil
|
83
|
-
)
|
84
|
-
```
|
85
|
+
This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
|
86
|
+
|
87
|
+
- The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
|
88
|
+
- Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
|
89
|
+
|
90
|
+
If a method or option you need isn’t supported, feel free to open an issue.
|
85
91
|
|
86
92
|
## Tokenization
|
87
93
|
|
@@ -93,7 +99,7 @@ model.add_doc(["tokens", "from", "document", "one"])
|
|
93
99
|
|
94
100
|
## Performance
|
95
101
|
|
96
|
-
|
102
|
+
tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check what it’s using with:
|
97
103
|
|
98
104
|
```ruby
|
99
105
|
Tomoto.isa
|
data/ext/tomoto/ext.cpp
CHANGED
@@ -1,7 +1,21 @@
|
|
1
|
+
// stdlib
|
2
|
+
#include <fstream>
|
3
|
+
#include <iostream>
|
4
|
+
|
1
5
|
// tomoto
|
2
6
|
#include <CT.h>
|
7
|
+
#include <DMR.h>
|
8
|
+
#include <DT.h>
|
9
|
+
#include <GDMR.h>
|
3
10
|
#include <HDP.h>
|
11
|
+
#include <HLDA.h>
|
12
|
+
#include <HPA.h>
|
4
13
|
#include <LDA.h>
|
14
|
+
#include <LLDA.h>
|
15
|
+
#include <MGLDA.h>
|
16
|
+
#include <PA.h>
|
17
|
+
#include <PLDA.h>
|
18
|
+
#include <SLDA.h>
|
5
19
|
|
6
20
|
// rice
|
7
21
|
#include <rice/Array.hpp>
|
@@ -26,6 +40,62 @@ Object to_ruby<std::vector<float>>(std::vector<float> const & x)
|
|
26
40
|
return res;
|
27
41
|
}
|
28
42
|
|
43
|
+
template<>
|
44
|
+
Object to_ruby<std::vector<uint32_t>>(std::vector<uint32_t> const & x)
|
45
|
+
{
|
46
|
+
Array res;
|
47
|
+
for (auto const& v : x) {
|
48
|
+
res.push(v);
|
49
|
+
}
|
50
|
+
return res;
|
51
|
+
}
|
52
|
+
|
53
|
+
template<>
|
54
|
+
Object to_ruby<std::vector<uint64_t>>(std::vector<uint64_t> const & x)
|
55
|
+
{
|
56
|
+
Array res;
|
57
|
+
for (auto const& v : x) {
|
58
|
+
res.push(v);
|
59
|
+
}
|
60
|
+
return res;
|
61
|
+
}
|
62
|
+
|
63
|
+
template<>
|
64
|
+
std::vector<std::string> from_ruby<std::vector<std::string>>(Object x)
|
65
|
+
{
|
66
|
+
Array a = Array(x);
|
67
|
+
std::vector<std::string> res;
|
68
|
+
res.reserve(a.size());
|
69
|
+
for (auto const& v : a) {
|
70
|
+
res.push_back(from_ruby<std::string>(v));
|
71
|
+
}
|
72
|
+
return res;
|
73
|
+
}
|
74
|
+
|
75
|
+
template<>
|
76
|
+
std::vector<float> from_ruby<std::vector<float>>(Object x)
|
77
|
+
{
|
78
|
+
Array a = Array(x);
|
79
|
+
std::vector<float> res;
|
80
|
+
res.reserve(a.size());
|
81
|
+
for (auto const& v : a) {
|
82
|
+
res.push_back(from_ruby<float>(v));
|
83
|
+
}
|
84
|
+
return res;
|
85
|
+
}
|
86
|
+
|
87
|
+
template<>
|
88
|
+
std::vector<uint64_t> from_ruby<std::vector<uint64_t>>(Object x)
|
89
|
+
{
|
90
|
+
Array a = Array(x);
|
91
|
+
std::vector<uint64_t> res;
|
92
|
+
res.reserve(a.size());
|
93
|
+
for (auto const& v : a) {
|
94
|
+
res.push_back(from_ruby<uint64_t>(v));
|
95
|
+
}
|
96
|
+
return res;
|
97
|
+
}
|
98
|
+
|
29
99
|
extern "C"
|
30
100
|
void Init_ext()
|
31
101
|
{
|
@@ -55,12 +125,7 @@ void Init_ext()
|
|
55
125
|
})
|
56
126
|
.define_method(
|
57
127
|
"_add_doc",
|
58
|
-
*[](tomoto::ILDAModel& self, Array rb_words) {
|
59
|
-
std::vector<std::string> words;
|
60
|
-
words.reserve(rb_words.size());
|
61
|
-
for (auto const& v : rb_words) {
|
62
|
-
words.push_back(from_ruby<std::string>(v));
|
63
|
-
}
|
128
|
+
*[](tomoto::ILDAModel& self, std::vector<std::string> words) {
|
64
129
|
self.addDoc(words);
|
65
130
|
})
|
66
131
|
.define_method(
|
@@ -93,6 +158,11 @@ void Init_ext()
|
|
93
158
|
*[](tomoto::ILDAModel& self) {
|
94
159
|
return self.getEta();
|
95
160
|
})
|
161
|
+
.define_method(
|
162
|
+
"global_step",
|
163
|
+
*[](tomoto::ILDAModel& self) {
|
164
|
+
return self.getGlobalStep();
|
165
|
+
})
|
96
166
|
.define_method(
|
97
167
|
"k",
|
98
168
|
*[](tomoto::ILDAModel& self) {
|
@@ -112,15 +182,36 @@ void Init_ext()
|
|
112
182
|
return self.getLLPerWord();
|
113
183
|
})
|
114
184
|
.define_method(
|
115
|
-
"
|
185
|
+
"num_docs",
|
116
186
|
*[](tomoto::ILDAModel& self) {
|
117
|
-
return self.
|
187
|
+
return self.getNumDocs();
|
118
188
|
})
|
119
189
|
.define_method(
|
120
190
|
"num_vocabs",
|
121
191
|
*[](tomoto::ILDAModel& self) {
|
122
192
|
return self.getV();
|
123
193
|
})
|
194
|
+
.define_method(
|
195
|
+
"num_words",
|
196
|
+
*[](tomoto::ILDAModel& self) {
|
197
|
+
return self.getN();
|
198
|
+
})
|
199
|
+
.define_method(
|
200
|
+
"optim_interval",
|
201
|
+
*[](tomoto::ILDAModel& self) {
|
202
|
+
return self.getOptimInterval();
|
203
|
+
})
|
204
|
+
.define_method(
|
205
|
+
"optim_interval=",
|
206
|
+
*[](tomoto::ILDAModel& self, size_t value) {
|
207
|
+
self.setOptimInterval(value);
|
208
|
+
return value;
|
209
|
+
})
|
210
|
+
.define_method(
|
211
|
+
"perplexity",
|
212
|
+
*[](tomoto::ILDAModel& self) {
|
213
|
+
return self.getPerplexity();
|
214
|
+
})
|
124
215
|
.define_method(
|
125
216
|
"_prepare",
|
126
217
|
*[](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
|
@@ -159,6 +250,62 @@ void Init_ext()
|
|
159
250
|
size_t ps = 0;
|
160
251
|
self.train(iteration, workers, (tomoto::ParallelScheme)ps);
|
161
252
|
})
|
253
|
+
.define_method(
|
254
|
+
"_tw",
|
255
|
+
*[](tomoto::ILDAModel& self) {
|
256
|
+
return (int)self.getTermWeight();
|
257
|
+
})
|
258
|
+
.define_method(
|
259
|
+
"used_vocab_df",
|
260
|
+
*[](tomoto::ILDAModel& self) {
|
261
|
+
auto vocab = self.getVocabDf();
|
262
|
+
Array res;
|
263
|
+
for (size_t i = 0; i < self.getV(); i++) {
|
264
|
+
res.push(vocab[i]);
|
265
|
+
}
|
266
|
+
return res;
|
267
|
+
})
|
268
|
+
.define_method(
|
269
|
+
"used_vocab_freq",
|
270
|
+
*[](tomoto::ILDAModel& self) {
|
271
|
+
auto vocab = self.getVocabCf();
|
272
|
+
Array res;
|
273
|
+
for (size_t i = 0; i < self.getV(); i++) {
|
274
|
+
res.push(vocab[i]);
|
275
|
+
}
|
276
|
+
return res;
|
277
|
+
})
|
278
|
+
.define_method(
|
279
|
+
"used_vocabs",
|
280
|
+
*[](tomoto::ILDAModel& self) {
|
281
|
+
auto dict = self.getVocabDict();
|
282
|
+
Array res;
|
283
|
+
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
284
|
+
for (size_t i = 0; i < self.getV(); i++) {
|
285
|
+
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
286
|
+
}
|
287
|
+
return res;
|
288
|
+
})
|
289
|
+
.define_method(
|
290
|
+
"vocab_df",
|
291
|
+
*[](tomoto::ILDAModel& self) {
|
292
|
+
auto vocab = self.getVocabDf();
|
293
|
+
Array res;
|
294
|
+
for (size_t i = 0; i < vocab.size(); i++) {
|
295
|
+
res.push(vocab[i]);
|
296
|
+
}
|
297
|
+
return res;
|
298
|
+
})
|
299
|
+
.define_method(
|
300
|
+
"vocab_freq",
|
301
|
+
*[](tomoto::ILDAModel& self) {
|
302
|
+
auto vocab = self.getVocabCf();
|
303
|
+
Array res;
|
304
|
+
for (size_t i = 0; i < vocab.size(); i++) {
|
305
|
+
res.push(vocab[i]);
|
306
|
+
}
|
307
|
+
return res;
|
308
|
+
})
|
162
309
|
.define_method(
|
163
310
|
"vocabs",
|
164
311
|
*[](tomoto::ILDAModel& self) {
|
@@ -180,6 +327,11 @@ void Init_ext()
|
|
180
327
|
}
|
181
328
|
return tomoto::ICTModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
182
329
|
})
|
330
|
+
.define_method(
|
331
|
+
"_correlations",
|
332
|
+
*[](tomoto::ICTModel& self, tomoto::Tid topic_id) {
|
333
|
+
return self.getCorrelationTopic(topic_id);
|
334
|
+
})
|
183
335
|
.define_method(
|
184
336
|
"num_beta_sample",
|
185
337
|
*[](tomoto::ICTModel& self) {
|
@@ -187,9 +339,9 @@ void Init_ext()
|
|
187
339
|
})
|
188
340
|
.define_method(
|
189
341
|
"num_beta_sample=",
|
190
|
-
*[](tomoto::ICTModel& self, size_t
|
191
|
-
self.setNumBetaSample(
|
192
|
-
return
|
342
|
+
*[](tomoto::ICTModel& self, size_t value) {
|
343
|
+
self.setNumBetaSample(value);
|
344
|
+
return value;
|
193
345
|
})
|
194
346
|
.define_method(
|
195
347
|
"num_tmn_sample",
|
@@ -198,12 +350,12 @@ void Init_ext()
|
|
198
350
|
})
|
199
351
|
.define_method(
|
200
352
|
"num_tmn_sample=",
|
201
|
-
*[](tomoto::ICTModel& self, size_t
|
202
|
-
self.setNumTMNSample(
|
203
|
-
return
|
353
|
+
*[](tomoto::ICTModel& self, size_t value) {
|
354
|
+
self.setNumTMNSample(value);
|
355
|
+
return value;
|
204
356
|
})
|
205
357
|
.define_method(
|
206
|
-
"
|
358
|
+
"_prior_cov",
|
207
359
|
*[](tomoto::ICTModel& self) {
|
208
360
|
return self.getPriorCov();
|
209
361
|
})
|
@@ -213,6 +365,138 @@ void Init_ext()
|
|
213
365
|
return self.getPriorMean();
|
214
366
|
});
|
215
367
|
|
368
|
+
Class rb_cDMR = define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(rb_mTomoto, "DMR")
|
369
|
+
.define_singleton_method(
|
370
|
+
"_new",
|
371
|
+
*[](size_t tw, size_t k, float alpha, float sigma, float eta, float alpha_epsilon, int seed) {
|
372
|
+
if (seed < 0) {
|
373
|
+
seed = std::random_device{}();
|
374
|
+
}
|
375
|
+
return tomoto::IDMRModel::create((tomoto::TermWeight)tw, k, alpha, sigma, eta, alpha_epsilon, seed);
|
376
|
+
})
|
377
|
+
.define_method(
|
378
|
+
"_add_doc",
|
379
|
+
*[](tomoto::IDMRModel& self, std::vector<std::string> words, std::vector<std::string> metadata) {
|
380
|
+
self.addDoc(words, metadata);
|
381
|
+
})
|
382
|
+
.define_method(
|
383
|
+
"alpha_epsilon",
|
384
|
+
*[](tomoto::IDMRModel& self) {
|
385
|
+
return self.getAlphaEps();
|
386
|
+
})
|
387
|
+
.define_method(
|
388
|
+
"alpha_epsilon=",
|
389
|
+
*[](tomoto::IDMRModel& self, float value) {
|
390
|
+
self.setAlphaEps(value);
|
391
|
+
return value;
|
392
|
+
})
|
393
|
+
.define_method(
|
394
|
+
"f",
|
395
|
+
*[](tomoto::IDMRModel& self) {
|
396
|
+
return self.getF();
|
397
|
+
})
|
398
|
+
.define_method(
|
399
|
+
"_lambdas",
|
400
|
+
*[](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
|
401
|
+
return self.getLambdaByTopic(topic_id);
|
402
|
+
})
|
403
|
+
.define_method(
|
404
|
+
"metadata_dict",
|
405
|
+
*[](tomoto::IDMRModel& self) {
|
406
|
+
auto dict = self.getMetadataDict();
|
407
|
+
Array res;
|
408
|
+
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
409
|
+
for (size_t i = 0; i < dict.size(); i++) {
|
410
|
+
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
411
|
+
}
|
412
|
+
return res;
|
413
|
+
})
|
414
|
+
.define_method(
|
415
|
+
"sigma",
|
416
|
+
*[](tomoto::IDMRModel& self) {
|
417
|
+
return self.getSigma();
|
418
|
+
});
|
419
|
+
|
420
|
+
Class rb_cDT = define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(rb_mTomoto, "DT")
|
421
|
+
.define_singleton_method(
|
422
|
+
"_new",
|
423
|
+
*[](size_t tw, size_t k, size_t t, float alphaVar, float etaVar, float phiVar, float shapeA, float shapeB, float shapeC) {
|
424
|
+
// Rice only supports 10 arguments
|
425
|
+
int seed = -1;
|
426
|
+
if (seed < 0) {
|
427
|
+
seed = std::random_device{}();
|
428
|
+
}
|
429
|
+
return tomoto::IDTModel::create((tomoto::TermWeight)tw, k, t, alphaVar, etaVar, phiVar, shapeA, shapeB, shapeC, 0, seed);
|
430
|
+
})
|
431
|
+
.define_method(
|
432
|
+
"_add_doc",
|
433
|
+
*[](tomoto::IDTModel& self, std::vector<std::string> words, size_t timepoint) {
|
434
|
+
self.addDoc(words, timepoint);
|
435
|
+
})
|
436
|
+
.define_method(
|
437
|
+
"lr_a",
|
438
|
+
*[](tomoto::IDTModel& self) {
|
439
|
+
return self.getShapeA();
|
440
|
+
})
|
441
|
+
.define_method(
|
442
|
+
"lr_a=",
|
443
|
+
*[](tomoto::IDTModel& self, float value) {
|
444
|
+
self.setShapeA(value);
|
445
|
+
return value;
|
446
|
+
})
|
447
|
+
.define_method(
|
448
|
+
"lr_b",
|
449
|
+
*[](tomoto::IDTModel& self) {
|
450
|
+
return self.getShapeB();
|
451
|
+
})
|
452
|
+
.define_method(
|
453
|
+
"lr_b=",
|
454
|
+
*[](tomoto::IDTModel& self, float value) {
|
455
|
+
self.setShapeB(value);
|
456
|
+
return value;
|
457
|
+
})
|
458
|
+
.define_method(
|
459
|
+
"lr_c",
|
460
|
+
*[](tomoto::IDTModel& self) {
|
461
|
+
return self.getShapeC();
|
462
|
+
})
|
463
|
+
.define_method(
|
464
|
+
"lr_c=",
|
465
|
+
*[](tomoto::IDTModel& self, float value) {
|
466
|
+
self.setShapeC(value);
|
467
|
+
return value;
|
468
|
+
})
|
469
|
+
.define_method(
|
470
|
+
"num_docs_by_timepoint",
|
471
|
+
*[](tomoto::IDTModel& self) {
|
472
|
+
return self.getNumDocsByT();
|
473
|
+
})
|
474
|
+
.define_method(
|
475
|
+
"num_timepoints",
|
476
|
+
*[](tomoto::IDTModel& self) {
|
477
|
+
return self.getT();
|
478
|
+
});
|
479
|
+
|
480
|
+
Class rb_cGDMR = define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(rb_mTomoto, "GDMR")
|
481
|
+
.define_singleton_method(
|
482
|
+
"_new",
|
483
|
+
*[](size_t tw, size_t k, std::vector<uint64_t> degrees, float alpha, float sigma, float sigma0, float eta, float alpha_epsilon, int seed) {
|
484
|
+
if (seed < 0) {
|
485
|
+
seed = std::random_device{}();
|
486
|
+
}
|
487
|
+
return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed);
|
488
|
+
})
|
489
|
+
.define_method(
|
490
|
+
"degrees",
|
491
|
+
*[](tomoto::IGDMRModel& self) {
|
492
|
+
return self.getFs();
|
493
|
+
})
|
494
|
+
.define_method(
|
495
|
+
"sigma0",
|
496
|
+
*[](tomoto::IGDMRModel& self) {
|
497
|
+
return self.getSigma0();
|
498
|
+
});
|
499
|
+
|
216
500
|
Class rb_cHDP = define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(rb_mTomoto, "HDP")
|
217
501
|
.define_singleton_method(
|
218
502
|
"_new",
|
@@ -242,4 +526,217 @@ void Init_ext()
|
|
242
526
|
*[](tomoto::IHDPModel& self) {
|
243
527
|
return self.getTotalTables();
|
244
528
|
});
|
529
|
+
|
530
|
+
Class rb_cHLDA = define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(rb_mTomoto, "HLDA")
|
531
|
+
.define_singleton_method(
|
532
|
+
"_new",
|
533
|
+
*[](size_t tw, size_t levelDepth, float alpha, float eta, float gamma, int seed) {
|
534
|
+
if (seed < 0) {
|
535
|
+
seed = std::random_device{}();
|
536
|
+
}
|
537
|
+
return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, levelDepth, alpha, eta, gamma, seed);
|
538
|
+
})
|
539
|
+
.define_method(
|
540
|
+
"_children_topics",
|
541
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
542
|
+
return self.getChildTopicId(topic_id);
|
543
|
+
})
|
544
|
+
.define_method(
|
545
|
+
"depth",
|
546
|
+
*[](tomoto::IHLDAModel& self) {
|
547
|
+
return self.getLevelDepth();
|
548
|
+
})
|
549
|
+
.define_method(
|
550
|
+
"gamma",
|
551
|
+
*[](tomoto::IHLDAModel& self) {
|
552
|
+
return self.getGamma();
|
553
|
+
})
|
554
|
+
.define_method(
|
555
|
+
"_level",
|
556
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
557
|
+
return self.getLevelOfTopic(topic_id);
|
558
|
+
})
|
559
|
+
.define_method(
|
560
|
+
"live_k",
|
561
|
+
*[](tomoto::IHLDAModel& self) {
|
562
|
+
return self.getLiveK();
|
563
|
+
})
|
564
|
+
.define_method(
|
565
|
+
"_live_topic?",
|
566
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
567
|
+
return self.isLiveTopic(topic_id);
|
568
|
+
})
|
569
|
+
.define_method(
|
570
|
+
"_num_docs_of_topic",
|
571
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
572
|
+
return self.getNumDocsOfTopic(topic_id);
|
573
|
+
})
|
574
|
+
.define_method(
|
575
|
+
"_parent_topic",
|
576
|
+
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
577
|
+
return self.getParentTopicId(topic_id);
|
578
|
+
});
|
579
|
+
|
580
|
+
Class rb_cPA = define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(rb_mTomoto, "PA")
|
581
|
+
.define_singleton_method(
|
582
|
+
"_new",
|
583
|
+
*[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
|
584
|
+
if (seed < 0) {
|
585
|
+
seed = std::random_device{}();
|
586
|
+
}
|
587
|
+
return tomoto::IPAModel::create((tomoto::TermWeight)tw, k1, k2, alpha, eta, seed);
|
588
|
+
})
|
589
|
+
.define_method(
|
590
|
+
"k1",
|
591
|
+
*[](tomoto::IPAModel& self) {
|
592
|
+
return self.getK();
|
593
|
+
})
|
594
|
+
.define_method(
|
595
|
+
"k2",
|
596
|
+
*[](tomoto::IPAModel& self) {
|
597
|
+
return self.getK2();
|
598
|
+
});
|
599
|
+
|
600
|
+
Class rb_cHPA = define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(rb_mTomoto, "HPA")
|
601
|
+
.define_singleton_method(
|
602
|
+
"_new",
|
603
|
+
*[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
|
604
|
+
if (seed < 0) {
|
605
|
+
seed = std::random_device{}();
|
606
|
+
}
|
607
|
+
return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, k1, k2, alpha, eta, seed);
|
608
|
+
});
|
609
|
+
|
610
|
+
Class rb_cMGLDA = define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(rb_mTomoto, "MGLDA")
|
611
|
+
.define_singleton_method(
|
612
|
+
"_new",
|
613
|
+
*[](size_t tw, size_t k_g, size_t k_l, size_t t, float alpha_g, float alpha_l, float alpha_mg, float alpha_ml, float eta_g) {
|
614
|
+
return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g);
|
615
|
+
})
|
616
|
+
.define_method(
|
617
|
+
"_add_doc",
|
618
|
+
*[](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
|
619
|
+
self.addDoc(words, delimiter);
|
620
|
+
})
|
621
|
+
.define_method(
|
622
|
+
"alpha_g",
|
623
|
+
*[](tomoto::IMGLDAModel& self) {
|
624
|
+
return self.getAlpha();
|
625
|
+
})
|
626
|
+
.define_method(
|
627
|
+
"alpha_l",
|
628
|
+
*[](tomoto::IMGLDAModel& self) {
|
629
|
+
return self.getAlphaL();
|
630
|
+
})
|
631
|
+
.define_method(
|
632
|
+
"alpha_mg",
|
633
|
+
*[](tomoto::IMGLDAModel& self) {
|
634
|
+
return self.getAlphaM();
|
635
|
+
})
|
636
|
+
.define_method(
|
637
|
+
"alpha_ml",
|
638
|
+
*[](tomoto::IMGLDAModel& self) {
|
639
|
+
return self.getAlphaML();
|
640
|
+
})
|
641
|
+
.define_method(
|
642
|
+
"eta_g",
|
643
|
+
*[](tomoto::IMGLDAModel& self) {
|
644
|
+
return self.getEta();
|
645
|
+
})
|
646
|
+
.define_method(
|
647
|
+
"eta_l",
|
648
|
+
*[](tomoto::IMGLDAModel& self) {
|
649
|
+
return self.getEtaL();
|
650
|
+
})
|
651
|
+
.define_method(
|
652
|
+
"gamma",
|
653
|
+
*[](tomoto::IMGLDAModel& self) {
|
654
|
+
return self.getGamma();
|
655
|
+
})
|
656
|
+
.define_method(
|
657
|
+
"k_g",
|
658
|
+
*[](tomoto::IMGLDAModel& self) {
|
659
|
+
return self.getK();
|
660
|
+
})
|
661
|
+
.define_method(
|
662
|
+
"k_l",
|
663
|
+
*[](tomoto::IMGLDAModel& self) {
|
664
|
+
return self.getKL();
|
665
|
+
})
|
666
|
+
.define_method(
|
667
|
+
"t",
|
668
|
+
*[](tomoto::IMGLDAModel& self) {
|
669
|
+
return self.getT();
|
670
|
+
});
|
671
|
+
|
672
|
+
Class rb_cLLDA = define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(rb_mTomoto, "LLDA")
|
673
|
+
.define_singleton_method(
|
674
|
+
"_new",
|
675
|
+
*[](size_t tw, size_t k, float alpha, float eta, int seed) {
|
676
|
+
if (seed < 0) {
|
677
|
+
seed = std::random_device{}();
|
678
|
+
}
|
679
|
+
return tomoto::ILLDAModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
680
|
+
})
|
681
|
+
.define_method(
|
682
|
+
"_add_doc",
|
683
|
+
*[](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
|
684
|
+
self.addDoc(words, labels);
|
685
|
+
})
|
686
|
+
.define_method(
|
687
|
+
"topics_per_label",
|
688
|
+
*[](tomoto::ILLDAModel& self) {
|
689
|
+
return self.getNumTopicsPerLabel();
|
690
|
+
});
|
691
|
+
|
692
|
+
Class rb_cPLDA = define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(rb_mTomoto, "PLDA")
|
693
|
+
.define_singleton_method(
|
694
|
+
"_new",
|
695
|
+
*[](size_t tw, size_t latent_topics, float alpha, float eta, int seed) {
|
696
|
+
if (seed < 0) {
|
697
|
+
seed = std::random_device{}();
|
698
|
+
}
|
699
|
+
return tomoto::IPLDAModel::create((tomoto::TermWeight)tw, latent_topics, 1, alpha, eta, seed);
|
700
|
+
})
|
701
|
+
.define_method(
|
702
|
+
"_add_doc",
|
703
|
+
*[](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
|
704
|
+
self.addDoc(words, labels);
|
705
|
+
})
|
706
|
+
.define_method(
|
707
|
+
"latent_topics",
|
708
|
+
*[](tomoto::IPLDAModel& self) {
|
709
|
+
return self.getNumLatentTopics();
|
710
|
+
});
|
711
|
+
|
712
|
+
Class rb_cSLDA = define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(rb_mTomoto, "SLDA")
|
713
|
+
.define_singleton_method(
|
714
|
+
"_new",
|
715
|
+
*[](size_t tw, size_t k, Array rb_vars, float alpha, float eta, std::vector<float> mu, std::vector<float> nu_sq, std::vector<float> glm_param, int seed) {
|
716
|
+
if (seed < 0) {
|
717
|
+
seed = std::random_device{}();
|
718
|
+
}
|
719
|
+
std::vector<tomoto::ISLDAModel::GLM> vars;
|
720
|
+
vars.reserve(rb_vars.size());
|
721
|
+
for (auto const& v : rb_vars) {
|
722
|
+
vars.push_back((tomoto::ISLDAModel::GLM) from_ruby<int>(v));
|
723
|
+
}
|
724
|
+
return tomoto::ISLDAModel::create((tomoto::TermWeight)tw, k, vars, alpha, eta, mu, nu_sq, glm_param, seed);
|
725
|
+
})
|
726
|
+
.define_method(
|
727
|
+
"_add_doc",
|
728
|
+
*[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<float> y) {
|
729
|
+
self.addDoc(words, y);
|
730
|
+
})
|
731
|
+
.define_method(
|
732
|
+
"f",
|
733
|
+
*[](tomoto::ISLDAModel& self) {
|
734
|
+
return self.getF();
|
735
|
+
})
|
736
|
+
.define_method(
|
737
|
+
"_var_type",
|
738
|
+
*[](tomoto::ISLDAModel& self, size_t var_id) {
|
739
|
+
if (var_id >= self.getF()) throw std::runtime_error{ "'var_id' must be < 'f'" };
|
740
|
+
return self.getTypeOfVar(var_id) == tomoto::ISLDAModel::GLM::linear ? "l" : "b";
|
741
|
+
});
|
245
742
|
}
|
data/ext/tomoto/extconf.rb
CHANGED
@@ -11,6 +11,9 @@ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
|
|
11
11
|
if apple_clang
|
12
12
|
# silence rice warnings
|
13
13
|
$CXXFLAGS += " -Wno-deprecated-declarations"
|
14
|
+
else
|
15
|
+
# silence eigen warnings
|
16
|
+
$CXXFLAGS += " -Wno-ignored-attributes -Wno-deprecated-copy"
|
14
17
|
end
|
15
18
|
|
16
19
|
# silence tomoto warnings
|
data/lib/tomoto.rb
CHANGED
@@ -3,8 +3,18 @@ require "tomoto/ext"
|
|
3
3
|
|
4
4
|
# modules
|
5
5
|
require "tomoto/ct"
|
6
|
+
require "tomoto/dmr"
|
7
|
+
require "tomoto/dt"
|
8
|
+
require "tomoto/gdmr"
|
6
9
|
require "tomoto/hdp"
|
10
|
+
require "tomoto/hlda"
|
11
|
+
require "tomoto/hpa"
|
7
12
|
require "tomoto/lda"
|
13
|
+
require "tomoto/llda"
|
14
|
+
require "tomoto/mglda"
|
15
|
+
require "tomoto/pa"
|
16
|
+
require "tomoto/plda"
|
17
|
+
require "tomoto/slda"
|
8
18
|
require "tomoto/version"
|
9
19
|
|
10
20
|
module Tomoto
|
data/lib/tomoto/ct.rb
CHANGED
@@ -7,5 +7,18 @@ module Tomoto
|
|
7
7
|
model.instance_variable_set(:@rm_top, rm_top)
|
8
8
|
model
|
9
9
|
end
|
10
|
+
|
11
|
+
def correlations(topic_id = nil)
|
12
|
+
prepare
|
13
|
+
if topic_id
|
14
|
+
_correlations(topic_id)
|
15
|
+
else
|
16
|
+
k.times.map { |i| _correlations(i) }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def prior_cov
|
21
|
+
_prior_cov.each_slice(k).to_a
|
22
|
+
end
|
10
23
|
end
|
11
24
|
end
|
data/lib/tomoto/dmr.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class DMR
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, sigma: 1.0, alpha_epsilon: 1e-10, seed: nil)
|
4
|
+
model = _new(to_tw(tw), k, alpha, sigma, eta, alpha_epsilon, seed || -1)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
|
11
|
+
def add_doc(doc, metadata: "")
|
12
|
+
_add_doc(prepare_doc(doc), [metadata])
|
13
|
+
end
|
14
|
+
|
15
|
+
def lambdas
|
16
|
+
if f == 0
|
17
|
+
[]
|
18
|
+
else
|
19
|
+
k.times.map { |i| _lambdas(i) }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/tomoto/dt.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class DT
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, t: 1, alpha_var: 0.1, eta_var: 0.1, phi_var: 0.1, lr_a: 0.01, lr_b: 0.1, lr_c: 0.55) #, seed: nil)
|
4
|
+
model = _new(to_tw(tw), k, t, alpha_var, eta_var, phi_var, lr_a, lr_b, lr_c)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
|
11
|
+
def add_doc(doc, timepoint: 0)
|
12
|
+
_add_doc(prepare_doc(doc), timepoint)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/tomoto/gdmr.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class GDMR
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, degrees: [], alpha: 0.1, eta: 0.01, sigma: 1.0, sigma0: 3.0, alpha_epsilon: 1e-10, seed: nil)
|
4
|
+
model = _new(to_tw(tw), k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed || -1)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
|
11
|
+
def add_doc(doc, metadata: [])
|
12
|
+
_add_doc(prepare_doc(doc), metadata.map(&:to_s))
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/tomoto/hlda.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class HLDA
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, depth: 2, alpha: 0.1, eta: 0.01, gamma: 0.1, seed: nil)
|
4
|
+
model = _new(to_tw(tw), depth, alpha, eta, gamma, seed || -1)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
|
11
|
+
def children_topics(topic_id)
|
12
|
+
check_topic(topic_id)
|
13
|
+
_children_topics(topic_id)
|
14
|
+
end
|
15
|
+
|
16
|
+
def level(topic_id)
|
17
|
+
check_topic(topic_id)
|
18
|
+
_live_topic?(topic_id) ? _level(topic_id) : -1
|
19
|
+
end
|
20
|
+
|
21
|
+
def live_topic?(topic_id)
|
22
|
+
check_topic(topic_id)
|
23
|
+
_live_topic?(topic_id)
|
24
|
+
end
|
25
|
+
|
26
|
+
def num_docs_of_topic(topic_id)
|
27
|
+
check_topic(topic_id)
|
28
|
+
_num_docs_of_topic(topic_id)
|
29
|
+
end
|
30
|
+
|
31
|
+
def parent_topic(topic_id)
|
32
|
+
check_topic(topic_id)
|
33
|
+
_live_topic?(topic_id) ? _parent_topic(topic_id) : -1
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def check_topic(topic_id)
|
39
|
+
raise "topic_id must be < K" if topic_id >= k
|
40
|
+
raise "train() should be called first" unless @prepared
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/tomoto/hpa.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class HPA
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
|
4
|
+
model = _new(to_tw(tw), k1, k2, alpha, eta, seed || -1)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/tomoto/lda.rb
CHANGED
@@ -15,9 +15,7 @@ module Tomoto
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def add_doc(doc)
|
18
|
-
|
19
|
-
doc = doc.split(/[[:space:]]+/) unless doc.is_a?(Array)
|
20
|
-
_add_doc(doc)
|
18
|
+
_add_doc(prepare_doc(doc))
|
21
19
|
end
|
22
20
|
|
23
21
|
def count_by_topics
|
@@ -47,6 +45,10 @@ module Tomoto
|
|
47
45
|
_train(iterations, workers)
|
48
46
|
end
|
49
47
|
|
48
|
+
def tw
|
49
|
+
TERM_WEIGHT[_tw]
|
50
|
+
end
|
51
|
+
|
50
52
|
private
|
51
53
|
|
52
54
|
def prepare
|
@@ -56,6 +58,12 @@ module Tomoto
|
|
56
58
|
end
|
57
59
|
end
|
58
60
|
|
61
|
+
def prepare_doc(doc)
|
62
|
+
raise "cannot add_doc() after train()" if defined?(@prepared)
|
63
|
+
doc = doc.split(/[[:space:]]+/) unless doc.is_a?(Array)
|
64
|
+
doc
|
65
|
+
end
|
66
|
+
|
59
67
|
class << self
|
60
68
|
private
|
61
69
|
|
data/lib/tomoto/llda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class LLDA
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
|
4
|
+
model = _new(to_tw(tw), k, alpha, eta, seed || -1)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
|
11
|
+
def add_doc(doc, labels: [])
|
12
|
+
_add_doc(prepare_doc(doc), labels)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/tomoto/mglda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class MGLDA
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k_g: 1, k_l: 1, t: 3, alpha_g: 0.1, alpha_l: 0.1, alpha_mg: 0.1, alpha_ml: 0.1, eta_g: 0.01) #, eta_l: 0.01, gamma: 0.1, seed: nil)
|
4
|
+
model = _new(to_tw(tw), k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
|
11
|
+
def add_doc(doc, delimiter: ".")
|
12
|
+
_add_doc(prepare_doc(doc), delimiter)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/tomoto/pa.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class PA
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
|
4
|
+
model = _new(to_tw(tw), k1, k2, alpha, eta, seed || -1)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/tomoto/plda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class PLDA
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, latent_topics: 1, alpha: 0.1, eta: 0.01, seed: nil)
|
4
|
+
model = _new(to_tw(tw), latent_topics, alpha, eta, seed || -1)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
|
11
|
+
def add_doc(doc, labels: [])
|
12
|
+
_add_doc(prepare_doc(doc), labels)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/tomoto/slda.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module Tomoto
|
2
|
+
class SLDA
|
3
|
+
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, vars: "", alpha: 0.1, eta: 0.01, mu: [], nu_sq: [], glm_param: [], seed: nil)
|
4
|
+
model = _new(to_tw(tw), k, vars.split("").map { |v| to_glm(v) }, alpha, eta, mu, nu_sq, glm_param, seed || -1)
|
5
|
+
model.instance_variable_set(:@min_cf, min_cf)
|
6
|
+
model.instance_variable_set(:@min_df, min_df)
|
7
|
+
model.instance_variable_set(:@rm_top, rm_top)
|
8
|
+
model
|
9
|
+
end
|
10
|
+
|
11
|
+
def add_doc(doc, y: [])
|
12
|
+
_add_doc(prepare_doc(doc), y)
|
13
|
+
end
|
14
|
+
|
15
|
+
def var_type(var_id)
|
16
|
+
raise "train() should be called first" unless @prepared
|
17
|
+
_var_type(var_id)
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
class << self
|
23
|
+
private
|
24
|
+
|
25
|
+
def to_glm(v)
|
26
|
+
case v
|
27
|
+
when "l"
|
28
|
+
0
|
29
|
+
when "b"
|
30
|
+
1
|
31
|
+
else
|
32
|
+
raise "Invalid var: #{v}"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/tomoto/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomoto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -94,8 +94,18 @@ files:
|
|
94
94
|
- ext/tomoto/extconf.rb
|
95
95
|
- lib/tomoto.rb
|
96
96
|
- lib/tomoto/ct.rb
|
97
|
+
- lib/tomoto/dmr.rb
|
98
|
+
- lib/tomoto/dt.rb
|
99
|
+
- lib/tomoto/gdmr.rb
|
97
100
|
- lib/tomoto/hdp.rb
|
101
|
+
- lib/tomoto/hlda.rb
|
102
|
+
- lib/tomoto/hpa.rb
|
98
103
|
- lib/tomoto/lda.rb
|
104
|
+
- lib/tomoto/llda.rb
|
105
|
+
- lib/tomoto/mglda.rb
|
106
|
+
- lib/tomoto/pa.rb
|
107
|
+
- lib/tomoto/plda.rb
|
108
|
+
- lib/tomoto/slda.rb
|
99
109
|
- lib/tomoto/version.rb
|
100
110
|
- vendor/EigenRand/EigenRand/Core.h
|
101
111
|
- vendor/EigenRand/EigenRand/Dists/Basic.h
|