tomoto 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -0
- data/ext/tomoto/ct.cpp +54 -0
- data/ext/tomoto/dmr.cpp +62 -0
- data/ext/tomoto/dt.cpp +82 -0
- data/ext/tomoto/ext.cpp +27 -773
- data/ext/tomoto/gdmr.cpp +34 -0
- data/ext/tomoto/hdp.cpp +42 -0
- data/ext/tomoto/hlda.cpp +66 -0
- data/ext/tomoto/hpa.cpp +27 -0
- data/ext/tomoto/lda.cpp +250 -0
- data/ext/tomoto/llda.cpp +29 -0
- data/ext/tomoto/mglda.cpp +71 -0
- data/ext/tomoto/pa.cpp +27 -0
- data/ext/tomoto/plda.cpp +29 -0
- data/ext/tomoto/slda.cpp +40 -0
- data/ext/tomoto/utils.h +84 -0
- data/lib/tomoto/tomoto.bundle +0 -0
- data/lib/tomoto/tomoto.so +0 -0
- data/lib/tomoto/version.rb +1 -1
- data/vendor/tomotopy/README.kr.rst +12 -3
- data/vendor/tomotopy/README.rst +12 -3
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +47 -2
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +21 -151
- data/vendor/tomotopy/src/Labeling/Labeler.h +5 -3
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +518 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/DT.h +1 -1
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +8 -23
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +9 -18
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +56 -58
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +4 -14
- data/vendor/tomotopy/src/TopicModel/LDA.h +69 -17
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +108 -61
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +7 -8
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +26 -16
- data/vendor/tomotopy/src/TopicModel/PT.h +27 -0
- data/vendor/tomotopy/src/TopicModel/PTModel.cpp +10 -0
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +273 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +16 -11
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +3 -2
- data/vendor/tomotopy/src/Utils/Trie.hpp +39 -8
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +36 -38
- data/vendor/tomotopy/src/Utils/Utils.hpp +50 -45
- data/vendor/tomotopy/src/Utils/math.h +8 -4
- data/vendor/tomotopy/src/Utils/tvector.hpp +4 -0
- metadata +24 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b1cb95a96851ccd3d499ed38c9da531ce48588cf44c37ccc92bbfdc9277e0962
|
4
|
+
data.tar.gz: cadee081b1f0ea9cc37b75afd97e8ecebb32796cc335da2ff50e844c955a0e4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5154bfb71c0b8891953b97c8edf37a7fb70fcb2ab09c3f51126e14262c729dcdc4b82d2727a8601131e090a05efcd1958851d77b5e8e95b922fc9b1f44cedf6
|
7
|
+
data.tar.gz: f975f505493d41bc425e0d288762e97c83ffdb6c1812792bc2dca517c550f1508efef79a24bdde992d7acd3994d6566c27745b9bed806ea64dfa072d22c692a0
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
MIT License
|
2
2
|
|
3
3
|
Copyright (c) 2019, bab2min
|
4
|
-
Copyright (c) 2020 Andrew Kane
|
4
|
+
Copyright (c) 2020-2021 Andrew Kane
|
5
5
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
data/ext/tomoto/ct.cpp
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#include <CT.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
void init_ct(Rice::Module& m) {
|
8
|
+
Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
|
9
|
+
.define_singleton_method(
|
10
|
+
"_new",
|
11
|
+
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
12
|
+
if (seed < 0) {
|
13
|
+
seed = std::random_device{}();
|
14
|
+
}
|
15
|
+
return tomoto::ICTModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
16
|
+
})
|
17
|
+
.define_method(
|
18
|
+
"_correlations",
|
19
|
+
*[](tomoto::ICTModel& self, tomoto::Tid topic_id) {
|
20
|
+
return self.getCorrelationTopic(topic_id);
|
21
|
+
})
|
22
|
+
.define_method(
|
23
|
+
"num_beta_sample",
|
24
|
+
*[](tomoto::ICTModel& self) {
|
25
|
+
return self.getNumBetaSample();
|
26
|
+
})
|
27
|
+
.define_method(
|
28
|
+
"num_beta_sample=",
|
29
|
+
*[](tomoto::ICTModel& self, size_t value) {
|
30
|
+
self.setNumBetaSample(value);
|
31
|
+
return value;
|
32
|
+
})
|
33
|
+
.define_method(
|
34
|
+
"num_tmn_sample",
|
35
|
+
*[](tomoto::ICTModel& self) {
|
36
|
+
return self.getNumTMNSample();
|
37
|
+
})
|
38
|
+
.define_method(
|
39
|
+
"num_tmn_sample=",
|
40
|
+
*[](tomoto::ICTModel& self, size_t value) {
|
41
|
+
self.setNumTMNSample(value);
|
42
|
+
return value;
|
43
|
+
})
|
44
|
+
.define_method(
|
45
|
+
"_prior_cov",
|
46
|
+
*[](tomoto::ICTModel& self) {
|
47
|
+
return self.getPriorCov();
|
48
|
+
})
|
49
|
+
.define_method(
|
50
|
+
"prior_mean",
|
51
|
+
*[](tomoto::ICTModel& self) {
|
52
|
+
return self.getPriorMean();
|
53
|
+
});
|
54
|
+
}
|
data/ext/tomoto/dmr.cpp
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
#include <DMR.h>
|
2
|
+
|
3
|
+
#include <rice/Class.hpp>
|
4
|
+
#include <rice/Module.hpp>
|
5
|
+
|
6
|
+
#include "utils.h"
|
7
|
+
|
8
|
+
void init_dmr(Rice::Module& m) {
|
9
|
+
Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
|
10
|
+
.define_singleton_method(
|
11
|
+
"_new",
|
12
|
+
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
|
13
|
+
if (seed < 0) {
|
14
|
+
seed = std::random_device{}();
|
15
|
+
}
|
16
|
+
return tomoto::IDMRModel::create((tomoto::TermWeight)tw, k, alpha, sigma, eta, alpha_epsilon, seed);
|
17
|
+
})
|
18
|
+
.define_method(
|
19
|
+
"_add_doc",
|
20
|
+
*[](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
|
21
|
+
auto doc = buildDoc(words);
|
22
|
+
doc.misc["metadata"] = metadata;
|
23
|
+
return self.addDoc(doc);
|
24
|
+
})
|
25
|
+
.define_method(
|
26
|
+
"alpha_epsilon",
|
27
|
+
*[](tomoto::IDMRModel& self) {
|
28
|
+
return self.getAlphaEps();
|
29
|
+
})
|
30
|
+
.define_method(
|
31
|
+
"alpha_epsilon=",
|
32
|
+
*[](tomoto::IDMRModel& self, tomoto::Float value) {
|
33
|
+
self.setAlphaEps(value);
|
34
|
+
return value;
|
35
|
+
})
|
36
|
+
.define_method(
|
37
|
+
"f",
|
38
|
+
*[](tomoto::IDMRModel& self) {
|
39
|
+
return self.getF();
|
40
|
+
})
|
41
|
+
.define_method(
|
42
|
+
"_lambdas",
|
43
|
+
*[](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
|
44
|
+
return self.getLambdaByTopic(topic_id);
|
45
|
+
})
|
46
|
+
.define_method(
|
47
|
+
"metadata_dict",
|
48
|
+
*[](tomoto::IDMRModel& self) {
|
49
|
+
auto dict = self.getMetadataDict();
|
50
|
+
Array res;
|
51
|
+
auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
|
52
|
+
for (size_t i = 0; i < dict.size(); i++) {
|
53
|
+
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
54
|
+
}
|
55
|
+
return res;
|
56
|
+
})
|
57
|
+
.define_method(
|
58
|
+
"sigma",
|
59
|
+
*[](tomoto::IDMRModel& self) {
|
60
|
+
return self.getSigma();
|
61
|
+
});
|
62
|
+
}
|
data/ext/tomoto/dt.cpp
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#include <DT.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
void init_dt(Rice::Module& m) {
|
8
|
+
Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
|
9
|
+
.define_singleton_method(
|
10
|
+
"_new",
|
11
|
+
*[](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
|
12
|
+
// Rice only supports 10 arguments
|
13
|
+
int seed = -1;
|
14
|
+
if (seed < 0) {
|
15
|
+
seed = std::random_device{}();
|
16
|
+
}
|
17
|
+
return tomoto::IDTModel::create((tomoto::TermWeight)tw, k, t, alphaVar, etaVar, phiVar, shapeA, shapeB, shapeC, 0, seed);
|
18
|
+
})
|
19
|
+
.define_method(
|
20
|
+
"_add_doc",
|
21
|
+
*[](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
|
22
|
+
auto doc = buildDoc(words);
|
23
|
+
doc.misc["timepoint"] = timepoint;
|
24
|
+
return self.addDoc(doc);
|
25
|
+
})
|
26
|
+
.define_method(
|
27
|
+
"alpha",
|
28
|
+
*[](tomoto::IDTModel& self) {
|
29
|
+
Array res;
|
30
|
+
for (size_t i = 0; i < self.getK(); i++) {
|
31
|
+
Array res2;
|
32
|
+
for (size_t j = 0; j < self.getT(); j++) {
|
33
|
+
res2.push(self.getAlpha(i, j));
|
34
|
+
}
|
35
|
+
res.push(res2);
|
36
|
+
}
|
37
|
+
return res;
|
38
|
+
})
|
39
|
+
.define_method(
|
40
|
+
"lr_a",
|
41
|
+
*[](tomoto::IDTModel& self) {
|
42
|
+
return self.getShapeA();
|
43
|
+
})
|
44
|
+
.define_method(
|
45
|
+
"lr_a=",
|
46
|
+
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
47
|
+
self.setShapeA(value);
|
48
|
+
return value;
|
49
|
+
})
|
50
|
+
.define_method(
|
51
|
+
"lr_b",
|
52
|
+
*[](tomoto::IDTModel& self) {
|
53
|
+
return self.getShapeB();
|
54
|
+
})
|
55
|
+
.define_method(
|
56
|
+
"lr_b=",
|
57
|
+
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
58
|
+
self.setShapeB(value);
|
59
|
+
return value;
|
60
|
+
})
|
61
|
+
.define_method(
|
62
|
+
"lr_c",
|
63
|
+
*[](tomoto::IDTModel& self) {
|
64
|
+
return self.getShapeC();
|
65
|
+
})
|
66
|
+
.define_method(
|
67
|
+
"lr_c=",
|
68
|
+
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
69
|
+
self.setShapeC(value);
|
70
|
+
return value;
|
71
|
+
})
|
72
|
+
.define_method(
|
73
|
+
"num_docs_by_timepoint",
|
74
|
+
*[](tomoto::IDTModel& self) {
|
75
|
+
return self.getNumDocsByT();
|
76
|
+
})
|
77
|
+
.define_method(
|
78
|
+
"num_timepoints",
|
79
|
+
*[](tomoto::IDTModel& self) {
|
80
|
+
return self.getT();
|
81
|
+
});
|
82
|
+
}
|
data/ext/tomoto/ext.cpp
CHANGED
@@ -1,111 +1,23 @@
|
|
1
|
-
// stdlib
|
2
|
-
#include <fstream>
|
3
|
-
#include <iostream>
|
4
|
-
|
5
|
-
// tomoto
|
6
|
-
#include <CT.h>
|
7
|
-
#include <DMR.h>
|
8
|
-
#include <DT.h>
|
9
|
-
#include <GDMR.h>
|
10
|
-
#include <HDP.h>
|
11
|
-
#include <HLDA.h>
|
12
|
-
#include <HPA.h>
|
13
|
-
#include <LDA.h>
|
14
|
-
#include <LLDA.h>
|
15
|
-
#include <MGLDA.h>
|
16
|
-
#include <PA.h>
|
17
|
-
#include <PLDA.h>
|
18
|
-
#include <SLDA.h>
|
19
|
-
|
20
|
-
// rice
|
21
|
-
#include <rice/Array.hpp>
|
22
|
-
#include <rice/Hash.hpp>
|
23
1
|
#include <rice/Module.hpp>
|
24
2
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
res.push(v);
|
39
|
-
}
|
40
|
-
return res;
|
41
|
-
}
|
42
|
-
|
43
|
-
template<>
|
44
|
-
Object to_ruby<std::vector<uint32_t>>(std::vector<uint32_t> const & x)
|
45
|
-
{
|
46
|
-
Array res;
|
47
|
-
for (auto const& v : x) {
|
48
|
-
res.push(v);
|
49
|
-
}
|
50
|
-
return res;
|
51
|
-
}
|
52
|
-
|
53
|
-
template<>
|
54
|
-
Object to_ruby<std::vector<uint64_t>>(std::vector<uint64_t> const & x)
|
55
|
-
{
|
56
|
-
Array res;
|
57
|
-
for (auto const& v : x) {
|
58
|
-
res.push(v);
|
59
|
-
}
|
60
|
-
return res;
|
61
|
-
}
|
62
|
-
|
63
|
-
template<>
|
64
|
-
std::vector<std::string> from_ruby<std::vector<std::string>>(Object x)
|
65
|
-
{
|
66
|
-
Array a = Array(x);
|
67
|
-
std::vector<std::string> res;
|
68
|
-
res.reserve(a.size());
|
69
|
-
for (auto const& v : a) {
|
70
|
-
res.push_back(from_ruby<std::string>(v));
|
71
|
-
}
|
72
|
-
return res;
|
73
|
-
}
|
74
|
-
|
75
|
-
template<>
|
76
|
-
std::vector<tomoto::Float> from_ruby<std::vector<tomoto::Float>>(Object x)
|
77
|
-
{
|
78
|
-
Array a = Array(x);
|
79
|
-
std::vector<tomoto::Float> res;
|
80
|
-
res.reserve(a.size());
|
81
|
-
for (auto const& v : a) {
|
82
|
-
res.push_back(from_ruby<tomoto::Float>(v));
|
83
|
-
}
|
84
|
-
return res;
|
85
|
-
}
|
86
|
-
|
87
|
-
template<>
|
88
|
-
std::vector<uint64_t> from_ruby<std::vector<uint64_t>>(Object x)
|
89
|
-
{
|
90
|
-
Array a = Array(x);
|
91
|
-
std::vector<uint64_t> res;
|
92
|
-
res.reserve(a.size());
|
93
|
-
for (auto const& v : a) {
|
94
|
-
res.push_back(from_ruby<uint64_t>(v));
|
95
|
-
}
|
96
|
-
return res;
|
97
|
-
}
|
98
|
-
|
99
|
-
tomoto::RawDoc buildDoc(std::vector<std::string>& words) {
|
100
|
-
tomoto::RawDoc doc;
|
101
|
-
doc.rawWords = words;
|
102
|
-
return doc;
|
103
|
-
}
|
3
|
+
void init_lda(Rice::Module& m);
|
4
|
+
void init_ct(Rice::Module& m);
|
5
|
+
void init_dmr(Rice::Module& m);
|
6
|
+
void init_dt(Rice::Module& m);
|
7
|
+
void init_gdmr(Rice::Module& m);
|
8
|
+
void init_hdp(Rice::Module& m);
|
9
|
+
void init_hlda(Rice::Module& m);
|
10
|
+
void init_pa(Rice::Module& m);
|
11
|
+
void init_hpa(Rice::Module& m);
|
12
|
+
void init_mglda(Rice::Module& m);
|
13
|
+
void init_llda(Rice::Module& m);
|
14
|
+
void init_plda(Rice::Module& m);
|
15
|
+
void init_slda(Rice::Module& m);
|
104
16
|
|
105
17
|
extern "C"
|
106
18
|
void Init_ext()
|
107
19
|
{
|
108
|
-
|
20
|
+
auto m = Rice::define_module("Tomoto")
|
109
21
|
.define_singleton_method(
|
110
22
|
"isa",
|
111
23
|
*[]() {
|
@@ -120,675 +32,17 @@ void Init_ext()
|
|
120
32
|
#endif
|
121
33
|
});
|
122
34
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
})
|
137
|
-
.define_method(
|
138
|
-
"alpha",
|
139
|
-
*[](tomoto::ILDAModel& self) {
|
140
|
-
Array res;
|
141
|
-
for (size_t i = 0; i < self.getK(); i++) {
|
142
|
-
res.push(self.getAlpha(i));
|
143
|
-
}
|
144
|
-
return res;
|
145
|
-
})
|
146
|
-
.define_method(
|
147
|
-
"burn_in",
|
148
|
-
*[](tomoto::ILDAModel& self) {
|
149
|
-
return self.getBurnInIteration();
|
150
|
-
})
|
151
|
-
.define_method(
|
152
|
-
"burn_in=",
|
153
|
-
*[](tomoto::ILDAModel& self, size_t iteration) {
|
154
|
-
self.setBurnInIteration(iteration);
|
155
|
-
return iteration;
|
156
|
-
})
|
157
|
-
.define_method(
|
158
|
-
"_count_by_topics",
|
159
|
-
*[](tomoto::ILDAModel& self) {
|
160
|
-
Array res;
|
161
|
-
for (auto const& v : self.getCountByTopic()) {
|
162
|
-
res.push(v);
|
163
|
-
}
|
164
|
-
return res;
|
165
|
-
})
|
166
|
-
.define_method(
|
167
|
-
"eta",
|
168
|
-
*[](tomoto::ILDAModel& self) {
|
169
|
-
return self.getEta();
|
170
|
-
})
|
171
|
-
.define_method(
|
172
|
-
"global_step",
|
173
|
-
*[](tomoto::ILDAModel& self) {
|
174
|
-
return self.getGlobalStep();
|
175
|
-
})
|
176
|
-
.define_method(
|
177
|
-
"k",
|
178
|
-
*[](tomoto::ILDAModel& self) {
|
179
|
-
return self.getK();
|
180
|
-
})
|
181
|
-
.define_method(
|
182
|
-
"_load",
|
183
|
-
*[](tomoto::ILDAModel& self, const char* filename) {
|
184
|
-
std::ifstream str{ filename, std::ios_base::binary };
|
185
|
-
if (!str) throw std::runtime_error{ std::string("cannot open file '") + filename + std::string("'") };
|
186
|
-
std::vector<uint8_t> extra_data;
|
187
|
-
self.loadModel(str, &extra_data);
|
188
|
-
})
|
189
|
-
.define_method(
|
190
|
-
"ll_per_word",
|
191
|
-
*[](tomoto::ILDAModel& self) {
|
192
|
-
return self.getLLPerWord();
|
193
|
-
})
|
194
|
-
.define_method(
|
195
|
-
"num_docs",
|
196
|
-
*[](tomoto::ILDAModel& self) {
|
197
|
-
return self.getNumDocs();
|
198
|
-
})
|
199
|
-
.define_method(
|
200
|
-
"num_vocabs",
|
201
|
-
*[](tomoto::ILDAModel& self) {
|
202
|
-
return self.getV();
|
203
|
-
})
|
204
|
-
.define_method(
|
205
|
-
"num_words",
|
206
|
-
*[](tomoto::ILDAModel& self) {
|
207
|
-
return self.getN();
|
208
|
-
})
|
209
|
-
.define_method(
|
210
|
-
"optim_interval",
|
211
|
-
*[](tomoto::ILDAModel& self) {
|
212
|
-
return self.getOptimInterval();
|
213
|
-
})
|
214
|
-
.define_method(
|
215
|
-
"optim_interval=",
|
216
|
-
*[](tomoto::ILDAModel& self, size_t value) {
|
217
|
-
self.setOptimInterval(value);
|
218
|
-
return value;
|
219
|
-
})
|
220
|
-
.define_method(
|
221
|
-
"perplexity",
|
222
|
-
*[](tomoto::ILDAModel& self) {
|
223
|
-
return self.getPerplexity();
|
224
|
-
})
|
225
|
-
.define_method(
|
226
|
-
"_prepare",
|
227
|
-
*[](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
|
228
|
-
self.prepare(true, minCnt, minDf, rmTop);
|
229
|
-
})
|
230
|
-
.define_method(
|
231
|
-
"_removed_top_words",
|
232
|
-
*[](tomoto::ILDAModel& self, size_t rmTop) {
|
233
|
-
Array res;
|
234
|
-
auto dict = self.getVocabDict();
|
235
|
-
size_t size = dict.size();
|
236
|
-
for (size_t i = rmTop; i > 0; i--) {
|
237
|
-
res.push(dict.toWord(size - i));
|
238
|
-
}
|
239
|
-
return res;
|
240
|
-
})
|
241
|
-
.define_method(
|
242
|
-
"_save",
|
243
|
-
*[](tomoto::ILDAModel& self, const char* filename, bool full) {
|
244
|
-
std::ofstream str{ filename, std::ios_base::binary };
|
245
|
-
std::vector<uint8_t> extra_data;
|
246
|
-
self.saveModel(str, full, &extra_data);
|
247
|
-
})
|
248
|
-
.define_method(
|
249
|
-
"_topic_words",
|
250
|
-
*[](tomoto::ILDAModel& self, size_t topicId, size_t topN) {
|
251
|
-
Hash res;
|
252
|
-
for (auto const& v : self.getWordsByTopicSorted(topicId, topN)) {
|
253
|
-
res[v.first] = v.second;
|
254
|
-
}
|
255
|
-
return res;
|
256
|
-
})
|
257
|
-
.define_method(
|
258
|
-
"_train",
|
259
|
-
*[](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
|
260
|
-
self.train(iteration, workers, (tomoto::ParallelScheme)ps);
|
261
|
-
})
|
262
|
-
.define_method(
|
263
|
-
"_tw",
|
264
|
-
*[](tomoto::ILDAModel& self) {
|
265
|
-
return (int)self.getTermWeight();
|
266
|
-
})
|
267
|
-
.define_method(
|
268
|
-
"used_vocab_df",
|
269
|
-
*[](tomoto::ILDAModel& self) {
|
270
|
-
auto vocab = self.getVocabDf();
|
271
|
-
Array res;
|
272
|
-
for (size_t i = 0; i < self.getV(); i++) {
|
273
|
-
res.push(vocab[i]);
|
274
|
-
}
|
275
|
-
return res;
|
276
|
-
})
|
277
|
-
.define_method(
|
278
|
-
"used_vocab_freq",
|
279
|
-
*[](tomoto::ILDAModel& self) {
|
280
|
-
auto vocab = self.getVocabCf();
|
281
|
-
Array res;
|
282
|
-
for (size_t i = 0; i < self.getV(); i++) {
|
283
|
-
res.push(vocab[i]);
|
284
|
-
}
|
285
|
-
return res;
|
286
|
-
})
|
287
|
-
.define_method(
|
288
|
-
"used_vocabs",
|
289
|
-
*[](tomoto::ILDAModel& self) {
|
290
|
-
auto dict = self.getVocabDict();
|
291
|
-
Array res;
|
292
|
-
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
293
|
-
for (size_t i = 0; i < self.getV(); i++) {
|
294
|
-
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
295
|
-
}
|
296
|
-
return res;
|
297
|
-
})
|
298
|
-
.define_method(
|
299
|
-
"vocab_df",
|
300
|
-
*[](tomoto::ILDAModel& self) {
|
301
|
-
auto vocab = self.getVocabDf();
|
302
|
-
Array res;
|
303
|
-
for (size_t i = 0; i < vocab.size(); i++) {
|
304
|
-
res.push(vocab[i]);
|
305
|
-
}
|
306
|
-
return res;
|
307
|
-
})
|
308
|
-
.define_method(
|
309
|
-
"vocab_freq",
|
310
|
-
*[](tomoto::ILDAModel& self) {
|
311
|
-
auto vocab = self.getVocabCf();
|
312
|
-
Array res;
|
313
|
-
for (size_t i = 0; i < vocab.size(); i++) {
|
314
|
-
res.push(vocab[i]);
|
315
|
-
}
|
316
|
-
return res;
|
317
|
-
})
|
318
|
-
.define_method(
|
319
|
-
"vocabs",
|
320
|
-
*[](tomoto::ILDAModel& self) {
|
321
|
-
auto dict = self.getVocabDict();
|
322
|
-
Array res;
|
323
|
-
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
324
|
-
for (size_t i = 0; i < dict.size(); i++) {
|
325
|
-
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
326
|
-
}
|
327
|
-
return res;
|
328
|
-
});
|
329
|
-
|
330
|
-
Class rb_cCT = define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(rb_mTomoto, "CT")
|
331
|
-
.define_singleton_method(
|
332
|
-
"_new",
|
333
|
-
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
334
|
-
if (seed < 0) {
|
335
|
-
seed = std::random_device{}();
|
336
|
-
}
|
337
|
-
return tomoto::ICTModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
338
|
-
})
|
339
|
-
.define_method(
|
340
|
-
"_correlations",
|
341
|
-
*[](tomoto::ICTModel& self, tomoto::Tid topic_id) {
|
342
|
-
return self.getCorrelationTopic(topic_id);
|
343
|
-
})
|
344
|
-
.define_method(
|
345
|
-
"num_beta_sample",
|
346
|
-
*[](tomoto::ICTModel& self) {
|
347
|
-
return self.getNumBetaSample();
|
348
|
-
})
|
349
|
-
.define_method(
|
350
|
-
"num_beta_sample=",
|
351
|
-
*[](tomoto::ICTModel& self, size_t value) {
|
352
|
-
self.setNumBetaSample(value);
|
353
|
-
return value;
|
354
|
-
})
|
355
|
-
.define_method(
|
356
|
-
"num_tmn_sample",
|
357
|
-
*[](tomoto::ICTModel& self) {
|
358
|
-
return self.getNumTMNSample();
|
359
|
-
})
|
360
|
-
.define_method(
|
361
|
-
"num_tmn_sample=",
|
362
|
-
*[](tomoto::ICTModel& self, size_t value) {
|
363
|
-
self.setNumTMNSample(value);
|
364
|
-
return value;
|
365
|
-
})
|
366
|
-
.define_method(
|
367
|
-
"_prior_cov",
|
368
|
-
*[](tomoto::ICTModel& self) {
|
369
|
-
return self.getPriorCov();
|
370
|
-
})
|
371
|
-
.define_method(
|
372
|
-
"prior_mean",
|
373
|
-
*[](tomoto::ICTModel& self) {
|
374
|
-
return self.getPriorMean();
|
375
|
-
});
|
376
|
-
|
377
|
-
Class rb_cDMR = define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(rb_mTomoto, "DMR")
|
378
|
-
.define_singleton_method(
|
379
|
-
"_new",
|
380
|
-
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
|
381
|
-
if (seed < 0) {
|
382
|
-
seed = std::random_device{}();
|
383
|
-
}
|
384
|
-
return tomoto::IDMRModel::create((tomoto::TermWeight)tw, k, alpha, sigma, eta, alpha_epsilon, seed);
|
385
|
-
})
|
386
|
-
.define_method(
|
387
|
-
"_add_doc",
|
388
|
-
*[](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
|
389
|
-
auto doc = buildDoc(words);
|
390
|
-
doc.misc["metadata"] = metadata;
|
391
|
-
self.addDoc(doc);
|
392
|
-
})
|
393
|
-
.define_method(
|
394
|
-
"alpha_epsilon",
|
395
|
-
*[](tomoto::IDMRModel& self) {
|
396
|
-
return self.getAlphaEps();
|
397
|
-
})
|
398
|
-
.define_method(
|
399
|
-
"alpha_epsilon=",
|
400
|
-
*[](tomoto::IDMRModel& self, tomoto::Float value) {
|
401
|
-
self.setAlphaEps(value);
|
402
|
-
return value;
|
403
|
-
})
|
404
|
-
.define_method(
|
405
|
-
"f",
|
406
|
-
*[](tomoto::IDMRModel& self) {
|
407
|
-
return self.getF();
|
408
|
-
})
|
409
|
-
.define_method(
|
410
|
-
"_lambdas",
|
411
|
-
*[](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
|
412
|
-
return self.getLambdaByTopic(topic_id);
|
413
|
-
})
|
414
|
-
.define_method(
|
415
|
-
"metadata_dict",
|
416
|
-
*[](tomoto::IDMRModel& self) {
|
417
|
-
auto dict = self.getMetadataDict();
|
418
|
-
Array res;
|
419
|
-
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
420
|
-
for (size_t i = 0; i < dict.size(); i++) {
|
421
|
-
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
422
|
-
}
|
423
|
-
return res;
|
424
|
-
})
|
425
|
-
.define_method(
|
426
|
-
"sigma",
|
427
|
-
*[](tomoto::IDMRModel& self) {
|
428
|
-
return self.getSigma();
|
429
|
-
});
|
430
|
-
|
431
|
-
Class rb_cDT = define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(rb_mTomoto, "DT")
|
432
|
-
.define_singleton_method(
|
433
|
-
"_new",
|
434
|
-
*[](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
|
435
|
-
// Rice only supports 10 arguments
|
436
|
-
int seed = -1;
|
437
|
-
if (seed < 0) {
|
438
|
-
seed = std::random_device{}();
|
439
|
-
}
|
440
|
-
return tomoto::IDTModel::create((tomoto::TermWeight)tw, k, t, alphaVar, etaVar, phiVar, shapeA, shapeB, shapeC, 0, seed);
|
441
|
-
})
|
442
|
-
.define_method(
|
443
|
-
"_add_doc",
|
444
|
-
*[](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
|
445
|
-
auto doc = buildDoc(words);
|
446
|
-
doc.misc["timepoint"] = timepoint;
|
447
|
-
self.addDoc(doc);
|
448
|
-
})
|
449
|
-
.define_method(
|
450
|
-
"lr_a",
|
451
|
-
*[](tomoto::IDTModel& self) {
|
452
|
-
return self.getShapeA();
|
453
|
-
})
|
454
|
-
.define_method(
|
455
|
-
"lr_a=",
|
456
|
-
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
457
|
-
self.setShapeA(value);
|
458
|
-
return value;
|
459
|
-
})
|
460
|
-
.define_method(
|
461
|
-
"lr_b",
|
462
|
-
*[](tomoto::IDTModel& self) {
|
463
|
-
return self.getShapeB();
|
464
|
-
})
|
465
|
-
.define_method(
|
466
|
-
"lr_b=",
|
467
|
-
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
468
|
-
self.setShapeB(value);
|
469
|
-
return value;
|
470
|
-
})
|
471
|
-
.define_method(
|
472
|
-
"lr_c",
|
473
|
-
*[](tomoto::IDTModel& self) {
|
474
|
-
return self.getShapeC();
|
475
|
-
})
|
476
|
-
.define_method(
|
477
|
-
"lr_c=",
|
478
|
-
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
479
|
-
self.setShapeC(value);
|
480
|
-
return value;
|
481
|
-
})
|
482
|
-
.define_method(
|
483
|
-
"num_docs_by_timepoint",
|
484
|
-
*[](tomoto::IDTModel& self) {
|
485
|
-
return self.getNumDocsByT();
|
486
|
-
})
|
487
|
-
.define_method(
|
488
|
-
"num_timepoints",
|
489
|
-
*[](tomoto::IDTModel& self) {
|
490
|
-
return self.getT();
|
491
|
-
});
|
492
|
-
|
493
|
-
Class rb_cGDMR = define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(rb_mTomoto, "GDMR")
|
494
|
-
.define_singleton_method(
|
495
|
-
"_new",
|
496
|
-
*[](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
|
497
|
-
if (seed < 0) {
|
498
|
-
seed = std::random_device{}();
|
499
|
-
}
|
500
|
-
return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed);
|
501
|
-
})
|
502
|
-
.define_method(
|
503
|
-
"_add_doc",
|
504
|
-
*[](tomoto::IDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> metadata) {
|
505
|
-
auto doc = buildDoc(words);
|
506
|
-
doc.misc["metadata"] = metadata;
|
507
|
-
self.addDoc(doc);
|
508
|
-
})
|
509
|
-
.define_method(
|
510
|
-
"degrees",
|
511
|
-
*[](tomoto::IGDMRModel& self) {
|
512
|
-
return self.getFs();
|
513
|
-
})
|
514
|
-
.define_method(
|
515
|
-
"sigma0",
|
516
|
-
*[](tomoto::IGDMRModel& self) {
|
517
|
-
return self.getSigma0();
|
518
|
-
});
|
519
|
-
|
520
|
-
Class rb_cHDP = define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(rb_mTomoto, "HDP")
|
521
|
-
.define_singleton_method(
|
522
|
-
"_new",
|
523
|
-
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
|
524
|
-
if (seed < 0) {
|
525
|
-
seed = std::random_device{}();
|
526
|
-
}
|
527
|
-
return tomoto::IHDPModel::create((tomoto::TermWeight)tw, k, alpha, eta, gamma, seed);
|
528
|
-
})
|
529
|
-
.define_method(
|
530
|
-
"alpha",
|
531
|
-
*[](tomoto::IHDPModel& self) {
|
532
|
-
return self.getAlpha();
|
533
|
-
})
|
534
|
-
.define_method(
|
535
|
-
"gamma",
|
536
|
-
*[](tomoto::IHDPModel& self) {
|
537
|
-
return self.getGamma();
|
538
|
-
})
|
539
|
-
.define_method(
|
540
|
-
"live_k",
|
541
|
-
*[](tomoto::IHDPModel& self) {
|
542
|
-
return self.getLiveK();
|
543
|
-
})
|
544
|
-
.define_method(
|
545
|
-
"live_topic?",
|
546
|
-
*[](tomoto::IHDPModel& self, size_t tid) {
|
547
|
-
return self.isLiveTopic(tid);
|
548
|
-
})
|
549
|
-
.define_method(
|
550
|
-
"num_tables",
|
551
|
-
*[](tomoto::IHDPModel& self) {
|
552
|
-
return self.getTotalTables();
|
553
|
-
});
|
554
|
-
|
555
|
-
Class rb_cHLDA = define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(rb_mTomoto, "HLDA")
|
556
|
-
.define_singleton_method(
|
557
|
-
"_new",
|
558
|
-
*[](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
|
559
|
-
if (seed < 0) {
|
560
|
-
seed = std::random_device{}();
|
561
|
-
}
|
562
|
-
return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, levelDepth, alpha, eta, gamma, seed);
|
563
|
-
})
|
564
|
-
.define_method(
|
565
|
-
"alpha",
|
566
|
-
*[](tomoto::IHLDAModel& self) {
|
567
|
-
Array res;
|
568
|
-
for (size_t i = 0; i < self.getLevelDepth(); i++) {
|
569
|
-
res.push(self.getAlpha(i));
|
570
|
-
}
|
571
|
-
return res;
|
572
|
-
})
|
573
|
-
.define_method(
|
574
|
-
"_children_topics",
|
575
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
576
|
-
return self.getChildTopicId(topic_id);
|
577
|
-
})
|
578
|
-
.define_method(
|
579
|
-
"depth",
|
580
|
-
*[](tomoto::IHLDAModel& self) {
|
581
|
-
return self.getLevelDepth();
|
582
|
-
})
|
583
|
-
.define_method(
|
584
|
-
"gamma",
|
585
|
-
*[](tomoto::IHLDAModel& self) {
|
586
|
-
return self.getGamma();
|
587
|
-
})
|
588
|
-
.define_method(
|
589
|
-
"_level",
|
590
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
591
|
-
return self.getLevelOfTopic(topic_id);
|
592
|
-
})
|
593
|
-
.define_method(
|
594
|
-
"live_k",
|
595
|
-
*[](tomoto::IHLDAModel& self) {
|
596
|
-
return self.getLiveK();
|
597
|
-
})
|
598
|
-
.define_method(
|
599
|
-
"_live_topic?",
|
600
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
601
|
-
return self.isLiveTopic(topic_id);
|
602
|
-
})
|
603
|
-
.define_method(
|
604
|
-
"_num_docs_of_topic",
|
605
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
606
|
-
return self.getNumDocsOfTopic(topic_id);
|
607
|
-
})
|
608
|
-
.define_method(
|
609
|
-
"_parent_topic",
|
610
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
611
|
-
return self.getParentTopicId(topic_id);
|
612
|
-
});
|
613
|
-
|
614
|
-
Class rb_cPA = define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(rb_mTomoto, "PA")
|
615
|
-
.define_singleton_method(
|
616
|
-
"_new",
|
617
|
-
*[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
618
|
-
if (seed < 0) {
|
619
|
-
seed = std::random_device{}();
|
620
|
-
}
|
621
|
-
return tomoto::IPAModel::create((tomoto::TermWeight)tw, k1, k2, alpha, eta, seed);
|
622
|
-
})
|
623
|
-
.define_method(
|
624
|
-
"k1",
|
625
|
-
*[](tomoto::IPAModel& self) {
|
626
|
-
return self.getK();
|
627
|
-
})
|
628
|
-
.define_method(
|
629
|
-
"k2",
|
630
|
-
*[](tomoto::IPAModel& self) {
|
631
|
-
return self.getK2();
|
632
|
-
});
|
633
|
-
|
634
|
-
Class rb_cHPA = define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(rb_mTomoto, "HPA")
|
635
|
-
.define_singleton_method(
|
636
|
-
"_new",
|
637
|
-
*[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
638
|
-
if (seed < 0) {
|
639
|
-
seed = std::random_device{}();
|
640
|
-
}
|
641
|
-
return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, k1, k2, alpha, eta, seed);
|
642
|
-
})
|
643
|
-
.define_method(
|
644
|
-
"alpha",
|
645
|
-
*[](tomoto::IHPAModel& self) {
|
646
|
-
Array res;
|
647
|
-
// use <= to return k+1 elements
|
648
|
-
for (size_t i = 0; i <= self.getK(); i++) {
|
649
|
-
res.push(self.getAlpha(i));
|
650
|
-
}
|
651
|
-
return res;
|
652
|
-
});
|
653
|
-
|
654
|
-
Class rb_cMGLDA = define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(rb_mTomoto, "MGLDA")
|
655
|
-
.define_singleton_method(
|
656
|
-
"_new",
|
657
|
-
*[](size_t tw, size_t k_g, size_t k_l, size_t t, tomoto::Float alpha_g, tomoto::Float alpha_l, tomoto::Float alpha_mg, tomoto::Float alpha_ml, tomoto::Float eta_g) {
|
658
|
-
return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g);
|
659
|
-
})
|
660
|
-
.define_method(
|
661
|
-
"_add_doc",
|
662
|
-
*[](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
|
663
|
-
auto doc = buildDoc(words);
|
664
|
-
doc.misc["delimiter"] = delimiter;
|
665
|
-
self.addDoc(doc);
|
666
|
-
})
|
667
|
-
.define_method(
|
668
|
-
"alpha_g",
|
669
|
-
*[](tomoto::IMGLDAModel& self) {
|
670
|
-
return self.getAlpha();
|
671
|
-
})
|
672
|
-
.define_method(
|
673
|
-
"alpha_l",
|
674
|
-
*[](tomoto::IMGLDAModel& self) {
|
675
|
-
return self.getAlphaL();
|
676
|
-
})
|
677
|
-
.define_method(
|
678
|
-
"alpha_mg",
|
679
|
-
*[](tomoto::IMGLDAModel& self) {
|
680
|
-
return self.getAlphaM();
|
681
|
-
})
|
682
|
-
.define_method(
|
683
|
-
"alpha_ml",
|
684
|
-
*[](tomoto::IMGLDAModel& self) {
|
685
|
-
return self.getAlphaML();
|
686
|
-
})
|
687
|
-
.define_method(
|
688
|
-
"eta_g",
|
689
|
-
*[](tomoto::IMGLDAModel& self) {
|
690
|
-
return self.getEta();
|
691
|
-
})
|
692
|
-
.define_method(
|
693
|
-
"eta_l",
|
694
|
-
*[](tomoto::IMGLDAModel& self) {
|
695
|
-
return self.getEtaL();
|
696
|
-
})
|
697
|
-
.define_method(
|
698
|
-
"gamma",
|
699
|
-
*[](tomoto::IMGLDAModel& self) {
|
700
|
-
return self.getGamma();
|
701
|
-
})
|
702
|
-
.define_method(
|
703
|
-
"k_g",
|
704
|
-
*[](tomoto::IMGLDAModel& self) {
|
705
|
-
return self.getK();
|
706
|
-
})
|
707
|
-
.define_method(
|
708
|
-
"k_l",
|
709
|
-
*[](tomoto::IMGLDAModel& self) {
|
710
|
-
return self.getKL();
|
711
|
-
})
|
712
|
-
.define_method(
|
713
|
-
"t",
|
714
|
-
*[](tomoto::IMGLDAModel& self) {
|
715
|
-
return self.getT();
|
716
|
-
});
|
717
|
-
|
718
|
-
Class rb_cLLDA = define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(rb_mTomoto, "LLDA")
|
719
|
-
.define_singleton_method(
|
720
|
-
"_new",
|
721
|
-
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
722
|
-
if (seed < 0) {
|
723
|
-
seed = std::random_device{}();
|
724
|
-
}
|
725
|
-
return tomoto::ILLDAModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
726
|
-
})
|
727
|
-
.define_method(
|
728
|
-
"_add_doc",
|
729
|
-
*[](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
|
730
|
-
auto doc = buildDoc(words);
|
731
|
-
doc.misc["labels"] = labels;
|
732
|
-
self.addDoc(doc);
|
733
|
-
})
|
734
|
-
.define_method(
|
735
|
-
"topics_per_label",
|
736
|
-
*[](tomoto::ILLDAModel& self) {
|
737
|
-
return self.getNumTopicsPerLabel();
|
738
|
-
});
|
739
|
-
|
740
|
-
Class rb_cPLDA = define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(rb_mTomoto, "PLDA")
|
741
|
-
.define_singleton_method(
|
742
|
-
"_new",
|
743
|
-
*[](size_t tw, size_t latent_topics, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
744
|
-
if (seed < 0) {
|
745
|
-
seed = std::random_device{}();
|
746
|
-
}
|
747
|
-
return tomoto::IPLDAModel::create((tomoto::TermWeight)tw, latent_topics, 1, alpha, eta, seed);
|
748
|
-
})
|
749
|
-
.define_method(
|
750
|
-
"_add_doc",
|
751
|
-
*[](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
|
752
|
-
auto doc = buildDoc(words);
|
753
|
-
doc.misc["labels"] = labels;
|
754
|
-
self.addDoc(doc);
|
755
|
-
})
|
756
|
-
.define_method(
|
757
|
-
"latent_topics",
|
758
|
-
*[](tomoto::IPLDAModel& self) {
|
759
|
-
return self.getNumLatentTopics();
|
760
|
-
});
|
761
|
-
|
762
|
-
Class rb_cSLDA = define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(rb_mTomoto, "SLDA")
|
763
|
-
.define_singleton_method(
|
764
|
-
"_new",
|
765
|
-
*[](size_t tw, size_t k, Array rb_vars, tomoto::Float alpha, tomoto::Float eta, std::vector<tomoto::Float> mu, std::vector<tomoto::Float> nu_sq, std::vector<tomoto::Float> glm_param, int seed) {
|
766
|
-
if (seed < 0) {
|
767
|
-
seed = std::random_device{}();
|
768
|
-
}
|
769
|
-
std::vector<tomoto::ISLDAModel::GLM> vars;
|
770
|
-
vars.reserve(rb_vars.size());
|
771
|
-
for (auto const& v : rb_vars) {
|
772
|
-
vars.push_back((tomoto::ISLDAModel::GLM) from_ruby<int>(v));
|
773
|
-
}
|
774
|
-
return tomoto::ISLDAModel::create((tomoto::TermWeight)tw, k, vars, alpha, eta, mu, nu_sq, glm_param, seed);
|
775
|
-
})
|
776
|
-
.define_method(
|
777
|
-
"_add_doc",
|
778
|
-
*[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<tomoto::Float> y) {
|
779
|
-
auto doc = buildDoc(words);
|
780
|
-
doc.misc["y"] = y;
|
781
|
-
self.addDoc(doc);
|
782
|
-
})
|
783
|
-
.define_method(
|
784
|
-
"f",
|
785
|
-
*[](tomoto::ISLDAModel& self) {
|
786
|
-
return self.getF();
|
787
|
-
})
|
788
|
-
.define_method(
|
789
|
-
"_var_type",
|
790
|
-
*[](tomoto::ISLDAModel& self, size_t var_id) {
|
791
|
-
if (var_id >= self.getF()) throw std::runtime_error{ "'var_id' must be < 'f'" };
|
792
|
-
return self.getTypeOfVar(var_id) == tomoto::ISLDAModel::GLM::linear ? "l" : "b";
|
793
|
-
});
|
35
|
+
init_lda(m);
|
36
|
+
init_ct(m);
|
37
|
+
init_dmr(m);
|
38
|
+
init_dt(m);
|
39
|
+
init_gdmr(m);
|
40
|
+
init_hdp(m);
|
41
|
+
init_hlda(m);
|
42
|
+
init_pa(m);
|
43
|
+
init_hpa(m);
|
44
|
+
init_mglda(m);
|
45
|
+
init_llda(m);
|
46
|
+
init_plda(m);
|
47
|
+
init_slda(m);
|
794
48
|
}
|