tomoto 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -0
- data/ext/tomoto/ct.cpp +54 -0
- data/ext/tomoto/dmr.cpp +62 -0
- data/ext/tomoto/dt.cpp +82 -0
- data/ext/tomoto/ext.cpp +27 -773
- data/ext/tomoto/gdmr.cpp +34 -0
- data/ext/tomoto/hdp.cpp +42 -0
- data/ext/tomoto/hlda.cpp +66 -0
- data/ext/tomoto/hpa.cpp +27 -0
- data/ext/tomoto/lda.cpp +250 -0
- data/ext/tomoto/llda.cpp +29 -0
- data/ext/tomoto/mglda.cpp +71 -0
- data/ext/tomoto/pa.cpp +27 -0
- data/ext/tomoto/plda.cpp +29 -0
- data/ext/tomoto/slda.cpp +40 -0
- data/ext/tomoto/utils.h +84 -0
- data/lib/tomoto/tomoto.bundle +0 -0
- data/lib/tomoto/tomoto.so +0 -0
- data/lib/tomoto/version.rb +1 -1
- data/vendor/tomotopy/README.kr.rst +12 -3
- data/vendor/tomotopy/README.rst +12 -3
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +47 -2
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +21 -151
- data/vendor/tomotopy/src/Labeling/Labeler.h +5 -3
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +518 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/DT.h +1 -1
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +8 -23
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +9 -18
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +56 -58
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +4 -14
- data/vendor/tomotopy/src/TopicModel/LDA.h +69 -17
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +108 -61
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +7 -8
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +26 -16
- data/vendor/tomotopy/src/TopicModel/PT.h +27 -0
- data/vendor/tomotopy/src/TopicModel/PTModel.cpp +10 -0
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +273 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +16 -11
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +3 -2
- data/vendor/tomotopy/src/Utils/Trie.hpp +39 -8
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +36 -38
- data/vendor/tomotopy/src/Utils/Utils.hpp +50 -45
- data/vendor/tomotopy/src/Utils/math.h +8 -4
- data/vendor/tomotopy/src/Utils/tvector.hpp +4 -0
- metadata +24 -60
data/ext/tomoto/gdmr.cpp
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#include <GDMR.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Ruby class "GDMR" under module `m`. The class wraps
// tomoto::IGDMRModel and is declared with tomoto::IDMRModel as its base.
void init_gdmr(Rice::Module& m) {
  auto klass = Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR");

  // GDMR._new: builds a model through tomoto::IGDMRModel::create.
  // A negative `seed` is replaced by a value drawn from std::random_device.
  klass.define_singleton_method(
    "_new",
    *[](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
      const int rng_seed = seed < 0 ? static_cast<int>(std::random_device{}()) : seed;
      return tomoto::IGDMRModel::create(static_cast<tomoto::TermWeight>(tw), k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, rng_seed);
    });

  // GDMR#_add_doc: builds a document from `words`, stores `metadata` under
  // the "metadata" key of its `misc` map, and passes it to addDoc.
  klass.define_method(
    "_add_doc",
    *[](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> metadata) {
      auto raw_doc = buildDoc(words);
      raw_doc.misc["metadata"] = metadata;
      return self.addDoc(raw_doc);
    });

  // GDMR#degrees: forwards to getFs().
  klass.define_method(
    "degrees",
    *[](tomoto::IGDMRModel& self) {
      return self.getFs();
    });

  // GDMR#sigma0: forwards to getSigma0().
  klass.define_method(
    "sigma0",
    *[](tomoto::IGDMRModel& self) {
      return self.getSigma0();
    });
}
|
data/ext/tomoto/hdp.cpp
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#include <HDP.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Ruby class "HDP" under module `m`. The class wraps
// tomoto::IHDPModel and is declared with tomoto::ILDAModel as its base.
void init_hdp(Rice::Module& m) {
  auto klass = Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP");

  // HDP._new: builds a model through tomoto::IHDPModel::create.
  // A negative `seed` is replaced by a value drawn from std::random_device.
  klass.define_singleton_method(
    "_new",
    *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
      const int rng_seed = seed < 0 ? static_cast<int>(std::random_device{}()) : seed;
      return tomoto::IHDPModel::create(static_cast<tomoto::TermWeight>(tw), k, alpha, eta, gamma, rng_seed);
    });

  // HDP#alpha: forwards to getAlpha().
  klass.define_method(
    "alpha",
    *[](tomoto::IHDPModel& self) {
      return self.getAlpha();
    });

  // HDP#gamma: forwards to getGamma().
  klass.define_method(
    "gamma",
    *[](tomoto::IHDPModel& self) {
      return self.getGamma();
    });

  // HDP#live_k: forwards to getLiveK().
  klass.define_method(
    "live_k",
    *[](tomoto::IHDPModel& self) {
      return self.getLiveK();
    });

  // HDP#live_topic?: forwards to isLiveTopic(tid).
  klass.define_method(
    "live_topic?",
    *[](tomoto::IHDPModel& self, size_t tid) {
      return self.isLiveTopic(tid);
    });

  // HDP#num_tables: forwards to getTotalTables().
  klass.define_method(
    "num_tables",
    *[](tomoto::IHDPModel& self) {
      return self.getTotalTables();
    });
}
|
data/ext/tomoto/hlda.cpp
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#include <HLDA.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Ruby class "HLDA" under module `m`. The class wraps
// tomoto::IHLDAModel and is declared with tomoto::ILDAModel as its base.
void init_hlda(Rice::Module& m) {
  auto klass = Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA");

  // HLDA._new: builds a model through tomoto::IHLDAModel::create.
  // A negative `seed` is replaced by a value drawn from std::random_device.
  klass.define_singleton_method(
    "_new",
    *[](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
      const int rng_seed = seed < 0 ? static_cast<int>(std::random_device{}()) : seed;
      return tomoto::IHLDAModel::create(static_cast<tomoto::TermWeight>(tw), levelDepth, alpha, eta, gamma, rng_seed);
    });

  // HLDA#alpha: collects getAlpha(level) for every level below getLevelDepth().
  klass.define_method(
    "alpha",
    *[](tomoto::IHLDAModel& self) {
      Array alphas;
      for (size_t level = 0; level < self.getLevelDepth(); ++level) {
        alphas.push(self.getAlpha(level));
      }
      return alphas;
    });

  // HLDA#_children_topics: forwards to getChildTopicId(topic_id).
  klass.define_method(
    "_children_topics",
    *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
      return self.getChildTopicId(topic_id);
    });

  // HLDA#depth: forwards to getLevelDepth().
  klass.define_method(
    "depth",
    *[](tomoto::IHLDAModel& self) {
      return self.getLevelDepth();
    });

  // HLDA#gamma: forwards to getGamma().
  klass.define_method(
    "gamma",
    *[](tomoto::IHLDAModel& self) {
      return self.getGamma();
    });

  // HLDA#_level: forwards to getLevelOfTopic(topic_id).
  klass.define_method(
    "_level",
    *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
      return self.getLevelOfTopic(topic_id);
    });

  // HLDA#live_k: forwards to getLiveK().
  klass.define_method(
    "live_k",
    *[](tomoto::IHLDAModel& self) {
      return self.getLiveK();
    });

  // HLDA#_live_topic?: forwards to isLiveTopic(topic_id).
  klass.define_method(
    "_live_topic?",
    *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
      return self.isLiveTopic(topic_id);
    });

  // HLDA#_num_docs_of_topic: forwards to getNumDocsOfTopic(topic_id).
  klass.define_method(
    "_num_docs_of_topic",
    *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
      return self.getNumDocsOfTopic(topic_id);
    });

  // HLDA#_parent_topic: forwards to getParentTopicId(topic_id).
  klass.define_method(
    "_parent_topic",
    *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
      return self.getParentTopicId(topic_id);
    });
}
|
data/ext/tomoto/hpa.cpp
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#include <HPA.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Ruby class "HPA" under module `m`. The class wraps
// tomoto::IHPAModel and is declared with tomoto::IPAModel as its base.
void init_hpa(Rice::Module& m) {
  auto klass = Rice::define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(m, "HPA");

  // HPA._new: builds a model through tomoto::IHPAModel::create, always
  // passing `false` as the second argument.
  // A negative `seed` is replaced by a value drawn from std::random_device.
  klass.define_singleton_method(
    "_new",
    *[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
      const int rng_seed = seed < 0 ? static_cast<int>(std::random_device{}()) : seed;
      return tomoto::IHPAModel::create(static_cast<tomoto::TermWeight>(tw), false, k1, k2, alpha, eta, rng_seed);
    });

  // HPA#alpha: collects getAlpha(idx) for idx = 0 .. getK() inclusive.
  klass.define_method(
    "alpha",
    *[](tomoto::IHPAModel& self) {
      Array alphas;
      // The bound is inclusive on purpose: the result holds k + 1 elements.
      for (size_t idx = 0; idx <= self.getK(); ++idx) {
        alphas.push(self.getAlpha(idx));
      }
      return alphas;
    });
}
|
data/ext/tomoto/lda.cpp
ADDED
@@ -0,0 +1,250 @@
|
|
1
|
+
#include <fstream>
|
2
|
+
#include <iostream>
|
3
|
+
|
4
|
+
#include <LDA.h>
|
5
|
+
|
6
|
+
#include <rice/Class.hpp>
|
7
|
+
#include <rice/Hash.hpp>
|
8
|
+
#include <rice/Module.hpp>
|
9
|
+
|
10
|
+
#include "utils.h"
|
11
|
+
|
12
|
+
class DocumentObject
|
13
|
+
{
|
14
|
+
public:
|
15
|
+
DocumentObject(const tomoto::DocumentBase* _doc, const tomoto::ITopicModel* _tm) : doc{ _doc }, tm{ _tm } {}
|
16
|
+
|
17
|
+
const tomoto::DocumentBase* doc;
|
18
|
+
const tomoto::ITopicModel* tm;
|
19
|
+
};
|
20
|
+
|
21
|
+
// Registers two Ruby classes under module `m`:
//   * "Document", wrapping DocumentObject
//   * "LDA", wrapping tomoto::ILDAModel
void init_lda(Rice::Module& m) {
  Rice::define_class_under<DocumentObject>(m, "Document")
    // Document#topics: hash of index => value built from
    // getTopicsByDoc(doc) of the owning model.
    .define_method(
      "topics",
      *[](DocumentObject& self) {
        Rice::Hash res;
        auto topics = self.tm->getTopicsByDoc(self.doc);
        for (size_t i = 0; i < topics.size(); i++) {
          res[i] = topics[i];
        }
        return res;
      });

  Rice::define_class_under<tomoto::ILDAModel>(m, "LDA")
    // LDA._new: builds a model through tomoto::ILDAModel::create.
    // A negative `seed` is replaced by a value drawn from std::random_device.
    .define_singleton_method(
      "_new",
      *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
        if (seed < 0) {
          seed = std::random_device{}();
        }
        return tomoto::ILDAModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
      })
    // LDA#_add_doc: builds a document from `words` and passes it to addDoc.
    .define_method(
      "_add_doc",
      *[](tomoto::ILDAModel& self, std::vector<std::string> words) {
        return self.addDoc(buildDoc(words));
      })
    // LDA#alpha: collects getAlpha(i) for every i below getK().
    .define_method(
      "alpha",
      *[](tomoto::ILDAModel& self) {
        Array res;
        for (size_t i = 0; i < self.getK(); i++) {
          res.push(self.getAlpha(i));
        }
        return res;
      })
    // LDA#burn_in: forwards to getBurnInIteration().
    .define_method(
      "burn_in",
      *[](tomoto::ILDAModel& self) {
        return self.getBurnInIteration();
      })
    // LDA#burn_in=: forwards to setBurnInIteration and returns the value set.
    .define_method(
      "burn_in=",
      *[](tomoto::ILDAModel& self, size_t iteration) {
        self.setBurnInIteration(iteration);
        return iteration;
      })
    // LDA#_count_by_topics: copies getCountByTopic() into a Ruby array.
    .define_method(
      "_count_by_topics",
      *[](tomoto::ILDAModel& self) {
        Array res;
        for (auto const& v : self.getCountByTopic()) {
          res.push(v);
        }
        return res;
      })
    // LDA#docs: one DocumentObject per document, for indices below getNumDocs().
    // NOTE(review): each DocumentObject keeps raw pointers to the model and
    // to the document returned by getDoc(i); presumably they must not outlive
    // the model - confirm how the Ruby side keeps the model alive.
    .define_method(
      "docs",
      *[](tomoto::ILDAModel& self) {
        Array res;
        auto n = self.getNumDocs();
        for (size_t i = 0; i < n; i++) {
          res.push(DocumentObject(self.getDoc(i), &self));
        }
        return res;
      })
    // LDA#eta: forwards to getEta().
    .define_method(
      "eta",
      *[](tomoto::ILDAModel& self) {
        return self.getEta();
      })
    // LDA#global_step: forwards to getGlobalStep().
    .define_method(
      "global_step",
      *[](tomoto::ILDAModel& self) {
        return self.getGlobalStep();
      })
    // LDA#k: forwards to getK().
    .define_method(
      "k",
      *[](tomoto::ILDAModel& self) {
        return self.getK();
      })
    // LDA#_load: opens `filename` in binary mode and passes the stream to
    // loadModel. Throws std::runtime_error when the file cannot be opened.
    // The extra data filled in by loadModel is discarded.
    .define_method(
      "_load",
      *[](tomoto::ILDAModel& self, const char* filename) {
        std::ifstream str{ filename, std::ios_base::binary };
        if (!str) throw std::runtime_error{ std::string("cannot open file '") + filename + std::string("'") };
        std::vector<uint8_t> extra_data;
        self.loadModel(str, &extra_data);
      })
    // LDA#ll_per_word: forwards to getLLPerWord().
    .define_method(
      "ll_per_word",
      *[](tomoto::ILDAModel& self) {
        return self.getLLPerWord();
      })
    // LDA#num_docs: forwards to getNumDocs().
    .define_method(
      "num_docs",
      *[](tomoto::ILDAModel& self) {
        return self.getNumDocs();
      })
    // LDA#num_vocabs: forwards to getV().
    .define_method(
      "num_vocabs",
      *[](tomoto::ILDAModel& self) {
        return self.getV();
      })
    // LDA#num_words: forwards to getN().
    .define_method(
      "num_words",
      *[](tomoto::ILDAModel& self) {
        return self.getN();
      })
    // LDA#optim_interval: forwards to getOptimInterval().
    .define_method(
      "optim_interval",
      *[](tomoto::ILDAModel& self) {
        return self.getOptimInterval();
      })
    // LDA#optim_interval=: forwards to setOptimInterval and returns the value set.
    .define_method(
      "optim_interval=",
      *[](tomoto::ILDAModel& self, size_t value) {
        self.setOptimInterval(value);
        return value;
      })
    // LDA#perplexity: forwards to getPerplexity().
    .define_method(
      "perplexity",
      *[](tomoto::ILDAModel& self) {
        return self.getPerplexity();
      })
    // LDA#_prepare: forwards to prepare(true, minCnt, minDf, rmTop).
    .define_method(
      "_prepare",
      *[](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
        self.prepare(true, minCnt, minDf, rmTop);
      })
    // LDA#_removed_top_words: the last `rmTop` words of the vocabulary
    // dictionary, in dictionary order.
    .define_method(
      "_removed_top_words",
      *[](tomoto::ILDAModel& self, size_t rmTop) {
        Array res;
        // Bind by reference so the dictionary is not copied on every call.
        const auto& dict = self.getVocabDict();
        const size_t size = dict.size();
        // Clamp to the dictionary size: with rmTop > size the index
        // `size - i` below would wrap around and read out of range.
        const size_t count = rmTop < size ? rmTop : size;
        for (size_t i = count; i > 0; i--) {
          res.push(dict.toWord(size - i));
        }
        return res;
      })
    // LDA#_save: opens `filename` in binary mode and passes the stream to
    // saveModel with an empty extra-data buffer. Throws std::runtime_error
    // when the file cannot be opened, mirroring _load, instead of silently
    // writing nothing.
    .define_method(
      "_save",
      *[](tomoto::ILDAModel& self, const char* filename, bool full) {
        std::ofstream str{ filename, std::ios_base::binary };
        if (!str) throw std::runtime_error{ std::string("cannot open file '") + filename + std::string("'") };
        std::vector<uint8_t> extra_data;
        self.saveModel(str, full, &extra_data);
      })
    // LDA#_topic_words: hash of first => second for each pair returned by
    // getWordsByTopicSorted(topicId, topN).
    .define_method(
      "_topic_words",
      *[](tomoto::ILDAModel& self, size_t topicId, size_t topN) {
        Rice::Hash res;
        for (auto const& v : self.getWordsByTopicSorted(topicId, topN)) {
          res[v.first] = v.second;
        }
        return res;
      })
    // LDA#_train: forwards to train, converting `ps` to tomoto::ParallelScheme.
    .define_method(
      "_train",
      *[](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
        self.train(iteration, workers, (tomoto::ParallelScheme)ps);
      })
    // LDA#_tw: getTermWeight() converted to int.
    .define_method(
      "_tw",
      *[](tomoto::ILDAModel& self) {
        return (int)self.getTermWeight();
      })
    // LDA#used_vocab_df: the first getV() entries of getVocabDf().
    .define_method(
      "used_vocab_df",
      *[](tomoto::ILDAModel& self) {
        auto vocab = self.getVocabDf();
        Array res;
        for (size_t i = 0; i < self.getV(); i++) {
          res.push(vocab[i]);
        }
        return res;
      })
    // LDA#used_vocab_freq: the first getV() entries of getVocabCf().
    .define_method(
      "used_vocab_freq",
      *[](tomoto::ILDAModel& self) {
        auto vocab = self.getVocabCf();
        Array res;
        for (size_t i = 0; i < self.getV(); i++) {
          res.push(vocab[i]);
        }
        return res;
      })
    // LDA#used_vocabs: the first getV() dictionary words, each tagged as
    // UTF-8 through Ruby's force_encoding.
    .define_method(
      "used_vocabs",
      *[](tomoto::ILDAModel& self) {
        // Bind by reference so the dictionary is not copied on every call.
        const auto& dict = self.getVocabDict();
        Array res;
        auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
        for (size_t i = 0; i < self.getV(); i++) {
          res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
        }
        return res;
      })
    // LDA#vocab_df: every entry of getVocabDf().
    .define_method(
      "vocab_df",
      *[](tomoto::ILDAModel& self) {
        auto vocab = self.getVocabDf();
        Array res;
        for (size_t i = 0; i < vocab.size(); i++) {
          res.push(vocab[i]);
        }
        return res;
      })
    // LDA#vocab_freq: every entry of getVocabCf().
    .define_method(
      "vocab_freq",
      *[](tomoto::ILDAModel& self) {
        auto vocab = self.getVocabCf();
        Array res;
        for (size_t i = 0; i < vocab.size(); i++) {
          res.push(vocab[i]);
        }
        return res;
      })
    // LDA#vocabs: every dictionary word, each tagged as UTF-8 through
    // Ruby's force_encoding.
    .define_method(
      "vocabs",
      *[](tomoto::ILDAModel& self) {
        // Bind by reference so the dictionary is not copied on every call.
        const auto& dict = self.getVocabDict();
        Array res;
        auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
        for (size_t i = 0; i < dict.size(); i++) {
          res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
        }
        return res;
      });
}
|
data/ext/tomoto/llda.cpp
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#include <LLDA.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Ruby class "LLDA" under module `m`. The class wraps
// tomoto::ILLDAModel and is declared with tomoto::ILDAModel as its base.
void init_llda(Rice::Module& m) {
  auto klass = Rice::define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(m, "LLDA");

  // LLDA._new: builds a model through tomoto::ILLDAModel::create.
  // A negative `seed` is replaced by a value drawn from std::random_device.
  klass.define_singleton_method(
    "_new",
    *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
      const int rng_seed = seed < 0 ? static_cast<int>(std::random_device{}()) : seed;
      return tomoto::ILLDAModel::create(static_cast<tomoto::TermWeight>(tw), k, alpha, eta, rng_seed);
    });

  // LLDA#_add_doc: builds a document from `words`, stores `labels` under
  // the "labels" key of its `misc` map, and passes it to addDoc.
  klass.define_method(
    "_add_doc",
    *[](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
      auto raw_doc = buildDoc(words);
      raw_doc.misc["labels"] = labels;
      return self.addDoc(raw_doc);
    });

  // LLDA#topics_per_label: forwards to getNumTopicsPerLabel().
  klass.define_method(
    "topics_per_label",
    *[](tomoto::ILLDAModel& self) {
      return self.getNumTopicsPerLabel();
    });
}
|