tomoto 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -0
- data/ext/tomoto/ct.cpp +54 -0
- data/ext/tomoto/dmr.cpp +62 -0
- data/ext/tomoto/dt.cpp +82 -0
- data/ext/tomoto/ext.cpp +27 -773
- data/ext/tomoto/gdmr.cpp +34 -0
- data/ext/tomoto/hdp.cpp +42 -0
- data/ext/tomoto/hlda.cpp +66 -0
- data/ext/tomoto/hpa.cpp +27 -0
- data/ext/tomoto/lda.cpp +250 -0
- data/ext/tomoto/llda.cpp +29 -0
- data/ext/tomoto/mglda.cpp +71 -0
- data/ext/tomoto/pa.cpp +27 -0
- data/ext/tomoto/plda.cpp +29 -0
- data/ext/tomoto/slda.cpp +40 -0
- data/ext/tomoto/utils.h +84 -0
- data/lib/tomoto/tomoto.bundle +0 -0
- data/lib/tomoto/tomoto.so +0 -0
- data/lib/tomoto/version.rb +1 -1
- data/vendor/tomotopy/README.kr.rst +12 -3
- data/vendor/tomotopy/README.rst +12 -3
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +47 -2
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +21 -151
- data/vendor/tomotopy/src/Labeling/Labeler.h +5 -3
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +518 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/DT.h +1 -1
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +8 -23
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +9 -18
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +56 -58
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +4 -14
- data/vendor/tomotopy/src/TopicModel/LDA.h +69 -17
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +108 -61
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +7 -8
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +26 -16
- data/vendor/tomotopy/src/TopicModel/PT.h +27 -0
- data/vendor/tomotopy/src/TopicModel/PTModel.cpp +10 -0
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +273 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +16 -11
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +3 -2
- data/vendor/tomotopy/src/Utils/Trie.hpp +39 -8
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +36 -38
- data/vendor/tomotopy/src/Utils/Utils.hpp +50 -45
- data/vendor/tomotopy/src/Utils/math.h +8 -4
- data/vendor/tomotopy/src/Utils/tvector.hpp +4 -0
- metadata +24 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b1cb95a96851ccd3d499ed38c9da531ce48588cf44c37ccc92bbfdc9277e0962
|
4
|
+
data.tar.gz: cadee081b1f0ea9cc37b75afd97e8ecebb32796cc335da2ff50e844c955a0e4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5154bfb71c0b8891953b97c8edf37a7fb70fcb2ab09c3f51126e14262c729dcdc4b82d2727a8601131e090a05efcd1958851d77b5e8e95b922fc9b1f44cedf6
|
7
|
+
data.tar.gz: f975f505493d41bc425e0d288762e97c83ffdb6c1812792bc2dca517c550f1508efef79a24bdde992d7acd3994d6566c27745b9bed806ea64dfa072d22c692a0
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
MIT License
|
2
2
|
|
3
3
|
Copyright (c) 2019, bab2min
|
4
|
-
Copyright (c) 2020 Andrew Kane
|
4
|
+
Copyright (c) 2020-2021 Andrew Kane
|
5
5
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
data/ext/tomoto/ct.cpp
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#include <CT.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
void init_ct(Rice::Module& m) {
|
8
|
+
Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
|
9
|
+
.define_singleton_method(
|
10
|
+
"_new",
|
11
|
+
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
12
|
+
if (seed < 0) {
|
13
|
+
seed = std::random_device{}();
|
14
|
+
}
|
15
|
+
return tomoto::ICTModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
16
|
+
})
|
17
|
+
.define_method(
|
18
|
+
"_correlations",
|
19
|
+
*[](tomoto::ICTModel& self, tomoto::Tid topic_id) {
|
20
|
+
return self.getCorrelationTopic(topic_id);
|
21
|
+
})
|
22
|
+
.define_method(
|
23
|
+
"num_beta_sample",
|
24
|
+
*[](tomoto::ICTModel& self) {
|
25
|
+
return self.getNumBetaSample();
|
26
|
+
})
|
27
|
+
.define_method(
|
28
|
+
"num_beta_sample=",
|
29
|
+
*[](tomoto::ICTModel& self, size_t value) {
|
30
|
+
self.setNumBetaSample(value);
|
31
|
+
return value;
|
32
|
+
})
|
33
|
+
.define_method(
|
34
|
+
"num_tmn_sample",
|
35
|
+
*[](tomoto::ICTModel& self) {
|
36
|
+
return self.getNumTMNSample();
|
37
|
+
})
|
38
|
+
.define_method(
|
39
|
+
"num_tmn_sample=",
|
40
|
+
*[](tomoto::ICTModel& self, size_t value) {
|
41
|
+
self.setNumTMNSample(value);
|
42
|
+
return value;
|
43
|
+
})
|
44
|
+
.define_method(
|
45
|
+
"_prior_cov",
|
46
|
+
*[](tomoto::ICTModel& self) {
|
47
|
+
return self.getPriorCov();
|
48
|
+
})
|
49
|
+
.define_method(
|
50
|
+
"prior_mean",
|
51
|
+
*[](tomoto::ICTModel& self) {
|
52
|
+
return self.getPriorMean();
|
53
|
+
});
|
54
|
+
}
|
data/ext/tomoto/dmr.cpp
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
#include <DMR.h>
|
2
|
+
|
3
|
+
#include <rice/Class.hpp>
|
4
|
+
#include <rice/Module.hpp>
|
5
|
+
|
6
|
+
#include "utils.h"
|
7
|
+
|
8
|
+
void init_dmr(Rice::Module& m) {
|
9
|
+
Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
|
10
|
+
.define_singleton_method(
|
11
|
+
"_new",
|
12
|
+
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
|
13
|
+
if (seed < 0) {
|
14
|
+
seed = std::random_device{}();
|
15
|
+
}
|
16
|
+
return tomoto::IDMRModel::create((tomoto::TermWeight)tw, k, alpha, sigma, eta, alpha_epsilon, seed);
|
17
|
+
})
|
18
|
+
.define_method(
|
19
|
+
"_add_doc",
|
20
|
+
*[](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
|
21
|
+
auto doc = buildDoc(words);
|
22
|
+
doc.misc["metadata"] = metadata;
|
23
|
+
return self.addDoc(doc);
|
24
|
+
})
|
25
|
+
.define_method(
|
26
|
+
"alpha_epsilon",
|
27
|
+
*[](tomoto::IDMRModel& self) {
|
28
|
+
return self.getAlphaEps();
|
29
|
+
})
|
30
|
+
.define_method(
|
31
|
+
"alpha_epsilon=",
|
32
|
+
*[](tomoto::IDMRModel& self, tomoto::Float value) {
|
33
|
+
self.setAlphaEps(value);
|
34
|
+
return value;
|
35
|
+
})
|
36
|
+
.define_method(
|
37
|
+
"f",
|
38
|
+
*[](tomoto::IDMRModel& self) {
|
39
|
+
return self.getF();
|
40
|
+
})
|
41
|
+
.define_method(
|
42
|
+
"_lambdas",
|
43
|
+
*[](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
|
44
|
+
return self.getLambdaByTopic(topic_id);
|
45
|
+
})
|
46
|
+
.define_method(
|
47
|
+
"metadata_dict",
|
48
|
+
*[](tomoto::IDMRModel& self) {
|
49
|
+
auto dict = self.getMetadataDict();
|
50
|
+
Array res;
|
51
|
+
auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
|
52
|
+
for (size_t i = 0; i < dict.size(); i++) {
|
53
|
+
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
54
|
+
}
|
55
|
+
return res;
|
56
|
+
})
|
57
|
+
.define_method(
|
58
|
+
"sigma",
|
59
|
+
*[](tomoto::IDMRModel& self) {
|
60
|
+
return self.getSigma();
|
61
|
+
});
|
62
|
+
}
|
data/ext/tomoto/dt.cpp
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#include <DT.h>
|
2
|
+
|
3
|
+
#include <rice/Module.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
void init_dt(Rice::Module& m) {
|
8
|
+
Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
|
9
|
+
.define_singleton_method(
|
10
|
+
"_new",
|
11
|
+
*[](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
|
12
|
+
// Rice only supports 10 arguments
|
13
|
+
int seed = -1;
|
14
|
+
if (seed < 0) {
|
15
|
+
seed = std::random_device{}();
|
16
|
+
}
|
17
|
+
return tomoto::IDTModel::create((tomoto::TermWeight)tw, k, t, alphaVar, etaVar, phiVar, shapeA, shapeB, shapeC, 0, seed);
|
18
|
+
})
|
19
|
+
.define_method(
|
20
|
+
"_add_doc",
|
21
|
+
*[](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
|
22
|
+
auto doc = buildDoc(words);
|
23
|
+
doc.misc["timepoint"] = timepoint;
|
24
|
+
return self.addDoc(doc);
|
25
|
+
})
|
26
|
+
.define_method(
|
27
|
+
"alpha",
|
28
|
+
*[](tomoto::IDTModel& self) {
|
29
|
+
Array res;
|
30
|
+
for (size_t i = 0; i < self.getK(); i++) {
|
31
|
+
Array res2;
|
32
|
+
for (size_t j = 0; j < self.getT(); j++) {
|
33
|
+
res2.push(self.getAlpha(i, j));
|
34
|
+
}
|
35
|
+
res.push(res2);
|
36
|
+
}
|
37
|
+
return res;
|
38
|
+
})
|
39
|
+
.define_method(
|
40
|
+
"lr_a",
|
41
|
+
*[](tomoto::IDTModel& self) {
|
42
|
+
return self.getShapeA();
|
43
|
+
})
|
44
|
+
.define_method(
|
45
|
+
"lr_a=",
|
46
|
+
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
47
|
+
self.setShapeA(value);
|
48
|
+
return value;
|
49
|
+
})
|
50
|
+
.define_method(
|
51
|
+
"lr_b",
|
52
|
+
*[](tomoto::IDTModel& self) {
|
53
|
+
return self.getShapeB();
|
54
|
+
})
|
55
|
+
.define_method(
|
56
|
+
"lr_b=",
|
57
|
+
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
58
|
+
self.setShapeB(value);
|
59
|
+
return value;
|
60
|
+
})
|
61
|
+
.define_method(
|
62
|
+
"lr_c",
|
63
|
+
*[](tomoto::IDTModel& self) {
|
64
|
+
return self.getShapeC();
|
65
|
+
})
|
66
|
+
.define_method(
|
67
|
+
"lr_c=",
|
68
|
+
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
69
|
+
self.setShapeC(value);
|
70
|
+
return value;
|
71
|
+
})
|
72
|
+
.define_method(
|
73
|
+
"num_docs_by_timepoint",
|
74
|
+
*[](tomoto::IDTModel& self) {
|
75
|
+
return self.getNumDocsByT();
|
76
|
+
})
|
77
|
+
.define_method(
|
78
|
+
"num_timepoints",
|
79
|
+
*[](tomoto::IDTModel& self) {
|
80
|
+
return self.getT();
|
81
|
+
});
|
82
|
+
}
|
data/ext/tomoto/ext.cpp
CHANGED
@@ -1,111 +1,23 @@
|
|
1
|
-
// stdlib
|
2
|
-
#include <fstream>
|
3
|
-
#include <iostream>
|
4
|
-
|
5
|
-
// tomoto
|
6
|
-
#include <CT.h>
|
7
|
-
#include <DMR.h>
|
8
|
-
#include <DT.h>
|
9
|
-
#include <GDMR.h>
|
10
|
-
#include <HDP.h>
|
11
|
-
#include <HLDA.h>
|
12
|
-
#include <HPA.h>
|
13
|
-
#include <LDA.h>
|
14
|
-
#include <LLDA.h>
|
15
|
-
#include <MGLDA.h>
|
16
|
-
#include <PA.h>
|
17
|
-
#include <PLDA.h>
|
18
|
-
#include <SLDA.h>
|
19
|
-
|
20
|
-
// rice
|
21
|
-
#include <rice/Array.hpp>
|
22
|
-
#include <rice/Hash.hpp>
|
23
1
|
#include <rice/Module.hpp>
|
24
2
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
res.push(v);
|
39
|
-
}
|
40
|
-
return res;
|
41
|
-
}
|
42
|
-
|
43
|
-
template<>
|
44
|
-
Object to_ruby<std::vector<uint32_t>>(std::vector<uint32_t> const & x)
|
45
|
-
{
|
46
|
-
Array res;
|
47
|
-
for (auto const& v : x) {
|
48
|
-
res.push(v);
|
49
|
-
}
|
50
|
-
return res;
|
51
|
-
}
|
52
|
-
|
53
|
-
template<>
|
54
|
-
Object to_ruby<std::vector<uint64_t>>(std::vector<uint64_t> const & x)
|
55
|
-
{
|
56
|
-
Array res;
|
57
|
-
for (auto const& v : x) {
|
58
|
-
res.push(v);
|
59
|
-
}
|
60
|
-
return res;
|
61
|
-
}
|
62
|
-
|
63
|
-
template<>
|
64
|
-
std::vector<std::string> from_ruby<std::vector<std::string>>(Object x)
|
65
|
-
{
|
66
|
-
Array a = Array(x);
|
67
|
-
std::vector<std::string> res;
|
68
|
-
res.reserve(a.size());
|
69
|
-
for (auto const& v : a) {
|
70
|
-
res.push_back(from_ruby<std::string>(v));
|
71
|
-
}
|
72
|
-
return res;
|
73
|
-
}
|
74
|
-
|
75
|
-
template<>
|
76
|
-
std::vector<tomoto::Float> from_ruby<std::vector<tomoto::Float>>(Object x)
|
77
|
-
{
|
78
|
-
Array a = Array(x);
|
79
|
-
std::vector<tomoto::Float> res;
|
80
|
-
res.reserve(a.size());
|
81
|
-
for (auto const& v : a) {
|
82
|
-
res.push_back(from_ruby<tomoto::Float>(v));
|
83
|
-
}
|
84
|
-
return res;
|
85
|
-
}
|
86
|
-
|
87
|
-
template<>
|
88
|
-
std::vector<uint64_t> from_ruby<std::vector<uint64_t>>(Object x)
|
89
|
-
{
|
90
|
-
Array a = Array(x);
|
91
|
-
std::vector<uint64_t> res;
|
92
|
-
res.reserve(a.size());
|
93
|
-
for (auto const& v : a) {
|
94
|
-
res.push_back(from_ruby<uint64_t>(v));
|
95
|
-
}
|
96
|
-
return res;
|
97
|
-
}
|
98
|
-
|
99
|
-
tomoto::RawDoc buildDoc(std::vector<std::string>& words) {
|
100
|
-
tomoto::RawDoc doc;
|
101
|
-
doc.rawWords = words;
|
102
|
-
return doc;
|
103
|
-
}
|
3
|
+
void init_lda(Rice::Module& m);
|
4
|
+
void init_ct(Rice::Module& m);
|
5
|
+
void init_dmr(Rice::Module& m);
|
6
|
+
void init_dt(Rice::Module& m);
|
7
|
+
void init_gdmr(Rice::Module& m);
|
8
|
+
void init_hdp(Rice::Module& m);
|
9
|
+
void init_hlda(Rice::Module& m);
|
10
|
+
void init_pa(Rice::Module& m);
|
11
|
+
void init_hpa(Rice::Module& m);
|
12
|
+
void init_mglda(Rice::Module& m);
|
13
|
+
void init_llda(Rice::Module& m);
|
14
|
+
void init_plda(Rice::Module& m);
|
15
|
+
void init_slda(Rice::Module& m);
|
104
16
|
|
105
17
|
extern "C"
|
106
18
|
void Init_ext()
|
107
19
|
{
|
108
|
-
|
20
|
+
auto m = Rice::define_module("Tomoto")
|
109
21
|
.define_singleton_method(
|
110
22
|
"isa",
|
111
23
|
*[]() {
|
@@ -120,675 +32,17 @@ void Init_ext()
|
|
120
32
|
#endif
|
121
33
|
});
|
122
34
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
})
|
137
|
-
.define_method(
|
138
|
-
"alpha",
|
139
|
-
*[](tomoto::ILDAModel& self) {
|
140
|
-
Array res;
|
141
|
-
for (size_t i = 0; i < self.getK(); i++) {
|
142
|
-
res.push(self.getAlpha(i));
|
143
|
-
}
|
144
|
-
return res;
|
145
|
-
})
|
146
|
-
.define_method(
|
147
|
-
"burn_in",
|
148
|
-
*[](tomoto::ILDAModel& self) {
|
149
|
-
return self.getBurnInIteration();
|
150
|
-
})
|
151
|
-
.define_method(
|
152
|
-
"burn_in=",
|
153
|
-
*[](tomoto::ILDAModel& self, size_t iteration) {
|
154
|
-
self.setBurnInIteration(iteration);
|
155
|
-
return iteration;
|
156
|
-
})
|
157
|
-
.define_method(
|
158
|
-
"_count_by_topics",
|
159
|
-
*[](tomoto::ILDAModel& self) {
|
160
|
-
Array res;
|
161
|
-
for (auto const& v : self.getCountByTopic()) {
|
162
|
-
res.push(v);
|
163
|
-
}
|
164
|
-
return res;
|
165
|
-
})
|
166
|
-
.define_method(
|
167
|
-
"eta",
|
168
|
-
*[](tomoto::ILDAModel& self) {
|
169
|
-
return self.getEta();
|
170
|
-
})
|
171
|
-
.define_method(
|
172
|
-
"global_step",
|
173
|
-
*[](tomoto::ILDAModel& self) {
|
174
|
-
return self.getGlobalStep();
|
175
|
-
})
|
176
|
-
.define_method(
|
177
|
-
"k",
|
178
|
-
*[](tomoto::ILDAModel& self) {
|
179
|
-
return self.getK();
|
180
|
-
})
|
181
|
-
.define_method(
|
182
|
-
"_load",
|
183
|
-
*[](tomoto::ILDAModel& self, const char* filename) {
|
184
|
-
std::ifstream str{ filename, std::ios_base::binary };
|
185
|
-
if (!str) throw std::runtime_error{ std::string("cannot open file '") + filename + std::string("'") };
|
186
|
-
std::vector<uint8_t> extra_data;
|
187
|
-
self.loadModel(str, &extra_data);
|
188
|
-
})
|
189
|
-
.define_method(
|
190
|
-
"ll_per_word",
|
191
|
-
*[](tomoto::ILDAModel& self) {
|
192
|
-
return self.getLLPerWord();
|
193
|
-
})
|
194
|
-
.define_method(
|
195
|
-
"num_docs",
|
196
|
-
*[](tomoto::ILDAModel& self) {
|
197
|
-
return self.getNumDocs();
|
198
|
-
})
|
199
|
-
.define_method(
|
200
|
-
"num_vocabs",
|
201
|
-
*[](tomoto::ILDAModel& self) {
|
202
|
-
return self.getV();
|
203
|
-
})
|
204
|
-
.define_method(
|
205
|
-
"num_words",
|
206
|
-
*[](tomoto::ILDAModel& self) {
|
207
|
-
return self.getN();
|
208
|
-
})
|
209
|
-
.define_method(
|
210
|
-
"optim_interval",
|
211
|
-
*[](tomoto::ILDAModel& self) {
|
212
|
-
return self.getOptimInterval();
|
213
|
-
})
|
214
|
-
.define_method(
|
215
|
-
"optim_interval=",
|
216
|
-
*[](tomoto::ILDAModel& self, size_t value) {
|
217
|
-
self.setOptimInterval(value);
|
218
|
-
return value;
|
219
|
-
})
|
220
|
-
.define_method(
|
221
|
-
"perplexity",
|
222
|
-
*[](tomoto::ILDAModel& self) {
|
223
|
-
return self.getPerplexity();
|
224
|
-
})
|
225
|
-
.define_method(
|
226
|
-
"_prepare",
|
227
|
-
*[](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
|
228
|
-
self.prepare(true, minCnt, minDf, rmTop);
|
229
|
-
})
|
230
|
-
.define_method(
|
231
|
-
"_removed_top_words",
|
232
|
-
*[](tomoto::ILDAModel& self, size_t rmTop) {
|
233
|
-
Array res;
|
234
|
-
auto dict = self.getVocabDict();
|
235
|
-
size_t size = dict.size();
|
236
|
-
for (size_t i = rmTop; i > 0; i--) {
|
237
|
-
res.push(dict.toWord(size - i));
|
238
|
-
}
|
239
|
-
return res;
|
240
|
-
})
|
241
|
-
.define_method(
|
242
|
-
"_save",
|
243
|
-
*[](tomoto::ILDAModel& self, const char* filename, bool full) {
|
244
|
-
std::ofstream str{ filename, std::ios_base::binary };
|
245
|
-
std::vector<uint8_t> extra_data;
|
246
|
-
self.saveModel(str, full, &extra_data);
|
247
|
-
})
|
248
|
-
.define_method(
|
249
|
-
"_topic_words",
|
250
|
-
*[](tomoto::ILDAModel& self, size_t topicId, size_t topN) {
|
251
|
-
Hash res;
|
252
|
-
for (auto const& v : self.getWordsByTopicSorted(topicId, topN)) {
|
253
|
-
res[v.first] = v.second;
|
254
|
-
}
|
255
|
-
return res;
|
256
|
-
})
|
257
|
-
.define_method(
|
258
|
-
"_train",
|
259
|
-
*[](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
|
260
|
-
self.train(iteration, workers, (tomoto::ParallelScheme)ps);
|
261
|
-
})
|
262
|
-
.define_method(
|
263
|
-
"_tw",
|
264
|
-
*[](tomoto::ILDAModel& self) {
|
265
|
-
return (int)self.getTermWeight();
|
266
|
-
})
|
267
|
-
.define_method(
|
268
|
-
"used_vocab_df",
|
269
|
-
*[](tomoto::ILDAModel& self) {
|
270
|
-
auto vocab = self.getVocabDf();
|
271
|
-
Array res;
|
272
|
-
for (size_t i = 0; i < self.getV(); i++) {
|
273
|
-
res.push(vocab[i]);
|
274
|
-
}
|
275
|
-
return res;
|
276
|
-
})
|
277
|
-
.define_method(
|
278
|
-
"used_vocab_freq",
|
279
|
-
*[](tomoto::ILDAModel& self) {
|
280
|
-
auto vocab = self.getVocabCf();
|
281
|
-
Array res;
|
282
|
-
for (size_t i = 0; i < self.getV(); i++) {
|
283
|
-
res.push(vocab[i]);
|
284
|
-
}
|
285
|
-
return res;
|
286
|
-
})
|
287
|
-
.define_method(
|
288
|
-
"used_vocabs",
|
289
|
-
*[](tomoto::ILDAModel& self) {
|
290
|
-
auto dict = self.getVocabDict();
|
291
|
-
Array res;
|
292
|
-
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
293
|
-
for (size_t i = 0; i < self.getV(); i++) {
|
294
|
-
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
295
|
-
}
|
296
|
-
return res;
|
297
|
-
})
|
298
|
-
.define_method(
|
299
|
-
"vocab_df",
|
300
|
-
*[](tomoto::ILDAModel& self) {
|
301
|
-
auto vocab = self.getVocabDf();
|
302
|
-
Array res;
|
303
|
-
for (size_t i = 0; i < vocab.size(); i++) {
|
304
|
-
res.push(vocab[i]);
|
305
|
-
}
|
306
|
-
return res;
|
307
|
-
})
|
308
|
-
.define_method(
|
309
|
-
"vocab_freq",
|
310
|
-
*[](tomoto::ILDAModel& self) {
|
311
|
-
auto vocab = self.getVocabCf();
|
312
|
-
Array res;
|
313
|
-
for (size_t i = 0; i < vocab.size(); i++) {
|
314
|
-
res.push(vocab[i]);
|
315
|
-
}
|
316
|
-
return res;
|
317
|
-
})
|
318
|
-
.define_method(
|
319
|
-
"vocabs",
|
320
|
-
*[](tomoto::ILDAModel& self) {
|
321
|
-
auto dict = self.getVocabDict();
|
322
|
-
Array res;
|
323
|
-
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
324
|
-
for (size_t i = 0; i < dict.size(); i++) {
|
325
|
-
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
326
|
-
}
|
327
|
-
return res;
|
328
|
-
});
|
329
|
-
|
330
|
-
Class rb_cCT = define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(rb_mTomoto, "CT")
|
331
|
-
.define_singleton_method(
|
332
|
-
"_new",
|
333
|
-
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
334
|
-
if (seed < 0) {
|
335
|
-
seed = std::random_device{}();
|
336
|
-
}
|
337
|
-
return tomoto::ICTModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
338
|
-
})
|
339
|
-
.define_method(
|
340
|
-
"_correlations",
|
341
|
-
*[](tomoto::ICTModel& self, tomoto::Tid topic_id) {
|
342
|
-
return self.getCorrelationTopic(topic_id);
|
343
|
-
})
|
344
|
-
.define_method(
|
345
|
-
"num_beta_sample",
|
346
|
-
*[](tomoto::ICTModel& self) {
|
347
|
-
return self.getNumBetaSample();
|
348
|
-
})
|
349
|
-
.define_method(
|
350
|
-
"num_beta_sample=",
|
351
|
-
*[](tomoto::ICTModel& self, size_t value) {
|
352
|
-
self.setNumBetaSample(value);
|
353
|
-
return value;
|
354
|
-
})
|
355
|
-
.define_method(
|
356
|
-
"num_tmn_sample",
|
357
|
-
*[](tomoto::ICTModel& self) {
|
358
|
-
return self.getNumTMNSample();
|
359
|
-
})
|
360
|
-
.define_method(
|
361
|
-
"num_tmn_sample=",
|
362
|
-
*[](tomoto::ICTModel& self, size_t value) {
|
363
|
-
self.setNumTMNSample(value);
|
364
|
-
return value;
|
365
|
-
})
|
366
|
-
.define_method(
|
367
|
-
"_prior_cov",
|
368
|
-
*[](tomoto::ICTModel& self) {
|
369
|
-
return self.getPriorCov();
|
370
|
-
})
|
371
|
-
.define_method(
|
372
|
-
"prior_mean",
|
373
|
-
*[](tomoto::ICTModel& self) {
|
374
|
-
return self.getPriorMean();
|
375
|
-
});
|
376
|
-
|
377
|
-
Class rb_cDMR = define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(rb_mTomoto, "DMR")
|
378
|
-
.define_singleton_method(
|
379
|
-
"_new",
|
380
|
-
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
|
381
|
-
if (seed < 0) {
|
382
|
-
seed = std::random_device{}();
|
383
|
-
}
|
384
|
-
return tomoto::IDMRModel::create((tomoto::TermWeight)tw, k, alpha, sigma, eta, alpha_epsilon, seed);
|
385
|
-
})
|
386
|
-
.define_method(
|
387
|
-
"_add_doc",
|
388
|
-
*[](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
|
389
|
-
auto doc = buildDoc(words);
|
390
|
-
doc.misc["metadata"] = metadata;
|
391
|
-
self.addDoc(doc);
|
392
|
-
})
|
393
|
-
.define_method(
|
394
|
-
"alpha_epsilon",
|
395
|
-
*[](tomoto::IDMRModel& self) {
|
396
|
-
return self.getAlphaEps();
|
397
|
-
})
|
398
|
-
.define_method(
|
399
|
-
"alpha_epsilon=",
|
400
|
-
*[](tomoto::IDMRModel& self, tomoto::Float value) {
|
401
|
-
self.setAlphaEps(value);
|
402
|
-
return value;
|
403
|
-
})
|
404
|
-
.define_method(
|
405
|
-
"f",
|
406
|
-
*[](tomoto::IDMRModel& self) {
|
407
|
-
return self.getF();
|
408
|
-
})
|
409
|
-
.define_method(
|
410
|
-
"_lambdas",
|
411
|
-
*[](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
|
412
|
-
return self.getLambdaByTopic(topic_id);
|
413
|
-
})
|
414
|
-
.define_method(
|
415
|
-
"metadata_dict",
|
416
|
-
*[](tomoto::IDMRModel& self) {
|
417
|
-
auto dict = self.getMetadataDict();
|
418
|
-
Array res;
|
419
|
-
auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
|
420
|
-
for (size_t i = 0; i < dict.size(); i++) {
|
421
|
-
res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
|
422
|
-
}
|
423
|
-
return res;
|
424
|
-
})
|
425
|
-
.define_method(
|
426
|
-
"sigma",
|
427
|
-
*[](tomoto::IDMRModel& self) {
|
428
|
-
return self.getSigma();
|
429
|
-
});
|
430
|
-
|
431
|
-
Class rb_cDT = define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(rb_mTomoto, "DT")
|
432
|
-
.define_singleton_method(
|
433
|
-
"_new",
|
434
|
-
*[](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
|
435
|
-
// Rice only supports 10 arguments
|
436
|
-
int seed = -1;
|
437
|
-
if (seed < 0) {
|
438
|
-
seed = std::random_device{}();
|
439
|
-
}
|
440
|
-
return tomoto::IDTModel::create((tomoto::TermWeight)tw, k, t, alphaVar, etaVar, phiVar, shapeA, shapeB, shapeC, 0, seed);
|
441
|
-
})
|
442
|
-
.define_method(
|
443
|
-
"_add_doc",
|
444
|
-
*[](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
|
445
|
-
auto doc = buildDoc(words);
|
446
|
-
doc.misc["timepoint"] = timepoint;
|
447
|
-
self.addDoc(doc);
|
448
|
-
})
|
449
|
-
.define_method(
|
450
|
-
"lr_a",
|
451
|
-
*[](tomoto::IDTModel& self) {
|
452
|
-
return self.getShapeA();
|
453
|
-
})
|
454
|
-
.define_method(
|
455
|
-
"lr_a=",
|
456
|
-
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
457
|
-
self.setShapeA(value);
|
458
|
-
return value;
|
459
|
-
})
|
460
|
-
.define_method(
|
461
|
-
"lr_b",
|
462
|
-
*[](tomoto::IDTModel& self) {
|
463
|
-
return self.getShapeB();
|
464
|
-
})
|
465
|
-
.define_method(
|
466
|
-
"lr_b=",
|
467
|
-
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
468
|
-
self.setShapeB(value);
|
469
|
-
return value;
|
470
|
-
})
|
471
|
-
.define_method(
|
472
|
-
"lr_c",
|
473
|
-
*[](tomoto::IDTModel& self) {
|
474
|
-
return self.getShapeC();
|
475
|
-
})
|
476
|
-
.define_method(
|
477
|
-
"lr_c=",
|
478
|
-
*[](tomoto::IDTModel& self, tomoto::Float value) {
|
479
|
-
self.setShapeC(value);
|
480
|
-
return value;
|
481
|
-
})
|
482
|
-
.define_method(
|
483
|
-
"num_docs_by_timepoint",
|
484
|
-
*[](tomoto::IDTModel& self) {
|
485
|
-
return self.getNumDocsByT();
|
486
|
-
})
|
487
|
-
.define_method(
|
488
|
-
"num_timepoints",
|
489
|
-
*[](tomoto::IDTModel& self) {
|
490
|
-
return self.getT();
|
491
|
-
});
|
492
|
-
|
493
|
-
Class rb_cGDMR = define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(rb_mTomoto, "GDMR")
|
494
|
-
.define_singleton_method(
|
495
|
-
"_new",
|
496
|
-
*[](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
|
497
|
-
if (seed < 0) {
|
498
|
-
seed = std::random_device{}();
|
499
|
-
}
|
500
|
-
return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed);
|
501
|
-
})
|
502
|
-
.define_method(
|
503
|
-
"_add_doc",
|
504
|
-
*[](tomoto::IDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> metadata) {
|
505
|
-
auto doc = buildDoc(words);
|
506
|
-
doc.misc["metadata"] = metadata;
|
507
|
-
self.addDoc(doc);
|
508
|
-
})
|
509
|
-
.define_method(
|
510
|
-
"degrees",
|
511
|
-
*[](tomoto::IGDMRModel& self) {
|
512
|
-
return self.getFs();
|
513
|
-
})
|
514
|
-
.define_method(
|
515
|
-
"sigma0",
|
516
|
-
*[](tomoto::IGDMRModel& self) {
|
517
|
-
return self.getSigma0();
|
518
|
-
});
|
519
|
-
|
520
|
-
Class rb_cHDP = define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(rb_mTomoto, "HDP")
|
521
|
-
.define_singleton_method(
|
522
|
-
"_new",
|
523
|
-
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
|
524
|
-
if (seed < 0) {
|
525
|
-
seed = std::random_device{}();
|
526
|
-
}
|
527
|
-
return tomoto::IHDPModel::create((tomoto::TermWeight)tw, k, alpha, eta, gamma, seed);
|
528
|
-
})
|
529
|
-
.define_method(
|
530
|
-
"alpha",
|
531
|
-
*[](tomoto::IHDPModel& self) {
|
532
|
-
return self.getAlpha();
|
533
|
-
})
|
534
|
-
.define_method(
|
535
|
-
"gamma",
|
536
|
-
*[](tomoto::IHDPModel& self) {
|
537
|
-
return self.getGamma();
|
538
|
-
})
|
539
|
-
.define_method(
|
540
|
-
"live_k",
|
541
|
-
*[](tomoto::IHDPModel& self) {
|
542
|
-
return self.getLiveK();
|
543
|
-
})
|
544
|
-
.define_method(
|
545
|
-
"live_topic?",
|
546
|
-
*[](tomoto::IHDPModel& self, size_t tid) {
|
547
|
-
return self.isLiveTopic(tid);
|
548
|
-
})
|
549
|
-
.define_method(
|
550
|
-
"num_tables",
|
551
|
-
*[](tomoto::IHDPModel& self) {
|
552
|
-
return self.getTotalTables();
|
553
|
-
});
|
554
|
-
|
555
|
-
Class rb_cHLDA = define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(rb_mTomoto, "HLDA")
|
556
|
-
.define_singleton_method(
|
557
|
-
"_new",
|
558
|
-
*[](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
|
559
|
-
if (seed < 0) {
|
560
|
-
seed = std::random_device{}();
|
561
|
-
}
|
562
|
-
return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, levelDepth, alpha, eta, gamma, seed);
|
563
|
-
})
|
564
|
-
.define_method(
|
565
|
-
"alpha",
|
566
|
-
*[](tomoto::IHLDAModel& self) {
|
567
|
-
Array res;
|
568
|
-
for (size_t i = 0; i < self.getLevelDepth(); i++) {
|
569
|
-
res.push(self.getAlpha(i));
|
570
|
-
}
|
571
|
-
return res;
|
572
|
-
})
|
573
|
-
.define_method(
|
574
|
-
"_children_topics",
|
575
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
576
|
-
return self.getChildTopicId(topic_id);
|
577
|
-
})
|
578
|
-
.define_method(
|
579
|
-
"depth",
|
580
|
-
*[](tomoto::IHLDAModel& self) {
|
581
|
-
return self.getLevelDepth();
|
582
|
-
})
|
583
|
-
.define_method(
|
584
|
-
"gamma",
|
585
|
-
*[](tomoto::IHLDAModel& self) {
|
586
|
-
return self.getGamma();
|
587
|
-
})
|
588
|
-
.define_method(
|
589
|
-
"_level",
|
590
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
591
|
-
return self.getLevelOfTopic(topic_id);
|
592
|
-
})
|
593
|
-
.define_method(
|
594
|
-
"live_k",
|
595
|
-
*[](tomoto::IHLDAModel& self) {
|
596
|
-
return self.getLiveK();
|
597
|
-
})
|
598
|
-
.define_method(
|
599
|
-
"_live_topic?",
|
600
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
601
|
-
return self.isLiveTopic(topic_id);
|
602
|
-
})
|
603
|
-
.define_method(
|
604
|
-
"_num_docs_of_topic",
|
605
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
606
|
-
return self.getNumDocsOfTopic(topic_id);
|
607
|
-
})
|
608
|
-
.define_method(
|
609
|
-
"_parent_topic",
|
610
|
-
*[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
|
611
|
-
return self.getParentTopicId(topic_id);
|
612
|
-
});
|
613
|
-
|
614
|
-
Class rb_cPA = define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(rb_mTomoto, "PA")
|
615
|
-
.define_singleton_method(
|
616
|
-
"_new",
|
617
|
-
*[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
618
|
-
if (seed < 0) {
|
619
|
-
seed = std::random_device{}();
|
620
|
-
}
|
621
|
-
return tomoto::IPAModel::create((tomoto::TermWeight)tw, k1, k2, alpha, eta, seed);
|
622
|
-
})
|
623
|
-
.define_method(
|
624
|
-
"k1",
|
625
|
-
*[](tomoto::IPAModel& self) {
|
626
|
-
return self.getK();
|
627
|
-
})
|
628
|
-
.define_method(
|
629
|
-
"k2",
|
630
|
-
*[](tomoto::IPAModel& self) {
|
631
|
-
return self.getK2();
|
632
|
-
});
|
633
|
-
|
634
|
-
Class rb_cHPA = define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(rb_mTomoto, "HPA")
|
635
|
-
.define_singleton_method(
|
636
|
-
"_new",
|
637
|
-
*[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
638
|
-
if (seed < 0) {
|
639
|
-
seed = std::random_device{}();
|
640
|
-
}
|
641
|
-
return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, k1, k2, alpha, eta, seed);
|
642
|
-
})
|
643
|
-
.define_method(
|
644
|
-
"alpha",
|
645
|
-
*[](tomoto::IHPAModel& self) {
|
646
|
-
Array res;
|
647
|
-
// use <= to return k+1 elements
|
648
|
-
for (size_t i = 0; i <= self.getK(); i++) {
|
649
|
-
res.push(self.getAlpha(i));
|
650
|
-
}
|
651
|
-
return res;
|
652
|
-
});
|
653
|
-
|
654
|
-
Class rb_cMGLDA = define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(rb_mTomoto, "MGLDA")
|
655
|
-
.define_singleton_method(
|
656
|
-
"_new",
|
657
|
-
*[](size_t tw, size_t k_g, size_t k_l, size_t t, tomoto::Float alpha_g, tomoto::Float alpha_l, tomoto::Float alpha_mg, tomoto::Float alpha_ml, tomoto::Float eta_g) {
|
658
|
-
return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g);
|
659
|
-
})
|
660
|
-
.define_method(
|
661
|
-
"_add_doc",
|
662
|
-
*[](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
|
663
|
-
auto doc = buildDoc(words);
|
664
|
-
doc.misc["delimiter"] = delimiter;
|
665
|
-
self.addDoc(doc);
|
666
|
-
})
|
667
|
-
.define_method(
|
668
|
-
"alpha_g",
|
669
|
-
*[](tomoto::IMGLDAModel& self) {
|
670
|
-
return self.getAlpha();
|
671
|
-
})
|
672
|
-
.define_method(
|
673
|
-
"alpha_l",
|
674
|
-
*[](tomoto::IMGLDAModel& self) {
|
675
|
-
return self.getAlphaL();
|
676
|
-
})
|
677
|
-
.define_method(
|
678
|
-
"alpha_mg",
|
679
|
-
*[](tomoto::IMGLDAModel& self) {
|
680
|
-
return self.getAlphaM();
|
681
|
-
})
|
682
|
-
.define_method(
|
683
|
-
"alpha_ml",
|
684
|
-
*[](tomoto::IMGLDAModel& self) {
|
685
|
-
return self.getAlphaML();
|
686
|
-
})
|
687
|
-
.define_method(
|
688
|
-
"eta_g",
|
689
|
-
*[](tomoto::IMGLDAModel& self) {
|
690
|
-
return self.getEta();
|
691
|
-
})
|
692
|
-
.define_method(
|
693
|
-
"eta_l",
|
694
|
-
*[](tomoto::IMGLDAModel& self) {
|
695
|
-
return self.getEtaL();
|
696
|
-
})
|
697
|
-
.define_method(
|
698
|
-
"gamma",
|
699
|
-
*[](tomoto::IMGLDAModel& self) {
|
700
|
-
return self.getGamma();
|
701
|
-
})
|
702
|
-
.define_method(
|
703
|
-
"k_g",
|
704
|
-
*[](tomoto::IMGLDAModel& self) {
|
705
|
-
return self.getK();
|
706
|
-
})
|
707
|
-
.define_method(
|
708
|
-
"k_l",
|
709
|
-
*[](tomoto::IMGLDAModel& self) {
|
710
|
-
return self.getKL();
|
711
|
-
})
|
712
|
-
.define_method(
|
713
|
-
"t",
|
714
|
-
*[](tomoto::IMGLDAModel& self) {
|
715
|
-
return self.getT();
|
716
|
-
});
|
717
|
-
|
718
|
-
Class rb_cLLDA = define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(rb_mTomoto, "LLDA")
|
719
|
-
.define_singleton_method(
|
720
|
-
"_new",
|
721
|
-
*[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
722
|
-
if (seed < 0) {
|
723
|
-
seed = std::random_device{}();
|
724
|
-
}
|
725
|
-
return tomoto::ILLDAModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
|
726
|
-
})
|
727
|
-
.define_method(
|
728
|
-
"_add_doc",
|
729
|
-
*[](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
|
730
|
-
auto doc = buildDoc(words);
|
731
|
-
doc.misc["labels"] = labels;
|
732
|
-
self.addDoc(doc);
|
733
|
-
})
|
734
|
-
.define_method(
|
735
|
-
"topics_per_label",
|
736
|
-
*[](tomoto::ILLDAModel& self) {
|
737
|
-
return self.getNumTopicsPerLabel();
|
738
|
-
});
|
739
|
-
|
740
|
-
Class rb_cPLDA = define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(rb_mTomoto, "PLDA")
|
741
|
-
.define_singleton_method(
|
742
|
-
"_new",
|
743
|
-
*[](size_t tw, size_t latent_topics, tomoto::Float alpha, tomoto::Float eta, int seed) {
|
744
|
-
if (seed < 0) {
|
745
|
-
seed = std::random_device{}();
|
746
|
-
}
|
747
|
-
return tomoto::IPLDAModel::create((tomoto::TermWeight)tw, latent_topics, 1, alpha, eta, seed);
|
748
|
-
})
|
749
|
-
.define_method(
|
750
|
-
"_add_doc",
|
751
|
-
*[](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
|
752
|
-
auto doc = buildDoc(words);
|
753
|
-
doc.misc["labels"] = labels;
|
754
|
-
self.addDoc(doc);
|
755
|
-
})
|
756
|
-
.define_method(
|
757
|
-
"latent_topics",
|
758
|
-
*[](tomoto::IPLDAModel& self) {
|
759
|
-
return self.getNumLatentTopics();
|
760
|
-
});
|
761
|
-
|
762
|
-
Class rb_cSLDA = define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(rb_mTomoto, "SLDA")
|
763
|
-
.define_singleton_method(
|
764
|
-
"_new",
|
765
|
-
*[](size_t tw, size_t k, Array rb_vars, tomoto::Float alpha, tomoto::Float eta, std::vector<tomoto::Float> mu, std::vector<tomoto::Float> nu_sq, std::vector<tomoto::Float> glm_param, int seed) {
|
766
|
-
if (seed < 0) {
|
767
|
-
seed = std::random_device{}();
|
768
|
-
}
|
769
|
-
std::vector<tomoto::ISLDAModel::GLM> vars;
|
770
|
-
vars.reserve(rb_vars.size());
|
771
|
-
for (auto const& v : rb_vars) {
|
772
|
-
vars.push_back((tomoto::ISLDAModel::GLM) from_ruby<int>(v));
|
773
|
-
}
|
774
|
-
return tomoto::ISLDAModel::create((tomoto::TermWeight)tw, k, vars, alpha, eta, mu, nu_sq, glm_param, seed);
|
775
|
-
})
|
776
|
-
.define_method(
|
777
|
-
"_add_doc",
|
778
|
-
*[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<tomoto::Float> y) {
|
779
|
-
auto doc = buildDoc(words);
|
780
|
-
doc.misc["y"] = y;
|
781
|
-
self.addDoc(doc);
|
782
|
-
})
|
783
|
-
.define_method(
|
784
|
-
"f",
|
785
|
-
*[](tomoto::ISLDAModel& self) {
|
786
|
-
return self.getF();
|
787
|
-
})
|
788
|
-
.define_method(
|
789
|
-
"_var_type",
|
790
|
-
*[](tomoto::ISLDAModel& self, size_t var_id) {
|
791
|
-
if (var_id >= self.getF()) throw std::runtime_error{ "'var_id' must be < 'f'" };
|
792
|
-
return self.getTypeOfVar(var_id) == tomoto::ISLDAModel::GLM::linear ? "l" : "b";
|
793
|
-
});
|
35
|
+
init_lda(m);
|
36
|
+
init_ct(m);
|
37
|
+
init_dmr(m);
|
38
|
+
init_dt(m);
|
39
|
+
init_gdmr(m);
|
40
|
+
init_hdp(m);
|
41
|
+
init_hlda(m);
|
42
|
+
init_pa(m);
|
43
|
+
init_hpa(m);
|
44
|
+
init_mglda(m);
|
45
|
+
init_llda(m);
|
46
|
+
init_plda(m);
|
47
|
+
init_slda(m);
|
794
48
|
}
|