tomoto 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/tomoto/ct.cpp +11 -11
  4. data/ext/tomoto/dmr.cpp +14 -13
  5. data/ext/tomoto/dt.cpp +14 -14
  6. data/ext/tomoto/ext.cpp +7 -7
  7. data/ext/tomoto/extconf.rb +1 -3
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/utils.h +16 -70
  19. data/lib/tomoto/version.rb +1 -1
  20. data/vendor/tomotopy/README.kr.rst +57 -0
  21. data/vendor/tomotopy/README.rst +55 -0
  22. data/vendor/tomotopy/src/Labeling/Phraser.hpp +3 -3
  23. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +5 -2
  24. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +5 -2
  25. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +5 -2
  26. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +4 -4
  27. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +5 -2
  28. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +2 -2
  29. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  30. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +3 -3
  31. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +34 -14
  32. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +5 -2
  33. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +2 -2
  34. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  35. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +5 -2
  36. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +5 -2
  37. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +4 -1
  38. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +48 -21
  39. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  40. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  41. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  42. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  43. data/vendor/tomotopy/src/Utils/math.h +2 -2
  44. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  45. metadata +6 -6
data/ext/tomoto/hpa.cpp CHANGED
@@ -1,14 +1,14 @@
1
1
  #include <HPA.h>
2
2
 
3
- #include <rice/Module.hpp>
3
+ #include <rice/rice.hpp>
4
4
 
5
5
  #include "utils.h"
6
6
 
7
7
  void init_hpa(Rice::Module& m) {
8
8
  Rice::define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(m, "HPA")
9
- .define_singleton_method(
9
+ .define_singleton_function(
10
10
  "_new",
11
- *[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
11
+ [](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
12
  tomoto::HPAArgs args;
13
13
  args.k = k1;
14
14
  args.k2 = k2;
@@ -18,10 +18,10 @@ void init_hpa(Rice::Module& m) {
18
18
  args.seed = seed;
19
19
  }
20
20
  return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, args);
21
- })
21
+ }, Rice::Return().takeOwnership())
22
22
  .define_method(
23
23
  "alpha",
24
- *[](tomoto::IHPAModel& self) {
24
+ [](tomoto::IHPAModel& self) {
25
25
  Array res;
26
26
  // use <= to return k+1 elements
27
27
  for (size_t i = 0; i <= self.getK(); i++) {
data/ext/tomoto/lda.cpp CHANGED
@@ -3,9 +3,7 @@
3
3
 
4
4
  #include <LDA.h>
5
5
 
6
- #include <rice/Class.hpp>
7
- #include <rice/Hash.hpp>
8
- #include <rice/Module.hpp>
6
+ #include <rice/rice.hpp>
9
7
 
10
8
  #include "utils.h"
11
9
 
@@ -22,7 +20,7 @@ void init_lda(Rice::Module& m) {
22
20
  Rice::define_class_under<DocumentObject>(m, "Document")
23
21
  .define_method(
24
22
  "topics",
25
- *[](DocumentObject& self) {
23
+ [](DocumentObject& self) {
26
24
  Rice::Hash res;
27
25
  auto topics = self.tm->getTopicsByDoc(self.doc);
28
26
  for (size_t i = 0; i < topics.size(); i++) {
@@ -32,9 +30,9 @@ void init_lda(Rice::Module& m) {
32
30
  });
33
31
 
34
32
  Rice::define_class_under<tomoto::ILDAModel>(m, "LDA")
35
- .define_singleton_method(
33
+ .define_singleton_function(
36
34
  "_new",
37
- *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
35
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
38
36
  tomoto::LDAArgs args;
39
37
  args.k = k;
40
38
  args.alpha = {alpha};
@@ -43,10 +41,10 @@ void init_lda(Rice::Module& m) {
43
41
  args.seed = seed;
44
42
  }
45
43
  return tomoto::ILDAModel::create((tomoto::TermWeight)tw, args);
46
- })
44
+ }, Rice::Return().takeOwnership())
47
45
  .define_method(
48
46
  "_add_doc",
49
- *[](tomoto::ILDAModel& self, std::vector<std::string> words) {
47
+ [](tomoto::ILDAModel& self, std::vector<std::string> words) {
50
48
  return self.addDoc(buildDoc(words));
51
49
  })
52
50
  .define_method(
@@ -75,7 +73,7 @@ void init_lda(Rice::Module& m) {
75
73
  })
76
74
  .define_method(
77
75
  "alpha",
78
- *[](tomoto::ILDAModel& self) {
76
+ [](tomoto::ILDAModel& self) {
79
77
  Array res;
80
78
  for (size_t i = 0; i < self.getK(); i++) {
81
79
  res.push(self.getAlpha(i));
@@ -84,18 +82,18 @@ void init_lda(Rice::Module& m) {
84
82
  })
85
83
  .define_method(
86
84
  "burn_in",
87
- *[](tomoto::ILDAModel& self) {
85
+ [](tomoto::ILDAModel& self) {
88
86
  return self.getBurnInIteration();
89
87
  })
90
88
  .define_method(
91
89
  "burn_in=",
92
- *[](tomoto::ILDAModel& self, size_t iteration) {
90
+ [](tomoto::ILDAModel& self, size_t iteration) {
93
91
  self.setBurnInIteration(iteration);
94
92
  return iteration;
95
93
  })
96
94
  .define_method(
97
95
  "_count_by_topics",
98
- *[](tomoto::ILDAModel& self) {
96
+ [](tomoto::ILDAModel& self) {
99
97
  Array res;
100
98
  for (auto const& v : self.getCountByTopic()) {
101
99
  res.push(v);
@@ -104,32 +102,33 @@ void init_lda(Rice::Module& m) {
104
102
  })
105
103
  .define_method(
106
104
  "docs",
107
- *[](tomoto::ILDAModel& self) {
105
+ [](tomoto::ILDAModel& self) {
108
106
  Array res;
109
107
  auto n = self.getNumDocs();
110
108
  for (size_t i = 0; i < n; i++) {
111
- res.push(DocumentObject(self.getDoc(i), &self));
109
+ auto v = DocumentObject(self.getDoc(i), &self);
110
+ res.push(Object(Rice::detail::To_Ruby<DocumentObject>().convert(v)));
112
111
  }
113
112
  return res;
114
113
  })
115
114
  .define_method(
116
115
  "eta",
117
- *[](tomoto::ILDAModel& self) {
116
+ [](tomoto::ILDAModel& self) {
118
117
  return self.getEta();
119
118
  })
120
119
  .define_method(
121
120
  "global_step",
122
- *[](tomoto::ILDAModel& self) {
121
+ [](tomoto::ILDAModel& self) {
123
122
  return self.getGlobalStep();
124
123
  })
125
124
  .define_method(
126
125
  "k",
127
- *[](tomoto::ILDAModel& self) {
126
+ [](tomoto::ILDAModel& self) {
128
127
  return self.getK();
129
128
  })
130
129
  .define_method(
131
130
  "_load",
132
- *[](tomoto::ILDAModel& self, const char* filename) {
131
+ [](tomoto::ILDAModel& self, const char* filename) {
133
132
  std::ifstream str{ filename, std::ios_base::binary };
134
133
  if (!str) throw std::runtime_error{ std::string("cannot open file '") + filename + std::string("'") };
135
134
  std::vector<uint8_t> extra_data;
@@ -137,48 +136,48 @@ void init_lda(Rice::Module& m) {
137
136
  })
138
137
  .define_method(
139
138
  "ll_per_word",
140
- *[](tomoto::ILDAModel& self) {
139
+ [](tomoto::ILDAModel& self) {
141
140
  return self.getLLPerWord();
142
141
  })
143
142
  .define_method(
144
143
  "num_docs",
145
- *[](tomoto::ILDAModel& self) {
144
+ [](tomoto::ILDAModel& self) {
146
145
  return self.getNumDocs();
147
146
  })
148
147
  .define_method(
149
148
  "num_vocabs",
150
- *[](tomoto::ILDAModel& self) {
149
+ [](tomoto::ILDAModel& self) {
151
150
  return self.getV();
152
151
  })
153
152
  .define_method(
154
153
  "num_words",
155
- *[](tomoto::ILDAModel& self) {
154
+ [](tomoto::ILDAModel& self) {
156
155
  return self.getN();
157
156
  })
158
157
  .define_method(
159
158
  "optim_interval",
160
- *[](tomoto::ILDAModel& self) {
159
+ [](tomoto::ILDAModel& self) {
161
160
  return self.getOptimInterval();
162
161
  })
163
162
  .define_method(
164
163
  "optim_interval=",
165
- *[](tomoto::ILDAModel& self, size_t value) {
164
+ [](tomoto::ILDAModel& self, size_t value) {
166
165
  self.setOptimInterval(value);
167
166
  return value;
168
167
  })
169
168
  .define_method(
170
169
  "perplexity",
171
- *[](tomoto::ILDAModel& self) {
170
+ [](tomoto::ILDAModel& self) {
172
171
  return self.getPerplexity();
173
172
  })
174
173
  .define_method(
175
174
  "_prepare",
176
- *[](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
175
+ [](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
177
176
  self.prepare(true, minCnt, minDf, rmTop);
178
177
  })
179
178
  .define_method(
180
179
  "_removed_top_words",
181
- *[](tomoto::ILDAModel& self, size_t rmTop) {
180
+ [](tomoto::ILDAModel& self, size_t rmTop) {
182
181
  Array res;
183
182
  auto dict = self.getVocabDict();
184
183
  size_t size = dict.size();
@@ -189,14 +188,14 @@ void init_lda(Rice::Module& m) {
189
188
  })
190
189
  .define_method(
191
190
  "_save",
192
- *[](tomoto::ILDAModel& self, const char* filename, bool full) {
191
+ [](tomoto::ILDAModel& self, const char* filename, bool full) {
193
192
  std::ofstream str{ filename, std::ios_base::binary };
194
193
  std::vector<uint8_t> extra_data;
195
194
  self.saveModel(str, full, &extra_data);
196
195
  })
197
196
  .define_method(
198
197
  "_topic_words",
199
- *[](tomoto::ILDAModel& self, size_t topicId, size_t topN) {
198
+ [](tomoto::ILDAModel& self, size_t topicId, size_t topN) {
200
199
  Rice::Hash res;
201
200
  for (auto const& v : self.getWordsByTopicSorted(topicId, topN)) {
202
201
  res[v.first] = v.second;
@@ -205,17 +204,17 @@ void init_lda(Rice::Module& m) {
205
204
  })
206
205
  .define_method(
207
206
  "_train",
208
- *[](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
207
+ [](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
209
208
  self.train(iteration, workers, (tomoto::ParallelScheme)ps);
210
209
  })
211
210
  .define_method(
212
211
  "_tw",
213
- *[](tomoto::ILDAModel& self) {
212
+ [](tomoto::ILDAModel& self) {
214
213
  return (int)self.getTermWeight();
215
214
  })
216
215
  .define_method(
217
216
  "used_vocab_df",
218
- *[](tomoto::ILDAModel& self) {
217
+ [](tomoto::ILDAModel& self) {
219
218
  auto vocab = self.getVocabDf();
220
219
  Array res;
221
220
  for (size_t i = 0; i < self.getV(); i++) {
@@ -225,7 +224,7 @@ void init_lda(Rice::Module& m) {
225
224
  })
226
225
  .define_method(
227
226
  "used_vocab_freq",
228
- *[](tomoto::ILDAModel& self) {
227
+ [](tomoto::ILDAModel& self) {
229
228
  auto vocab = self.getVocabCf();
230
229
  Array res;
231
230
  for (size_t i = 0; i < self.getV(); i++) {
@@ -235,18 +234,20 @@ void init_lda(Rice::Module& m) {
235
234
  })
236
235
  .define_method(
237
236
  "used_vocabs",
238
- *[](tomoto::ILDAModel& self) {
237
+ [](tomoto::ILDAModel& self) {
239
238
  auto dict = self.getVocabDict();
240
239
  Array res;
241
240
  auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
242
241
  for (size_t i = 0; i < self.getV(); i++) {
243
- res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
242
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
243
+ Object obj(value);
244
+ res.push(obj.call("force_encoding", utf8));
244
245
  }
245
246
  return res;
246
247
  })
247
248
  .define_method(
248
249
  "vocab_df",
249
- *[](tomoto::ILDAModel& self) {
250
+ [](tomoto::ILDAModel& self) {
250
251
  auto vocab = self.getVocabDf();
251
252
  Array res;
252
253
  for (size_t i = 0; i < vocab.size(); i++) {
@@ -256,7 +257,7 @@ void init_lda(Rice::Module& m) {
256
257
  })
257
258
  .define_method(
258
259
  "vocab_freq",
259
- *[](tomoto::ILDAModel& self) {
260
+ [](tomoto::ILDAModel& self) {
260
261
  auto vocab = self.getVocabCf();
261
262
  Array res;
262
263
  for (size_t i = 0; i < vocab.size(); i++) {
@@ -266,12 +267,14 @@ void init_lda(Rice::Module& m) {
266
267
  })
267
268
  .define_method(
268
269
  "vocabs",
269
- *[](tomoto::ILDAModel& self) {
270
+ [](tomoto::ILDAModel& self) {
270
271
  auto dict = self.getVocabDict();
271
272
  Array res;
272
273
  auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
273
274
  for (size_t i = 0; i < dict.size(); i++) {
274
- res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
275
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
276
+ Object obj(value);
277
+ res.push(obj.call("force_encoding", utf8));
275
278
  }
276
279
  return res;
277
280
  });
data/ext/tomoto/llda.cpp CHANGED
@@ -1,14 +1,14 @@
1
1
  #include <LLDA.h>
2
2
 
3
- #include <rice/Module.hpp>
3
+ #include <rice/rice.hpp>
4
4
 
5
5
  #include "utils.h"
6
6
 
7
7
  void init_llda(Rice::Module& m) {
8
8
  Rice::define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(m, "LLDA")
9
- .define_singleton_method(
9
+ .define_singleton_function(
10
10
  "_new",
11
- *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
12
  tomoto::LDAArgs args;
13
13
  args.k = k;
14
14
  args.alpha = {alpha};
@@ -17,17 +17,17 @@ void init_llda(Rice::Module& m) {
17
17
  args.seed = seed;
18
18
  }
19
19
  return tomoto::ILLDAModel::create((tomoto::TermWeight)tw, args);
20
- })
20
+ }, Rice::Return().takeOwnership())
21
21
  .define_method(
22
22
  "_add_doc",
23
- *[](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
23
+ [](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
24
24
  auto doc = buildDoc(words);
25
25
  doc.misc["labels"] = labels;
26
26
  return self.addDoc(doc);
27
27
  })
28
28
  .define_method(
29
29
  "topics_per_label",
30
- *[](tomoto::ILLDAModel& self) {
30
+ [](tomoto::ILLDAModel& self) {
31
31
  return self.getNumTopicsPerLabel();
32
32
  });
33
33
  }
data/ext/tomoto/mglda.cpp CHANGED
@@ -1,14 +1,14 @@
1
1
  #include <MGLDA.h>
2
2
 
3
- #include <rice/Module.hpp>
3
+ #include <rice/rice.hpp>
4
4
 
5
5
  #include "utils.h"
6
6
 
7
7
  void init_mglda(Rice::Module& m) {
8
8
  Rice::define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(m, "MGLDA")
9
- .define_singleton_method(
9
+ .define_singleton_function(
10
10
  "_new",
11
- *[](size_t tw, size_t k_g, size_t k_l, size_t t, tomoto::Float alpha_g, tomoto::Float alpha_l, tomoto::Float alpha_mg, tomoto::Float alpha_ml, tomoto::Float eta_g) {
11
+ [](size_t tw, size_t k_g, size_t k_l, size_t t, tomoto::Float alpha_g, tomoto::Float alpha_l, tomoto::Float alpha_mg, tomoto::Float alpha_ml, tomoto::Float eta_g) {
12
12
  tomoto::MGLDAArgs args;
13
13
  args.k = k_g;
14
14
  args.kL = k_l;
@@ -20,62 +20,62 @@ void init_mglda(Rice::Module& m) {
20
20
  args.eta = eta_g;
21
21
  // TODO more args
22
22
  return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, args);
23
- })
23
+ }, Rice::Return().takeOwnership())
24
24
  .define_method(
25
25
  "_add_doc",
26
- *[](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
26
+ [](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
27
27
  auto doc = buildDoc(words);
28
28
  doc.misc["delimiter"] = delimiter;
29
29
  return self.addDoc(doc);
30
30
  })
31
31
  .define_method(
32
32
  "alpha_g",
33
- *[](tomoto::IMGLDAModel& self) {
33
+ [](tomoto::IMGLDAModel& self) {
34
34
  return self.getAlpha();
35
35
  })
36
36
  .define_method(
37
37
  "alpha_l",
38
- *[](tomoto::IMGLDAModel& self) {
38
+ [](tomoto::IMGLDAModel& self) {
39
39
  return self.getAlphaL();
40
40
  })
41
41
  .define_method(
42
42
  "alpha_mg",
43
- *[](tomoto::IMGLDAModel& self) {
43
+ [](tomoto::IMGLDAModel& self) {
44
44
  return self.getAlphaM();
45
45
  })
46
46
  .define_method(
47
47
  "alpha_ml",
48
- *[](tomoto::IMGLDAModel& self) {
48
+ [](tomoto::IMGLDAModel& self) {
49
49
  return self.getAlphaML();
50
50
  })
51
51
  .define_method(
52
52
  "eta_g",
53
- *[](tomoto::IMGLDAModel& self) {
53
+ [](tomoto::IMGLDAModel& self) {
54
54
  return self.getEta();
55
55
  })
56
56
  .define_method(
57
57
  "eta_l",
58
- *[](tomoto::IMGLDAModel& self) {
58
+ [](tomoto::IMGLDAModel& self) {
59
59
  return self.getEtaL();
60
60
  })
61
61
  .define_method(
62
62
  "gamma",
63
- *[](tomoto::IMGLDAModel& self) {
63
+ [](tomoto::IMGLDAModel& self) {
64
64
  return self.getGamma();
65
65
  })
66
66
  .define_method(
67
67
  "k_g",
68
- *[](tomoto::IMGLDAModel& self) {
68
+ [](tomoto::IMGLDAModel& self) {
69
69
  return self.getK();
70
70
  })
71
71
  .define_method(
72
72
  "k_l",
73
- *[](tomoto::IMGLDAModel& self) {
73
+ [](tomoto::IMGLDAModel& self) {
74
74
  return self.getKL();
75
75
  })
76
76
  .define_method(
77
77
  "t",
78
- *[](tomoto::IMGLDAModel& self) {
78
+ [](tomoto::IMGLDAModel& self) {
79
79
  return self.getT();
80
80
  });
81
81
  }
data/ext/tomoto/pa.cpp CHANGED
@@ -1,14 +1,14 @@
1
1
  #include <PA.h>
2
2
 
3
- #include <rice/Module.hpp>
3
+ #include <rice/rice.hpp>
4
4
 
5
5
  #include "utils.h"
6
6
 
7
7
  void init_pa(Rice::Module& m) {
8
8
  Rice::define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(m, "PA")
9
- .define_singleton_method(
9
+ .define_singleton_function(
10
10
  "_new",
11
- *[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
11
+ [](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
12
  tomoto::PAArgs args;
13
13
  args.k = k1;
14
14
  args.k2 = k2;
@@ -18,15 +18,15 @@ void init_pa(Rice::Module& m) {
18
18
  args.seed = seed;
19
19
  }
20
20
  return tomoto::IPAModel::create((tomoto::TermWeight)tw, args);
21
- })
21
+ }, Rice::Return().takeOwnership())
22
22
  .define_method(
23
23
  "k1",
24
- *[](tomoto::IPAModel& self) {
24
+ [](tomoto::IPAModel& self) {
25
25
  return self.getK();
26
26
  })
27
27
  .define_method(
28
28
  "k2",
29
- *[](tomoto::IPAModel& self) {
29
+ [](tomoto::IPAModel& self) {
30
30
  return self.getK2();
31
31
  });
32
32
  }