tomoto 0.2.2 → 0.2.3

Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/tomoto/ct.cpp +11 -11
  4. data/ext/tomoto/dmr.cpp +14 -13
  5. data/ext/tomoto/dt.cpp +14 -14
  6. data/ext/tomoto/ext.cpp +7 -7
  7. data/ext/tomoto/extconf.rb +1 -3
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/utils.h +16 -70
  19. data/lib/tomoto/version.rb +1 -1
  20. data/vendor/tomotopy/README.kr.rst +57 -0
  21. data/vendor/tomotopy/README.rst +55 -0
  22. data/vendor/tomotopy/src/Labeling/Phraser.hpp +3 -3
  23. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +5 -2
  24. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +5 -2
  25. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +5 -2
  26. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +4 -4
  27. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +5 -2
  28. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +2 -2
  29. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  30. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +3 -3
  31. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +34 -14
  32. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +5 -2
  33. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +2 -2
  34. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  35. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +5 -2
  36. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +5 -2
  37. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +4 -1
  38. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +48 -21
  39. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  40. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  41. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  42. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  43. data/vendor/tomotopy/src/Utils/math.h +2 -2
  44. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  45. metadata +6 -6
data/ext/tomoto/plda.cpp CHANGED
@@ -1,14 +1,14 @@
  #include <PLDA.h>
 
- #include <rice/Module.hpp>
+ #include <rice/rice.hpp>
 
  #include "utils.h"
 
  void init_plda(Rice::Module& m) {
    Rice::define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(m, "PLDA")
-     .define_singleton_method(
+     .define_singleton_function(
        "_new",
-       *[](size_t tw, size_t latent_topics, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
+       [](size_t tw, size_t latent_topics, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
          tomoto::PLDAArgs args;
          args.numLatentTopics = latent_topics;
          args.alpha = {alpha};
@@ -17,17 +17,17 @@ void init_plda(Rice::Module& m) {
          args.seed = seed;
        }
        return tomoto::IPLDAModel::create((tomoto::TermWeight)tw, args);
-     })
+     }, Rice::Return().takeOwnership())
      .define_method(
        "_add_doc",
-       *[](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
+       [](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
          auto doc = buildDoc(words);
          doc.misc["labels"] = labels;
          return self.addDoc(doc);
        })
      .define_method(
        "latent_topics",
-       *[](tomoto::IPLDAModel& self) {
+       [](tomoto::IPLDAModel& self) {
          return self.getNumLatentTopics();
        });
  }
data/ext/tomoto/slda.cpp CHANGED
@@ -1,18 +1,18 @@
  #include <SLDA.h>
 
- #include <rice/Module.hpp>
+ #include <rice/rice.hpp>
 
  #include "utils.h"
 
  void init_slda(Rice::Module& m) {
    Rice::define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(m, "SLDA")
-     .define_singleton_method(
+     .define_singleton_function(
        "_new",
-       *[](size_t tw, size_t k, Array rb_vars, tomoto::Float alpha, tomoto::Float eta, std::vector<tomoto::Float> mu, std::vector<tomoto::Float> nu_sq, std::vector<tomoto::Float> glm_param, size_t seed) {
+       [](size_t tw, size_t k, Array rb_vars, tomoto::Float alpha, tomoto::Float eta, std::vector<tomoto::Float> mu, std::vector<tomoto::Float> nu_sq, std::vector<tomoto::Float> glm_param, size_t seed) {
          std::vector<tomoto::ISLDAModel::GLM> vars;
          vars.reserve(rb_vars.size());
          for (auto const& v : rb_vars) {
-           vars.push_back((tomoto::ISLDAModel::GLM) from_ruby<int>(v));
+           vars.push_back((tomoto::ISLDAModel::GLM) Rice::detail::From_Ruby<int>().convert(v.value()));
          }
          tomoto::SLDAArgs args;
          args.k = k;
@@ -26,22 +26,22 @@ void init_slda(Rice::Module& m) {
          args.seed = seed;
        }
        return tomoto::ISLDAModel::create((tomoto::TermWeight)tw, args);
-     })
+     }, Rice::Return().takeOwnership())
      .define_method(
        "_add_doc",
-       *[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<tomoto::Float> y) {
+       [](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<tomoto::Float> y) {
          auto doc = buildDoc(words);
          doc.misc["y"] = y;
          return self.addDoc(doc);
        })
      .define_method(
        "f",
-       *[](tomoto::ISLDAModel& self) {
+       [](tomoto::ISLDAModel& self) {
          return self.getF();
        })
      .define_method(
        "_var_type",
-       *[](tomoto::ISLDAModel& self, size_t var_id) {
+       [](tomoto::ISLDAModel& self, size_t var_id) {
          if (var_id >= self.getF()) throw std::runtime_error{ "'var_id' must be < 'f'" };
          return self.getTypeOfVar(var_id) == tomoto::ISLDAModel::GLM::linear ? "l" : "b";
        });
  }
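
The same Rice 4 migration pattern recurs throughout these binding files: `rice/rice.hpp` replaces the per-class Rice headers, lambdas are passed directly instead of being dereferenced with `*`, `define_singleton_method` becomes `define_singleton_function`, and factory lambdas that return a raw pointer now declare `Rice::Return().takeOwnership()` so the wrapping Ruby object frees the C++ instance. A minimal, self-contained sketch of that pattern, using a hypothetical `Widget` class and `Demo` module rather than any tomoto type::

    #include <rice/rice.hpp>

    // Hypothetical Widget class, used only to illustrate the binding pattern.
    class Widget {
    public:
      explicit Widget(size_t size) : size_(size) {}
      size_t size() const { return size_; }
    private:
      size_t size_;
    };

    extern "C" void Init_widget() {
      Rice::Module m = Rice::define_module("Demo");
      Rice::define_class_under<Widget>(m, "Widget")
        .define_singleton_function(
          "_new",
          [](size_t size) {
            return new Widget(size);          // plain lambda, raw pointer result
          }, Rice::Return().takeOwnership())  // Ruby takes ownership and deletes it on GC
        .define_method(
          "size",
          [](Widget& self) {                  // first parameter receives the Ruby self
            return self.size();
          });
    }
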
data/ext/tomoto/utils.h CHANGED
@@ -1,80 +1,26 @@
  #pragma once
 
- #include <rice/Array.hpp>
+ #include <rice/rice.hpp>
+ #include <rice/stl.hpp>
 
  using Rice::Array;
  using Rice::Object;
 
- template<>
- inline
- Object to_ruby<std::vector<tomoto::Float>>(std::vector<tomoto::Float> const & x)
+ namespace Rice::detail
  {
-   Array res;
-   for (auto const& v : x) {
-     res.push(v);
-   }
-   return res;
- }
-
- template<>
- inline
- Object to_ruby<std::vector<uint32_t>>(std::vector<uint32_t> const & x)
- {
-   Array res;
-   for (auto const& v : x) {
-     res.push(v);
-   }
-   return res;
- }
-
- template<>
- inline
- Object to_ruby<std::vector<uint64_t>>(std::vector<uint64_t> const & x)
- {
-   Array res;
-   for (auto const& v : x) {
-     res.push(v);
-   }
-   return res;
- }
-
- template<>
- inline
- std::vector<std::string> from_ruby<std::vector<std::string>>(Object x)
- {
-   Array a = Array(x);
-   std::vector<std::string> res;
-   res.reserve(a.size());
-   for (auto const& v : a) {
-     res.push_back(from_ruby<std::string>(v));
-   }
-   return res;
- }
-
- template<>
- inline
- std::vector<tomoto::Float> from_ruby<std::vector<tomoto::Float>>(Object x)
- {
-   Array a = Array(x);
-   std::vector<tomoto::Float> res;
-   res.reserve(a.size());
-   for (auto const& v : a) {
-     res.push_back(from_ruby<tomoto::Float>(v));
-   }
-   return res;
- }
-
- template<>
- inline
- std::vector<uint64_t> from_ruby<std::vector<uint64_t>>(Object x)
- {
-   Array a = Array(x);
-   std::vector<uint64_t> res;
-   res.reserve(a.size());
-   for (auto const& v : a) {
-     res.push_back(from_ruby<uint64_t>(v));
-   }
-   return res;
+   template<typename T>
+   class To_Ruby<std::vector<T>>
+   {
+   public:
+     VALUE convert(std::vector<T> const & x)
+     {
+       auto a = rb_ary_new2(x.size());
+       for (const auto& v : x) {
+         detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
+       }
+       return a;
+     }
+   };
  }
 
  inline tomoto::RawDoc buildDoc(std::vector<std::string>& words) {
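
With Rice 4, the hand-written `to_ruby`/`from_ruby` specializations above collapse into one generic `Rice::detail::To_Ruby<std::vector<T>>`, while `rice/stl.hpp` covers the Ruby-Array-to-`std::vector` direction used by the method arguments. A rough, self-contained sketch of how such a specialization is exercised; the `halves` function and `Demo` module are made up for illustration::

    #include <rice/rice.hpp>
    #include <rice/stl.hpp>

    #include <vector>

    namespace Rice::detail
    {
      // Same shape as the specialization added to utils.h:
      // turn any std::vector<T> into a freshly allocated Ruby Array.
      template<typename T>
      class To_Ruby<std::vector<T>>
      {
      public:
        VALUE convert(std::vector<T> const& x)
        {
          auto a = rb_ary_new2(x.size());
          for (const auto& v : x) {
            detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
          }
          return a;
        }
      };
    }

    // Hypothetical function: its std::vector<float> argument is built from a
    // Ruby Array, and the returned vector goes back out through To_Ruby above.
    std::vector<float> halves(std::vector<float> xs) {
      for (auto& x : xs) x *= 0.5f;
      return xs;
    }

    extern "C" void Init_demo() {
      Rice::Module m = Rice::define_module("Demo");
      m.define_module_function("halves", &halves); // Demo.halves([2.0, 4.0]) #=> [1.0, 2.0]
    }
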
data/lib/tomoto/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Tomoto
-   VERSION = "0.2.2"
+   VERSION = "0.2.3"
  end
data/vendor/tomotopy/README.kr.rst CHANGED
@@ -198,6 +198,57 @@ add_doc은 `tomotopy.LDAModel.train`을 시작하기 전까지만 사용할 수
  infer 메소드는 `tomotopy.Document` 인스턴스 하나를 추론하거나 `tomotopy.Document` 인스턴스의 `list`를 추론하는데 사용할 수 있습니다.
  자세한 것은 `tomotopy.LDAModel.infer`을 참조하길 바랍니다.
 
+ Corpus와 transform
+ --------------------
+ `tomotopy`의 모든 토픽 모델들은 각자 별도의 내부적인 문헌 타입을 가지고 있습니다.
+ 그리고 이 문헌 타입들에 맞는 문헌들은 각 모델의 `add_doc` 메소드를 통해 생성될 수 있습니다.
+ 하지만 이 때문에 동일한 목록의 문헌들을 서로 다른 토픽 모델에 입력해야 하는 경우
+ 매 모델에 각 문헌을 추가할때마다 `add_doc`을 호출해야하기 때문에 비효율이 발생합니다.
+ 따라서 `tomotopy`에서는 여러 문헌을 묶어서 관리해주는 `tomotopy.utils.Corpus` 클래스를 제공합니다.
+ 토픽 모델 객체를 생성할때 `tomotopy.utils.Corpus`를 `__init__` 메소드의 `corpus` 인자로 넘겨줌으로써
+ 어떤 모델에든 쉽게 문헌들을 삽입할 수 있게 해줍니다.
+ `tomotopy.utils.Corpus`를 토픽 모델에 삽입하면 corpus 객체가 가지고 있는 문헌들 전부가 모델에 자동으로 삽입됩니다.
+
+ 그런데 일부 토픽 모델의 경우 문헌을 생성하기 위해 서로 다른 데이터를 요구합니다.
+ 예를 들어 `tomotopy.DMRModel`는 `metadata`라는 `str` 타입의 데이터를 요구하고,
+ `tomotopy.PLDAModel`는 `labels`라는 `List[str]` 타입의 데이터를 요구합니다.
+ 그러나 `tomotopy.utils.Corpus`는 토픽 모델에 종속되지 않은 독립적인 문헌 데이터를 보관하기 때문에,
+ corpus가 가지고 있는 문헌 데이터가 실제 토픽 모델이 요구하는 데이터와 일치하지 않을 가능성이 있습니다.
+ 이 경우 `transform`라는 인자를 통해 corpus 내의 데이터를 변형시켜 토픽 모델이 요구하는 실제 데이터와 일치시킬 수 있습니다.
+ 자세한 내용은 아래의 코드를 확인해주세요:
+
+ ::
+
+     from tomotopy import DMRModel
+     from tomotopy.utils import Corpus
+
+     corpus = Corpus()
+     corpus.add_doc("a b c d e".split(), a_data=1)
+     corpus.add_doc("e f g h i".split(), a_data=2)
+     corpus.add_doc("i j k l m".split(), a_data=3)
+
+     model = DMRModel(k=10)
+     model.add_corpus(corpus)
+     # `corpus`에 있던 `a_data`는 사라지고
+     # `DMRModel`이 요구하는 `metadata`에는 기본값인 빈 문자열이 채워집니다.
+
+     assert model.docs[0].metadata == ''
+     assert model.docs[1].metadata == ''
+     assert model.docs[2].metadata == ''
+
+     def transform_a_data_to_metadata(misc: dict):
+         return {'metadata': str(misc['a_data'])}
+     # 이 함수는 `a_data`를 `metadata`로 변환합니다.
+
+     model = DMRModel(k=10)
+     model.add_corpus(corpus, transform=transform_a_data_to_metadata)
+     # 이제 `model`에는 기본값이 아닌 `metadata`가 입력됩니다. 이들은 `transform`에 의해 `a_data`로부터 생성됩니다.
+
+     assert model.docs[0].metadata == '1'
+     assert model.docs[1].metadata == '2'
+     assert model.docs[2].metadata == '3'
+
+
  병렬 샘플링 알고리즘
  ----------------------------
  `tomotopy`는 0.5.0버전부터 병렬 알고리즘을 고를 수 있는 선택지를 제공합니다.
@@ -254,6 +305,12 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
 
  역사
  -------
+ * 0.12.1 (2021-06-20)
+     * `tomotopy.LDAModel.set_word_prior()`가 크래시를 발생시키던 문제를 해결했습니다.
+     * 이제 `tomotopy.LDAModel.perplexity`와 `tomotopy.LDAModel.ll_per_word`가 TermWeight가 ONE이 아닌 경우에도 정확한 값을 반환합니다.
+     * 용어가중치가 적용된 빈도수를 반환하는 `tomotopy.LDAModel.used_vocab_weighted_freq`가 추가되었습니다.
+     * 이제 `tomotopy.LDAModel.summary()`가 단어의 엔트로피뿐만 아니라, 용어 가중치가 적용된 단어의 엔트로피도 함께 보여줍니다.
+
  * 0.12.0 (2021-04-26)
      * 이제 `tomotopy.DMRModel`와 `tomotopy.GDMRModel`가 다중 메타데이터를 지원합니다. (https://github.com/bab2min/tomotopy/blob/main/examples/dmr_multi_label.py 참조)
      * `tomotopy.GDMRModel`의 성능이 개선되었습니다.
data/vendor/tomotopy/README.rst CHANGED
@@ -202,6 +202,55 @@ Inference for unseen document should be performed using `tomotopy.LDAModel.infer
  The `infer` method can infer only one instance of `tomotopy.Document` or a `list` of instances of `tomotopy.Document`.
  See more at `tomotopy.LDAModel.infer`.
 
+ Corpus and transform
+ --------------------
+ Every topic model in `tomotopy` has its own internal document type.
+ Documents suitable for each model can be created and added through that model's `add_doc` method.
+ However, adding the same list of documents to several different models becomes inconvenient,
+ because `add_doc` has to be called once per document for every model.
+ Thus, `tomotopy` provides the `tomotopy.utils.Corpus` class, which holds a list of documents.
+ A `tomotopy.utils.Corpus` can be inserted into any model by passing it as the `corpus` argument of the model's `__init__` or `add_corpus` method.
+ Inserting a `tomotopy.utils.Corpus` has the same effect as inserting every document the corpus holds.
+
+ Some topic models require different data for their documents.
+ For example, `tomotopy.DMRModel` requires a `metadata` argument of type `str`,
+ while `tomotopy.PLDAModel` requires a `labels` argument of type `List[str]`.
+ Since `tomotopy.utils.Corpus` holds an independent set of documents rather than being tied to a specific topic model,
+ the data stored in a corpus may not match what a given topic model requires when the corpus is added to it.
+ In this case, the miscellaneous data can be transformed to fit the target topic model using the `transform` argument.
+ See more details in the following code:
+
+ ::
+
+     from tomotopy import DMRModel
+     from tomotopy.utils import Corpus
+
+     corpus = Corpus()
+     corpus.add_doc("a b c d e".split(), a_data=1)
+     corpus.add_doc("e f g h i".split(), a_data=2)
+     corpus.add_doc("i j k l m".split(), a_data=3)
+
+     model = DMRModel(k=10)
+     model.add_corpus(corpus)
+     # The `a_data` field in `corpus` is dropped,
+     # and the `metadata` field that `DMRModel` requires is filled with its default value, the empty string.
+
+     assert model.docs[0].metadata == ''
+     assert model.docs[1].metadata == ''
+     assert model.docs[2].metadata == ''
+
+     def transform_a_data_to_metadata(misc: dict):
+         return {'metadata': str(misc['a_data'])}
+     # this function transforms `a_data` into `metadata`
+
+     model = DMRModel(k=10)
+     model.add_corpus(corpus, transform=transform_a_data_to_metadata)
+     # Now the docs in `model` have non-default `metadata`, generated from the `a_data` field by `transform`.
+
+     assert model.docs[0].metadata == '1'
+     assert model.docs[1].metadata == '2'
+     assert model.docs[2].metadata == '3'
+
  Parallel Sampling Algorithms
  ----------------------------
  Since version 0.5.0, `tomotopy` allows you to choose a parallelism algorithm.
@@ -260,6 +309,12 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh
 
  History
  -------
+ * 0.12.1 (2021-06-20)
+     * An issue where `tomotopy.LDAModel.set_word_prior()` caused a crash has been fixed.
+     * Now `tomotopy.LDAModel.perplexity` and `tomotopy.LDAModel.ll_per_word` return accurate values even when `TermWeight` is not `ONE`.
+     * `tomotopy.LDAModel.used_vocab_weighted_freq` was added, which returns the term-weighted frequencies of words.
+     * Now `tomotopy.LDAModel.summary()` shows not only the entropy of words but also the entropy of term-weighted words.
+
  * 0.12.0 (2021-04-26)
      * Now `tomotopy.DMRModel` and `tomotopy.GDMRModel` support multiple values of metadata (see https://github.com/bab2min/tomotopy/blob/main/examples/dmr_multi_label.py )
      * The performance of `tomotopy.GDMRModel` was improved.
data/vendor/tomotopy/src/Labeling/Phraser.hpp CHANGED
@@ -316,7 +316,7 @@ namespace tomoto
      }
    }
 
-   float totN = std::accumulate(vocabFreqs.begin(), vocabFreqs.end(), (size_t)0);
+   float totN = (float)std::accumulate(vocabFreqs.begin(), vocabFreqs.end(), (size_t)0);
    const float logTotN = std::log(totN);
 
    // calculating PMIs
@@ -489,7 +489,7 @@ namespace tomoto
 
    float rbe = branchingEntropy(trieNodes[0].getNext(bigram.first)->getNext(bigram.second), candMinCnt);
    float lbe = branchingEntropy(trieNodesBw[0].getNext(bigram.second)->getNext(bigram.first), candMinCnt);
-   float nbe = std::sqrt(rbe * lbe) / std::log(p.second);
+   float nbe = std::sqrt(rbe * lbe) / (float)std::log(p.second);
    if (nbe < minNBE) continue;
    candidates.emplace_back(npmi * nbe, bigram.first, bigram.second);
    candidates.back().cf = p.second;
@@ -512,7 +512,7 @@ namespace tomoto
 
    float rbe = branchingEntropy(node, candMinCnt);
    float lbe = branchingEntropy(trieNodesBw[0].findNode(rkeys.rbegin(), rkeys.rend()), candMinCnt);
-   float nbe = std::sqrt(rbe * lbe) / std::log(node->val);
+   float nbe = std::sqrt(rbe * lbe) / (float)std::log(node->val);
    if (nbe < minNBE) return;
    candidates.emplace_back(npmi * nbe, rkeys);
    candidates.back().cf = node->val;
data/vendor/tomotopy/src/TopicModel/CTModel.hpp CHANGED
@@ -33,7 +33,10 @@ namespace tomoto
    friend typename BaseClass::BaseClass;
    using WeightType = typename BaseClass::WeightType;
 
-   static constexpr char TMID[] = "CTM\0";
+   static constexpr auto tmid()
+   {
+     return serializer::to_key("CTM\0");
+   }
 
    uint64_t numBetaSample = 10;
    uint64_t numTMNSample = 5;
@@ -247,7 +250,7 @@ namespace tomoto
      this->optimInterval = 2;
    }
 
-   std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
+   std::vector<Float> _getTopicsByDoc(const _DocType& doc, bool normalize) const
    {
      std::vector<Float> ret(this->K);
      Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), this->K };
data/vendor/tomotopy/src/TopicModel/DMRModel.hpp CHANGED
@@ -47,7 +47,10 @@ namespace tomoto
    friend typename BaseClass::BaseClass;
    using WeightType = typename BaseClass::WeightType;
 
-   static constexpr char TMID[] = "DMR\0";
+   static constexpr auto tmid()
+   {
+     return serializer::to_key("DMR\0");
+   }
 
    Matrix lambda;
    mutable std::unordered_map<std::pair<uint64_t, Vector>, size_t, MdHash> mdHashMap;
@@ -449,7 +452,7 @@ namespace tomoto
      optimRepeat = _optimRepeat;
    }
 
-   std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
+   std::vector<Float> _getTopicsByDoc(const _DocType& doc, bool normalize) const
    {
      std::vector<Float> ret(this->K);
      auto alphaDoc = getCachedAlpha(doc);
data/vendor/tomotopy/src/TopicModel/DTModel.hpp CHANGED
@@ -41,7 +41,10 @@ namespace tomoto
    friend typename BaseClass::BaseClass;
    using WeightType = typename BaseClass::WeightType;
 
-   static constexpr char TMID[] = "DTM\0";
+   static constexpr auto tmid()
+   {
+     return serializer::to_key("DTM\0");
+   }
 
    uint64_t T;
    Float shapeA = 0.03f, shapeB = 0.1f, shapeC = 0.55f;
@@ -54,7 +57,7 @@ namespace tomoto
    std::vector<sample::AliasMethod<>> wordAliasTables; // Dim: (Word * Time)
 
    template<int _inc>
-   inline void addWordTo(_ModelState& ld, _DocType& doc, uint32_t pid, Vid vid, Tid tid) const
+   inline void addWordTo(_ModelState& ld, _DocType& doc, size_t pid, Vid vid, Tid tid) const
    {
      assert(tid < this->K);
      assert(vid < this->realV);
data/vendor/tomotopy/src/TopicModel/HDPModel.hpp CHANGED
@@ -168,7 +168,7 @@
    }
 
    template<int _inc>
-   inline void addWordTo(_ModelState& ld, _DocType& doc, uint32_t pid, Vid vid, size_t tableId, Tid tid) const
+   inline void addWordTo(_ModelState& ld, _DocType& doc, size_t pid, Vid vid, size_t tableId, Tid tid) const
    {
      addOnlyWordTo<_inc>(ld, doc, pid, vid, tid);
      constexpr bool _dec = _inc < 0 && _tw != TermWeight::one;
@@ -490,7 +490,7 @@
      THROW_ERROR_WITH_INFO(exc::Unimplemented, "HDPModel doesn't provide setWordPrior function.");
    }
 
-   std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
+   std::vector<Float> _getTopicsByDoc(const _DocType& doc, bool normalize) const
    {
      std::vector<Float> ret(this->K);
      Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), this->K };
@@ -522,7 +522,7 @@
    for (size_t i = 0; i < cntIdx.size(); ++i)
    {
      if (i && cntIdx[i].first / sum <= topicThreshold) break;
-     newK[cntIdx[i].second] = i;
+     newK[cntIdx[i].second] = (Tid)i;
      liveK++;
    }
 
@@ -558,7 +558,7 @@
        lda->docs[i].Zs[j] = non_topic_id;
        continue;
      }
-     size_t newTopic = newK[this->docs[i].numTopicByTable[this->docs[i].Zs[j]].topic];
+     Tid newTopic = newK[this->docs[i].numTopicByTable[this->docs[i].Zs[j]].topic];
      while (newTopic == (Tid)-1) newTopic = newK[randomTopic(rng)];
      lda->docs[i].Zs[j] = newTopic;
    }