tomoto 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/tomoto/ct.cpp +8 -4
  4. data/ext/tomoto/dmr.cpp +10 -4
  5. data/ext/tomoto/dt.cpp +13 -4
  6. data/ext/tomoto/extconf.rb +1 -1
  7. data/ext/tomoto/gdmr.cpp +14 -6
  8. data/ext/tomoto/hdp.cpp +9 -4
  9. data/ext/tomoto/hlda.cpp +9 -4
  10. data/ext/tomoto/hpa.cpp +9 -4
  11. data/ext/tomoto/lda.cpp +8 -4
  12. data/ext/tomoto/llda.cpp +8 -4
  13. data/ext/tomoto/mglda.cpp +11 -1
  14. data/ext/tomoto/pa.cpp +9 -4
  15. data/ext/tomoto/plda.cpp +8 -4
  16. data/ext/tomoto/slda.cpp +13 -5
  17. data/lib/tomoto/gdmr.rb +2 -2
  18. data/lib/tomoto/version.rb +1 -1
  19. data/vendor/EigenRand/EigenRand/Core.h +6 -1107
  20. data/vendor/EigenRand/EigenRand/Dists/Basic.h +490 -43
  21. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +916 -285
  22. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +85 -36
  23. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +1038 -290
  24. data/vendor/EigenRand/EigenRand/EigenRand +2 -2
  25. data/vendor/EigenRand/EigenRand/Macro.h +4 -4
  26. data/vendor/EigenRand/EigenRand/MorePacketMath.h +54 -22
  27. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +222 -0
  28. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +492 -0
  29. data/vendor/EigenRand/EigenRand/PacketFilter.h +2 -2
  30. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +2 -2
  31. data/vendor/EigenRand/EigenRand/RandUtils.h +65 -11
  32. data/vendor/EigenRand/EigenRand/doc.h +142 -25
  33. data/vendor/EigenRand/LICENSE +1 -1
  34. data/vendor/EigenRand/README.md +109 -24
  35. data/vendor/tomotopy/README.kr.rst +27 -6
  36. data/vendor/tomotopy/README.rst +29 -8
  37. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +60 -12
  38. data/vendor/tomotopy/src/Labeling/FoRelevance.h +2 -2
  39. data/vendor/tomotopy/src/Labeling/Phraser.hpp +33 -21
  40. data/vendor/tomotopy/src/TopicModel/CT.h +8 -5
  41. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +2 -6
  42. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +29 -23
  43. data/vendor/tomotopy/src/TopicModel/DMR.h +33 -4
  44. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +2 -6
  45. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +231 -57
  46. data/vendor/tomotopy/src/TopicModel/DT.h +24 -5
  47. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +2 -8
  48. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +41 -28
  49. data/vendor/tomotopy/src/TopicModel/GDMR.h +31 -5
  50. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +2 -7
  51. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +211 -104
  52. data/vendor/tomotopy/src/TopicModel/HDP.h +11 -2
  53. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +2 -6
  54. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +52 -45
  55. data/vendor/tomotopy/src/TopicModel/HLDA.h +11 -2
  56. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +2 -6
  57. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +13 -16
  58. data/vendor/tomotopy/src/TopicModel/HPA.h +5 -2
  59. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +2 -6
  60. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +51 -21
  61. data/vendor/tomotopy/src/TopicModel/LDA.h +9 -2
  62. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +8 -8
  63. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +2 -6
  64. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +70 -28
  65. data/vendor/tomotopy/src/TopicModel/LLDA.h +1 -2
  66. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +2 -6
  67. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +22 -12
  68. data/vendor/tomotopy/src/TopicModel/MGLDA.h +12 -3
  69. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +2 -10
  70. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +42 -19
  71. data/vendor/tomotopy/src/TopicModel/PA.h +9 -4
  72. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +2 -6
  73. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +48 -25
  74. data/vendor/tomotopy/src/TopicModel/PLDA.h +13 -2
  75. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +2 -6
  76. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +27 -19
  77. data/vendor/tomotopy/src/TopicModel/PT.h +12 -5
  78. data/vendor/tomotopy/src/TopicModel/PTModel.cpp +2 -3
  79. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +29 -14
  80. data/vendor/tomotopy/src/TopicModel/SLDA.h +18 -6
  81. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +2 -10
  82. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +93 -43
  83. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +58 -23
  84. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +6 -6
  85. data/vendor/tomotopy/src/Utils/Dictionary.h +11 -0
  86. data/vendor/tomotopy/src/Utils/SharedString.hpp +26 -1
  87. data/vendor/tomotopy/src/Utils/Trie.hpp +46 -21
  88. data/vendor/tomotopy/src/Utils/Utils.hpp +99 -14
  89. data/vendor/tomotopy/src/Utils/exception.h +1 -1
  90. data/vendor/tomotopy/src/Utils/math.h +5 -7
  91. data/vendor/tomotopy/src/Utils/serializer.hpp +329 -201
  92. data/vendor/tomotopy/src/Utils/text.hpp +8 -0
  93. data/vendor/tomotopy/src/Utils/tvector.hpp +49 -7
  94. metadata +9 -7
@@ -4,7 +4,7 @@
4
4
  namespace tomoto
5
5
  {
6
6
  template<TermWeight _tw>
7
- struct DocumentPTM : public DocumentLDA<_tw>
7
+ struct DocumentPT : public DocumentLDA<_tw>
8
8
  {
9
9
  using BaseDocument = DocumentLDA<_tw>;
10
10
  using DocumentLDA<_tw>::DocumentLDA;
@@ -16,12 +16,19 @@ namespace tomoto
16
16
  DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseDocument, 1, 0x00010001, pseudoDoc);
17
17
  };
18
18
 
19
+ struct PTArgs : public LDAArgs
20
+ {
21
+ size_t p = 100;
22
+ Float lambda = 0.01;
23
+ };
24
+
19
25
  class IPTModel : public ILDAModel
20
26
  {
21
27
  public:
22
- using DefaultDocType = DocumentPTM<TermWeight::one>;
23
- static IPTModel* create(TermWeight _weight, size_t _K = 1, size_t _P = 100,
24
- Float alpha = 0.1, Float eta = 0.01, Float lambda = 0.01, size_t seed = std::random_device{}(),
28
+ using DefaultDocType = DocumentPT<TermWeight::one>;
29
+ static IPTModel* create(TermWeight _weight, const PTArgs& args,
25
30
  bool scalarRng = false);
31
+
32
+ virtual size_t getP() const = 0;
26
33
  };
27
- }
34
+ }
@@ -2,9 +2,8 @@
2
2
 
3
3
  namespace tomoto
4
4
  {
5
-
6
- IPTModel* IPTModel::create(TermWeight _weight, size_t _K, size_t _P, Float _alpha, Float _eta, Float _lambda, size_t seed, bool scalarRng)
5
+ IPTModel* IPTModel::create(TermWeight _weight, const PTArgs& args, bool scalarRng)
7
6
  {
8
- TMT_SWITCH_TW(_weight, scalarRng, PTModel, _K, _P, _alpha, _eta, _lambda, seed);
7
+ TMT_SWITCH_TW(_weight, scalarRng, PTModel, args);
9
8
  }
10
9
  }
@@ -25,7 +25,7 @@ namespace tomoto
25
25
  template<TermWeight _tw, typename _RandGen,
26
26
  typename _Interface = IPTModel,
27
27
  typename _Derived = void,
28
- typename _DocType = DocumentPTM<_tw>,
28
+ typename _DocType = DocumentPT<_tw>,
29
29
  typename _ModelState = ModelStatePTM<_tw>>
30
30
  class PTModel : public LDAModel<_tw, _RandGen, flags::continuous_doc_data | flags::partitioned_multisampling, _Interface,
31
31
  typename std::conditional<std::is_same<_Derived, void>::value, PTModel<_tw, _RandGen>, _Derived>::type,
@@ -158,15 +158,13 @@ namespace tomoto
158
158
  {
159
159
  sortAndWriteOrder(doc.words, doc.wOrder);
160
160
  doc.numByTopic.init((WeightType*)this->globalState.numByTopicPDoc.col(0).data(), this->K, 1);
161
- doc.Zs = tvector<Tid>(wordSize);
161
+ doc.Zs = tvector<Tid>(wordSize, non_topic_id);
162
162
  if (_tw != TermWeight::one) doc.wordWeights.resize(wordSize);
163
163
  }
164
164
 
165
165
  void initGlobalState(bool initDocs)
166
166
  {
167
- this->alphas.resize(this->K);
168
- this->alphas.array() = this->alpha;
169
- this->globalState.pLikelihood = Eigen::Matrix<Float, -1, 1>::Zero(numPDocs);
167
+ this->globalState.pLikelihood = Vector::Zero(numPDocs);
170
168
  this->globalState.numDocsByPDoc = Eigen::ArrayXi::Zero(numPDocs);
171
169
  this->globalState.numByTopicPDoc = Eigen::Matrix<WeightType, -1, -1>::Zero(this->K, numPDocs);
172
170
  BaseClass::initGlobalState(initDocs);
@@ -175,15 +173,15 @@ namespace tomoto
175
173
  struct Generator
176
174
  {
177
175
  std::uniform_int_distribution<uint64_t> psi;
178
- std::uniform_int_distribution<Tid> theta;
176
+ Eigen::Rand::DiscreteGen<int32_t> theta;
179
177
  };
180
178
 
181
179
  Generator makeGeneratorForInit(const _DocType*) const
182
180
  {
183
- return Generator{
184
- std::uniform_int_distribution<uint64_t>{0, numPDocs - 1},
185
- std::uniform_int_distribution<Tid>{0, (Tid)(this->K - 1)}
186
- };
181
+ Generator g;
182
+ g.psi = std::uniform_int_distribution<uint64_t>{ 0, numPDocs - 1 };
183
+ g.theta = Eigen::Rand::DiscreteGen<int32_t>{ this->alphas.data(), this->alphas.data() + this->alphas.size() };
184
+ return g;
187
185
  }
188
186
 
189
187
  template<bool _Infer>
@@ -256,17 +254,34 @@ namespace tomoto
256
254
  DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 0, numPDocs, lambda);
257
255
  DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 1, 0x00010001, numPDocs, lambda);
258
256
 
259
- PTModel(size_t _K = 1, size_t _P = 100, Float _alpha = 1.0, Float _eta = 0.01, Float _lambda = 0.01,
260
- size_t _rg = std::random_device{}())
261
- : BaseClass(_K, _alpha, _eta, _rg), numPDocs(_P), lambda(_lambda)
257
+ GETTER(P, size_t, numPDocs);
258
+
259
+ PTModel(const PTArgs& args)
260
+ : BaseClass(args), numPDocs(args.p), lambda(args.lambda)
262
261
  {
263
262
  }
264
263
 
264
+ std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
265
+ {
266
+ std::vector<Float> ret(this->K);
267
+ Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), this->K };
268
+ m = this->alphas.array();
269
+ for (size_t i = 0; i < doc.words.size(); ++i)
270
+ {
271
+ if (doc.words[i] >= this->realV) continue;
272
+ typename std::conditional<_tw != TermWeight::one, float, int32_t>::type weight
273
+ = _tw != TermWeight::one ? doc.wordWeights[i] : 1;
274
+ ret[doc.Zs[i]] += weight;
275
+ }
276
+ if (normalize) m /= m.sum();
277
+ return ret;
278
+ }
279
+
265
280
  void updateDocs()
266
281
  {
267
282
  for (auto& doc : this->docs)
268
283
  {
269
- doc.template update<>(this->getTopicDocPtr(doc.pseudoDoc), *static_cast<DerivedClass*>(this));
284
+ doc.template update<>(this->globalState.numByTopicPDoc.col(doc.pseudoDoc).data(), *static_cast<DerivedClass*>(this));
270
285
  }
271
286
  }
272
287
  };
@@ -9,10 +9,19 @@ namespace tomoto
9
9
  using BaseDocument = DocumentLDA<_tw>;
10
10
  using DocumentLDA<_tw>::DocumentLDA;
11
11
  std::vector<Float> y;
12
+
13
+ RawDoc::MiscType makeMisc(const ITopicModel* tm) const override
14
+ {
15
+ RawDoc::MiscType ret = DocumentLDA<_tw>::makeMisc(tm);
16
+ ret["y"] = y;
17
+ return ret;
18
+ }
12
19
  DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseDocument, 0, y);
13
20
  DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseDocument, 1, 0x00010001, y);
14
21
  };
15
22
 
23
+ struct SLDAArgs;
24
+
16
25
  class ISLDAModel : public ILDAModel
17
26
  {
18
27
  public:
@@ -23,12 +32,7 @@ namespace tomoto
23
32
  };
24
33
 
25
34
  using DefaultDocType = DocumentSLDA<TermWeight::one>;
26
- static ISLDAModel* create(TermWeight _weight, size_t _K = 1,
27
- const std::vector<ISLDAModel::GLM>& vars = {},
28
- Float alpha = 0.1, Float _eta = 0.01,
29
- const std::vector<Float>& _mu = {}, const std::vector<Float>& _nuSq = {},
30
- const std::vector<Float>& _glmParam = {},
31
- size_t seed = std::random_device{}(),
35
+ static ISLDAModel* create(TermWeight _weight, const SLDAArgs& args,
32
36
  bool scalarRng = false);
33
37
 
34
38
  virtual size_t getF() const = 0;
@@ -36,4 +40,12 @@ namespace tomoto
36
40
  virtual GLM getTypeOfVar(size_t f) const = 0;
37
41
  virtual std::vector<Float> estimateVars(const DocumentBase* doc) const = 0;
38
42
  };
43
+
44
+ struct SLDAArgs : public LDAArgs
45
+ {
46
+ std::vector<ISLDAModel::GLM> vars;
47
+ std::vector<Float> mu;
48
+ std::vector<Float> nuSq;
49
+ std::vector<Float> glmParam;
50
+ };
39
51
  }
@@ -2,16 +2,8 @@
2
2
 
3
3
  namespace tomoto
4
4
  {
5
- /*template class SLDAModel<TermWeight::one>;
6
- template class SLDAModel<TermWeight::idf>;
7
- template class SLDAModel<TermWeight::pmi>;*/
8
-
9
- ISLDAModel* ISLDAModel::create(TermWeight _weight, size_t _K, const std::vector<ISLDAModel::GLM>& vars,
10
- Float _alpha, Float _eta,
11
- const std::vector<Float>& _mu, const std::vector<Float>& _nuSq,
12
- const std::vector<Float>& _glmParam,
13
- size_t seed, bool scalarRng)
5
+ ISLDAModel* ISLDAModel::create(TermWeight _weight, const SLDAArgs& args, bool scalarRng)
14
6
  {
15
- TMT_SWITCH_TW(_weight, scalarRng, SLDAModel, _K, vars, _alpha, _eta, _mu, _nuSq, _glmParam, seed);
7
+ TMT_SWITCH_TW(_weight, scalarRng, SLDAModel, args);
16
8
  }
17
9
  }
@@ -16,22 +16,24 @@ namespace tomoto
16
16
  template<typename _WeightType>
17
17
  struct GLMFunctor
18
18
  {
19
- Eigen::Matrix<Float, -1, 1> regressionCoef; // Dim : (K)
19
+ Vector regressionCoef; // Dim : (K)
20
20
 
21
- GLMFunctor(size_t K = 0, Float mu = 0) : regressionCoef(Eigen::Matrix<Float, -1, 1>::Constant(K, mu))
21
+ GLMFunctor(size_t K = 0, Float mu = 0) : regressionCoef(Vector::Constant(K, mu))
22
22
  {
23
23
  }
24
24
 
25
25
  virtual ISLDAModel::GLM getType() const = 0;
26
26
 
27
+ virtual std::unique_ptr<GLMFunctor> copy() const = 0;
28
+
27
29
  virtual void updateZLL(
28
- Eigen::Matrix<Float, -1, 1>& zLikelihood,
30
+ Vector& zLikelihood,
29
31
  Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic, size_t docId, Float docSize) const = 0;
30
32
 
31
33
  virtual void optimizeCoef(
32
- const Eigen::Matrix<Float, -1, -1>& normZ,
34
+ const Matrix& normZ,
33
35
  Float mu, Float nuSq,
34
- Eigen::Block<Eigen::Matrix<Float, -1, -1>, -1, 1, true> ys
36
+ Eigen::Block<Matrix, -1, 1, true> ys
35
37
  ) = 0;
36
38
 
37
39
  virtual double getLL(Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic,
@@ -69,8 +71,13 @@ namespace tomoto
69
71
 
70
72
  ISLDAModel::GLM getType() const override { return ISLDAModel::GLM::linear; }
71
73
 
74
+ std::unique_ptr<GLMFunctor<_WeightType>> copy() const override
75
+ {
76
+ return std::make_unique<LinearFunctor>(*this);
77
+ }
78
+
72
79
  void updateZLL(
73
- Eigen::Matrix<Float, -1, 1>& zLikelihood,
80
+ Vector& zLikelihood,
74
81
  Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic, size_t docId, Float docSize) const override
75
82
  {
76
83
  Float yErr = y -
@@ -81,14 +88,14 @@ namespace tomoto
81
88
  }
82
89
 
83
90
  void optimizeCoef(
84
- const Eigen::Matrix<Float, -1, -1>& normZ,
91
+ const Matrix& normZ,
85
92
  Float mu, Float nuSq,
86
- Eigen::Block<Eigen::Matrix<Float, -1, -1>, -1, 1, true> ys
93
+ Eigen::Block<Matrix, -1, 1, true> ys
87
94
  ) override
88
95
  {
89
- Eigen::Matrix<Float, -1, -1> selectedNormZ = normZ.array().rowwise() * (!ys.array().transpose().isNaN()).template cast<Float>();
90
- Eigen::Matrix<Float, -1, -1> normZZT = selectedNormZ * selectedNormZ.transpose();
91
- normZZT += Eigen::Matrix<Float, -1, -1>::Identity(normZZT.cols(), normZZT.cols()) / nuSq;
96
+ Matrix selectedNormZ = normZ.array().rowwise() * (!ys.array().transpose().isNaN()).template cast<Float>();
97
+ Matrix normZZT = selectedNormZ * selectedNormZ.transpose();
98
+ normZZT += Matrix::Identity(normZZT.cols(), normZZT.cols()) / nuSq;
92
99
  this->regressionCoef = normZZT.colPivHouseholderQr().solve(selectedNormZ * ys.array().isNaN().select(0, ys).matrix());
93
100
  }
94
101
 
@@ -113,17 +120,22 @@ namespace tomoto
113
120
  struct BinaryLogisticFunctor : public GLMFunctor<_WeightType>
114
121
  {
115
122
  Float b = 1;
116
- Eigen::Matrix<Float, -1, 1> omega;
123
+ Vector omega;
117
124
 
118
125
  BinaryLogisticFunctor(size_t K = 0, Float mu = 0, Float _b = 1, size_t numDocs = 0)
119
- : GLMFunctor<_WeightType>(K, mu), b(_b), omega{ Eigen::Matrix<Float, -1, 1>::Ones(numDocs) }
126
+ : GLMFunctor<_WeightType>(K, mu), b(_b), omega{ Vector::Ones(numDocs) }
120
127
  {
121
128
  }
122
129
 
123
130
  ISLDAModel::GLM getType() const override { return ISLDAModel::GLM::binary_logistic; }
124
131
 
132
+ std::unique_ptr<GLMFunctor<_WeightType>> copy() const override
133
+ {
134
+ return std::make_unique<BinaryLogisticFunctor>(*this);
135
+ }
136
+
125
137
  void updateZLL(
126
- Eigen::Matrix<Float, -1, 1>& zLikelihood,
138
+ Vector& zLikelihood,
127
139
  Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic, size_t docId, Float docSize) const override
128
140
  {
129
141
  Float yErr = b * (y - 0.5f) -
@@ -134,18 +146,18 @@ namespace tomoto
134
146
  }
135
147
 
136
148
  void optimizeCoef(
137
- const Eigen::Matrix<Float, -1, -1>& normZ,
149
+ const Matrix& normZ,
138
150
  Float mu, Float nuSq,
139
- Eigen::Block<Eigen::Matrix<Float, -1, -1>, -1, 1, true> ys
151
+ Eigen::Block<Matrix, -1, 1, true> ys
140
152
  ) override
141
153
  {
142
- Eigen::Matrix<Float, -1, -1> selectedNormZ = normZ.array().rowwise() * (!ys.array().transpose().isNaN()).template cast<Float>();
143
- Eigen::Matrix<Float, -1, -1> normZZT = selectedNormZ * Eigen::DiagonalMatrix<Float, -1>{ omega } * selectedNormZ.transpose();
144
- normZZT += Eigen::Matrix<Float, -1, -1>::Identity(normZZT.cols(), normZZT.cols()) / nuSq;
154
+ Matrix selectedNormZ = normZ.array().rowwise() * (!ys.array().transpose().isNaN()).template cast<Float>();
155
+ Matrix normZZT = selectedNormZ * Eigen::DiagonalMatrix<Float, -1>{ omega } * selectedNormZ.transpose();
156
+ normZZT += Matrix::Identity(normZZT.cols(), normZZT.cols()) / nuSq;
145
157
 
146
158
  this->regressionCoef = normZZT
147
159
  .colPivHouseholderQr().solve(selectedNormZ * ys.array().isNaN().select(0, b * (ys.array() - 0.5f)).matrix()
148
- + Eigen::Matrix<Float, -1, 1>::Constant(selectedNormZ.rows(), mu / nuSq));
160
+ + Vector::Constant(selectedNormZ.rows(), mu / nuSq));
149
161
 
150
162
  RandGen rng;
151
163
  for (size_t i = 0; i < (size_t)omega.size(); ++i)
@@ -173,8 +185,20 @@ namespace tomoto
173
185
 
174
186
  DEFINE_SERIALIZER_AFTER_BASE(GLMFunctor<_WeightType>, b, omega);
175
187
  };
188
+
189
+ struct CopyGLMFunctor
190
+ {
191
+ template<typename Wt>
192
+ std::vector<std::unique_ptr<GLMFunctor<Wt>>> operator()(const std::vector<std::unique_ptr<GLMFunctor<Wt>>>& o)
193
+ {
194
+ std::vector<std::unique_ptr<GLMFunctor<Wt>>> ret;
195
+ for (auto& p : o) ret.emplace_back(p->copy());
196
+ return ret;
197
+ }
198
+ };
176
199
  }
177
200
 
201
+
178
202
  template<TermWeight _tw, typename _RandGen,
179
203
  size_t _Flags = flags::partitioned_multisampling,
180
204
  typename _Interface = ISLDAModel,
@@ -198,12 +222,12 @@ namespace tomoto
198
222
  std::vector<ISLDAModel::GLM> varTypes;
199
223
  std::vector<Float> glmParam;
200
224
 
201
- Eigen::Matrix<Float, -1, 1> mu; // Mean of regression coefficients, Dim : (F)
202
- Eigen::Matrix<Float, -1, 1> nuSq; // Variance of regression coefficients, Dim : (F)
225
+ Vector mu; // Mean of regression coefficients, Dim : (F)
226
+ Vector nuSq; // Variance of regression coefficients, Dim : (F)
203
227
 
204
- std::vector<std::unique_ptr<detail::GLMFunctor<WeightType>>> responseVars;
205
- Eigen::Matrix<Float, -1, -1> normZ; // topic proportions for all docs, Dim : (K, D)
206
- Eigen::Matrix<Float, -1, -1> Ys; // response variables, Dim : (D, F)
228
+ DelegateCopy<std::vector<std::unique_ptr<detail::GLMFunctor<WeightType>>>, detail::CopyGLMFunctor> responseVars;
229
+ Matrix normZ; // topic proportions for all docs, Dim : (K, D)
230
+ Matrix Ys; // response variables, Dim : (D, F)
207
231
 
208
232
  template<bool _asymEta>
209
233
  Float* getZLikelihoods(_ModelState& ld, const _DocType& doc, size_t docId, size_t vid) const
@@ -299,11 +323,11 @@ namespace tomoto
299
323
  switch (varTypes[f])
300
324
  {
301
325
  case ISLDAModel::GLM::linear:
302
- v = make_unique<detail::LinearFunctor<WeightType>>(this->K, mu[f],
326
+ v = std::make_unique<detail::LinearFunctor<WeightType>>(this->K, mu[f],
303
327
  f < glmParam.size() ? glmParam[f] : 1.f);
304
328
  break;
305
329
  case ISLDAModel::GLM::binary_logistic:
306
- v = make_unique<detail::BinaryLogisticFunctor<WeightType>>(this->K, mu[f],
330
+ v = std::make_unique<detail::BinaryLogisticFunctor<WeightType>>(this->K, mu[f],
307
331
  f < glmParam.size() ? glmParam[f] : 1.f, this->docs.size());
308
332
  break;
309
333
  }
@@ -322,22 +346,48 @@ namespace tomoto
322
346
  DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 0, F, responseVars, mu, nuSq);
323
347
  DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 1, 0x00010001, F, responseVars, mu, nuSq);
324
348
 
325
- SLDAModel(size_t _K = 1, const std::vector<ISLDAModel::GLM>& vars = {},
326
- Float _alpha = 0.1, Float _eta = 0.01,
327
- const std::vector<Float>& _mu = {}, const std::vector<Float>& _nuSq = {},
328
- const std::vector<Float>& _glmParam = {},
329
- size_t _rg = std::random_device{}())
330
- : BaseClass(_K, _alpha, _eta, _rg), F(vars.size()), varTypes(vars),
331
- glmParam(_glmParam)
349
+ SLDAModel(const SLDAArgs& args)
350
+ : BaseClass(args), F(args.vars.size()), varTypes(args.vars),
351
+ glmParam(args.glmParam)
332
352
  {
333
353
  for (auto t : varTypes)
334
354
  {
335
- if (t != ISLDAModel::GLM::linear && t != ISLDAModel::GLM::binary_logistic) THROW_ERROR_WITH_INFO(std::runtime_error, "unknown var GLM type in 'vars'");
355
+ if ((size_t)t > (size_t)ISLDAModel::GLM::binary_logistic) THROW_ERROR_WITH_INFO(exc::InvalidArgument, "unknown var GLM type in `vars`");
356
+ }
357
+
358
+ if (args.mu.size() == 0)
359
+ {
360
+ mu = Vector::Zero(F);
361
+ }
362
+ else if (args.mu.size() == 1)
363
+ {
364
+ mu = Vector::Constant(F, args.mu[0]);
365
+ }
366
+ else if (args.mu.size() == F)
367
+ {
368
+ mu = Eigen::Map<const Vector>(args.mu.data(), args.mu.size());
369
+ }
370
+ else
371
+ {
372
+ THROW_ERROR_WITH_INFO(exc::InvalidArgument, text::format("wrong mu value (len = %zd)", args.mu.size()));
373
+ }
374
+
375
+ if (args.nuSq.size() == 0)
376
+ {
377
+ nuSq = Vector::Ones(F);
378
+ }
379
+ else if (args.mu.size() == 1)
380
+ {
381
+ nuSq = Vector::Constant(F, args.nuSq[0]);
382
+ }
383
+ else if (args.mu.size() == F)
384
+ {
385
+ nuSq = Eigen::Map<const Vector>(args.nuSq.data(), args.nuSq.size());
386
+ }
387
+ else
388
+ {
389
+ THROW_ERROR_WITH_INFO(exc::InvalidArgument, text::format("wrong nuSq value (len = %zd)", args.nuSq.size()));
336
390
  }
337
- mu = decltype(mu)::Zero(F);
338
- std::copy(_mu.begin(), _mu.end(), mu.data());
339
- nuSq = decltype(nuSq)::Ones(F);
340
- std::copy(_nuSq.begin(), _nuSq.end(), nuSq.data());
341
391
  }
342
392
 
343
393
  std::vector<Float> getRegressionCoef(size_t f) const override
@@ -385,7 +435,7 @@ namespace tomoto
385
435
  std::unique_ptr<DocumentBase> makeDoc(const RawDoc& rawDoc, const RawDocTokenizer::Factory& tokenizer) const override
386
436
  {
387
437
  auto doc = as_mutable(this)->template _makeFromRawDoc<true>(rawDoc, tokenizer);
388
- return make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<Float>>("y")));
438
+ return std::make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<Float>>("y")));
389
439
  }
390
440
 
391
441
  size_t addDoc(const RawDoc& rawDoc) override
@@ -397,7 +447,7 @@ namespace tomoto
397
447
  std::unique_ptr<DocumentBase> makeDoc(const RawDoc& rawDoc) const override
398
448
  {
399
449
  auto doc = as_mutable(this)->template _makeFromRawDoc<true>(rawDoc);
400
- return make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<Float>>("y")));
450
+ return std::make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<Float>>("y")));
401
451
  }
402
452
 
403
453
  std::vector<Float> estimateVars(const DocumentBase* doc) const override
@@ -424,10 +474,10 @@ namespace tomoto
424
474
  switch ((ISLDAModel::GLM)(t - 1))
425
475
  {
426
476
  case ISLDAModel::GLM::linear:
427
- p = make_unique<LinearFunctor<_WeightType>>();
477
+ p = std::make_unique<LinearFunctor<_WeightType>>();
428
478
  break;
429
479
  case ISLDAModel::GLM::binary_logistic:
430
- p = make_unique<BinaryLogisticFunctor<_WeightType>>();
480
+ p = std::make_unique<BinaryLogisticFunctor<_WeightType>>();
431
481
  break;
432
482
  default:
433
483
  throw std::ios_base::failure(text::format("wrong GLMFunctor type id %d", (t - 1)));