tomoto 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3b40c9adf2f0162eb6174b17395ea37b9294e14b22609e9f51951e9904125ff9
4
- data.tar.gz: be3f68438f60a7e4fc11033921636f8d03bf411bd3d3eb6aa3b4fb448faac41a
3
+ metadata.gz: dd4c36ff621f73c38bb066694a932f0a682c18591ddf05a9a0764bea0b6e4430
4
+ data.tar.gz: 551e56c4bc17fb5a3a0aeac0db055960fcc5e45bf097bf88c7cbf9046f958e7d
5
5
  SHA512:
6
- metadata.gz: a74747ae372d030c42562d4e2b99ab167ccc28533468ed08819f4bd34d42b340349870712c12e565388eb7833f993349432e77baee8618c34c265676ca181072
7
- data.tar.gz: da9e833bb98726278108a68a7dd6bed0e54b3979c25d49d8db01aa613e6205a6a3512881688209baf2589c4dc5514ed2663fb251a5e273c42b678bb1daa06d74
6
+ metadata.gz: 565a91d0bb6d48142f38dc3d9e798ddb99bf41fda32762295362075fba972eea6b56b6bde126eab74677eba5fd525581b68c5efa73361a46fcb0b2796ab63684
7
+ data.tar.gz: 415193e4eb6adbe5dce05328aadf9acb91f4acc50951484183a956455d7336f93961fe145465b1eeffaae78dad37ee1452defe832514c72b3c032860ed433cc8
@@ -1,3 +1,8 @@
1
+ ## 0.1.2 (2020-10-10)
2
+
3
+ - Added `summary` method
4
+ - Added `parallel` option to `train` method
5
+
1
6
  ## 0.1.1 (2020-10-10)
2
7
 
3
8
  - Added many more models
data/README.md CHANGED
@@ -23,7 +23,13 @@ model = Tomoto::LDA.new(k: 3)
23
23
  model.add_doc("text from document one")
24
24
  model.add_doc("text from document two")
25
25
  model.add_doc("text from document three")
26
- model.train(100)
26
+ model.train(100) # iterations
27
+ ```
28
+
29
+ Get the summary
30
+
31
+ ```ruby
32
+ model.summary
27
33
  ```
28
34
 
29
35
  Get topic words
@@ -89,6 +95,11 @@ This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.
89
95
 
90
96
  If a method or option you need isn’t supported, feel free to open an issue.
91
97
 
98
+ ## Examples
99
+
100
+ - [LDA](examples/lda_basic.rb)
101
+ - [HDP](examples/hdp.rb)
102
+
92
103
  ## Tokenization
93
104
 
94
105
  Documents are tokenized by whitespace by default, or you can perform your own tokenization.
@@ -99,12 +110,22 @@ model.add_doc(["tokens", "from", "document", "one"])
99
110
 
100
111
  ## Performance
101
112
 
102
- tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check what it’s using with:
113
+ tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
103
114
 
104
115
  ```ruby
105
116
  Tomoto.isa
106
117
  ```
107
118
 
119
+ ## Parallelism
120
+
121
+ Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
122
+
123
+ ```ruby
124
+ model.train(parallel: :partition)
125
+ ```
126
+
127
+ Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
128
+
108
129
  ## History
109
130
 
110
131
  View the [changelog](https://github.com/ankane/tomoto/blob/master/CHANGELOG.md)
@@ -31,7 +31,7 @@ using Rice::define_class_under;
31
31
  using Rice::define_module;
32
32
 
33
33
  template<>
34
- Object to_ruby<std::vector<float>>(std::vector<float> const & x)
34
+ Object to_ruby<std::vector<tomoto::Float>>(std::vector<tomoto::Float> const & x)
35
35
  {
36
36
  Array res;
37
37
  for (auto const& v : x) {
@@ -73,13 +73,13 @@ std::vector<std::string> from_ruby<std::vector<std::string>>(Object x)
73
73
  }
74
74
 
75
75
  template<>
76
- std::vector<float> from_ruby<std::vector<float>>(Object x)
76
+ std::vector<tomoto::Float> from_ruby<std::vector<tomoto::Float>>(Object x)
77
77
  {
78
78
  Array a = Array(x);
79
- std::vector<float> res;
79
+ std::vector<tomoto::Float> res;
80
80
  res.reserve(a.size());
81
81
  for (auto const& v : a) {
82
- res.push_back(from_ruby<float>(v));
82
+ res.push_back(from_ruby<tomoto::Float>(v));
83
83
  }
84
84
  return res;
85
85
  }
@@ -117,7 +117,7 @@ void Init_ext()
117
117
  Class rb_cLDA = define_class_under<tomoto::ILDAModel>(rb_mTomoto, "LDA")
118
118
  .define_singleton_method(
119
119
  "_new",
120
- *[](size_t tw, size_t k, float alpha, float eta, int seed) {
120
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
121
121
  if (seed < 0) {
122
122
  seed = std::random_device{}();
123
123
  }
@@ -131,7 +131,11 @@ void Init_ext()
131
131
  .define_method(
132
132
  "alpha",
133
133
  *[](tomoto::ILDAModel& self) {
134
- return self.getAlpha();
134
+ Array res;
135
+ for (size_t i = 0; i < self.getK(); i++) {
136
+ res.push(self.getAlpha(i));
137
+ }
138
+ return res;
135
139
  })
136
140
  .define_method(
137
141
  "burn_in",
@@ -246,8 +250,7 @@ void Init_ext()
246
250
  })
247
251
  .define_method(
248
252
  "_train",
249
- *[](tomoto::ILDAModel& self, size_t iteration, size_t workers) {
250
- size_t ps = 0;
253
+ *[](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
251
254
  self.train(iteration, workers, (tomoto::ParallelScheme)ps);
252
255
  })
253
256
  .define_method(
@@ -321,7 +324,7 @@ void Init_ext()
321
324
  Class rb_cCT = define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(rb_mTomoto, "CT")
322
325
  .define_singleton_method(
323
326
  "_new",
324
- *[](size_t tw, size_t k, float alpha, float eta, int seed) {
327
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
325
328
  if (seed < 0) {
326
329
  seed = std::random_device{}();
327
330
  }
@@ -368,7 +371,7 @@ void Init_ext()
368
371
  Class rb_cDMR = define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(rb_mTomoto, "DMR")
369
372
  .define_singleton_method(
370
373
  "_new",
371
- *[](size_t tw, size_t k, float alpha, float sigma, float eta, float alpha_epsilon, int seed) {
374
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
372
375
  if (seed < 0) {
373
376
  seed = std::random_device{}();
374
377
  }
@@ -386,7 +389,7 @@ void Init_ext()
386
389
  })
387
390
  .define_method(
388
391
  "alpha_epsilon=",
389
- *[](tomoto::IDMRModel& self, float value) {
392
+ *[](tomoto::IDMRModel& self, tomoto::Float value) {
390
393
  self.setAlphaEps(value);
391
394
  return value;
392
395
  })
@@ -420,7 +423,7 @@ void Init_ext()
420
423
  Class rb_cDT = define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(rb_mTomoto, "DT")
421
424
  .define_singleton_method(
422
425
  "_new",
423
- *[](size_t tw, size_t k, size_t t, float alphaVar, float etaVar, float phiVar, float shapeA, float shapeB, float shapeC) {
426
+ *[](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
424
427
  // Rice only supports 10 arguments
425
428
  int seed = -1;
426
429
  if (seed < 0) {
@@ -440,7 +443,7 @@ void Init_ext()
440
443
  })
441
444
  .define_method(
442
445
  "lr_a=",
443
- *[](tomoto::IDTModel& self, float value) {
446
+ *[](tomoto::IDTModel& self, tomoto::Float value) {
444
447
  self.setShapeA(value);
445
448
  return value;
446
449
  })
@@ -451,7 +454,7 @@ void Init_ext()
451
454
  })
452
455
  .define_method(
453
456
  "lr_b=",
454
- *[](tomoto::IDTModel& self, float value) {
457
+ *[](tomoto::IDTModel& self, tomoto::Float value) {
455
458
  self.setShapeB(value);
456
459
  return value;
457
460
  })
@@ -462,7 +465,7 @@ void Init_ext()
462
465
  })
463
466
  .define_method(
464
467
  "lr_c=",
465
- *[](tomoto::IDTModel& self, float value) {
468
+ *[](tomoto::IDTModel& self, tomoto::Float value) {
466
469
  self.setShapeC(value);
467
470
  return value;
468
471
  })
@@ -480,7 +483,7 @@ void Init_ext()
480
483
  Class rb_cGDMR = define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(rb_mTomoto, "GDMR")
481
484
  .define_singleton_method(
482
485
  "_new",
483
- *[](size_t tw, size_t k, std::vector<uint64_t> degrees, float alpha, float sigma, float sigma0, float eta, float alpha_epsilon, int seed) {
486
+ *[](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
484
487
  if (seed < 0) {
485
488
  seed = std::random_device{}();
486
489
  }
@@ -500,12 +503,17 @@ void Init_ext()
500
503
  Class rb_cHDP = define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(rb_mTomoto, "HDP")
501
504
  .define_singleton_method(
502
505
  "_new",
503
- *[](size_t tw, size_t k, float alpha, float eta, float gamma, int seed) {
506
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
504
507
  if (seed < 0) {
505
508
  seed = std::random_device{}();
506
509
  }
507
510
  return tomoto::IHDPModel::create((tomoto::TermWeight)tw, k, alpha, eta, gamma, seed);
508
511
  })
512
+ .define_method(
513
+ "alpha",
514
+ *[](tomoto::IHDPModel& self) {
515
+ return self.getAlpha();
516
+ })
509
517
  .define_method(
510
518
  "gamma",
511
519
  *[](tomoto::IHDPModel& self) {
@@ -530,12 +538,21 @@ void Init_ext()
530
538
  Class rb_cHLDA = define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(rb_mTomoto, "HLDA")
531
539
  .define_singleton_method(
532
540
  "_new",
533
- *[](size_t tw, size_t levelDepth, float alpha, float eta, float gamma, int seed) {
541
+ *[](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
534
542
  if (seed < 0) {
535
543
  seed = std::random_device{}();
536
544
  }
537
545
  return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, levelDepth, alpha, eta, gamma, seed);
538
546
  })
547
+ .define_method(
548
+ "alpha",
549
+ *[](tomoto::IHLDAModel& self) {
550
+ Array res;
551
+ for (size_t i = 0; i < self.getLevelDepth(); i++) {
552
+ res.push(self.getAlpha(i));
553
+ }
554
+ return res;
555
+ })
539
556
  .define_method(
540
557
  "_children_topics",
541
558
  *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
@@ -580,7 +597,7 @@ void Init_ext()
580
597
  Class rb_cPA = define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(rb_mTomoto, "PA")
581
598
  .define_singleton_method(
582
599
  "_new",
583
- *[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
600
+ *[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
584
601
  if (seed < 0) {
585
602
  seed = std::random_device{}();
586
603
  }
@@ -600,17 +617,27 @@ void Init_ext()
600
617
  Class rb_cHPA = define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(rb_mTomoto, "HPA")
601
618
  .define_singleton_method(
602
619
  "_new",
603
- *[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
620
+ *[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
604
621
  if (seed < 0) {
605
622
  seed = std::random_device{}();
606
623
  }
607
624
  return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, k1, k2, alpha, eta, seed);
625
+ })
626
+ .define_method(
627
+ "alpha",
628
+ *[](tomoto::IHPAModel& self) {
629
+ Array res;
630
+ // use <= to return k+1 elements
631
+ for (size_t i = 0; i <= self.getK(); i++) {
632
+ res.push(self.getAlpha(i));
633
+ }
634
+ return res;
608
635
  });
609
636
 
610
637
  Class rb_cMGLDA = define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(rb_mTomoto, "MGLDA")
611
638
  .define_singleton_method(
612
639
  "_new",
613
- *[](size_t tw, size_t k_g, size_t k_l, size_t t, float alpha_g, float alpha_l, float alpha_mg, float alpha_ml, float eta_g) {
640
+ *[](size_t tw, size_t k_g, size_t k_l, size_t t, tomoto::Float alpha_g, tomoto::Float alpha_l, tomoto::Float alpha_mg, tomoto::Float alpha_ml, tomoto::Float eta_g) {
614
641
  return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g);
615
642
  })
616
643
  .define_method(
@@ -672,7 +699,7 @@ void Init_ext()
672
699
  Class rb_cLLDA = define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(rb_mTomoto, "LLDA")
673
700
  .define_singleton_method(
674
701
  "_new",
675
- *[](size_t tw, size_t k, float alpha, float eta, int seed) {
702
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
676
703
  if (seed < 0) {
677
704
  seed = std::random_device{}();
678
705
  }
@@ -692,7 +719,7 @@ void Init_ext()
692
719
  Class rb_cPLDA = define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(rb_mTomoto, "PLDA")
693
720
  .define_singleton_method(
694
721
  "_new",
695
- *[](size_t tw, size_t latent_topics, float alpha, float eta, int seed) {
722
+ *[](size_t tw, size_t latent_topics, tomoto::Float alpha, tomoto::Float eta, int seed) {
696
723
  if (seed < 0) {
697
724
  seed = std::random_device{}();
698
725
  }
@@ -712,7 +739,7 @@ void Init_ext()
712
739
  Class rb_cSLDA = define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(rb_mTomoto, "SLDA")
713
740
  .define_singleton_method(
714
741
  "_new",
715
- *[](size_t tw, size_t k, Array rb_vars, float alpha, float eta, std::vector<float> mu, std::vector<float> nu_sq, std::vector<float> glm_param, int seed) {
742
+ *[](size_t tw, size_t k, Array rb_vars, tomoto::Float alpha, tomoto::Float eta, std::vector<tomoto::Float> mu, std::vector<tomoto::Float> nu_sq, std::vector<tomoto::Float> glm_param, int seed) {
716
743
  if (seed < 0) {
717
744
  seed = std::random_device{}();
718
745
  }
@@ -725,7 +752,7 @@ void Init_ext()
725
752
  })
726
753
  .define_method(
727
754
  "_add_doc",
728
- *[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<float> y) {
755
+ *[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<tomoto::Float> y) {
729
756
  self.addDoc(words, y);
730
757
  })
731
758
  .define_method(
@@ -18,5 +18,6 @@ require "tomoto/slda"
18
18
  require "tomoto/version"
19
19
 
20
20
  module Tomoto
21
+ PARALLEL_SCHEME = [:default, :none, :copy_merge, :partition]
21
22
  TERM_WEIGHT = [:one, :idf, :pmi]
22
23
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def correlations(topic_id = nil)
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, metadata: "")
@@ -19,5 +19,9 @@ module Tomoto
19
19
  k.times.map { |i| _lambdas(i) }
20
20
  end
21
21
  end
22
+
23
# Returns the element-wise exponential of `lambdas`, keeping the same
# nested (array of arrays) layout.
def alpha
  lambdas.map do |row|
    row.map { |value| Math.exp(value) }
  end
end
22
26
  end
23
27
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, timepoint: 0)
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, metadata: [])
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
  end
11
11
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def children_topics(topic_id)
@@ -39,5 +39,18 @@ module Tomoto
39
39
  raise "topic_id must be < K" if topic_id >= k
40
40
  raise "train() should be called first" unless @prepared
41
41
  end
42
+
43
# Appends one line per topic to +summary+, starting at topic 0 and then
# descending into each topic's children (visited in sorted order). Every
# level of nesting repeats the indent string once more in front of the
# topic id.
#
# summary          - Array the report lines are appended to
# topic_word_top_n - number of top words requested for each topic
def topics_info(summary, topic_word_top_n:)
  counts = count_by_topics

  # The lambda refers to itself so it can recurse over the topic tree.
  walk = lambda do |topic, depth|
    listing = topic_words(topic, top_n: topic_word_top_n).keys.join(" ")
    indent = " " * depth
    summary << "| #{indent}##{topic} (#{counts[topic]}) : #{listing}"
    children_topics(topic).sort.each { |child| walk.call(child, depth + 1) }
  end

  walk.call(0, 0)
end
42
55
  end
43
56
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
  end
11
11
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def self.load(filename)
@@ -32,6 +32,42 @@ module Tomoto
32
32
  _save(filename, full)
33
33
  end
34
34
 
35
# Builds a text report about the model and returns it as one String with
# the lines joined by "\n" (nothing is printed).
#
# initial_hp       - include the "<Initial Parameters>" section when truthy
# params           - include the "<Parameters>" section when truthy
# topic_word_top_n - top words requested per topic; the "<Topics>" section
#                    is only included when this is greater than 0
def summary(initial_hp: true, params: true, topic_word_top_n: 5)
  report = []

  # Emits a section title, lets the block add the section's lines, then
  # adds the "|" separator line.
  add_section = lambda do |title, &fill|
    report << title
    fill.call
    report << "|"
  end

  add_section.call("<Basic Info>") { basic_info(report) }
  add_section.call("<Training Info>") { training_info(report) }
  add_section.call("<Initial Parameters>") { initial_params_info(report) } if initial_hp
  add_section.call("<Parameters>") { params_info(report) } if params
  if topic_word_top_n > 0
    add_section.call("<Topics>") { topics_info(report, topic_word_top_n: topic_word_top_n) }
  end

  # The last section always leaves a trailing "|" separator; drop it.
  report.pop

  report.join("\n")
end
70
+
35
71
  def topic_words(topic_id = nil, top_n: 10)
36
72
  if topic_id
37
73
  _topic_words(topic_id, top_n)
@@ -40,9 +76,9 @@ module Tomoto
40
76
  end
41
77
  end
42
78
 
43
- def train(iterations = 10, workers: 0)
79
+ def train(iterations = 10, workers: 0, parallel: :default)
44
80
  prepare
45
- _train(iterations, workers)
81
+ _train(iterations, workers, to_ps(parallel))
46
82
  end
47
83
 
48
84
  def tw
@@ -64,12 +100,68 @@ module Tomoto
64
100
  doc
65
101
  end
66
102
 
103
# Appends the "<Basic Info>" lines to +summary+: model class name and gem
# version, document/word counts, vocabulary sizes, the word-frequency
# statistic labelled "Entropy of words", and the removed top words.
#
# summary - Array the report lines are appended to (also the return value)
def basic_info(summary)
  # Fetch each collection once instead of once per use.
  freqs = used_vocab_freq
  removed = removed_top_words

  total = freqs.sum.to_f
  # Zero frequencies are skipped: 0 * log(0) is NaN in floating point (and
  # 0 / 0.0 is NaN when every frequency is zero), which would turn the
  # whole statistic into NaN. A zero term contributes nothing to the sum.
  #
  # NOTE(review): Shannon entropy is conventionally -sum(p * log(p)); this
  # is the un-negated sum, so the reported value is <= 0. The sign is left
  # as it was to keep the reported number stable -- confirm what is intended.
  entropy = freqs.sum(0.0) do |freq|
    next 0.0 if freq.zero?

    share = freq / total
    share * Math.log(share)
  end

  summary << "| #{self.class.name.sub("Tomoto::", "")} (current version: #{VERSION})"
  summary << "| #{num_docs} docs, #{num_words} words"
  summary << "| Total Vocabs: #{vocabs.size}, Used Vocabs: #{used_vocabs.size}"
  summary << "| Entropy of words: %.5f" % entropy
  summary << "| Removed Vocabs: #{removed.any? ? removed.join(" ") : "<NA>"}"
end
114
+
115
# Appends the "<Training Info>" lines to +summary+: the global step and
# burn-in values, the optimization interval, and the log-likelihood per
# word formatted to five decimal places.
def training_info(summary)
  steps_line = "| Iterations: #{global_step}, Burn-in steps: #{burn_in}"
  summary.push(steps_line)

  interval_line = "| Optimization Interval: #{optim_interval}"
  summary.push(interval_line)

  summary.push(format("| Log-likelihood per word: %.5f", ll_per_word))
end
120
+
121
# Appends one "name: value" line to +summary+ for every entry recorded in
# @init_params. When that instance variable was never set on this object,
# a single "Not Available" line is appended instead.
def initial_params_info(summary)
  return summary << "| Not Available" unless defined?(@init_params)

  @init_params.each_pair do |name, value|
    summary << "| #{name}: #{value}"
  end
end
130
+
131
# Appends the "<Parameters>" lines to +summary+: a caption and the current
# `alpha` value, then a caption and `eta` formatted to five decimal places.
def params_info(summary)
  summary.push("| alpha (Dirichlet prior on the per-document topic distributions)")
  summary.push("| #{alpha}")
  summary.push("| eta (Dirichlet prior on the per-topic word distribution)")
  summary.push(format("| %.5f", eta))
end
137
+
138
# Appends one line per topic to +summary+, in the order `topic_words`
# returns them: the topic index, its entry from `count_by_topics`, and the
# topic's top words separated by spaces.
#
# summary          - Array the report lines are appended to
# topic_word_top_n - number of top words requested for each topic
def topics_info(summary, topic_word_top_n:)
  topic_counts = count_by_topics
  per_topic = topic_words(top_n: topic_word_top_n)

  per_topic.each_with_index do |top_words, topic_id|
    listing = top_words.keys.join(" ")
    summary.push("| ##{topic_id} (#{topic_counts[topic_id]}) : #{listing}")
  end
end
144
+
145
# Translates a parallel scheme symbol into its position in PARALLEL_SCHEME.
#
# Raises ArgumentError when +ps+ is not one of the listed schemes.
def to_ps(ps)
  position = PARALLEL_SCHEME.index(ps)
  raise ArgumentError, "Invalid parallel scheme: #{ps}" if position.nil?

  position
end
148
+
67
149
  class << self
68
150
  private
69
151
 
70
152
  def to_tw(tw)
71
153
  TERM_WEIGHT.index(tw) || (raise ArgumentError, "Invalid tw: #{tw}")
72
154
  end
155
+
156
# Records the keyword arguments `new` was called with on +model+ (as
# @init_params) and returns +model+.
#
# Only parameters that `new` reports with kind :key are recorded. Each value
# is read from the caller's +binding+ by name and stored as its #inspect
# string.
#
# model   - the freshly built model instance
# binding - Binding of the calling `new` method (this parameter shadows
#           Kernel#binding inside the method)
def init_params(model, binding)
  recorded = method(:new).parameters.each_with_object({}) do |(kind, name), acc|
    acc[name] = binding.local_variable_get(name).inspect if kind == :key
  end

  model.instance_variable_set(:@init_params, recorded)
  model
end
73
165
  end
74
166
  end
75
167
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, labels: [])
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, delimiter: ".")
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
  end
11
11
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, labels: [])
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, y: [])
@@ -1,3 +1,3 @@
1
1
  module Tomoto
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomoto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-10 00:00:00.000000000 Z
11
+ date: 2020-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice