tomoto 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3b40c9adf2f0162eb6174b17395ea37b9294e14b22609e9f51951e9904125ff9
4
- data.tar.gz: be3f68438f60a7e4fc11033921636f8d03bf411bd3d3eb6aa3b4fb448faac41a
3
+ metadata.gz: dd4c36ff621f73c38bb066694a932f0a682c18591ddf05a9a0764bea0b6e4430
4
+ data.tar.gz: 551e56c4bc17fb5a3a0aeac0db055960fcc5e45bf097bf88c7cbf9046f958e7d
5
5
  SHA512:
6
- metadata.gz: a74747ae372d030c42562d4e2b99ab167ccc28533468ed08819f4bd34d42b340349870712c12e565388eb7833f993349432e77baee8618c34c265676ca181072
7
- data.tar.gz: da9e833bb98726278108a68a7dd6bed0e54b3979c25d49d8db01aa613e6205a6a3512881688209baf2589c4dc5514ed2663fb251a5e273c42b678bb1daa06d74
6
+ metadata.gz: 565a91d0bb6d48142f38dc3d9e798ddb99bf41fda32762295362075fba972eea6b56b6bde126eab74677eba5fd525581b68c5efa73361a46fcb0b2796ab63684
7
+ data.tar.gz: 415193e4eb6adbe5dce05328aadf9acb91f4acc50951484183a956455d7336f93961fe145465b1eeffaae78dad37ee1452defe832514c72b3c032860ed433cc8
@@ -1,3 +1,8 @@
1
+ ## 0.1.2 (2020-10-10)
2
+
3
+ - Added `summary` method
4
+ - Added `parallel` option to `train` method
5
+
1
6
  ## 0.1.1 (2020-10-10)
2
7
 
3
8
  - Added many more models
data/README.md CHANGED
@@ -23,7 +23,13 @@ model = Tomoto::LDA.new(k: 3)
23
23
  model.add_doc("text from document one")
24
24
  model.add_doc("text from document two")
25
25
  model.add_doc("text from document three")
26
- model.train(100)
26
+ model.train(100) # iterations
27
+ ```
28
+
29
+ Get the summary
30
+
31
+ ```ruby
32
+ model.summary
27
33
  ```
28
34
 
29
35
  Get topic words
@@ -89,6 +95,11 @@ This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.
89
95
 
90
96
  If a method or option you need isn’t supported, feel free to open an issue.
91
97
 
98
+ ## Examples
99
+
100
+ - [LDA](examples/lda_basic.rb)
101
+ - [HDP](examples/hdp.rb)
102
+
92
103
  ## Tokenization
93
104
 
94
105
  Documents are tokenized by whitespace by default, or you can perform your own tokenization.
@@ -99,12 +110,22 @@ model.add_doc(["tokens", "from", "document", "one"])
99
110
 
100
111
  ## Performance
101
112
 
102
- tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check what it’s using with:
113
+ tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
103
114
 
104
115
  ```ruby
105
116
  Tomoto.isa
106
117
  ```
107
118
 
119
+ ## Parallelism
120
+
121
+ Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
122
+
123
+ ```ruby
124
+ model.train(parallel: :partition)
125
+ ```
126
+
127
+ Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
128
+
108
129
  ## History
109
130
 
110
131
  View the [changelog](https://github.com/ankane/tomoto/blob/master/CHANGELOG.md)
@@ -31,7 +31,7 @@ using Rice::define_class_under;
31
31
  using Rice::define_module;
32
32
 
33
33
  template<>
34
- Object to_ruby<std::vector<float>>(std::vector<float> const & x)
34
+ Object to_ruby<std::vector<tomoto::Float>>(std::vector<tomoto::Float> const & x)
35
35
  {
36
36
  Array res;
37
37
  for (auto const& v : x) {
@@ -73,13 +73,13 @@ std::vector<std::string> from_ruby<std::vector<std::string>>(Object x)
73
73
  }
74
74
 
75
75
  template<>
76
- std::vector<float> from_ruby<std::vector<float>>(Object x)
76
+ std::vector<tomoto::Float> from_ruby<std::vector<tomoto::Float>>(Object x)
77
77
  {
78
78
  Array a = Array(x);
79
- std::vector<float> res;
79
+ std::vector<tomoto::Float> res;
80
80
  res.reserve(a.size());
81
81
  for (auto const& v : a) {
82
- res.push_back(from_ruby<float>(v));
82
+ res.push_back(from_ruby<tomoto::Float>(v));
83
83
  }
84
84
  return res;
85
85
  }
@@ -117,7 +117,7 @@ void Init_ext()
117
117
  Class rb_cLDA = define_class_under<tomoto::ILDAModel>(rb_mTomoto, "LDA")
118
118
  .define_singleton_method(
119
119
  "_new",
120
- *[](size_t tw, size_t k, float alpha, float eta, int seed) {
120
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
121
121
  if (seed < 0) {
122
122
  seed = std::random_device{}();
123
123
  }
@@ -131,7 +131,11 @@ void Init_ext()
131
131
  .define_method(
132
132
  "alpha",
133
133
  *[](tomoto::ILDAModel& self) {
134
- return self.getAlpha();
134
+ Array res;
135
+ for (size_t i = 0; i < self.getK(); i++) {
136
+ res.push(self.getAlpha(i));
137
+ }
138
+ return res;
135
139
  })
136
140
  .define_method(
137
141
  "burn_in",
@@ -246,8 +250,7 @@ void Init_ext()
246
250
  })
247
251
  .define_method(
248
252
  "_train",
249
- *[](tomoto::ILDAModel& self, size_t iteration, size_t workers) {
250
- size_t ps = 0;
253
+ *[](tomoto::ILDAModel& self, size_t iteration, size_t workers, size_t ps) {
251
254
  self.train(iteration, workers, (tomoto::ParallelScheme)ps);
252
255
  })
253
256
  .define_method(
@@ -321,7 +324,7 @@ void Init_ext()
321
324
  Class rb_cCT = define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(rb_mTomoto, "CT")
322
325
  .define_singleton_method(
323
326
  "_new",
324
- *[](size_t tw, size_t k, float alpha, float eta, int seed) {
327
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
325
328
  if (seed < 0) {
326
329
  seed = std::random_device{}();
327
330
  }
@@ -368,7 +371,7 @@ void Init_ext()
368
371
  Class rb_cDMR = define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(rb_mTomoto, "DMR")
369
372
  .define_singleton_method(
370
373
  "_new",
371
- *[](size_t tw, size_t k, float alpha, float sigma, float eta, float alpha_epsilon, int seed) {
374
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
372
375
  if (seed < 0) {
373
376
  seed = std::random_device{}();
374
377
  }
@@ -386,7 +389,7 @@ void Init_ext()
386
389
  })
387
390
  .define_method(
388
391
  "alpha_epsilon=",
389
- *[](tomoto::IDMRModel& self, float value) {
392
+ *[](tomoto::IDMRModel& self, tomoto::Float value) {
390
393
  self.setAlphaEps(value);
391
394
  return value;
392
395
  })
@@ -420,7 +423,7 @@ void Init_ext()
420
423
  Class rb_cDT = define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(rb_mTomoto, "DT")
421
424
  .define_singleton_method(
422
425
  "_new",
423
- *[](size_t tw, size_t k, size_t t, float alphaVar, float etaVar, float phiVar, float shapeA, float shapeB, float shapeC) {
426
+ *[](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
424
427
  // Rice only supports 10 arguments
425
428
  int seed = -1;
426
429
  if (seed < 0) {
@@ -440,7 +443,7 @@ void Init_ext()
440
443
  })
441
444
  .define_method(
442
445
  "lr_a=",
443
- *[](tomoto::IDTModel& self, float value) {
446
+ *[](tomoto::IDTModel& self, tomoto::Float value) {
444
447
  self.setShapeA(value);
445
448
  return value;
446
449
  })
@@ -451,7 +454,7 @@ void Init_ext()
451
454
  })
452
455
  .define_method(
453
456
  "lr_b=",
454
- *[](tomoto::IDTModel& self, float value) {
457
+ *[](tomoto::IDTModel& self, tomoto::Float value) {
455
458
  self.setShapeB(value);
456
459
  return value;
457
460
  })
@@ -462,7 +465,7 @@ void Init_ext()
462
465
  })
463
466
  .define_method(
464
467
  "lr_c=",
465
- *[](tomoto::IDTModel& self, float value) {
468
+ *[](tomoto::IDTModel& self, tomoto::Float value) {
466
469
  self.setShapeC(value);
467
470
  return value;
468
471
  })
@@ -480,7 +483,7 @@ void Init_ext()
480
483
  Class rb_cGDMR = define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(rb_mTomoto, "GDMR")
481
484
  .define_singleton_method(
482
485
  "_new",
483
- *[](size_t tw, size_t k, std::vector<uint64_t> degrees, float alpha, float sigma, float sigma0, float eta, float alpha_epsilon, int seed) {
486
+ *[](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, int seed) {
484
487
  if (seed < 0) {
485
488
  seed = std::random_device{}();
486
489
  }
@@ -500,12 +503,17 @@ void Init_ext()
500
503
  Class rb_cHDP = define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(rb_mTomoto, "HDP")
501
504
  .define_singleton_method(
502
505
  "_new",
503
- *[](size_t tw, size_t k, float alpha, float eta, float gamma, int seed) {
506
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
504
507
  if (seed < 0) {
505
508
  seed = std::random_device{}();
506
509
  }
507
510
  return tomoto::IHDPModel::create((tomoto::TermWeight)tw, k, alpha, eta, gamma, seed);
508
511
  })
512
+ .define_method(
513
+ "alpha",
514
+ *[](tomoto::IHDPModel& self) {
515
+ return self.getAlpha();
516
+ })
509
517
  .define_method(
510
518
  "gamma",
511
519
  *[](tomoto::IHDPModel& self) {
@@ -530,12 +538,21 @@ void Init_ext()
530
538
  Class rb_cHLDA = define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(rb_mTomoto, "HLDA")
531
539
  .define_singleton_method(
532
540
  "_new",
533
- *[](size_t tw, size_t levelDepth, float alpha, float eta, float gamma, int seed) {
541
+ *[](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, int seed) {
534
542
  if (seed < 0) {
535
543
  seed = std::random_device{}();
536
544
  }
537
545
  return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, levelDepth, alpha, eta, gamma, seed);
538
546
  })
547
+ .define_method(
548
+ "alpha",
549
+ *[](tomoto::IHLDAModel& self) {
550
+ Array res;
551
+ for (size_t i = 0; i < self.getLevelDepth(); i++) {
552
+ res.push(self.getAlpha(i));
553
+ }
554
+ return res;
555
+ })
539
556
  .define_method(
540
557
  "_children_topics",
541
558
  *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
@@ -580,7 +597,7 @@ void Init_ext()
580
597
  Class rb_cPA = define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(rb_mTomoto, "PA")
581
598
  .define_singleton_method(
582
599
  "_new",
583
- *[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
600
+ *[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
584
601
  if (seed < 0) {
585
602
  seed = std::random_device{}();
586
603
  }
@@ -600,17 +617,27 @@ void Init_ext()
600
617
  Class rb_cHPA = define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(rb_mTomoto, "HPA")
601
618
  .define_singleton_method(
602
619
  "_new",
603
- *[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
620
+ *[](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, int seed) {
604
621
  if (seed < 0) {
605
622
  seed = std::random_device{}();
606
623
  }
607
624
  return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, k1, k2, alpha, eta, seed);
625
+ })
626
+ .define_method(
627
+ "alpha",
628
+ *[](tomoto::IHPAModel& self) {
629
+ Array res;
630
+ // use <= to return k+1 elements
631
+ for (size_t i = 0; i <= self.getK(); i++) {
632
+ res.push(self.getAlpha(i));
633
+ }
634
+ return res;
608
635
  });
609
636
 
610
637
  Class rb_cMGLDA = define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(rb_mTomoto, "MGLDA")
611
638
  .define_singleton_method(
612
639
  "_new",
613
- *[](size_t tw, size_t k_g, size_t k_l, size_t t, float alpha_g, float alpha_l, float alpha_mg, float alpha_ml, float eta_g) {
640
+ *[](size_t tw, size_t k_g, size_t k_l, size_t t, tomoto::Float alpha_g, tomoto::Float alpha_l, tomoto::Float alpha_mg, tomoto::Float alpha_ml, tomoto::Float eta_g) {
614
641
  return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g);
615
642
  })
616
643
  .define_method(
@@ -672,7 +699,7 @@ void Init_ext()
672
699
  Class rb_cLLDA = define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(rb_mTomoto, "LLDA")
673
700
  .define_singleton_method(
674
701
  "_new",
675
- *[](size_t tw, size_t k, float alpha, float eta, int seed) {
702
+ *[](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, int seed) {
676
703
  if (seed < 0) {
677
704
  seed = std::random_device{}();
678
705
  }
@@ -692,7 +719,7 @@ void Init_ext()
692
719
  Class rb_cPLDA = define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(rb_mTomoto, "PLDA")
693
720
  .define_singleton_method(
694
721
  "_new",
695
- *[](size_t tw, size_t latent_topics, float alpha, float eta, int seed) {
722
+ *[](size_t tw, size_t latent_topics, tomoto::Float alpha, tomoto::Float eta, int seed) {
696
723
  if (seed < 0) {
697
724
  seed = std::random_device{}();
698
725
  }
@@ -712,7 +739,7 @@ void Init_ext()
712
739
  Class rb_cSLDA = define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(rb_mTomoto, "SLDA")
713
740
  .define_singleton_method(
714
741
  "_new",
715
- *[](size_t tw, size_t k, Array rb_vars, float alpha, float eta, std::vector<float> mu, std::vector<float> nu_sq, std::vector<float> glm_param, int seed) {
742
+ *[](size_t tw, size_t k, Array rb_vars, tomoto::Float alpha, tomoto::Float eta, std::vector<tomoto::Float> mu, std::vector<tomoto::Float> nu_sq, std::vector<tomoto::Float> glm_param, int seed) {
716
743
  if (seed < 0) {
717
744
  seed = std::random_device{}();
718
745
  }
@@ -725,7 +752,7 @@ void Init_ext()
725
752
  })
726
753
  .define_method(
727
754
  "_add_doc",
728
- *[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<float> y) {
755
+ *[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<tomoto::Float> y) {
729
756
  self.addDoc(words, y);
730
757
  })
731
758
  .define_method(
@@ -18,5 +18,6 @@ require "tomoto/slda"
18
18
  require "tomoto/version"
19
19
 
20
20
  module Tomoto
21
+ PARALLEL_SCHEME = [:default, :none, :copy_merge, :partition]
21
22
  TERM_WEIGHT = [:one, :idf, :pmi]
22
23
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def correlations(topic_id = nil)
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, metadata: "")
@@ -19,5 +19,9 @@ module Tomoto
19
19
  k.times.map { |i| _lambdas(i) }
20
20
  end
21
21
  end
22
+
23
+ def alpha
24
+ lambdas.map { |v| v.map { |v2| Math.exp(v2) } }
25
+ end
22
26
  end
23
27
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, timepoint: 0)
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, metadata: [])
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
  end
11
11
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def children_topics(topic_id)
@@ -39,5 +39,18 @@ module Tomoto
39
39
  raise "topic_id must be < K" if topic_id >= k
40
40
  raise "train() should be called first" unless @prepared
41
41
  end
42
+
43
+ def topics_info(summary, topic_word_top_n:)
44
+ counts = count_by_topics
45
+
46
+ nested_info = lambda do |k = 0, level = 0|
47
+ words = topic_words(k, top_n: topic_word_top_n).keys.join(" ")
48
+ summary << "| #{" " * level}##{k} (#{counts[k]}) : #{words}"
49
+ children_topics(k).sort.each do |c|
50
+ nested_info.call(c, level + 1)
51
+ end
52
+ end
53
+ nested_info.call
54
+ end
42
55
  end
43
56
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
  end
11
11
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def self.load(filename)
@@ -32,6 +32,42 @@ module Tomoto
32
32
  _save(filename, full)
33
33
  end
34
34
 
35
+ # returns string instead of printing
36
+ def summary(initial_hp: true, params: true, topic_word_top_n: 5)
37
+ summary = []
38
+
39
+ summary << "<Basic Info>"
40
+ basic_info(summary)
41
+ summary << "|"
42
+
43
+ summary << "<Training Info>"
44
+ training_info(summary)
45
+ summary << "|"
46
+
47
+ if initial_hp
48
+ summary << "<Initial Parameters>"
49
+ initial_params_info(summary)
50
+ summary << "|"
51
+ end
52
+
53
+ if params
54
+ summary << "<Parameters>"
55
+ params_info(summary)
56
+ summary << "|"
57
+ end
58
+
59
+ if topic_word_top_n > 0
60
+ summary << "<Topics>"
61
+ topics_info(summary, topic_word_top_n: topic_word_top_n)
62
+ summary << "|"
63
+ end
64
+
65
+ # skip ending |
66
+ summary.pop
67
+
68
+ summary.join("\n")
69
+ end
70
+
35
71
  def topic_words(topic_id = nil, top_n: 10)
36
72
  if topic_id
37
73
  _topic_words(topic_id, top_n)
@@ -40,9 +76,9 @@ module Tomoto
40
76
  end
41
77
  end
42
78
 
43
- def train(iterations = 10, workers: 0)
79
+ def train(iterations = 10, workers: 0, parallel: :default)
44
80
  prepare
45
- _train(iterations, workers)
81
+ _train(iterations, workers, to_ps(parallel))
46
82
  end
47
83
 
48
84
  def tw
@@ -64,12 +100,68 @@ module Tomoto
64
100
  doc
65
101
  end
66
102
 
103
+ def basic_info(summary)
104
+ sum = used_vocab_freq.sum.to_f
105
+ mapped = used_vocab_freq.map { |v| v / sum }
106
+ entropy = mapped.map { |v| v * Math.log(v) }.sum
107
+
108
+ summary << "| #{self.class.name.sub("Tomoto::", "")} (current version: #{VERSION})"
109
+ summary << "| #{num_docs} docs, #{num_words} words"
110
+ summary << "| Total Vocabs: #{vocabs.size}, Used Vocabs: #{used_vocabs.size}"
111
+ summary << "| Entropy of words: %.5f" % entropy
112
+ summary << "| Removed Vocabs: #{removed_top_words.any? ? removed_top_words.join(" ") : "<NA>"}"
113
+ end
114
+
115
+ def training_info(summary)
116
+ summary << "| Iterations: #{global_step}, Burn-in steps: #{burn_in}"
117
+ summary << "| Optimization Interval: #{optim_interval}"
118
+ summary << "| Log-likelihood per word: %.5f" % ll_per_word
119
+ end
120
+
121
+ def initial_params_info(summary)
122
+ if defined?(@init_params)
123
+ @init_params.each do |k, v|
124
+ summary << "| #{k}: #{v}"
125
+ end
126
+ else
127
+ summary << "| Not Available"
128
+ end
129
+ end
130
+
131
+ def params_info(summary)
132
+ summary << "| alpha (Dirichlet prior on the per-document topic distributions)"
133
+ summary << "| #{alpha}"
134
+ summary << "| eta (Dirichlet prior on the per-topic word distribution)"
135
+ summary << "| %.5f" % eta
136
+ end
137
+
138
+ def topics_info(summary, topic_word_top_n:)
139
+ counts = count_by_topics
140
+ topic_words(top_n: topic_word_top_n).each_with_index do |words, i|
141
+ summary << "| ##{i} (#{counts[i]}) : #{words.keys.join(" ")}"
142
+ end
143
+ end
144
+
145
+ def to_ps(ps)
146
+ PARALLEL_SCHEME.index(ps) || (raise ArgumentError, "Invalid parallel scheme: #{ps}")
147
+ end
148
+
67
149
  class << self
68
150
  private
69
151
 
70
152
  def to_tw(tw)
71
153
  TERM_WEIGHT.index(tw) || (raise ArgumentError, "Invalid tw: #{tw}")
72
154
  end
155
+
156
+ def init_params(model, binding)
157
+ init_params = {}
158
+ method(:new).parameters.each do |v|
159
+ next if v[0] != :key
160
+ init_params[v[1]] = binding.local_variable_get(v[1]).inspect
161
+ end
162
+ model.instance_variable_set(:@init_params, init_params)
163
+ model
164
+ end
73
165
  end
74
166
  end
75
167
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, labels: [])
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, delimiter: ".")
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
  end
11
11
  end
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, labels: [])
@@ -5,7 +5,7 @@ module Tomoto
5
5
  model.instance_variable_set(:@min_cf, min_cf)
6
6
  model.instance_variable_set(:@min_df, min_df)
7
7
  model.instance_variable_set(:@rm_top, rm_top)
8
- model
8
+ init_params(model, binding)
9
9
  end
10
10
 
11
11
  def add_doc(doc, y: [])
@@ -1,3 +1,3 @@
1
1
  module Tomoto
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomoto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-10 00:00:00.000000000 Z
11
+ date: 2020-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice