tomoto 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1044b496120cf955a03d6dd184056c40572501a68043c3bd3d4cc17334caba3d
4
- data.tar.gz: 7078dd4bdc562cae748ca89aa9b73d38d209d6a40a1e6d2acff0d8fba0c5a18b
3
+ metadata.gz: cd717980f682fd9151cf51a439e3ab54ff59f88575aa8c552b45b769048e9e6b
4
+ data.tar.gz: 80ebf4430f748279d4973ae8fd3949bdaf3043446370a81b7d7c1ed3107358fa
5
5
  SHA512:
6
- metadata.gz: ef9944cc3820397ef18bbca2e42d4a1a0d4b43dd283f3fac8e066478ecc78e74b4a2d32a6f3304203e22fad048ebdacc60637b7f51a42cf6fc73053613a3e77e
7
- data.tar.gz: 8409a754f890f788b6bc33938420311917418cf62f7cfde572db307a85f4473835caa342583ee6e3a6d03f517d89f75714b468351fa50011dbd156a9547c4918
6
+ metadata.gz: 4d3cc8d59f665d7957b1f94d60f688fdbe4d3400d984f716e29a6d32334b46b901f7ef0da6702ce5582cbd90e7c9c42727f04c19820c8e971e6c47d597dcc6f6
7
+ data.tar.gz: d0627badaa6d0ee3b65d30f805e6a5c54d440911e8ec770e69e70c0b1a840d8d0ee6c248fa1479162b2270eeb5038fb2ff5ca6d80a8233a25df83dfb562ea743
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.1 (2021-08-23)
2
+
3
+ - Added support for unseen documents
4
+
1
5
  ## 0.2.0 (2021-05-23)
2
6
 
3
7
  - Updated tomoto to 0.12.0
data/README.md CHANGED
@@ -75,6 +75,13 @@ Get the log likelihood per word
75
75
  model.ll_per_word
76
76
  ```
77
77
 
78
+ Perform inference for unseen documents
79
+
80
+ ```ruby
81
+ doc = model.make_doc("unseen doc")
82
+ topic_dist, ll = model.infer(doc)
83
+ ```
84
+
78
85
  ## Models
79
86
 
80
87
  Supports:
data/ext/tomoto/lda.cpp CHANGED
@@ -49,6 +49,30 @@ void init_lda(Rice::Module& m) {
49
49
  *[](tomoto::ILDAModel& self, std::vector<std::string> words) {
50
50
  return self.addDoc(buildDoc(words));
51
51
  })
52
+ .define_method(
53
+ "_make_doc",
54
+ *[](tomoto::ILDAModel& self, std::vector<std::string> words) {
55
+ return DocumentObject(self.makeDoc(buildDoc(words)).release(), &self);
56
+ })
57
+ .define_method(
58
+ "_infer",
59
+ *[](tomoto::ILDAModel& self, DocumentObject& doc_object, size_t iteration, float tolerance, size_t workers, size_t ps, size_t together) {
60
+ std::vector<tomoto::DocumentBase*> docs;
61
+ auto doc = doc_object.doc;
62
+ docs.emplace_back(const_cast<tomoto::DocumentBase*>(doc));
63
+ float ll = self.infer(docs, iteration, tolerance, workers, (tomoto::ParallelScheme)ps, !!together)[0];
64
+
65
+ auto topic_dist = self.getTopicsByDoc(doc);
66
+ auto topic_res = Array();
67
+ for (size_t i = 0; i < topic_dist.size(); i++) {
68
+ topic_res.push(topic_dist[i]);
69
+ }
70
+
71
+ auto res = Array();
72
+ res.push(topic_res);
73
+ res.push(ll);
74
+ return res;
75
+ })
52
76
  .define_method(
53
77
  "alpha",
54
78
  *[](tomoto::ILDAModel& self) {
Binary file
Binary file
Binary file
data/lib/tomoto/lda.rb CHANGED
@@ -18,6 +18,16 @@ module Tomoto
18
18
  _add_doc(prepare_doc(doc))
19
19
  end
20
20
 
21
+ def make_doc(doc)
22
+ _make_doc(tokenize_doc(doc))
23
+ end
24
+
25
+ # TODO support multiple docs
26
+ def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
27
+ raise "cannot infer with untrained model" unless defined?(@prepared)
28
+ _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
29
+ end
30
+
21
31
  def count_by_topics
22
32
  prepare
23
33
  _count_by_topics
@@ -96,6 +106,10 @@ module Tomoto
96
106
 
97
107
  def prepare_doc(doc)
98
108
  raise "cannot add_doc() after train()" if defined?(@prepared)
109
+ tokenize_doc(doc)
110
+ end
111
+
112
+ def tokenize_doc(doc)
99
113
  doc = doc.split(/[[:space:]]+/) unless doc.is_a?(Array)
100
114
  doc
101
115
  end
@@ -1,3 +1,3 @@
1
1
  module Tomoto
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomoto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-23 00:00:00.000000000 Z
11
+ date: 2021-08-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -51,6 +51,9 @@ files:
51
51
  - ext/tomoto/slda.cpp
52
52
  - ext/tomoto/utils.h
53
53
  - lib/tomoto.rb
54
+ - lib/tomoto/2.6/tomoto.so
55
+ - lib/tomoto/2.7/tomoto.so
56
+ - lib/tomoto/3.0/tomoto.so
54
57
  - lib/tomoto/ct.rb
55
58
  - lib/tomoto/dmr.rb
56
59
  - lib/tomoto/dt.rb
@@ -64,8 +67,6 @@ files:
64
67
  - lib/tomoto/pa.rb
65
68
  - lib/tomoto/plda.rb
66
69
  - lib/tomoto/slda.rb
67
- - lib/tomoto/tomoto.bundle
68
- - lib/tomoto/tomoto.so
69
70
  - lib/tomoto/version.rb
70
71
  - vendor/EigenRand/EigenRand/Core.h
71
72
  - vendor/EigenRand/EigenRand/Dists/Basic.h
@@ -508,7 +509,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
508
509
  - !ruby/object:Gem::Version
509
510
  version: '0'
510
511
  requirements: []
511
- rubygems_version: 3.2.3
512
+ rubygems_version: 3.2.22
512
513
  signing_key:
513
514
  specification_version: 4
514
515
  summary: High performance topic modeling for Ruby
Binary file
data/lib/tomoto/tomoto.so DELETED
Binary file