tomoto 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -0
- data/ext/tomoto/lda.cpp +24 -0
- data/lib/tomoto/2.6/tomoto.so +0 -0
- data/lib/tomoto/2.7/tomoto.so +0 -0
- data/lib/tomoto/3.0/tomoto.so +0 -0
- data/lib/tomoto/lda.rb +14 -0
- data/lib/tomoto/version.rb +1 -1
- metadata +6 -5
- data/lib/tomoto/tomoto.bundle +0 -0
- data/lib/tomoto/tomoto.so +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cd717980f682fd9151cf51a439e3ab54ff59f88575aa8c552b45b769048e9e6b
|
|
4
|
+
data.tar.gz: 80ebf4430f748279d4973ae8fd3949bdaf3043446370a81b7d7c1ed3107358fa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4d3cc8d59f665d7957b1f94d60f688fdbe4d3400d984f716e29a6d32334b46b901f7ef0da6702ce5582cbd90e7c9c42727f04c19820c8e971e6c47d597dcc6f6
|
|
7
|
+
data.tar.gz: d0627badaa6d0ee3b65d30f805e6a5c54d440911e8ec770e69e70c0b1a840d8d0ee6c248fa1479162b2270eeb5038fb2ff5ca6d80a8233a25df83dfb562ea743
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
data/ext/tomoto/lda.cpp
CHANGED
|
@@ -49,6 +49,30 @@ void init_lda(Rice::Module& m) {
|
|
|
49
49
|
*[](tomoto::ILDAModel& self, std::vector<std::string> words) {
|
|
50
50
|
return self.addDoc(buildDoc(words));
|
|
51
51
|
})
|
|
52
|
+
.define_method(
|
|
53
|
+
"_make_doc",
|
|
54
|
+
*[](tomoto::ILDAModel& self, std::vector<std::string> words) {
|
|
55
|
+
return DocumentObject(self.makeDoc(buildDoc(words)).release(), &self);
|
|
56
|
+
})
|
|
57
|
+
.define_method(
|
|
58
|
+
"_infer",
|
|
59
|
+
*[](tomoto::ILDAModel& self, DocumentObject& doc_object, size_t iteration, float tolerance, size_t workers, size_t ps, size_t together) {
|
|
60
|
+
std::vector<tomoto::DocumentBase*> docs;
|
|
61
|
+
auto doc = doc_object.doc;
|
|
62
|
+
docs.emplace_back(const_cast<tomoto::DocumentBase*>(doc));
|
|
63
|
+
float ll = self.infer(docs, iteration, tolerance, workers, (tomoto::ParallelScheme)ps, !!together)[0];
|
|
64
|
+
|
|
65
|
+
auto topic_dist = self.getTopicsByDoc(doc);
|
|
66
|
+
auto topic_res = Array();
|
|
67
|
+
for (size_t i = 0; i < topic_dist.size(); i++) {
|
|
68
|
+
topic_res.push(topic_dist[i]);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
auto res = Array();
|
|
72
|
+
res.push(topic_res);
|
|
73
|
+
res.push(ll);
|
|
74
|
+
return res;
|
|
75
|
+
})
|
|
52
76
|
.define_method(
|
|
53
77
|
"alpha",
|
|
54
78
|
*[](tomoto::ILDAModel& self) {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/tomoto/lda.rb
CHANGED
|
@@ -18,6 +18,16 @@ module Tomoto
|
|
|
18
18
|
_add_doc(prepare_doc(doc))
|
|
19
19
|
end
|
|
20
20
|
|
|
21
|
+
def make_doc(doc)
|
|
22
|
+
_make_doc(tokenize_doc(doc))
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# TODO support multiple docs
|
|
26
|
+
def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
|
|
27
|
+
raise "cannot infer with untrained model" unless defined?(@prepared)
|
|
28
|
+
_infer(doc, iter, tolerance, workers, to_ps(parallel), together)
|
|
29
|
+
end
|
|
30
|
+
|
|
21
31
|
def count_by_topics
|
|
22
32
|
prepare
|
|
23
33
|
_count_by_topics
|
|
@@ -96,6 +106,10 @@ module Tomoto
|
|
|
96
106
|
|
|
97
107
|
def prepare_doc(doc)
|
|
98
108
|
raise "cannot add_doc() after train()" if defined?(@prepared)
|
|
109
|
+
tokenize_doc(doc)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def tokenize_doc(doc)
|
|
99
113
|
doc = doc.split(/[[:space:]]+/) unless doc.is_a?(Array)
|
|
100
114
|
doc
|
|
101
115
|
end
|
data/lib/tomoto/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tomoto
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-
|
|
11
|
+
date: 2021-08-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rice
|
|
@@ -51,6 +51,9 @@ files:
|
|
|
51
51
|
- ext/tomoto/slda.cpp
|
|
52
52
|
- ext/tomoto/utils.h
|
|
53
53
|
- lib/tomoto.rb
|
|
54
|
+
- lib/tomoto/2.6/tomoto.so
|
|
55
|
+
- lib/tomoto/2.7/tomoto.so
|
|
56
|
+
- lib/tomoto/3.0/tomoto.so
|
|
54
57
|
- lib/tomoto/ct.rb
|
|
55
58
|
- lib/tomoto/dmr.rb
|
|
56
59
|
- lib/tomoto/dt.rb
|
|
@@ -64,8 +67,6 @@ files:
|
|
|
64
67
|
- lib/tomoto/pa.rb
|
|
65
68
|
- lib/tomoto/plda.rb
|
|
66
69
|
- lib/tomoto/slda.rb
|
|
67
|
-
- lib/tomoto/tomoto.bundle
|
|
68
|
-
- lib/tomoto/tomoto.so
|
|
69
70
|
- lib/tomoto/version.rb
|
|
70
71
|
- vendor/EigenRand/EigenRand/Core.h
|
|
71
72
|
- vendor/EigenRand/EigenRand/Dists/Basic.h
|
|
@@ -508,7 +509,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
508
509
|
- !ruby/object:Gem::Version
|
|
509
510
|
version: '0'
|
|
510
511
|
requirements: []
|
|
511
|
-
rubygems_version: 3.2.
|
|
512
|
+
rubygems_version: 3.2.22
|
|
512
513
|
signing_key:
|
|
513
514
|
specification_version: 4
|
|
514
515
|
summary: High performance topic modeling for Ruby
|
data/lib/tomoto/tomoto.bundle
DELETED
|
Binary file
|
data/lib/tomoto/tomoto.so
DELETED
|
Binary file
|