tomoto 0.3.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +45 -0
- data/LICENSE.txt +22 -0
- data/README.md +162 -0
- data/ext/tomoto/ct.cpp +58 -0
- data/ext/tomoto/dmr.cpp +69 -0
- data/ext/tomoto/dt.cpp +91 -0
- data/ext/tomoto/extconf.rb +34 -0
- data/ext/tomoto/gdmr.cpp +42 -0
- data/ext/tomoto/hdp.cpp +47 -0
- data/ext/tomoto/hlda.cpp +71 -0
- data/ext/tomoto/hpa.cpp +32 -0
- data/ext/tomoto/lda.cpp +281 -0
- data/ext/tomoto/llda.cpp +33 -0
- data/ext/tomoto/mglda.cpp +81 -0
- data/ext/tomoto/pa.cpp +32 -0
- data/ext/tomoto/plda.cpp +33 -0
- data/ext/tomoto/slda.cpp +48 -0
- data/ext/tomoto/tomoto.cpp +48 -0
- data/ext/tomoto/utils.h +30 -0
- data/lib/tomoto/2.7/tomoto.bundle +0 -0
- data/lib/tomoto/3.0/tomoto.bundle +0 -0
- data/lib/tomoto/3.1/tomoto.bundle +0 -0
- data/lib/tomoto/ct.rb +24 -0
- data/lib/tomoto/dmr.rb +27 -0
- data/lib/tomoto/dt.rb +15 -0
- data/lib/tomoto/gdmr.rb +15 -0
- data/lib/tomoto/hdp.rb +11 -0
- data/lib/tomoto/hlda.rb +56 -0
- data/lib/tomoto/hpa.rb +11 -0
- data/lib/tomoto/lda.rb +181 -0
- data/lib/tomoto/llda.rb +15 -0
- data/lib/tomoto/mglda.rb +15 -0
- data/lib/tomoto/pa.rb +11 -0
- data/lib/tomoto/plda.rb +15 -0
- data/lib/tomoto/slda.rb +37 -0
- data/lib/tomoto/version.rb +3 -0
- data/lib/tomoto.rb +27 -0
- data/vendor/EigenRand/EigenRand/EigenRand +24 -0
- data/vendor/EigenRand/LICENSE +21 -0
- data/vendor/EigenRand/README.md +426 -0
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +26 -0
- data/vendor/eigen/COPYING.GPL +674 -0
- data/vendor/eigen/COPYING.LGPL +502 -0
- data/vendor/eigen/COPYING.MINPACK +51 -0
- data/vendor/eigen/COPYING.MPL2 +373 -0
- data/vendor/eigen/COPYING.README +18 -0
- data/vendor/eigen/Eigen/Cholesky +45 -0
- data/vendor/eigen/Eigen/CholmodSupport +48 -0
- data/vendor/eigen/Eigen/Core +384 -0
- data/vendor/eigen/Eigen/Dense +7 -0
- data/vendor/eigen/Eigen/Eigen +2 -0
- data/vendor/eigen/Eigen/Eigenvalues +60 -0
- data/vendor/eigen/Eigen/Geometry +59 -0
- data/vendor/eigen/Eigen/Householder +29 -0
- data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/eigen/Eigen/Jacobi +32 -0
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +47 -0
- data/vendor/eigen/Eigen/MetisSupport +35 -0
- data/vendor/eigen/Eigen/OrderingMethods +70 -0
- data/vendor/eigen/Eigen/PaStiXSupport +49 -0
- data/vendor/eigen/Eigen/PardisoSupport +35 -0
- data/vendor/eigen/Eigen/QR +50 -0
- data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
- data/vendor/eigen/Eigen/SPQRSupport +34 -0
- data/vendor/eigen/Eigen/SVD +50 -0
- data/vendor/eigen/Eigen/Sparse +34 -0
- data/vendor/eigen/Eigen/SparseCholesky +37 -0
- data/vendor/eigen/Eigen/SparseCore +69 -0
- data/vendor/eigen/Eigen/SparseLU +50 -0
- data/vendor/eigen/Eigen/SparseQR +36 -0
- data/vendor/eigen/Eigen/StdDeque +27 -0
- data/vendor/eigen/Eigen/StdList +26 -0
- data/vendor/eigen/Eigen/StdVector +27 -0
- data/vendor/eigen/Eigen/SuperLUSupport +64 -0
- data/vendor/eigen/Eigen/UmfPackSupport +40 -0
- data/vendor/eigen/README.md +5 -0
- data/vendor/eigen/bench/README.txt +55 -0
- data/vendor/eigen/bench/btl/COPYING +340 -0
- data/vendor/eigen/bench/btl/README +154 -0
- data/vendor/eigen/bench/tensors/README +20 -0
- data/vendor/eigen/blas/README.txt +6 -0
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mandelbrot/README +10 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
- data/vendor/eigen/demos/opengl/README +13 -0
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
- data/vendor/eigen/unsupported/README.txt +50 -0
- data/vendor/tomotopy/LICENSE +21 -0
- data/vendor/tomotopy/README.kr.rst +512 -0
- data/vendor/tomotopy/README.rst +516 -0
- data/vendor/variant/LICENSE +25 -0
- data/vendor/variant/LICENSE_1_0.txt +23 -0
- data/vendor/variant/README.md +102 -0
- metadata +140 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b23bb72cb63777f22912c00abe475b2ba903b2abd0ab12ef5ebafc0832ef3d4d
|
4
|
+
data.tar.gz: c7a98c977d7864ce142df08380b0ed3cc94b663221af79ddb291393102b9603b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 378389c5f113de2e5cf0db7b6a433fd189a74afca495e2ecc69cf1ffddb32c5dba0761a98c245ba748e1e8365633958573591d0182af1d2d5b8b23bee9f6a3ba
|
7
|
+
data.tar.gz: 385e48e227645fb6148bea04b6d04157e6cf4b271b23114bc168463886ac167e06315ffa65ce7def5eb9cf3c94cfee5483cf1761208a5fb5323c00e2a3395550
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
## 0.3.0 (2022-10-03)
|
2
|
+
|
3
|
+
- Added precompiled gems for Linux and Mac
|
4
|
+
- Updated tomoto to 0.12.3
|
5
|
+
- Dropped support for Ruby < 2.7
|
6
|
+
|
7
|
+
## 0.2.3 (2021-08-26)
|
8
|
+
|
9
|
+
- Updated to Rice 4
|
10
|
+
|
11
|
+
## 0.2.2 (2021-08-23)
|
12
|
+
|
13
|
+
- Reduced gem size
|
14
|
+
|
15
|
+
## 0.2.1 (2021-08-23)
|
16
|
+
|
17
|
+
- Added support for unseen documents
|
18
|
+
|
19
|
+
## 0.2.0 (2021-05-23)
|
20
|
+
|
21
|
+
- Updated tomoto to 0.12.0
|
22
|
+
- Dropped support for Ruby < 2.6
|
23
|
+
|
24
|
+
## 0.1.4 (2021-03-14)
|
25
|
+
|
26
|
+
- Added `docs` method
|
27
|
+
- Updated tomoto to 0.10.2
|
28
|
+
- Updated `add_doc` to return the index of the document
|
29
|
+
|
30
|
+
## 0.1.3 (2020-12-19)
|
31
|
+
|
32
|
+
- Updated tomoto to 0.10.0
|
33
|
+
|
34
|
+
## 0.1.2 (2020-10-10)
|
35
|
+
|
36
|
+
- Added `summary` method
|
37
|
+
- Added `parallel` option to `train` method
|
38
|
+
|
39
|
+
## 0.1.1 (2020-10-10)
|
40
|
+
|
41
|
+
- Added many more models
|
42
|
+
|
43
|
+
## 0.1.0 (2020-10-09)
|
44
|
+
|
45
|
+
- First release
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2019, bab2min
|
4
|
+
Copyright (c) 2020-2021 Andrew Kane
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in all
|
14
|
+
copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
# tomoto.rb
|
2
|
+
|
3
|
+
:tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
|
4
|
+
|
5
|
+
[![Build Status](https://github.com/ankane/tomoto-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/tomoto-ruby/actions)
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application’s Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem "tomoto"
|
13
|
+
```
|
14
|
+
|
15
|
+
## Getting Started
|
16
|
+
|
17
|
+
Train a model
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
model = Tomoto::LDA.new(k: 2)
|
21
|
+
model.add_doc("text from document one")
|
22
|
+
model.add_doc("text from document two")
|
23
|
+
model.add_doc("text from document three")
|
24
|
+
model.train(100) # iterations
|
25
|
+
```
|
26
|
+
|
27
|
+
Get the summary
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
model.summary
|
31
|
+
```
|
32
|
+
|
33
|
+
Get topic words
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
model.topic_words
|
37
|
+
```
|
38
|
+
|
39
|
+
Save the model to a file
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
model.save("model.bin")
|
43
|
+
```
|
44
|
+
|
45
|
+
Load the model from a file
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
model = Tomoto::LDA.load("model.bin")
|
49
|
+
```
|
50
|
+
|
51
|
+
Get topic probabilities for a document
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
doc = model.docs[0]
|
55
|
+
doc.topics
|
56
|
+
```
|
57
|
+
|
58
|
+
Get the number of words for each topic
|
59
|
+
|
60
|
+
```ruby
|
61
|
+
model.count_by_topics
|
62
|
+
```
|
63
|
+
|
64
|
+
Get the vocab
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
model.vocabs
|
68
|
+
```
|
69
|
+
|
70
|
+
Get the log likelihood per word
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
model.ll_per_word
|
74
|
+
```
|
75
|
+
|
76
|
+
Perform inference for unseen documents
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
doc = model.make_doc("unseen doc")
|
80
|
+
topic_dist, ll = model.infer(doc)
|
81
|
+
```
|
82
|
+
|
83
|
+
## Models
|
84
|
+
|
85
|
+
Supports:
|
86
|
+
|
87
|
+
- Latent Dirichlet Allocation (`LDA`)
|
88
|
+
- Labeled LDA (`LLDA`)
|
89
|
+
- Partially Labeled LDA (`PLDA`)
|
90
|
+
- Supervised LDA (`SLDA`)
|
91
|
+
- Dirichlet Multinomial Regression (`DMR`)
|
92
|
+
- Generalized Dirichlet Multinomial Regression (`GDMR`)
|
93
|
+
- Hierarchical Dirichlet Process (`HDP`)
|
94
|
+
- Hierarchical LDA (`HLDA`)
|
95
|
+
- Multi Grain LDA (`MGLDA`)
|
96
|
+
- Pachinko Allocation (`PA`)
|
97
|
+
- Hierarchical PA (`HPA`)
|
98
|
+
- Correlated Topic Model (`CT`)
|
99
|
+
- Dynamic Topic Model (`DT`)
|
100
|
+
|
101
|
+
## API
|
102
|
+
|
103
|
+
This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
|
104
|
+
|
105
|
+
- The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
|
106
|
+
- Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
|
107
|
+
|
108
|
+
If a method or option you need isn’t supported, feel free to open an issue.
|
109
|
+
|
110
|
+
## Examples
|
111
|
+
|
112
|
+
- [LDA](examples/lda_basic.rb)
|
113
|
+
- [HDP](examples/hdp_basic.rb)
|
114
|
+
|
115
|
+
## Tokenization
|
116
|
+
|
117
|
+
Documents are tokenized by whitespace by default, or you can perform your own tokenization.
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
model.add_doc(["tokens", "from", "document", "one"])
|
121
|
+
```
|
122
|
+
|
123
|
+
## Performance
|
124
|
+
|
125
|
+
tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support them. Check which instruction set architecture it’s using with:
|
126
|
+
|
127
|
+
```ruby
|
128
|
+
Tomoto.isa
|
129
|
+
```
|
130
|
+
|
131
|
+
## Parallelism
|
132
|
+
|
133
|
+
Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
|
134
|
+
|
135
|
+
```ruby
|
136
|
+
model.train(parallel: :partition)
|
137
|
+
```
|
138
|
+
|
139
|
+
Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
|
140
|
+
|
141
|
+
## History
|
142
|
+
|
143
|
+
View the [changelog](https://github.com/ankane/tomoto-ruby/blob/master/CHANGELOG.md)
|
144
|
+
|
145
|
+
## Contributing
|
146
|
+
|
147
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
148
|
+
|
149
|
+
- [Report bugs](https://github.com/ankane/tomoto-ruby/issues)
|
150
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/tomoto-ruby/pulls)
|
151
|
+
- Write, clarify, or fix documentation
|
152
|
+
- Suggest or add new features
|
153
|
+
|
154
|
+
To get started with development:
|
155
|
+
|
156
|
+
```sh
|
157
|
+
git clone --recursive https://github.com/ankane/tomoto-ruby.git
|
158
|
+
cd tomoto-ruby
|
159
|
+
bundle install
|
160
|
+
bundle exec rake compile
|
161
|
+
bundle exec rake test
|
162
|
+
```
|
data/ext/tomoto/ct.cpp
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#include <CT.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Correlated Topic Model binding as class `CT` under module `m`.
// The class wraps tomoto::ICTModel and inherits from the tomoto::ILDAModel
// binding.
void init_ct(Rice::Module& m) {
  Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
    .define_singleton_function(
      "_new",
      // Builds a CT model. `tw` is the numeric value of a tomoto::TermWeight.
      // The returned pointer is handed over to Ruby via takeOwnership().
      [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
        tomoto::CTArgs args;
        args.k = k;
        args.alpha = {alpha};
        args.eta = eta;
        // `seed` is unsigned, so the previous `seed >= 0` test was always true.
        // Treat (size_t)-1 as "no seed given" and keep the CTArgs default.
        // NOTE(review): presumably the Ruby wrapper passes -1 when the caller
        // omits the seed -- confirm in lib/tomoto/ct.rb.
        if (seed != static_cast<size_t>(-1)) {
          args.seed = seed;
        }
        return tomoto::ICTModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    .define_method(
      "_correlations",
      // Forwards to getCorrelationTopic() for the given topic id.
      [](tomoto::ICTModel& self, tomoto::Tid topic_id) {
        return self.getCorrelationTopic(topic_id);
      })
    .define_method(
      "num_beta_sample",
      [](tomoto::ICTModel& self) {
        return self.getNumBetaSample();
      })
    .define_method(
      "num_beta_sample=",
      // Setter returns the assigned value, like a Ruby attribute writer.
      [](tomoto::ICTModel& self, size_t value) {
        self.setNumBetaSample(value);
        return value;
      })
    .define_method(
      "num_tmn_sample",
      [](tomoto::ICTModel& self) {
        return self.getNumTMNSample();
      })
    .define_method(
      "num_tmn_sample=",
      // Setter returns the assigned value, like a Ruby attribute writer.
      [](tomoto::ICTModel& self, size_t value) {
        self.setNumTMNSample(value);
        return value;
      })
    .define_method(
      "_prior_cov",
      [](tomoto::ICTModel& self) {
        return self.getPriorCov();
      })
    .define_method(
      "prior_mean",
      [](tomoto::ICTModel& self) {
        return self.getPriorMean();
      });
}
|
data/ext/tomoto/dmr.cpp
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
#include <DMR.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Dirichlet Multinomial Regression binding as class `DMR` under
// module `m`. The class wraps tomoto::IDMRModel and inherits from the
// tomoto::ILDAModel binding.
void init_dmr(Rice::Module& m) {
  Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
    .define_singleton_function(
      "_new",
      // Builds a DMR model. `tw` is the numeric value of a tomoto::TermWeight.
      // The returned pointer is handed over to Ruby via takeOwnership().
      [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
        tomoto::DMRArgs args;
        args.k = k;
        args.alpha = {alpha};
        args.sigma = sigma;
        args.eta = eta;
        args.alphaEps = alpha_epsilon;
        // `seed` is unsigned, so the previous `seed >= 0` test was always true.
        // Treat (size_t)-1 as "no seed given" and keep the DMRArgs default.
        // NOTE(review): presumably the Ruby wrapper passes -1 when the caller
        // omits the seed -- confirm in lib/tomoto/dmr.rb.
        if (seed != static_cast<size_t>(-1)) {
          args.seed = seed;
        }
        return tomoto::IDMRModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    .define_method(
      "_add_doc",
      // Builds a document from `words`, attaches `metadata` under the
      // "metadata" misc key, and returns whatever addDoc() returns.
      [](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
        auto doc = buildDoc(words);
        doc.misc["metadata"] = metadata;
        return self.addDoc(doc);
      })
    .define_method(
      "alpha_epsilon",
      [](tomoto::IDMRModel& self) {
        return self.getAlphaEps();
      })
    .define_method(
      "alpha_epsilon=",
      // Setter returns the assigned value, like a Ruby attribute writer.
      [](tomoto::IDMRModel& self, tomoto::Float value) {
        self.setAlphaEps(value);
        return value;
      })
    .define_method(
      "f",
      [](tomoto::IDMRModel& self) {
        return self.getF();
      })
    .define_method(
      "_lambdas",
      [](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
        return self.getLambdaByTopic(topic_id);
      })
    .define_method(
      "metadata_dict",
      // Returns every entry of the metadata dictionary as a Ruby string
      // tagged with the UTF-8 encoding.
      [](tomoto::IDMRModel& self) {
        auto dict = self.getMetadataDict();
        Array res;
        auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
        for (size_t i = 0; i < dict.size(); i++) {
          VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
          Object obj(value);
          // force_encoding relabels the bytes as UTF-8.
          res.push(obj.call("force_encoding", utf8));
        }
        return res;
      })
    .define_method(
      "sigma",
      [](tomoto::IDMRModel& self) {
        return self.getSigma();
      });
}
|
data/ext/tomoto/dt.cpp
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
#include <DT.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Dynamic Topic Model binding as class `DT` under module `m`.
// The class wraps tomoto::IDTModel and inherits from the tomoto::ILDAModel
// binding.
void init_dt(Rice::Module& m) {
  Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
    .define_singleton_function(
      "_new",
      // Builds a DT model. `tw` is the numeric value of a tomoto::TermWeight.
      // The returned pointer is handed over to Ruby via takeOwnership().
      [](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
        // Rice only supports 10 arguments, so no seed is accepted here.
        // The previous code declared `size_t seed = -1` and guarded the
        // assignment with `seed >= 0`, which is always true for an unsigned
        // value. That unconditionally replaced the DTArgs default seed with
        // SIZE_MAX. args.seed is now left at its DTArgs default.
        tomoto::DTArgs args;
        args.k = k;
        args.t = t;
        args.alpha = {alphaVar};
        args.eta = etaVar;
        args.phi = phiVar;
        args.shapeA = shapeA;
        args.shapeB = shapeB;
        args.shapeC = shapeC;
        return tomoto::IDTModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    .define_method(
      "_add_doc",
      // Builds a document from `words`, attaches `timepoint` under the
      // "timepoint" misc key, and returns whatever addDoc() returns.
      [](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
        auto doc = buildDoc(words);
        doc.misc["timepoint"] = timepoint;
        return self.addDoc(doc);
      })
    .define_method(
      "alpha",
      // Returns a nested array: one row per topic (getK()), each row holding
      // getAlpha(topic, timepoint) for every timepoint (getT()).
      [](tomoto::IDTModel& self) {
        Array res;
        for (size_t i = 0; i < self.getK(); i++) {
          Array res2;
          for (size_t j = 0; j < self.getT(); j++) {
            res2.push(self.getAlpha(i, j));
          }
          res.push(res2);
        }
        return res;
      })
    .define_method(
      "lr_a",
      [](tomoto::IDTModel& self) {
        return self.getShapeA();
      })
    .define_method(
      "lr_a=",
      // Setter returns the assigned value, like a Ruby attribute writer.
      [](tomoto::IDTModel& self, tomoto::Float value) {
        self.setShapeA(value);
        return value;
      })
    .define_method(
      "lr_b",
      [](tomoto::IDTModel& self) {
        return self.getShapeB();
      })
    .define_method(
      "lr_b=",
      // Setter returns the assigned value, like a Ruby attribute writer.
      [](tomoto::IDTModel& self, tomoto::Float value) {
        self.setShapeB(value);
        return value;
      })
    .define_method(
      "lr_c",
      [](tomoto::IDTModel& self) {
        return self.getShapeC();
      })
    .define_method(
      "lr_c=",
      // Setter returns the assigned value, like a Ruby attribute writer.
      [](tomoto::IDTModel& self, tomoto::Float value) {
        self.setShapeC(value);
        return value;
      })
    .define_method(
      "num_docs_by_timepoint",
      [](tomoto::IDTModel& self) {
        return self.getNumDocsByT();
      })
    .define_method(
      "num_timepoints",
      [](tomoto::IDTModel& self) {
        return self.getT();
      });
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Build configuration for the tomoto native extension (mkmf + Rice).
require "mkmf-rice"

# Base flags: C++17, Ruby's own optimisation flags, MPL2-only Eigen.
$CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"

# RUBY_CC_VERSION is checked so these host-specific flags are skipped when
# it is set.
if ENV["RUBY_CC_VERSION"].nil?
  # AVX-512F is not supported yet
  # https://github.com/bab2min/tomotopy/issues/188
  native_flags = with_config("optflags", "-march=native -mno-avx512f")
  $CXXFLAGS << " " << native_flags
end

# Pick the warning suppressions that match the compiler in use.
using_apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i

$CXXFLAGS +=
  if using_apple_clang
    # silence rice warnings
    " -Wno-deprecated-declarations"
  else
    # silence eigen warnings
    " -Wno-ignored-attributes -Wno-deprecated-copy"
  end

# silence tomoto warnings
$CXXFLAGS += " -Wno-unused-variable -Wno-switch"

# Source and header locations, resolved relative to this file.
ext_dir = File.expand_path(".", __dir__)
tomoto_dir = File.expand_path("../../vendor/tomotopy/src/TopicModel", __dir__)
eigen_dir = File.expand_path("../../vendor/eigen", __dir__)
eigen_rand_dir = File.expand_path("../../vendor/EigenRand", __dir__)
variant_dir = File.expand_path("../../vendor/variant/include", __dir__)

# Compile the binding sources together with the vendored tomoto sources.
$srcs = Dir["{#{ext_dir},#{tomoto_dir}}/*.cpp"]
include_dirs = [tomoto_dir, eigen_dir, eigen_rand_dir, variant_dir]
$INCFLAGS += include_dirs.map { |dir| " -I#{dir}" }.join
$VPATH << tomoto_dir

create_makefile("tomoto/tomoto")
|
data/ext/tomoto/gdmr.cpp
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#include <GDMR.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Generalized Dirichlet Multinomial Regression binding as class
// `GDMR` under module `m`. The class wraps tomoto::IGDMRModel and inherits
// from the tomoto::IDMRModel binding.
void init_gdmr(Rice::Module& m) {
  Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR")
    .define_singleton_function(
      "_new",
      // Builds a GDMR model. `tw` is the numeric value of a tomoto::TermWeight.
      // The returned pointer is handed over to Ruby via takeOwnership().
      [](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
        tomoto::GDMRArgs args;
        args.k = k;
        args.degrees = degrees;
        args.alpha = {alpha};
        args.sigma = sigma;
        args.sigma0 = sigma0;
        args.eta = eta;
        args.alphaEps = alpha_epsilon;
        // `seed` is unsigned, so the previous `seed >= 0` test was always true.
        // Treat (size_t)-1 as "no seed given" and keep the GDMRArgs default.
        // NOTE(review): presumably the Ruby wrapper passes -1 when the caller
        // omits the seed -- confirm in lib/tomoto/gdmr.rb.
        if (seed != static_cast<size_t>(-1)) {
          args.seed = seed;
        }
        return tomoto::IGDMRModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    .define_method(
      "_add_doc",
      // Builds a document from `words`, attaches `numeric_metadata` under the
      // "numeric_metadata" misc key, and returns whatever addDoc() returns.
      [](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> numeric_metadata) {
        auto doc = buildDoc(words);
        doc.misc["numeric_metadata"] = numeric_metadata;
        return self.addDoc(doc);
      })
    .define_method(
      "degrees",
      // Exposed to Ruby as `degrees`; backed by getFs().
      [](tomoto::IGDMRModel& self) {
        return self.getFs();
      })
    .define_method(
      "sigma0",
      [](tomoto::IGDMRModel& self) {
        return self.getSigma0();
      });
}
|
data/ext/tomoto/hdp.cpp
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#include <HDP.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Hierarchical Dirichlet Process binding as class `HDP` under
// module `m`. The class wraps tomoto::IHDPModel and inherits from the
// tomoto::ILDAModel binding.
void init_hdp(Rice::Module& m) {
  Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP")
    .define_singleton_function(
      "_new",
      // Builds an HDP model. `tw` is the numeric value of a tomoto::TermWeight.
      // The returned pointer is handed over to Ruby via takeOwnership().
      [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
        tomoto::HDPArgs args;
        args.k = k;
        args.alpha = {alpha};
        args.eta = eta;
        args.gamma = gamma;
        // `seed` is unsigned, so the previous `seed >= 0` test was always true.
        // Treat (size_t)-1 as "no seed given" and keep the HDPArgs default.
        // NOTE(review): presumably the Ruby wrapper passes -1 when the caller
        // omits the seed -- confirm in lib/tomoto/hdp.rb.
        if (seed != static_cast<size_t>(-1)) {
          args.seed = seed;
        }
        return tomoto::IHDPModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    .define_method(
      "alpha",
      [](tomoto::IHDPModel& self) {
        return self.getAlpha();
      })
    .define_method(
      "gamma",
      [](tomoto::IHDPModel& self) {
        return self.getGamma();
      })
    .define_method(
      "live_k",
      [](tomoto::IHDPModel& self) {
        return self.getLiveK();
      })
    .define_method(
      "live_topic?",
      [](tomoto::IHDPModel& self, size_t tid) {
        return self.isLiveTopic(tid);
      })
    .define_method(
      "num_tables",
      [](tomoto::IHDPModel& self) {
        return self.getTotalTables();
      });
}
|
data/ext/tomoto/hlda.cpp
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#include <HLDA.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Hierarchical LDA binding as class `HLDA` under module `m`.
// The class wraps tomoto::IHLDAModel and inherits from the tomoto::ILDAModel
// binding.
void init_hlda(Rice::Module& m) {
  Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA")
    .define_singleton_function(
      "_new",
      // Builds an HLDA model. `tw` is the numeric value of a
      // tomoto::TermWeight; `levelDepth` is stored in args.k.
      // The returned pointer is handed over to Ruby via takeOwnership().
      [](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
        tomoto::HLDAArgs args;
        args.k = levelDepth;
        args.alpha = {alpha};
        args.eta = eta;
        args.gamma = gamma;
        // `seed` is unsigned, so the previous `seed >= 0` test was always true.
        // Treat (size_t)-1 as "no seed given" and keep the HLDAArgs default.
        // NOTE(review): presumably the Ruby wrapper passes -1 when the caller
        // omits the seed -- confirm in lib/tomoto/hlda.rb.
        if (seed != static_cast<size_t>(-1)) {
          args.seed = seed;
        }
        return tomoto::IHLDAModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    .define_method(
      "alpha",
      // Returns getAlpha(level) for every level below getLevelDepth().
      [](tomoto::IHLDAModel& self) {
        Array res;
        for (size_t i = 0; i < self.getLevelDepth(); i++) {
          res.push(self.getAlpha(i));
        }
        return res;
      })
    .define_method(
      "_children_topics",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.getChildTopicId(topic_id);
      })
    .define_method(
      "depth",
      [](tomoto::IHLDAModel& self) {
        return self.getLevelDepth();
      })
    .define_method(
      "gamma",
      [](tomoto::IHLDAModel& self) {
        return self.getGamma();
      })
    .define_method(
      "_level",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.getLevelOfTopic(topic_id);
      })
    .define_method(
      "live_k",
      [](tomoto::IHLDAModel& self) {
        return self.getLiveK();
      })
    .define_method(
      "_live_topic?",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.isLiveTopic(topic_id);
      })
    .define_method(
      "_num_docs_of_topic",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.getNumDocsOfTopic(topic_id);
      })
    .define_method(
      "_parent_topic",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.getParentTopicId(topic_id);
      });
}
|
data/ext/tomoto/hpa.cpp
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#include <HPA.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the Hierarchical Pachinko Allocation binding as class `HPA` under
// module `m`. The class wraps tomoto::IHPAModel and inherits from the
// tomoto::IPAModel binding.
void init_hpa(Rice::Module& m) {
  Rice::define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(m, "HPA")
    .define_singleton_function(
      "_new",
      // Builds an HPA model. `tw` is the numeric value of a tomoto::TermWeight;
      // `k1` is stored in args.k and `k2` in args.k2.
      // The returned pointer is handed over to Ruby via takeOwnership().
      [](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
        tomoto::HPAArgs args;
        args.k = k1;
        args.k2 = k2;
        args.alpha = {alpha};
        args.eta = eta;
        // `seed` is unsigned, so the previous `seed >= 0` test was always true.
        // Treat (size_t)-1 as "no seed given" and keep the HPAArgs default.
        // NOTE(review): presumably the Ruby wrapper passes -1 when the caller
        // omits the seed -- confirm in lib/tomoto/hpa.rb.
        if (seed != static_cast<size_t>(-1)) {
          args.seed = seed;
        }
        // The second create() argument is passed as `false`, as before.
        return tomoto::IHPAModel::create(static_cast<tomoto::TermWeight>(tw), false, args);
      }, Rice::Return().takeOwnership())
    .define_method(
      "alpha",
      [](tomoto::IHPAModel& self) {
        Array res;
        // use <= to return k+1 elements
        for (size_t i = 0; i <= self.getK(); i++) {
          res.push(self.getAlpha(i));
        }
        return res;
      });
}
|