tomoto 0.4.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +65 -0
- data/LICENSE.txt +22 -0
- data/README.md +154 -0
- data/ext/tomoto/ct.cpp +58 -0
- data/ext/tomoto/dmr.cpp +69 -0
- data/ext/tomoto/dt.cpp +91 -0
- data/ext/tomoto/extconf.rb +42 -0
- data/ext/tomoto/gdmr.cpp +42 -0
- data/ext/tomoto/hdp.cpp +47 -0
- data/ext/tomoto/hlda.cpp +71 -0
- data/ext/tomoto/hpa.cpp +32 -0
- data/ext/tomoto/lda.cpp +281 -0
- data/ext/tomoto/llda.cpp +46 -0
- data/ext/tomoto/mglda.cpp +81 -0
- data/ext/tomoto/pa.cpp +32 -0
- data/ext/tomoto/plda.cpp +33 -0
- data/ext/tomoto/slda.cpp +48 -0
- data/ext/tomoto/tomoto.cpp +48 -0
- data/ext/tomoto/utils.h +30 -0
- data/lib/tomoto/3.0/tomoto.so +0 -0
- data/lib/tomoto/3.1/tomoto.so +0 -0
- data/lib/tomoto/3.2/tomoto.so +0 -0
- data/lib/tomoto/3.3/tomoto.so +0 -0
- data/lib/tomoto/ct.rb +24 -0
- data/lib/tomoto/dmr.rb +27 -0
- data/lib/tomoto/dt.rb +15 -0
- data/lib/tomoto/gdmr.rb +15 -0
- data/lib/tomoto/hdp.rb +11 -0
- data/lib/tomoto/hlda.rb +56 -0
- data/lib/tomoto/hpa.rb +11 -0
- data/lib/tomoto/lda.rb +186 -0
- data/lib/tomoto/llda.rb +15 -0
- data/lib/tomoto/mglda.rb +15 -0
- data/lib/tomoto/pa.rb +11 -0
- data/lib/tomoto/plda.rb +15 -0
- data/lib/tomoto/slda.rb +37 -0
- data/lib/tomoto/version.rb +3 -0
- data/lib/tomoto.rb +27 -0
- data/vendor/EigenRand/EigenRand/EigenRand +24 -0
- data/vendor/EigenRand/LICENSE +21 -0
- data/vendor/EigenRand/README.md +430 -0
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +26 -0
- data/vendor/eigen/COPYING.GPL +674 -0
- data/vendor/eigen/COPYING.LGPL +502 -0
- data/vendor/eigen/COPYING.MINPACK +51 -0
- data/vendor/eigen/COPYING.MPL2 +373 -0
- data/vendor/eigen/COPYING.README +18 -0
- data/vendor/eigen/Eigen/Cholesky +45 -0
- data/vendor/eigen/Eigen/CholmodSupport +48 -0
- data/vendor/eigen/Eigen/Core +384 -0
- data/vendor/eigen/Eigen/Dense +7 -0
- data/vendor/eigen/Eigen/Eigen +2 -0
- data/vendor/eigen/Eigen/Eigenvalues +60 -0
- data/vendor/eigen/Eigen/Geometry +59 -0
- data/vendor/eigen/Eigen/Householder +29 -0
- data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/eigen/Eigen/Jacobi +32 -0
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +47 -0
- data/vendor/eigen/Eigen/MetisSupport +35 -0
- data/vendor/eigen/Eigen/OrderingMethods +70 -0
- data/vendor/eigen/Eigen/PaStiXSupport +49 -0
- data/vendor/eigen/Eigen/PardisoSupport +35 -0
- data/vendor/eigen/Eigen/QR +50 -0
- data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
- data/vendor/eigen/Eigen/SPQRSupport +34 -0
- data/vendor/eigen/Eigen/SVD +50 -0
- data/vendor/eigen/Eigen/Sparse +34 -0
- data/vendor/eigen/Eigen/SparseCholesky +37 -0
- data/vendor/eigen/Eigen/SparseCore +69 -0
- data/vendor/eigen/Eigen/SparseLU +50 -0
- data/vendor/eigen/Eigen/SparseQR +36 -0
- data/vendor/eigen/Eigen/StdDeque +27 -0
- data/vendor/eigen/Eigen/StdList +26 -0
- data/vendor/eigen/Eigen/StdVector +27 -0
- data/vendor/eigen/Eigen/SuperLUSupport +64 -0
- data/vendor/eigen/Eigen/UmfPackSupport +40 -0
- data/vendor/eigen/README.md +5 -0
- data/vendor/eigen/bench/README.txt +55 -0
- data/vendor/eigen/bench/btl/COPYING +340 -0
- data/vendor/eigen/bench/btl/README +154 -0
- data/vendor/eigen/bench/tensors/README +20 -0
- data/vendor/eigen/blas/README.txt +6 -0
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mandelbrot/README +10 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
- data/vendor/eigen/demos/opengl/README +13 -0
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
- data/vendor/eigen/unsupported/README.txt +50 -0
- data/vendor/tomotopy/LICENSE +21 -0
- data/vendor/tomotopy/README.kr.rst +536 -0
- data/vendor/tomotopy/README.rst +555 -0
- data/vendor/variant/LICENSE +25 -0
- data/vendor/variant/LICENSE_1_0.txt +23 -0
- data/vendor/variant/README.md +102 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: dd86d2aae577db884b593492994fe68fccc5cc35f8ea0c0f498e4c45fc474623
|
4
|
+
data.tar.gz: 1ab6fd3c4166c34c8f288f7d71938f5da1f0948437ebc67c3557e526db16a70f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ac159442e5b494a318b6b0dd3c702cf7570114d64416d0f69843395470957a2f073c1ceb45e5f6c2fceca532bb62a445fff1a86b8f68fd70522911581589f1ca
|
7
|
+
data.tar.gz: 73403819de8a5ab27a7558367117ed2de6a3c9e9a7e49b1b97de6b421e9234da726883b1f4b8dea57e0d5d73f858ac41a75d61607bf0dec8a8fb99bed6aed2b0
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
## 0.4.0 (2023-12-28)
|
2
|
+
|
3
|
+
- Added precompiled gem for Linux ARM
|
4
|
+
- Updated tomoto to 0.12.7
|
5
|
+
- Dropped support for Ruby < 3
|
6
|
+
|
7
|
+
## 0.3.3 (2023-02-01)
|
8
|
+
|
9
|
+
- Added `topic_label_dict` method to `LLDA`
|
10
|
+
- Fixed error with `infer` with loaded model
|
11
|
+
|
12
|
+
## 0.3.2 (2023-01-22)
|
13
|
+
|
14
|
+
- Added precompiled gem for Mac ARM
|
15
|
+
- Updated tomoto to 0.12.4
|
16
|
+
|
17
|
+
## 0.3.1 (2023-01-12)
|
18
|
+
|
19
|
+
- Added support for Ruby 3.2
|
20
|
+
|
21
|
+
## 0.3.0 (2022-10-03)
|
22
|
+
|
23
|
+
- Added precompiled gems for Linux and Mac
|
24
|
+
- Updated tomoto to 0.12.3
|
25
|
+
- Dropped support for Ruby < 2.7
|
26
|
+
|
27
|
+
## 0.2.3 (2021-08-26)
|
28
|
+
|
29
|
+
- Updated to Rice 4
|
30
|
+
|
31
|
+
## 0.2.2 (2021-08-23)
|
32
|
+
|
33
|
+
- Reduced gem size
|
34
|
+
|
35
|
+
## 0.2.1 (2021-08-23)
|
36
|
+
|
37
|
+
- Added support for unseen documents
|
38
|
+
|
39
|
+
## 0.2.0 (2021-05-23)
|
40
|
+
|
41
|
+
- Updated tomoto to 0.12.0
|
42
|
+
- Dropped support for Ruby < 2.6
|
43
|
+
|
44
|
+
## 0.1.4 (2021-03-14)
|
45
|
+
|
46
|
+
- Added `docs` method
|
47
|
+
- Updated tomoto to 0.10.2
|
48
|
+
- Updated `add_doc` to return the index of the document
|
49
|
+
|
50
|
+
## 0.1.3 (2020-12-19)
|
51
|
+
|
52
|
+
- Updated tomoto to 0.10.0
|
53
|
+
|
54
|
+
## 0.1.2 (2020-10-10)
|
55
|
+
|
56
|
+
- Added `summary` method
|
57
|
+
- Added `parallel` option to `train` method
|
58
|
+
|
59
|
+
## 0.1.1 (2020-10-10)
|
60
|
+
|
61
|
+
- Added many more models
|
62
|
+
|
63
|
+
## 0.1.0 (2020-10-09)
|
64
|
+
|
65
|
+
- First release
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2019, bab2min
|
4
|
+
Copyright (c) 2020-2023 Andrew Kane
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in all
|
14
|
+
copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
# tomoto.rb
|
2
|
+
|
3
|
+
:tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
|
4
|
+
|
5
|
+
[Build Status](https://github.com/ankane/tomoto-ruby/actions)
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application’s Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem "tomoto"
|
13
|
+
```
|
14
|
+
|
15
|
+
## Getting Started
|
16
|
+
|
17
|
+
Train a model
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
model = Tomoto::LDA.new(k: 2)
|
21
|
+
model.add_doc(["tokens", "from", "document", "one"])
|
22
|
+
model.add_doc(["tokens", "from", "document", "two"])
|
23
|
+
model.add_doc(["tokens", "from", "document", "three"])
|
24
|
+
model.train(100) # iterations
|
25
|
+
```
|
26
|
+
|
27
|
+
Get the summary
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
model.summary
|
31
|
+
```
|
32
|
+
|
33
|
+
Get topic words
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
model.topic_words
|
37
|
+
```
|
38
|
+
|
39
|
+
Save the model to a file
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
model.save("model.bin")
|
43
|
+
```
|
44
|
+
|
45
|
+
Load the model from a file
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
model = Tomoto::LDA.load("model.bin")
|
49
|
+
```
|
50
|
+
|
51
|
+
Get topic probabilities for a document
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
doc = model.docs[0]
|
55
|
+
doc.topics
|
56
|
+
```
|
57
|
+
|
58
|
+
Get the number of words for each topic
|
59
|
+
|
60
|
+
```ruby
|
61
|
+
model.count_by_topics
|
62
|
+
```
|
63
|
+
|
64
|
+
Get the vocab
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
model.vocabs
|
68
|
+
```
|
69
|
+
|
70
|
+
Get the log likelihood per word
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
model.ll_per_word
|
74
|
+
```
|
75
|
+
|
76
|
+
Perform inference for unseen documents
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
doc = model.make_doc(["unseen", "doc"])
|
80
|
+
topic_dist, ll = model.infer(doc)
|
81
|
+
```
|
82
|
+
|
83
|
+
## Models
|
84
|
+
|
85
|
+
Supports:
|
86
|
+
|
87
|
+
- Latent Dirichlet Allocation (`LDA`)
|
88
|
+
- Labeled LDA (`LLDA`)
|
89
|
+
- Partially Labeled LDA (`PLDA`)
|
90
|
+
- Supervised LDA (`SLDA`)
|
91
|
+
- Dirichlet Multinomial Regression (`DMR`)
|
92
|
+
- Generalized Dirichlet Multinomial Regression (`GDMR`)
|
93
|
+
- Hierarchical Dirichlet Process (`HDP`)
|
94
|
+
- Hierarchical LDA (`HLDA`)
|
95
|
+
- Multi Grain LDA (`MGLDA`)
|
96
|
+
- Pachinko Allocation (`PA`)
|
97
|
+
- Hierarchical PA (`HPA`)
|
98
|
+
- Correlated Topic Model (`CT`)
|
99
|
+
- Dynamic Topic Model (`DT`)
|
100
|
+
|
101
|
+
## API
|
102
|
+
|
103
|
+
This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
|
104
|
+
|
105
|
+
- The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
|
106
|
+
- Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
|
107
|
+
|
108
|
+
If a method or option you need isn’t supported, feel free to open an issue.
|
109
|
+
|
110
|
+
## Examples
|
111
|
+
|
112
|
+
- [LDA](examples/lda_basic.rb)
|
113
|
+
- [HDP](examples/hdp_basic.rb)
|
114
|
+
|
115
|
+
## Performance
|
116
|
+
|
117
|
+
tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
Tomoto.isa
|
121
|
+
```
|
122
|
+
|
123
|
+
## Parallelism
|
124
|
+
|
125
|
+
Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
|
126
|
+
|
127
|
+
```ruby
|
128
|
+
model.train(parallel: :partition)
|
129
|
+
```
|
130
|
+
|
131
|
+
Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
|
132
|
+
|
133
|
+
## History
|
134
|
+
|
135
|
+
View the [changelog](https://github.com/ankane/tomoto-ruby/blob/master/CHANGELOG.md)
|
136
|
+
|
137
|
+
## Contributing
|
138
|
+
|
139
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
140
|
+
|
141
|
+
- [Report bugs](https://github.com/ankane/tomoto-ruby/issues)
|
142
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/tomoto-ruby/pulls)
|
143
|
+
- Write, clarify, or fix documentation
|
144
|
+
- Suggest or add new features
|
145
|
+
|
146
|
+
To get started with development:
|
147
|
+
|
148
|
+
```sh
|
149
|
+
git clone --recursive https://github.com/ankane/tomoto-ruby.git
|
150
|
+
cd tomoto-ruby
|
151
|
+
bundle install
|
152
|
+
bundle exec rake compile
|
153
|
+
bundle exec rake test
|
154
|
+
```
|
data/ext/tomoto/ct.cpp
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#include <CT.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the `CT` class (Correlated Topic Model) under module `m`,
// exposing tomoto::ICTModel as a subclass of the tomoto::ILDAModel binding.
void init_ct(Rice::Module& m) {
  Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
    // Factory used by the Ruby-side constructor; Ruby takes ownership of the
    // returned model.
    .define_singleton_function(
      "_new",
      [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
        // `seed` is unsigned, so a `seed >= 0` test can never be false.
        // Compare against the wrapped value of -1 instead, so that "no seed"
        // leaves the default from CTArgs untouched.
        // NOTE(review): assumes the Ruby wrapper passes -1 when no seed is
        // given - confirm against ct.rb.
        const size_t no_seed = static_cast<size_t>(-1);

        tomoto::CTArgs args;
        args.k = k;
        args.alpha = {alpha};
        args.eta = eta;
        if (seed != no_seed) {
          args.seed = seed;
        }
        return tomoto::ICTModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    // Result of getCorrelationTopic for the given topic id.
    .define_method(
      "_correlations",
      [](tomoto::ICTModel& self, tomoto::Tid topic_id) {
        return self.getCorrelationTopic(topic_id);
      })
    .define_method(
      "num_beta_sample",
      [](tomoto::ICTModel& self) {
        return self.getNumBetaSample();
      })
    // Setter returns the assigned value, mirroring Ruby assignment semantics.
    .define_method(
      "num_beta_sample=",
      [](tomoto::ICTModel& self, size_t value) {
        self.setNumBetaSample(value);
        return value;
      })
    .define_method(
      "num_tmn_sample",
      [](tomoto::ICTModel& self) {
        return self.getNumTMNSample();
      })
    .define_method(
      "num_tmn_sample=",
      [](tomoto::ICTModel& self, size_t value) {
        self.setNumTMNSample(value);
        return value;
      })
    .define_method(
      "_prior_cov",
      [](tomoto::ICTModel& self) {
        return self.getPriorCov();
      })
    .define_method(
      "prior_mean",
      [](tomoto::ICTModel& self) {
        return self.getPriorMean();
      });
}
|
data/ext/tomoto/dmr.cpp
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
#include <DMR.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the `DMR` class (Dirichlet Multinomial Regression) under module
// `m`, exposing tomoto::IDMRModel as a subclass of the tomoto::ILDAModel binding.
void init_dmr(Rice::Module& m) {
  Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
    // Factory used by the Ruby-side constructor; Ruby takes ownership of the
    // returned model.
    .define_singleton_function(
      "_new",
      [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
        // `seed` is unsigned, so a `seed >= 0` test can never be false.
        // Compare against the wrapped value of -1 instead, so that "no seed"
        // leaves the default from DMRArgs untouched.
        // NOTE(review): assumes the Ruby wrapper passes -1 when no seed is
        // given - confirm against dmr.rb.
        const size_t no_seed = static_cast<size_t>(-1);

        tomoto::DMRArgs args;
        args.k = k;
        args.alpha = {alpha};
        args.sigma = sigma;
        args.eta = eta;
        args.alphaEps = alpha_epsilon;
        if (seed != no_seed) {
          args.seed = seed;
        }
        return tomoto::IDMRModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    // Builds a document from `words`, attaches `metadata` under the
    // "metadata" misc key and returns the result of addDoc.
    .define_method(
      "_add_doc",
      [](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
        auto doc = buildDoc(words);
        doc.misc["metadata"] = metadata;
        return self.addDoc(doc);
      })
    .define_method(
      "alpha_epsilon",
      [](tomoto::IDMRModel& self) {
        return self.getAlphaEps();
      })
    // Setter returns the assigned value, mirroring Ruby assignment semantics.
    .define_method(
      "alpha_epsilon=",
      [](tomoto::IDMRModel& self, tomoto::Float value) {
        self.setAlphaEps(value);
        return value;
      })
    .define_method(
      "f",
      [](tomoto::IDMRModel& self) {
        return self.getF();
      })
    .define_method(
      "_lambdas",
      [](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
        return self.getLambdaByTopic(topic_id);
      })
    // Returns every word of the metadata dictionary as a Ruby string with its
    // encoding forced to UTF-8.
    .define_method(
      "metadata_dict",
      [](tomoto::IDMRModel& self) {
        auto dict = self.getMetadataDict();
        Array res;
        auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
        for (size_t i = 0; i < dict.size(); i++) {
          VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
          Object obj(value);
          res.push(obj.call("force_encoding", utf8));
        }
        return res;
      })
    .define_method(
      "sigma",
      [](tomoto::IDMRModel& self) {
        return self.getSigma();
      });
}
|
data/ext/tomoto/dt.cpp
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
#include <DT.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the `DT` class (Dynamic Topic Model) under module `m`, exposing
// tomoto::IDTModel as a subclass of the tomoto::ILDAModel binding.
void init_dt(Rice::Module& m) {
  Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
    // Factory used by the Ruby-side constructor; Ruby takes ownership of the
    // returned model.
    .define_singleton_function(
      "_new",
      [](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
        // Rice only supports 10 arguments, so no seed parameter is exposed.
        // The earlier code kept a local `size_t seed = -1` guarded by
        // `seed >= 0`; that test is always true for an unsigned value, so it
        // forced the seed to the largest size_t on every call. args.seed is
        // now left at whatever DTArgs initialises it to.
        tomoto::DTArgs args;
        args.k = k;
        args.t = t;
        args.alpha = {alphaVar};
        args.eta = etaVar;
        args.phi = phiVar;
        args.shapeA = shapeA;
        args.shapeB = shapeB;
        args.shapeC = shapeC;
        return tomoto::IDTModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    // Builds a document from `words`, attaches `timepoint` under the
    // "timepoint" misc key and returns the result of addDoc.
    .define_method(
      "_add_doc",
      [](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
        auto doc = buildDoc(words);
        doc.misc["timepoint"] = timepoint;
        return self.addDoc(doc);
      })
    // Returns a nested array: one row per topic (getK), one entry per
    // timepoint (getT), filled from getAlpha(topic, timepoint).
    .define_method(
      "alpha",
      [](tomoto::IDTModel& self) {
        Array res;
        for (size_t i = 0; i < self.getK(); i++) {
          Array res2;
          for (size_t j = 0; j < self.getT(); j++) {
            res2.push(self.getAlpha(i, j));
          }
          res.push(res2);
        }
        return res;
      })
    // lr_a / lr_b / lr_c map onto the model's shapeA / shapeB / shapeC values.
    .define_method(
      "lr_a",
      [](tomoto::IDTModel& self) {
        return self.getShapeA();
      })
    .define_method(
      "lr_a=",
      [](tomoto::IDTModel& self, tomoto::Float value) {
        self.setShapeA(value);
        return value;
      })
    .define_method(
      "lr_b",
      [](tomoto::IDTModel& self) {
        return self.getShapeB();
      })
    .define_method(
      "lr_b=",
      [](tomoto::IDTModel& self, tomoto::Float value) {
        self.setShapeB(value);
        return value;
      })
    .define_method(
      "lr_c",
      [](tomoto::IDTModel& self) {
        return self.getShapeC();
      })
    .define_method(
      "lr_c=",
      [](tomoto::IDTModel& self, tomoto::Float value) {
        self.setShapeC(value);
        return value;
      })
    .define_method(
      "num_docs_by_timepoint",
      [](tomoto::IDTModel& self) {
        return self.getNumDocsByT();
      })
    .define_method(
      "num_timepoints",
      [](tomoto::IDTModel& self) {
        return self.getT();
      });
}
|
data/ext/tomoto/extconf.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Build configuration for the tomoto native extension: sets compiler flags,
# include paths and the source list, then generates the Makefile.
require "mkmf-rice"

$CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"

# Host-specific tuning flags are only added when RUBY_CC_VERSION is unset
# (presumably it is set by the cross-compilation tooling used for the
# precompiled gems - confirm).
unless ENV["RUBY_CC_VERSION"]
  host_os = RbConfig::CONFIG["host_os"]
  arm_cpu = RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i

  default_optflags =
    if arm_cpu && host_os =~ /darwin/i
      # -march=native not supported with Mac ARM
      ""
    elsif arm_cpu
      # -mno-avx512f is an x86-only option and is rejected by ARM compilers,
      # so it must not be part of the default on other ARM hosts
      "-march=native"
    else
      # AVX-512F not supported yet
      # https://github.com/bab2min/tomotopy/issues/188
      "-march=native -mno-avx512f"
    end

  # An explicit optflags config value overrides the default chosen above
  $CXXFLAGS << " " << with_config("optflags", default_optflags)
end

apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i

if apple_clang
  # silence rice warnings
  $CXXFLAGS += " -Wno-deprecated-declarations"
else
  # silence eigen warnings
  $CXXFLAGS += " -Wno-ignored-attributes -Wno-deprecated-copy"
end

# silence tomoto warnings
$CXXFLAGS += " -Wno-unused-variable -Wno-switch"

# Locations of the extension sources and the vendored libraries
ext = File.expand_path(".", __dir__)
tomoto = File.expand_path("../../vendor/tomotopy/src/TopicModel", __dir__)
eigen = File.expand_path("../../vendor/eigen", __dir__)
eigen_rand = File.expand_path("../../vendor/EigenRand", __dir__)
variant = File.expand_path("../../vendor/variant/include", __dir__)

# Compile the binding sources together with the vendored tomoto sources
$srcs = Dir["{#{ext},#{tomoto}}/*.cpp"]
$INCFLAGS += " -I#{tomoto} -I#{eigen} -I#{eigen_rand} -I#{variant}"
$VPATH << tomoto

create_makefile("tomoto/tomoto")
|
data/ext/tomoto/gdmr.cpp
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#include <GDMR.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the `GDMR` class (Generalized Dirichlet Multinomial Regression)
// under module `m`, exposing tomoto::IGDMRModel as a subclass of the
// tomoto::IDMRModel binding.
void init_gdmr(Rice::Module& m) {
  Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR")
    // Factory used by the Ruby-side constructor; Ruby takes ownership of the
    // returned model.
    .define_singleton_function(
      "_new",
      [](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
        // `seed` is unsigned, so a `seed >= 0` test can never be false.
        // Compare against the wrapped value of -1 instead, so that "no seed"
        // leaves the default from GDMRArgs untouched.
        // NOTE(review): assumes the Ruby wrapper passes -1 when no seed is
        // given - confirm against gdmr.rb.
        const size_t no_seed = static_cast<size_t>(-1);

        tomoto::GDMRArgs args;
        args.k = k;
        args.degrees = degrees;
        args.alpha = {alpha};
        args.sigma = sigma;
        args.sigma0 = sigma0;
        args.eta = eta;
        args.alphaEps = alpha_epsilon;
        if (seed != no_seed) {
          args.seed = seed;
        }
        return tomoto::IGDMRModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    // Builds a document from `words`, attaches `numeric_metadata` under the
    // "numeric_metadata" misc key and returns the result of addDoc.
    .define_method(
      "_add_doc",
      [](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> numeric_metadata) {
        auto doc = buildDoc(words);
        doc.misc["numeric_metadata"] = numeric_metadata;
        return self.addDoc(doc);
      })
    // Exposed as `degrees`; backed by the model's getFs accessor.
    .define_method(
      "degrees",
      [](tomoto::IGDMRModel& self) {
        return self.getFs();
      })
    .define_method(
      "sigma0",
      [](tomoto::IGDMRModel& self) {
        return self.getSigma0();
      });
}
|
data/ext/tomoto/hdp.cpp
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#include <HDP.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the `HDP` class (Hierarchical Dirichlet Process) under module
// `m`, exposing tomoto::IHDPModel as a subclass of the tomoto::ILDAModel binding.
void init_hdp(Rice::Module& m) {
  Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP")
    // Factory used by the Ruby-side constructor; Ruby takes ownership of the
    // returned model.
    .define_singleton_function(
      "_new",
      [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
        // `seed` is unsigned, so a `seed >= 0` test can never be false.
        // Compare against the wrapped value of -1 instead, so that "no seed"
        // leaves the default from HDPArgs untouched.
        // NOTE(review): assumes the Ruby wrapper passes -1 when no seed is
        // given - confirm against hdp.rb.
        const size_t no_seed = static_cast<size_t>(-1);

        tomoto::HDPArgs args;
        args.k = k;
        args.alpha = {alpha};
        args.eta = eta;
        args.gamma = gamma;
        if (seed != no_seed) {
          args.seed = seed;
        }
        return tomoto::IHDPModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    .define_method(
      "alpha",
      [](tomoto::IHDPModel& self) {
        return self.getAlpha();
      })
    .define_method(
      "gamma",
      [](tomoto::IHDPModel& self) {
        return self.getGamma();
      })
    .define_method(
      "live_k",
      [](tomoto::IHDPModel& self) {
        return self.getLiveK();
      })
    // Result of isLiveTopic for the given topic id.
    .define_method(
      "live_topic?",
      [](tomoto::IHDPModel& self, size_t tid) {
        return self.isLiveTopic(tid);
      })
    .define_method(
      "num_tables",
      [](tomoto::IHDPModel& self) {
        return self.getTotalTables();
      });
}
|
data/ext/tomoto/hlda.cpp
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#include <HLDA.h>
|
2
|
+
|
3
|
+
#include <rice/rice.hpp>
|
4
|
+
|
5
|
+
#include "utils.h"
|
6
|
+
|
7
|
+
// Registers the `HLDA` class (Hierarchical LDA) under module `m`, exposing
// tomoto::IHLDAModel as a subclass of the tomoto::ILDAModel binding.
void init_hlda(Rice::Module& m) {
  Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA")
    // Factory used by the Ruby-side constructor; Ruby takes ownership of the
    // returned model.
    .define_singleton_function(
      "_new",
      [](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
        // `seed` is unsigned, so a `seed >= 0` test can never be false.
        // Compare against the wrapped value of -1 instead, so that "no seed"
        // leaves the default from HLDAArgs untouched.
        // NOTE(review): assumes the Ruby wrapper passes -1 when no seed is
        // given - confirm against hlda.rb.
        const size_t no_seed = static_cast<size_t>(-1);

        tomoto::HLDAArgs args;
        // The level depth is carried in the `k` field of HLDAArgs.
        args.k = levelDepth;
        args.alpha = {alpha};
        args.eta = eta;
        args.gamma = gamma;
        if (seed != no_seed) {
          args.seed = seed;
        }
        return tomoto::IHLDAModel::create(static_cast<tomoto::TermWeight>(tw), args);
      }, Rice::Return().takeOwnership())
    // Returns one getAlpha(level) value per level, for levels
    // 0 .. getLevelDepth() - 1.
    .define_method(
      "alpha",
      [](tomoto::IHLDAModel& self) {
        Array res;
        for (size_t i = 0; i < self.getLevelDepth(); i++) {
          res.push(self.getAlpha(i));
        }
        return res;
      })
    .define_method(
      "_children_topics",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.getChildTopicId(topic_id);
      })
    .define_method(
      "depth",
      [](tomoto::IHLDAModel& self) {
        return self.getLevelDepth();
      })
    .define_method(
      "gamma",
      [](tomoto::IHLDAModel& self) {
        return self.getGamma();
      })
    .define_method(
      "_level",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.getLevelOfTopic(topic_id);
      })
    .define_method(
      "live_k",
      [](tomoto::IHLDAModel& self) {
        return self.getLiveK();
      })
    .define_method(
      "_live_topic?",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.isLiveTopic(topic_id);
      })
    .define_method(
      "_num_docs_of_topic",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.getNumDocsOfTopic(topic_id);
      })
    .define_method(
      "_parent_topic",
      [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
        return self.getParentTopicId(topic_id);
      });
}
|