tomoto 0.3.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +45 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +162 -0
  5. data/ext/tomoto/ct.cpp +58 -0
  6. data/ext/tomoto/dmr.cpp +69 -0
  7. data/ext/tomoto/dt.cpp +91 -0
  8. data/ext/tomoto/extconf.rb +34 -0
  9. data/ext/tomoto/gdmr.cpp +42 -0
  10. data/ext/tomoto/hdp.cpp +47 -0
  11. data/ext/tomoto/hlda.cpp +71 -0
  12. data/ext/tomoto/hpa.cpp +32 -0
  13. data/ext/tomoto/lda.cpp +281 -0
  14. data/ext/tomoto/llda.cpp +33 -0
  15. data/ext/tomoto/mglda.cpp +81 -0
  16. data/ext/tomoto/pa.cpp +32 -0
  17. data/ext/tomoto/plda.cpp +33 -0
  18. data/ext/tomoto/slda.cpp +48 -0
  19. data/ext/tomoto/tomoto.cpp +48 -0
  20. data/ext/tomoto/utils.h +30 -0
  21. data/lib/tomoto/2.7/tomoto.bundle +0 -0
  22. data/lib/tomoto/3.0/tomoto.bundle +0 -0
  23. data/lib/tomoto/3.1/tomoto.bundle +0 -0
  24. data/lib/tomoto/ct.rb +24 -0
  25. data/lib/tomoto/dmr.rb +27 -0
  26. data/lib/tomoto/dt.rb +15 -0
  27. data/lib/tomoto/gdmr.rb +15 -0
  28. data/lib/tomoto/hdp.rb +11 -0
  29. data/lib/tomoto/hlda.rb +56 -0
  30. data/lib/tomoto/hpa.rb +11 -0
  31. data/lib/tomoto/lda.rb +181 -0
  32. data/lib/tomoto/llda.rb +15 -0
  33. data/lib/tomoto/mglda.rb +15 -0
  34. data/lib/tomoto/pa.rb +11 -0
  35. data/lib/tomoto/plda.rb +15 -0
  36. data/lib/tomoto/slda.rb +37 -0
  37. data/lib/tomoto/version.rb +3 -0
  38. data/lib/tomoto.rb +27 -0
  39. data/vendor/EigenRand/EigenRand/EigenRand +24 -0
  40. data/vendor/EigenRand/LICENSE +21 -0
  41. data/vendor/EigenRand/README.md +426 -0
  42. data/vendor/eigen/COPYING.APACHE +203 -0
  43. data/vendor/eigen/COPYING.BSD +26 -0
  44. data/vendor/eigen/COPYING.GPL +674 -0
  45. data/vendor/eigen/COPYING.LGPL +502 -0
  46. data/vendor/eigen/COPYING.MINPACK +51 -0
  47. data/vendor/eigen/COPYING.MPL2 +373 -0
  48. data/vendor/eigen/COPYING.README +18 -0
  49. data/vendor/eigen/Eigen/Cholesky +45 -0
  50. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  51. data/vendor/eigen/Eigen/Core +384 -0
  52. data/vendor/eigen/Eigen/Dense +7 -0
  53. data/vendor/eigen/Eigen/Eigen +2 -0
  54. data/vendor/eigen/Eigen/Eigenvalues +60 -0
  55. data/vendor/eigen/Eigen/Geometry +59 -0
  56. data/vendor/eigen/Eigen/Householder +29 -0
  57. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  58. data/vendor/eigen/Eigen/Jacobi +32 -0
  59. data/vendor/eigen/Eigen/KLUSupport +41 -0
  60. data/vendor/eigen/Eigen/LU +47 -0
  61. data/vendor/eigen/Eigen/MetisSupport +35 -0
  62. data/vendor/eigen/Eigen/OrderingMethods +70 -0
  63. data/vendor/eigen/Eigen/PaStiXSupport +49 -0
  64. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  65. data/vendor/eigen/Eigen/QR +50 -0
  66. data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
  67. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  68. data/vendor/eigen/Eigen/SVD +50 -0
  69. data/vendor/eigen/Eigen/Sparse +34 -0
  70. data/vendor/eigen/Eigen/SparseCholesky +37 -0
  71. data/vendor/eigen/Eigen/SparseCore +69 -0
  72. data/vendor/eigen/Eigen/SparseLU +50 -0
  73. data/vendor/eigen/Eigen/SparseQR +36 -0
  74. data/vendor/eigen/Eigen/StdDeque +27 -0
  75. data/vendor/eigen/Eigen/StdList +26 -0
  76. data/vendor/eigen/Eigen/StdVector +27 -0
  77. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  78. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  79. data/vendor/eigen/README.md +5 -0
  80. data/vendor/eigen/bench/README.txt +55 -0
  81. data/vendor/eigen/bench/btl/COPYING +340 -0
  82. data/vendor/eigen/bench/btl/README +154 -0
  83. data/vendor/eigen/bench/tensors/README +20 -0
  84. data/vendor/eigen/blas/README.txt +6 -0
  85. data/vendor/eigen/ci/README.md +56 -0
  86. data/vendor/eigen/demos/mandelbrot/README +10 -0
  87. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  88. data/vendor/eigen/demos/opengl/README +13 -0
  89. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
  90. data/vendor/eigen/unsupported/README.txt +50 -0
  91. data/vendor/tomotopy/LICENSE +21 -0
  92. data/vendor/tomotopy/README.kr.rst +512 -0
  93. data/vendor/tomotopy/README.rst +516 -0
  94. data/vendor/variant/LICENSE +25 -0
  95. data/vendor/variant/LICENSE_1_0.txt +23 -0
  96. data/vendor/variant/README.md +102 -0
  97. metadata +140 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b23bb72cb63777f22912c00abe475b2ba903b2abd0ab12ef5ebafc0832ef3d4d
4
+ data.tar.gz: c7a98c977d7864ce142df08380b0ed3cc94b663221af79ddb291393102b9603b
5
+ SHA512:
6
+ metadata.gz: 378389c5f113de2e5cf0db7b6a433fd189a74afca495e2ecc69cf1ffddb32c5dba0761a98c245ba748e1e8365633958573591d0182af1d2d5b8b23bee9f6a3ba
7
+ data.tar.gz: 385e48e227645fb6148bea04b6d04157e6cf4b271b23114bc168463886ac167e06315ffa65ce7def5eb9cf3c94cfee5483cf1761208a5fb5323c00e2a3395550
data/CHANGELOG.md ADDED
@@ -0,0 +1,45 @@
1
+ ## 0.3.0 (2022-10-03)
2
+
3
+ - Added precompiled gems for Linux and Mac
4
+ - Updated tomoto to 0.12.3
5
+ - Dropped support for Ruby < 2.7
6
+
7
+ ## 0.2.3 (2021-08-26)
8
+
9
+ - Updated to Rice 4
10
+
11
+ ## 0.2.2 (2021-08-23)
12
+
13
+ - Reduced gem size
14
+
15
+ ## 0.2.1 (2021-08-23)
16
+
17
+ - Added support for unseen documents
18
+
19
+ ## 0.2.0 (2021-05-23)
20
+
21
+ - Updated tomoto to 0.12.0
22
+ - Dropped support for Ruby < 2.6
23
+
24
+ ## 0.1.4 (2021-03-14)
25
+
26
+ - Added `docs` method
27
+ - Updated tomoto to 0.10.2
28
+ - Updated `add_doc` to return the index of the document
29
+
30
+ ## 0.1.3 (2020-12-19)
31
+
32
+ - Updated tomoto to 0.10.0
33
+
34
+ ## 0.1.2 (2020-10-10)
35
+
36
+ - Added `summary` method
37
+ - Added `parallel` option to `train` method
38
+
39
+ ## 0.1.1 (2020-10-10)
40
+
41
+ - Added many more models
42
+
43
+ ## 0.1.0 (2020-10-09)
44
+
45
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019, bab2min
4
+ Copyright (c) 2020-2021 Andrew Kane
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,162 @@
1
+ # tomoto.rb
2
+
3
+ :tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
4
+
5
+ [![Build Status](https://github.com/ankane/tomoto-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/tomoto-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem "tomoto"
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ Train a model
18
+
19
+ ```ruby
20
+ model = Tomoto::LDA.new(k: 2)
21
+ model.add_doc("text from document one")
22
+ model.add_doc("text from document two")
23
+ model.add_doc("text from document three")
24
+ model.train(100) # iterations
25
+ ```
26
+
27
+ Get the summary
28
+
29
+ ```ruby
30
+ model.summary
31
+ ```
32
+
33
+ Get topic words
34
+
35
+ ```ruby
36
+ model.topic_words
37
+ ```
38
+
39
+ Save the model to a file
40
+
41
+ ```ruby
42
+ model.save("model.bin")
43
+ ```
44
+
45
+ Load the model from a file
46
+
47
+ ```ruby
48
+ model = Tomoto::LDA.load("model.bin")
49
+ ```
50
+
51
+ Get topic probabilities for a document
52
+
53
+ ```ruby
54
+ doc = model.docs[0]
55
+ doc.topics
56
+ ```
57
+
58
+ Get the number of words for each topic
59
+
60
+ ```ruby
61
+ model.count_by_topics
62
+ ```
63
+
64
+ Get the vocab
65
+
66
+ ```ruby
67
+ model.vocabs
68
+ ```
69
+
70
+ Get the log likelihood per word
71
+
72
+ ```ruby
73
+ model.ll_per_word
74
+ ```
75
+
76
+ Perform inference for unseen documents
77
+
78
+ ```ruby
79
+ doc = model.make_doc("unseen doc")
80
+ topic_dist, ll = model.infer(doc)
81
+ ```
82
+
83
+ ## Models
84
+
85
+ Supports:
86
+
87
+ - Latent Dirichlet Allocation (`LDA`)
88
+ - Labeled LDA (`LLDA`)
89
+ - Partially Labeled LDA (`PLDA`)
90
+ - Supervised LDA (`SLDA`)
91
+ - Dirichlet Multinomial Regression (`DMR`)
92
+ - Generalized Dirichlet Multinomial Regression (`GDMR`)
93
+ - Hierarchical Dirichlet Process (`HDP`)
94
+ - Hierarchical LDA (`HLDA`)
95
+ - Multi Grain LDA (`MGLDA`)
96
+ - Pachinko Allocation (`PA`)
97
+ - Hierarchical PA (`HPA`)
98
+ - Correlated Topic Model (`CT`)
99
+ - Dynamic Topic Model (`DT`)
100
+
101
+ ## API
102
+
103
+ This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
104
+
105
+ - The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
106
+ - Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
107
+
108
+ If a method or option you need isn’t supported, feel free to open an issue.
109
+
110
+ ## Examples
111
+
112
+ - [LDA](examples/lda_basic.rb)
113
+ - [HDP](examples/hdp_basic.rb)
114
+
115
+ ## Tokenization
116
+
117
+ Documents are tokenized by whitespace by default, or you can perform your own tokenization.
118
+
119
+ ```ruby
120
+ model.add_doc(["tokens", "from", "document", "one"])
121
+ ```
122
+
123
+ ## Performance
124
+
125
+ tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support them. Check which instruction set architecture it’s using with:
126
+
127
+ ```ruby
128
+ Tomoto.isa
129
+ ```
130
+
131
+ ## Parallelism
132
+
133
+ Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
134
+
135
+ ```ruby
136
+ model.train(parallel: :partition)
137
+ ```
138
+
139
+ Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
140
+
141
+ ## History
142
+
143
+ View the [changelog](https://github.com/ankane/tomoto-ruby/blob/master/CHANGELOG.md)
144
+
145
+ ## Contributing
146
+
147
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
148
+
149
+ - [Report bugs](https://github.com/ankane/tomoto-ruby/issues)
150
+ - Fix bugs and [submit pull requests](https://github.com/ankane/tomoto-ruby/pulls)
151
+ - Write, clarify, or fix documentation
152
+ - Suggest or add new features
153
+
154
+ To get started with development:
155
+
156
+ ```sh
157
+ git clone --recursive https://github.com/ankane/tomoto-ruby.git
158
+ cd tomoto-ruby
159
+ bundle install
160
+ bundle exec rake compile
161
+ bundle exec rake test
162
+ ```
data/ext/tomoto/ct.cpp ADDED
@@ -0,0 +1,58 @@
1
+ #include <CT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_ct(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::CTArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::ICTModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_correlations",
23
+ [](tomoto::ICTModel& self, tomoto::Tid topic_id) {
24
+ return self.getCorrelationTopic(topic_id);
25
+ })
26
+ .define_method(
27
+ "num_beta_sample",
28
+ [](tomoto::ICTModel& self) {
29
+ return self.getNumBetaSample();
30
+ })
31
+ .define_method(
32
+ "num_beta_sample=",
33
+ [](tomoto::ICTModel& self, size_t value) {
34
+ self.setNumBetaSample(value);
35
+ return value;
36
+ })
37
+ .define_method(
38
+ "num_tmn_sample",
39
+ [](tomoto::ICTModel& self) {
40
+ return self.getNumTMNSample();
41
+ })
42
+ .define_method(
43
+ "num_tmn_sample=",
44
+ [](tomoto::ICTModel& self, size_t value) {
45
+ self.setNumTMNSample(value);
46
+ return value;
47
+ })
48
+ .define_method(
49
+ "_prior_cov",
50
+ [](tomoto::ICTModel& self) {
51
+ return self.getPriorCov();
52
+ })
53
+ .define_method(
54
+ "prior_mean",
55
+ [](tomoto::ICTModel& self) {
56
+ return self.getPriorMean();
57
+ });
58
+ }
data/ext/tomoto/dmr.cpp ADDED
@@ -0,0 +1,69 @@
1
+ #include <DMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::DMRArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.sigma = sigma;
16
+ args.eta = eta;
17
+ args.alphaEps = alpha_epsilon;
18
+ if (seed >= 0) {
19
+ args.seed = seed;
20
+ }
21
+ return tomoto::IDMRModel::create((tomoto::TermWeight)tw, args);
22
+ }, Rice::Return().takeOwnership())
23
+ .define_method(
24
+ "_add_doc",
25
+ [](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
26
+ auto doc = buildDoc(words);
27
+ doc.misc["metadata"] = metadata;
28
+ return self.addDoc(doc);
29
+ })
30
+ .define_method(
31
+ "alpha_epsilon",
32
+ [](tomoto::IDMRModel& self) {
33
+ return self.getAlphaEps();
34
+ })
35
+ .define_method(
36
+ "alpha_epsilon=",
37
+ [](tomoto::IDMRModel& self, tomoto::Float value) {
38
+ self.setAlphaEps(value);
39
+ return value;
40
+ })
41
+ .define_method(
42
+ "f",
43
+ [](tomoto::IDMRModel& self) {
44
+ return self.getF();
45
+ })
46
+ .define_method(
47
+ "_lambdas",
48
+ [](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
49
+ return self.getLambdaByTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "metadata_dict",
53
+ [](tomoto::IDMRModel& self) {
54
+ auto dict = self.getMetadataDict();
55
+ Array res;
56
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
57
+ for (size_t i = 0; i < dict.size(); i++) {
58
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
59
+ Object obj(value);
60
+ res.push(obj.call("force_encoding", utf8));
61
+ }
62
+ return res;
63
+ })
64
+ .define_method(
65
+ "sigma",
66
+ [](tomoto::IDMRModel& self) {
67
+ return self.getSigma();
68
+ });
69
+ }
data/ext/tomoto/dt.cpp ADDED
@@ -0,0 +1,91 @@
1
+ #include <DT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dt(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
12
+ // Rice only supports 10 arguments
13
+ size_t seed = -1;
14
+ tomoto::DTArgs args;
15
+ args.k = k;
16
+ args.t = t;
17
+ args.alpha = {alphaVar};
18
+ args.eta = etaVar;
19
+ args.phi = phiVar;
20
+ args.shapeA = shapeA;
21
+ args.shapeB = shapeB;
22
+ args.shapeC = shapeC;
23
+ if (seed >= 0) {
24
+ args.seed = seed;
25
+ }
26
+ return tomoto::IDTModel::create((tomoto::TermWeight)tw, args);
27
+ }, Rice::Return().takeOwnership())
28
+ .define_method(
29
+ "_add_doc",
30
+ [](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
31
+ auto doc = buildDoc(words);
32
+ doc.misc["timepoint"] = timepoint;
33
+ return self.addDoc(doc);
34
+ })
35
+ .define_method(
36
+ "alpha",
37
+ [](tomoto::IDTModel& self) {
38
+ Array res;
39
+ for (size_t i = 0; i < self.getK(); i++) {
40
+ Array res2;
41
+ for (size_t j = 0; j < self.getT(); j++) {
42
+ res2.push(self.getAlpha(i, j));
43
+ }
44
+ res.push(res2);
45
+ }
46
+ return res;
47
+ })
48
+ .define_method(
49
+ "lr_a",
50
+ [](tomoto::IDTModel& self) {
51
+ return self.getShapeA();
52
+ })
53
+ .define_method(
54
+ "lr_a=",
55
+ [](tomoto::IDTModel& self, tomoto::Float value) {
56
+ self.setShapeA(value);
57
+ return value;
58
+ })
59
+ .define_method(
60
+ "lr_b",
61
+ [](tomoto::IDTModel& self) {
62
+ return self.getShapeB();
63
+ })
64
+ .define_method(
65
+ "lr_b=",
66
+ [](tomoto::IDTModel& self, tomoto::Float value) {
67
+ self.setShapeB(value);
68
+ return value;
69
+ })
70
+ .define_method(
71
+ "lr_c",
72
+ [](tomoto::IDTModel& self) {
73
+ return self.getShapeC();
74
+ })
75
+ .define_method(
76
+ "lr_c=",
77
+ [](tomoto::IDTModel& self, tomoto::Float value) {
78
+ self.setShapeC(value);
79
+ return value;
80
+ })
81
+ .define_method(
82
+ "num_docs_by_timepoint",
83
+ [](tomoto::IDTModel& self) {
84
+ return self.getNumDocsByT();
85
+ })
86
+ .define_method(
87
+ "num_timepoints",
88
+ [](tomoto::IDTModel& self) {
89
+ return self.getT();
90
+ });
91
+ }
data/ext/tomoto/extconf.rb ADDED
@@ -0,0 +1,34 @@
1
+ require "mkmf-rice"
2
+ # Build configuration for the tomoto extension: sets compiler flags, source and include paths, then writes the Makefile.
3
+ $CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"
4
+ # Extra optimization flags are only added when RUBY_CC_VERSION is unset (NOTE(review): presumably skipped for cross-compiled/precompiled gems; confirm).
5
+ unless ENV["RUBY_CC_VERSION"]
6
+ # AVX-512F is not supported yet, so the default optflags turn it off
7
+ # https://github.com/bab2min/tomotopy/issues/188
8
+ $CXXFLAGS << " " << with_config("optflags", "-march=native -mno-avx512f")
9
+ end
10
+ # Truthy when RbConfig's CC_VERSION_MESSAGE mentions "apple clang" (case-insensitive).
11
+ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
12
+
13
+ if apple_clang
14
+ # silence Rice warnings
15
+ $CXXFLAGS += " -Wno-deprecated-declarations"
16
+ else
17
+ # silence Eigen warnings
18
+ $CXXFLAGS += " -Wno-ignored-attributes -Wno-deprecated-copy"
19
+ end
20
+
21
+ # silence tomoto warnings
22
+ $CXXFLAGS += " -Wno-unused-variable -Wno-switch"
23
+ # Absolute paths to this extension directory and to the vendored dependencies.
24
+ ext = File.expand_path(".", __dir__)
25
+ tomoto = File.expand_path("../../vendor/tomotopy/src/TopicModel", __dir__)
26
+ eigen = File.expand_path("../../vendor/eigen", __dir__)
27
+ eigen_rand = File.expand_path("../../vendor/EigenRand", __dir__)
28
+ variant = File.expand_path("../../vendor/variant/include", __dir__)
29
+ # Compile every .cpp in the extension directory and in the tomoto TopicModel directory; the latter is also added to VPATH.
30
+ $srcs = Dir["{#{ext},#{tomoto}}/*.cpp"]
31
+ $INCFLAGS += " -I#{tomoto} -I#{eigen} -I#{eigen_rand} -I#{variant}"
32
+ $VPATH << tomoto
33
+
34
+ create_makefile("tomoto/tomoto")
data/ext/tomoto/gdmr.cpp ADDED
@@ -0,0 +1,42 @@
1
+ #include <GDMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_gdmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::GDMRArgs args;
13
+ args.k = k;
14
+ args.degrees = degrees;
15
+ args.alpha = {alpha};
16
+ args.sigma = sigma;
17
+ args.sigma0 = sigma0;
18
+ args.eta = eta;
19
+ args.alphaEps = alpha_epsilon;
20
+ if (seed >= 0) {
21
+ args.seed = seed;
22
+ }
23
+ return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, args);
24
+ }, Rice::Return().takeOwnership())
25
+ .define_method(
26
+ "_add_doc",
27
+ [](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> numeric_metadata) {
28
+ auto doc = buildDoc(words);
29
+ doc.misc["numeric_metadata"] = numeric_metadata;
30
+ return self.addDoc(doc);
31
+ })
32
+ .define_method(
33
+ "degrees",
34
+ [](tomoto::IGDMRModel& self) {
35
+ return self.getFs();
36
+ })
37
+ .define_method(
38
+ "sigma0",
39
+ [](tomoto::IGDMRModel& self) {
40
+ return self.getSigma0();
41
+ });
42
+ }
data/ext/tomoto/hdp.cpp ADDED
@@ -0,0 +1,47 @@
1
+ #include <HDP.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hdp(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HDPArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHDPModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHDPModel& self) {
25
+ return self.getAlpha();
26
+ })
27
+ .define_method(
28
+ "gamma",
29
+ [](tomoto::IHDPModel& self) {
30
+ return self.getGamma();
31
+ })
32
+ .define_method(
33
+ "live_k",
34
+ [](tomoto::IHDPModel& self) {
35
+ return self.getLiveK();
36
+ })
37
+ .define_method(
38
+ "live_topic?",
39
+ [](tomoto::IHDPModel& self, size_t tid) {
40
+ return self.isLiveTopic(tid);
41
+ })
42
+ .define_method(
43
+ "num_tables",
44
+ [](tomoto::IHDPModel& self) {
45
+ return self.getTotalTables();
46
+ });
47
+ }
data/ext/tomoto/hlda.cpp ADDED
@@ -0,0 +1,71 @@
1
+ #include <HLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hlda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HLDAArgs args;
13
+ args.k = levelDepth;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHLDAModel& self) {
25
+ Array res;
26
+ for (size_t i = 0; i < self.getLevelDepth(); i++) {
27
+ res.push(self.getAlpha(i));
28
+ }
29
+ return res;
30
+ })
31
+ .define_method(
32
+ "_children_topics",
33
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
34
+ return self.getChildTopicId(topic_id);
35
+ })
36
+ .define_method(
37
+ "depth",
38
+ [](tomoto::IHLDAModel& self) {
39
+ return self.getLevelDepth();
40
+ })
41
+ .define_method(
42
+ "gamma",
43
+ [](tomoto::IHLDAModel& self) {
44
+ return self.getGamma();
45
+ })
46
+ .define_method(
47
+ "_level",
48
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
49
+ return self.getLevelOfTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "live_k",
53
+ [](tomoto::IHLDAModel& self) {
54
+ return self.getLiveK();
55
+ })
56
+ .define_method(
57
+ "_live_topic?",
58
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
59
+ return self.isLiveTopic(topic_id);
60
+ })
61
+ .define_method(
62
+ "_num_docs_of_topic",
63
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
64
+ return self.getNumDocsOfTopic(topic_id);
65
+ })
66
+ .define_method(
67
+ "_parent_topic",
68
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
69
+ return self.getParentTopicId(topic_id);
70
+ });
71
+ }
data/ext/tomoto/hpa.cpp ADDED
@@ -0,0 +1,32 @@
1
+ #include <HPA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hpa(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(m, "HPA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::HPAArgs args;
13
+ args.k = k1;
14
+ args.k2 = k2;
15
+ args.alpha = {alpha};
16
+ args.eta = eta;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHPAModel& self) {
25
+ Array res;
26
+ // use <= to return k+1 elements
27
+ for (size_t i = 0; i <= self.getK(); i++) {
28
+ res.push(self.getAlpha(i));
29
+ }
30
+ return res;
31
+ });
32
+ }