tomoto 0.3.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +45 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +162 -0
  5. data/ext/tomoto/ct.cpp +58 -0
  6. data/ext/tomoto/dmr.cpp +69 -0
  7. data/ext/tomoto/dt.cpp +91 -0
  8. data/ext/tomoto/extconf.rb +34 -0
  9. data/ext/tomoto/gdmr.cpp +42 -0
  10. data/ext/tomoto/hdp.cpp +47 -0
  11. data/ext/tomoto/hlda.cpp +71 -0
  12. data/ext/tomoto/hpa.cpp +32 -0
  13. data/ext/tomoto/lda.cpp +281 -0
  14. data/ext/tomoto/llda.cpp +33 -0
  15. data/ext/tomoto/mglda.cpp +81 -0
  16. data/ext/tomoto/pa.cpp +32 -0
  17. data/ext/tomoto/plda.cpp +33 -0
  18. data/ext/tomoto/slda.cpp +48 -0
  19. data/ext/tomoto/tomoto.cpp +48 -0
  20. data/ext/tomoto/utils.h +30 -0
  21. data/lib/tomoto/2.7/tomoto.bundle +0 -0
  22. data/lib/tomoto/3.0/tomoto.bundle +0 -0
  23. data/lib/tomoto/3.1/tomoto.bundle +0 -0
  24. data/lib/tomoto/ct.rb +24 -0
  25. data/lib/tomoto/dmr.rb +27 -0
  26. data/lib/tomoto/dt.rb +15 -0
  27. data/lib/tomoto/gdmr.rb +15 -0
  28. data/lib/tomoto/hdp.rb +11 -0
  29. data/lib/tomoto/hlda.rb +56 -0
  30. data/lib/tomoto/hpa.rb +11 -0
  31. data/lib/tomoto/lda.rb +181 -0
  32. data/lib/tomoto/llda.rb +15 -0
  33. data/lib/tomoto/mglda.rb +15 -0
  34. data/lib/tomoto/pa.rb +11 -0
  35. data/lib/tomoto/plda.rb +15 -0
  36. data/lib/tomoto/slda.rb +37 -0
  37. data/lib/tomoto/version.rb +3 -0
  38. data/lib/tomoto.rb +27 -0
  39. data/vendor/EigenRand/EigenRand/EigenRand +24 -0
  40. data/vendor/EigenRand/LICENSE +21 -0
  41. data/vendor/EigenRand/README.md +426 -0
  42. data/vendor/eigen/COPYING.APACHE +203 -0
  43. data/vendor/eigen/COPYING.BSD +26 -0
  44. data/vendor/eigen/COPYING.GPL +674 -0
  45. data/vendor/eigen/COPYING.LGPL +502 -0
  46. data/vendor/eigen/COPYING.MINPACK +51 -0
  47. data/vendor/eigen/COPYING.MPL2 +373 -0
  48. data/vendor/eigen/COPYING.README +18 -0
  49. data/vendor/eigen/Eigen/Cholesky +45 -0
  50. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  51. data/vendor/eigen/Eigen/Core +384 -0
  52. data/vendor/eigen/Eigen/Dense +7 -0
  53. data/vendor/eigen/Eigen/Eigen +2 -0
  54. data/vendor/eigen/Eigen/Eigenvalues +60 -0
  55. data/vendor/eigen/Eigen/Geometry +59 -0
  56. data/vendor/eigen/Eigen/Householder +29 -0
  57. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  58. data/vendor/eigen/Eigen/Jacobi +32 -0
  59. data/vendor/eigen/Eigen/KLUSupport +41 -0
  60. data/vendor/eigen/Eigen/LU +47 -0
  61. data/vendor/eigen/Eigen/MetisSupport +35 -0
  62. data/vendor/eigen/Eigen/OrderingMethods +70 -0
  63. data/vendor/eigen/Eigen/PaStiXSupport +49 -0
  64. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  65. data/vendor/eigen/Eigen/QR +50 -0
  66. data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
  67. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  68. data/vendor/eigen/Eigen/SVD +50 -0
  69. data/vendor/eigen/Eigen/Sparse +34 -0
  70. data/vendor/eigen/Eigen/SparseCholesky +37 -0
  71. data/vendor/eigen/Eigen/SparseCore +69 -0
  72. data/vendor/eigen/Eigen/SparseLU +50 -0
  73. data/vendor/eigen/Eigen/SparseQR +36 -0
  74. data/vendor/eigen/Eigen/StdDeque +27 -0
  75. data/vendor/eigen/Eigen/StdList +26 -0
  76. data/vendor/eigen/Eigen/StdVector +27 -0
  77. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  78. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  79. data/vendor/eigen/README.md +5 -0
  80. data/vendor/eigen/bench/README.txt +55 -0
  81. data/vendor/eigen/bench/btl/COPYING +340 -0
  82. data/vendor/eigen/bench/btl/README +154 -0
  83. data/vendor/eigen/bench/tensors/README +20 -0
  84. data/vendor/eigen/blas/README.txt +6 -0
  85. data/vendor/eigen/ci/README.md +56 -0
  86. data/vendor/eigen/demos/mandelbrot/README +10 -0
  87. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  88. data/vendor/eigen/demos/opengl/README +13 -0
  89. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
  90. data/vendor/eigen/unsupported/README.txt +50 -0
  91. data/vendor/tomotopy/LICENSE +21 -0
  92. data/vendor/tomotopy/README.kr.rst +512 -0
  93. data/vendor/tomotopy/README.rst +516 -0
  94. data/vendor/variant/LICENSE +25 -0
  95. data/vendor/variant/LICENSE_1_0.txt +23 -0
  96. data/vendor/variant/README.md +102 -0
  97. metadata +140 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b23bb72cb63777f22912c00abe475b2ba903b2abd0ab12ef5ebafc0832ef3d4d
4
+ data.tar.gz: c7a98c977d7864ce142df08380b0ed3cc94b663221af79ddb291393102b9603b
5
+ SHA512:
6
+ metadata.gz: 378389c5f113de2e5cf0db7b6a433fd189a74afca495e2ecc69cf1ffddb32c5dba0761a98c245ba748e1e8365633958573591d0182af1d2d5b8b23bee9f6a3ba
7
+ data.tar.gz: 385e48e227645fb6148bea04b6d04157e6cf4b271b23114bc168463886ac167e06315ffa65ce7def5eb9cf3c94cfee5483cf1761208a5fb5323c00e2a3395550
data/CHANGELOG.md ADDED
@@ -0,0 +1,45 @@
1
+ ## 0.3.0 (2022-10-03)
2
+
3
+ - Added precompiled gems for Linux and Mac
4
+ - Updated tomoto to 0.12.3
5
+ - Dropped support for Ruby < 2.7
6
+
7
+ ## 0.2.3 (2021-08-26)
8
+
9
+ - Updated to Rice 4
10
+
11
+ ## 0.2.2 (2021-08-23)
12
+
13
+ - Reduced gem size
14
+
15
+ ## 0.2.1 (2021-08-23)
16
+
17
+ - Added support for unseen documents
18
+
19
+ ## 0.2.0 (2021-05-23)
20
+
21
+ - Updated tomoto to 0.12.0
22
+ - Dropped support for Ruby < 2.6
23
+
24
+ ## 0.1.4 (2021-03-14)
25
+
26
+ - Added `docs` method
27
+ - Updated tomoto to 0.10.2
28
+ - Updated `add_doc` to return the index of the document
29
+
30
+ ## 0.1.3 (2020-12-19)
31
+
32
+ - Updated tomoto to 0.10.0
33
+
34
+ ## 0.1.2 (2020-10-10)
35
+
36
+ - Added `summary` method
37
+ - Added `parallel` option to `train` method
38
+
39
+ ## 0.1.1 (2020-10-10)
40
+
41
+ - Added many more models
42
+
43
+ ## 0.1.0 (2020-10-09)
44
+
45
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019, bab2min
4
+ Copyright (c) 2020-2021 Andrew Kane
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,162 @@
1
+ # tomoto.rb
2
+
3
+ :tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
4
+
5
+ [![Build Status](https://github.com/ankane/tomoto-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/tomoto-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem "tomoto"
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ Train a model
18
+
19
+ ```ruby
20
+ model = Tomoto::LDA.new(k: 2)
21
+ model.add_doc("text from document one")
22
+ model.add_doc("text from document two")
23
+ model.add_doc("text from document three")
24
+ model.train(100) # iterations
25
+ ```
26
+
27
+ Get the summary
28
+
29
+ ```ruby
30
+ model.summary
31
+ ```
32
+
33
+ Get topic words
34
+
35
+ ```ruby
36
+ model.topic_words
37
+ ```
38
+
39
+ Save the model to a file
40
+
41
+ ```ruby
42
+ model.save("model.bin")
43
+ ```
44
+
45
+ Load the model from a file
46
+
47
+ ```ruby
48
+ model = Tomoto::LDA.load("model.bin")
49
+ ```
50
+
51
+ Get topic probabilities for a document
52
+
53
+ ```ruby
54
+ doc = model.docs[0]
55
+ doc.topics
56
+ ```
57
+
58
+ Get the number of words for each topic
59
+
60
+ ```ruby
61
+ model.count_by_topics
62
+ ```
63
+
64
+ Get the vocab
65
+
66
+ ```ruby
67
+ model.vocabs
68
+ ```
69
+
70
+ Get the log likelihood per word
71
+
72
+ ```ruby
73
+ model.ll_per_word
74
+ ```
75
+
76
+ Perform inference for unseen documents
77
+
78
+ ```ruby
79
+ doc = model.make_doc("unseen doc")
80
+ topic_dist, ll = model.infer(doc)
81
+ ```
82
+
83
+ ## Models
84
+
85
+ Supports:
86
+
87
+ - Latent Dirichlet Allocation (`LDA`)
88
+ - Labeled LDA (`LLDA`)
89
+ - Partially Labeled LDA (`PLDA`)
90
+ - Supervised LDA (`SLDA`)
91
+ - Dirichlet Multinomial Regression (`DMR`)
92
+ - Generalized Dirichlet Multinomial Regression (`GDMR`)
93
+ - Hierarchical Dirichlet Process (`HDP`)
94
+ - Hierarchical LDA (`HLDA`)
95
+ - Multi Grain LDA (`MGLDA`)
96
+ - Pachinko Allocation (`PA`)
97
+ - Hierarchical PA (`HPA`)
98
+ - Correlated Topic Model (`CT`)
99
+ - Dynamic Topic Model (`DT`)
100
+
101
+ ## API
102
+
103
+ This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
104
+
105
+ - The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
106
+ - Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
107
+
108
+ If a method or option you need isn’t supported, feel free to open an issue.
109
+
110
+ ## Examples
111
+
112
+ - [LDA](examples/lda_basic.rb)
113
+ - [HDP](examples/hdp_basic.rb)
114
+
115
+ ## Tokenization
116
+
117
+ Documents are tokenized by whitespace by default, or you can perform your own tokenization.
118
+
119
+ ```ruby
120
+ model.add_doc(["tokens", "from", "document", "one"])
121
+ ```
122
+
123
+ ## Performance
124
+
125
+ tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
126
+
127
+ ```ruby
128
+ Tomoto.isa
129
+ ```
130
+
131
+ ## Parallelism
132
+
133
+ Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
134
+
135
+ ```ruby
136
+ model.train(parallel: :partition)
137
+ ```
138
+
139
+ Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
140
+
141
+ ## History
142
+
143
+ View the [changelog](https://github.com/ankane/tomoto-ruby/blob/master/CHANGELOG.md)
144
+
145
+ ## Contributing
146
+
147
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
148
+
149
+ - [Report bugs](https://github.com/ankane/tomoto-ruby/issues)
150
+ - Fix bugs and [submit pull requests](https://github.com/ankane/tomoto-ruby/pulls)
151
+ - Write, clarify, or fix documentation
152
+ - Suggest or add new features
153
+
154
+ To get started with development:
155
+
156
+ ```sh
157
+ git clone --recursive https://github.com/ankane/tomoto-ruby.git
158
+ cd tomoto-ruby
159
+ bundle install
160
+ bundle exec rake compile
161
+ bundle exec rake test
162
+ ```
data/ext/tomoto/ct.cpp ADDED
@@ -0,0 +1,58 @@
1
+ #include <CT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_ct(Rice::Module& m) { // Defines class "CT" under module m: binds tomoto::ICTModel with tomoto::ILDAModel as its parent.
8
+ Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
9
+ .define_singleton_function(
10
+ "_new", // factory: copies the arguments into CTArgs and creates the model
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::CTArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed != static_cast<size_t>(-1)) { // size_t is unsigned, so the old "seed >= 0" test was always true; (size_t)-1 is presumably the "no seed" sentinel (confirm against the Ruby wrapper) and leaves args.seed at its default
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::ICTModel::create(static_cast<tomoto::TermWeight>(tw), args);
20
+ }, Rice::Return().takeOwnership()) // the created model is handed over to Ruby to own
21
+ .define_method(
22
+ "_correlations", // forwards to getCorrelationTopic for one topic id
23
+ [](tomoto::ICTModel& self, tomoto::Tid topic_id) {
24
+ return self.getCorrelationTopic(topic_id);
25
+ })
26
+ .define_method(
27
+ "num_beta_sample",
28
+ [](tomoto::ICTModel& self) {
29
+ return self.getNumBetaSample();
30
+ })
31
+ .define_method(
32
+ "num_beta_sample=",
33
+ [](tomoto::ICTModel& self, size_t value) {
34
+ self.setNumBetaSample(value);
35
+ return value; // echo the assigned value back to the caller
36
+ })
37
+ .define_method(
38
+ "num_tmn_sample",
39
+ [](tomoto::ICTModel& self) {
40
+ return self.getNumTMNSample();
41
+ })
42
+ .define_method(
43
+ "num_tmn_sample=",
44
+ [](tomoto::ICTModel& self, size_t value) {
45
+ self.setNumTMNSample(value);
46
+ return value; // echo the assigned value back to the caller
47
+ })
48
+ .define_method(
49
+ "_prior_cov",
50
+ [](tomoto::ICTModel& self) {
51
+ return self.getPriorCov();
52
+ })
53
+ .define_method(
54
+ "prior_mean",
55
+ [](tomoto::ICTModel& self) {
56
+ return self.getPriorMean();
57
+ });
58
+ }
data/ext/tomoto/dmr.cpp ADDED
@@ -0,0 +1,69 @@
1
+ #include <DMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dmr(Rice::Module& m) { // Defines class "DMR" under module m: binds tomoto::IDMRModel with tomoto::ILDAModel as its parent.
8
+ Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
9
+ .define_singleton_function(
10
+ "_new", // factory: copies the arguments into DMRArgs and creates the model
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::DMRArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.sigma = sigma;
16
+ args.eta = eta;
17
+ args.alphaEps = alpha_epsilon;
18
+ if (seed != static_cast<size_t>(-1)) { // size_t is unsigned, so the old "seed >= 0" test was always true; (size_t)-1 is presumably the "no seed" sentinel (confirm against the Ruby wrapper) and leaves args.seed at its default
19
+ args.seed = seed;
20
+ }
21
+ return tomoto::IDMRModel::create(static_cast<tomoto::TermWeight>(tw), args);
22
+ }, Rice::Return().takeOwnership()) // the created model is handed over to Ruby to own
23
+ .define_method(
24
+ "_add_doc", // builds a document from words, attaches the metadata string, and adds it to the model
25
+ [](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
26
+ auto doc = buildDoc(words);
27
+ doc.misc["metadata"] = metadata;
28
+ return self.addDoc(doc);
29
+ })
30
+ .define_method(
31
+ "alpha_epsilon",
32
+ [](tomoto::IDMRModel& self) {
33
+ return self.getAlphaEps();
34
+ })
35
+ .define_method(
36
+ "alpha_epsilon=",
37
+ [](tomoto::IDMRModel& self, tomoto::Float value) {
38
+ self.setAlphaEps(value);
39
+ return value; // echo the assigned value back to the caller
40
+ })
41
+ .define_method(
42
+ "f",
43
+ [](tomoto::IDMRModel& self) {
44
+ return self.getF();
45
+ })
46
+ .define_method(
47
+ "_lambdas", // forwards to getLambdaByTopic for one topic id
48
+ [](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
49
+ return self.getLambdaByTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "metadata_dict", // returns every word of the metadata dictionary as a Ruby array of strings
53
+ [](tomoto::IDMRModel& self) {
54
+ auto dict = self.getMetadataDict();
55
+ Array res;
56
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8"); // looked up once, reused for every entry
57
+ for (size_t i = 0; i < dict.size(); i++) {
58
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
59
+ Object obj(value);
60
+ res.push(obj.call("force_encoding", utf8)); // tag the converted string as UTF-8 before storing it
61
+ }
62
+ return res;
63
+ })
64
+ .define_method(
65
+ "sigma",
66
+ [](tomoto::IDMRModel& self) {
67
+ return self.getSigma();
68
+ });
69
+ }
data/ext/tomoto/dt.cpp ADDED
@@ -0,0 +1,91 @@
1
+ #include <DT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dt(Rice::Module& m) { // Defines class "DT" under module m: binds tomoto::IDTModel with tomoto::ILDAModel as its parent.
8
+ Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
9
+ .define_singleton_function(
10
+ "_new", // factory: copies the arguments into DTArgs and creates the model
11
+ [](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
12
+ // Rice only supports 10 arguments, so the seed is not a parameter here
13
+ const size_t seed = static_cast<size_t>(-1); // "no seed" sentinel, spelled out instead of the implicit -1 conversion
14
+ tomoto::DTArgs args;
15
+ args.k = k;
16
+ args.t = t;
17
+ args.alpha = {alphaVar};
18
+ args.eta = etaVar;
19
+ args.phi = phiVar;
20
+ args.shapeA = shapeA;
21
+ args.shapeB = shapeB;
22
+ args.shapeC = shapeC;
23
+ if (seed != static_cast<size_t>(-1)) { // the old unsigned "seed >= 0" test was always true and forced args.seed to SIZE_MAX; the sentinel now leaves args.seed at its default
24
+ args.seed = seed;
25
+ }
26
+ return tomoto::IDTModel::create(static_cast<tomoto::TermWeight>(tw), args);
27
+ }, Rice::Return().takeOwnership()) // the created model is handed over to Ruby to own
28
+ .define_method(
29
+ "_add_doc", // builds a document from words, attaches the timepoint, and adds it to the model
30
+ [](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
31
+ auto doc = buildDoc(words);
32
+ doc.misc["timepoint"] = timepoint;
33
+ return self.addDoc(doc);
34
+ })
35
+ .define_method(
36
+ "alpha", // returns a nested array: outer index runs over getK(), inner index over getT()
37
+ [](tomoto::IDTModel& self) {
38
+ Array res;
39
+ for (size_t i = 0; i < self.getK(); i++) {
40
+ Array res2;
41
+ for (size_t j = 0; j < self.getT(); j++) {
42
+ res2.push(self.getAlpha(i, j));
43
+ }
44
+ res.push(res2);
45
+ }
46
+ return res;
47
+ })
48
+ .define_method(
49
+ "lr_a", // Ruby name lr_a maps to the model's shapeA value
50
+ [](tomoto::IDTModel& self) {
51
+ return self.getShapeA();
52
+ })
53
+ .define_method(
54
+ "lr_a=",
55
+ [](tomoto::IDTModel& self, tomoto::Float value) {
56
+ self.setShapeA(value);
57
+ return value; // echo the assigned value back to the caller
58
+ })
59
+ .define_method(
60
+ "lr_b", // Ruby name lr_b maps to the model's shapeB value
61
+ [](tomoto::IDTModel& self) {
62
+ return self.getShapeB();
63
+ })
64
+ .define_method(
65
+ "lr_b=",
66
+ [](tomoto::IDTModel& self, tomoto::Float value) {
67
+ self.setShapeB(value);
68
+ return value; // echo the assigned value back to the caller
69
+ })
70
+ .define_method(
71
+ "lr_c", // Ruby name lr_c maps to the model's shapeC value
72
+ [](tomoto::IDTModel& self) {
73
+ return self.getShapeC();
74
+ })
75
+ .define_method(
76
+ "lr_c=",
77
+ [](tomoto::IDTModel& self, tomoto::Float value) {
78
+ self.setShapeC(value);
79
+ return value; // echo the assigned value back to the caller
80
+ })
81
+ .define_method(
82
+ "num_docs_by_timepoint",
83
+ [](tomoto::IDTModel& self) {
84
+ return self.getNumDocsByT();
85
+ })
86
+ .define_method(
87
+ "num_timepoints",
88
+ [](tomoto::IDTModel& self) {
89
+ return self.getT();
90
+ });
91
+ }
data/ext/tomoto/extconf.rb ADDED
@@ -0,0 +1,34 @@
1
+ require "mkmf-rice" # build configuration for the tomoto native extension
2
+
3
+ $CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"
4
+
5
+ unless ENV["RUBY_CC_VERSION"] # NOTE(review): presumably set when cross-compiling precompiled gems, where -march=native would be wrong; confirm
6
+ # AVX-512F is not supported yet, so it is disabled explicitly
7
+ # https://github.com/bab2min/tomotopy/issues/188
8
+ $CXXFLAGS << " " << with_config("optflags", "-march=native -mno-avx512f") # second argument is the default when no optflags config is supplied
9
+ end
10
+
11
+ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i # truthy when the configured compiler reports Apple clang
12
+
13
+ if apple_clang
14
+ # silence Rice warnings
15
+ $CXXFLAGS += " -Wno-deprecated-declarations"
16
+ else
17
+ # silence Eigen warnings
18
+ $CXXFLAGS += " -Wno-ignored-attributes -Wno-deprecated-copy"
19
+ end
20
+
21
+ # silence tomoto warnings
22
+ $CXXFLAGS += " -Wno-unused-variable -Wno-switch"
23
+
24
+ ext = File.expand_path(".", __dir__) # this directory, which holds the binding sources
25
+ tomoto = File.expand_path("../../vendor/tomotopy/src/TopicModel", __dir__)
26
+ eigen = File.expand_path("../../vendor/eigen", __dir__)
27
+ eigen_rand = File.expand_path("../../vendor/EigenRand", __dir__)
28
+ variant = File.expand_path("../../vendor/variant/include", __dir__)
29
+
30
+ $srcs = Dir["{#{ext},#{tomoto}}/*.cpp"] # compile the binding sources together with the vendored TopicModel sources
31
+ $INCFLAGS += " -I#{tomoto} -I#{eigen} -I#{eigen_rand} -I#{variant}"
32
+ $VPATH << tomoto # lets make locate the vendored .cpp files outside this directory
33
+
34
+ create_makefile("tomoto/tomoto")
data/ext/tomoto/gdmr.cpp ADDED
@@ -0,0 +1,42 @@
1
+ #include <GDMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_gdmr(Rice::Module& m) { // Defines class "GDMR" under module m: binds tomoto::IGDMRModel with tomoto::IDMRModel as its parent.
8
+ Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR")
9
+ .define_singleton_function(
10
+ "_new", // factory: copies the arguments into GDMRArgs and creates the model
11
+ [](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::GDMRArgs args;
13
+ args.k = k;
14
+ args.degrees = degrees;
15
+ args.alpha = {alpha};
16
+ args.sigma = sigma;
17
+ args.sigma0 = sigma0;
18
+ args.eta = eta;
19
+ args.alphaEps = alpha_epsilon;
20
+ if (seed != static_cast<size_t>(-1)) { // size_t is unsigned, so the old "seed >= 0" test was always true; (size_t)-1 is presumably the "no seed" sentinel (confirm against the Ruby wrapper) and leaves args.seed at its default
21
+ args.seed = seed;
22
+ }
23
+ return tomoto::IGDMRModel::create(static_cast<tomoto::TermWeight>(tw), args);
24
+ }, Rice::Return().takeOwnership()) // the created model is handed over to Ruby to own
25
+ .define_method(
26
+ "_add_doc", // builds a document from words, attaches the numeric metadata, and adds it to the model
27
+ [](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> numeric_metadata) {
28
+ auto doc = buildDoc(words);
29
+ doc.misc["numeric_metadata"] = numeric_metadata;
30
+ return self.addDoc(doc);
31
+ })
32
+ .define_method(
33
+ "degrees", // Ruby name degrees maps to the model's getFs()
34
+ [](tomoto::IGDMRModel& self) {
35
+ return self.getFs();
36
+ })
37
+ .define_method(
38
+ "sigma0",
39
+ [](tomoto::IGDMRModel& self) {
40
+ return self.getSigma0();
41
+ });
42
+ }
data/ext/tomoto/hdp.cpp ADDED
@@ -0,0 +1,47 @@
1
+ #include <HDP.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hdp(Rice::Module& m) { // Defines class "HDP" under module m: binds tomoto::IHDPModel with tomoto::ILDAModel as its parent.
8
+ Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP")
9
+ .define_singleton_function(
10
+ "_new", // factory: copies the arguments into HDPArgs and creates the model
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HDPArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed != static_cast<size_t>(-1)) { // size_t is unsigned, so the old "seed >= 0" test was always true; (size_t)-1 is presumably the "no seed" sentinel (confirm against the Ruby wrapper) and leaves args.seed at its default
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHDPModel::create(static_cast<tomoto::TermWeight>(tw), args);
21
+ }, Rice::Return().takeOwnership()) // the created model is handed over to Ruby to own
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHDPModel& self) {
25
+ return self.getAlpha();
26
+ })
27
+ .define_method(
28
+ "gamma",
29
+ [](tomoto::IHDPModel& self) {
30
+ return self.getGamma();
31
+ })
32
+ .define_method(
33
+ "live_k",
34
+ [](tomoto::IHDPModel& self) {
35
+ return self.getLiveK();
36
+ })
37
+ .define_method(
38
+ "live_topic?", // forwards to isLiveTopic for one topic id
39
+ [](tomoto::IHDPModel& self, size_t tid) {
40
+ return self.isLiveTopic(tid);
41
+ })
42
+ .define_method(
43
+ "num_tables", // Ruby name num_tables maps to the model's getTotalTables()
44
+ [](tomoto::IHDPModel& self) {
45
+ return self.getTotalTables();
46
+ });
47
+ }
data/ext/tomoto/hlda.cpp ADDED
@@ -0,0 +1,71 @@
1
+ #include <HLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hlda(Rice::Module& m) { // Defines class "HLDA" under module m: binds tomoto::IHLDAModel with tomoto::ILDAModel as its parent.
8
+ Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA")
9
+ .define_singleton_function(
10
+ "_new", // factory: copies the arguments into HLDAArgs and creates the model
11
+ [](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HLDAArgs args;
13
+ args.k = levelDepth; // the level depth is passed through the args' k field
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed != static_cast<size_t>(-1)) { // size_t is unsigned, so the old "seed >= 0" test was always true; (size_t)-1 is presumably the "no seed" sentinel (confirm against the Ruby wrapper) and leaves args.seed at its default
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHLDAModel::create(static_cast<tomoto::TermWeight>(tw), args);
21
+ }, Rice::Return().takeOwnership()) // the created model is handed over to Ruby to own
22
+ .define_method(
23
+ "alpha", // returns one getAlpha(i) value per level, for i in [0, getLevelDepth())
24
+ [](tomoto::IHLDAModel& self) {
25
+ Array res;
26
+ for (size_t i = 0; i < self.getLevelDepth(); i++) {
27
+ res.push(self.getAlpha(i));
28
+ }
29
+ return res;
30
+ })
31
+ .define_method(
32
+ "_children_topics", // forwards to getChildTopicId for one topic id
33
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
34
+ return self.getChildTopicId(topic_id);
35
+ })
36
+ .define_method(
37
+ "depth",
38
+ [](tomoto::IHLDAModel& self) {
39
+ return self.getLevelDepth();
40
+ })
41
+ .define_method(
42
+ "gamma",
43
+ [](tomoto::IHLDAModel& self) {
44
+ return self.getGamma();
45
+ })
46
+ .define_method(
47
+ "_level", // forwards to getLevelOfTopic for one topic id
48
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
49
+ return self.getLevelOfTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "live_k",
53
+ [](tomoto::IHLDAModel& self) {
54
+ return self.getLiveK();
55
+ })
56
+ .define_method(
57
+ "_live_topic?", // forwards to isLiveTopic for one topic id
58
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
59
+ return self.isLiveTopic(topic_id);
60
+ })
61
+ .define_method(
62
+ "_num_docs_of_topic", // forwards to getNumDocsOfTopic for one topic id
63
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
64
+ return self.getNumDocsOfTopic(topic_id);
65
+ })
66
+ .define_method(
67
+ "_parent_topic", // forwards to getParentTopicId for one topic id
68
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
69
+ return self.getParentTopicId(topic_id);
70
+ });
71
+ }
data/ext/tomoto/hpa.cpp ADDED
@@ -0,0 +1,32 @@
1
+ #include <HPA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hpa(Rice::Module& m) { // Defines class "HPA" under module m: binds tomoto::IHPAModel with tomoto::IPAModel as its parent.
8
+ Rice::define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(m, "HPA")
9
+ .define_singleton_function(
10
+ "_new", // factory: copies the arguments into HPAArgs and creates the model
11
+ [](size_t tw, size_t k1, size_t k2, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::HPAArgs args;
13
+ args.k = k1;
14
+ args.k2 = k2;
15
+ args.alpha = {alpha};
16
+ args.eta = eta;
17
+ if (seed != static_cast<size_t>(-1)) { // size_t is unsigned, so the old "seed >= 0" test was always true; (size_t)-1 is presumably the "no seed" sentinel (confirm against the Ruby wrapper) and leaves args.seed at its default
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHPAModel::create(static_cast<tomoto::TermWeight>(tw), false, args);
21
+ }, Rice::Return().takeOwnership()) // the created model is handed over to Ruby to own
22
+ .define_method(
23
+ "alpha", // returns getAlpha(i) for every i in [0, getK()]
24
+ [](tomoto::IHPAModel& self) {
25
+ Array res;
26
+ // use <= to return k+1 elements
27
+ for (size_t i = 0; i <= self.getK(); i++) {
28
+ res.push(self.getAlpha(i));
29
+ }
30
+ return res;
31
+ });
32
+ }