tomoto 0.3.2-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +54 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +164 -0
  5. data/ext/tomoto/ct.cpp +58 -0
  6. data/ext/tomoto/dmr.cpp +69 -0
  7. data/ext/tomoto/dt.cpp +91 -0
  8. data/ext/tomoto/extconf.rb +42 -0
  9. data/ext/tomoto/gdmr.cpp +42 -0
  10. data/ext/tomoto/hdp.cpp +47 -0
  11. data/ext/tomoto/hlda.cpp +71 -0
  12. data/ext/tomoto/hpa.cpp +32 -0
  13. data/ext/tomoto/lda.cpp +281 -0
  14. data/ext/tomoto/llda.cpp +33 -0
  15. data/ext/tomoto/mglda.cpp +81 -0
  16. data/ext/tomoto/pa.cpp +32 -0
  17. data/ext/tomoto/plda.cpp +33 -0
  18. data/ext/tomoto/slda.cpp +48 -0
  19. data/ext/tomoto/tomoto.cpp +48 -0
  20. data/ext/tomoto/utils.h +30 -0
  21. data/lib/tomoto/2.7/tomoto.bundle +0 -0
  22. data/lib/tomoto/3.0/tomoto.bundle +0 -0
  23. data/lib/tomoto/3.1/tomoto.bundle +0 -0
  24. data/lib/tomoto/3.2/tomoto.bundle +0 -0
  25. data/lib/tomoto/ct.rb +24 -0
  26. data/lib/tomoto/dmr.rb +27 -0
  27. data/lib/tomoto/dt.rb +15 -0
  28. data/lib/tomoto/gdmr.rb +15 -0
  29. data/lib/tomoto/hdp.rb +11 -0
  30. data/lib/tomoto/hlda.rb +56 -0
  31. data/lib/tomoto/hpa.rb +11 -0
  32. data/lib/tomoto/lda.rb +181 -0
  33. data/lib/tomoto/llda.rb +15 -0
  34. data/lib/tomoto/mglda.rb +15 -0
  35. data/lib/tomoto/pa.rb +11 -0
  36. data/lib/tomoto/plda.rb +15 -0
  37. data/lib/tomoto/slda.rb +37 -0
  38. data/lib/tomoto/version.rb +3 -0
  39. data/lib/tomoto.rb +27 -0
  40. data/vendor/EigenRand/EigenRand/EigenRand +24 -0
  41. data/vendor/EigenRand/LICENSE +21 -0
  42. data/vendor/EigenRand/README.md +430 -0
  43. data/vendor/eigen/COPYING.APACHE +203 -0
  44. data/vendor/eigen/COPYING.BSD +26 -0
  45. data/vendor/eigen/COPYING.GPL +674 -0
  46. data/vendor/eigen/COPYING.LGPL +502 -0
  47. data/vendor/eigen/COPYING.MINPACK +51 -0
  48. data/vendor/eigen/COPYING.MPL2 +373 -0
  49. data/vendor/eigen/COPYING.README +18 -0
  50. data/vendor/eigen/Eigen/Cholesky +45 -0
  51. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  52. data/vendor/eigen/Eigen/Core +384 -0
  53. data/vendor/eigen/Eigen/Dense +7 -0
  54. data/vendor/eigen/Eigen/Eigen +2 -0
  55. data/vendor/eigen/Eigen/Eigenvalues +60 -0
  56. data/vendor/eigen/Eigen/Geometry +59 -0
  57. data/vendor/eigen/Eigen/Householder +29 -0
  58. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  59. data/vendor/eigen/Eigen/Jacobi +32 -0
  60. data/vendor/eigen/Eigen/KLUSupport +41 -0
  61. data/vendor/eigen/Eigen/LU +47 -0
  62. data/vendor/eigen/Eigen/MetisSupport +35 -0
  63. data/vendor/eigen/Eigen/OrderingMethods +70 -0
  64. data/vendor/eigen/Eigen/PaStiXSupport +49 -0
  65. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  66. data/vendor/eigen/Eigen/QR +50 -0
  67. data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
  68. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  69. data/vendor/eigen/Eigen/SVD +50 -0
  70. data/vendor/eigen/Eigen/Sparse +34 -0
  71. data/vendor/eigen/Eigen/SparseCholesky +37 -0
  72. data/vendor/eigen/Eigen/SparseCore +69 -0
  73. data/vendor/eigen/Eigen/SparseLU +50 -0
  74. data/vendor/eigen/Eigen/SparseQR +36 -0
  75. data/vendor/eigen/Eigen/StdDeque +27 -0
  76. data/vendor/eigen/Eigen/StdList +26 -0
  77. data/vendor/eigen/Eigen/StdVector +27 -0
  78. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  79. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  80. data/vendor/eigen/README.md +5 -0
  81. data/vendor/eigen/bench/README.txt +55 -0
  82. data/vendor/eigen/bench/btl/COPYING +340 -0
  83. data/vendor/eigen/bench/btl/README +154 -0
  84. data/vendor/eigen/bench/tensors/README +20 -0
  85. data/vendor/eigen/blas/README.txt +6 -0
  86. data/vendor/eigen/ci/README.md +56 -0
  87. data/vendor/eigen/demos/mandelbrot/README +10 -0
  88. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  89. data/vendor/eigen/demos/opengl/README +13 -0
  90. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
  91. data/vendor/eigen/unsupported/README.txt +50 -0
  92. data/vendor/tomotopy/LICENSE +21 -0
  93. data/vendor/tomotopy/README.kr.rst +519 -0
  94. data/vendor/tomotopy/README.rst +538 -0
  95. data/vendor/variant/LICENSE +25 -0
  96. data/vendor/variant/LICENSE_1_0.txt +23 -0
  97. data/vendor/variant/README.md +102 -0
  98. metadata +141 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 25a8fdacc489aed2a3521fdfcfe0d5e591d3eeb71120bd2ca9a7bbc9cf65f5b6
4
+ data.tar.gz: f21fd4b4d378392fbc1ed7bf0a53f37854a5c28caf65e1e8189bcab11f00f90b
5
+ SHA512:
6
+ metadata.gz: 81fa53fbb42974675ec3e7e996a01372106c326d2e8713382b4866209b4c63511e861362e596e18d6f5d2219cb98e7f1ba4c42d43dd5a12d3306340402eba22a
7
+ data.tar.gz: 3c20a2402fb61c94d1a298eb743f3f2eda368fa64b828792b5685655d5ac0a62084a3f188a488d5ed1048d6e316def5fb55e85d07ae319d64f0d8d19e38e7e23
data/CHANGELOG.md ADDED
@@ -0,0 +1,54 @@
1
+ ## 0.3.2 (2023-01-22)
2
+
3
+ - Added precompiled gem for Mac ARM
4
+ - Updated tomoto to 0.12.4
5
+
6
+ ## 0.3.1 (2023-01-12)
7
+
8
+ - Added support for Ruby 3.2
9
+
10
+ ## 0.3.0 (2022-10-03)
11
+
12
+ - Added precompiled gems for Linux and Mac
13
+ - Updated tomoto to 0.12.3
14
+ - Dropped support for Ruby < 2.7
15
+
16
+ ## 0.2.3 (2021-08-26)
17
+
18
+ - Updated to Rice 4
19
+
20
+ ## 0.2.2 (2021-08-23)
21
+
22
+ - Reduced gem size
23
+
24
+ ## 0.2.1 (2021-08-23)
25
+
26
+ - Added support for unseen documents
27
+
28
+ ## 0.2.0 (2021-05-23)
29
+
30
+ - Updated tomoto to 0.12.0
31
+ - Dropped support for Ruby < 2.6
32
+
33
+ ## 0.1.4 (2021-03-14)
34
+
35
+ - Added `docs` method
36
+ - Updated tomoto to 0.10.2
37
+ - Updated `add_doc` to return the index of the document
38
+
39
+ ## 0.1.3 (2020-12-19)
40
+
41
+ - Updated tomoto to 0.10.0
42
+
43
+ ## 0.1.2 (2020-10-10)
44
+
45
+ - Added `summary` method
46
+ - Added `parallel` option to `train` method
47
+
48
+ ## 0.1.1 (2020-10-10)
49
+
50
+ - Added many more models
51
+
52
+ ## 0.1.0 (2020-10-09)
53
+
54
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019, bab2min
4
+ Copyright (c) 2020-2021 Andrew Kane
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,164 @@
1
+ # tomoto.rb
2
+
3
+ :tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
4
+
5
+ [![Build Status](https://github.com/ankane/tomoto-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/tomoto-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem "tomoto"
13
+ ```
14
+
15
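+ ARM is currently only supported on Mac (a precompiled gem is provided for Mac ARM); other ARM platforms are not currently supported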
16
+
17
+ ## Getting Started
18
+
19
+ Train a model
20
+
21
+ ```ruby
22
+ model = Tomoto::LDA.new(k: 2)
23
+ model.add_doc("text from document one")
24
+ model.add_doc("text from document two")
25
+ model.add_doc("text from document three")
26
+ model.train(100) # iterations
27
+ ```
28
+
29
+ Get the summary
30
+
31
+ ```ruby
32
+ model.summary
33
+ ```
34
+
35
+ Get topic words
36
+
37
+ ```ruby
38
+ model.topic_words
39
+ ```
40
+
41
+ Save the model to a file
42
+
43
+ ```ruby
44
+ model.save("model.bin")
45
+ ```
46
+
47
+ Load the model from a file
48
+
49
+ ```ruby
50
+ model = Tomoto::LDA.load("model.bin")
51
+ ```
52
+
53
+ Get topic probabilities for a document
54
+
55
+ ```ruby
56
+ doc = model.docs[0]
57
+ doc.topics
58
+ ```
59
+
60
+ Get the number of words for each topic
61
+
62
+ ```ruby
63
+ model.count_by_topics
64
+ ```
65
+
66
+ Get the vocab
67
+
68
+ ```ruby
69
+ model.vocabs
70
+ ```
71
+
72
+ Get the log likelihood per word
73
+
74
+ ```ruby
75
+ model.ll_per_word
76
+ ```
77
+
78
+ Perform inference for unseen documents
79
+
80
+ ```ruby
81
+ doc = model.make_doc("unseen doc")
82
+ topic_dist, ll = model.infer(doc)
83
+ ```
84
+
85
+ ## Models
86
+
87
+ Supports:
88
+
89
+ - Latent Dirichlet Allocation (`LDA`)
90
+ - Labeled LDA (`LLDA`)
91
+ - Partially Labeled LDA (`PLDA`)
92
+ - Supervised LDA (`SLDA`)
93
+ - Dirichlet Multinomial Regression (`DMR`)
94
+ - Generalized Dirichlet Multinomial Regression (`GDMR`)
95
+ - Hierarchical Dirichlet Process (`HDP`)
96
+ - Hierarchical LDA (`HLDA`)
97
+ - Multi Grain LDA (`MGLDA`)
98
+ - Pachinko Allocation (`PA`)
99
+ - Hierarchical PA (`HPA`)
100
+ - Correlated Topic Model (`CT`)
101
+ - Dynamic Topic Model (`DT`)
102
+
103
+ ## API
104
+
105
+ This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
106
+
107
+ - The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
108
+ - Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
109
+
110
+ If a method or option you need isn’t supported, feel free to open an issue.
111
+
112
+ ## Examples
113
+
114
+ - [LDA](examples/lda_basic.rb)
115
+ - [HDP](examples/hdp_basic.rb)
116
+
117
+ ## Tokenization
118
+
119
+ Documents are tokenized by whitespace by default, or you can perform your own tokenization.
120
+
121
+ ```ruby
122
+ model.add_doc(["tokens", "from", "document", "one"])
123
+ ```
124
+
125
+ ## Performance
126
+
127
+ tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
128
+
129
+ ```ruby
130
+ Tomoto.isa
131
+ ```
132
+
133
+ ## Parallelism
134
+
135
+ Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
136
+
137
+ ```ruby
138
+ model.train(parallel: :partition)
139
+ ```
140
+
141
+ Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
142
+
143
+ ## History
144
+
145
+ View the [changelog](https://github.com/ankane/tomoto-ruby/blob/master/CHANGELOG.md)
146
+
147
+ ## Contributing
148
+
149
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
150
+
151
+ - [Report bugs](https://github.com/ankane/tomoto-ruby/issues)
152
+ - Fix bugs and [submit pull requests](https://github.com/ankane/tomoto-ruby/pulls)
153
+ - Write, clarify, or fix documentation
154
+ - Suggest or add new features
155
+
156
+ To get started with development:
157
+
158
+ ```sh
159
+ git clone --recursive https://github.com/ankane/tomoto-ruby.git
160
+ cd tomoto-ruby
161
+ bundle install
162
+ bundle exec rake compile
163
+ bundle exec rake test
164
+ ```
data/ext/tomoto/ct.cpp ADDED
@@ -0,0 +1,58 @@
1
+ #include <CT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_ct(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::CTArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::ICTModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_correlations",
23
+ [](tomoto::ICTModel& self, tomoto::Tid topic_id) {
24
+ return self.getCorrelationTopic(topic_id);
25
+ })
26
+ .define_method(
27
+ "num_beta_sample",
28
+ [](tomoto::ICTModel& self) {
29
+ return self.getNumBetaSample();
30
+ })
31
+ .define_method(
32
+ "num_beta_sample=",
33
+ [](tomoto::ICTModel& self, size_t value) {
34
+ self.setNumBetaSample(value);
35
+ return value;
36
+ })
37
+ .define_method(
38
+ "num_tmn_sample",
39
+ [](tomoto::ICTModel& self) {
40
+ return self.getNumTMNSample();
41
+ })
42
+ .define_method(
43
+ "num_tmn_sample=",
44
+ [](tomoto::ICTModel& self, size_t value) {
45
+ self.setNumTMNSample(value);
46
+ return value;
47
+ })
48
+ .define_method(
49
+ "_prior_cov",
50
+ [](tomoto::ICTModel& self) {
51
+ return self.getPriorCov();
52
+ })
53
+ .define_method(
54
+ "prior_mean",
55
+ [](tomoto::ICTModel& self) {
56
+ return self.getPriorMean();
57
+ });
58
+ }
@@ -0,0 +1,69 @@
1
+ #include <DMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::DMRArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.sigma = sigma;
16
+ args.eta = eta;
17
+ args.alphaEps = alpha_epsilon;
18
+ if (seed >= 0) {
19
+ args.seed = seed;
20
+ }
21
+ return tomoto::IDMRModel::create((tomoto::TermWeight)tw, args);
22
+ }, Rice::Return().takeOwnership())
23
+ .define_method(
24
+ "_add_doc",
25
+ [](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
26
+ auto doc = buildDoc(words);
27
+ doc.misc["metadata"] = metadata;
28
+ return self.addDoc(doc);
29
+ })
30
+ .define_method(
31
+ "alpha_epsilon",
32
+ [](tomoto::IDMRModel& self) {
33
+ return self.getAlphaEps();
34
+ })
35
+ .define_method(
36
+ "alpha_epsilon=",
37
+ [](tomoto::IDMRModel& self, tomoto::Float value) {
38
+ self.setAlphaEps(value);
39
+ return value;
40
+ })
41
+ .define_method(
42
+ "f",
43
+ [](tomoto::IDMRModel& self) {
44
+ return self.getF();
45
+ })
46
+ .define_method(
47
+ "_lambdas",
48
+ [](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
49
+ return self.getLambdaByTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "metadata_dict",
53
+ [](tomoto::IDMRModel& self) {
54
+ auto dict = self.getMetadataDict();
55
+ Array res;
56
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
57
+ for (size_t i = 0; i < dict.size(); i++) {
58
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
59
+ Object obj(value);
60
+ res.push(obj.call("force_encoding", utf8));
61
+ }
62
+ return res;
63
+ })
64
+ .define_method(
65
+ "sigma",
66
+ [](tomoto::IDMRModel& self) {
67
+ return self.getSigma();
68
+ });
69
+ }
data/ext/tomoto/dt.cpp ADDED
@@ -0,0 +1,91 @@
1
+ #include <DT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dt(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
12
+ // Rice only supports 10 arguments
13
+ size_t seed = -1;
14
+ tomoto::DTArgs args;
15
+ args.k = k;
16
+ args.t = t;
17
+ args.alpha = {alphaVar};
18
+ args.eta = etaVar;
19
+ args.phi = phiVar;
20
+ args.shapeA = shapeA;
21
+ args.shapeB = shapeB;
22
+ args.shapeC = shapeC;
23
+ if (seed >= 0) {
24
+ args.seed = seed;
25
+ }
26
+ return tomoto::IDTModel::create((tomoto::TermWeight)tw, args);
27
+ }, Rice::Return().takeOwnership())
28
+ .define_method(
29
+ "_add_doc",
30
+ [](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
31
+ auto doc = buildDoc(words);
32
+ doc.misc["timepoint"] = timepoint;
33
+ return self.addDoc(doc);
34
+ })
35
+ .define_method(
36
+ "alpha",
37
+ [](tomoto::IDTModel& self) {
38
+ Array res;
39
+ for (size_t i = 0; i < self.getK(); i++) {
40
+ Array res2;
41
+ for (size_t j = 0; j < self.getT(); j++) {
42
+ res2.push(self.getAlpha(i, j));
43
+ }
44
+ res.push(res2);
45
+ }
46
+ return res;
47
+ })
48
+ .define_method(
49
+ "lr_a",
50
+ [](tomoto::IDTModel& self) {
51
+ return self.getShapeA();
52
+ })
53
+ .define_method(
54
+ "lr_a=",
55
+ [](tomoto::IDTModel& self, tomoto::Float value) {
56
+ self.setShapeA(value);
57
+ return value;
58
+ })
59
+ .define_method(
60
+ "lr_b",
61
+ [](tomoto::IDTModel& self) {
62
+ return self.getShapeB();
63
+ })
64
+ .define_method(
65
+ "lr_b=",
66
+ [](tomoto::IDTModel& self, tomoto::Float value) {
67
+ self.setShapeB(value);
68
+ return value;
69
+ })
70
+ .define_method(
71
+ "lr_c",
72
+ [](tomoto::IDTModel& self) {
73
+ return self.getShapeC();
74
+ })
75
+ .define_method(
76
+ "lr_c=",
77
+ [](tomoto::IDTModel& self, tomoto::Float value) {
78
+ self.setShapeC(value);
79
+ return value;
80
+ })
81
+ .define_method(
82
+ "num_docs_by_timepoint",
83
+ [](tomoto::IDTModel& self) {
84
+ return self.getNumDocsByT();
85
+ })
86
+ .define_method(
87
+ "num_timepoints",
88
+ [](tomoto::IDTModel& self) {
89
+ return self.getT();
90
+ });
91
+ }
@@ -0,0 +1,42 @@
1
# Generates the Makefile for the tomoto native extension via mkmf-rice.
require "mkmf-rice"

$CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"

# Host-specific optimization flags are only appended when RUBY_CC_VERSION is unset
# (presumably meaning this is not a cross-compiled build — TODO confirm).
if ENV["RUBY_CC_VERSION"].nil?
  host_os = RbConfig::CONFIG["host_os"]
  host_cpu = RbConfig::CONFIG["host_cpu"]
  mac_arm = host_os =~ /darwin/i && host_cpu =~ /arm|aarch64/i

  # -march=native is not supported with Mac ARM.
  # Elsewhere, AVX-512F is not supported yet:
  # https://github.com/bab2min/tomotopy/issues/188
  fallback_optflags = mac_arm ? "" : "-march=native -mno-avx512f"

  # A user-supplied optflags config value overrides the fallback.
  $CXXFLAGS << " " << with_config("optflags", fallback_optflags)
end

warning_flags =
  if RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
    # silence rice warnings
    " -Wno-deprecated-declarations"
  else
    # silence eigen warnings
    " -Wno-ignored-attributes -Wno-deprecated-copy"
  end

# silence tomoto warnings
warning_flags += " -Wno-unused-variable -Wno-switch"

$CXXFLAGS += warning_flags

# Source and header locations, resolved relative to this file's directory.
ext = File.expand_path(".", __dir__)
vendor = File.expand_path("../../vendor", __dir__)
tomoto = File.join(vendor, "tomotopy/src/TopicModel")
eigen = File.join(vendor, "eigen")
eigen_rand = File.join(vendor, "EigenRand")
variant = File.join(vendor, "variant/include")

# Compile the binding sources together with the vendored TopicModel sources.
$srcs = Dir["{#{ext},#{tomoto}}/*.cpp"]
$INCFLAGS += [tomoto, eigen, eigen_rand, variant].map { |dir| " -I#{dir}" }.join
$VPATH << tomoto

create_makefile("tomoto/tomoto")
@@ -0,0 +1,42 @@
1
+ #include <GDMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_gdmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::GDMRArgs args;
13
+ args.k = k;
14
+ args.degrees = degrees;
15
+ args.alpha = {alpha};
16
+ args.sigma = sigma;
17
+ args.sigma0 = sigma0;
18
+ args.eta = eta;
19
+ args.alphaEps = alpha_epsilon;
20
+ if (seed >= 0) {
21
+ args.seed = seed;
22
+ }
23
+ return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, args);
24
+ }, Rice::Return().takeOwnership())
25
+ .define_method(
26
+ "_add_doc",
27
+ [](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> numeric_metadata) {
28
+ auto doc = buildDoc(words);
29
+ doc.misc["numeric_metadata"] = numeric_metadata;
30
+ return self.addDoc(doc);
31
+ })
32
+ .define_method(
33
+ "degrees",
34
+ [](tomoto::IGDMRModel& self) {
35
+ return self.getFs();
36
+ })
37
+ .define_method(
38
+ "sigma0",
39
+ [](tomoto::IGDMRModel& self) {
40
+ return self.getSigma0();
41
+ });
42
+ }
@@ -0,0 +1,47 @@
1
+ #include <HDP.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hdp(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HDPArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHDPModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHDPModel& self) {
25
+ return self.getAlpha();
26
+ })
27
+ .define_method(
28
+ "gamma",
29
+ [](tomoto::IHDPModel& self) {
30
+ return self.getGamma();
31
+ })
32
+ .define_method(
33
+ "live_k",
34
+ [](tomoto::IHDPModel& self) {
35
+ return self.getLiveK();
36
+ })
37
+ .define_method(
38
+ "live_topic?",
39
+ [](tomoto::IHDPModel& self, size_t tid) {
40
+ return self.isLiveTopic(tid);
41
+ })
42
+ .define_method(
43
+ "num_tables",
44
+ [](tomoto::IHDPModel& self) {
45
+ return self.getTotalTables();
46
+ });
47
+ }
@@ -0,0 +1,71 @@
1
+ #include <HLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hlda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HLDAArgs args;
13
+ args.k = levelDepth;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHLDAModel& self) {
25
+ Array res;
26
+ for (size_t i = 0; i < self.getLevelDepth(); i++) {
27
+ res.push(self.getAlpha(i));
28
+ }
29
+ return res;
30
+ })
31
+ .define_method(
32
+ "_children_topics",
33
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
34
+ return self.getChildTopicId(topic_id);
35
+ })
36
+ .define_method(
37
+ "depth",
38
+ [](tomoto::IHLDAModel& self) {
39
+ return self.getLevelDepth();
40
+ })
41
+ .define_method(
42
+ "gamma",
43
+ [](tomoto::IHLDAModel& self) {
44
+ return self.getGamma();
45
+ })
46
+ .define_method(
47
+ "_level",
48
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
49
+ return self.getLevelOfTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "live_k",
53
+ [](tomoto::IHLDAModel& self) {
54
+ return self.getLiveK();
55
+ })
56
+ .define_method(
57
+ "_live_topic?",
58
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
59
+ return self.isLiveTopic(topic_id);
60
+ })
61
+ .define_method(
62
+ "_num_docs_of_topic",
63
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
64
+ return self.getNumDocsOfTopic(topic_id);
65
+ })
66
+ .define_method(
67
+ "_parent_topic",
68
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
69
+ return self.getParentTopicId(topic_id);
70
+ });
71
+ }