tomoto 0.3.2-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +54 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +164 -0
  5. data/ext/tomoto/ct.cpp +58 -0
  6. data/ext/tomoto/dmr.cpp +69 -0
  7. data/ext/tomoto/dt.cpp +91 -0
  8. data/ext/tomoto/extconf.rb +42 -0
  9. data/ext/tomoto/gdmr.cpp +42 -0
  10. data/ext/tomoto/hdp.cpp +47 -0
  11. data/ext/tomoto/hlda.cpp +71 -0
  12. data/ext/tomoto/hpa.cpp +32 -0
  13. data/ext/tomoto/lda.cpp +281 -0
  14. data/ext/tomoto/llda.cpp +33 -0
  15. data/ext/tomoto/mglda.cpp +81 -0
  16. data/ext/tomoto/pa.cpp +32 -0
  17. data/ext/tomoto/plda.cpp +33 -0
  18. data/ext/tomoto/slda.cpp +48 -0
  19. data/ext/tomoto/tomoto.cpp +48 -0
  20. data/ext/tomoto/utils.h +30 -0
  21. data/lib/tomoto/2.7/tomoto.bundle +0 -0
  22. data/lib/tomoto/3.0/tomoto.bundle +0 -0
  23. data/lib/tomoto/3.1/tomoto.bundle +0 -0
  24. data/lib/tomoto/3.2/tomoto.bundle +0 -0
  25. data/lib/tomoto/ct.rb +24 -0
  26. data/lib/tomoto/dmr.rb +27 -0
  27. data/lib/tomoto/dt.rb +15 -0
  28. data/lib/tomoto/gdmr.rb +15 -0
  29. data/lib/tomoto/hdp.rb +11 -0
  30. data/lib/tomoto/hlda.rb +56 -0
  31. data/lib/tomoto/hpa.rb +11 -0
  32. data/lib/tomoto/lda.rb +181 -0
  33. data/lib/tomoto/llda.rb +15 -0
  34. data/lib/tomoto/mglda.rb +15 -0
  35. data/lib/tomoto/pa.rb +11 -0
  36. data/lib/tomoto/plda.rb +15 -0
  37. data/lib/tomoto/slda.rb +37 -0
  38. data/lib/tomoto/version.rb +3 -0
  39. data/lib/tomoto.rb +27 -0
  40. data/vendor/EigenRand/EigenRand/EigenRand +24 -0
  41. data/vendor/EigenRand/LICENSE +21 -0
  42. data/vendor/EigenRand/README.md +430 -0
  43. data/vendor/eigen/COPYING.APACHE +203 -0
  44. data/vendor/eigen/COPYING.BSD +26 -0
  45. data/vendor/eigen/COPYING.GPL +674 -0
  46. data/vendor/eigen/COPYING.LGPL +502 -0
  47. data/vendor/eigen/COPYING.MINPACK +51 -0
  48. data/vendor/eigen/COPYING.MPL2 +373 -0
  49. data/vendor/eigen/COPYING.README +18 -0
  50. data/vendor/eigen/Eigen/Cholesky +45 -0
  51. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  52. data/vendor/eigen/Eigen/Core +384 -0
  53. data/vendor/eigen/Eigen/Dense +7 -0
  54. data/vendor/eigen/Eigen/Eigen +2 -0
  55. data/vendor/eigen/Eigen/Eigenvalues +60 -0
  56. data/vendor/eigen/Eigen/Geometry +59 -0
  57. data/vendor/eigen/Eigen/Householder +29 -0
  58. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  59. data/vendor/eigen/Eigen/Jacobi +32 -0
  60. data/vendor/eigen/Eigen/KLUSupport +41 -0
  61. data/vendor/eigen/Eigen/LU +47 -0
  62. data/vendor/eigen/Eigen/MetisSupport +35 -0
  63. data/vendor/eigen/Eigen/OrderingMethods +70 -0
  64. data/vendor/eigen/Eigen/PaStiXSupport +49 -0
  65. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  66. data/vendor/eigen/Eigen/QR +50 -0
  67. data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
  68. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  69. data/vendor/eigen/Eigen/SVD +50 -0
  70. data/vendor/eigen/Eigen/Sparse +34 -0
  71. data/vendor/eigen/Eigen/SparseCholesky +37 -0
  72. data/vendor/eigen/Eigen/SparseCore +69 -0
  73. data/vendor/eigen/Eigen/SparseLU +50 -0
  74. data/vendor/eigen/Eigen/SparseQR +36 -0
  75. data/vendor/eigen/Eigen/StdDeque +27 -0
  76. data/vendor/eigen/Eigen/StdList +26 -0
  77. data/vendor/eigen/Eigen/StdVector +27 -0
  78. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  79. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  80. data/vendor/eigen/README.md +5 -0
  81. data/vendor/eigen/bench/README.txt +55 -0
  82. data/vendor/eigen/bench/btl/COPYING +340 -0
  83. data/vendor/eigen/bench/btl/README +154 -0
  84. data/vendor/eigen/bench/tensors/README +20 -0
  85. data/vendor/eigen/blas/README.txt +6 -0
  86. data/vendor/eigen/ci/README.md +56 -0
  87. data/vendor/eigen/demos/mandelbrot/README +10 -0
  88. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  89. data/vendor/eigen/demos/opengl/README +13 -0
  90. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
  91. data/vendor/eigen/unsupported/README.txt +50 -0
  92. data/vendor/tomotopy/LICENSE +21 -0
  93. data/vendor/tomotopy/README.kr.rst +519 -0
  94. data/vendor/tomotopy/README.rst +538 -0
  95. data/vendor/variant/LICENSE +25 -0
  96. data/vendor/variant/LICENSE_1_0.txt +23 -0
  97. data/vendor/variant/README.md +102 -0
  98. metadata +141 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 25a8fdacc489aed2a3521fdfcfe0d5e591d3eeb71120bd2ca9a7bbc9cf65f5b6
4
+ data.tar.gz: f21fd4b4d378392fbc1ed7bf0a53f37854a5c28caf65e1e8189bcab11f00f90b
5
+ SHA512:
6
+ metadata.gz: 81fa53fbb42974675ec3e7e996a01372106c326d2e8713382b4866209b4c63511e861362e596e18d6f5d2219cb98e7f1ba4c42d43dd5a12d3306340402eba22a
7
+ data.tar.gz: 3c20a2402fb61c94d1a298eb743f3f2eda368fa64b828792b5685655d5ac0a62084a3f188a488d5ed1048d6e316def5fb55e85d07ae319d64f0d8d19e38e7e23
data/CHANGELOG.md ADDED
@@ -0,0 +1,54 @@
1
+ ## 0.3.2 (2023-01-22)
2
+
3
+ - Added precompiled gem for Mac ARM
4
+ - Updated tomoto to 0.12.4
5
+
6
+ ## 0.3.1 (2023-01-12)
7
+
8
+ - Added support for Ruby 3.2
9
+
10
+ ## 0.3.0 (2022-10-03)
11
+
12
+ - Added precompiled gems for Linux and Mac
13
+ - Updated tomoto to 0.12.3
14
+ - Dropped support for Ruby < 2.7
15
+
16
+ ## 0.2.3 (2021-08-26)
17
+
18
+ - Updated to Rice 4
19
+
20
+ ## 0.2.2 (2021-08-23)
21
+
22
+ - Reduced gem size
23
+
24
+ ## 0.2.1 (2021-08-23)
25
+
26
+ - Added support for unseen documents
27
+
28
+ ## 0.2.0 (2021-05-23)
29
+
30
+ - Updated tomoto to 0.12.0
31
+ - Dropped support for Ruby < 2.6
32
+
33
+ ## 0.1.4 (2021-03-14)
34
+
35
+ - Added `docs` method
36
+ - Updated tomoto to 0.10.2
37
+ - Updated `add_doc` to return the index of the document
38
+
39
+ ## 0.1.3 (2020-12-19)
40
+
41
+ - Updated tomoto to 0.10.0
42
+
43
+ ## 0.1.2 (2020-10-10)
44
+
45
+ - Added `summary` method
46
+ - Added `parallel` option to `train` method
47
+
48
+ ## 0.1.1 (2020-10-10)
49
+
50
+ - Added many more models
51
+
52
+ ## 0.1.0 (2020-10-09)
53
+
54
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019, bab2min
4
+ Copyright (c) 2020-2021 Andrew Kane
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,164 @@
1
+ # tomoto.rb
2
+
3
+ :tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
4
+
5
+ [![Build Status](https://github.com/ankane/tomoto-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/tomoto-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem "tomoto"
13
+ ```
14
+
15
+ ARM is not currently supported, except on Mac (a precompiled Mac ARM gem is available as of 0.3.2)
16
+
17
+ ## Getting Started
18
+
19
+ Train a model
20
+
21
+ ```ruby
22
+ model = Tomoto::LDA.new(k: 2)
23
+ model.add_doc("text from document one")
24
+ model.add_doc("text from document two")
25
+ model.add_doc("text from document three")
26
+ model.train(100) # iterations
27
+ ```
28
+
29
+ Get the summary
30
+
31
+ ```ruby
32
+ model.summary
33
+ ```
34
+
35
+ Get topic words
36
+
37
+ ```ruby
38
+ model.topic_words
39
+ ```
40
+
41
+ Save the model to a file
42
+
43
+ ```ruby
44
+ model.save("model.bin")
45
+ ```
46
+
47
+ Load the model from a file
48
+
49
+ ```ruby
50
+ model = Tomoto::LDA.load("model.bin")
51
+ ```
52
+
53
+ Get topic probabilities for a document
54
+
55
+ ```ruby
56
+ doc = model.docs[0]
57
+ doc.topics
58
+ ```
59
+
60
+ Get the number of words for each topic
61
+
62
+ ```ruby
63
+ model.count_by_topics
64
+ ```
65
+
66
+ Get the vocab
67
+
68
+ ```ruby
69
+ model.vocabs
70
+ ```
71
+
72
+ Get the log likelihood per word
73
+
74
+ ```ruby
75
+ model.ll_per_word
76
+ ```
77
+
78
+ Perform inference for unseen documents
79
+
80
+ ```ruby
81
+ doc = model.make_doc("unseen doc")
82
+ topic_dist, ll = model.infer(doc)
83
+ ```
84
+
85
+ ## Models
86
+
87
+ Supports:
88
+
89
+ - Latent Dirichlet Allocation (`LDA`)
90
+ - Labeled LDA (`LLDA`)
91
+ - Partially Labeled LDA (`PLDA`)
92
+ - Supervised LDA (`SLDA`)
93
+ - Dirichlet Multinomial Regression (`DMR`)
94
+ - Generalized Dirichlet Multinomial Regression (`GDMR`)
95
+ - Hierarchical Dirichlet Process (`HDP`)
96
+ - Hierarchical LDA (`HLDA`)
97
+ - Multi Grain LDA (`MGLDA`)
98
+ - Pachinko Allocation (`PA`)
99
+ - Hierarchical PA (`HPA`)
100
+ - Correlated Topic Model (`CT`)
101
+ - Dynamic Topic Model (`DT`)
102
+
103
+ ## API
104
+
105
+ This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
106
+
107
+ - The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
108
+ - Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
109
+
110
+ If a method or option you need isn’t supported, feel free to open an issue.
111
+
112
+ ## Examples
113
+
114
+ - [LDA](examples/lda_basic.rb)
115
+ - [HDP](examples/hdp_basic.rb)
116
+
117
+ ## Tokenization
118
+
119
+ Documents are tokenized by whitespace by default, or you can perform your own tokenization.
120
+
121
+ ```ruby
122
+ model.add_doc(["tokens", "from", "document", "one"])
123
+ ```
124
+
125
+ ## Performance
126
+
127
+ tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support them. Check which instruction set architecture it’s using with:
128
+
129
+ ```ruby
130
+ Tomoto.isa
131
+ ```
132
+
133
+ ## Parallelism
134
+
135
+ Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
136
+
137
+ ```ruby
138
+ model.train(parallel: :partition)
139
+ ```
140
+
141
+ Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
142
+
143
+ ## History
144
+
145
+ View the [changelog](https://github.com/ankane/tomoto-ruby/blob/master/CHANGELOG.md)
146
+
147
+ ## Contributing
148
+
149
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
150
+
151
+ - [Report bugs](https://github.com/ankane/tomoto-ruby/issues)
152
+ - Fix bugs and [submit pull requests](https://github.com/ankane/tomoto-ruby/pulls)
153
+ - Write, clarify, or fix documentation
154
+ - Suggest or add new features
155
+
156
+ To get started with development:
157
+
158
+ ```sh
159
+ git clone --recursive https://github.com/ankane/tomoto-ruby.git
160
+ cd tomoto-ruby
161
+ bundle install
162
+ bundle exec rake compile
163
+ bundle exec rake test
164
+ ```
data/ext/tomoto/ct.cpp ADDED
@@ -0,0 +1,58 @@
1
+ #include <CT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_ct(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::CTArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::ICTModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_correlations",
23
+ [](tomoto::ICTModel& self, tomoto::Tid topic_id) {
24
+ return self.getCorrelationTopic(topic_id);
25
+ })
26
+ .define_method(
27
+ "num_beta_sample",
28
+ [](tomoto::ICTModel& self) {
29
+ return self.getNumBetaSample();
30
+ })
31
+ .define_method(
32
+ "num_beta_sample=",
33
+ [](tomoto::ICTModel& self, size_t value) {
34
+ self.setNumBetaSample(value);
35
+ return value;
36
+ })
37
+ .define_method(
38
+ "num_tmn_sample",
39
+ [](tomoto::ICTModel& self) {
40
+ return self.getNumTMNSample();
41
+ })
42
+ .define_method(
43
+ "num_tmn_sample=",
44
+ [](tomoto::ICTModel& self, size_t value) {
45
+ self.setNumTMNSample(value);
46
+ return value;
47
+ })
48
+ .define_method(
49
+ "_prior_cov",
50
+ [](tomoto::ICTModel& self) {
51
+ return self.getPriorCov();
52
+ })
53
+ .define_method(
54
+ "prior_mean",
55
+ [](tomoto::ICTModel& self) {
56
+ return self.getPriorMean();
57
+ });
58
+ }
data/ext/tomoto/dmr.cpp ADDED
@@ -0,0 +1,69 @@
1
+ #include <DMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::DMRArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.sigma = sigma;
16
+ args.eta = eta;
17
+ args.alphaEps = alpha_epsilon;
18
+ if (seed >= 0) {
19
+ args.seed = seed;
20
+ }
21
+ return tomoto::IDMRModel::create((tomoto::TermWeight)tw, args);
22
+ }, Rice::Return().takeOwnership())
23
+ .define_method(
24
+ "_add_doc",
25
+ [](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
26
+ auto doc = buildDoc(words);
27
+ doc.misc["metadata"] = metadata;
28
+ return self.addDoc(doc);
29
+ })
30
+ .define_method(
31
+ "alpha_epsilon",
32
+ [](tomoto::IDMRModel& self) {
33
+ return self.getAlphaEps();
34
+ })
35
+ .define_method(
36
+ "alpha_epsilon=",
37
+ [](tomoto::IDMRModel& self, tomoto::Float value) {
38
+ self.setAlphaEps(value);
39
+ return value;
40
+ })
41
+ .define_method(
42
+ "f",
43
+ [](tomoto::IDMRModel& self) {
44
+ return self.getF();
45
+ })
46
+ .define_method(
47
+ "_lambdas",
48
+ [](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
49
+ return self.getLambdaByTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "metadata_dict",
53
+ [](tomoto::IDMRModel& self) {
54
+ auto dict = self.getMetadataDict();
55
+ Array res;
56
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
57
+ for (size_t i = 0; i < dict.size(); i++) {
58
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
59
+ Object obj(value);
60
+ res.push(obj.call("force_encoding", utf8));
61
+ }
62
+ return res;
63
+ })
64
+ .define_method(
65
+ "sigma",
66
+ [](tomoto::IDMRModel& self) {
67
+ return self.getSigma();
68
+ });
69
+ }
data/ext/tomoto/dt.cpp ADDED
@@ -0,0 +1,91 @@
1
+ #include <DT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dt(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
12
+ // Rice only supports 10 arguments
13
+ size_t seed = -1;
14
+ tomoto::DTArgs args;
15
+ args.k = k;
16
+ args.t = t;
17
+ args.alpha = {alphaVar};
18
+ args.eta = etaVar;
19
+ args.phi = phiVar;
20
+ args.shapeA = shapeA;
21
+ args.shapeB = shapeB;
22
+ args.shapeC = shapeC;
23
+ if (seed >= 0) {
24
+ args.seed = seed;
25
+ }
26
+ return tomoto::IDTModel::create((tomoto::TermWeight)tw, args);
27
+ }, Rice::Return().takeOwnership())
28
+ .define_method(
29
+ "_add_doc",
30
+ [](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
31
+ auto doc = buildDoc(words);
32
+ doc.misc["timepoint"] = timepoint;
33
+ return self.addDoc(doc);
34
+ })
35
+ .define_method(
36
+ "alpha",
37
+ [](tomoto::IDTModel& self) {
38
+ Array res;
39
+ for (size_t i = 0; i < self.getK(); i++) {
40
+ Array res2;
41
+ for (size_t j = 0; j < self.getT(); j++) {
42
+ res2.push(self.getAlpha(i, j));
43
+ }
44
+ res.push(res2);
45
+ }
46
+ return res;
47
+ })
48
+ .define_method(
49
+ "lr_a",
50
+ [](tomoto::IDTModel& self) {
51
+ return self.getShapeA();
52
+ })
53
+ .define_method(
54
+ "lr_a=",
55
+ [](tomoto::IDTModel& self, tomoto::Float value) {
56
+ self.setShapeA(value);
57
+ return value;
58
+ })
59
+ .define_method(
60
+ "lr_b",
61
+ [](tomoto::IDTModel& self) {
62
+ return self.getShapeB();
63
+ })
64
+ .define_method(
65
+ "lr_b=",
66
+ [](tomoto::IDTModel& self, tomoto::Float value) {
67
+ self.setShapeB(value);
68
+ return value;
69
+ })
70
+ .define_method(
71
+ "lr_c",
72
+ [](tomoto::IDTModel& self) {
73
+ return self.getShapeC();
74
+ })
75
+ .define_method(
76
+ "lr_c=",
77
+ [](tomoto::IDTModel& self, tomoto::Float value) {
78
+ self.setShapeC(value);
79
+ return value;
80
+ })
81
+ .define_method(
82
+ "num_docs_by_timepoint",
83
+ [](tomoto::IDTModel& self) {
84
+ return self.getNumDocsByT();
85
+ })
86
+ .define_method(
87
+ "num_timepoints",
88
+ [](tomoto::IDTModel& self) {
89
+ return self.getT();
90
+ });
91
+ }
data/ext/tomoto/extconf.rb ADDED
@@ -0,0 +1,42 @@
1
+ require "mkmf-rice"
2
+
3
# Base flags: C++17, Ruby's own optimization flags, and MPL2-only Eigen.
$CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"

# Architecture tuning is skipped whenever RUBY_CC_VERSION is set.
if ENV["RUBY_CC_VERSION"].nil?
  host = RbConfig::CONFIG
  mac_arm = host["host_os"] =~ /darwin/i && host["host_cpu"] =~ /arm|aarch64/i

  # -march=native not supported with Mac ARM
  # AVX-512F not supported yet
  # https://github.com/bab2min/tomotopy/issues/188
  default_optflags = mac_arm ? "" : "-march=native -mno-avx512f"

  # Allow the default to be overridden through the "optflags" config option.
  $CXXFLAGS << " " << with_config("optflags", default_optflags)
end

# Apple clang: silence rice warnings; any other compiler: silence eigen warnings.
apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
$CXXFLAGS += apple_clang ? " -Wno-deprecated-declarations" : " -Wno-ignored-attributes -Wno-deprecated-copy"

# silence tomoto warnings
$CXXFLAGS += " -Wno-unused-variable -Wno-switch"

# Absolute paths of this extension directory and the vendored sources/headers.
vendored = ->(relative) { File.expand_path("../../vendor/#{relative}", __dir__) }
ext = File.expand_path(".", __dir__)
tomoto = vendored.call("tomotopy/src/TopicModel")
eigen = vendored.call("eigen")
eigen_rand = vendored.call("EigenRand")
variant = vendored.call("variant/include")

# Compile the binding sources together with the vendored tomoto sources.
$srcs = Dir["{#{ext},#{tomoto}}/*.cpp"]
$INCFLAGS += [tomoto, eigen, eigen_rand, variant].map { |dir| " -I#{dir}" }.join
$VPATH << tomoto

create_makefile("tomoto/tomoto")
data/ext/tomoto/gdmr.cpp ADDED
@@ -0,0 +1,42 @@
1
+ #include <GDMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_gdmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::GDMRArgs args;
13
+ args.k = k;
14
+ args.degrees = degrees;
15
+ args.alpha = {alpha};
16
+ args.sigma = sigma;
17
+ args.sigma0 = sigma0;
18
+ args.eta = eta;
19
+ args.alphaEps = alpha_epsilon;
20
+ if (seed >= 0) {
21
+ args.seed = seed;
22
+ }
23
+ return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, args);
24
+ }, Rice::Return().takeOwnership())
25
+ .define_method(
26
+ "_add_doc",
27
+ [](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> numeric_metadata) {
28
+ auto doc = buildDoc(words);
29
+ doc.misc["numeric_metadata"] = numeric_metadata;
30
+ return self.addDoc(doc);
31
+ })
32
+ .define_method(
33
+ "degrees",
34
+ [](tomoto::IGDMRModel& self) {
35
+ return self.getFs();
36
+ })
37
+ .define_method(
38
+ "sigma0",
39
+ [](tomoto::IGDMRModel& self) {
40
+ return self.getSigma0();
41
+ });
42
+ }
data/ext/tomoto/hdp.cpp ADDED
@@ -0,0 +1,47 @@
1
+ #include <HDP.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hdp(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HDPArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHDPModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHDPModel& self) {
25
+ return self.getAlpha();
26
+ })
27
+ .define_method(
28
+ "gamma",
29
+ [](tomoto::IHDPModel& self) {
30
+ return self.getGamma();
31
+ })
32
+ .define_method(
33
+ "live_k",
34
+ [](tomoto::IHDPModel& self) {
35
+ return self.getLiveK();
36
+ })
37
+ .define_method(
38
+ "live_topic?",
39
+ [](tomoto::IHDPModel& self, size_t tid) {
40
+ return self.isLiveTopic(tid);
41
+ })
42
+ .define_method(
43
+ "num_tables",
44
+ [](tomoto::IHDPModel& self) {
45
+ return self.getTotalTables();
46
+ });
47
+ }
data/ext/tomoto/hlda.cpp ADDED
@@ -0,0 +1,71 @@
1
+ #include <HLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hlda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HLDAArgs args;
13
+ args.k = levelDepth;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHLDAModel& self) {
25
+ Array res;
26
+ for (size_t i = 0; i < self.getLevelDepth(); i++) {
27
+ res.push(self.getAlpha(i));
28
+ }
29
+ return res;
30
+ })
31
+ .define_method(
32
+ "_children_topics",
33
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
34
+ return self.getChildTopicId(topic_id);
35
+ })
36
+ .define_method(
37
+ "depth",
38
+ [](tomoto::IHLDAModel& self) {
39
+ return self.getLevelDepth();
40
+ })
41
+ .define_method(
42
+ "gamma",
43
+ [](tomoto::IHLDAModel& self) {
44
+ return self.getGamma();
45
+ })
46
+ .define_method(
47
+ "_level",
48
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
49
+ return self.getLevelOfTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "live_k",
53
+ [](tomoto::IHLDAModel& self) {
54
+ return self.getLiveK();
55
+ })
56
+ .define_method(
57
+ "_live_topic?",
58
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
59
+ return self.isLiveTopic(topic_id);
60
+ })
61
+ .define_method(
62
+ "_num_docs_of_topic",
63
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
64
+ return self.getNumDocsOfTopic(topic_id);
65
+ })
66
+ .define_method(
67
+ "_parent_topic",
68
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
69
+ return self.getParentTopicId(topic_id);
70
+ });
71
+ }