tomoto 0.4.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +65 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +154 -0
  5. data/ext/tomoto/ct.cpp +58 -0
  6. data/ext/tomoto/dmr.cpp +69 -0
  7. data/ext/tomoto/dt.cpp +91 -0
  8. data/ext/tomoto/extconf.rb +42 -0
  9. data/ext/tomoto/gdmr.cpp +42 -0
  10. data/ext/tomoto/hdp.cpp +47 -0
  11. data/ext/tomoto/hlda.cpp +71 -0
  12. data/ext/tomoto/hpa.cpp +32 -0
  13. data/ext/tomoto/lda.cpp +281 -0
  14. data/ext/tomoto/llda.cpp +46 -0
  15. data/ext/tomoto/mglda.cpp +81 -0
  16. data/ext/tomoto/pa.cpp +32 -0
  17. data/ext/tomoto/plda.cpp +33 -0
  18. data/ext/tomoto/slda.cpp +48 -0
  19. data/ext/tomoto/tomoto.cpp +48 -0
  20. data/ext/tomoto/utils.h +30 -0
  21. data/lib/tomoto/3.0/tomoto.so +0 -0
  22. data/lib/tomoto/3.1/tomoto.so +0 -0
  23. data/lib/tomoto/3.2/tomoto.so +0 -0
  24. data/lib/tomoto/3.3/tomoto.so +0 -0
  25. data/lib/tomoto/ct.rb +24 -0
  26. data/lib/tomoto/dmr.rb +27 -0
  27. data/lib/tomoto/dt.rb +15 -0
  28. data/lib/tomoto/gdmr.rb +15 -0
  29. data/lib/tomoto/hdp.rb +11 -0
  30. data/lib/tomoto/hlda.rb +56 -0
  31. data/lib/tomoto/hpa.rb +11 -0
  32. data/lib/tomoto/lda.rb +186 -0
  33. data/lib/tomoto/llda.rb +15 -0
  34. data/lib/tomoto/mglda.rb +15 -0
  35. data/lib/tomoto/pa.rb +11 -0
  36. data/lib/tomoto/plda.rb +15 -0
  37. data/lib/tomoto/slda.rb +37 -0
  38. data/lib/tomoto/version.rb +3 -0
  39. data/lib/tomoto.rb +27 -0
  40. data/vendor/EigenRand/EigenRand/EigenRand +24 -0
  41. data/vendor/EigenRand/LICENSE +21 -0
  42. data/vendor/EigenRand/README.md +430 -0
  43. data/vendor/eigen/COPYING.APACHE +203 -0
  44. data/vendor/eigen/COPYING.BSD +26 -0
  45. data/vendor/eigen/COPYING.GPL +674 -0
  46. data/vendor/eigen/COPYING.LGPL +502 -0
  47. data/vendor/eigen/COPYING.MINPACK +51 -0
  48. data/vendor/eigen/COPYING.MPL2 +373 -0
  49. data/vendor/eigen/COPYING.README +18 -0
  50. data/vendor/eigen/Eigen/Cholesky +45 -0
  51. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  52. data/vendor/eigen/Eigen/Core +384 -0
  53. data/vendor/eigen/Eigen/Dense +7 -0
  54. data/vendor/eigen/Eigen/Eigen +2 -0
  55. data/vendor/eigen/Eigen/Eigenvalues +60 -0
  56. data/vendor/eigen/Eigen/Geometry +59 -0
  57. data/vendor/eigen/Eigen/Householder +29 -0
  58. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  59. data/vendor/eigen/Eigen/Jacobi +32 -0
  60. data/vendor/eigen/Eigen/KLUSupport +41 -0
  61. data/vendor/eigen/Eigen/LU +47 -0
  62. data/vendor/eigen/Eigen/MetisSupport +35 -0
  63. data/vendor/eigen/Eigen/OrderingMethods +70 -0
  64. data/vendor/eigen/Eigen/PaStiXSupport +49 -0
  65. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  66. data/vendor/eigen/Eigen/QR +50 -0
  67. data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
  68. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  69. data/vendor/eigen/Eigen/SVD +50 -0
  70. data/vendor/eigen/Eigen/Sparse +34 -0
  71. data/vendor/eigen/Eigen/SparseCholesky +37 -0
  72. data/vendor/eigen/Eigen/SparseCore +69 -0
  73. data/vendor/eigen/Eigen/SparseLU +50 -0
  74. data/vendor/eigen/Eigen/SparseQR +36 -0
  75. data/vendor/eigen/Eigen/StdDeque +27 -0
  76. data/vendor/eigen/Eigen/StdList +26 -0
  77. data/vendor/eigen/Eigen/StdVector +27 -0
  78. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  79. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  80. data/vendor/eigen/README.md +5 -0
  81. data/vendor/eigen/bench/README.txt +55 -0
  82. data/vendor/eigen/bench/btl/COPYING +340 -0
  83. data/vendor/eigen/bench/btl/README +154 -0
  84. data/vendor/eigen/bench/tensors/README +20 -0
  85. data/vendor/eigen/blas/README.txt +6 -0
  86. data/vendor/eigen/ci/README.md +56 -0
  87. data/vendor/eigen/demos/mandelbrot/README +10 -0
  88. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  89. data/vendor/eigen/demos/opengl/README +13 -0
  90. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
  91. data/vendor/eigen/unsupported/README.txt +50 -0
  92. data/vendor/tomotopy/LICENSE +21 -0
  93. data/vendor/tomotopy/README.kr.rst +536 -0
  94. data/vendor/tomotopy/README.rst +555 -0
  95. data/vendor/variant/LICENSE +25 -0
  96. data/vendor/variant/LICENSE_1_0.txt +23 -0
  97. data/vendor/variant/README.md +102 -0
  98. metadata +141 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: dd86d2aae577db884b593492994fe68fccc5cc35f8ea0c0f498e4c45fc474623
4
+ data.tar.gz: 1ab6fd3c4166c34c8f288f7d71938f5da1f0948437ebc67c3557e526db16a70f
5
+ SHA512:
6
+ metadata.gz: ac159442e5b494a318b6b0dd3c702cf7570114d64416d0f69843395470957a2f073c1ceb45e5f6c2fceca532bb62a445fff1a86b8f68fd70522911581589f1ca
7
+ data.tar.gz: 73403819de8a5ab27a7558367117ed2de6a3c9e9a7e49b1b97de6b421e9234da726883b1f4b8dea57e0d5d73f858ac41a75d61607bf0dec8a8fb99bed6aed2b0
data/CHANGELOG.md ADDED
@@ -0,0 +1,65 @@
1
+ ## 0.4.0 (2023-12-28)
2
+
3
+ - Added precompiled gem for Linux ARM
4
+ - Updated tomoto to 0.12.7
5
+ - Dropped support for Ruby < 3
6
+
7
+ ## 0.3.3 (2023-02-01)
8
+
9
+ - Added `topic_label_dict` method to `LLDA`
10
+ - Fixed error with `infer` with loaded model
11
+
12
+ ## 0.3.2 (2023-01-22)
13
+
14
+ - Added precompiled gem for Mac ARM
15
+ - Updated tomoto to 0.12.4
16
+
17
+ ## 0.3.1 (2023-01-12)
18
+
19
+ - Added support for Ruby 3.2
20
+
21
+ ## 0.3.0 (2022-10-03)
22
+
23
+ - Added precompiled gems for Linux and Mac
24
+ - Updated tomoto to 0.12.3
25
+ - Dropped support for Ruby < 2.7
26
+
27
+ ## 0.2.3 (2021-08-26)
28
+
29
+ - Updated to Rice 4
30
+
31
+ ## 0.2.2 (2021-08-23)
32
+
33
+ - Reduced gem size
34
+
35
+ ## 0.2.1 (2021-08-23)
36
+
37
+ - Added support for unseen documents
38
+
39
+ ## 0.2.0 (2021-05-23)
40
+
41
+ - Updated tomoto to 0.12.0
42
+ - Dropped support for Ruby < 2.6
43
+
44
+ ## 0.1.4 (2021-03-14)
45
+
46
+ - Added `docs` method
47
+ - Updated tomoto to 0.10.2
48
+ - Updated `add_doc` to return the index of the document
49
+
50
+ ## 0.1.3 (2020-12-19)
51
+
52
+ - Updated tomoto to 0.10.0
53
+
54
+ ## 0.1.2 (2020-10-10)
55
+
56
+ - Added `summary` method
57
+ - Added `parallel` option to `train` method
58
+
59
+ ## 0.1.1 (2020-10-10)
60
+
61
+ - Added many more models
62
+
63
+ ## 0.1.0 (2020-10-09)
64
+
65
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019, bab2min
4
+ Copyright (c) 2020-2023 Andrew Kane
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,154 @@
1
+ # tomoto.rb
2
+
3
+ :tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
4
+
5
+ [![Build Status](https://github.com/ankane/tomoto-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/tomoto-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem "tomoto"
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ Train a model
18
+
19
+ ```ruby
20
+ model = Tomoto::LDA.new(k: 2)
21
+ model.add_doc(["tokens", "from", "document", "one"])
22
+ model.add_doc(["tokens", "from", "document", "two"])
23
+ model.add_doc(["tokens", "from", "document", "three"])
24
+ model.train(100) # iterations
25
+ ```
26
+
27
+ Get the summary
28
+
29
+ ```ruby
30
+ model.summary
31
+ ```
32
+
33
+ Get topic words
34
+
35
+ ```ruby
36
+ model.topic_words
37
+ ```
38
+
39
+ Save the model to a file
40
+
41
+ ```ruby
42
+ model.save("model.bin")
43
+ ```
44
+
45
+ Load the model from a file
46
+
47
+ ```ruby
48
+ model = Tomoto::LDA.load("model.bin")
49
+ ```
50
+
51
+ Get topic probabilities for a document
52
+
53
+ ```ruby
54
+ doc = model.docs[0]
55
+ doc.topics
56
+ ```
57
+
58
+ Get the number of words for each topic
59
+
60
+ ```ruby
61
+ model.count_by_topics
62
+ ```
63
+
64
+ Get the vocab
65
+
66
+ ```ruby
67
+ model.vocabs
68
+ ```
69
+
70
+ Get the log likelihood per word
71
+
72
+ ```ruby
73
+ model.ll_per_word
74
+ ```
75
+
76
+ Perform inference for unseen documents
77
+
78
+ ```ruby
79
+ doc = model.make_doc(["unseen", "doc"])
80
+ topic_dist, ll = model.infer(doc)
81
+ ```
82
+
83
+ ## Models
84
+
85
+ Supports:
86
+
87
+ - Latent Dirichlet Allocation (`LDA`)
88
+ - Labeled LDA (`LLDA`)
89
+ - Partially Labeled LDA (`PLDA`)
90
+ - Supervised LDA (`SLDA`)
91
+ - Dirichlet Multinomial Regression (`DMR`)
92
+ - Generalized Dirichlet Multinomial Regression (`GDMR`)
93
+ - Hierarchical Dirichlet Process (`HDP`)
94
+ - Hierarchical LDA (`HLDA`)
95
+ - Multi Grain LDA (`MGLDA`)
96
+ - Pachinko Allocation (`PA`)
97
+ - Hierarchical PA (`HPA`)
98
+ - Correlated Topic Model (`CT`)
99
+ - Dynamic Topic Model (`DT`)
100
+
101
+ ## API
102
+
103
+ This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
104
+
105
+ - The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
106
+ - Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
107
+
108
+ If a method or option you need isn’t supported, feel free to open an issue.
109
+
110
+ ## Examples
111
+
112
+ - [LDA](examples/lda_basic.rb)
113
+ - [HDP](examples/hdp_basic.rb)
114
+
115
+ ## Performance
116
+
117
+ tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support them. Check which instruction set architecture it’s using with:
118
+
119
+ ```ruby
120
+ Tomoto.isa
121
+ ```
122
+
123
+ ## Parallelism
124
+
125
+ Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
126
+
127
+ ```ruby
128
+ model.train(parallel: :partition)
129
+ ```
130
+
131
+ Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
132
+
133
+ ## History
134
+
135
+ View the [changelog](https://github.com/ankane/tomoto-ruby/blob/master/CHANGELOG.md)
136
+
137
+ ## Contributing
138
+
139
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
140
+
141
+ - [Report bugs](https://github.com/ankane/tomoto-ruby/issues)
142
+ - Fix bugs and [submit pull requests](https://github.com/ankane/tomoto-ruby/pulls)
143
+ - Write, clarify, or fix documentation
144
+ - Suggest or add new features
145
+
146
+ To get started with development:
147
+
148
+ ```sh
149
+ git clone --recursive https://github.com/ankane/tomoto-ruby.git
150
+ cd tomoto-ruby
151
+ bundle install
152
+ bundle exec rake compile
153
+ bundle exec rake test
154
+ ```
data/ext/tomoto/ct.cpp ADDED
@@ -0,0 +1,58 @@
1
+ #include <CT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_ct(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::CTArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::ICTModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_correlations",
23
+ [](tomoto::ICTModel& self, tomoto::Tid topic_id) {
24
+ return self.getCorrelationTopic(topic_id);
25
+ })
26
+ .define_method(
27
+ "num_beta_sample",
28
+ [](tomoto::ICTModel& self) {
29
+ return self.getNumBetaSample();
30
+ })
31
+ .define_method(
32
+ "num_beta_sample=",
33
+ [](tomoto::ICTModel& self, size_t value) {
34
+ self.setNumBetaSample(value);
35
+ return value;
36
+ })
37
+ .define_method(
38
+ "num_tmn_sample",
39
+ [](tomoto::ICTModel& self) {
40
+ return self.getNumTMNSample();
41
+ })
42
+ .define_method(
43
+ "num_tmn_sample=",
44
+ [](tomoto::ICTModel& self, size_t value) {
45
+ self.setNumTMNSample(value);
46
+ return value;
47
+ })
48
+ .define_method(
49
+ "_prior_cov",
50
+ [](tomoto::ICTModel& self) {
51
+ return self.getPriorCov();
52
+ })
53
+ .define_method(
54
+ "prior_mean",
55
+ [](tomoto::ICTModel& self) {
56
+ return self.getPriorMean();
57
+ });
58
+ }
data/ext/tomoto/dmr.cpp ADDED
@@ -0,0 +1,69 @@
1
+ #include <DMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::DMRArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.sigma = sigma;
16
+ args.eta = eta;
17
+ args.alphaEps = alpha_epsilon;
18
+ if (seed >= 0) {
19
+ args.seed = seed;
20
+ }
21
+ return tomoto::IDMRModel::create((tomoto::TermWeight)tw, args);
22
+ }, Rice::Return().takeOwnership())
23
+ .define_method(
24
+ "_add_doc",
25
+ [](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
26
+ auto doc = buildDoc(words);
27
+ doc.misc["metadata"] = metadata;
28
+ return self.addDoc(doc);
29
+ })
30
+ .define_method(
31
+ "alpha_epsilon",
32
+ [](tomoto::IDMRModel& self) {
33
+ return self.getAlphaEps();
34
+ })
35
+ .define_method(
36
+ "alpha_epsilon=",
37
+ [](tomoto::IDMRModel& self, tomoto::Float value) {
38
+ self.setAlphaEps(value);
39
+ return value;
40
+ })
41
+ .define_method(
42
+ "f",
43
+ [](tomoto::IDMRModel& self) {
44
+ return self.getF();
45
+ })
46
+ .define_method(
47
+ "_lambdas",
48
+ [](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
49
+ return self.getLambdaByTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "metadata_dict",
53
+ [](tomoto::IDMRModel& self) {
54
+ auto dict = self.getMetadataDict();
55
+ Array res;
56
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
57
+ for (size_t i = 0; i < dict.size(); i++) {
58
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
59
+ Object obj(value);
60
+ res.push(obj.call("force_encoding", utf8));
61
+ }
62
+ return res;
63
+ })
64
+ .define_method(
65
+ "sigma",
66
+ [](tomoto::IDMRModel& self) {
67
+ return self.getSigma();
68
+ });
69
+ }
data/ext/tomoto/dt.cpp ADDED
@@ -0,0 +1,91 @@
1
+ #include <DT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dt(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
12
+ // Rice only supports 10 arguments
13
+ size_t seed = -1;
14
+ tomoto::DTArgs args;
15
+ args.k = k;
16
+ args.t = t;
17
+ args.alpha = {alphaVar};
18
+ args.eta = etaVar;
19
+ args.phi = phiVar;
20
+ args.shapeA = shapeA;
21
+ args.shapeB = shapeB;
22
+ args.shapeC = shapeC;
23
+ if (seed >= 0) {
24
+ args.seed = seed;
25
+ }
26
+ return tomoto::IDTModel::create((tomoto::TermWeight)tw, args);
27
+ }, Rice::Return().takeOwnership())
28
+ .define_method(
29
+ "_add_doc",
30
+ [](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
31
+ auto doc = buildDoc(words);
32
+ doc.misc["timepoint"] = timepoint;
33
+ return self.addDoc(doc);
34
+ })
35
+ .define_method(
36
+ "alpha",
37
+ [](tomoto::IDTModel& self) {
38
+ Array res;
39
+ for (size_t i = 0; i < self.getK(); i++) {
40
+ Array res2;
41
+ for (size_t j = 0; j < self.getT(); j++) {
42
+ res2.push(self.getAlpha(i, j));
43
+ }
44
+ res.push(res2);
45
+ }
46
+ return res;
47
+ })
48
+ .define_method(
49
+ "lr_a",
50
+ [](tomoto::IDTModel& self) {
51
+ return self.getShapeA();
52
+ })
53
+ .define_method(
54
+ "lr_a=",
55
+ [](tomoto::IDTModel& self, tomoto::Float value) {
56
+ self.setShapeA(value);
57
+ return value;
58
+ })
59
+ .define_method(
60
+ "lr_b",
61
+ [](tomoto::IDTModel& self) {
62
+ return self.getShapeB();
63
+ })
64
+ .define_method(
65
+ "lr_b=",
66
+ [](tomoto::IDTModel& self, tomoto::Float value) {
67
+ self.setShapeB(value);
68
+ return value;
69
+ })
70
+ .define_method(
71
+ "lr_c",
72
+ [](tomoto::IDTModel& self) {
73
+ return self.getShapeC();
74
+ })
75
+ .define_method(
76
+ "lr_c=",
77
+ [](tomoto::IDTModel& self, tomoto::Float value) {
78
+ self.setShapeC(value);
79
+ return value;
80
+ })
81
+ .define_method(
82
+ "num_docs_by_timepoint",
83
+ [](tomoto::IDTModel& self) {
84
+ return self.getNumDocsByT();
85
+ })
86
+ .define_method(
87
+ "num_timepoints",
88
+ [](tomoto::IDTModel& self) {
89
+ return self.getT();
90
+ });
91
+ }
data/ext/tomoto/extconf.rb ADDED
@@ -0,0 +1,42 @@
1
+ require "mkmf-rice"
2
+
3
# Base compiler flags: C++17, the $(optflags) make variable, and Eigen
# restricted to its MPL2-licensed parts.
$CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"

# CPU-specific tuning is only added when RUBY_CC_VERSION is not set.
if ENV["RUBY_CC_VERSION"].nil?
  mac_arm = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i

  # -march=native not supported with Mac ARM
  # AVX-512F not supported yet
  # https://github.com/bab2min/tomotopy/issues/188
  default_optflags = mac_arm ? "" : "-march=native -mno-avx512f"

  # The default can be overridden through the "optflags" build option.
  $CXXFLAGS << " " << with_config("optflags", default_optflags)
end

# Apple clang gets the flag that silences rice warnings; every other compiler
# gets the flags that silence eigen warnings.
if RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
  $CXXFLAGS += " -Wno-deprecated-declarations"
else
  $CXXFLAGS += " -Wno-ignored-attributes -Wno-deprecated-copy"
end

# silence tomoto warnings
$CXXFLAGS += " -Wno-unused-variable -Wno-switch"

# Source and header locations, all resolved relative to this file.
ext = File.expand_path(".", __dir__)
tomoto = File.expand_path("../../vendor/tomotopy/src/TopicModel", __dir__)
include_dirs = [
  tomoto,
  File.expand_path("../../vendor/eigen", __dir__),
  File.expand_path("../../vendor/EigenRand", __dir__),
  File.expand_path("../../vendor/variant/include", __dir__)
]

# Compile the extension's own sources together with tomoto's TopicModel sources.
$srcs = Dir["{#{ext},#{tomoto}}/*.cpp"]
$INCFLAGS += include_dirs.map { |dir| " -I#{dir}" }.join
$VPATH << tomoto

create_makefile("tomoto/tomoto")
data/ext/tomoto/gdmr.cpp ADDED
@@ -0,0 +1,42 @@
1
+ #include <GDMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_gdmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::GDMRArgs args;
13
+ args.k = k;
14
+ args.degrees = degrees;
15
+ args.alpha = {alpha};
16
+ args.sigma = sigma;
17
+ args.sigma0 = sigma0;
18
+ args.eta = eta;
19
+ args.alphaEps = alpha_epsilon;
20
+ if (seed >= 0) {
21
+ args.seed = seed;
22
+ }
23
+ return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, args);
24
+ }, Rice::Return().takeOwnership())
25
+ .define_method(
26
+ "_add_doc",
27
+ [](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> numeric_metadata) {
28
+ auto doc = buildDoc(words);
29
+ doc.misc["numeric_metadata"] = numeric_metadata;
30
+ return self.addDoc(doc);
31
+ })
32
+ .define_method(
33
+ "degrees",
34
+ [](tomoto::IGDMRModel& self) {
35
+ return self.getFs();
36
+ })
37
+ .define_method(
38
+ "sigma0",
39
+ [](tomoto::IGDMRModel& self) {
40
+ return self.getSigma0();
41
+ });
42
+ }
data/ext/tomoto/hdp.cpp ADDED
@@ -0,0 +1,47 @@
1
+ #include <HDP.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hdp(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HDPArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHDPModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHDPModel& self) {
25
+ return self.getAlpha();
26
+ })
27
+ .define_method(
28
+ "gamma",
29
+ [](tomoto::IHDPModel& self) {
30
+ return self.getGamma();
31
+ })
32
+ .define_method(
33
+ "live_k",
34
+ [](tomoto::IHDPModel& self) {
35
+ return self.getLiveK();
36
+ })
37
+ .define_method(
38
+ "live_topic?",
39
+ [](tomoto::IHDPModel& self, size_t tid) {
40
+ return self.isLiveTopic(tid);
41
+ })
42
+ .define_method(
43
+ "num_tables",
44
+ [](tomoto::IHDPModel& self) {
45
+ return self.getTotalTables();
46
+ });
47
+ }
data/ext/tomoto/hlda.cpp ADDED
@@ -0,0 +1,71 @@
1
+ #include <HLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hlda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HLDAArgs args;
13
+ args.k = levelDepth;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHLDAModel& self) {
25
+ Array res;
26
+ for (size_t i = 0; i < self.getLevelDepth(); i++) {
27
+ res.push(self.getAlpha(i));
28
+ }
29
+ return res;
30
+ })
31
+ .define_method(
32
+ "_children_topics",
33
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
34
+ return self.getChildTopicId(topic_id);
35
+ })
36
+ .define_method(
37
+ "depth",
38
+ [](tomoto::IHLDAModel& self) {
39
+ return self.getLevelDepth();
40
+ })
41
+ .define_method(
42
+ "gamma",
43
+ [](tomoto::IHLDAModel& self) {
44
+ return self.getGamma();
45
+ })
46
+ .define_method(
47
+ "_level",
48
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
49
+ return self.getLevelOfTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "live_k",
53
+ [](tomoto::IHLDAModel& self) {
54
+ return self.getLiveK();
55
+ })
56
+ .define_method(
57
+ "_live_topic?",
58
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
59
+ return self.isLiveTopic(topic_id);
60
+ })
61
+ .define_method(
62
+ "_num_docs_of_topic",
63
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
64
+ return self.getNumDocsOfTopic(topic_id);
65
+ })
66
+ .define_method(
67
+ "_parent_topic",
68
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
69
+ return self.getParentTopicId(topic_id);
70
+ });
71
+ }