tomoto 0.4.0-aarch64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +65 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +154 -0
  5. data/ext/tomoto/ct.cpp +58 -0
  6. data/ext/tomoto/dmr.cpp +69 -0
  7. data/ext/tomoto/dt.cpp +91 -0
  8. data/ext/tomoto/extconf.rb +42 -0
  9. data/ext/tomoto/gdmr.cpp +42 -0
  10. data/ext/tomoto/hdp.cpp +47 -0
  11. data/ext/tomoto/hlda.cpp +71 -0
  12. data/ext/tomoto/hpa.cpp +32 -0
  13. data/ext/tomoto/lda.cpp +281 -0
  14. data/ext/tomoto/llda.cpp +46 -0
  15. data/ext/tomoto/mglda.cpp +81 -0
  16. data/ext/tomoto/pa.cpp +32 -0
  17. data/ext/tomoto/plda.cpp +33 -0
  18. data/ext/tomoto/slda.cpp +48 -0
  19. data/ext/tomoto/tomoto.cpp +48 -0
  20. data/ext/tomoto/utils.h +30 -0
  21. data/lib/tomoto/3.0/tomoto.so +0 -0
  22. data/lib/tomoto/3.1/tomoto.so +0 -0
  23. data/lib/tomoto/3.2/tomoto.so +0 -0
  24. data/lib/tomoto/3.3/tomoto.so +0 -0
  25. data/lib/tomoto/ct.rb +24 -0
  26. data/lib/tomoto/dmr.rb +27 -0
  27. data/lib/tomoto/dt.rb +15 -0
  28. data/lib/tomoto/gdmr.rb +15 -0
  29. data/lib/tomoto/hdp.rb +11 -0
  30. data/lib/tomoto/hlda.rb +56 -0
  31. data/lib/tomoto/hpa.rb +11 -0
  32. data/lib/tomoto/lda.rb +186 -0
  33. data/lib/tomoto/llda.rb +15 -0
  34. data/lib/tomoto/mglda.rb +15 -0
  35. data/lib/tomoto/pa.rb +11 -0
  36. data/lib/tomoto/plda.rb +15 -0
  37. data/lib/tomoto/slda.rb +37 -0
  38. data/lib/tomoto/version.rb +3 -0
  39. data/lib/tomoto.rb +27 -0
  40. data/vendor/EigenRand/EigenRand/EigenRand +24 -0
  41. data/vendor/EigenRand/LICENSE +21 -0
  42. data/vendor/EigenRand/README.md +430 -0
  43. data/vendor/eigen/COPYING.APACHE +203 -0
  44. data/vendor/eigen/COPYING.BSD +26 -0
  45. data/vendor/eigen/COPYING.GPL +674 -0
  46. data/vendor/eigen/COPYING.LGPL +502 -0
  47. data/vendor/eigen/COPYING.MINPACK +51 -0
  48. data/vendor/eigen/COPYING.MPL2 +373 -0
  49. data/vendor/eigen/COPYING.README +18 -0
  50. data/vendor/eigen/Eigen/Cholesky +45 -0
  51. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  52. data/vendor/eigen/Eigen/Core +384 -0
  53. data/vendor/eigen/Eigen/Dense +7 -0
  54. data/vendor/eigen/Eigen/Eigen +2 -0
  55. data/vendor/eigen/Eigen/Eigenvalues +60 -0
  56. data/vendor/eigen/Eigen/Geometry +59 -0
  57. data/vendor/eigen/Eigen/Householder +29 -0
  58. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  59. data/vendor/eigen/Eigen/Jacobi +32 -0
  60. data/vendor/eigen/Eigen/KLUSupport +41 -0
  61. data/vendor/eigen/Eigen/LU +47 -0
  62. data/vendor/eigen/Eigen/MetisSupport +35 -0
  63. data/vendor/eigen/Eigen/OrderingMethods +70 -0
  64. data/vendor/eigen/Eigen/PaStiXSupport +49 -0
  65. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  66. data/vendor/eigen/Eigen/QR +50 -0
  67. data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
  68. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  69. data/vendor/eigen/Eigen/SVD +50 -0
  70. data/vendor/eigen/Eigen/Sparse +34 -0
  71. data/vendor/eigen/Eigen/SparseCholesky +37 -0
  72. data/vendor/eigen/Eigen/SparseCore +69 -0
  73. data/vendor/eigen/Eigen/SparseLU +50 -0
  74. data/vendor/eigen/Eigen/SparseQR +36 -0
  75. data/vendor/eigen/Eigen/StdDeque +27 -0
  76. data/vendor/eigen/Eigen/StdList +26 -0
  77. data/vendor/eigen/Eigen/StdVector +27 -0
  78. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  79. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  80. data/vendor/eigen/README.md +5 -0
  81. data/vendor/eigen/bench/README.txt +55 -0
  82. data/vendor/eigen/bench/btl/COPYING +340 -0
  83. data/vendor/eigen/bench/btl/README +154 -0
  84. data/vendor/eigen/bench/tensors/README +20 -0
  85. data/vendor/eigen/blas/README.txt +6 -0
  86. data/vendor/eigen/ci/README.md +56 -0
  87. data/vendor/eigen/demos/mandelbrot/README +10 -0
  88. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  89. data/vendor/eigen/demos/opengl/README +13 -0
  90. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
  91. data/vendor/eigen/unsupported/README.txt +50 -0
  92. data/vendor/tomotopy/LICENSE +21 -0
  93. data/vendor/tomotopy/README.kr.rst +536 -0
  94. data/vendor/tomotopy/README.rst +555 -0
  95. data/vendor/variant/LICENSE +25 -0
  96. data/vendor/variant/LICENSE_1_0.txt +23 -0
  97. data/vendor/variant/README.md +102 -0
  98. metadata +141 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: dd86d2aae577db884b593492994fe68fccc5cc35f8ea0c0f498e4c45fc474623
4
+ data.tar.gz: 1ab6fd3c4166c34c8f288f7d71938f5da1f0948437ebc67c3557e526db16a70f
5
+ SHA512:
6
+ metadata.gz: ac159442e5b494a318b6b0dd3c702cf7570114d64416d0f69843395470957a2f073c1ceb45e5f6c2fceca532bb62a445fff1a86b8f68fd70522911581589f1ca
7
+ data.tar.gz: 73403819de8a5ab27a7558367117ed2de6a3c9e9a7e49b1b97de6b421e9234da726883b1f4b8dea57e0d5d73f858ac41a75d61607bf0dec8a8fb99bed6aed2b0
data/CHANGELOG.md ADDED
@@ -0,0 +1,65 @@
1
+ ## 0.4.0 (2023-12-28)
2
+
3
+ - Added precompiled gem for Linux ARM
4
+ - Updated tomoto to 0.12.7
5
+ - Dropped support for Ruby < 3
6
+
7
+ ## 0.3.3 (2023-02-01)
8
+
9
+ - Added `topic_label_dict` method to `LLDA`
10
+ - Fixed error with `infer` with loaded model
11
+
12
+ ## 0.3.2 (2023-01-22)
13
+
14
+ - Added precompiled gem for Mac ARM
15
+ - Updated tomoto to 0.12.4
16
+
17
+ ## 0.3.1 (2023-01-12)
18
+
19
+ - Added support for Ruby 3.2
20
+
21
+ ## 0.3.0 (2022-10-03)
22
+
23
+ - Added precompiled gems for Linux and Mac
24
+ - Updated tomoto to 0.12.3
25
+ - Dropped support for Ruby < 2.7
26
+
27
+ ## 0.2.3 (2021-08-26)
28
+
29
+ - Updated to Rice 4
30
+
31
+ ## 0.2.2 (2021-08-23)
32
+
33
+ - Reduced gem size
34
+
35
+ ## 0.2.1 (2021-08-23)
36
+
37
+ - Added support for unseen documents
38
+
39
+ ## 0.2.0 (2021-05-23)
40
+
41
+ - Updated tomoto to 0.12.0
42
+ - Dropped support for Ruby < 2.6
43
+
44
+ ## 0.1.4 (2021-03-14)
45
+
46
+ - Added `docs` method
47
+ - Updated tomoto to 0.10.2
48
+ - Updated `add_doc` to return the index of the document
49
+
50
+ ## 0.1.3 (2020-12-19)
51
+
52
+ - Updated tomoto to 0.10.0
53
+
54
+ ## 0.1.2 (2020-10-10)
55
+
56
+ - Added `summary` method
57
+ - Added `parallel` option to `train` method
58
+
59
+ ## 0.1.1 (2020-10-10)
60
+
61
+ - Added many more models
62
+
63
+ ## 0.1.0 (2020-10-09)
64
+
65
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019, bab2min
4
+ Copyright (c) 2020-2023 Andrew Kane
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,154 @@
1
+ # tomoto.rb
2
+
3
+ :tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
4
+
5
+ [![Build Status](https://github.com/ankane/tomoto-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/tomoto-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem "tomoto"
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ Train a model
18
+
19
+ ```ruby
20
+ model = Tomoto::LDA.new(k: 2)
21
+ model.add_doc(["tokens", "from", "document", "one"])
22
+ model.add_doc(["tokens", "from", "document", "two"])
23
+ model.add_doc(["tokens", "from", "document", "three"])
24
+ model.train(100) # iterations
25
+ ```
26
+
27
+ Get the summary
28
+
29
+ ```ruby
30
+ model.summary
31
+ ```
32
+
33
+ Get topic words
34
+
35
+ ```ruby
36
+ model.topic_words
37
+ ```
38
+
39
+ Save the model to a file
40
+
41
+ ```ruby
42
+ model.save("model.bin")
43
+ ```
44
+
45
+ Load the model from a file
46
+
47
+ ```ruby
48
+ model = Tomoto::LDA.load("model.bin")
49
+ ```
50
+
51
+ Get topic probabilities for a document
52
+
53
+ ```ruby
54
+ doc = model.docs[0]
55
+ doc.topics
56
+ ```
57
+
58
+ Get the number of words for each topic
59
+
60
+ ```ruby
61
+ model.count_by_topics
62
+ ```
63
+
64
+ Get the vocab
65
+
66
+ ```ruby
67
+ model.vocabs
68
+ ```
69
+
70
+ Get the log likelihood per word
71
+
72
+ ```ruby
73
+ model.ll_per_word
74
+ ```
75
+
76
+ Perform inference for unseen documents
77
+
78
+ ```ruby
79
+ doc = model.make_doc(["unseen", "doc"])
80
+ topic_dist, ll = model.infer(doc)
81
+ ```
82
+
83
+ ## Models
84
+
85
+ Supports:
86
+
87
+ - Latent Dirichlet Allocation (`LDA`)
88
+ - Labeled LDA (`LLDA`)
89
+ - Partially Labeled LDA (`PLDA`)
90
+ - Supervised LDA (`SLDA`)
91
+ - Dirichlet Multinomial Regression (`DMR`)
92
+ - Generalized Dirichlet Multinomial Regression (`GDMR`)
93
+ - Hierarchical Dirichlet Process (`HDP`)
94
+ - Hierarchical LDA (`HLDA`)
95
+ - Multi Grain LDA (`MGLDA`)
96
+ - Pachinko Allocation (`PA`)
97
+ - Hierarchical PA (`HPA`)
98
+ - Correlated Topic Model (`CT`)
99
+ - Dynamic Topic Model (`DT`)
100
+
101
+ ## API
102
+
103
+ This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
104
+
105
+ - The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
106
+ - Methods that return booleans use `?` instead of `is_` (`live_topic?` instead of `is_live_topic`)
107
+
108
+ If a method or option you need isn’t supported, feel free to open an issue.
109
+
110
+ ## Examples
111
+
112
+ - [LDA](examples/lda_basic.rb)
113
+ - [HDP](examples/hdp_basic.rb)
114
+
115
+ ## Performance
116
+
117
+ tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support them. Check which instruction set architecture it’s using with:
118
+
119
+ ```ruby
120
+ Tomoto.isa
121
+ ```
122
+
123
+ ## Parallelism
124
+
125
+ Choose a [parallelism algorithm](https://bab2min.github.io/tomotopy/v0.9.0/en/#parallel-sampling-algorithms) with:
126
+
127
+ ```ruby
128
+ model.train(parallel: :partition)
129
+ ```
130
+
131
+ Supported values are `:default`, `:none`, `:copy_merge`, and `:partition`.
132
+
133
+ ## History
134
+
135
+ View the [changelog](https://github.com/ankane/tomoto-ruby/blob/master/CHANGELOG.md)
136
+
137
+ ## Contributing
138
+
139
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
140
+
141
+ - [Report bugs](https://github.com/ankane/tomoto-ruby/issues)
142
+ - Fix bugs and [submit pull requests](https://github.com/ankane/tomoto-ruby/pulls)
143
+ - Write, clarify, or fix documentation
144
+ - Suggest or add new features
145
+
146
+ To get started with development:
147
+
148
+ ```sh
149
+ git clone --recursive https://github.com/ankane/tomoto-ruby.git
150
+ cd tomoto-ruby
151
+ bundle install
152
+ bundle exec rake compile
153
+ bundle exec rake test
154
+ ```
data/ext/tomoto/ct.cpp ADDED
@@ -0,0 +1,58 @@
1
+ #include <CT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_ct(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::ICTModel, tomoto::ILDAModel>(m, "CT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, size_t seed) {
12
+ tomoto::CTArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ if (seed >= 0) {
17
+ args.seed = seed;
18
+ }
19
+ return tomoto::ICTModel::create((tomoto::TermWeight)tw, args);
20
+ }, Rice::Return().takeOwnership())
21
+ .define_method(
22
+ "_correlations",
23
+ [](tomoto::ICTModel& self, tomoto::Tid topic_id) {
24
+ return self.getCorrelationTopic(topic_id);
25
+ })
26
+ .define_method(
27
+ "num_beta_sample",
28
+ [](tomoto::ICTModel& self) {
29
+ return self.getNumBetaSample();
30
+ })
31
+ .define_method(
32
+ "num_beta_sample=",
33
+ [](tomoto::ICTModel& self, size_t value) {
34
+ self.setNumBetaSample(value);
35
+ return value;
36
+ })
37
+ .define_method(
38
+ "num_tmn_sample",
39
+ [](tomoto::ICTModel& self) {
40
+ return self.getNumTMNSample();
41
+ })
42
+ .define_method(
43
+ "num_tmn_sample=",
44
+ [](tomoto::ICTModel& self, size_t value) {
45
+ self.setNumTMNSample(value);
46
+ return value;
47
+ })
48
+ .define_method(
49
+ "_prior_cov",
50
+ [](tomoto::ICTModel& self) {
51
+ return self.getPriorCov();
52
+ })
53
+ .define_method(
54
+ "prior_mean",
55
+ [](tomoto::ICTModel& self) {
56
+ return self.getPriorMean();
57
+ });
58
+ }
data/ext/tomoto/dmr.cpp ADDED
@@ -0,0 +1,69 @@
1
+ #include <DMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(m, "DMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::DMRArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.sigma = sigma;
16
+ args.eta = eta;
17
+ args.alphaEps = alpha_epsilon;
18
+ if (seed >= 0) {
19
+ args.seed = seed;
20
+ }
21
+ return tomoto::IDMRModel::create((tomoto::TermWeight)tw, args);
22
+ }, Rice::Return().takeOwnership())
23
+ .define_method(
24
+ "_add_doc",
25
+ [](tomoto::IDMRModel& self, std::vector<std::string> words, std::string metadata) {
26
+ auto doc = buildDoc(words);
27
+ doc.misc["metadata"] = metadata;
28
+ return self.addDoc(doc);
29
+ })
30
+ .define_method(
31
+ "alpha_epsilon",
32
+ [](tomoto::IDMRModel& self) {
33
+ return self.getAlphaEps();
34
+ })
35
+ .define_method(
36
+ "alpha_epsilon=",
37
+ [](tomoto::IDMRModel& self, tomoto::Float value) {
38
+ self.setAlphaEps(value);
39
+ return value;
40
+ })
41
+ .define_method(
42
+ "f",
43
+ [](tomoto::IDMRModel& self) {
44
+ return self.getF();
45
+ })
46
+ .define_method(
47
+ "_lambdas",
48
+ [](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
49
+ return self.getLambdaByTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "metadata_dict",
53
+ [](tomoto::IDMRModel& self) {
54
+ auto dict = self.getMetadataDict();
55
+ Array res;
56
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
57
+ for (size_t i = 0; i < dict.size(); i++) {
58
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
59
+ Object obj(value);
60
+ res.push(obj.call("force_encoding", utf8));
61
+ }
62
+ return res;
63
+ })
64
+ .define_method(
65
+ "sigma",
66
+ [](tomoto::IDMRModel& self) {
67
+ return self.getSigma();
68
+ });
69
+ }
data/ext/tomoto/dt.cpp ADDED
@@ -0,0 +1,91 @@
1
+ #include <DT.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_dt(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(m, "DT")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, size_t t, tomoto::Float alphaVar, tomoto::Float etaVar, tomoto::Float phiVar, tomoto::Float shapeA, tomoto::Float shapeB, tomoto::Float shapeC) {
12
+ // Rice only supports 10 arguments
13
+ size_t seed = -1;
14
+ tomoto::DTArgs args;
15
+ args.k = k;
16
+ args.t = t;
17
+ args.alpha = {alphaVar};
18
+ args.eta = etaVar;
19
+ args.phi = phiVar;
20
+ args.shapeA = shapeA;
21
+ args.shapeB = shapeB;
22
+ args.shapeC = shapeC;
23
+ if (seed >= 0) {
24
+ args.seed = seed;
25
+ }
26
+ return tomoto::IDTModel::create((tomoto::TermWeight)tw, args);
27
+ }, Rice::Return().takeOwnership())
28
+ .define_method(
29
+ "_add_doc",
30
+ [](tomoto::IDTModel& self, std::vector<std::string> words, uint32_t timepoint) {
31
+ auto doc = buildDoc(words);
32
+ doc.misc["timepoint"] = timepoint;
33
+ return self.addDoc(doc);
34
+ })
35
+ .define_method(
36
+ "alpha",
37
+ [](tomoto::IDTModel& self) {
38
+ Array res;
39
+ for (size_t i = 0; i < self.getK(); i++) {
40
+ Array res2;
41
+ for (size_t j = 0; j < self.getT(); j++) {
42
+ res2.push(self.getAlpha(i, j));
43
+ }
44
+ res.push(res2);
45
+ }
46
+ return res;
47
+ })
48
+ .define_method(
49
+ "lr_a",
50
+ [](tomoto::IDTModel& self) {
51
+ return self.getShapeA();
52
+ })
53
+ .define_method(
54
+ "lr_a=",
55
+ [](tomoto::IDTModel& self, tomoto::Float value) {
56
+ self.setShapeA(value);
57
+ return value;
58
+ })
59
+ .define_method(
60
+ "lr_b",
61
+ [](tomoto::IDTModel& self) {
62
+ return self.getShapeB();
63
+ })
64
+ .define_method(
65
+ "lr_b=",
66
+ [](tomoto::IDTModel& self, tomoto::Float value) {
67
+ self.setShapeB(value);
68
+ return value;
69
+ })
70
+ .define_method(
71
+ "lr_c",
72
+ [](tomoto::IDTModel& self) {
73
+ return self.getShapeC();
74
+ })
75
+ .define_method(
76
+ "lr_c=",
77
+ [](tomoto::IDTModel& self, tomoto::Float value) {
78
+ self.setShapeC(value);
79
+ return value;
80
+ })
81
+ .define_method(
82
+ "num_docs_by_timepoint",
83
+ [](tomoto::IDTModel& self) {
84
+ return self.getNumDocsByT();
85
+ })
86
+ .define_method(
87
+ "num_timepoints",
88
+ [](tomoto::IDTModel& self) {
89
+ return self.getT();
90
+ });
91
+ }
data/ext/tomoto/extconf.rb ADDED
@@ -0,0 +1,42 @@
1
# Build configuration for the tomoto native extension: sets compiler flags,
# collects the extension and vendored tomoto sources, and writes the Makefile.
require "mkmf-rice"

$CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"

# Host-specific tuning flags are skipped when RUBY_CC_VERSION is set.
# NOTE(review): presumably that variable marks a cross-compiled/precompiled
# gem build, where -march=native would be wrong - confirm.
unless ENV["RUBY_CC_VERSION"]
  host_os = RbConfig::CONFIG["host_os"]
  host_cpu = RbConfig::CONFIG["host_cpu"]

  default_optflags =
    if host_os =~ /darwin/i && host_cpu =~ /arm|aarch64/i
      # -march=native not supported with Mac ARM
      ""
    elsif host_cpu =~ /x86|x64|amd64|i[3-6]86/i
      # AVX-512F not supported yet
      # https://github.com/bab2min/tomotopy/issues/188
      "-march=native -mno-avx512f"
    else
      # -mno-avx512f is an x86-only option; non-x86 compilers (e.g. GCC on
      # Linux aarch64) reject it, so only request native tuning here.
      "-march=native"
    end

  # Users can still override the defaults with --with-optflags=...
  $CXXFLAGS << " " << with_config("optflags", default_optflags)
end

apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i

if apple_clang
  # silence rice warnings
  $CXXFLAGS += " -Wno-deprecated-declarations"
else
  # silence eigen warnings
  $CXXFLAGS += " -Wno-ignored-attributes -Wno-deprecated-copy"
end

# silence tomoto warnings
$CXXFLAGS += " -Wno-unused-variable -Wno-switch"

# Source and header locations, resolved relative to this file's directory.
ext = File.expand_path(".", __dir__)
tomoto = File.expand_path("../../vendor/tomotopy/src/TopicModel", __dir__)
eigen = File.expand_path("../../vendor/eigen", __dir__)
eigen_rand = File.expand_path("../../vendor/EigenRand", __dir__)
variant = File.expand_path("../../vendor/variant/include", __dir__)

# Compile the binding sources together with the vendored tomoto .cpp files.
$srcs = Dir["{#{ext},#{tomoto}}/*.cpp"]
$INCFLAGS += " -I#{tomoto} -I#{eigen} -I#{eigen_rand} -I#{variant}"
$VPATH << tomoto

create_makefile("tomoto/tomoto")
data/ext/tomoto/gdmr.cpp ADDED
@@ -0,0 +1,42 @@
1
+ #include <GDMR.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_gdmr(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(m, "GDMR")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, std::vector<uint64_t> degrees, tomoto::Float alpha, tomoto::Float sigma, tomoto::Float sigma0, tomoto::Float eta, tomoto::Float alpha_epsilon, size_t seed) {
12
+ tomoto::GDMRArgs args;
13
+ args.k = k;
14
+ args.degrees = degrees;
15
+ args.alpha = {alpha};
16
+ args.sigma = sigma;
17
+ args.sigma0 = sigma0;
18
+ args.eta = eta;
19
+ args.alphaEps = alpha_epsilon;
20
+ if (seed >= 0) {
21
+ args.seed = seed;
22
+ }
23
+ return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, args);
24
+ }, Rice::Return().takeOwnership())
25
+ .define_method(
26
+ "_add_doc",
27
+ [](tomoto::IGDMRModel& self, std::vector<std::string> words, std::vector<tomoto::Float> numeric_metadata) {
28
+ auto doc = buildDoc(words);
29
+ doc.misc["numeric_metadata"] = numeric_metadata;
30
+ return self.addDoc(doc);
31
+ })
32
+ .define_method(
33
+ "degrees",
34
+ [](tomoto::IGDMRModel& self) {
35
+ return self.getFs();
36
+ })
37
+ .define_method(
38
+ "sigma0",
39
+ [](tomoto::IGDMRModel& self) {
40
+ return self.getSigma0();
41
+ });
42
+ }
data/ext/tomoto/hdp.cpp ADDED
@@ -0,0 +1,47 @@
1
+ #include <HDP.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hdp(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(m, "HDP")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t k, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HDPArgs args;
13
+ args.k = k;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHDPModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHDPModel& self) {
25
+ return self.getAlpha();
26
+ })
27
+ .define_method(
28
+ "gamma",
29
+ [](tomoto::IHDPModel& self) {
30
+ return self.getGamma();
31
+ })
32
+ .define_method(
33
+ "live_k",
34
+ [](tomoto::IHDPModel& self) {
35
+ return self.getLiveK();
36
+ })
37
+ .define_method(
38
+ "live_topic?",
39
+ [](tomoto::IHDPModel& self, size_t tid) {
40
+ return self.isLiveTopic(tid);
41
+ })
42
+ .define_method(
43
+ "num_tables",
44
+ [](tomoto::IHDPModel& self) {
45
+ return self.getTotalTables();
46
+ });
47
+ }
data/ext/tomoto/hlda.cpp ADDED
@@ -0,0 +1,71 @@
1
+ #include <HLDA.h>
2
+
3
+ #include <rice/rice.hpp>
4
+
5
+ #include "utils.h"
6
+
7
+ void init_hlda(Rice::Module& m) {
8
+ Rice::define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(m, "HLDA")
9
+ .define_singleton_function(
10
+ "_new",
11
+ [](size_t tw, size_t levelDepth, tomoto::Float alpha, tomoto::Float eta, tomoto::Float gamma, size_t seed) {
12
+ tomoto::HLDAArgs args;
13
+ args.k = levelDepth;
14
+ args.alpha = {alpha};
15
+ args.eta = eta;
16
+ args.gamma = gamma;
17
+ if (seed >= 0) {
18
+ args.seed = seed;
19
+ }
20
+ return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, args);
21
+ }, Rice::Return().takeOwnership())
22
+ .define_method(
23
+ "alpha",
24
+ [](tomoto::IHLDAModel& self) {
25
+ Array res;
26
+ for (size_t i = 0; i < self.getLevelDepth(); i++) {
27
+ res.push(self.getAlpha(i));
28
+ }
29
+ return res;
30
+ })
31
+ .define_method(
32
+ "_children_topics",
33
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
34
+ return self.getChildTopicId(topic_id);
35
+ })
36
+ .define_method(
37
+ "depth",
38
+ [](tomoto::IHLDAModel& self) {
39
+ return self.getLevelDepth();
40
+ })
41
+ .define_method(
42
+ "gamma",
43
+ [](tomoto::IHLDAModel& self) {
44
+ return self.getGamma();
45
+ })
46
+ .define_method(
47
+ "_level",
48
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
49
+ return self.getLevelOfTopic(topic_id);
50
+ })
51
+ .define_method(
52
+ "live_k",
53
+ [](tomoto::IHLDAModel& self) {
54
+ return self.getLiveK();
55
+ })
56
+ .define_method(
57
+ "_live_topic?",
58
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
59
+ return self.isLiveTopic(topic_id);
60
+ })
61
+ .define_method(
62
+ "_num_docs_of_topic",
63
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
64
+ return self.getNumDocsOfTopic(topic_id);
65
+ })
66
+ .define_method(
67
+ "_parent_topic",
68
+ [](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
69
+ return self.getParentTopicId(topic_id);
70
+ });
71
+ }