tomoto 0.4.0-aarch64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +65 -0
- data/LICENSE.txt +22 -0
- data/README.md +154 -0
- data/ext/tomoto/ct.cpp +58 -0
- data/ext/tomoto/dmr.cpp +69 -0
- data/ext/tomoto/dt.cpp +91 -0
- data/ext/tomoto/extconf.rb +42 -0
- data/ext/tomoto/gdmr.cpp +42 -0
- data/ext/tomoto/hdp.cpp +47 -0
- data/ext/tomoto/hlda.cpp +71 -0
- data/ext/tomoto/hpa.cpp +32 -0
- data/ext/tomoto/lda.cpp +281 -0
- data/ext/tomoto/llda.cpp +46 -0
- data/ext/tomoto/mglda.cpp +81 -0
- data/ext/tomoto/pa.cpp +32 -0
- data/ext/tomoto/plda.cpp +33 -0
- data/ext/tomoto/slda.cpp +48 -0
- data/ext/tomoto/tomoto.cpp +48 -0
- data/ext/tomoto/utils.h +30 -0
- data/lib/tomoto/3.0/tomoto.so +0 -0
- data/lib/tomoto/3.1/tomoto.so +0 -0
- data/lib/tomoto/3.2/tomoto.so +0 -0
- data/lib/tomoto/3.3/tomoto.so +0 -0
- data/lib/tomoto/ct.rb +24 -0
- data/lib/tomoto/dmr.rb +27 -0
- data/lib/tomoto/dt.rb +15 -0
- data/lib/tomoto/gdmr.rb +15 -0
- data/lib/tomoto/hdp.rb +11 -0
- data/lib/tomoto/hlda.rb +56 -0
- data/lib/tomoto/hpa.rb +11 -0
- data/lib/tomoto/lda.rb +186 -0
- data/lib/tomoto/llda.rb +15 -0
- data/lib/tomoto/mglda.rb +15 -0
- data/lib/tomoto/pa.rb +11 -0
- data/lib/tomoto/plda.rb +15 -0
- data/lib/tomoto/slda.rb +37 -0
- data/lib/tomoto/version.rb +3 -0
- data/lib/tomoto.rb +27 -0
- data/vendor/EigenRand/EigenRand/EigenRand +24 -0
- data/vendor/EigenRand/LICENSE +21 -0
- data/vendor/EigenRand/README.md +430 -0
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +26 -0
- data/vendor/eigen/COPYING.GPL +674 -0
- data/vendor/eigen/COPYING.LGPL +502 -0
- data/vendor/eigen/COPYING.MINPACK +51 -0
- data/vendor/eigen/COPYING.MPL2 +373 -0
- data/vendor/eigen/COPYING.README +18 -0
- data/vendor/eigen/Eigen/Cholesky +45 -0
- data/vendor/eigen/Eigen/CholmodSupport +48 -0
- data/vendor/eigen/Eigen/Core +384 -0
- data/vendor/eigen/Eigen/Dense +7 -0
- data/vendor/eigen/Eigen/Eigen +2 -0
- data/vendor/eigen/Eigen/Eigenvalues +60 -0
- data/vendor/eigen/Eigen/Geometry +59 -0
- data/vendor/eigen/Eigen/Householder +29 -0
- data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/eigen/Eigen/Jacobi +32 -0
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +47 -0
- data/vendor/eigen/Eigen/MetisSupport +35 -0
- data/vendor/eigen/Eigen/OrderingMethods +70 -0
- data/vendor/eigen/Eigen/PaStiXSupport +49 -0
- data/vendor/eigen/Eigen/PardisoSupport +35 -0
- data/vendor/eigen/Eigen/QR +50 -0
- data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
- data/vendor/eigen/Eigen/SPQRSupport +34 -0
- data/vendor/eigen/Eigen/SVD +50 -0
- data/vendor/eigen/Eigen/Sparse +34 -0
- data/vendor/eigen/Eigen/SparseCholesky +37 -0
- data/vendor/eigen/Eigen/SparseCore +69 -0
- data/vendor/eigen/Eigen/SparseLU +50 -0
- data/vendor/eigen/Eigen/SparseQR +36 -0
- data/vendor/eigen/Eigen/StdDeque +27 -0
- data/vendor/eigen/Eigen/StdList +26 -0
- data/vendor/eigen/Eigen/StdVector +27 -0
- data/vendor/eigen/Eigen/SuperLUSupport +64 -0
- data/vendor/eigen/Eigen/UmfPackSupport +40 -0
- data/vendor/eigen/README.md +5 -0
- data/vendor/eigen/bench/README.txt +55 -0
- data/vendor/eigen/bench/btl/COPYING +340 -0
- data/vendor/eigen/bench/btl/README +154 -0
- data/vendor/eigen/bench/tensors/README +20 -0
- data/vendor/eigen/blas/README.txt +6 -0
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mandelbrot/README +10 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
- data/vendor/eigen/demos/opengl/README +13 -0
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
- data/vendor/eigen/unsupported/README.txt +50 -0
- data/vendor/tomotopy/LICENSE +21 -0
- data/vendor/tomotopy/README.kr.rst +536 -0
- data/vendor/tomotopy/README.rst +555 -0
- data/vendor/variant/LICENSE +25 -0
- data/vendor/variant/LICENSE_1_0.txt +23 -0
- data/vendor/variant/README.md +102 -0
- metadata +141 -0
data/lib/tomoto/dt.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor and document helper for the DT model.
  #
  # +_new+ and +_add_doc+ are not defined in this file (presumably they come
  # from the native extension); +to_tw+, +init_params+ and +prepare_doc+ are
  # the helpers defined on Tomoto::LDA.
  class DT
    # Creates a model and remembers the vocabulary-pruning options on it.
    #
    # min_cf, min_df and rm_top are only stored as instance variables here;
    # every other option is forwarded positionally to +_new+.
    # NOTE: no seed option is accepted; it was left commented out upstream.
    #
    # Returns whatever +init_params+ returns for the new model.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, t: 1, alpha_var: 0.1, eta_var: 0.1, phi_var: 0.1, lr_a: 0.01, lr_b: 0.1, lr_c: 0.55)
      instance = _new(to_tw(tw), k, t, alpha_var, eta_var, phi_var, lr_a, lr_b, lr_c)
      { :@min_cf => min_cf, :@min_df => min_df, :@rm_top => rm_top }.each do |ivar, value|
        instance.instance_variable_set(ivar, value)
      end
      # binding is taken at method level so init_params can read the keywords.
      init_params(instance, binding)
    end

    # Adds a document (String or Array of tokens) for the given timepoint.
    def add_doc(doc, timepoint: 0)
      tokens = prepare_doc(doc)
      _add_doc(tokens, timepoint)
    end
  end
end
|
data/lib/tomoto/gdmr.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor and document helper for the GDMR model.
  #
  # +_new+ and +_add_doc+ are not defined in this file (presumably they come
  # from the native extension); +to_tw+, +init_params+ and +prepare_doc+ are
  # the helpers defined on Tomoto::LDA.
  class GDMR
    # Creates a model and remembers the vocabulary-pruning options on it.
    #
    # The positional order expected by +_new+ differs from the keyword order:
    # sigma and sigma0 are passed before eta. A nil seed is passed as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, degrees: [], alpha: 0.1, eta: 0.01, sigma: 1.0, sigma0: 3.0, alpha_epsilon: 1e-10, seed: nil)
      native_args = [to_tw(tw), k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed || -1]
      instance = _new(*native_args)
      instance.instance_variable_set(:@min_cf, min_cf)
      instance.instance_variable_set(:@min_df, min_df)
      instance.instance_variable_set(:@rm_top, rm_top)
      init_params(instance, binding)
    end

    # Adds a document together with its numeric metadata.
    def add_doc(doc, numeric_metadata: [])
      tokens = prepare_doc(doc)
      _add_doc(tokens, numeric_metadata)
    end
  end
end
|
data/lib/tomoto/hdp.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor for the HDP model.
  #
  # +_new+ is not defined in this file (presumably it comes from the native
  # extension); +to_tw+ and +init_params+ are the helpers defined on
  # Tomoto::LDA.
  class HDP
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables and records the constructor keywords via +init_params+.
    # A nil seed is passed to +_new+ as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, initial_k: 2, alpha: 0.1, eta: 0.01, gamma: 0.1, seed: nil)
      instance = _new(to_tw(tw), initial_k, alpha, eta, gamma, seed || -1).tap do |m|
        m.instance_variable_set(:@min_cf, min_cf)
        m.instance_variable_set(:@min_df, min_df)
        m.instance_variable_set(:@rm_top, rm_top)
      end
      init_params(instance, binding)
    end
  end
end
|
data/lib/tomoto/hlda.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
module Tomoto
  # Ruby-side API for the HLDA model: constructor, topic-tree queries and the
  # topic section of the summary.
  #
  # Underscore-prefixed methods and +k+ are not defined in this file
  # (presumably they come from the native extension); +to_tw+, +init_params+,
  # +count_by_topics+ and +topic_words+ are defined on Tomoto::LDA.
  class HLDA
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables and records the constructor keywords via +init_params+.
    # A nil seed is passed to +_new+ as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, depth: 2, alpha: 0.1, eta: 0.01, gamma: 0.1, seed: nil)
      model = _new(to_tw(tw), depth, alpha, eta, gamma, seed || -1)
      model.instance_variable_set(:@min_cf, min_cf)
      model.instance_variable_set(:@min_df, min_df)
      model.instance_variable_set(:@rm_top, rm_top)
      init_params(model, binding)
    end

    # Returns the children of +topic_id+ as reported by +_children_topics+.
    # Raises RuntimeError if the id is out of range or the model is unprepared.
    def children_topics(topic_id)
      check_topic(topic_id)
      _children_topics(topic_id)
    end

    # Returns the level of +topic_id+, or -1 when the topic is not live.
    # Raises RuntimeError if the id is out of range or the model is unprepared.
    def level(topic_id)
      check_topic(topic_id)
      _live_topic?(topic_id) ? _level(topic_id) : -1
    end

    # Tells whether +topic_id+ is live.
    # Raises RuntimeError if the id is out of range or the model is unprepared.
    def live_topic?(topic_id)
      check_topic(topic_id)
      _live_topic?(topic_id)
    end

    # Returns the number of documents of +topic_id+.
    # Raises RuntimeError if the id is out of range or the model is unprepared.
    def num_docs_of_topic(topic_id)
      check_topic(topic_id)
      _num_docs_of_topic(topic_id)
    end

    # Returns the parent of +topic_id+, or -1 when the topic is not live.
    # Raises RuntimeError if the id is out of range or the model is unprepared.
    def parent_topic(topic_id)
      check_topic(topic_id)
      _live_topic?(topic_id) ? _parent_topic(topic_id) : -1
    end

    private

    # Validates a topic id before it is handed to the underscore-prefixed
    # methods. Negative ids are rejected as well as ids >= k, so only ids in
    # 0...k ever reach those calls.
    def check_topic(topic_id)
      raise "topic_id must be >= 0" if topic_id < 0
      raise "topic_id must be < K" if topic_id >= k
      raise "train() should be called first" unless @prepared
    end

    # Appends one line per topic to +summary+, walking the topic tree
    # depth-first from topic 0 and indenting each topic by its depth.
    def topics_info(summary, topic_word_top_n:)
      counts = count_by_topics

      nested_info = lambda do |topic_id = 0, depth = 0|
        words = topic_words(topic_id, top_n: topic_word_top_n).keys.join(" ")
        summary << "| #{" " * depth}##{topic_id} (#{counts[topic_id]}) : #{words}"
        children_topics(topic_id).sort.each do |child|
          nested_info.call(child, depth + 1)
        end
      end
      nested_info.call
    end
  end
end
|
data/lib/tomoto/hpa.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor for the HPA model.
  #
  # +_new+ is not defined in this file (presumably it comes from the native
  # extension); +to_tw+ and +init_params+ are the helpers defined on
  # Tomoto::LDA.
  class HPA
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables and records the constructor keywords via +init_params+.
    # A nil seed is passed to +_new+ as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
      native_seed = seed || -1
      instance = _new(to_tw(tw), k1, k2, alpha, eta, native_seed)
      [[:@min_cf, min_cf], [:@min_df, min_df], [:@rm_top, rm_top]].each do |ivar, value|
        instance.instance_variable_set(ivar, value)
      end
      init_params(instance, binding)
    end
  end
end
|
data/lib/tomoto/lda.rb
ADDED
@@ -0,0 +1,186 @@
|
|
1
|
+
module Tomoto
  # Ruby-level API of the LDA topic model.
  #
  # Underscore-prefixed methods (+_new+, +_train+, ...) and readers such as
  # +k+, +alpha+, +eta+, +global_step+ or +used_vocab_freq+ are not defined in
  # this file; they are presumably supplied by the native extension.
  # TERM_WEIGHT, PARALLEL_SCHEME and VERSION are constants of the Tomoto module.
  class LDA
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables (they are only used later, by +prepare+) and records the
    # constructor keywords via +init_params+. A nil seed is passed as -1.
    #
    # Raises ArgumentError if +tw+ is not one of TERM_WEIGHT.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
      model = _new(to_tw(tw), k, alpha, eta, seed || -1)
      model.instance_variable_set(:@min_cf, min_cf)
      model.instance_variable_set(:@min_df, min_df)
      model.instance_variable_set(:@rm_top, rm_top)
      init_params(model, binding)
    end

    # Builds a model with default options and loads +filename+ into it.
    def self.load(filename)
      model = new
      model._load(filename)
      model
    end

    # Adds a document (a String, split on whitespace, or an Array of tokens).
    # Raises RuntimeError once the model has been prepared (see +prepare_doc+).
    def add_doc(doc)
      _add_doc(prepare_doc(doc))
    end

    # Builds a document object from +doc+ without the "already prepared" check.
    def make_doc(doc)
      _make_doc(tokenize_doc(doc))
    end

    # TODO support multiple docs
    # Runs inference for +doc+; raises RuntimeError if the model has not been
    # trained, and ArgumentError for an unknown +parallel+ scheme.
    def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
      raise "cannot infer with untrained model" unless trained?
      _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
    end

    # Prepares the model if needed and returns +_count_by_topics+.
    def count_by_topics
      prepare
      _count_by_topics
    end

    # Prepares the model if needed and returns the words removed by rm_top.
    def removed_top_words
      prepare
      _removed_top_words(@rm_top)
    end

    # Saves the model to +filename+; +full+ is forwarded to +_save+.
    def save(filename, full: true)
      _save(filename, full)
    end

    # returns string instead of printing
    #
    # Sections are separated by a "|" line; each of the optional sections can
    # be switched off (topic_word_top_n <= 0 disables the topic section).
    def summary(initial_hp: true, params: true, topic_word_top_n: 5)
      summary = []

      summary << "<Basic Info>"
      basic_info(summary)
      summary << "|"

      summary << "<Training Info>"
      training_info(summary)
      summary << "|"

      if initial_hp
        summary << "<Initial Parameters>"
        initial_params_info(summary)
        summary << "|"
      end

      if params
        summary << "<Parameters>"
        params_info(summary)
        summary << "|"
      end

      if topic_word_top_n > 0
        summary << "<Topics>"
        topics_info(summary, topic_word_top_n: topic_word_top_n)
        summary << "|"
      end

      # skip ending |
      summary.pop

      summary.join("\n")
    end

    # Returns the top words of one topic, or of every topic (as an Array with
    # one entry per topic) when +topic_id+ is omitted.
    def topic_words(topic_id = nil, top_n: 10)
      if topic_id
        _topic_words(topic_id, top_n)
      else
        k.times.map { |i| _topic_words(i, top_n) }
      end
    end

    # TODO raise error if iterations < 1
    # Prepares the model if needed and trains it for +iterations+ steps.
    # Raises ArgumentError for an unknown +parallel+ scheme.
    def train(iterations = 10, workers: 0, parallel: :default)
      prepare
      _train(iterations, workers, to_ps(parallel))
    end

    # Returns the term-weighting scheme as a symbol from TERM_WEIGHT.
    def tw
      TERM_WEIGHT[_tw]
    end

    private

    def trained?
      global_step.positive?
    end

    # Runs +_prepare+ once with the stored pruning options; later calls are
    # no-ops because @prepared is then defined.
    def prepare
      unless defined?(@prepared)
        _prepare(@min_cf, @min_df, @rm_top)
        @prepared = true
      end
    end

    # Tokenizes +doc+ for add_doc, refusing new documents after preparation.
    def prepare_doc(doc)
      raise "cannot add_doc() after train()" if defined?(@prepared)
      tokenize_doc(doc)
    end

    # Returns +doc+ unchanged when it is already an Array, otherwise splits it
    # on runs of whitespace.
    #
    # String#split keeps a leading empty field when the text starts with a
    # separator, which would add "" as a word; empty tokens are dropped.
    def tokenize_doc(doc)
      return doc if doc.is_a?(Array)

      doc.split(/[[:space:]]+/).reject(&:empty?)
    end

    def basic_info(summary)
      sum = used_vocab_freq.sum.to_f
      mapped = used_vocab_freq.map { |v| v / sum }
      # NOTE(review): this is sum(p * log(p)), i.e. the value is <= 0; Shannon
      # entropy is usually reported as the negation - confirm the intended sign.
      entropy = mapped.map { |v| v * Math.log(v) }.sum

      summary << "| #{self.class.name.sub("Tomoto::", "")} (current version: #{VERSION})"
      summary << "| #{num_docs} docs, #{num_words} words"
      summary << "| Total Vocabs: #{vocabs.size}, Used Vocabs: #{used_vocabs.size}"
      summary << "| Entropy of words: %.5f" % entropy
      # Looked up once, and only here, so the model is prepared at the same
      # point as before.
      removed = removed_top_words
      summary << "| Removed Vocabs: #{removed.any? ? removed.join(" ") : "<NA>"}"
    end

    def training_info(summary)
      summary << "| Iterations: #{global_step}, Burn-in steps: #{burn_in}"
      summary << "| Optimization Interval: #{optim_interval}"
      summary << "| Log-likelihood per word: %.5f" % ll_per_word
    end

    # Lists the constructor keywords captured by +init_params+; models that
    # lack @init_params report "Not Available".
    def initial_params_info(summary)
      if defined?(@init_params)
        @init_params.each do |k, v|
          summary << "| #{k}: #{v}"
        end
      else
        summary << "| Not Available"
      end
    end

    def params_info(summary)
      summary << "| alpha (Dirichlet prior on the per-document topic distributions)"
      summary << "|  #{alpha}"
      summary << "| eta (Dirichlet prior on the per-topic word distribution)"
      summary << "|  %.5f" % eta
    end

    def topics_info(summary, topic_word_top_n:)
      counts = count_by_topics
      topic_words(top_n: topic_word_top_n).each_with_index do |words, i|
        summary << "| ##{i} (#{counts[i]}) : #{words.keys.join(" ")}"
      end
    end

    # Maps a parallel-scheme symbol to its index in PARALLEL_SCHEME.
    def to_ps(ps)
      PARALLEL_SCHEME.index(ps) || (raise ArgumentError, "Invalid parallel scheme: #{ps}")
    end

    class << self
      private

      # Maps a term-weight symbol to its index in TERM_WEIGHT.
      def to_tw(tw)
        TERM_WEIGHT.index(tw) || (raise ArgumentError, "Invalid tw: #{tw}")
      end

      # Stores the inspected value of every keyword argument of +new+ on the
      # model as @init_params, reading the values from the caller's binding,
      # and returns the model.
      def init_params(model, binding)
        init_params = {}
        method(:new).parameters.each do |v|
          next if v[0] != :key
          init_params[v[1]] = binding.local_variable_get(v[1]).inspect
        end
        model.instance_variable_set(:@init_params, init_params)
        model
      end
    end
  end
end
|
data/lib/tomoto/llda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor and document helper for the LLDA model.
  #
  # +_new+ and +_add_doc+ are not defined in this file (presumably they come
  # from the native extension); +to_tw+, +init_params+ and +prepare_doc+ are
  # the helpers defined on Tomoto::LDA.
  class LLDA
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables and records the constructor keywords via +init_params+.
    # A nil seed is passed to +_new+ as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
      instance = _new(to_tw(tw), k, alpha, eta, seed || -1)
      { :@min_cf => min_cf, :@min_df => min_df, :@rm_top => rm_top }.each_pair do |ivar, value|
        instance.instance_variable_set(ivar, value)
      end
      init_params(instance, binding)
    end

    # Adds a document together with its labels.
    def add_doc(doc, labels: [])
      tokens = prepare_doc(doc)
      _add_doc(tokens, labels)
    end
  end
end
|
data/lib/tomoto/mglda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor and document helper for the MGLDA model.
  #
  # +_new+ and +_add_doc+ are not defined in this file (presumably they come
  # from the native extension); +to_tw+, +init_params+ and +prepare_doc+ are
  # the helpers defined on Tomoto::LDA.
  class MGLDA
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables and records the constructor keywords via +init_params+.
    # NOTE: eta_l, gamma and seed are not accepted; they were left commented
    # out of the signature upstream.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k_g: 1, k_l: 1, t: 3, alpha_g: 0.1, alpha_l: 0.1, alpha_mg: 0.1, alpha_ml: 0.1, eta_g: 0.01)
      native_args = [to_tw(tw), k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g]
      instance = _new(*native_args)
      instance.instance_variable_set(:@min_cf, min_cf)
      instance.instance_variable_set(:@min_df, min_df)
      instance.instance_variable_set(:@rm_top, rm_top)
      init_params(instance, binding)
    end

    # Adds a document; +delimiter+ is forwarded unchanged to +_add_doc+.
    def add_doc(doc, delimiter: ".")
      tokens = prepare_doc(doc)
      _add_doc(tokens, delimiter)
    end
  end
end
|
data/lib/tomoto/pa.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor for the PA model.
  #
  # +_new+ is not defined in this file (presumably it comes from the native
  # extension); +to_tw+ and +init_params+ are the helpers defined on
  # Tomoto::LDA.
  class PA
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables and records the constructor keywords via +init_params+.
    # A nil seed is passed to +_new+ as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
      instance = _new(to_tw(tw), k1, k2, alpha, eta, seed || -1).tap do |m|
        m.instance_variable_set(:@min_cf, min_cf)
        m.instance_variable_set(:@min_df, min_df)
        m.instance_variable_set(:@rm_top, rm_top)
      end
      init_params(instance, binding)
    end
  end
end
|
data/lib/tomoto/plda.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor and document helper for the PLDA model.
  #
  # +_new+ and +_add_doc+ are not defined in this file (presumably they come
  # from the native extension); +to_tw+, +init_params+ and +prepare_doc+ are
  # the helpers defined on Tomoto::LDA.
  class PLDA
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables and records the constructor keywords via +init_params+.
    # A nil seed is passed to +_new+ as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, latent_topics: 1, alpha: 0.1, eta: 0.01, seed: nil)
      native_seed = seed || -1
      instance = _new(to_tw(tw), latent_topics, alpha, eta, native_seed)
      [[:@min_cf, min_cf], [:@min_df, min_df], [:@rm_top, rm_top]].each do |ivar, value|
        instance.instance_variable_set(ivar, value)
      end
      init_params(instance, binding)
    end

    # Adds a document together with its labels.
    def add_doc(doc, labels: [])
      tokens = prepare_doc(doc)
      _add_doc(tokens, labels)
    end
  end
end
|
data/lib/tomoto/slda.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module Tomoto
  # Ruby-side constructor and helpers for the SLDA model.
  #
  # +_new+, +_add_doc+ and +_var_type+ are not defined in this file
  # (presumably they come from the native extension); +to_tw+, +init_params+
  # and +prepare_doc+ are the helpers defined on Tomoto::LDA.
  class SLDA
    # Creates a model, stores min_cf / min_df / rm_top on it as instance
    # variables and records the constructor keywords via +init_params+.
    #
    # +vars+ is a string with one character per response variable; each
    # character is translated by +to_glm+ ("l" => 0, "b" => 1) and any other
    # character raises RuntimeError. A nil seed is passed to +_new+ as -1.
    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, vars: "", alpha: 0.1, eta: 0.01, mu: [], nu_sq: [], glm_param: [], seed: nil)
      weight = to_tw(tw)
      glm_codes = vars.chars.map { |code| to_glm(code) }
      instance = _new(weight, k, glm_codes, alpha, eta, mu, nu_sq, glm_param, seed || -1)
      instance.instance_variable_set(:@min_cf, min_cf)
      instance.instance_variable_set(:@min_df, min_df)
      instance.instance_variable_set(:@rm_top, rm_top)
      init_params(instance, binding)
    end

    # Adds a document together with its response values +y+.
    def add_doc(doc, y: [])
      tokens = prepare_doc(doc)
      _add_doc(tokens, y)
    end

    # Returns +_var_type+ for +var_id+; raises RuntimeError while @prepared
    # is not set.
    def var_type(var_id)
      raise "train() should be called first" unless @prepared

      _var_type(var_id)
    end

    class << self
      private

      # Translates one character of +vars+ into its integer code.
      def to_glm(v)
        { "l" => 0, "b" => 1 }.fetch(v) { raise "Invalid var: #{v}" }
      end
    end
  end
end
|
data/lib/tomoto.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# ext
|
2
|
+
# Prefer the precompiled extension that matches the running Ruby's
# "major.minor" series, then fall back to the unversioned extension path.
# The series is taken textually from RUBY_VERSION: a Float conversion would
# collapse a two-digit minor version (e.g. "3.10.0".to_f == 3.1).
begin
  require "tomoto/#{RUBY_VERSION[/\d+\.\d+/]}/tomoto"
rescue LoadError
  require "tomoto/tomoto"
end
|
7
|
+
|
8
|
+
# modules
|
9
|
+
require_relative "tomoto/ct"
|
10
|
+
require_relative "tomoto/dmr"
|
11
|
+
require_relative "tomoto/dt"
|
12
|
+
require_relative "tomoto/gdmr"
|
13
|
+
require_relative "tomoto/hdp"
|
14
|
+
require_relative "tomoto/hlda"
|
15
|
+
require_relative "tomoto/hpa"
|
16
|
+
require_relative "tomoto/lda"
|
17
|
+
require_relative "tomoto/llda"
|
18
|
+
require_relative "tomoto/mglda"
|
19
|
+
require_relative "tomoto/pa"
|
20
|
+
require_relative "tomoto/plda"
|
21
|
+
require_relative "tomoto/slda"
|
22
|
+
require_relative "tomoto/version"
|
23
|
+
|
24
|
+
module Tomoto
  # Lookup tables: Tomoto::LDA converts a symbol to the integer handed to the
  # extension with Array#index, and converts back by indexing, so the order of
  # the entries is significant. Frozen so the tables cannot be mutated at
  # runtime.
  PARALLEL_SCHEME = %i[default none copy_merge partition].freeze
  TERM_WEIGHT = %i[one idf pmi].freeze
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
/**
|
2
|
+
* @file EigenRand
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
4
|
+
* @brief
|
5
|
+
* @version 0.4.1
|
6
|
+
* @date 2022-08-13
|
7
|
+
*
|
8
|
+
* @copyright Copyright (c) 2020-2021
|
9
|
+
*
|
10
|
+
*/
|
11
|
+
|
12
|
+
#ifndef EIGENRAND_EIGENRAND_H
|
13
|
+
#define EIGENRAND_EIGENRAND_H
|
14
|
+
|
15
|
+
#include <Eigen/Dense>
|
16
|
+
|
17
|
+
#include <Eigen/src/Core/util/DisableStupidWarnings.h>
|
18
|
+
|
19
|
+
#include "Macro.h"
|
20
|
+
#include "Core.h"
|
21
|
+
|
22
|
+
#include <Eigen/src/Core/util/ReenableStupidWarnings.h>
|
23
|
+
|
24
|
+
#endif
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2020, bab2min
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|