lda-ruby 0.5.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +69 -0
- data/Gemfile +9 -0
- data/README.md +160 -0
- data/VERSION.yml +5 -0
- data/docs/modernization-handoff.md +233 -0
- data/docs/porting-strategy.md +148 -0
- data/docs/precompiled-platform-policy.md +81 -0
- data/docs/precompiled-target-evaluation.md +67 -0
- data/docs/release-runbook.md +192 -0
- data/docs/rust-orchestration-guardrails.md +50 -0
- data/ext/lda-ruby/cokus.c +144 -0
- data/ext/lda-ruby/cokus.h +27 -0
- data/ext/lda-ruby/extconf.rb +13 -0
- data/ext/lda-ruby/lda-alpha.c +96 -0
- data/ext/lda-ruby/lda-alpha.h +21 -0
- data/ext/lda-ruby/lda-data.c +67 -0
- data/ext/lda-ruby/lda-data.h +14 -0
- data/ext/lda-ruby/lda-inference.c +1023 -0
- data/ext/lda-ruby/lda-inference.h +63 -0
- data/ext/lda-ruby/lda-model.c +345 -0
- data/ext/lda-ruby/lda-model.h +31 -0
- data/ext/lda-ruby/lda-x64-mingw-ucrt.def +2 -0
- data/ext/lda-ruby/lda.h +54 -0
- data/ext/lda-ruby/utils.c +119 -0
- data/ext/lda-ruby/utils.h +18 -0
- data/ext/lda-ruby-rust/Cargo.toml +12 -0
- data/ext/lda-ruby-rust/README.md +73 -0
- data/ext/lda-ruby-rust/extconf.rb +135 -0
- data/ext/lda-ruby-rust/include/strings.h +35 -0
- data/ext/lda-ruby-rust/src/lib.rs +1263 -0
- data/lda-ruby.gemspec +78 -0
- data/lib/lda-ruby/backends/base.rb +133 -0
- data/lib/lda-ruby/backends/native.rb +158 -0
- data/lib/lda-ruby/backends/pure_ruby.rb +675 -0
- data/lib/lda-ruby/backends/rust.rb +607 -0
- data/lib/lda-ruby/backends.rb +58 -0
- data/lib/lda-ruby/config/stopwords.yml +571 -0
- data/lib/lda-ruby/corpus/corpus.rb +45 -0
- data/lib/lda-ruby/corpus/data_corpus.rb +22 -0
- data/lib/lda-ruby/corpus/directory_corpus.rb +25 -0
- data/lib/lda-ruby/corpus/text_corpus.rb +27 -0
- data/lib/lda-ruby/document/data_document.rb +30 -0
- data/lib/lda-ruby/document/document.rb +40 -0
- data/lib/lda-ruby/document/text_document.rb +39 -0
- data/lib/lda-ruby/lda.so +0 -0
- data/lib/lda-ruby/rust_build_policy.rb +21 -0
- data/lib/lda-ruby/version.rb +5 -0
- data/lib/lda-ruby/vocabulary.rb +46 -0
- data/lib/lda-ruby.rb +413 -0
- data/lib/lda_ruby_rust.so +0 -0
- data/license.txt +504 -0
- data/test/backend_compatibility_test.rb +146 -0
- data/test/backends_selection_test.rb +100 -0
- data/test/benchmark_scripts_test.rb +23 -0
- data/test/data/docs.dat +46 -0
- data/test/data/sample.rb +20 -0
- data/test/data/wiki-test-docs.yml +123 -0
- data/test/gemspec_test.rb +27 -0
- data/test/lda_ruby_test.rb +319 -0
- data/test/packaged_gem_smoke_test.rb +33 -0
- data/test/pure_ruby_orchestration_test.rb +109 -0
- data/test/release_scripts_test.rb +93 -0
- data/test/rust_build_policy_test.rb +23 -0
- data/test/rust_orchestration_test.rb +911 -0
- data/test/simple_pipeline_test.rb +22 -0
- data/test/simple_yaml.rb +17 -0
- data/test/test_helper.rb +10 -0
- metadata +118 -0
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
# End-to-end tests for the public Lda classes: documents, corpora, the
# vocabulary and the Lda::Lda model front-end.
#
# All assert_equal calls use test-unit's (expected, actual) argument order so
# that failure messages label the two values correctly.
class LdaRubyTest < Test::Unit::TestCase
  context "A Document instance" do
    setup do
      @corpus = Lda::Corpus.new
    end

    context "A typical Document" do
      setup do
        @document = Lda::Document.new(@corpus)
      end

      should "not have text" do
        assert !@document.text?
      end

      should "be empty" do
        assert_equal 0, @document.total
        assert_equal 0, @document.length
      end

      context "after adding words" do
        setup do
          # Five distinct word ids whose counts sum to 8.
          @document.words << 1 << 2 << 3 << 4 << 5
          @document.counts << 2 << 1 << 1 << 1 << 3
          @document.recompute
        end

        should "have word count equal to what was added" do
          assert_equal 5, @document.length
        end

        should "have total words equal to the sum of the counts" do
          assert_equal 8, @document.total
        end
      end
    end

    context "A typical DataDocument" do
      setup do
        # Same five word ids and counts as above, given as one data line.
        @data = '5 1:2 2:1 3:1 4:1 5:3'
        @document = Lda::DataDocument.new(@corpus, @data)
      end

      should "not have text" do
        assert !@document.text?
      end

      should "have word count equal to what was added" do
        assert_equal 5, @document.length
      end

      should "have total words equal to the sum of the counts" do
        assert_equal 8, @document.total
      end

      should "have words equal to the order they were entered" do
        assert_equal [1, 2, 3, 4, 5], @document.words
      end

      should "have counts equal to the order they were entered" do
        assert_equal [2, 1, 1, 1, 3], @document.counts
      end
    end

    context "A typical TextDocument" do
      setup do
        # Nine space-separated tokens, five of them distinct.
        @text = 'stop words stop stop masterful stoppage buffalo buffalo buffalo'
        @document = Lda::TextDocument.new(@corpus, @text)
      end

      should "have text" do
        assert @document.text?
      end

      should "have word count equal to what was added" do
        assert_equal 5, @document.length
      end

      should "have total words equal to the sum of the counts" do
        assert_equal @text.split(/ /).size, @document.total
      end

      should "have tokens in the order they were entered" do
        assert_equal @text.split(/ /), @document.tokens
      end
    end
  end

  context "A Corpus instance" do
    context "A typical Lda::Corpus instance" do
      setup do
        @corpus = Lda::Corpus.new
        @document1 = Lda::TextDocument.new(@corpus, 'This is the document that never ends. Oh wait yeah it does.')
        @document2 = Lda::TextDocument.new(@corpus, 'A second document that is just as lame as the first.')
      end

      should "be able to add new documents" do
        assert @corpus.respond_to?(:add_document)
        @corpus.add_document(@document1)
        assert_equal 1, @corpus.documents.size
      end

      should "update vocabulary with words in the document" do
        @corpus.add_document(@document2)
        assert_equal true, @corpus.vocabulary.words.member?('lame')
      end
    end

    context "An Lda::DataCorpus instance loaded from a file" do
      setup do
        @filename = File.join(File.dirname(__FILE__), 'data', 'docs.dat')
        @filetext = File.read(@filename)
        @corpus = Lda::DataCorpus.new(@filename)
      end

      should "contain the number of documents equivalent to the number of lines in the file" do
        assert_equal @filetext.split(/\n/).size, @corpus.num_docs
      end

      should "not load any words into the vocabulary since none were given" do
        assert_equal 0, @corpus.vocabulary.words.size
      end
    end

    context "An Lda::TextCorpus instance loaded from a file" do
      setup do
        @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
        @filedocs = YAML.load_file(@filename)
        @corpus = Lda::TextCorpus.new(@filename)
      end

      should "contain the number of documents equivalent to the number of lines in the file" do
        assert_equal @filedocs.size, @corpus.num_docs
      end

      should "update the vocabulary with the words that were loaded" do
        assert @corpus.vocabulary.words.size > 0
      end
    end

    context "An Lda::DirectoryCorpus instance loaded from a directory" do
      setup do
        @path = File.join(File.dirname(__FILE__), 'data', 'tmp')
        @extension = 'txt'
        # A previous aborted run may have left the directory behind; a bare
        # Dir.mkdir would then raise Errno::EEXIST and fail every test here.
        Dir.mkdir(@path) unless Dir.exist?(@path)
        @original_filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
        @filedocs = YAML.load_file(@original_filename)
        # Write each YAML entry to its own doc_N.txt file.
        @filedocs.each_with_index do |doc, idx|
          File.write(File.join(@path, "doc_#{idx + 1}.txt"), doc)
        end

        @corpus = Lda::DirectoryCorpus.new(@path, @extension)
      end

      should "load a document for every file in the directory" do
        assert_equal @filedocs.size, @corpus.num_docs
      end

      should "update the vocabulary with the words that were loaded" do
        assert @corpus.vocabulary.words.size > 0
      end

      teardown do
        # Dir.rmdir only removes empty directories, so delete the files first.
        Dir.glob(File.join(@path, "*.#{@extension}")).each { |f| File.unlink(f) }
        Dir.rmdir(@path)
      end
    end
  end

  context "A Vocabulary instance" do
    setup do
      @vocab = Lda::Vocabulary.new
      @words = ['word1', 'word2', 'word3', 'word4', 'word5', 'word6']
      # Plain-text fixture: one word per line.
      @filename1 = File.join(File.dirname(__FILE__), 'data', 'tmp_file.txt')
      File.open(@filename1, 'w') do |f|
        @words.each { |w| f.write("#{w}\n") }
      end
      # YAML fixture: the same words dumped as a YAML array.
      @filename2 = File.join(File.dirname(__FILE__), 'data', 'tmp_file.yml')
      File.open(@filename2, 'w') { |f| YAML.dump(@words, f) }
    end

    should "load a file containing a list of words, one per line" do
      assert_equal 0, @vocab.num_words
      @vocab.load_file(@filename1)
      assert @vocab.words.size > 0
    end

    should "load a yaml file containing a list of words" do
      assert_equal 0, @vocab.num_words
      @vocab.load_yaml(@filename2)
      assert @vocab.num_words > 0
    end

    should "return indexes for words in the order they were loaded" do
      @vocab.load_yaml(@filename2)
      @words.each_with_index do |word, idx|
        # The expected index is the 1-based position in the loaded list.
        assert_equal idx + 1, @vocab.check_word(word)
      end
    end

    teardown do
      File.unlink(@filename1)
      File.unlink(@filename2)
    end
  end

  context "An Lda::Lda instance" do
    setup do
      @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
      @filedocs = YAML.load_file(@filename)
      @corpus = Lda::TextCorpus.new(@filename)

      @lda = Lda::Lda.new(@corpus)
    end

    should "have loaded the vocabulary from the corpus" do
      assert !@lda.vocab.nil?
    end

    should "have loaded the same number of words in the vocabulary as are in the original" do
      assert_equal @corpus.vocabulary.num_words, @lda.vocab.size
    end

    should "have default values for the main settings" do
      assert !@lda.max_iter.nil?
      assert !@lda.convergence.nil?
      assert !@lda.em_max_iter.nil?
      assert !@lda.em_convergence.nil?
      assert !@lda.num_topics.nil?
      assert !@lda.init_alpha.nil?
      assert !@lda.est_alpha.nil?
    end

    should "expose the selected backend name" do
      assert(["native", "pure_ruby", "rust"].include?(@lda.backend_name))
    end

    should "raise when rust backend is requested but extension is unavailable" do
      if Lda::RUST_EXTENSION_LOADED
        # Nothing to check when the extension is loaded; pass trivially.
        assert true
      else
        assert_raise(LoadError) { Lda::Lda.new(@corpus, backend: :rust) }
      end
    end

    should "run with rust backend when extension is available" do
      if Lda::RUST_EXTENSION_LOADED
        rust_lda = Lda::Lda.new(@corpus, backend: :rust, random_seed: 1234)
        rust_lda.verbose = false
        rust_lda.num_topics = 4
        rust_lda.em("seeded")

        assert_equal "rust", rust_lda.backend_name
        assert_equal @corpus.num_docs, rust_lda.gamma.size
        assert_equal @corpus.num_docs, rust_lda.phi.size
      else
        # Nothing to check when the extension is missing; pass trivially.
        assert true
      end
    end

    context "after running em" do
      setup do
        @lda.verbose = false
        @lda.num_topics = 8
        @lda.em('random')
      end

      should "phi should be defined" do
        assert !@lda.phi.nil?
      end

      should "return the top 10 list of words for each topic" do
        topics = @lda.top_words(10)
        assert topics.is_a?(Hash)
        assert_equal @lda.num_topics, topics.size

        topics.each_value do |top_n_words|
          assert_equal 10, top_n_words.size
        end
      end

      context "after computing topic-document probabilities" do
        setup do
          @topic_doc_probs = @lda.compute_topic_document_probability
        end

        should "have a row for each document" do
          assert_equal @corpus.num_docs, @topic_doc_probs.size
        end

        should "have columns for each topic" do
          @topic_doc_probs.each do |doc|
            assert_equal @lda.num_topics, doc.size
          end
        end
      end
    end

    context "using the pure-ruby backend" do
      setup do
        @lda = Lda::Lda.new(@corpus, backend: :pure, random_seed: 1234)
        @lda.verbose = false
        @lda.num_topics = 6
        @lda.max_iter = 20
        @lda.em_max_iter = 30
        @lda.em('random')
      end

      should "run em and generate model matrices" do
        assert_equal "pure_ruby", @lda.backend_name
        assert_equal @lda.num_topics, @lda.beta.size
        assert_equal @corpus.num_docs, @lda.gamma.size
        assert_equal @corpus.num_docs, @lda.phi.size
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "tmpdir"
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require_relative "../bin/packaged-gem-smoke"
|
|
5
|
+
|
|
6
|
+
# Tests for Lda::PackagedGemSmoke.gem_path_under_prefix?, which is loaded from
# ../bin/packaged-gem-smoke by the require at the top of this file.
class PackagedGemSmokeTest < Test::Unit::TestCase
  # A gem path and a prefix should match when one of them is reached through a
  # symlink to the other's root directory.
  def test_gem_path_under_prefix_handles_symlinked_prefixes
    Dir.mktmpdir("packaged-smoke") do |tmpdir|
      gem_name = "lda-ruby-0.4.0"
      real_root = File.join(tmpdir, "real")
      link_root = File.join(tmpdir, "link")
      gem_dir = File.join(real_root, "gems", gem_name)

      FileUtils.mkdir_p(gem_dir)
      begin
        File.symlink(real_root, link_root)
      rescue NotImplementedError, Errno::EACCES, Errno::EPERM => e
        # Symlink creation is unsupported or unprivileged on some platforms
        # (e.g. Windows without the symlink privilege); omit rather than
        # report an error unrelated to the code under test.
        omit("cannot create symlinks here: #{e.class}: #{e.message}")
      end

      assert(
        Lda::PackagedGemSmoke.gem_path_under_prefix?(gem_dir, link_root),
        "expected symlinked prefix to match real gem path"
      )
      assert(
        Lda::PackagedGemSmoke.gem_path_under_prefix?(File.join(link_root, "gems", gem_name), real_root),
        "expected real prefix to match symlinked gem path"
      )
    end
  end

  # "/tmp/gemhome-other" starts with the string "/tmp/gemhome" but is a sibling
  # directory, so it must not be reported as being under that prefix.
  def test_gem_path_under_prefix_rejects_neighbor_prefixes
    assert(
      !Lda::PackagedGemSmoke.gem_path_under_prefix?("/tmp/gemhome-other/gems/lda-ruby-0.4.0", "/tmp/gemhome"),
      "neighbor prefixes should not match"
    )
  end
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
# Checks the helper entry points on Lda::Backends::PureRuby that these tests
# call directly: rust_em_input, em_from_input,
# rust_initial_beta_probabilities and apply_em_state.
class PureRubyOrchestrationTest < Test::Unit::TestCase
  FIXTURE_DOCUMENTS = [
    "apple banana apple banana fruit sweet fruit",
    "truck wheel truck road engine metal road",
    "ruby code gem ruby class module test",
    "banana fruit apple orchard fresh sweet",
    "engine road truck wheel fuel highway",
    "module ruby class object gem code"
  ].freeze

  # The EM input payload must carry consistent per-document arrays.
  def test_rust_em_input_includes_expected_fields
    payload = build_backend.rust_em_input("seeded")

    assert_equal(3, payload[:topics])
    assert_equal(payload[:document_words].size, payload[:document_counts].size)
    assert_equal(payload[:document_words].map { |words| words.length }, payload[:document_lengths])
    assert_equal(payload[:document_counts].map { |row| row.sum.to_f }, payload[:document_totals])
    assert_equal(3, payload[:initial_beta_probabilities].size)
    assert(payload[:terms] > 0)
  end

  # Running EM from a prepared input must reproduce a direct seeded EM run.
  def test_em_from_input_matches_seeded_em_output
    reference = build_backend
    replayed = build_backend

    reference.em("seeded")
    prepared = replayed.rust_em_input("seeded")
    replayed.em_from_input(prepared)

    %i[gamma beta compute_phi].each do |reader|
      assert_nested_close(reference.public_send(reader), replayed.public_send(reader), 1e-9)
    end
  end

  # The standalone beta helper and rust_em_input must agree for a random start.
  def test_rust_initial_beta_probabilities_matches_rust_em_input_for_random_start
    helper_backend = build_backend
    input_backend = build_backend

    word_ids = helper_backend.corpus.documents.map do |doc|
      doc.words.map { |word| word.to_i }
    end
    word_counts = helper_backend.corpus.documents.map do |doc|
      doc.counts.map { |count| count.to_f }
    end
    # Vocabulary size is taken as the largest word id plus one.
    term_count = helper_backend.corpus.documents.flat_map { |doc| doc.words }.max + 1

    expected_beta = helper_backend.rust_initial_beta_probabilities(
      "random", word_ids, word_counts, helper_backend.num_topics, term_count
    )
    payload = input_backend.rust_em_input("random")

    assert_nested_close(expected_beta, payload[:initial_beta_probabilities], 1e-12)
  end

  # apply_em_state should expose the supplied matrices through the readers.
  def test_apply_em_state_sets_outputs
    backend = build_backend

    documents = backend.corpus.documents
    topic_count = 3
    term_count = 5

    uniform_beta = Array.new(topic_count) { Array.new(term_count, 1.0 / term_count) }
    log_beta = uniform_beta.map do |row|
      row.map { |value| Math.log(value) }
    end
    flat_gamma = Array.new(documents.size) { Array.new(topic_count, 1.0) }
    uniform_phi = documents.map do |doc|
      Array.new(doc.length) { Array.new(topic_count, 1.0 / topic_count) }
    end

    backend.apply_em_state(beta_probabilities: uniform_beta, beta_log: log_beta, gamma: flat_gamma, phi: uniform_phi)

    assert_equal(log_beta, backend.beta)
    assert_equal(flat_gamma, backend.gamma)
    assert_equal(uniform_phi, backend.compute_phi)
  end

  private

  # Builds a seeded pure-Ruby backend over the fixture corpus.
  def build_backend
    Lda::Backends::PureRuby.new(random_seed: 1234).tap do |instance|
      instance.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
      instance.verbose = false
      instance.num_topics = 3
      instance.max_iter = 25
      instance.em_max_iter = 40
      instance.convergence = 1e-5
      instance.em_convergence = 1e-4
    end
  end

  # Recursively compares two values: arrays must have the same size and are
  # compared element by element, anything else is compared as floats within
  # +tolerance+. Both sides must be of the same class at every level.
  def assert_nested_close(left, right, tolerance)
    assert_equal left.class, right.class

    return assert_in_delta(left.to_f, right.to_f, tolerance) unless left.is_a?(Array)

    assert_equal left.size, right.size
    left.each_with_index do |element, position|
      assert_nested_close(element, right[position], tolerance)
    end
  end
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "open3"
|
|
3
|
+
|
|
4
|
+
# Runs the release helper scripts under bin/ as subprocesses (with the
# repository root as working directory) and checks exit status and output.
class ReleaseScriptsTest < Test::Unit::TestCase
  def setup
    @repo_root = File.expand_path("..", __dir__)
    bin_dir = File.join(@repo_root, "bin")
    @check_version_sync = File.join(bin_dir, "check-version-sync")
    @release_prepare = File.join(bin_dir, "release-prepare")
    @verify_rubygems_api_key = File.join(bin_dir, "verify-rubygems-api-key")
    @verify_release_publish = File.join(bin_dir, "verify-release-publish")
  end

  def test_check_version_sync_passes_for_repository_versions
    out, err, result = run_in_repo(@check_version_sync)
    assert_script_succeeded(out, err, result)
    assert_match(/Version sync OK:/, out)
  end

  def test_check_version_sync_fails_for_mismatched_tag
    _out, err, result = run_in_repo(@check_version_sync, "--tag", "v9.9.9")
    assert(!result.success?, "expected check-version-sync to fail for mismatched tag")
    assert_match(/does not match expected tag/, err)
  end

  def test_check_version_sync_print_tag_matches_library_version
    out, err, result = run_in_repo(@check_version_sync, "--print-tag")
    assert_script_succeeded(out, err, result)
    assert_equal("v#{Lda::VERSION}", out.strip)
  end

  def test_release_prepare_dry_run_does_not_change_files
    tracked_paths = [
      File.join(@repo_root, "lib", "lda-ruby", "version.rb"),
      File.join(@repo_root, "VERSION.yml"),
      File.join(@repo_root, "CHANGELOG.md")
    ]
    # Snapshot the file contents before the script runs.
    snapshot = tracked_paths.each_with_object({}) do |tracked, memo|
      memo[tracked] = File.read(tracked)
    end

    out, err, result = run_in_repo(@release_prepare, "9.9.9", "--allow-dirty", "--dry-run")
    assert_script_succeeded(out, err, result)
    assert_match(/Dry run: would update/, out)

    snapshot.each_pair do |path, before|
      assert_equal(before, File.read(path), "#{path} changed during dry-run")
    end
  end

  def test_verify_rubygems_api_key_help
    out, err, result = run_in_repo(@verify_rubygems_api_key, "--help")
    assert_script_succeeded(out, err, result)
    assert_match(/Usage: \.\/bin\/verify-rubygems-api-key/, out)
  end

  def test_verify_rubygems_api_key_rejects_unknown_argument
    _out, err, result = run_in_repo(@verify_rubygems_api_key, "--unknown-flag")
    assert(!result.success?, "expected verify-rubygems-api-key to fail for unknown arguments")
    assert_match(/unknown argument/, err)
  end

  def test_verify_release_publish_help
    out, err, result = run_in_repo(@verify_release_publish, "--help")
    assert_script_succeeded(out, err, result)
    assert_match(/Usage: \.\/bin\/verify-release-publish/, out)
  end

  def test_verify_release_publish_rejects_unknown_argument
    _out, err, result = run_in_repo(@verify_release_publish, "--unknown-flag")
    assert(!result.success?, "expected verify-release-publish to fail for unknown arguments")
    assert_match(/unknown argument/, err)
  end

  def test_verify_release_publish_rejects_invalid_tag_format
    # "0.4.0" lacks the leading "v" that the script's error message asks for.
    _out, err, result = run_in_repo(@verify_release_publish, "--tag", "0.4.0", "--skip-rubygems", "--skip-github")
    assert(!result.success?, "expected verify-release-publish to fail for invalid tag format")
    assert_match(/tag must be in format vX\.Y\.Z/, err)
  end

  private

  # Runs +command+ (program followed by its arguments) from the repository
  # root and returns [stdout, stderr, status].
  def run_in_repo(*command)
    Open3.capture3(*command, chdir: @repo_root)
  end

  # Asserts a zero exit status, including both captured streams in the message.
  def assert_script_succeeded(stdout, stderr, status)
    assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "lda-ruby/rust_build_policy"
|
|
3
|
+
|
|
4
|
+
# Table-driven checks of Lda::RustBuildPolicy.resolve: the inputs below must
# map to "auto", "always" or "never" as listed.
class RustBuildPolicyTest < Test::Unit::TestCase
  def test_default_policy_is_auto
    [nil, "", " "].each do |blank|
      assert_equal "auto", Lda::RustBuildPolicy.resolve(blank)
    end
  end

  def test_resolves_valid_values_case_insensitively
    # Each pair is [expected policy, raw input].
    [
      ["always", "always"],
      ["always", "ALWAYS"],
      ["never", "never"],
      ["never", " NeVeR "],
      ["auto", "AUTO"]
    ].each do |expected, raw|
      assert_equal expected, Lda::RustBuildPolicy.resolve(raw)
    end
  end

  def test_invalid_policy_falls_back_to_auto
    ["sometimes", "true"].each do |unrecognized|
      assert_equal "auto", Lda::RustBuildPolicy.resolve(unrecognized)
    end
  end
end
|