lda-ruby 0.4.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +61 -0
- data/Gemfile +9 -0
- data/README.md +157 -0
- data/VERSION.yml +5 -0
- data/docs/modernization-handoff.md +190 -0
- data/docs/porting-strategy.md +127 -0
- data/docs/precompiled-platform-policy.md +68 -0
- data/docs/release-runbook.md +157 -0
- data/ext/lda-ruby/cokus.c +145 -0
- data/ext/lda-ruby/cokus.h +27 -0
- data/ext/lda-ruby/extconf.rb +13 -0
- data/ext/lda-ruby/lda-alpha.c +96 -0
- data/ext/lda-ruby/lda-alpha.h +21 -0
- data/ext/lda-ruby/lda-data.c +67 -0
- data/ext/lda-ruby/lda-data.h +14 -0
- data/ext/lda-ruby/lda-inference.c +1023 -0
- data/ext/lda-ruby/lda-inference.h +63 -0
- data/ext/lda-ruby/lda-model.c +345 -0
- data/ext/lda-ruby/lda-model.h +31 -0
- data/ext/lda-ruby/lda.h +54 -0
- data/ext/lda-ruby/utils.c +111 -0
- data/ext/lda-ruby/utils.h +18 -0
- data/ext/lda-ruby-rust/Cargo.toml +12 -0
- data/ext/lda-ruby-rust/README.md +48 -0
- data/ext/lda-ruby-rust/extconf.rb +123 -0
- data/ext/lda-ruby-rust/src/lib.rs +456 -0
- data/lda-ruby.gemspec +78 -0
- data/lib/lda-ruby/backends/base.rb +129 -0
- data/lib/lda-ruby/backends/native.rb +158 -0
- data/lib/lda-ruby/backends/pure_ruby.rb +613 -0
- data/lib/lda-ruby/backends/rust.rb +226 -0
- data/lib/lda-ruby/backends.rb +58 -0
- data/lib/lda-ruby/config/stopwords.yml +571 -0
- data/lib/lda-ruby/corpus/corpus.rb +45 -0
- data/lib/lda-ruby/corpus/data_corpus.rb +22 -0
- data/lib/lda-ruby/corpus/directory_corpus.rb +25 -0
- data/lib/lda-ruby/corpus/text_corpus.rb +27 -0
- data/lib/lda-ruby/document/data_document.rb +30 -0
- data/lib/lda-ruby/document/document.rb +40 -0
- data/lib/lda-ruby/document/text_document.rb +39 -0
- data/lib/lda-ruby/lda.so +0 -0
- data/lib/lda-ruby/rust_build_policy.rb +21 -0
- data/lib/lda-ruby/version.rb +5 -0
- data/lib/lda-ruby/vocabulary.rb +46 -0
- data/lib/lda-ruby.rb +413 -0
- data/lib/lda_ruby_rust.so +0 -0
- data/license.txt +504 -0
- data/test/backend_compatibility_test.rb +146 -0
- data/test/backends_selection_test.rb +100 -0
- data/test/data/docs.dat +46 -0
- data/test/data/sample.rb +20 -0
- data/test/data/wiki-test-docs.yml +123 -0
- data/test/gemspec_test.rb +27 -0
- data/test/lda_ruby_test.rb +319 -0
- data/test/packaged_gem_smoke_test.rb +33 -0
- data/test/release_scripts_test.rb +54 -0
- data/test/rust_build_policy_test.rb +23 -0
- data/test/simple_pipeline_test.rb +22 -0
- data/test/simple_yaml.rb +17 -0
- data/test/test_helper.rb +10 -0
- metadata +111 -0
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
# Behavioural tests for the core lda-ruby objects: documents, corpora,
# vocabularies and the Lda::Lda front end.
#
# Uses the `context` / `should` DSL on top of Test::Unit::TestCase. All
# assert_equal calls are written as (expected, actual) so that a failure
# message labels the two values correctly.
class LdaRubyTest < Test::Unit::TestCase
  context "A Document instance" do
    setup do
      @corpus = Lda::Corpus.new
    end

    context "A typical Document" do
      setup do
        @document = Lda::Document.new(@corpus)
      end

      should "not have text" do
        assert !@document.text?
      end

      should "be empty" do
        assert_equal 0, @document.total
        assert_equal 0, @document.length
      end

      context "after adding words" do
        setup do
          # Five word ids whose counts sum to 8 (2 + 1 + 1 + 1 + 3).
          @document.words << 1 << 2 << 3 << 4 << 5
          @document.counts << 2 << 1 << 1 << 1 << 3
          @document.recompute
        end

        should "have word count equal to what was added" do
          assert_equal 5, @document.length
        end

        should "have total words equal to the sum of the counts" do
          assert_equal 8, @document.total
        end
      end
    end

    context "A typical DataDocument" do
      setup do
        # The assertions below expect this line to yield word ids 1..5 with
        # counts 2, 1, 1, 1, 3.
        @data = '5 1:2 2:1 3:1 4:1 5:3'
        @document = Lda::DataDocument.new(@corpus, @data)
      end

      should "not have text" do
        assert !@document.text?
      end

      should "have word count equal to what was added" do
        assert_equal 5, @document.length
      end

      should "have total words equal to the sum of the counts" do
        assert_equal 8, @document.total
      end

      should "have words equal to the order they were entered" do
        assert_equal [1, 2, 3, 4, 5], @document.words
      end

      should "have counts equal to the order they were entered" do
        assert_equal [2, 1, 1, 1, 3], @document.counts
      end
    end

    context "A typical TextDocument" do
      setup do
        # Nine space-separated tokens, five of them distinct.
        @text = 'stop words stop stop masterful stoppage buffalo buffalo buffalo'
        @document = Lda::TextDocument.new(@corpus, @text)
      end

      should "have text" do
        assert @document.text?
      end

      should "have word count equal to what was added" do
        assert_equal 5, @document.length
      end

      should "have total words equal to the sum of the counts" do
        assert_equal @text.split(/ /).size, @document.total
      end

      should "have tokens in the order they were entered" do
        assert_equal @text.split(/ /), @document.tokens
      end
    end
  end

  context "A Corpus instance" do
    context "A typical Lda::Corpus instance" do
      setup do
        @corpus = Lda::Corpus.new
        @document1 = Lda::TextDocument.new(@corpus, 'This is the document that never ends. Oh wait yeah it does.')
        @document2 = Lda::TextDocument.new(@corpus, 'A second document that is just as lame as the first.')
      end

      should "be able to add new documents" do
        assert @corpus.respond_to?(:add_document)
        @corpus.add_document(@document1)
        assert_equal 1, @corpus.documents.size
      end

      should "update vocabulary with words in the document" do
        @corpus.add_document(@document2)
        assert_equal true, @corpus.vocabulary.words.member?('lame')
      end
    end

    context "An Lda::DataCorpus instance loaded from a file" do
      setup do
        @filename = File.join(File.dirname(__FILE__), 'data', 'docs.dat')
        @filetext = File.read(@filename)
        @corpus = Lda::DataCorpus.new(@filename)
      end

      should "contain the number of documents equivalent to the number of lines in the file" do
        assert_equal @filetext.split(/\n/).size, @corpus.num_docs
      end

      should "not load any words into the vocabulary since none were given" do
        assert_equal 0, @corpus.vocabulary.words.size
      end
    end

    context "An Lda::TextCorpus instance loaded from a file" do
      setup do
        @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
        @filedocs = YAML.load_file(@filename)
        @corpus = Lda::TextCorpus.new(@filename)
      end

      should "contain the number of documents equivalent to the number of lines in the file" do
        assert_equal @filedocs.size, @corpus.num_docs
      end

      should "update the vocabulary with the words that were loaded" do
        assert_operator @corpus.vocabulary.words.size, :>, 0
      end
    end

    context "An Lda::DirectoryCorpus instance loaded from a directory" do
      setup do
        @path = File.join(File.dirname(__FILE__), 'data', 'tmp')
        @extension = 'txt'
        # A run that died before teardown leaves this directory behind;
        # tolerate that instead of failing every later run with Errno::EEXIST.
        Dir.mkdir(@path) unless Dir.exist?(@path)
        @original_filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
        @filedocs = YAML.load_file(@original_filename)
        # One file per fixture document, named doc_1.txt, doc_2.txt, ...
        @filedocs.each_with_index do |doc, idx|
          File.open(File.join(@path, "doc_#{idx + 1}.#{@extension}"), 'w') { |f| f.write(doc) }
        end

        @corpus = Lda::DirectoryCorpus.new(@path, @extension)
      end

      should "load a document for every file in the directory" do
        assert_equal @filedocs.size, @corpus.num_docs
      end

      should "update the vocabulary with the words that were loaded" do
        assert_operator @corpus.vocabulary.words.size, :>, 0
      end

      teardown do
        Dir.glob(File.join(@path, "*.#{@extension}")).each { |f| File.unlink(f) }
        Dir.rmdir(@path)
      end
    end
  end

  context "A Vocabulary instance" do
    setup do
      @vocab = Lda::Vocabulary.new
      @words = ['word1', 'word2', 'word3', 'word4', 'word5', 'word6']
      # Plain-text fixture: one word per line.
      @filename1 = File.join(File.dirname(__FILE__), 'data', 'tmp_file.txt')
      File.open(@filename1, 'w') do |f|
        @words.each { |w| f.write("#{w}\n") }
      end
      # YAML fixture: the same word list dumped as a YAML array.
      @filename2 = File.join(File.dirname(__FILE__), 'data', 'tmp_file.yml')
      File.open(@filename2, 'w') { |f| YAML.dump(@words, f) }
    end

    should "load a file containing a list of words, one per line" do
      assert_equal 0, @vocab.num_words
      @vocab.load_file(@filename1)
      assert_operator @vocab.words.size, :>, 0
    end

    should "load a yaml file containing a list of words" do
      assert_equal 0, @vocab.num_words
      @vocab.load_yaml(@filename2)
      assert_operator @vocab.num_words, :>, 0
    end

    should "return indexes for words in the order they were loaded" do
      @vocab.load_yaml(@filename2)
      # The expected index is the 1-based position in the word list.
      @words.each_with_index do |word, idx|
        assert_equal idx + 1, @vocab.check_word(word)
      end
    end

    teardown do
      # Remove each fixture independently so a missing first file cannot
      # leave the second one behind.
      [@filename1, @filename2].each do |path|
        File.unlink(path) if File.exist?(path)
      end
    end
  end

  context "An Lda::Lda instance" do
    setup do
      @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
      @filedocs = YAML.load_file(@filename)
      @corpus = Lda::TextCorpus.new(@filename)

      @lda = Lda::Lda.new(@corpus)
    end

    should "have loaded the vocabulary from the corpus" do
      assert_not_nil @lda.vocab
    end

    should "have loaded the same number of words in the vocabulary as are in the original" do
      assert_equal @corpus.vocabulary.num_words, @lda.vocab.size
    end

    should "have default values for the main settings" do
      assert_not_nil @lda.max_iter
      assert_not_nil @lda.convergence
      assert_not_nil @lda.em_max_iter
      assert_not_nil @lda.em_convergence
      assert_not_nil @lda.num_topics
      assert_not_nil @lda.init_alpha
      assert_not_nil @lda.est_alpha
    end

    should "expose the selected backend name" do
      assert(%w[native pure_ruby rust].include?(@lda.backend_name))
    end

    should "raise when rust backend is requested but extension is unavailable" do
      if Lda::RUST_EXTENSION_LOADED
        # Nothing to verify when the extension is present.
        assert true
      else
        assert_raise(LoadError) { Lda::Lda.new(@corpus, backend: :rust) }
      end
    end

    should "run with rust backend when extension is available" do
      if Lda::RUST_EXTENSION_LOADED
        rust_lda = Lda::Lda.new(@corpus, backend: :rust, random_seed: 1234)
        rust_lda.verbose = false
        rust_lda.num_topics = 4
        rust_lda.em("seeded")

        assert_equal "rust", rust_lda.backend_name
        assert_equal @corpus.num_docs, rust_lda.gamma.size
        assert_equal @corpus.num_docs, rust_lda.phi.size
      else
        # Nothing to verify when the extension is absent.
        assert true
      end
    end

    context "after running em" do
      setup do
        @lda.verbose = false
        @lda.num_topics = 8
        @lda.em('random')
      end

      should "phi should be defined" do
        assert_not_nil @lda.phi
      end

      should "return the top 10 list of words for each topic" do
        topics = @lda.top_words(10)
        assert topics.is_a?(Hash)
        assert_equal @lda.num_topics, topics.size

        topics.each_value do |top_n_words|
          assert_equal 10, top_n_words.size
        end
      end

      context "after computing topic-document probabilities" do
        setup do
          @topic_doc_probs = @lda.compute_topic_document_probability
        end

        should "have a row for each document" do
          assert_equal @corpus.num_docs, @topic_doc_probs.size
        end

        should "have columns for each topic" do
          @topic_doc_probs.each do |doc|
            assert_equal @lda.num_topics, doc.size
          end
        end
      end
    end

    context "using the pure-ruby backend" do
      setup do
        # Replaces the default-backend instance built by the outer setup.
        @lda = Lda::Lda.new(@corpus, backend: :pure, random_seed: 1234)
        @lda.verbose = false
        @lda.num_topics = 6
        @lda.max_iter = 20
        @lda.em_max_iter = 30
        @lda.em('random')
      end

      should "run em and generate model matrices" do
        assert_equal "pure_ruby", @lda.backend_name
        assert_equal @lda.num_topics, @lda.beta.size
        assert_equal @corpus.num_docs, @lda.gamma.size
        assert_equal @corpus.num_docs, @lda.phi.size
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "tmpdir"
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require_relative "../bin/packaged-gem-smoke"
|
|
5
|
+
|
|
6
|
+
# Tests for Lda::PackagedGemSmoke.gem_path_under_prefix?, the predicate
# called with a gem directory and a prefix directory.
class PackagedGemSmokeTest < Test::Unit::TestCase
  # The same gem directory is reached once through its real path and once
  # through a symlink to its root; both pairings must be accepted.
  def test_gem_path_under_prefix_handles_symlinked_prefixes
    Dir.mktmpdir("packaged-smoke") do |sandbox|
      real_root = File.join(sandbox, "real")
      link_root = File.join(sandbox, "link")
      gem_dir = File.join(real_root, "gems", "lda-ruby-0.4.0")

      FileUtils.mkdir_p(gem_dir)
      File.symlink(real_root, link_root)

      real_path_matches_link =
        Lda::PackagedGemSmoke.gem_path_under_prefix?(gem_dir, link_root)
      assert(real_path_matches_link, "expected symlinked prefix to match real gem path")

      gem_dir_via_link = File.join(link_root, "gems", "lda-ruby-0.4.0")
      link_path_matches_real =
        Lda::PackagedGemSmoke.gem_path_under_prefix?(gem_dir_via_link, real_root)
      assert(link_path_matches_real, "expected real prefix to match symlinked gem path")
    end
  end

  # "/tmp/gemhome-other" merely shares a string prefix with "/tmp/gemhome";
  # it must not be treated as living underneath it.
  def test_gem_path_under_prefix_rejects_neighbor_prefixes
    neighbor_matches = Lda::PackagedGemSmoke.gem_path_under_prefix?(
      "/tmp/gemhome-other/gems/lda-ruby-0.4.0",
      "/tmp/gemhome"
    )

    assert(!neighbor_matches, "neighbor prefixes should not match")
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "open3"
|
|
3
|
+
|
|
4
|
+
# Runs the release helper scripts under bin/ as subprocesses and checks
# their exit status and output.
class ReleaseScriptsTest < Test::Unit::TestCase
  # Resolves the repository root (the parent of this test directory) and
  # the two scripts under test.
  def setup
    @repo_root = File.expand_path("..", __dir__)
    bin_dir = File.join(@repo_root, "bin")
    @check_version_sync = File.join(bin_dir, "check-version-sync")
    @release_prepare = File.join(bin_dir, "release-prepare")
  end

  def test_check_version_sync_passes_for_repository_versions
    out, err, result = Open3.capture3(@check_version_sync, chdir: @repo_root)

    assert(result.success?, "stdout=#{out}\nstderr=#{err}")
    assert_match(/Version sync OK:/, out)
  end

  def test_check_version_sync_fails_for_mismatched_tag
    _out, err, result = Open3.capture3(@check_version_sync, "--tag", "v9.9.9", chdir: @repo_root)

    assert(!result.success?, "expected check-version-sync to fail for mismatched tag")
    assert_match(/does not match expected tag/, err)
  end

  def test_check_version_sync_print_tag_matches_library_version
    out, err, result = Open3.capture3(@check_version_sync, "--print-tag", chdir: @repo_root)

    assert(result.success?, "stdout=#{out}\nstderr=#{err}")
    assert_equal("v#{Lda::VERSION}", out.strip)
  end

  # Snapshots three version-related files, runs release-prepare with
  # --dry-run, and verifies that none of the files changed on disk.
  def test_release_prepare_dry_run_does_not_change_files
    watched_paths = [
      File.join(@repo_root, "lib", "lda-ruby", "version.rb"),
      File.join(@repo_root, "VERSION.yml"),
      File.join(@repo_root, "CHANGELOG.md")
    ]
    snapshot = watched_paths.to_h { |path| [path, File.read(path)] }

    command = [@release_prepare, "9.9.9", "--allow-dirty", "--dry-run"]
    out, err, result = Open3.capture3(*command, chdir: @repo_root)

    assert(result.success?, "stdout=#{out}\nstderr=#{err}")
    assert_match(/Dry run: would update/, out)

    snapshot.each do |path, original|
      assert_equal(original, File.read(path), "#{path} changed during dry-run")
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "lda-ruby/rust_build_policy"
|
|
3
|
+
|
|
4
|
+
# Tests for Lda::RustBuildPolicy.resolve, which maps a raw policy value to
# one of the strings "auto", "always" or "never".
class RustBuildPolicyTest < Test::Unit::TestCase
  # nil, empty and whitespace-only input all resolve to "auto".
  def test_default_policy_is_auto
    [nil, "", " "].each do |blank|
      assert_equal "auto", Lda::RustBuildPolicy.resolve(blank)
    end
  end

  # Recognised values are matched regardless of case and surrounding spaces.
  def test_resolves_valid_values_case_insensitively
    [
      ["always", "always"],
      ["ALWAYS", "always"],
      ["never", "never"],
      [" NeVeR ", "never"],
      ["AUTO", "auto"]
    ].each do |raw, expected|
      assert_equal expected, Lda::RustBuildPolicy.resolve(raw)
    end
  end

  # Unrecognised values fall back to "auto".
  def test_invalid_policy_falls_back_to_auto
    ["sometimes", "true"].each do |unknown|
      assert_equal "auto", Lda::RustBuildPolicy.resolve(unknown)
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
# End-to-end smoke test: build a two-document corpus, drop one word, train
# a two-topic model and read back the top words of each topic.
class SimplePipelineTest < Test::Unit::TestCase
  def test_end_to_end_pipeline_on_small_corpus
    corpus = Lda::Corpus.new
    texts = [
      "Dom Cobb is a skilled thief who steals secrets from dreams.",
      "Jake Sully joins the mission on Pandora and learns from the Na'vi."
    ]

    # Build every document first, then register them in the same order.
    documents = texts.map { |text| Lda::TextDocument.new(corpus, text) }
    documents.each { |document| corpus.add_document(document) }
    corpus.remove_word("cobb")

    lda = Lda::Lda.new(corpus)
    lda.verbose = false
    lda.num_topics = 2
    lda.em("random")

    topics = lda.top_words(5)
    assert_equal 2, topics.size
    topics.each_value do |words|
      assert_equal 5, words.size
    end
  end
end
|
data/test/simple_yaml.rb
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
# Demo script rather than a test case: it reopens Test::Unit::TestCase and
# defines no test methods. Every statement below sits directly in the class
# body, so it runs once when this file is loaded, and the @-variables are
# instance variables of the class object itself.
# NOTE(review): this trains a model as a load-time side effect on the shared
# base class; confirm whether it was meant to be a standalone script.
class Test::Unit::TestCase

  # Load the YAML fixture both as raw data and as a text corpus.
  @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
  @filedocs = YAML::load_file(@filename)
  @corpus = Lda::TextCorpus.new(@filename)

  @lda = Lda::Lda.new(@corpus)

  # Train with 20 topics, then print the topics (called with 20).
  @lda.verbose = false
  @lda.num_topics = 20
  @lda.em('random')
  @lda.print_topics(20)


end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lda-ruby
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.4.0
|
|
5
|
+
platform: x86_64-linux
|
|
6
|
+
authors:
|
|
7
|
+
- David Blei
|
|
8
|
+
- Jason Adams
|
|
9
|
+
- Rio Akasaka
|
|
10
|
+
autorequire:
|
|
11
|
+
bindir: bin
|
|
12
|
+
cert_chain: []
|
|
13
|
+
date: 2026-02-25 00:00:00.000000000 Z
|
|
14
|
+
dependencies: []
|
|
15
|
+
description: Ruby wrapper and toolkit for Latent Dirichlet Allocation based on the
|
|
16
|
+
original lda-c implementation by David M. Blei.
|
|
17
|
+
email:
|
|
18
|
+
- jasonmadams@gmail.com
|
|
19
|
+
executables: []
|
|
20
|
+
extensions: []
|
|
21
|
+
extra_rdoc_files: []
|
|
22
|
+
files:
|
|
23
|
+
- CHANGELOG.md
|
|
24
|
+
- Gemfile
|
|
25
|
+
- README.md
|
|
26
|
+
- VERSION.yml
|
|
27
|
+
- docs/modernization-handoff.md
|
|
28
|
+
- docs/porting-strategy.md
|
|
29
|
+
- docs/precompiled-platform-policy.md
|
|
30
|
+
- docs/release-runbook.md
|
|
31
|
+
- ext/lda-ruby-rust/Cargo.toml
|
|
32
|
+
- ext/lda-ruby-rust/README.md
|
|
33
|
+
- ext/lda-ruby-rust/extconf.rb
|
|
34
|
+
- ext/lda-ruby-rust/src/lib.rs
|
|
35
|
+
- ext/lda-ruby/cokus.c
|
|
36
|
+
- ext/lda-ruby/cokus.h
|
|
37
|
+
- ext/lda-ruby/extconf.rb
|
|
38
|
+
- ext/lda-ruby/lda-alpha.c
|
|
39
|
+
- ext/lda-ruby/lda-alpha.h
|
|
40
|
+
- ext/lda-ruby/lda-data.c
|
|
41
|
+
- ext/lda-ruby/lda-data.h
|
|
42
|
+
- ext/lda-ruby/lda-inference.c
|
|
43
|
+
- ext/lda-ruby/lda-inference.h
|
|
44
|
+
- ext/lda-ruby/lda-model.c
|
|
45
|
+
- ext/lda-ruby/lda-model.h
|
|
46
|
+
- ext/lda-ruby/lda.h
|
|
47
|
+
- ext/lda-ruby/utils.c
|
|
48
|
+
- ext/lda-ruby/utils.h
|
|
49
|
+
- lda-ruby.gemspec
|
|
50
|
+
- lib/lda-ruby.rb
|
|
51
|
+
- lib/lda-ruby/backends.rb
|
|
52
|
+
- lib/lda-ruby/backends/base.rb
|
|
53
|
+
- lib/lda-ruby/backends/native.rb
|
|
54
|
+
- lib/lda-ruby/backends/pure_ruby.rb
|
|
55
|
+
- lib/lda-ruby/backends/rust.rb
|
|
56
|
+
- lib/lda-ruby/config/stopwords.yml
|
|
57
|
+
- lib/lda-ruby/corpus/corpus.rb
|
|
58
|
+
- lib/lda-ruby/corpus/data_corpus.rb
|
|
59
|
+
- lib/lda-ruby/corpus/directory_corpus.rb
|
|
60
|
+
- lib/lda-ruby/corpus/text_corpus.rb
|
|
61
|
+
- lib/lda-ruby/document/data_document.rb
|
|
62
|
+
- lib/lda-ruby/document/document.rb
|
|
63
|
+
- lib/lda-ruby/document/text_document.rb
|
|
64
|
+
- lib/lda-ruby/lda.so
|
|
65
|
+
- lib/lda-ruby/rust_build_policy.rb
|
|
66
|
+
- lib/lda-ruby/version.rb
|
|
67
|
+
- lib/lda-ruby/vocabulary.rb
|
|
68
|
+
- lib/lda_ruby_rust.so
|
|
69
|
+
- license.txt
|
|
70
|
+
- test/backend_compatibility_test.rb
|
|
71
|
+
- test/backends_selection_test.rb
|
|
72
|
+
- test/data/docs.dat
|
|
73
|
+
- test/data/sample.rb
|
|
74
|
+
- test/data/wiki-test-docs.yml
|
|
75
|
+
- test/gemspec_test.rb
|
|
76
|
+
- test/lda_ruby_test.rb
|
|
77
|
+
- test/packaged_gem_smoke_test.rb
|
|
78
|
+
- test/release_scripts_test.rb
|
|
79
|
+
- test/rust_build_policy_test.rb
|
|
80
|
+
- test/simple_pipeline_test.rb
|
|
81
|
+
- test/simple_yaml.rb
|
|
82
|
+
- test/test_helper.rb
|
|
83
|
+
homepage: https://github.com/ealdent/lda-ruby
|
|
84
|
+
licenses:
|
|
85
|
+
- GPL-2.0-or-later
|
|
86
|
+
metadata:
|
|
87
|
+
homepage_uri: https://github.com/ealdent/lda-ruby
|
|
88
|
+
source_code_uri: https://github.com/ealdent/lda-ruby
|
|
89
|
+
changelog_uri: https://github.com/ealdent/lda-ruby/blob/master/CHANGELOG.md
|
|
90
|
+
lda_ruby_gem_variant: precompiled
|
|
91
|
+
lda_ruby_platform: x86_64-linux
|
|
92
|
+
post_install_message:
|
|
93
|
+
rdoc_options: []
|
|
94
|
+
require_paths:
|
|
95
|
+
- lib
|
|
96
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
97
|
+
requirements:
|
|
98
|
+
- - ">="
|
|
99
|
+
- !ruby/object:Gem::Version
|
|
100
|
+
version: '3.2'
|
|
101
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
102
|
+
requirements:
|
|
103
|
+
- - ">="
|
|
104
|
+
- !ruby/object:Gem::Version
|
|
105
|
+
version: '0'
|
|
106
|
+
requirements: []
|
|
107
|
+
rubygems_version: 3.5.22
|
|
108
|
+
signing_key:
|
|
109
|
+
specification_version: 4
|
|
110
|
+
summary: Ruby implementation of Latent Dirichlet Allocation (LDA).
|
|
111
|
+
test_files: []
|