lda-ruby 0.3.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/CHANGELOG.md +16 -0
- data/Gemfile +9 -0
- data/README.md +126 -3
- data/VERSION.yml +3 -3
- data/docs/modernization-handoff.md +233 -0
- data/docs/porting-strategy.md +148 -0
- data/docs/precompiled-platform-policy.md +81 -0
- data/docs/precompiled-target-evaluation.md +67 -0
- data/docs/release-runbook.md +192 -0
- data/docs/rust-orchestration-guardrails.md +50 -0
- data/ext/lda-ruby/cokus.c +10 -11
- data/ext/lda-ruby/cokus.h +3 -3
- data/ext/lda-ruby/extconf.rb +10 -6
- data/ext/lda-ruby/lda-inference.c +23 -7
- data/ext/lda-ruby/utils.c +8 -0
- data/ext/lda-ruby-rust/Cargo.toml +12 -0
- data/ext/lda-ruby-rust/README.md +73 -0
- data/ext/lda-ruby-rust/extconf.rb +135 -0
- data/ext/lda-ruby-rust/include/strings.h +35 -0
- data/ext/lda-ruby-rust/src/lib.rs +1263 -0
- data/lda-ruby.gemspec +0 -0
- data/lib/lda-ruby/backends/base.rb +133 -0
- data/lib/lda-ruby/backends/native.rb +158 -0
- data/lib/lda-ruby/backends/pure_ruby.rb +675 -0
- data/lib/lda-ruby/backends/rust.rb +607 -0
- data/lib/lda-ruby/backends.rb +58 -0
- data/lib/lda-ruby/corpus/corpus.rb +17 -15
- data/lib/lda-ruby/corpus/data_corpus.rb +2 -2
- data/lib/lda-ruby/corpus/directory_corpus.rb +2 -2
- data/lib/lda-ruby/corpus/text_corpus.rb +2 -2
- data/lib/lda-ruby/document/document.rb +6 -6
- data/lib/lda-ruby/document/text_document.rb +5 -4
- data/lib/lda-ruby/rust_build_policy.rb +21 -0
- data/lib/lda-ruby/version.rb +5 -0
- data/lib/lda-ruby.rb +293 -48
- data/test/backend_compatibility_test.rb +146 -0
- data/test/backends_selection_test.rb +100 -0
- data/test/benchmark_scripts_test.rb +23 -0
- data/test/gemspec_test.rb +27 -0
- data/test/lda_ruby_test.rb +49 -11
- data/test/packaged_gem_smoke_test.rb +33 -0
- data/test/pure_ruby_orchestration_test.rb +109 -0
- data/test/release_scripts_test.rb +93 -0
- data/test/rust_build_policy_test.rb +23 -0
- data/test/rust_orchestration_test.rb +911 -0
- data/test/simple_pipeline_test.rb +22 -0
- data/test/simple_yaml.rb +1 -7
- data/test/test_helper.rb +5 -6
- metadata +54 -38
- data/Rakefile +0 -61
- data/ext/lda-ruby/Makefile +0 -181
- data/test/data/.gitignore +0 -2
- data/test/simple_test.rb +0 -26
data/test/backend_compatibility_test.rb
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
class BackendCompatibilityTest < Test::Unit::TestCase
|
|
4
|
+
FIXTURE_DOCUMENTS = [
|
|
5
|
+
"apple banana apple banana fruit sweet fruit",
|
|
6
|
+
"truck wheel truck road engine metal road",
|
|
7
|
+
"ruby code gem ruby class module test",
|
|
8
|
+
"banana fruit apple orchard fresh sweet",
|
|
9
|
+
"engine road truck wheel fuel highway",
|
|
10
|
+
"module ruby class object gem code"
|
|
11
|
+
].freeze
|
|
12
|
+
|
|
13
|
+
def setup
|
|
14
|
+
@corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def test_pure_backend_seeded_fixture
|
|
18
|
+
lda = build_and_train(:pure)
|
|
19
|
+
|
|
20
|
+
assert_equal "pure_ruby", lda.backend_name
|
|
21
|
+
assert_backend_output_valid(lda)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def test_native_backend_seeded_fixture
|
|
25
|
+
return unless Lda::NATIVE_EXTENSION_LOADED
|
|
26
|
+
|
|
27
|
+
lda = build_and_train(:native)
|
|
28
|
+
|
|
29
|
+
assert_equal "native", lda.backend_name
|
|
30
|
+
assert_backend_output_valid(lda)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def test_native_and_pure_backend_agree_on_shapes
|
|
34
|
+
return unless Lda::NATIVE_EXTENSION_LOADED
|
|
35
|
+
|
|
36
|
+
native = build_and_train(:native)
|
|
37
|
+
pure = build_and_train(:pure)
|
|
38
|
+
|
|
39
|
+
assert_equal native.model[0], pure.model[0]
|
|
40
|
+
assert_equal native.model[1], pure.model[1]
|
|
41
|
+
assert_equal native.beta.size, pure.beta.size
|
|
42
|
+
assert_equal native.gamma.size, pure.gamma.size
|
|
43
|
+
assert_equal native.phi.size, pure.phi.size
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def test_rust_backend_seeded_fixture
|
|
47
|
+
return unless Lda::RUST_EXTENSION_LOADED
|
|
48
|
+
|
|
49
|
+
rust = build_and_train(:rust)
|
|
50
|
+
|
|
51
|
+
assert_equal "rust", rust.backend_name
|
|
52
|
+
assert_backend_output_valid(rust)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def test_rust_and_pure_backend_numeric_parity
|
|
56
|
+
return unless Lda::RUST_EXTENSION_LOADED
|
|
57
|
+
|
|
58
|
+
pure = build_and_train(:pure)
|
|
59
|
+
rust = build_and_train(:rust)
|
|
60
|
+
|
|
61
|
+
assert_nested_close(pure.gamma, rust.gamma, 1e-9)
|
|
62
|
+
assert_nested_close(pure.beta, rust.beta, 1e-9)
|
|
63
|
+
assert_nested_close(pure.phi, rust.phi, 1e-9)
|
|
64
|
+
assert_nested_close(
|
|
65
|
+
exponentiate_nested(pure.compute_topic_document_probability),
|
|
66
|
+
exponentiate_nested(rust.compute_topic_document_probability),
|
|
67
|
+
1e-6
|
|
68
|
+
)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
private
|
|
72
|
+
|
|
73
|
+
def build_and_train(backend)
|
|
74
|
+
lda = Lda::Lda.new(@corpus, backend: backend, random_seed: 1234)
|
|
75
|
+
lda.verbose = false
|
|
76
|
+
lda.num_topics = 3
|
|
77
|
+
lda.max_iter = 25
|
|
78
|
+
lda.em_max_iter = 40
|
|
79
|
+
lda.convergence = 1e-5
|
|
80
|
+
lda.em_convergence = 1e-4
|
|
81
|
+
lda.em("seeded")
|
|
82
|
+
lda
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def assert_backend_output_valid(lda)
|
|
86
|
+
assert_equal 3, lda.model[0]
|
|
87
|
+
assert lda.model[1] > 0
|
|
88
|
+
|
|
89
|
+
assert_equal @corpus.num_docs, lda.gamma.size
|
|
90
|
+
lda.gamma.each do |topic_weights|
|
|
91
|
+
assert_equal 3, topic_weights.size
|
|
92
|
+
topic_weights.each do |weight|
|
|
93
|
+
assert weight.is_a?(Numeric)
|
|
94
|
+
assert weight.finite?
|
|
95
|
+
assert weight.positive?
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
assert_equal 3, lda.beta.size
|
|
100
|
+
lda.beta.each do |topic_log_probs|
|
|
101
|
+
assert topic_log_probs.size > 0
|
|
102
|
+
probabilities = topic_log_probs.map { |log_prob| Math.exp(log_prob) }
|
|
103
|
+
assert_in_delta 1.0, probabilities.sum, 1e-3
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
phi = lda.phi
|
|
107
|
+
assert_equal @corpus.num_docs, phi.size
|
|
108
|
+
phi.each_with_index do |doc_phi, doc_index|
|
|
109
|
+
assert_equal @corpus.documents[doc_index].length, doc_phi.size
|
|
110
|
+
doc_phi.each do |word_topic_distribution|
|
|
111
|
+
assert_equal 3, word_topic_distribution.size
|
|
112
|
+
assert_in_delta 1.0, word_topic_distribution.sum, 1e-3
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
probabilities = lda.compute_topic_document_probability
|
|
117
|
+
assert_equal @corpus.num_docs, probabilities.size
|
|
118
|
+
probabilities.each do |row|
|
|
119
|
+
assert_equal 3, row.size
|
|
120
|
+
row.each { |value| assert value.finite? }
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
top_words = lda.top_words(4)
|
|
124
|
+
assert_equal 3, top_words.size
|
|
125
|
+
top_words.each_value { |words| assert_equal 4, words.size }
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def assert_nested_close(left, right, tolerance)
|
|
129
|
+
assert_equal left.class, right.class
|
|
130
|
+
|
|
131
|
+
if left.is_a?(Array)
|
|
132
|
+
assert_equal left.size, right.size
|
|
133
|
+
left.each_with_index do |left_item, index|
|
|
134
|
+
assert_nested_close(left_item, right[index], tolerance)
|
|
135
|
+
end
|
|
136
|
+
else
|
|
137
|
+
assert_in_delta left.to_f, right.to_f, tolerance
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def exponentiate_nested(value)
|
|
142
|
+
return Math.exp(value.to_f) unless value.is_a?(Array)
|
|
143
|
+
|
|
144
|
+
value.map { |item| exponentiate_nested(item) }
|
|
145
|
+
end
|
|
146
|
+
end
|
data/test/backends_selection_test.rb
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
class BackendsSelectionTest < Test::Unit::TestCase
|
|
4
|
+
RUST_ALIAS = :__test_original_rust_available__
|
|
5
|
+
NATIVE_ALIAS = :__test_original_native_available__
|
|
6
|
+
|
|
7
|
+
setup do
|
|
8
|
+
@host = Object.new
|
|
9
|
+
@rust_singleton = Lda::Backends::Rust.singleton_class
|
|
10
|
+
@native_singleton = Lda::Backends::Native.singleton_class
|
|
11
|
+
|
|
12
|
+
@rust_singleton.send(:alias_method, RUST_ALIAS, :available?)
|
|
13
|
+
@native_singleton.send(:alias_method, NATIVE_ALIAS, :available?)
|
|
14
|
+
@previous_env_backend = ENV["LDA_RUBY_BACKEND"]
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
teardown do
|
|
18
|
+
restore_availability_stubs
|
|
19
|
+
ENV["LDA_RUBY_BACKEND"] = @previous_env_backend
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
should "prefer rust over native in auto mode when both are available" do
|
|
23
|
+
stub_rust_available(true)
|
|
24
|
+
stub_native_available(true)
|
|
25
|
+
|
|
26
|
+
backend = Lda::Backends.build(host: @host, requested: :auto)
|
|
27
|
+
assert_instance_of Lda::Backends::Rust, backend
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
should "fall back to native in auto mode when rust is unavailable" do
|
|
31
|
+
stub_rust_available(false)
|
|
32
|
+
stub_native_available(true)
|
|
33
|
+
|
|
34
|
+
backend = Lda::Backends.build(host: @host, requested: :auto)
|
|
35
|
+
assert_instance_of Lda::Backends::Native, backend
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
should "fall back to pure in auto mode when rust and native are unavailable" do
|
|
39
|
+
stub_rust_available(false)
|
|
40
|
+
stub_native_available(false)
|
|
41
|
+
|
|
42
|
+
backend = Lda::Backends.build(host: @host, requested: :auto)
|
|
43
|
+
assert_instance_of Lda::Backends::PureRuby, backend
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
should "respect LDA_RUBY_BACKEND env override when requested mode is nil" do
|
|
47
|
+
stub_rust_available(true)
|
|
48
|
+
stub_native_available(true)
|
|
49
|
+
ENV["LDA_RUBY_BACKEND"] = "pure_ruby"
|
|
50
|
+
|
|
51
|
+
backend = Lda::Backends.build(host: @host, requested: nil)
|
|
52
|
+
assert_instance_of Lda::Backends::PureRuby, backend
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
should "raise for unknown backend mode" do
|
|
56
|
+
stub_rust_available(false)
|
|
57
|
+
stub_native_available(false)
|
|
58
|
+
|
|
59
|
+
error = assert_raise(ArgumentError) do
|
|
60
|
+
Lda::Backends.build(host: @host, requested: :unknown_backend)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
assert_match(/Unknown backend mode/i, error.message)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
private
|
|
67
|
+
|
|
68
|
+
def stub_rust_available(value)
|
|
69
|
+
silence_redefinition_warnings do
|
|
70
|
+
@rust_singleton.send(:define_method, :available?) do
|
|
71
|
+
value
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def stub_native_available(value)
|
|
77
|
+
silence_redefinition_warnings do
|
|
78
|
+
@native_singleton.send(:define_method, :available?) do |_host|
|
|
79
|
+
value
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def restore_availability_stubs
|
|
85
|
+
silence_redefinition_warnings do
|
|
86
|
+
@rust_singleton.send(:alias_method, :available?, RUST_ALIAS)
|
|
87
|
+
@native_singleton.send(:alias_method, :available?, NATIVE_ALIAS)
|
|
88
|
+
end
|
|
89
|
+
@rust_singleton.send(:remove_method, RUST_ALIAS)
|
|
90
|
+
@native_singleton.send(:remove_method, NATIVE_ALIAS)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def silence_redefinition_warnings
|
|
94
|
+
previous_verbose = $VERBOSE
|
|
95
|
+
$VERBOSE = nil
|
|
96
|
+
yield
|
|
97
|
+
ensure
|
|
98
|
+
$VERBOSE = previous_verbose
|
|
99
|
+
end
|
|
100
|
+
end
|
data/test/benchmark_scripts_test.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "test_helper"
|
|
4
|
+
require "open3"
|
|
5
|
+
|
|
6
|
+
class BenchmarkScriptsTest < Test::Unit::TestCase
|
|
7
|
+
def setup
|
|
8
|
+
@repo_root = File.expand_path("..", __dir__)
|
|
9
|
+
@check_rust_benchmark = File.join(@repo_root, "bin", "check-rust-benchmark")
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def test_check_rust_benchmark_help
|
|
13
|
+
stdout, stderr, status = Open3.capture3(@check_rust_benchmark, "--help", chdir: @repo_root)
|
|
14
|
+
assert(status.success?, stderr)
|
|
15
|
+
assert_match(/Usage: \.\/bin\/check-rust-benchmark/, stdout)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def test_check_rust_benchmark_rejects_unknown_argument
|
|
19
|
+
_stdout, stderr, status = Open3.capture3(@check_rust_benchmark, "--unknown", chdir: @repo_root)
|
|
20
|
+
assert(!status.success?, "expected check-rust-benchmark to fail for unknown args")
|
|
21
|
+
assert_match(/Unknown argument/, stderr)
|
|
22
|
+
end
|
|
23
|
+
end
|
data/test/gemspec_test.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
class GemspecTest < Test::Unit::TestCase
|
|
4
|
+
def test_gemspec_excludes_local_rust_build_artifacts
|
|
5
|
+
spec = Gem::Specification.load(File.expand_path("../lda-ruby.gemspec", __dir__))
|
|
6
|
+
assert_not_nil spec
|
|
7
|
+
|
|
8
|
+
rust_target_files = spec.files.grep(%r{\Aext/lda-ruby-rust/target/})
|
|
9
|
+
assert_equal [], rust_target_files
|
|
10
|
+
assert(!spec.files.include?("ext/lda-ruby-rust/Cargo.lock"))
|
|
11
|
+
assert(!spec.files.include?("ext/lda-ruby-rust/Makefile"))
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def test_gemspec_declares_rust_extconf
|
|
15
|
+
spec = Gem::Specification.load(File.expand_path("../lda-ruby.gemspec", __dir__))
|
|
16
|
+
assert_not_nil spec
|
|
17
|
+
|
|
18
|
+
assert(spec.extensions.include?("ext/lda-ruby-rust/extconf.rb"))
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def test_gemspec_includes_release_runbook
|
|
22
|
+
spec = Gem::Specification.load(File.expand_path("../lda-ruby.gemspec", __dir__))
|
|
23
|
+
assert_not_nil spec
|
|
24
|
+
|
|
25
|
+
assert(spec.files.include?("docs/release-runbook.md"))
|
|
26
|
+
end
|
|
27
|
+
end
|
data/test/lda_ruby_test.rb
CHANGED
|
@@ -1,11 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
require 'test/unit'
|
|
3
|
-
require 'shoulda'
|
|
4
|
-
require 'yaml'
|
|
5
|
-
|
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
7
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
|
-
require 'lda-ruby'
|
|
1
|
+
require_relative "test_helper"
|
|
9
2
|
|
|
10
3
|
class LdaRubyTest < Test::Unit::TestCase
|
|
11
4
|
context "A Document instance" do
|
|
@@ -19,7 +12,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
19
12
|
end
|
|
20
13
|
|
|
21
14
|
should "not have text" do
|
|
22
|
-
assert !@document.has_text?
|
|
15
|
+
assert !@document.text?
|
|
23
16
|
end
|
|
24
17
|
|
|
25
18
|
should "be empty" do
|
|
@@ -51,7 +44,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
51
44
|
end
|
|
52
45
|
|
|
53
46
|
should "not have text" do
|
|
54
|
-
assert !@document.has_text?
|
|
47
|
+
assert !@document.text?
|
|
55
48
|
end
|
|
56
49
|
|
|
57
50
|
should "have word count equal to what was added" do
|
|
@@ -78,7 +71,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
78
71
|
end
|
|
79
72
|
|
|
80
73
|
should "have text" do
|
|
81
|
-
assert @document.has_text?
|
|
74
|
+
assert @document.text?
|
|
82
75
|
end
|
|
83
76
|
|
|
84
77
|
should "have word count equal to what was added" do
|
|
@@ -240,6 +233,33 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
240
233
|
assert !@lda.est_alpha.nil?
|
|
241
234
|
end
|
|
242
235
|
|
|
236
|
+
should "expose the selected backend name" do
|
|
237
|
+
assert(["native", "pure_ruby", "rust"].include?(@lda.backend_name))
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
should "raise when rust backend is requested but extension is unavailable" do
|
|
241
|
+
if Lda::RUST_EXTENSION_LOADED
|
|
242
|
+
assert true
|
|
243
|
+
else
|
|
244
|
+
assert_raise(LoadError) { Lda::Lda.new(@corpus, backend: :rust) }
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
should "run with rust backend when extension is available" do
|
|
249
|
+
if Lda::RUST_EXTENSION_LOADED
|
|
250
|
+
rust_lda = Lda::Lda.new(@corpus, backend: :rust, random_seed: 1234)
|
|
251
|
+
rust_lda.verbose = false
|
|
252
|
+
rust_lda.num_topics = 4
|
|
253
|
+
rust_lda.em("seeded")
|
|
254
|
+
|
|
255
|
+
assert_equal "rust", rust_lda.backend_name
|
|
256
|
+
assert_equal @corpus.num_docs, rust_lda.gamma.size
|
|
257
|
+
assert_equal @corpus.num_docs, rust_lda.phi.size
|
|
258
|
+
else
|
|
259
|
+
assert true
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
|
|
243
263
|
context "after running em" do
|
|
244
264
|
setup do
|
|
245
265
|
@lda.verbose = false
|
|
@@ -277,5 +297,23 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
277
297
|
end
|
|
278
298
|
end
|
|
279
299
|
end
|
|
300
|
+
|
|
301
|
+
context "using the pure-ruby backend" do
|
|
302
|
+
setup do
|
|
303
|
+
@lda = Lda::Lda.new(@corpus, backend: :pure, random_seed: 1234)
|
|
304
|
+
@lda.verbose = false
|
|
305
|
+
@lda.num_topics = 6
|
|
306
|
+
@lda.max_iter = 20
|
|
307
|
+
@lda.em_max_iter = 30
|
|
308
|
+
@lda.em('random')
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
should "run em and generate model matrices" do
|
|
312
|
+
assert_equal "pure_ruby", @lda.backend_name
|
|
313
|
+
assert_equal @lda.num_topics, @lda.beta.size
|
|
314
|
+
assert_equal @corpus.num_docs, @lda.gamma.size
|
|
315
|
+
assert_equal @corpus.num_docs, @lda.phi.size
|
|
316
|
+
end
|
|
317
|
+
end
|
|
280
318
|
end
|
|
281
319
|
end
|
data/test/packaged_gem_smoke_test.rb
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "tmpdir"
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require_relative "../bin/packaged-gem-smoke"
|
|
5
|
+
|
|
6
|
+
class PackagedGemSmokeTest < Test::Unit::TestCase
|
|
7
|
+
def test_gem_path_under_prefix_handles_symlinked_prefixes
|
|
8
|
+
Dir.mktmpdir("packaged-smoke") do |tmpdir|
|
|
9
|
+
real_root = File.join(tmpdir, "real")
|
|
10
|
+
link_root = File.join(tmpdir, "link")
|
|
11
|
+
gem_dir = File.join(real_root, "gems", "lda-ruby-0.4.0")
|
|
12
|
+
|
|
13
|
+
FileUtils.mkdir_p(gem_dir)
|
|
14
|
+
File.symlink(real_root, link_root)
|
|
15
|
+
|
|
16
|
+
assert(
|
|
17
|
+
Lda::PackagedGemSmoke.gem_path_under_prefix?(gem_dir, link_root),
|
|
18
|
+
"expected symlinked prefix to match real gem path"
|
|
19
|
+
)
|
|
20
|
+
assert(
|
|
21
|
+
Lda::PackagedGemSmoke.gem_path_under_prefix?(File.join(link_root, "gems", "lda-ruby-0.4.0"), real_root),
|
|
22
|
+
"expected real prefix to match symlinked gem path"
|
|
23
|
+
)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def test_gem_path_under_prefix_rejects_neighbor_prefixes
|
|
28
|
+
assert(
|
|
29
|
+
!Lda::PackagedGemSmoke.gem_path_under_prefix?("/tmp/gemhome-other/gems/lda-ruby-0.4.0", "/tmp/gemhome"),
|
|
30
|
+
"neighbor prefixes should not match"
|
|
31
|
+
)
|
|
32
|
+
end
|
|
33
|
+
end
|
data/test/pure_ruby_orchestration_test.rb
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
class PureRubyOrchestrationTest < Test::Unit::TestCase
|
|
4
|
+
FIXTURE_DOCUMENTS = [
|
|
5
|
+
"apple banana apple banana fruit sweet fruit",
|
|
6
|
+
"truck wheel truck road engine metal road",
|
|
7
|
+
"ruby code gem ruby class module test",
|
|
8
|
+
"banana fruit apple orchard fresh sweet",
|
|
9
|
+
"engine road truck wheel fuel highway",
|
|
10
|
+
"module ruby class object gem code"
|
|
11
|
+
].freeze
|
|
12
|
+
|
|
13
|
+
def test_rust_em_input_includes_expected_fields
|
|
14
|
+
backend = build_backend
|
|
15
|
+
|
|
16
|
+
em_input = backend.rust_em_input("seeded")
|
|
17
|
+
|
|
18
|
+
assert_equal 3, em_input[:topics]
|
|
19
|
+
assert_equal em_input[:document_words].size, em_input[:document_counts].size
|
|
20
|
+
assert_equal em_input[:document_words].map(&:length), em_input[:document_lengths]
|
|
21
|
+
assert_equal em_input[:document_counts].map { |counts| counts.sum.to_f }, em_input[:document_totals]
|
|
22
|
+
assert_equal 3, em_input[:initial_beta_probabilities].size
|
|
23
|
+
assert em_input[:terms] > 0
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def test_em_from_input_matches_seeded_em_output
|
|
27
|
+
direct = build_backend
|
|
28
|
+
from_input = build_backend
|
|
29
|
+
|
|
30
|
+
direct.em("seeded")
|
|
31
|
+
em_input = from_input.rust_em_input("seeded")
|
|
32
|
+
from_input.em_from_input(em_input)
|
|
33
|
+
|
|
34
|
+
assert_nested_close(direct.gamma, from_input.gamma, 1e-9)
|
|
35
|
+
assert_nested_close(direct.beta, from_input.beta, 1e-9)
|
|
36
|
+
assert_nested_close(direct.compute_phi, from_input.compute_phi, 1e-9)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def test_rust_initial_beta_probabilities_matches_rust_em_input_for_random_start
|
|
40
|
+
from_helper = build_backend
|
|
41
|
+
from_input = build_backend
|
|
42
|
+
|
|
43
|
+
document_words = from_helper.corpus.documents.map { |document| document.words.map(&:to_i) }
|
|
44
|
+
document_counts = from_helper.corpus.documents.map { |document| document.counts.map(&:to_f) }
|
|
45
|
+
terms = from_helper.corpus.documents.flat_map(&:words).max + 1
|
|
46
|
+
|
|
47
|
+
helper_beta = from_helper.rust_initial_beta_probabilities(
|
|
48
|
+
"random",
|
|
49
|
+
document_words,
|
|
50
|
+
document_counts,
|
|
51
|
+
from_helper.num_topics,
|
|
52
|
+
terms
|
|
53
|
+
)
|
|
54
|
+
em_input = from_input.rust_em_input("random")
|
|
55
|
+
|
|
56
|
+
assert_nested_close(helper_beta, em_input[:initial_beta_probabilities], 1e-12)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def test_apply_em_state_sets_outputs
|
|
60
|
+
backend = build_backend
|
|
61
|
+
|
|
62
|
+
docs = backend.corpus.documents
|
|
63
|
+
topics = 3
|
|
64
|
+
terms = 5
|
|
65
|
+
|
|
66
|
+
beta_probabilities = Array.new(topics) { Array.new(terms, 1.0 / terms) }
|
|
67
|
+
beta_log = beta_probabilities.map { |row| row.map { |probability| Math.log(probability) } }
|
|
68
|
+
gamma = Array.new(docs.size) { Array.new(topics, 1.0) }
|
|
69
|
+
phi = docs.map { |document| Array.new(document.length) { Array.new(topics, 1.0 / topics) } }
|
|
70
|
+
|
|
71
|
+
backend.apply_em_state(
|
|
72
|
+
beta_probabilities: beta_probabilities,
|
|
73
|
+
beta_log: beta_log,
|
|
74
|
+
gamma: gamma,
|
|
75
|
+
phi: phi
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
assert_equal beta_log, backend.beta
|
|
79
|
+
assert_equal gamma, backend.gamma
|
|
80
|
+
assert_equal phi, backend.compute_phi
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
private
|
|
84
|
+
|
|
85
|
+
def build_backend
|
|
86
|
+
backend = Lda::Backends::PureRuby.new(random_seed: 1234)
|
|
87
|
+
backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
|
|
88
|
+
backend.verbose = false
|
|
89
|
+
backend.num_topics = 3
|
|
90
|
+
backend.max_iter = 25
|
|
91
|
+
backend.em_max_iter = 40
|
|
92
|
+
backend.convergence = 1e-5
|
|
93
|
+
backend.em_convergence = 1e-4
|
|
94
|
+
backend
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def assert_nested_close(left, right, tolerance)
|
|
98
|
+
assert_equal left.class, right.class
|
|
99
|
+
|
|
100
|
+
if left.is_a?(Array)
|
|
101
|
+
assert_equal left.size, right.size
|
|
102
|
+
left.each_with_index do |left_item, index|
|
|
103
|
+
assert_nested_close(left_item, right[index], tolerance)
|
|
104
|
+
end
|
|
105
|
+
else
|
|
106
|
+
assert_in_delta left.to_f, right.to_f, tolerance
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
data/test/release_scripts_test.rb
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "open3"
|
|
3
|
+
|
|
4
|
+
class ReleaseScriptsTest < Test::Unit::TestCase
|
|
5
|
+
def setup
|
|
6
|
+
@repo_root = File.expand_path("..", __dir__)
|
|
7
|
+
@check_version_sync = File.join(@repo_root, "bin", "check-version-sync")
|
|
8
|
+
@release_prepare = File.join(@repo_root, "bin", "release-prepare")
|
|
9
|
+
@verify_rubygems_api_key = File.join(@repo_root, "bin", "verify-rubygems-api-key")
|
|
10
|
+
@verify_release_publish = File.join(@repo_root, "bin", "verify-release-publish")
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def test_check_version_sync_passes_for_repository_versions
|
|
14
|
+
stdout, stderr, status = Open3.capture3(@check_version_sync, chdir: @repo_root)
|
|
15
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
16
|
+
assert_match(/Version sync OK:/, stdout)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_check_version_sync_fails_for_mismatched_tag
|
|
20
|
+
_stdout, stderr, status = Open3.capture3(@check_version_sync, "--tag", "v9.9.9", chdir: @repo_root)
|
|
21
|
+
assert(!status.success?, "expected check-version-sync to fail for mismatched tag")
|
|
22
|
+
assert_match(/does not match expected tag/, stderr)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_check_version_sync_print_tag_matches_library_version
|
|
26
|
+
stdout, stderr, status = Open3.capture3(@check_version_sync, "--print-tag", chdir: @repo_root)
|
|
27
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
28
|
+
assert_equal("v#{Lda::VERSION}", stdout.strip)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def test_release_prepare_dry_run_does_not_change_files
|
|
32
|
+
version_rb_path = File.join(@repo_root, "lib", "lda-ruby", "version.rb")
|
|
33
|
+
version_yml_path = File.join(@repo_root, "VERSION.yml")
|
|
34
|
+
changelog_path = File.join(@repo_root, "CHANGELOG.md")
|
|
35
|
+
|
|
36
|
+
baseline = {
|
|
37
|
+
version_rb_path => File.read(version_rb_path),
|
|
38
|
+
version_yml_path => File.read(version_yml_path),
|
|
39
|
+
changelog_path => File.read(changelog_path)
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
stdout, stderr, status = Open3.capture3(
|
|
43
|
+
@release_prepare,
|
|
44
|
+
"9.9.9",
|
|
45
|
+
"--allow-dirty",
|
|
46
|
+
"--dry-run",
|
|
47
|
+
chdir: @repo_root
|
|
48
|
+
)
|
|
49
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
50
|
+
assert_match(/Dry run: would update/, stdout)
|
|
51
|
+
|
|
52
|
+
baseline.each do |path, original|
|
|
53
|
+
assert_equal(original, File.read(path), "#{path} changed during dry-run")
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def test_verify_rubygems_api_key_help
|
|
58
|
+
stdout, stderr, status = Open3.capture3(@verify_rubygems_api_key, "--help", chdir: @repo_root)
|
|
59
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
60
|
+
assert_match(/Usage: \.\/bin\/verify-rubygems-api-key/, stdout)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def test_verify_rubygems_api_key_rejects_unknown_argument
|
|
64
|
+
_stdout, stderr, status = Open3.capture3(@verify_rubygems_api_key, "--unknown-flag", chdir: @repo_root)
|
|
65
|
+
assert(!status.success?, "expected verify-rubygems-api-key to fail for unknown arguments")
|
|
66
|
+
assert_match(/unknown argument/, stderr)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def test_verify_release_publish_help
|
|
70
|
+
stdout, stderr, status = Open3.capture3(@verify_release_publish, "--help", chdir: @repo_root)
|
|
71
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
72
|
+
assert_match(/Usage: \.\/bin\/verify-release-publish/, stdout)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def test_verify_release_publish_rejects_unknown_argument
|
|
76
|
+
_stdout, stderr, status = Open3.capture3(@verify_release_publish, "--unknown-flag", chdir: @repo_root)
|
|
77
|
+
assert(!status.success?, "expected verify-release-publish to fail for unknown arguments")
|
|
78
|
+
assert_match(/unknown argument/, stderr)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def test_verify_release_publish_rejects_invalid_tag_format
|
|
82
|
+
_stdout, stderr, status = Open3.capture3(
|
|
83
|
+
@verify_release_publish,
|
|
84
|
+
"--tag",
|
|
85
|
+
"0.4.0",
|
|
86
|
+
"--skip-rubygems",
|
|
87
|
+
"--skip-github",
|
|
88
|
+
chdir: @repo_root
|
|
89
|
+
)
|
|
90
|
+
assert(!status.success?, "expected verify-release-publish to fail for invalid tag format")
|
|
91
|
+
assert_match(/tag must be in format vX\.Y\.Z/, stderr)
|
|
92
|
+
end
|
|
93
|
+
end
|
data/test/rust_build_policy_test.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "lda-ruby/rust_build_policy"
|
|
3
|
+
|
|
4
|
+
class RustBuildPolicyTest < Test::Unit::TestCase
|
|
5
|
+
def test_default_policy_is_auto
|
|
6
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve(nil)
|
|
7
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("")
|
|
8
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve(" ")
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def test_resolves_valid_values_case_insensitively
|
|
12
|
+
assert_equal "always", Lda::RustBuildPolicy.resolve("always")
|
|
13
|
+
assert_equal "always", Lda::RustBuildPolicy.resolve("ALWAYS")
|
|
14
|
+
assert_equal "never", Lda::RustBuildPolicy.resolve("never")
|
|
15
|
+
assert_equal "never", Lda::RustBuildPolicy.resolve(" NeVeR ")
|
|
16
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("AUTO")
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_invalid_policy_falls_back_to_auto
|
|
20
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("sometimes")
|
|
21
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("true")
|
|
22
|
+
end
|
|
23
|
+
end
|