lda-ruby 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +4 -1
- data/VERSION.yml +1 -1
- data/docs/modernization-handoff.md +68 -25
- data/docs/porting-strategy.md +23 -2
- data/docs/precompiled-platform-policy.md +15 -2
- data/docs/precompiled-target-evaluation.md +67 -0
- data/docs/release-runbook.md +41 -6
- data/docs/rust-orchestration-guardrails.md +50 -0
- data/ext/lda-ruby/cokus.c +10 -11
- data/ext/lda-ruby/cokus.h +3 -3
- data/ext/lda-ruby/lda-inference.c +2 -2
- data/ext/lda-ruby/utils.c +8 -0
- data/ext/lda-ruby-rust/README.md +25 -0
- data/ext/lda-ruby-rust/extconf.rb +25 -13
- data/ext/lda-ruby-rust/include/strings.h +35 -0
- data/ext/lda-ruby-rust/src/lib.rs +816 -9
- data/lib/lda-ruby/backends/base.rb +4 -0
- data/lib/lda-ruby/backends/pure_ruby.rb +110 -48
- data/lib/lda-ruby/backends/rust.rb +384 -3
- data/lib/lda-ruby/version.rb +1 -1
- data/test/benchmark_scripts_test.rb +23 -0
- data/test/pure_ruby_orchestration_test.rb +109 -0
- data/test/release_scripts_test.rb +39 -0
- data/test/rust_orchestration_test.rb +911 -0
- metadata +8 -2
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
# Exercises the orchestration helpers exposed by Lda::Backends::PureRuby
# (rust_em_input, em_from_input, rust_initial_beta_probabilities and
# apply_em_state) against a small fixed corpus.
class PureRubyOrchestrationTest < Test::Unit::TestCase
  # Six short documents used as the corpus for every backend built here.
  FIXTURE_DOCUMENTS = [
    "apple banana apple banana fruit sweet fruit",
    "truck wheel truck road engine metal road",
    "ruby code gem ruby class module test",
    "banana fruit apple orchard fresh sweet",
    "engine road truck wheel fuel highway",
    "module ruby class object gem code"
  ].freeze

  # The payload built for a "seeded" start must be internally consistent:
  # the per-document arrays line up with each other and the topic count
  # matches the three topics configured in build_backend.
  def test_rust_em_input_includes_expected_fields
    payload = build_backend.rust_em_input("seeded")
    words = payload[:document_words]
    counts = payload[:document_counts]

    assert_equal(3, payload[:topics])
    assert_equal(words.size, counts.size)
    assert_equal(words.map(&:length), payload[:document_lengths])
    assert_equal(counts.map { |row| row.sum.to_f }, payload[:document_totals])
    assert_equal(3, payload[:initial_beta_probabilities].size)
    assert(payload[:terms] > 0)
  end

  # Running em("seeded") directly and running em_from_input on the payload
  # from rust_em_input("seeded") must give the same gamma, beta and phi
  # (within 1e-9) on two identically configured backends.
  def test_em_from_input_matches_seeded_em_output
    reference = build_backend
    replayed = build_backend

    reference.em("seeded")
    payload = replayed.rust_em_input("seeded")
    replayed.em_from_input(payload)

    assert_nested_close(reference.gamma, replayed.gamma, 1e-9)
    assert_nested_close(reference.beta, replayed.beta, 1e-9)
    assert_nested_close(reference.compute_phi, replayed.compute_phi, 1e-9)
  end

  # For a "random" start, calling rust_initial_beta_probabilities by hand
  # must produce the same matrix (within 1e-12) that rust_em_input embeds
  # under :initial_beta_probabilities on an identically seeded backend.
  def test_rust_initial_beta_probabilities_matches_rust_em_input_for_random_start
    helper_backend = build_backend
    input_backend = build_backend

    documents = helper_backend.corpus.documents
    word_ids = documents.map { |doc| doc.words.map(&:to_i) }
    word_counts = documents.map { |doc| doc.counts.map(&:to_f) }
    # Vocabulary size is taken as the largest word id plus one.
    vocabulary_size = documents.flat_map(&:words).max + 1

    expected_beta = helper_backend.rust_initial_beta_probabilities(
      "random",
      word_ids,
      word_counts,
      helper_backend.num_topics,
      vocabulary_size
    )
    payload = input_backend.rust_em_input("random")

    assert_nested_close(expected_beta, payload[:initial_beta_probabilities], 1e-12)
  end

  # apply_em_state must make the supplied beta_log, gamma and phi readable
  # back through beta, gamma and compute_phi respectively.
  def test_apply_em_state_sets_outputs
    backend = build_backend
    documents = backend.corpus.documents
    topic_count = 3
    term_count = 5

    uniform_beta = Array.new(topic_count) { Array.new(term_count, 1.0 / term_count) }
    log_beta = uniform_beta.map do |row|
      row.map { |value| Math.log(value) }
    end
    flat_gamma = Array.new(documents.size) { Array.new(topic_count, 1.0) }
    uniform_phi = documents.map do |doc|
      Array.new(doc.length) { Array.new(topic_count, 1.0 / topic_count) }
    end

    backend.apply_em_state(
      beta_probabilities: uniform_beta,
      beta_log: log_beta,
      gamma: flat_gamma,
      phi: uniform_phi
    )

    assert_equal(log_beta, backend.beta)
    assert_equal(flat_gamma, backend.gamma)
    assert_equal(uniform_phi, backend.compute_phi)
  end

  private

  # Builds a quiet PureRuby backend with random_seed 1234 and three topics
  # over FIXTURE_DOCUMENTS, so separately built instances start from the
  # same configuration.
  def build_backend
    Lda::Backends::PureRuby.new(random_seed: 1234).tap do |instance|
      instance.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
      instance.verbose = false
      instance.num_topics = 3
      instance.max_iter = 25
      instance.em_max_iter = 40
      instance.convergence = 1e-5
      instance.em_convergence = 1e-4
    end
  end

  # Recursively compares two values: both sides must have the same class,
  # arrays must have the same size and are compared element by element,
  # and anything else is compared as a float within +tolerance+.
  def assert_nested_close(left, right, tolerance)
    assert_equal(left.class, right.class)

    unless left.is_a?(Array)
      assert_in_delta(left.to_f, right.to_f, tolerance)
      return
    end

    assert_equal(left.size, right.size)
    left.zip(right) do |left_item, right_item|
      assert_nested_close(left_item, right_item, tolerance)
    end
  end
end
|
|
@@ -6,6 +6,8 @@ class ReleaseScriptsTest < Test::Unit::TestCase
|
|
|
6
6
|
@repo_root = File.expand_path("..", __dir__)
|
|
7
7
|
@check_version_sync = File.join(@repo_root, "bin", "check-version-sync")
|
|
8
8
|
@release_prepare = File.join(@repo_root, "bin", "release-prepare")
|
|
9
|
+
@verify_rubygems_api_key = File.join(@repo_root, "bin", "verify-rubygems-api-key")
|
|
10
|
+
@verify_release_publish = File.join(@repo_root, "bin", "verify-release-publish")
|
|
9
11
|
end
|
|
10
12
|
|
|
11
13
|
def test_check_version_sync_passes_for_repository_versions
|
|
@@ -51,4 +53,41 @@ class ReleaseScriptsTest < Test::Unit::TestCase
|
|
|
51
53
|
assert_equal(original, File.read(path), "#{path} changed during dry-run")
|
|
52
54
|
end
|
|
53
55
|
end
|
|
56
|
+
|
|
57
|
+
# Runs the verify-rubygems-api-key script with --help from the repository
# root and expects a successful exit plus its usage banner on stdout.
def test_verify_rubygems_api_key_help
  command = [@verify_rubygems_api_key, "--help"]
  out, err, result = Open3.capture3(*command, chdir: @repo_root)

  assert(result.success?, "stdout=#{out}\nstderr=#{err}")
  assert_match(/Usage: \.\/bin\/verify-rubygems-api-key/, out)
end
|
|
62
|
+
|
|
63
|
+
# Runs the verify-rubygems-api-key script with an unrecognised flag and
# expects a failing exit status with "unknown argument" on stderr.
def test_verify_rubygems_api_key_rejects_unknown_argument
  command = [@verify_rubygems_api_key, "--unknown-flag"]
  _out, err, result = Open3.capture3(*command, chdir: @repo_root)

  assert(!result.success?, "expected verify-rubygems-api-key to fail for unknown arguments")
  assert_match(/unknown argument/, err)
end
|
|
68
|
+
|
|
69
|
+
# Runs the verify-release-publish script with --help from the repository
# root and expects a successful exit plus its usage banner on stdout.
def test_verify_release_publish_help
  command = [@verify_release_publish, "--help"]
  out, err, result = Open3.capture3(*command, chdir: @repo_root)

  assert(result.success?, "stdout=#{out}\nstderr=#{err}")
  assert_match(/Usage: \.\/bin\/verify-release-publish/, out)
end
|
|
74
|
+
|
|
75
|
+
# Runs the verify-release-publish script with an unrecognised flag and
# expects a failing exit status with "unknown argument" on stderr.
def test_verify_release_publish_rejects_unknown_argument
  command = [@verify_release_publish, "--unknown-flag"]
  _out, err, result = Open3.capture3(*command, chdir: @repo_root)

  assert(!result.success?, "expected verify-release-publish to fail for unknown arguments")
  assert_match(/unknown argument/, err)
end
|
|
80
|
+
|
|
81
|
+
# Passes a tag without the leading "v" ("0.4.0") to verify-release-publish,
# together with --skip-rubygems and --skip-github, and expects the script
# to fail with the tag-format message on stderr.
def test_verify_release_publish_rejects_invalid_tag_format
  arguments = ["--tag", "0.4.0", "--skip-rubygems", "--skip-github"]
  _out, err, result = Open3.capture3(@verify_release_publish, *arguments, chdir: @repo_root)

  assert(!result.success?, "expected verify-release-publish to fail for invalid tag format")
  assert_match(/tag must be in format vX\.Y\.Z/, err)
end
|
|
54
93
|
end
|