lda-ruby 0.4.0 → 0.5.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,109 @@
1
+ require_relative "test_helper"
2
+
3
+ class PureRubyOrchestrationTest < Test::Unit::TestCase
4
+ FIXTURE_DOCUMENTS = [
5
+ "apple banana apple banana fruit sweet fruit",
6
+ "truck wheel truck road engine metal road",
7
+ "ruby code gem ruby class module test",
8
+ "banana fruit apple orchard fresh sweet",
9
+ "engine road truck wheel fuel highway",
10
+ "module ruby class object gem code"
11
+ ].freeze
12
+
13
+ def test_rust_em_input_includes_expected_fields
14
+ backend = build_backend
15
+
16
+ em_input = backend.rust_em_input("seeded")
17
+
18
+ assert_equal 3, em_input[:topics]
19
+ assert_equal em_input[:document_words].size, em_input[:document_counts].size
20
+ assert_equal em_input[:document_words].map(&:length), em_input[:document_lengths]
21
+ assert_equal em_input[:document_counts].map { |counts| counts.sum.to_f }, em_input[:document_totals]
22
+ assert_equal 3, em_input[:initial_beta_probabilities].size
23
+ assert em_input[:terms] > 0
24
+ end
25
+
26
+ def test_em_from_input_matches_seeded_em_output
27
+ direct = build_backend
28
+ from_input = build_backend
29
+
30
+ direct.em("seeded")
31
+ em_input = from_input.rust_em_input("seeded")
32
+ from_input.em_from_input(em_input)
33
+
34
+ assert_nested_close(direct.gamma, from_input.gamma, 1e-9)
35
+ assert_nested_close(direct.beta, from_input.beta, 1e-9)
36
+ assert_nested_close(direct.compute_phi, from_input.compute_phi, 1e-9)
37
+ end
38
+
39
+ def test_rust_initial_beta_probabilities_matches_rust_em_input_for_random_start
40
+ from_helper = build_backend
41
+ from_input = build_backend
42
+
43
+ document_words = from_helper.corpus.documents.map { |document| document.words.map(&:to_i) }
44
+ document_counts = from_helper.corpus.documents.map { |document| document.counts.map(&:to_f) }
45
+ terms = from_helper.corpus.documents.flat_map(&:words).max + 1
46
+
47
+ helper_beta = from_helper.rust_initial_beta_probabilities(
48
+ "random",
49
+ document_words,
50
+ document_counts,
51
+ from_helper.num_topics,
52
+ terms
53
+ )
54
+ em_input = from_input.rust_em_input("random")
55
+
56
+ assert_nested_close(helper_beta, em_input[:initial_beta_probabilities], 1e-12)
57
+ end
58
+
59
+ def test_apply_em_state_sets_outputs
60
+ backend = build_backend
61
+
62
+ docs = backend.corpus.documents
63
+ topics = 3
64
+ terms = 5
65
+
66
+ beta_probabilities = Array.new(topics) { Array.new(terms, 1.0 / terms) }
67
+ beta_log = beta_probabilities.map { |row| row.map { |probability| Math.log(probability) } }
68
+ gamma = Array.new(docs.size) { Array.new(topics, 1.0) }
69
+ phi = docs.map { |document| Array.new(document.length) { Array.new(topics, 1.0 / topics) } }
70
+
71
+ backend.apply_em_state(
72
+ beta_probabilities: beta_probabilities,
73
+ beta_log: beta_log,
74
+ gamma: gamma,
75
+ phi: phi
76
+ )
77
+
78
+ assert_equal beta_log, backend.beta
79
+ assert_equal gamma, backend.gamma
80
+ assert_equal phi, backend.compute_phi
81
+ end
82
+
83
+ private
84
+
85
+ def build_backend
86
+ backend = Lda::Backends::PureRuby.new(random_seed: 1234)
87
+ backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
88
+ backend.verbose = false
89
+ backend.num_topics = 3
90
+ backend.max_iter = 25
91
+ backend.em_max_iter = 40
92
+ backend.convergence = 1e-5
93
+ backend.em_convergence = 1e-4
94
+ backend
95
+ end
96
+
97
+ def assert_nested_close(left, right, tolerance)
98
+ assert_equal left.class, right.class
99
+
100
+ if left.is_a?(Array)
101
+ assert_equal left.size, right.size
102
+ left.each_with_index do |left_item, index|
103
+ assert_nested_close(left_item, right[index], tolerance)
104
+ end
105
+ else
106
+ assert_in_delta left.to_f, right.to_f, tolerance
107
+ end
108
+ end
109
+ end
@@ -6,6 +6,8 @@ class ReleaseScriptsTest < Test::Unit::TestCase
6
6
  @repo_root = File.expand_path("..", __dir__)
7
7
  @check_version_sync = File.join(@repo_root, "bin", "check-version-sync")
8
8
  @release_prepare = File.join(@repo_root, "bin", "release-prepare")
9
+ @verify_rubygems_api_key = File.join(@repo_root, "bin", "verify-rubygems-api-key")
10
+ @verify_release_publish = File.join(@repo_root, "bin", "verify-release-publish")
9
11
  end
10
12
 
11
13
  def test_check_version_sync_passes_for_repository_versions
@@ -51,4 +53,41 @@ class ReleaseScriptsTest < Test::Unit::TestCase
51
53
  assert_equal(original, File.read(path), "#{path} changed during dry-run")
52
54
  end
53
55
  end
56
+
57
+ def test_verify_rubygems_api_key_help
58
+ stdout, stderr, status = Open3.capture3(@verify_rubygems_api_key, "--help", chdir: @repo_root)
59
+ assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
60
+ assert_match(/Usage: \.\/bin\/verify-rubygems-api-key/, stdout)
61
+ end
62
+
63
+ def test_verify_rubygems_api_key_rejects_unknown_argument
64
+ _stdout, stderr, status = Open3.capture3(@verify_rubygems_api_key, "--unknown-flag", chdir: @repo_root)
65
+ assert(!status.success?, "expected verify-rubygems-api-key to fail for unknown arguments")
66
+ assert_match(/unknown argument/, stderr)
67
+ end
68
+
69
+ def test_verify_release_publish_help
70
+ stdout, stderr, status = Open3.capture3(@verify_release_publish, "--help", chdir: @repo_root)
71
+ assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
72
+ assert_match(/Usage: \.\/bin\/verify-release-publish/, stdout)
73
+ end
74
+
75
+ def test_verify_release_publish_rejects_unknown_argument
76
+ _stdout, stderr, status = Open3.capture3(@verify_release_publish, "--unknown-flag", chdir: @repo_root)
77
+ assert(!status.success?, "expected verify-release-publish to fail for unknown arguments")
78
+ assert_match(/unknown argument/, stderr)
79
+ end
80
+
81
+ def test_verify_release_publish_rejects_invalid_tag_format
82
+ _stdout, stderr, status = Open3.capture3(
83
+ @verify_release_publish,
84
+ "--tag",
85
+ "0.4.0",
86
+ "--skip-rubygems",
87
+ "--skip-github",
88
+ chdir: @repo_root
89
+ )
90
+ assert(!status.success?, "expected verify-release-publish to fail for invalid tag format")
91
+ assert_match(/tag must be in format vX\.Y\.Z/, stderr)
92
+ end
54
93
  end