lda-ruby 0.3.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/CHANGELOG.md +16 -0
- data/Gemfile +9 -0
- data/README.md +126 -3
- data/VERSION.yml +3 -3
- data/docs/modernization-handoff.md +233 -0
- data/docs/porting-strategy.md +148 -0
- data/docs/precompiled-platform-policy.md +81 -0
- data/docs/precompiled-target-evaluation.md +67 -0
- data/docs/release-runbook.md +192 -0
- data/docs/rust-orchestration-guardrails.md +50 -0
- data/ext/lda-ruby/cokus.c +10 -11
- data/ext/lda-ruby/cokus.h +3 -3
- data/ext/lda-ruby/extconf.rb +10 -6
- data/ext/lda-ruby/lda-inference.c +23 -7
- data/ext/lda-ruby/utils.c +8 -0
- data/ext/lda-ruby-rust/Cargo.toml +12 -0
- data/ext/lda-ruby-rust/README.md +73 -0
- data/ext/lda-ruby-rust/extconf.rb +135 -0
- data/ext/lda-ruby-rust/include/strings.h +35 -0
- data/ext/lda-ruby-rust/src/lib.rs +1263 -0
- data/lda-ruby.gemspec +0 -0
- data/lib/lda-ruby/backends/base.rb +133 -0
- data/lib/lda-ruby/backends/native.rb +158 -0
- data/lib/lda-ruby/backends/pure_ruby.rb +675 -0
- data/lib/lda-ruby/backends/rust.rb +607 -0
- data/lib/lda-ruby/backends.rb +58 -0
- data/lib/lda-ruby/corpus/corpus.rb +17 -15
- data/lib/lda-ruby/corpus/data_corpus.rb +2 -2
- data/lib/lda-ruby/corpus/directory_corpus.rb +2 -2
- data/lib/lda-ruby/corpus/text_corpus.rb +2 -2
- data/lib/lda-ruby/document/document.rb +6 -6
- data/lib/lda-ruby/document/text_document.rb +5 -4
- data/lib/lda-ruby/rust_build_policy.rb +21 -0
- data/lib/lda-ruby/version.rb +5 -0
- data/lib/lda-ruby.rb +293 -48
- data/test/backend_compatibility_test.rb +146 -0
- data/test/backends_selection_test.rb +100 -0
- data/test/benchmark_scripts_test.rb +23 -0
- data/test/gemspec_test.rb +27 -0
- data/test/lda_ruby_test.rb +49 -11
- data/test/packaged_gem_smoke_test.rb +33 -0
- data/test/pure_ruby_orchestration_test.rb +109 -0
- data/test/release_scripts_test.rb +93 -0
- data/test/rust_build_policy_test.rb +23 -0
- data/test/rust_orchestration_test.rb +911 -0
- data/test/simple_pipeline_test.rb +22 -0
- data/test/simple_yaml.rb +1 -7
- data/test/test_helper.rb +5 -6
- metadata +54 -38
- data/Rakefile +0 -61
- data/ext/lda-ruby/Makefile +0 -181
- data/test/data/.gitignore +0 -2
- data/test/simple_test.rb +0 -26
|
@@ -0,0 +1,911 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "test_helper"
|
|
4
|
+
|
|
5
|
+
class RustOrchestrationTest < Test::Unit::TestCase
|
|
6
|
+
# Four short, space-separated documents used as the corpus for every test in
# this class. Both the array and each document string are frozen.
FIXTURE_DOCUMENTS = <<~DOCUMENTS.lines(chomp: true).map(&:freeze).freeze
  ruby code gem ruby class module test
  rust backend speed ffi binding memory safety
  topic model inference corpus document probability
  module ruby class object gem code
DOCUMENTS
|
|
12
|
+
|
|
13
|
+
# Omits the test unless the Rust extension is loaded and exposes
# run_em_with_start, then builds the corpus-derived inputs and the EM
# hyperparameters shared by the tests in this class.
def setup
  omit("rust extension unavailable") unless Lda::RUST_EXTENSION_LOADED
  start_api_available = defined?(Lda::RustBackend) && Lda::RustBackend.respond_to?(:run_em_with_start)
  omit("run_em_with_start unavailable") unless start_api_available

  @corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
  @topics = 3
  # One more than the largest word value seen in any document
  # (presumably word ids are zero-based -- confirm against Lda::TextCorpus).
  @terms = @corpus.documents.flat_map { |document| document.words }.max + 1
  @document_words = @corpus.documents.map do |document|
    document.words.map { |word| word.to_i }
  end
  @document_counts = @corpus.documents.map do |document|
    document.counts.map { |count| count.to_f }
  end

  # Iteration limit / convergence threshold pairs, as passed to the backend.
  @max_iter, @convergence = 25, 1e-5
  @em_max_iter, @em_convergence = 40, 1e-4
  @init_alpha = 0.3
  @min_probability = 1e-12
end
|
|
29
|
+
|
|
30
|
+
# Feeds the result of seeded_topic_term_probabilities into run_em and expects
# the output to agree with run_em_with_start("seeded") on the same inputs.
# assert_nested_close is defined elsewhere in this file; its third argument
# is presumably the allowed tolerance.
def test_run_em_with_start_seeded_matches_explicit_seeded_initialization
  corpus_input = [@document_words, @document_counts]
  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]

  initial_probabilities = Lda::RustBackend.seeded_topic_term_probabilities(
    *corpus_input, @topics, @terms, @min_probability
  )
  explicit = Lda::RustBackend.run_em(initial_probabilities, *corpus_input, *em_settings)
  with_start = Lda::RustBackend.run_em_with_start("seeded", *corpus_input, @topics, @terms, *em_settings)

  assert_nested_close(explicit, with_start, 1e-12)
end
|
|
67
|
+
|
|
68
|
+
# Runs run_em_with_start twice on identical inputs, once with the "seeded"
# start and once with the "deterministic" start, and expects both outputs to
# agree.
def test_run_em_with_start_deterministic_alias_matches_seeded
  run_with_start = lambda do |start|
    Lda::RustBackend.run_em_with_start(
      start, @document_words, @document_counts, @topics, @terms,
      @max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability
    )
  end

  seeded = run_with_start.call("seeded")
  deterministic = run_with_start.call("deterministic")

  assert_nested_close(seeded, deterministic, 1e-12)
end
|
|
99
|
+
|
|
100
|
+
# Feeds the result of random_topic_term_probabilities (built with a fixed
# seed) into run_em and expects the output to agree with
# run_em_with_start_seed("random") given the same seed.
def test_run_em_with_start_seed_random_matches_explicit_random_initialization
  %i[run_em_with_start_seed random_topic_term_probabilities].each do |required|
    omit("#{required} unavailable") unless Lda::RustBackend.respond_to?(required)
  end

  random_seed = 12_345
  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]

  initial_probabilities = Lda::RustBackend.random_topic_term_probabilities(
    @topics, @terms, @min_probability, random_seed
  )
  explicit = Lda::RustBackend.run_em(initial_probabilities, @document_words, @document_counts, *em_settings)
  with_start = Lda::RustBackend.run_em_with_start_seed(
    "random", @document_words, @document_counts, @topics, @terms, *em_settings, random_seed
  )

  assert_nested_close(explicit, with_start, 1e-12)
end
|
|
141
|
+
|
|
142
|
+
# Runs run_em_with_start_seed with the "seeded" start under two different
# seed values (101 and 202) and expects the outputs to agree, i.e. the seed
# argument must not influence a "seeded" start.
def test_run_em_with_start_seed_keeps_seeded_start_seed_independent
  omit("run_em_with_start_seed unavailable") unless Lda::RustBackend.respond_to?(:run_em_with_start_seed)

  left, right = [101, 202].map do |seed|
    Lda::RustBackend.run_em_with_start_seed(
      "seeded", @document_words, @document_counts, @topics, @terms,
      @max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability,
      seed
    )
  end

  assert_nested_close(left, right, 1e-12)
end
|
|
177
|
+
|
|
178
|
+
# Expects run_em_on_session_with_start_seed (corpus held in a session) to
# agree with run_em_with_start_seed (corpus passed directly) for the "seeded"
# start, and checks that a session can be dropped exactly once.
#
# The session is released in an ensure block so that a failing assertion does
# not leave it alive in the Rust backend.
def test_run_em_on_session_seeded_matches_direct_seeded_start
  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  unless Lda::RustBackend.respond_to?(:run_em_on_session_with_start_seed)
    omit("run_em_on_session_with_start_seed unavailable")
  end
  omit("run_em_with_start_seed unavailable") unless Lda::RustBackend.respond_to?(:run_em_with_start_seed)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0

  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]
  direct = Lda::RustBackend.run_em_with_start_seed(
    "seeded", @document_words, @document_counts, @topics, @terms, *em_settings, 777
  )
  via_session = Lda::RustBackend.run_em_on_session_with_start_seed(
    session_id, "seeded", @topics, *em_settings, 777
  )

  assert_nested_close(direct, via_session, 1e-12)
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)
  # Dropping the same id a second time must report that nothing was removed.
  assert_equal false, Lda::RustBackend.drop_corpus_session(session_id)
  session_id = nil # released above; nothing left for the ensure block to do
ensure
  # Reached with a live session only when an assertion failed before the drop.
  Lda::RustBackend.drop_corpus_session(session_id) if session_id.is_a?(Numeric) && session_id.positive?
end
|
|
219
|
+
|
|
220
|
+
# Expects run_em_on_session_with_start_seed to agree with
# run_em_with_start_seed for the "random" start when both receive the same
# seed (55_555).
#
# The session is released in an ensure block so that a failing assertion does
# not leave it alive in the Rust backend.
def test_run_em_on_session_random_matches_direct_random_start
  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  unless Lda::RustBackend.respond_to?(:run_em_on_session_with_start_seed)
    omit("run_em_on_session_with_start_seed unavailable")
  end
  omit("run_em_with_start_seed unavailable") unless Lda::RustBackend.respond_to?(:run_em_with_start_seed)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0

  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]
  direct = Lda::RustBackend.run_em_with_start_seed(
    "random", @document_words, @document_counts, @topics, @terms, *em_settings, 55_555
  )
  via_session = Lda::RustBackend.run_em_on_session_with_start_seed(
    session_id, "random", @topics, *em_settings, 55_555
  )

  assert_nested_close(direct, via_session, 1e-12)
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)
  session_id = nil # released above; nothing left for the ensure block to do
ensure
  # Reached with a live session only when an assertion failed before the drop.
  Lda::RustBackend.drop_corpus_session(session_id) if session_id.is_a?(Numeric) && session_id.positive?
end
|
|
260
|
+
|
|
261
|
+
# Expects run_em_on_session called with an unrecognised start mode
# ("unknown_mode") to agree with a direct "random" start using the same seed.
#
# The session is released in an ensure block so that a failing assertion does
# not leave it alive in the Rust backend.
def test_run_em_on_session_unknown_start_matches_random_start
  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  omit("run_em_on_session unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session)
  omit("run_em_with_start_seed unavailable") unless Lda::RustBackend.respond_to?(:run_em_with_start_seed)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0

  random_seed = 4545
  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]
  direct = Lda::RustBackend.run_em_with_start_seed(
    "random", @document_words, @document_counts, @topics, @terms, *em_settings, random_seed
  )
  via_session = Lda::RustBackend.run_em_on_session(
    session_id, "unknown_mode", @topics, *em_settings, random_seed
  )

  assert_nested_close(direct, via_session, 1e-12)
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)
  session_id = nil # released above; nothing left for the ensure block to do
ensure
  # Reached with a live session only when an assertion failed before the drop.
  Lda::RustBackend.drop_corpus_session(session_id) if session_id.is_a?(Numeric) && session_id.positive?
end
|
|
302
|
+
|
|
303
|
+
# Configures a corpus session with the EM settings up front, then expects
# run_em_on_session_start (which receives only the session id, start mode and
# seed) to agree with a direct run_em_with_start_seed call that passes the
# same settings explicitly.
#
# The session is released in an ensure block so that a failing assertion does
# not leave it alive in the Rust backend.
def test_run_em_on_session_start_uses_configured_settings
  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  omit("configure_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:configure_corpus_session)
  omit("run_em_on_session_start unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session_start)
  omit("run_em_with_start_seed unavailable") unless Lda::RustBackend.respond_to?(:run_em_with_start_seed)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0

  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]
  assert_equal true, Lda::RustBackend.configure_corpus_session(session_id, @topics, *em_settings)

  direct = Lda::RustBackend.run_em_with_start_seed(
    "seeded", @document_words, @document_counts, @topics, @terms, *em_settings, 9090
  )
  via_session = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 9090)

  assert_nested_close(direct, via_session, 1e-12)
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)
  session_id = nil # released above; nothing left for the ensure block to do
ensure
  # Reached with a live session only when an assertion failed before the drop.
  Lda::RustBackend.drop_corpus_session(session_id) if session_id.is_a?(Numeric) && session_id.positive?
end
|
|
347
|
+
|
|
348
|
+
# Expects run_em_on_session_start to return four empty arrays while the
# session has not been configured, and a populated result (one entry per
# topic at index 0, one entry per document at index 2) once
# configure_corpus_session has succeeded.
#
# The session is released in an ensure block so that a failing assertion does
# not leave it alive in the Rust backend.
def test_run_em_on_session_start_requires_configuration
  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  omit("configure_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:configure_corpus_session)
  omit("run_em_on_session_start unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session_start)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0

  unconfigured = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 1)
  assert_equal [[], [], [], []], unconfigured

  assert_equal true, Lda::RustBackend.configure_corpus_session(
    session_id, @topics,
    @max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability
  )

  configured = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 1)
  assert_equal @topics, configured[0].size
  assert_equal @document_words.size, configured[2].size
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)
  session_id = nil # released above; nothing left for the ensure block to do
ensure
  # Reached with a live session only when an assertion failed before the drop.
  Lda::RustBackend.drop_corpus_session(session_id) if session_id.is_a?(Numeric) && session_id.positive?
end
|
|
376
|
+
|
|
377
|
+
# Expects run_em_on_session, given the EM settings inline, to agree with a
# direct run_em_with_start_seed call for the "seeded" start and the same seed
# (8181).
#
# The session is released in an ensure block so that a failing assertion does
# not leave it alive in the Rust backend.
def test_run_em_on_session_applies_settings_and_matches_direct_seeded_start
  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  omit("run_em_on_session unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session)
  omit("run_em_with_start_seed unavailable") unless Lda::RustBackend.respond_to?(:run_em_with_start_seed)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0

  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]
  direct = Lda::RustBackend.run_em_with_start_seed(
    "seeded", @document_words, @document_counts, @topics, @terms, *em_settings, 8181
  )
  via_session = Lda::RustBackend.run_em_on_session(session_id, "seeded", @topics, *em_settings, 8181)

  assert_nested_close(direct, via_session, 1e-12)
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)
  session_id = nil # released above; nothing left for the ensure block to do
ensure
  # Reached with a live session only when an assertion failed before the drop.
  Lda::RustBackend.drop_corpus_session(session_id) if session_id.is_a?(Numeric) && session_id.positive?
end
|
|
417
|
+
|
|
418
|
+
# Runs EM twice on the same session, first with 2 topics and then with 4, and
# expects the first element of each result to have one entry per requested
# topic.
#
# The session is released in an ensure block so that a failing assertion does
# not leave it alive in the Rust backend.
def test_run_em_on_session_reconfigures_topic_count
  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  omit("run_em_on_session unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0

  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]
  [2, 4].each do |topic_count|
    output = Lda::RustBackend.run_em_on_session(session_id, "seeded", topic_count, *em_settings, 5151)
    assert_equal topic_count, output[0].size
  end

  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)
  session_id = nil # released above; nothing left for the ensure block to do
ensure
  # Reached with a live session only when an assertion failed before the drop.
  Lda::RustBackend.drop_corpus_session(session_id) if session_id.is_a?(Numeric) && session_id.positive?
end
|
|
456
|
+
|
|
457
|
+
# Drops a freshly created session, then calls run_em_on_session_with_corpus
# with the stale id. Expects a five-element result whose first element is a
# new positive session id different from the dropped one, and whose remaining
# elements agree with a direct run_em_with_start_seed call.
def test_run_em_on_session_with_corpus_recreates_missing_session
  %i[
    create_corpus_session
    drop_corpus_session
    run_em_on_session_with_corpus
    run_em_with_start_seed
  ].each do |required|
    omit("#{required} unavailable") unless Lda::RustBackend.respond_to?(required)
  end

  recreated_session_id = nil
  em_settings = [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)

  managed = Lda::RustBackend.run_em_on_session_with_corpus(
    session_id, @document_words, @document_counts, @terms, "seeded", @topics, *em_settings, 6161
  )

  assert_equal 5, managed.size
  recreated_session_id = managed[0]
  assert_operator recreated_session_id, :>, 0
  assert_not_equal session_id, recreated_session_id

  direct = Lda::RustBackend.run_em_with_start_seed(
    "seeded", @document_words, @document_counts, @topics, @terms, *em_settings, 6161
  )

  assert_nested_close(direct, managed[1..], 1e-12)
ensure
  # Only the recreated session can still be alive at this point.
  Lda::RustBackend.drop_corpus_session(recreated_session_id) if recreated_session_id&.positive?
end
|
|
510
|
+
|
|
511
|
+
# Replaces the corpus held by an existing session and expects the same
# session id back, an unchanged session count (when corpus_session_count is
# available), and EM output shaped by the replacement corpus: five terms per
# topic row and one entry per replacement document at index 2.
#
# Cleanup drops every distinct positive id seen during the test, so the
# original session is also released when replace_corpus_session unexpectedly
# returns a different id.
def test_replace_corpus_session_updates_existing_session_in_place
  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("replace_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:replace_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  omit("run_em_on_session unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0

  replacement_words = [[0, 1, 2], [2, 3]]
  replacement_counts = [[2.0, 1.0, 1.0], [1.0, 4.0]]
  replacement_terms = 5
  # Stays nil when the backend cannot report its session count.
  starting_count = Lda::RustBackend.corpus_session_count if Lda::RustBackend.respond_to?(:corpus_session_count)

  replaced_session_id = Lda::RustBackend.replace_corpus_session(
    session_id,
    replacement_words,
    replacement_counts,
    replacement_terms
  )

  assert_equal session_id, replaced_session_id
  assert_equal starting_count, Lda::RustBackend.corpus_session_count unless starting_count.nil?

  output = Lda::RustBackend.run_em_on_session(
    replaced_session_id,
    "seeded",
    @topics,
    @max_iter,
    @convergence,
    @em_max_iter,
    @em_convergence,
    @init_alpha,
    @min_probability,
    91_919
  )

  assert_equal replacement_terms, output[0].first.size
  assert_equal replacement_words.size, output[2].size
ensure
  # Both locals are nil when the test was omitted before any session existed.
  [replaced_session_id, session_id].uniq.each do |id|
    Lda::RustBackend.drop_corpus_session(id) if id.is_a?(Numeric) && id.positive?
  end
end
|
|
559
|
+
|
|
560
|
+
# Assigns two corpora in turn to a Rust backend and expects the session count
# to stay at exactly one above the starting count, then expects it to return
# to the starting count once the corpus is cleared. When
# replace_corpus_session is available the session id must be reused.
def test_rust_backend_corpus_session_lifecycle_no_leak
  omit("corpus_session_count unavailable") unless Lda::RustBackend.respond_to?(:corpus_session_count)

  starting_count = Lda::RustBackend.corpus_session_count
  backend = Lda::Backends::Rust.new(random_seed: 1234)

  session_ids = [FIXTURE_DOCUMENTS, FIXTURE_DOCUMENTS.reverse].map do |documents|
    backend.corpus = Lda::TextCorpus.new(documents)
    assert_equal starting_count + 1, Lda::RustBackend.corpus_session_count

    current_id = backend.instance_variable_get(:@rust_corpus_session_id)
    assert_operator current_id, :>, 0
    current_id
  end

  first_session_id, second_session_id = session_ids
  assert_equal first_session_id, second_session_id if Lda::RustBackend.respond_to?(:replace_corpus_session)

  backend.corpus = nil
  assert_equal starting_count, Lda::RustBackend.corpus_session_count
ensure
  # backend is nil when the test was omitted before it was built.
  backend&.corpus = nil
end
|
|
584
|
+
|
|
585
|
+
# Drops the backend's corpus session behind its back, runs em("seeded"), and
# expects gamma rows sized to the topic count plus a new positive session id
# that differs from the dropped one.
def test_rust_backend_recreates_missing_session_before_em
  %i[corpus_session_count drop_corpus_session].each do |required|
    omit("#{required} unavailable") unless Lda::RustBackend.respond_to?(required)
  end

  backend = Lda::Backends::Rust.new(random_seed: 1234)
  backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
  backend.num_topics = @topics
  current_session_id = -> { backend.instance_variable_get(:@rust_corpus_session_id) }

  session_id = current_session_id.call
  assert_operator session_id, :>, 0
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)

  backend.em("seeded")
  assert_equal @topics, backend.gamma.first.size

  recreated_session_id = current_session_id.call
  assert_operator recreated_session_id, :>, 0
  assert_not_equal session_id, recreated_session_id
ensure
  # backend is nil when the test was omitted before it was built.
  backend&.corpus = nil
end
|
|
606
|
+
|
|
607
|
+
# With the managed-corpus path disabled on the backend, expects em("random")
# to call Lda::RustBackend.run_em_with_start_seed exactly once and
# Lda::RustBackend.run_em not at all. Both module methods are temporarily
# wrapped with call counters and restored in the ensure block.
#
# The ensure block guards against rust_singleton still being nil (the test
# was omitted, or backend construction raised, before the singleton class was
# captured); otherwise the cleanup itself would raise NoMethodError and mask
# the omission or the original error.
def test_rust_backend_non_session_fallback_prefers_run_em_with_start_seed
  backend = nil
  rust_singleton = nil
  run_em_alias = :__test_original_run_em_for_non_session_fallback__
  run_em_with_start_seed_alias = :__test_original_run_em_with_start_seed_for_non_session_fallback__

  omit("run_em unavailable") unless Lda::RustBackend.respond_to?(:run_em)
  omit("run_em_with_start_seed unavailable") unless Lda::RustBackend.respond_to?(:run_em_with_start_seed)

  backend = Lda::Backends::Rust.new(random_seed: 1234)
  backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
  backend.verbose = false
  backend.num_topics = @topics
  backend.max_iter = @max_iter
  backend.convergence = @convergence
  backend.em_max_iter = @em_max_iter
  backend.em_convergence = @em_convergence
  backend.init_alpha = @init_alpha

  # Force the direct non-managed orchestration path.
  backend.define_singleton_method(:rust_orchestrated_em_with_managed_corpus) { |_start| false }

  rust_singleton = Lda::RustBackend.singleton_class
  run_em_calls = 0
  run_em_with_start_seed_calls = 0

  silence_redefinition_warnings do
    rust_singleton.send(:alias_method, run_em_alias, :run_em)
    rust_singleton.send(:alias_method, run_em_with_start_seed_alias, :run_em_with_start_seed)

    # Counting wrappers that delegate to the aliased originals.
    rust_singleton.send(:define_method, :run_em) do |*args|
      run_em_calls += 1
      public_send(run_em_alias, *args)
    end

    rust_singleton.send(:define_method, :run_em_with_start_seed) do |*args|
      run_em_with_start_seed_calls += 1
      public_send(run_em_with_start_seed_alias, *args)
    end
  end

  backend.em("random")
  assert_equal 0, run_em_calls
  assert_equal 1, run_em_with_start_seed_calls
  assert_equal @topics, backend.gamma.first.size
ensure
  silence_redefinition_warnings do
    restorations = { run_em_with_start_seed: run_em_with_start_seed_alias, run_em: run_em_alias }
    restorations.each do |original_name, alias_name|
      # Nothing to restore unless the alias was actually installed.
      next unless rust_singleton&.method_defined?(alias_name)

      rust_singleton.send(:remove_method, original_name)
      rust_singleton.send(:alias_method, original_name, alias_name)
      rust_singleton.send(:remove_method, alias_name)
    end
  end

  backend&.corpus = nil
end
|
|
669
|
+
|
|
670
|
+
# With the managed-corpus path disabled and rust_em_corpus_input replaced by
# a method that raises, expects em("random") to still succeed and produce
# gamma rows sized to the topic count -- i.e. the backend must not call
# rust_em_corpus_input on this path.
def test_rust_backend_direct_non_session_path_reuses_cached_corpus_snapshot
  backend = Lda::Backends::Rust.new(random_seed: 1234)
  backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
  {
    verbose: false,
    num_topics: @topics,
    max_iter: @max_iter,
    convergence: @convergence,
    em_max_iter: @em_max_iter,
    em_convergence: @em_convergence,
    init_alpha: @init_alpha
  }.each do |attribute, value|
    backend.public_send("#{attribute}=", value)
  end

  backend.define_singleton_method(:rust_orchestrated_em_with_managed_corpus) do |_start|
    false
  end
  backend.define_singleton_method(:rust_em_corpus_input) do
    raise "direct non-session path should reuse cached corpus snapshot"
  end

  backend.em("random")
  assert_equal @topics, backend.gamma.first.size
ensure
  # backend is nil when construction itself raised.
  backend&.corpus = nil
end
|
|
691
|
+
|
|
692
|
+
# Runs EM with both Rust-orchestrated entry points stubbed to return false and
# asserts that the fallback's `rust_initial_beta_probabilities` receives the
# very same cached corpus arrays stored on the backend
# (@rust_document_words / @rust_document_counts) plus matching topics and
# terms. The fallback's `rust_em_input` is replaced with a raising stub, so
# the run fails if that method is called.
#
# The original fallback methods are aliased aside and restored in `ensure`.
def test_rust_backend_beta_fallback_reuses_cached_corpus_snapshot
  backend = nil
  fallback = nil
  fallback_singleton = nil
  rust_em_input_alias = :__test_original_rust_em_input_for_beta_snapshot__
  rust_initial_beta_alias = :__test_original_rust_initial_beta_for_beta_snapshot__

  backend = Lda::Backends::Rust.new(random_seed: 1234)
  backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
  backend.verbose = false
  backend.num_topics = @topics
  backend.max_iter = @max_iter
  backend.convergence = @convergence
  backend.em_max_iter = @em_max_iter
  backend.em_convergence = @em_convergence
  backend.init_alpha = @init_alpha

  # Make both orchestrated paths report "not handled" for this instance.
  backend.define_singleton_method(:rust_orchestrated_em_with_managed_corpus) { |_start| false }
  backend.define_singleton_method(:rust_orchestrated_em_with_start_seed) { |_start| false }

  # Capture the cached objects so identity (equal?) can be checked later.
  cached_document_words = backend.instance_variable_get(:@rust_document_words)
  cached_document_counts = backend.instance_variable_get(:@rust_document_counts)
  cached_terms = backend.instance_variable_get(:@rust_corpus_terms)
  expected_topics = @topics

  fallback = backend.instance_variable_get(:@fallback)
  fallback_singleton = fallback.singleton_class
  used_cached_snapshot = false

  silence_redefinition_warnings do
    fallback_singleton.send(:alias_method, rust_em_input_alias, :rust_em_input)
    fallback_singleton.send(:alias_method, rust_initial_beta_alias, :rust_initial_beta_probabilities)

    fallback_singleton.send(:define_method, :rust_em_input) do |_start|
      raise "beta fallback should not rebuild full rust_em_input when snapshot is cached"
    end

    # Record whether the cached arrays were passed through, then delegate to
    # the original implementation via its alias.
    fallback_singleton.send(:define_method, :rust_initial_beta_probabilities) do |start, document_words, document_counts, topics, terms|
      used_cached_snapshot =
        document_words.equal?(cached_document_words) &&
        document_counts.equal?(cached_document_counts) &&
        topics == expected_topics &&
        terms == cached_terms
      public_send(rust_initial_beta_alias, start, document_words, document_counts, topics, terms)
    end
  end

  backend.em("random")
  assert_equal true, used_cached_snapshot
  assert_equal @topics, backend.gamma.first.size
ensure
  silence_redefinition_warnings do
    # `defined?(local)` is always truthy once the local has been assigned in
    # this method, so it cannot detect "setup never got this far". Guard on
    # nil instead: if setup raised early, fallback_singleton is still nil and
    # calling method_defined? on it would mask the real failure.
    {
      rust_initial_beta_probabilities: rust_initial_beta_alias,
      rust_em_input: rust_em_input_alias
    }.each do |original_name, alias_name|
      next unless fallback_singleton&.method_defined?(alias_name)

      fallback_singleton.send(:remove_method, original_name)
      fallback_singleton.send(:alias_method, original_name, alias_name)
      fallback_singleton.send(:remove_method, alias_name)
    end
  end

  backend&.corpus = nil
end
|
|
759
|
+
|
|
760
|
+
# Drops the backend's corpus session by hand, clears the stored session id,
# then runs EM and asserts, via counting wrappers on Lda::RustBackend, that:
#   * run_em_on_session is not called,
#   * run_em_on_session_with_corpus is called exactly once,
#   * run_em_with_start_seed is not called,
# and that the backend afterwards holds a positive session id different from
# the dropped one.
#
# The test is omitted when any of the required RustBackend entry points is
# missing. The wrapped methods are restored in `ensure`.
def test_rust_backend_prefers_managed_corpus_entrypoint_without_active_session
  backend = nil
  rust_singleton = nil
  run_em_on_session_alias = :__test_original_run_em_on_session_for_managed_preference__
  run_em_on_session_with_corpus_alias = :__test_original_run_em_on_session_with_corpus_for_managed_preference__
  run_em_with_start_seed_alias = :__test_original_run_em_with_start_seed_for_managed_preference__

  omit("run_em_on_session unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session)
  omit("run_em_on_session_with_corpus unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session_with_corpus)
  omit("run_em_with_start_seed unavailable") unless Lda::RustBackend.respond_to?(:run_em_with_start_seed)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)

  backend = Lda::Backends::Rust.new(random_seed: 1234)
  backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
  backend.verbose = false
  backend.num_topics = @topics
  backend.max_iter = @max_iter
  backend.convergence = @convergence
  backend.em_max_iter = @em_max_iter
  backend.em_convergence = @em_convergence
  backend.init_alpha = @init_alpha

  # Simulate "no active session": drop it and forget its id.
  dropped_session_id = backend.instance_variable_get(:@rust_corpus_session_id)
  assert_operator dropped_session_id, :>, 0
  assert_equal true, Lda::RustBackend.drop_corpus_session(dropped_session_id)
  backend.instance_variable_set(:@rust_corpus_session_id, nil)

  rust_singleton = Lda::RustBackend.singleton_class
  run_em_on_session_calls = 0
  run_em_on_session_with_corpus_calls = 0
  run_em_with_start_seed_calls = 0

  silence_redefinition_warnings do
    rust_singleton.send(:alias_method, run_em_on_session_alias, :run_em_on_session)
    rust_singleton.send(:alias_method, run_em_on_session_with_corpus_alias, :run_em_on_session_with_corpus)
    rust_singleton.send(:alias_method, run_em_with_start_seed_alias, :run_em_with_start_seed)

    # Each wrapper counts the call and forwards to the aliased original.
    rust_singleton.send(:define_method, :run_em_on_session) do |*args|
      run_em_on_session_calls += 1
      public_send(run_em_on_session_alias, *args)
    end

    rust_singleton.send(:define_method, :run_em_on_session_with_corpus) do |*args|
      run_em_on_session_with_corpus_calls += 1
      public_send(run_em_on_session_with_corpus_alias, *args)
    end

    rust_singleton.send(:define_method, :run_em_with_start_seed) do |*args|
      run_em_with_start_seed_calls += 1
      public_send(run_em_with_start_seed_alias, *args)
    end
  end

  backend.em("seeded")
  assert_equal 0, run_em_on_session_calls
  assert_equal 1, run_em_on_session_with_corpus_calls
  assert_equal 0, run_em_with_start_seed_calls
  assert_equal @topics, backend.gamma.first.size

  recreated_session_id = backend.instance_variable_get(:@rust_corpus_session_id)
  assert_operator recreated_session_id, :>, 0
  assert_not_equal dropped_session_id, recreated_session_id
ensure
  silence_redefinition_warnings do
    # `defined?(local)` is always truthy once the local has been assigned in
    # this method. When `omit` fires above, rust_singleton is still nil, so
    # guard on nil; otherwise the NoMethodError raised here would replace the
    # omission and the test would be reported as an error.
    {
      run_em_with_start_seed: run_em_with_start_seed_alias,
      run_em_on_session_with_corpus: run_em_on_session_with_corpus_alias,
      run_em_on_session: run_em_on_session_alias
    }.each do |original_name, alias_name|
      next unless rust_singleton&.method_defined?(alias_name)

      rust_singleton.send(:remove_method, original_name)
      rust_singleton.send(:alias_method, original_name, alias_name)
      rust_singleton.send(:remove_method, alias_name)
    end
  end

  backend&.corpus = nil
end
|
|
845
|
+
|
|
846
|
+
# Creates one corpus session, configures it for 2 topics and then for 4, and
# checks after each run_em_on_session_start call that the first element of the
# result has as many entries as the configured topic count.
#
# Omitted when any required Lda::RustBackend entry point is missing.
def test_configure_corpus_session_reconfigures_topic_count
  session_id = nil
  session_open = false

  omit("create_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:create_corpus_session)
  omit("drop_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:drop_corpus_session)
  omit("configure_corpus_session unavailable") unless Lda::RustBackend.respond_to?(:configure_corpus_session)
  omit("run_em_on_session_start unavailable") unless Lda::RustBackend.respond_to?(:run_em_on_session_start)

  session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
  assert_operator session_id, :>, 0
  # Only a validated id is handed to the cleanup in `ensure`.
  session_open = true

  assert_equal true, Lda::RustBackend.configure_corpus_session(
    session_id, 2, @max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability
  )
  two_topics = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 303)
  assert_equal 2, two_topics[0].size

  assert_equal true, Lda::RustBackend.configure_corpus_session(
    session_id, 4, @max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability
  )
  four_topics = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 303)
  assert_equal 4, four_topics[0].size

  # Mark the session as handled first so `ensure` never drops it twice.
  session_open = false
  assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)
ensure
  # An assertion failure above must not leak the session.
  Lda::RustBackend.drop_corpus_session(session_id) if session_open
end
|
|
869
|
+
|
|
870
|
+
# Runs EM twice on the same backend, first with 2 topics and then with 4, and
# checks each time that the first gamma row has one entry per topic.
def test_rust_backend_session_config_tracks_setting_changes
  backend = Lda::Backends::Rust.new(random_seed: 1234)
  backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)

  # Fixed settings, applied in the listed order through the public setters.
  {
    verbose: false,
    max_iter: 12,
    em_max_iter: 18,
    convergence: 1e-5,
    em_convergence: 1e-4
  }.each do |setting, value|
    backend.public_send(:"#{setting}=", value)
  end

  [2, 4].each do |topic_count|
    backend.num_topics = topic_count
    backend.em("seeded")
    assert_equal topic_count, backend.gamma.first.size
  end
ensure
  # Detach the corpus even when an assertion above failed.
  backend.corpus = nil unless backend.nil?
end
|
|
889
|
+
|
|
890
|
+
private
|
|
891
|
+
|
|
892
|
+
# Yields with $VERBOSE set to nil and restores the previous value afterwards,
# including when the block raises. Returns the block's value.
def silence_redefinition_warnings
  saved_verbosity = $VERBOSE
  begin
    $VERBOSE = nil
    yield
  ensure
    $VERBOSE = saved_verbosity
  end
end
|
|
899
|
+
|
|
900
|
+
# Recursively compares two values within +tolerance+.
#
# When +left+ is an Array, asserts that both sides have the same size and
# compares them element by element, returning nil. Otherwise both sides are
# converted with to_f and compared with assert_in_delta.
def assert_nested_close(left, right, tolerance)
  return assert_in_delta(left.to_f, right.to_f, tolerance) unless left.is_a?(Array)

  assert_equal(left.size, right.size)
  left.each_index do |position|
    assert_nested_close(left[position], right[position], tolerance)
  end
  nil
end
|
|
911
|
+
end
|