lda-ruby 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,911 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "test_helper"
4
+
5
# Tests for the EM orchestration entry points exposed on Lda::RustBackend and
# for the way Lda::Backends::Rust drives them.
#
# Every test is omitted when the Rust extension, or the specific native entry
# point the test exercises, is not available.
class RustOrchestrationTest < Test::Unit::TestCase
  FIXTURE_DOCUMENTS = [
    "ruby code gem ruby class module test",
    "rust backend speed ffi binding memory safety",
    "topic model inference corpus document probability",
    "module ruby class object gem code"
  ].freeze

  # Builds the shared corpus snapshot and EM settings used by every test.
  def setup
    omit("rust extension unavailable") unless Lda::RUST_EXTENSION_LOADED
    omit("run_em_with_start unavailable") unless defined?(Lda::RustBackend) && Lda::RustBackend.respond_to?(:run_em_with_start)

    @corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
    @topics = 3
    # Term count is derived from the largest word id seen in the corpus.
    @terms = @corpus.documents.flat_map(&:words).max + 1
    @document_words = @corpus.documents.map { |document| document.words.map(&:to_i) }
    @document_counts = @corpus.documents.map { |document| document.counts.map(&:to_f) }
    @max_iter = 25
    @convergence = 1e-5
    @em_max_iter = 40
    @em_convergence = 1e-4
    @init_alpha = 0.3
    @min_probability = 1e-12
  end

  def test_run_em_with_start_seeded_matches_explicit_seeded_initialization
    explicit_seed = Lda::RustBackend.seeded_topic_term_probabilities(
      @document_words, @document_counts, @topics, @terms, @min_probability
    )
    explicit = Lda::RustBackend.run_em(explicit_seed, @document_words, @document_counts, *em_settings)
    with_start = em_with_start("seeded")

    assert_nested_close(explicit, with_start, 1e-12)
  end

  def test_run_em_with_start_deterministic_alias_matches_seeded
    seeded = em_with_start("seeded")
    deterministic = em_with_start("deterministic")

    assert_nested_close(seeded, deterministic, 1e-12)
  end

  def test_run_em_with_start_seed_random_matches_explicit_random_initialization
    require_rust_api(:run_em_with_start_seed, :random_topic_term_probabilities)

    random_seed = 12_345
    explicit_seed = Lda::RustBackend.random_topic_term_probabilities(
      @topics, @terms, @min_probability, random_seed
    )
    explicit = Lda::RustBackend.run_em(explicit_seed, @document_words, @document_counts, *em_settings)
    with_start = direct_em("random", random_seed)

    assert_nested_close(explicit, with_start, 1e-12)
  end

  def test_run_em_with_start_seed_keeps_seeded_start_seed_independent
    require_rust_api(:run_em_with_start_seed)

    left = direct_em("seeded", 101)
    right = direct_em("seeded", 202)

    assert_nested_close(left, right, 1e-12)
  end

  def test_run_em_on_session_seeded_matches_direct_seeded_start
    require_rust_api(
      :create_corpus_session, :drop_corpus_session, :run_em_on_session_with_start_seed, :run_em_with_start_seed
    )

    dropped_session_id = nil
    with_corpus_session do |session_id|
      direct = direct_em("seeded", 777)
      via_session = Lda::RustBackend.run_em_on_session_with_start_seed(
        session_id, "seeded", @topics, *em_settings, 777
      )

      assert_nested_close(direct, via_session, 1e-12)
      dropped_session_id = session_id
    end

    # with_corpus_session already dropped the session; a second drop must report false.
    assert_equal false, Lda::RustBackend.drop_corpus_session(dropped_session_id)
  end

  def test_run_em_on_session_random_matches_direct_random_start
    require_rust_api(
      :create_corpus_session, :drop_corpus_session, :run_em_on_session_with_start_seed, :run_em_with_start_seed
    )

    with_corpus_session do |session_id|
      direct = direct_em("random", 55_555)
      via_session = Lda::RustBackend.run_em_on_session_with_start_seed(
        session_id, "random", @topics, *em_settings, 55_555
      )

      assert_nested_close(direct, via_session, 1e-12)
    end
  end

  def test_run_em_on_session_unknown_start_matches_random_start
    require_rust_api(:create_corpus_session, :drop_corpus_session, :run_em_on_session, :run_em_with_start_seed)

    with_corpus_session do |session_id|
      random_seed = 4545
      direct = direct_em("random", random_seed)
      via_session = Lda::RustBackend.run_em_on_session(
        session_id, "unknown_mode", @topics, *em_settings, random_seed
      )

      assert_nested_close(direct, via_session, 1e-12)
    end
  end

  def test_run_em_on_session_start_uses_configured_settings
    require_rust_api(
      :create_corpus_session, :drop_corpus_session, :configure_corpus_session,
      :run_em_on_session_start, :run_em_with_start_seed
    )

    with_corpus_session do |session_id|
      assert_equal true, Lda::RustBackend.configure_corpus_session(session_id, @topics, *em_settings)

      direct = direct_em("seeded", 9090)
      via_session = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 9090)

      assert_nested_close(direct, via_session, 1e-12)
    end
  end

  def test_run_em_on_session_start_requires_configuration
    require_rust_api(
      :create_corpus_session, :drop_corpus_session, :configure_corpus_session, :run_em_on_session_start
    )

    with_corpus_session do |session_id|
      unconfigured = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 1)
      assert_equal [[], [], [], []], unconfigured

      assert_equal true, Lda::RustBackend.configure_corpus_session(session_id, @topics, *em_settings)

      configured = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 1)
      assert_equal @topics, configured[0].size
      assert_equal @document_words.size, configured[2].size
    end
  end

  def test_run_em_on_session_applies_settings_and_matches_direct_seeded_start
    require_rust_api(:create_corpus_session, :drop_corpus_session, :run_em_on_session, :run_em_with_start_seed)

    with_corpus_session do |session_id|
      direct = direct_em("seeded", 8181)
      via_session = Lda::RustBackend.run_em_on_session(session_id, "seeded", @topics, *em_settings, 8181)

      assert_nested_close(direct, via_session, 1e-12)
    end
  end

  def test_run_em_on_session_reconfigures_topic_count
    require_rust_api(:create_corpus_session, :drop_corpus_session, :run_em_on_session)

    with_corpus_session do |session_id|
      two_topics = Lda::RustBackend.run_em_on_session(session_id, "seeded", 2, *em_settings, 5151)
      assert_equal 2, two_topics[0].size

      four_topics = Lda::RustBackend.run_em_on_session(session_id, "seeded", 4, *em_settings, 5151)
      assert_equal 4, four_topics[0].size
    end
  end

  def test_run_em_on_session_with_corpus_recreates_missing_session
    require_rust_api(
      :create_corpus_session, :drop_corpus_session, :run_em_on_session_with_corpus, :run_em_with_start_seed
    )

    recreated_session_id = nil
    session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
    assert_operator session_id, :>, 0
    assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)

    managed = Lda::RustBackend.run_em_on_session_with_corpus(
      session_id, @document_words, @document_counts, @terms, "seeded", @topics, *em_settings, 6161
    )

    # Capture the id before asserting anything so the ensure clause can
    # release the recreated session even when an assertion below fails.
    recreated_session_id = managed[0]
    assert_equal 5, managed.size
    assert_operator recreated_session_id, :>, 0
    assert_not_equal session_id, recreated_session_id

    direct = direct_em("seeded", 6161)

    assert_nested_close(direct, managed[1..], 1e-12)
  ensure
    release_session(recreated_session_id)
  end

  def test_replace_corpus_session_updates_existing_session_in_place
    require_rust_api(:create_corpus_session, :replace_corpus_session, :drop_corpus_session, :run_em_on_session)

    session_id = nil
    replaced_session_id = nil

    session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
    assert_operator session_id, :>, 0

    replacement_words = [[0, 1, 2], [2, 3]]
    replacement_counts = [[2.0, 1.0, 1.0], [1.0, 4.0]]
    replacement_terms = 5
    starting_count = Lda::RustBackend.corpus_session_count if Lda::RustBackend.respond_to?(:corpus_session_count)

    replaced_session_id = Lda::RustBackend.replace_corpus_session(
      session_id, replacement_words, replacement_counts, replacement_terms
    )

    assert_equal session_id, replaced_session_id
    assert_equal starting_count, Lda::RustBackend.corpus_session_count unless starting_count.nil?

    output = Lda::RustBackend.run_em_on_session(replaced_session_id, "seeded", @topics, *em_settings, 91_919)

    assert_equal replacement_terms, output[0].first.size
    assert_equal replacement_words.size, output[2].size
  ensure
    # Release both ids: if the replacement unexpectedly returned a new id,
    # the original session must not be left behind either.
    [session_id, replaced_session_id].uniq.each { |id| release_session(id) }
  end

  def test_rust_backend_corpus_session_lifecycle_no_leak
    require_rust_api(:corpus_session_count)

    starting_count = Lda::RustBackend.corpus_session_count
    backend = Lda::Backends::Rust.new(random_seed: 1234)

    backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
    assert_equal starting_count + 1, Lda::RustBackend.corpus_session_count
    first_session_id = backend.instance_variable_get(:@rust_corpus_session_id)
    assert_operator first_session_id, :>, 0

    backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS.reverse)
    assert_equal starting_count + 1, Lda::RustBackend.corpus_session_count
    second_session_id = backend.instance_variable_get(:@rust_corpus_session_id)
    assert_operator second_session_id, :>, 0
    assert_equal first_session_id, second_session_id if Lda::RustBackend.respond_to?(:replace_corpus_session)

    backend.corpus = nil
    assert_equal starting_count, Lda::RustBackend.corpus_session_count
  ensure
    backend&.corpus = nil
  end

  def test_rust_backend_recreates_missing_session_before_em
    require_rust_api(:corpus_session_count, :drop_corpus_session)

    backend = Lda::Backends::Rust.new(random_seed: 1234)
    backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
    backend.num_topics = @topics

    session_id = backend.instance_variable_get(:@rust_corpus_session_id)
    assert_operator session_id, :>, 0
    assert_equal true, Lda::RustBackend.drop_corpus_session(session_id)

    backend.em("seeded")
    assert_equal @topics, backend.gamma.first.size

    recreated_session_id = backend.instance_variable_get(:@rust_corpus_session_id)
    assert_operator recreated_session_id, :>, 0
    assert_not_equal session_id, recreated_session_id
  ensure
    backend&.corpus = nil
  end

  def test_rust_backend_non_session_fallback_prefers_run_em_with_start_seed
    require_rust_api(:run_em, :run_em_with_start_seed)

    backend = Lda::Backends::Rust.new(random_seed: 1234)
    configure_backend(backend)

    # Force the direct non-managed orchestration path.
    backend.define_singleton_method(:rust_orchestrated_em_with_managed_corpus) { |_start| false }

    with_call_counters(Lda::RustBackend.singleton_class, :run_em, :run_em_with_start_seed) do |calls|
      backend.em("random")

      assert_equal 0, calls[:run_em]
      assert_equal 1, calls[:run_em_with_start_seed]
      assert_equal @topics, backend.gamma.first.size
    end
  ensure
    backend&.corpus = nil
  end

  def test_rust_backend_direct_non_session_path_reuses_cached_corpus_snapshot
    backend = Lda::Backends::Rust.new(random_seed: 1234)
    configure_backend(backend)

    backend.define_singleton_method(:rust_orchestrated_em_with_managed_corpus) { |_start| false }
    # Any attempt to rebuild the corpus input fails the test loudly.
    backend.define_singleton_method(:rust_em_corpus_input) do
      raise "direct non-session path should reuse cached corpus snapshot"
    end

    backend.em("random")
    assert_equal @topics, backend.gamma.first.size
  ensure
    backend&.corpus = nil
  end

  def test_rust_backend_beta_fallback_reuses_cached_corpus_snapshot
    backend = Lda::Backends::Rust.new(random_seed: 1234)
    configure_backend(backend)

    # Disable both Rust-orchestrated paths so the beta fallback is taken.
    backend.define_singleton_method(:rust_orchestrated_em_with_managed_corpus) { |_start| false }
    backend.define_singleton_method(:rust_orchestrated_em_with_start_seed) { |_start| false }

    cached_document_words = backend.instance_variable_get(:@rust_document_words)
    cached_document_counts = backend.instance_variable_get(:@rust_document_counts)
    cached_terms = backend.instance_variable_get(:@rust_corpus_terms)
    expected_topics = @topics

    fallback = backend.instance_variable_get(:@fallback)
    used_cached_snapshot = false

    wrappers = {
      rust_em_input: lambda do |_receiver, _original, *_args|
        raise "beta fallback should not rebuild full rust_em_input when snapshot is cached"
      end,
      rust_initial_beta_probabilities: lambda do |receiver, original, start, document_words, document_counts, topics, terms|
        # Identity (equal?) checks prove the cached arrays themselves were passed through.
        used_cached_snapshot =
          document_words.equal?(cached_document_words) &&
          document_counts.equal?(cached_document_counts) &&
          topics == expected_topics &&
          terms == cached_terms
        receiver.public_send(original, start, document_words, document_counts, topics, terms)
      end
    }

    with_wrapped_methods(fallback.singleton_class, wrappers) do
      backend.em("random")

      assert_equal true, used_cached_snapshot
      assert_equal @topics, backend.gamma.first.size
    end
  ensure
    backend&.corpus = nil
  end

  def test_rust_backend_prefers_managed_corpus_entrypoint_without_active_session
    require_rust_api(
      :run_em_on_session, :run_em_on_session_with_corpus, :run_em_with_start_seed, :drop_corpus_session
    )

    backend = Lda::Backends::Rust.new(random_seed: 1234)
    configure_backend(backend)

    # Drop the native session and clear the cached id so the backend starts
    # EM without an active session.
    dropped_session_id = backend.instance_variable_get(:@rust_corpus_session_id)
    assert_operator dropped_session_id, :>, 0
    assert_equal true, Lda::RustBackend.drop_corpus_session(dropped_session_id)
    backend.instance_variable_set(:@rust_corpus_session_id, nil)

    with_call_counters(
      Lda::RustBackend.singleton_class,
      :run_em_on_session, :run_em_on_session_with_corpus, :run_em_with_start_seed
    ) do |calls|
      backend.em("seeded")

      assert_equal 0, calls[:run_em_on_session]
      assert_equal 1, calls[:run_em_on_session_with_corpus]
      assert_equal 0, calls[:run_em_with_start_seed]
      assert_equal @topics, backend.gamma.first.size

      recreated_session_id = backend.instance_variable_get(:@rust_corpus_session_id)
      assert_operator recreated_session_id, :>, 0
      assert_not_equal dropped_session_id, recreated_session_id
    end
  ensure
    backend&.corpus = nil
  end

  def test_configure_corpus_session_reconfigures_topic_count
    require_rust_api(
      :create_corpus_session, :drop_corpus_session, :configure_corpus_session, :run_em_on_session_start
    )

    with_corpus_session do |session_id|
      assert_equal true, Lda::RustBackend.configure_corpus_session(session_id, 2, *em_settings)
      two_topics = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 303)
      assert_equal 2, two_topics[0].size

      assert_equal true, Lda::RustBackend.configure_corpus_session(session_id, 4, *em_settings)
      four_topics = Lda::RustBackend.run_em_on_session_start(session_id, "seeded", 303)
      assert_equal 4, four_topics[0].size
    end
  end

  def test_rust_backend_session_config_tracks_setting_changes
    backend = Lda::Backends::Rust.new(random_seed: 1234)
    backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
    backend.verbose = false
    backend.max_iter = 12
    backend.em_max_iter = 18
    backend.convergence = 1e-5
    backend.em_convergence = 1e-4

    backend.num_topics = 2
    backend.em("seeded")
    assert_equal 2, backend.gamma.first.size

    backend.num_topics = 4
    backend.em("seeded")
    assert_equal 4, backend.gamma.first.size
  ensure
    backend&.corpus = nil
  end

  private

  # Omits the current test unless Lda::RustBackend responds to every name.
  # Names are checked in the order given; the omit message is "<name> unavailable".
  def require_rust_api(*names)
    names.each do |name|
      omit("#{name} unavailable") unless Lda::RustBackend.respond_to?(name)
    end
  end

  # The six EM settings from #setup, in the positional order the native
  # entry points are called with throughout this file.
  def em_settings
    [@max_iter, @convergence, @em_max_iter, @em_convergence, @init_alpha, @min_probability]
  end

  # Runs EM directly through run_em_with_start for the given start mode.
  def em_with_start(start)
    Lda::RustBackend.run_em_with_start(
      start, @document_words, @document_counts, @topics, @terms, *em_settings
    )
  end

  # Runs EM directly through run_em_with_start_seed for the given start mode and seed.
  def direct_em(start, seed)
    Lda::RustBackend.run_em_with_start_seed(
      start, @document_words, @document_counts, @topics, @terms, *em_settings, seed
    )
  end

  # Applies the #setup corpus and EM settings to an already constructed backend.
  # The caller keeps the backend reference so its ensure clause can always
  # reset the corpus, even if a setter here raises.
  def configure_backend(backend)
    backend.corpus = Lda::TextCorpus.new(FIXTURE_DOCUMENTS)
    backend.verbose = false
    backend.num_topics = @topics
    backend.max_iter = @max_iter
    backend.convergence = @convergence
    backend.em_max_iter = @em_max_iter
    backend.em_convergence = @em_convergence
    backend.init_alpha = @init_alpha
    backend
  end

  # Creates a corpus session from the #setup corpus, yields its id, and then
  # asserts that dropping it reports true. If the block raises (failed
  # assertion, omission, error) the session is still dropped, so a failing
  # test does not leave a native session behind.
  def with_corpus_session
    session_id = Lda::RustBackend.create_corpus_session(@document_words, @document_counts, @terms)
    assert_operator session_id, :>, 0

    yield session_id

    dropped = Lda::RustBackend.drop_corpus_session(session_id)
    session_id = nil
    assert_equal true, dropped
  ensure
    release_session(session_id)
  end

  # Best-effort drop used from ensure clauses; ignores nil or non-positive ids.
  def release_session(session_id)
    return unless session_id.is_a?(Numeric) && session_id.positive?

    Lda::RustBackend.drop_corpus_session(session_id)
  end

  # Temporarily replaces methods on +singleton+ for the duration of the block.
  #
  # +wrappers+ maps a method name to a callable invoked as
  # wrapper.call(receiver, original_alias, *args); the wrapper can reach the
  # untouched implementation via receiver.public_send(original_alias, *args).
  # Only methods that were actually aliased are restored, in reverse order,
  # so cleanup is safe when installation stops part-way.
  def with_wrapped_methods(singleton, wrappers)
    installed = []

    silence_redefinition_warnings do
      wrappers.each do |name, wrapper|
        original = :"__test_original_#{name}__"
        singleton.send(:alias_method, original, name)
        installed << [name, original]
        singleton.send(:define_method, name) do |*args|
          wrapper.call(self, original, *args)
        end
      end
    end

    yield
  ensure
    silence_redefinition_warnings do
      installed.reverse_each do |name, original|
        singleton.send(:remove_method, name)
        singleton.send(:alias_method, name, original)
        singleton.send(:remove_method, original)
      end
    end
  end

  # Wraps each named method on +singleton+ with a pass-through call counter
  # and yields a Hash of name => call count (0 for methods never called).
  def with_call_counters(singleton, *names)
    calls = Hash.new(0)
    wrappers = names.to_h do |name|
      counter = lambda do |receiver, original, *args|
        calls[name] += 1
        receiver.public_send(original, *args)
      end
      [name, counter]
    end

    with_wrapped_methods(singleton, wrappers) { yield calls }
  end

  # Runs the block with $VERBOSE set to nil and restores the previous value afterwards.
  def silence_redefinition_warnings
    previous_verbose = $VERBOSE
    $VERBOSE = nil
    yield
  ensure
    $VERBOSE = previous_verbose
  end

  # Recursively compares two nested arrays of numbers within +tolerance+.
  # A shape mismatch (Array on the left, something else on the right, or
  # differing sizes) is reported as an assertion failure.
  def assert_nested_close(left, right, tolerance)
    unless left.is_a?(Array)
      assert_in_delta left.to_f, right.to_f, tolerance
      return
    end

    assert_kind_of Array, right
    assert_equal left.size, right.size
    left.each_with_index do |left_item, index|
      assert_nested_close(left_item, right[index], tolerance)
    end
  end
end