kreuzberg 4.1.2 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/Cargo.lock +26 -17
  5. data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
  6. data/kreuzberg.gemspec +13 -1
  7. data/lib/kreuzberg/cli.rb +16 -6
  8. data/lib/kreuzberg/cli_proxy.rb +3 -1
  9. data/lib/kreuzberg/config.rb +121 -39
  10. data/lib/kreuzberg/djot_content.rb +225 -0
  11. data/lib/kreuzberg/extraction_api.rb +20 -4
  12. data/lib/kreuzberg/result.rb +12 -2
  13. data/lib/kreuzberg/version.rb +1 -1
  14. data/lib/kreuzberg.rb +1 -0
  15. data/sig/kreuzberg.rbs +28 -12
  16. data/spec/binding/batch_operations_spec.rb +80 -0
  17. data/spec/binding/batch_spec.rb +6 -5
  18. data/spec/binding/error_recovery_spec.rb +3 -3
  19. data/spec/binding/metadata_types_spec.rb +77 -57
  20. data/spec/binding/tables_spec.rb +11 -2
  21. data/spec/serialization_spec.rb +134 -0
  22. data/spec/unit/config/output_format_spec.rb +380 -0
  23. data/vendor/Cargo.toml +1 -1
  24. data/vendor/kreuzberg/Cargo.toml +1 -1
  25. data/vendor/kreuzberg/README.md +1 -1
  26. data/vendor/kreuzberg/src/api/startup.rs +15 -1
  27. data/vendor/kreuzberg/src/core/config_validation/sections.rs +16 -4
  28. data/vendor/kreuzberg/src/core/extractor/file.rs +1 -2
  29. data/vendor/kreuzberg/src/core/extractor/mod.rs +2 -1
  30. data/vendor/kreuzberg/src/core/io.rs +7 -7
  31. data/vendor/kreuzberg/src/core/mime.rs +4 -4
  32. data/vendor/kreuzberg/src/embeddings.rs +4 -4
  33. data/vendor/kreuzberg/src/extraction/pptx/parser.rs +6 -0
  34. data/vendor/kreuzberg/src/mcp/format.rs +237 -39
  35. data/vendor/kreuzberg/src/mcp/params.rs +26 -33
  36. data/vendor/kreuzberg/src/mcp/server.rs +6 -3
  37. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
  38. data/vendor/kreuzberg/src/plugins/mod.rs +1 -0
  39. data/vendor/kreuzberg/src/plugins/registry/extractor.rs +251 -5
  40. data/vendor/kreuzberg/src/plugins/registry/ocr.rs +150 -2
  41. data/vendor/kreuzberg/src/plugins/registry/processor.rs +213 -5
  42. data/vendor/kreuzberg/src/plugins/registry/validator.rs +220 -4
  43. data/vendor/kreuzberg/src/plugins/startup_validation.rs +385 -0
  44. data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
  45. data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
  46. data/vendor/kreuzberg/tests/api_embed.rs +84 -50
  47. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
  48. data/vendor/kreuzberg/tests/api_tests.rs +298 -139
  49. data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
  50. data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
  51. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
  52. data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
  53. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
  54. data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
  55. data/vendor/kreuzberg/tests/config_behavioral.rs +416 -0
  56. data/vendor/kreuzberg/tests/config_features.rs +19 -15
  57. data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
  58. data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
  59. data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
  60. data/vendor/kreuzberg/tests/core_integration.rs +57 -57
  61. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
  62. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
  63. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
  64. data/vendor/kreuzberg/tests/email_integration.rs +7 -7
  65. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
  66. data/vendor/kreuzberg/tests/error_handling.rs +13 -11
  67. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
  68. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  69. data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
  70. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
  71. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
  72. data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
  73. data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
  74. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
  75. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
  76. data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
  77. data/vendor/kreuzberg/tests/mime_detection.rs +75 -43
  78. data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
  79. data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
  80. data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
  81. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
  82. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
  83. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
  84. data/vendor/kreuzberg/tests/page_markers.rs +1 -1
  85. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
  86. data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
  87. data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
  88. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
  89. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
  90. data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
  91. data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
  92. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +324 -31
  93. data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
  94. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
  95. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
  96. data/vendor/kreuzberg/tests/security_validation.rs +20 -19
  97. data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
  98. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
  99. data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
  100. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
  101. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
  102. data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
  103. metadata +12 -2
@@ -187,26 +187,26 @@ fn test_register_custom_postprocessor() {
187
187
  });
188
188
 
189
189
  {
190
- let mut reg = registry.write().unwrap();
191
- reg.shutdown_all().unwrap();
190
+ let mut reg = registry.write().expect("Operation failed");
191
+ reg.shutdown_all().expect("Operation failed");
192
192
  }
193
193
 
194
194
  {
195
- let mut reg = registry.write().unwrap();
195
+ let mut reg = registry.write().expect("Operation failed");
196
196
  let result = reg.register(Arc::clone(&processor) as Arc<dyn PostProcessor>, 100);
197
197
  assert!(result.is_ok(), "Failed to register processor: {:?}", result.err());
198
198
  }
199
199
 
200
200
  let list = {
201
- let reg = registry.read().unwrap();
201
+ let reg = registry.read().expect("Operation failed");
202
202
  reg.list()
203
203
  };
204
204
 
205
205
  assert!(list.contains(&"test-appender".to_string()));
206
206
 
207
207
  {
208
- let mut reg = registry.write().unwrap();
209
- reg.shutdown_all().unwrap();
208
+ let mut reg = registry.write().expect("Operation failed");
209
+ reg.shutdown_all().expect("Operation failed");
210
210
  }
211
211
  }
212
212
 
@@ -224,9 +224,9 @@ fn test_postprocessor_called_during_extraction() {
224
224
  });
225
225
 
226
226
  {
227
- let mut reg = registry.write().unwrap();
227
+ let mut reg = registry.write().expect("Operation failed");
228
228
  reg.register(Arc::clone(&processor) as Arc<dyn PostProcessor>, 100)
229
- .unwrap();
229
+ .expect("Operation failed");
230
230
  }
231
231
 
232
232
  let config = ExtractionConfig::default();
@@ -234,7 +234,7 @@ fn test_postprocessor_called_during_extraction() {
234
234
 
235
235
  assert!(result.is_ok(), "Extraction failed: {:?}", result.err());
236
236
 
237
- let extraction_result = result.unwrap();
237
+ let extraction_result = result.expect("Operation failed");
238
238
  assert!(
239
239
  extraction_result.content.contains("[APPENDED BY PROCESSOR]"),
240
240
  "Processor did not modify content. Content: {}",
@@ -248,8 +248,8 @@ fn test_postprocessor_called_during_extraction() {
248
248
  );
249
249
 
250
250
  {
251
- let mut reg = registry.write().unwrap();
252
- reg.shutdown_all().unwrap();
251
+ let mut reg = registry.write().expect("Operation failed");
252
+ reg.shutdown_all().expect("Operation failed");
253
253
  }
254
254
  }
255
255
 
@@ -265,8 +265,9 @@ fn test_postprocessor_modifies_content() {
265
265
  });
266
266
 
267
267
  {
268
- let mut reg = registry.write().unwrap();
269
- reg.register(processor as Arc<dyn PostProcessor>, 100).unwrap();
268
+ let mut reg = registry.write().expect("Operation failed");
269
+ reg.register(processor as Arc<dyn PostProcessor>, 100)
270
+ .expect("Operation failed");
270
271
  }
271
272
 
272
273
  let config = ExtractionConfig::default();
@@ -274,14 +275,14 @@ fn test_postprocessor_modifies_content() {
274
275
 
275
276
  assert!(result.is_ok());
276
277
 
277
- let extraction_result = result.unwrap();
278
+ let extraction_result = result.expect("Operation failed");
278
279
  let has_lowercase = extraction_result.content.chars().any(|c| c.is_lowercase());
279
280
 
280
281
  assert!(!has_lowercase, "Content was not fully uppercased");
281
282
 
282
283
  {
283
- let mut reg = registry.write().unwrap();
284
- reg.shutdown_all().unwrap();
284
+ let mut reg = registry.write().expect("Operation failed");
285
+ reg.shutdown_all().expect("Operation failed");
285
286
  }
286
287
  }
287
288
 
@@ -298,9 +299,9 @@ fn test_postprocessor_adds_metadata() {
298
299
  });
299
300
 
300
301
  {
301
- let mut reg = registry.write().unwrap();
302
+ let mut reg = registry.write().expect("Operation failed");
302
303
  reg.register(Arc::clone(&processor) as Arc<dyn PostProcessor>, 100)
303
- .unwrap();
304
+ .expect("Operation failed");
304
305
  }
305
306
 
306
307
  assert!(
@@ -313,7 +314,7 @@ fn test_postprocessor_adds_metadata() {
313
314
 
314
315
  assert!(result.is_ok());
315
316
 
316
- let extraction_result = result.unwrap();
317
+ let extraction_result = result.expect("Operation failed");
317
318
 
318
319
  assert!(
319
320
  extraction_result.metadata.additional.contains_key("processed_by"),
@@ -324,12 +325,19 @@ fn test_postprocessor_adds_metadata() {
324
325
  "Metadata 'word_count' not added"
325
326
  );
326
327
 
327
- let processed_by = extraction_result.metadata.additional.get("processed_by").unwrap();
328
- assert_eq!(processed_by.as_str().unwrap(), "metadata-adder");
328
+ let processed_by = extraction_result
329
+ .metadata
330
+ .additional
331
+ .get("processed_by")
332
+ .expect("Operation failed");
333
+ assert_eq!(
334
+ processed_by.as_str().expect("Failed to extract string from value"),
335
+ "metadata-adder"
336
+ );
329
337
 
330
338
  {
331
- let mut reg = registry.write().unwrap();
332
- reg.shutdown_all().unwrap();
339
+ let mut reg = registry.write().expect("Operation failed");
340
+ reg.shutdown_all().expect("Operation failed");
333
341
  }
334
342
 
335
343
  assert!(
@@ -350,18 +358,18 @@ fn test_unregister_postprocessor() {
350
358
  });
351
359
 
352
360
  {
353
- let mut reg = registry.write().unwrap();
361
+ let mut reg = registry.write().expect("Operation failed");
354
362
  reg.register(Arc::clone(&processor) as Arc<dyn PostProcessor>, 100)
355
- .unwrap();
363
+ .expect("Operation failed");
356
364
  }
357
365
 
358
366
  {
359
- let mut reg = registry.write().unwrap();
360
- reg.remove("unregister-test").unwrap();
367
+ let mut reg = registry.write().expect("Operation failed");
368
+ reg.remove("unregister-test").expect("Operation failed");
361
369
  }
362
370
 
363
371
  let list = {
364
- let reg = registry.read().unwrap();
372
+ let reg = registry.read().expect("Operation failed");
365
373
  reg.list()
366
374
  };
367
375
 
@@ -373,7 +381,7 @@ fn test_unregister_postprocessor() {
373
381
 
374
382
  assert!(result.is_ok());
375
383
 
376
- let extraction_result = result.unwrap();
384
+ let extraction_result = result.expect("Operation failed");
377
385
  assert!(
378
386
  !extraction_result.content.contains("[SHOULD NOT APPEAR]"),
379
387
  "Unregistered processor still modified content"
@@ -382,8 +390,8 @@ fn test_unregister_postprocessor() {
382
390
  assert_eq!(processor.call_count.load(Ordering::SeqCst), 0);
383
391
 
384
392
  {
385
- let mut reg = registry.write().unwrap();
386
- reg.shutdown_all().unwrap();
393
+ let mut reg = registry.write().expect("Operation failed");
394
+ reg.shutdown_all().expect("Operation failed");
387
395
  }
388
396
  }
389
397
 
@@ -393,8 +401,8 @@ fn test_clear_all_postprocessors() {
393
401
  let registry = get_post_processor_registry();
394
402
 
395
403
  {
396
- let mut reg = registry.write().unwrap();
397
- reg.shutdown_all().unwrap();
404
+ let mut reg = registry.write().expect("Operation failed");
405
+ reg.shutdown_all().expect("Operation failed");
398
406
  }
399
407
 
400
408
  let processor1 = Arc::new(AppendTextProcessor {
@@ -410,18 +418,20 @@ fn test_clear_all_postprocessors() {
410
418
  });
411
419
 
412
420
  {
413
- let mut reg = registry.write().unwrap();
414
- reg.register(processor1 as Arc<dyn PostProcessor>, 100).unwrap();
415
- reg.register(processor2 as Arc<dyn PostProcessor>, 100).unwrap();
421
+ let mut reg = registry.write().expect("Operation failed");
422
+ reg.register(processor1 as Arc<dyn PostProcessor>, 100)
423
+ .expect("Operation failed");
424
+ reg.register(processor2 as Arc<dyn PostProcessor>, 100)
425
+ .expect("Operation failed");
416
426
  }
417
427
 
418
428
  {
419
- let mut reg = registry.write().unwrap();
420
- reg.shutdown_all().unwrap();
429
+ let mut reg = registry.write().expect("Operation failed");
430
+ reg.shutdown_all().expect("Operation failed");
421
431
  }
422
432
 
423
433
  let list = {
424
- let reg = registry.read().unwrap();
434
+ let reg = registry.read().expect("Operation failed");
425
435
  reg.list()
426
436
  };
427
437
 
@@ -440,8 +450,9 @@ fn test_postprocessor_error_handling() {
440
450
  });
441
451
 
442
452
  {
443
- let mut reg = registry.write().unwrap();
444
- reg.register(failing_processor as Arc<dyn PostProcessor>, 100).unwrap();
453
+ let mut reg = registry.write().expect("Operation failed");
454
+ reg.register(failing_processor as Arc<dyn PostProcessor>, 100)
455
+ .expect("Operation failed");
445
456
  }
446
457
 
447
458
  let config = ExtractionConfig::default();
@@ -462,8 +473,8 @@ fn test_postprocessor_error_handling() {
462
473
  }
463
474
 
464
475
  {
465
- let mut reg = registry.write().unwrap();
466
- reg.shutdown_all().unwrap();
476
+ let mut reg = registry.write().expect("Operation failed");
477
+ reg.shutdown_all().expect("Operation failed");
467
478
  }
468
479
  }
469
480
 
@@ -473,8 +484,8 @@ fn test_postprocessor_invalid_name() {
473
484
  let registry = get_post_processor_registry();
474
485
 
475
486
  {
476
- let mut reg = registry.write().unwrap();
477
- reg.shutdown_all().unwrap();
487
+ let mut reg = registry.write().expect("Operation failed");
488
+ reg.shutdown_all().expect("Operation failed");
478
489
  }
479
490
 
480
491
  let processor = Arc::new(AppendTextProcessor {
@@ -484,16 +495,19 @@ fn test_postprocessor_invalid_name() {
484
495
  });
485
496
 
486
497
  {
487
- let mut reg = registry.write().unwrap();
498
+ let mut reg = registry.write().expect("Operation failed");
488
499
  let result = reg.register(processor, 100);
489
500
 
490
501
  assert!(result.is_err());
491
- assert!(matches!(result.err().unwrap(), KreuzbergError::Validation { .. }));
502
+ assert!(matches!(
503
+ result.expect_err("Operation should fail"),
504
+ KreuzbergError::Validation { .. }
505
+ ));
492
506
  }
493
507
 
494
508
  {
495
- let mut reg = registry.write().unwrap();
496
- reg.shutdown_all().unwrap();
509
+ let mut reg = registry.write().expect("Operation failed");
510
+ reg.shutdown_all().expect("Operation failed");
497
511
  }
498
512
  }
499
513
 
@@ -520,10 +534,13 @@ fn test_multiple_postprocessors_execution_order() {
520
534
  });
521
535
 
522
536
  {
523
- let mut reg = registry.write().unwrap();
524
- reg.register(early_processor as Arc<dyn PostProcessor>, 100).unwrap();
525
- reg.register(middle_processor as Arc<dyn PostProcessor>, 100).unwrap();
526
- reg.register(late_processor as Arc<dyn PostProcessor>, 100).unwrap();
537
+ let mut reg = registry.write().expect("Operation failed");
538
+ reg.register(early_processor as Arc<dyn PostProcessor>, 100)
539
+ .expect("Operation failed");
540
+ reg.register(middle_processor as Arc<dyn PostProcessor>, 100)
541
+ .expect("Operation failed");
542
+ reg.register(late_processor as Arc<dyn PostProcessor>, 100)
543
+ .expect("Operation failed");
527
544
  }
528
545
 
529
546
  let config = ExtractionConfig::default();
@@ -531,15 +548,15 @@ fn test_multiple_postprocessors_execution_order() {
531
548
 
532
549
  assert!(result.is_ok());
533
550
 
534
- let extraction_result = result.unwrap();
551
+ let extraction_result = result.expect("Operation failed");
535
552
 
536
553
  assert!(extraction_result.metadata.additional.contains_key("processed_by"));
537
554
  assert!(!extraction_result.content.chars().any(|c| c.is_lowercase()));
538
555
  assert!(extraction_result.content.contains("[LATE]"));
539
556
 
540
557
  {
541
- let mut reg = registry.write().unwrap();
542
- reg.shutdown_all().unwrap();
558
+ let mut reg = registry.write().expect("Operation failed");
559
+ reg.shutdown_all().expect("Operation failed");
543
560
  }
544
561
  }
545
562
 
@@ -557,9 +574,9 @@ fn test_postprocessor_preserves_mime_type() {
557
574
  });
558
575
 
559
576
  {
560
- let mut reg = registry.write().unwrap();
577
+ let mut reg = registry.write().expect("Operation failed");
561
578
  reg.register(Arc::clone(&processor) as Arc<dyn PostProcessor>, 100)
562
- .unwrap();
579
+ .expect("Operation failed");
563
580
  }
564
581
 
565
582
  let config = ExtractionConfig::default();
@@ -567,11 +584,11 @@ fn test_postprocessor_preserves_mime_type() {
567
584
 
568
585
  assert!(result.is_ok());
569
586
 
570
- let extraction_result = result.unwrap();
587
+ let extraction_result = result.expect("Operation failed");
571
588
  assert_eq!(extraction_result.mime_type, "text/plain");
572
589
 
573
590
  {
574
- let mut reg = registry.write().unwrap();
575
- reg.shutdown_all().unwrap();
591
+ let mut reg = registry.write().expect("Operation failed");
592
+ reg.shutdown_all().expect("Operation failed");
576
593
  }
577
594
  }
@@ -202,9 +202,9 @@ async fn test_extractor_extraction_failure() {
202
202
  should_fail_extract: true,
203
203
  });
204
204
 
205
- registry.register(failing_extractor).unwrap();
205
+ registry.register(failing_extractor).expect("Operation failed");
206
206
 
207
- let extractor = registry.get("text/plain").unwrap();
207
+ let extractor = registry.get("text/plain").expect("Value not found");
208
208
  let config = ExtractionConfig::default();
209
209
  let result = extractor.extract_bytes(b"test", "text/plain", &config).await;
210
210
 
@@ -227,8 +227,8 @@ fn test_extractor_duplicate_registration() {
227
227
  should_fail_extract: false,
228
228
  });
229
229
 
230
- registry.register(extractor1).unwrap();
231
- registry.register(extractor2).unwrap();
230
+ registry.register(extractor1).expect("Operation failed");
231
+ registry.register(extractor2).expect("Operation failed");
232
232
 
233
233
  let names = registry.list();
234
234
  assert_eq!(names.len(), 1);
@@ -255,13 +255,13 @@ fn test_extractor_concurrent_registration() {
255
255
  let mut reg = registry_clone
256
256
  .write()
257
257
  .expect("Failed to acquire write lock on registry in test");
258
- reg.register(extractor).unwrap();
258
+ reg.register(extractor).expect("Operation failed");
259
259
  });
260
260
  handles.push(handle);
261
261
  }
262
262
 
263
263
  for handle in handles {
264
- handle.join().unwrap();
264
+ handle.join().expect("Operation failed");
265
265
  }
266
266
 
267
267
  let reg = registry
@@ -323,10 +323,10 @@ fn test_extractor_priority_ordering_complex() {
323
323
  name: format!("priority-{}", priority),
324
324
  priority,
325
325
  });
326
- registry.register(extractor).unwrap();
326
+ registry.register(extractor).expect("Operation failed");
327
327
  }
328
328
 
329
- let selected = registry.get("text/plain").unwrap();
329
+ let selected = registry.get("text/plain").expect("Value not found");
330
330
  assert_eq!(selected.name(), "priority-100");
331
331
  assert_eq!(selected.priority(), 100);
332
332
  }
@@ -382,10 +382,10 @@ fn test_extractor_wildcard_vs_exact_priority() {
382
382
  should_fail_extract: false,
383
383
  });
384
384
 
385
- registry.register(wildcard_arc).unwrap();
386
- registry.register(exact).unwrap();
385
+ registry.register(wildcard_arc).expect("Operation failed");
386
+ registry.register(exact).expect("Operation failed");
387
387
 
388
- let selected = registry.get("text/plain").unwrap();
388
+ let selected = registry.get("text/plain").expect("Value not found");
389
389
  assert_eq!(selected.name(), "exact-low");
390
390
  }
391
391
 
@@ -420,11 +420,11 @@ fn test_extractor_list_after_partial_removal() {
420
420
  should_fail_init: false,
421
421
  should_fail_extract: false,
422
422
  });
423
- registry.register(extractor).unwrap();
423
+ registry.register(extractor).expect("Operation failed");
424
424
  }
425
425
 
426
- registry.remove("extractor-2").unwrap();
427
- registry.remove("extractor-3").unwrap();
426
+ registry.remove("extractor-2").expect("Operation failed");
427
+ registry.remove("extractor-3").expect("Operation failed");
428
428
 
429
429
  let names = registry.list();
430
430
  assert_eq!(names.len(), 3);
@@ -452,9 +452,9 @@ async fn test_processor_execution_order_within_stage() {
452
452
  stage: ProcessingStage::Early,
453
453
  });
454
454
 
455
- registry.register(low, 10).unwrap();
456
- registry.register(high, 100).unwrap();
457
- registry.register(medium, 50).unwrap();
455
+ registry.register(low, 10).expect("Operation failed");
456
+ registry.register(high, 100).expect("Operation failed");
457
+ registry.register(medium, 50).expect("Operation failed");
458
458
 
459
459
  let processors = registry.get_for_stage(ProcessingStage::Early);
460
460
  assert_eq!(processors.len(), 3);
@@ -474,7 +474,10 @@ async fn test_processor_execution_order_within_stage() {
474
474
 
475
475
  let config = ExtractionConfig::default();
476
476
  for processor in processors {
477
- processor.process(&mut result, &config).await.unwrap();
477
+ processor
478
+ .process(&mut result, &config)
479
+ .await
480
+ .expect("Async operation failed");
478
481
  }
479
482
 
480
483
  assert_eq!(result.content, "start [high] [medium] [low]");
@@ -488,7 +491,7 @@ async fn test_processor_error_propagation() {
488
491
  name: "failing".to_string(),
489
492
  });
490
493
 
491
- registry.register(failing, 50).unwrap();
494
+ registry.register(failing, 50).expect("Operation failed");
492
495
 
493
496
  let processors = registry.get_for_stage(ProcessingStage::Early);
494
497
  assert_eq!(processors.len(), 1);
@@ -531,9 +534,9 @@ fn test_processor_multiple_stages() {
531
534
  stage: ProcessingStage::Late,
532
535
  });
533
536
 
534
- registry.register(early, 50).unwrap();
535
- registry.register(middle, 50).unwrap();
536
- registry.register(late, 50).unwrap();
537
+ registry.register(early, 50).expect("Operation failed");
538
+ registry.register(middle, 50).expect("Operation failed");
539
+ registry.register(late, 50).expect("Operation failed");
537
540
 
538
541
  assert_eq!(registry.get_for_stage(ProcessingStage::Early).len(), 1);
539
542
  assert_eq!(registry.get_for_stage(ProcessingStage::Middle).len(), 1);
@@ -593,8 +596,8 @@ fn test_processor_same_priority_same_stage() {
593
596
  stage: ProcessingStage::Early,
594
597
  });
595
598
 
596
- registry.register(proc1, 50).unwrap();
597
- registry.register(proc2, 50).unwrap();
599
+ registry.register(proc1, 50).expect("Operation failed");
600
+ registry.register(proc2, 50).expect("Operation failed");
598
601
 
599
602
  let processors = registry.get_for_stage(ProcessingStage::Early);
600
603
  assert_eq!(processors.len(), 2);
@@ -609,10 +612,10 @@ fn test_processor_remove_from_specific_stage() {
609
612
  stage: ProcessingStage::Early,
610
613
  });
611
614
 
612
- registry.register(early, 50).unwrap();
615
+ registry.register(early, 50).expect("Operation failed");
613
616
  assert_eq!(registry.get_for_stage(ProcessingStage::Early).len(), 1);
614
617
 
615
- registry.remove("processor").unwrap();
618
+ registry.remove("processor").expect("Operation failed");
616
619
  assert_eq!(registry.get_for_stage(ProcessingStage::Early).len(), 0);
617
620
  }
618
621
 
@@ -625,7 +628,7 @@ fn test_processor_list_across_stages() {
625
628
  name: format!("{:?}-processor", stage),
626
629
  stage,
627
630
  });
628
- registry.register(processor, 50).unwrap();
631
+ registry.register(processor, 50).expect("Operation failed");
629
632
  }
630
633
 
631
634
  let names = registry.list();
@@ -641,10 +644,10 @@ fn test_processor_shutdown_clears_all_stages() {
641
644
  name: format!("{:?}-processor", stage),
642
645
  stage,
643
646
  });
644
- registry.register(processor, 50).unwrap();
647
+ registry.register(processor, 50).expect("Operation failed");
645
648
  }
646
649
 
647
- registry.shutdown_all().unwrap();
650
+ registry.shutdown_all().expect("Operation failed");
648
651
 
649
652
  assert_eq!(registry.get_for_stage(ProcessingStage::Early).len(), 0);
650
653
  assert_eq!(registry.get_for_stage(ProcessingStage::Middle).len(), 0);
@@ -660,7 +663,7 @@ async fn test_validator_content_validation() {
660
663
  min_length: 10,
661
664
  });
662
665
 
663
- registry.register(strict).unwrap();
666
+ registry.register(strict).expect("Operation failed");
664
667
 
665
668
  let validators = registry.get_all();
666
669
  assert_eq!(validators.len(), 1);
@@ -791,9 +794,9 @@ fn test_validator_priority_ordering() {
791
794
  let low = Arc::new(LowPriorityValidator);
792
795
  let high_priority = Arc::new(HighPriorityValidator);
793
796
 
794
- registry.register(medium).unwrap();
795
- registry.register(low).unwrap();
796
- registry.register(high_priority).unwrap();
797
+ registry.register(medium).expect("Operation failed");
798
+ registry.register(low).expect("Operation failed");
799
+ registry.register(high_priority).expect("Operation failed");
797
800
 
798
801
  let validators = registry.get_all();
799
802
  assert_eq!(validators.len(), 3);
@@ -857,13 +860,13 @@ fn test_validator_remove_and_reregister() {
857
860
  min_length: 5,
858
861
  });
859
862
 
860
- registry.register(Arc::clone(&validator)).unwrap();
863
+ registry.register(Arc::clone(&validator)).expect("Operation failed");
861
864
  assert_eq!(registry.get_all().len(), 1);
862
865
 
863
- registry.remove("validator").unwrap();
866
+ registry.remove("validator").expect("Operation failed");
864
867
  assert_eq!(registry.get_all().len(), 0);
865
868
 
866
- registry.register(validator).unwrap();
869
+ registry.register(validator).expect("Operation failed");
867
870
  assert_eq!(registry.get_all().len(), 1);
868
871
  }
869
872
 
@@ -890,9 +893,9 @@ fn test_multiple_registries_independence() {
890
893
  min_length: 5,
891
894
  });
892
895
 
893
- extractor_registry.register(extractor).unwrap();
894
- processor_registry.register(processor, 50).unwrap();
895
- validator_registry.register(validator).unwrap();
896
+ extractor_registry.register(extractor).expect("Operation failed");
897
+ processor_registry.register(processor, 50).expect("Operation failed");
898
+ validator_registry.register(validator).expect("Operation failed");
896
899
 
897
900
  assert_eq!(ocr_registry.list().len(), 0);
898
901
  assert_eq!(extractor_registry.list().len(), 1);
@@ -923,14 +926,14 @@ fn test_shutdown_all_registries() {
923
926
  min_length: 5,
924
927
  });
925
928
 
926
- extractor_registry.register(extractor).unwrap();
927
- processor_registry.register(processor, 50).unwrap();
928
- validator_registry.register(validator).unwrap();
929
+ extractor_registry.register(extractor).expect("Operation failed");
930
+ processor_registry.register(processor, 50).expect("Operation failed");
931
+ validator_registry.register(validator).expect("Operation failed");
929
932
 
930
- ocr_registry.shutdown_all().unwrap();
931
- extractor_registry.shutdown_all().unwrap();
932
- processor_registry.shutdown_all().unwrap();
933
- validator_registry.shutdown_all().unwrap();
933
+ ocr_registry.shutdown_all().expect("Operation failed");
934
+ extractor_registry.shutdown_all().expect("Operation failed");
935
+ processor_registry.shutdown_all().expect("Operation failed");
936
+ validator_registry.shutdown_all().expect("Operation failed");
934
937
 
935
938
  assert_eq!(ocr_registry.list().len(), 0);
936
939
  assert_eq!(extractor_registry.list().len(), 0);