kreuzberg 4.2.0 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +26 -17
- data/lib/kreuzberg/cli.rb +16 -6
- data/lib/kreuzberg/cli_proxy.rb +3 -1
- data/lib/kreuzberg/config.rb +56 -9
- data/lib/kreuzberg/djot_content.rb +225 -0
- data/lib/kreuzberg/extraction_api.rb +20 -4
- data/lib/kreuzberg/result.rb +12 -2
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +1 -0
- data/sig/kreuzberg.rbs +23 -11
- data/spec/binding/batch_spec.rb +6 -5
- data/spec/binding/error_recovery_spec.rb +3 -3
- data/spec/binding/tables_spec.rb +11 -2
- data/spec/unit/config/output_format_spec.rb +18 -18
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/api/startup.rs +15 -1
- data/vendor/kreuzberg/src/core/config_validation/sections.rs +16 -4
- data/vendor/kreuzberg/src/core/extractor/file.rs +1 -2
- data/vendor/kreuzberg/src/core/extractor/mod.rs +2 -1
- data/vendor/kreuzberg/src/core/io.rs +7 -7
- data/vendor/kreuzberg/src/core/mime.rs +4 -4
- data/vendor/kreuzberg/src/extraction/pptx/parser.rs +6 -0
- data/vendor/kreuzberg/src/plugins/mod.rs +1 -0
- data/vendor/kreuzberg/src/plugins/registry/extractor.rs +251 -5
- data/vendor/kreuzberg/src/plugins/registry/ocr.rs +150 -2
- data/vendor/kreuzberg/src/plugins/registry/processor.rs +213 -5
- data/vendor/kreuzberg/src/plugins/registry/validator.rs +220 -4
- data/vendor/kreuzberg/src/plugins/startup_validation.rs +385 -0
- data/vendor/kreuzberg/tests/config_behavioral.rs +14 -12
- data/vendor/kreuzberg/tests/core_integration.rs +2 -4
- data/vendor/kreuzberg/tests/mime_detection.rs +3 -2
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +284 -1
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +4 -2
|
@@ -43,9 +43,26 @@ impl DocumentExtractorRegistry {
|
|
|
43
43
|
let priority = extractor.priority();
|
|
44
44
|
let mime_types: Vec<String> = extractor.supported_mime_types().iter().map(|s| s.to_string()).collect();
|
|
45
45
|
|
|
46
|
-
super::validate_plugin_name(&name)
|
|
46
|
+
if let Err(e) = super::validate_plugin_name(&name) {
|
|
47
|
+
tracing::warn!(
|
|
48
|
+
"Failed to validate document extractor name '{}': {}. \
|
|
49
|
+
Registration aborted. Plugin names must be non-empty and contain only alphanumeric characters, hyphens, and underscores.",
|
|
50
|
+
name,
|
|
51
|
+
e
|
|
52
|
+
);
|
|
53
|
+
return Err(e);
|
|
54
|
+
}
|
|
47
55
|
|
|
48
|
-
extractor.initialize()
|
|
56
|
+
if let Err(e) = extractor.initialize() {
|
|
57
|
+
tracing::error!(
|
|
58
|
+
"Failed to initialize document extractor '{}': {}. \
|
|
59
|
+
Extraction for MIME types {:?} will be unavailable.",
|
|
60
|
+
name,
|
|
61
|
+
e,
|
|
62
|
+
mime_types
|
|
63
|
+
);
|
|
64
|
+
return Err(e);
|
|
65
|
+
}
|
|
49
66
|
|
|
50
67
|
let mut index_entries = Vec::new();
|
|
51
68
|
|
|
@@ -57,7 +74,13 @@ impl DocumentExtractorRegistry {
|
|
|
57
74
|
index_entries.push((mime_type.clone(), priority));
|
|
58
75
|
}
|
|
59
76
|
|
|
60
|
-
self.name_index.insert(name, index_entries);
|
|
77
|
+
self.name_index.insert(name.clone(), index_entries);
|
|
78
|
+
tracing::debug!(
|
|
79
|
+
"Registered document extractor '{}' with priority {} for MIME types: {:?}",
|
|
80
|
+
name,
|
|
81
|
+
priority,
|
|
82
|
+
mime_types
|
|
83
|
+
);
|
|
61
84
|
|
|
62
85
|
Ok(())
|
|
63
86
|
}
|
|
@@ -128,7 +151,13 @@ impl DocumentExtractorRegistry {
|
|
|
128
151
|
pub fn remove(&mut self, name: &str) -> Result<()> {
|
|
129
152
|
let index_entries = match self.name_index.remove(name) {
|
|
130
153
|
Some(entries) => entries,
|
|
131
|
-
None =>
|
|
154
|
+
None => {
|
|
155
|
+
tracing::debug!(
|
|
156
|
+
"Document extractor '{}' not found in registry (already removed or never registered)",
|
|
157
|
+
name
|
|
158
|
+
);
|
|
159
|
+
return Ok(());
|
|
160
|
+
}
|
|
132
161
|
};
|
|
133
162
|
|
|
134
163
|
let mut extractor_to_shutdown: Option<Arc<dyn DocumentExtractor>> = None;
|
|
@@ -148,7 +177,16 @@ impl DocumentExtractorRegistry {
|
|
|
148
177
|
}
|
|
149
178
|
|
|
150
179
|
if let Some(extractor) = extractor_to_shutdown {
|
|
151
|
-
extractor.shutdown()
|
|
180
|
+
if let Err(e) = extractor.shutdown() {
|
|
181
|
+
tracing::warn!(
|
|
182
|
+
"Failed to shutdown document extractor '{}': {}. \
|
|
183
|
+
Resources may not have been properly released.",
|
|
184
|
+
name,
|
|
185
|
+
e
|
|
186
|
+
);
|
|
187
|
+
return Err(e);
|
|
188
|
+
}
|
|
189
|
+
tracing::debug!("Successfully removed and shut down document extractor '{}'", name);
|
|
152
190
|
}
|
|
153
191
|
|
|
154
192
|
Ok(())
|
|
@@ -157,9 +195,19 @@ impl DocumentExtractorRegistry {
|
|
|
157
195
|
/// Shutdown all extractors and clear the registry.
|
|
158
196
|
pub fn shutdown_all(&mut self) -> Result<()> {
|
|
159
197
|
let names = self.list();
|
|
198
|
+
let count = names.len();
|
|
199
|
+
|
|
200
|
+
if count > 0 {
|
|
201
|
+
tracing::debug!("Shutting down {} document extractors", count);
|
|
202
|
+
}
|
|
203
|
+
|
|
160
204
|
for name in names {
|
|
161
205
|
self.remove(&name)?;
|
|
162
206
|
}
|
|
207
|
+
|
|
208
|
+
if count > 0 {
|
|
209
|
+
tracing::debug!("Successfully shut down all {} document extractors", count);
|
|
210
|
+
}
|
|
163
211
|
Ok(())
|
|
164
212
|
}
|
|
165
213
|
}
|
|
@@ -413,4 +461,202 @@ mod tests {
|
|
|
413
461
|
assert_eq!(registry.get("text/markdown").unwrap().name(), "multi-extractor");
|
|
414
462
|
assert_eq!(registry.get("text/html").unwrap().name(), "multi-extractor");
|
|
415
463
|
}
|
|
464
|
+
|
|
465
|
+
struct FailingExtractor {
|
|
466
|
+
name: String,
|
|
467
|
+
fail_on_init: bool,
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
impl Plugin for FailingExtractor {
|
|
471
|
+
fn name(&self) -> &str {
|
|
472
|
+
&self.name
|
|
473
|
+
}
|
|
474
|
+
fn version(&self) -> String {
|
|
475
|
+
"1.0.0".to_string()
|
|
476
|
+
}
|
|
477
|
+
fn initialize(&self) -> Result<()> {
|
|
478
|
+
if self.fail_on_init {
|
|
479
|
+
Err(KreuzbergError::Plugin {
|
|
480
|
+
message: "Extractor initialization failed".to_string(),
|
|
481
|
+
plugin_name: self.name.clone(),
|
|
482
|
+
})
|
|
483
|
+
} else {
|
|
484
|
+
Ok(())
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
fn shutdown(&self) -> Result<()> {
|
|
488
|
+
Ok(())
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
#[async_trait]
|
|
493
|
+
impl DocumentExtractor for FailingExtractor {
|
|
494
|
+
async fn extract_bytes(&self, _: &[u8], _: &str, _: &ExtractionConfig) -> Result<ExtractionResult> {
|
|
495
|
+
Ok(ExtractionResult {
|
|
496
|
+
content: "test".to_string(),
|
|
497
|
+
mime_type: "text/plain".to_string(),
|
|
498
|
+
metadata: crate::types::Metadata::default(),
|
|
499
|
+
tables: vec![],
|
|
500
|
+
detected_languages: None,
|
|
501
|
+
chunks: None,
|
|
502
|
+
images: None,
|
|
503
|
+
djot_content: None,
|
|
504
|
+
pages: None,
|
|
505
|
+
elements: None,
|
|
506
|
+
})
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
fn supported_mime_types(&self) -> &[&str] {
|
|
510
|
+
&["text/plain"]
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
fn priority(&self) -> i32 {
|
|
514
|
+
50
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
#[test]
|
|
519
|
+
fn test_document_extractor_initialization_failure_logs_error() {
|
|
520
|
+
let mut registry = DocumentExtractorRegistry::new();
|
|
521
|
+
|
|
522
|
+
let extractor = Arc::new(FailingExtractor {
|
|
523
|
+
name: "failing-extractor".to_string(),
|
|
524
|
+
fail_on_init: true,
|
|
525
|
+
});
|
|
526
|
+
|
|
527
|
+
let result = registry.register(extractor);
|
|
528
|
+
assert!(result.is_err());
|
|
529
|
+
assert_eq!(registry.list().len(), 0);
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
#[test]
|
|
533
|
+
fn test_document_extractor_invalid_name_empty_logs_warning() {
|
|
534
|
+
let mut registry = DocumentExtractorRegistry::new();
|
|
535
|
+
|
|
536
|
+
let extractor = Arc::new(MockExtractor {
|
|
537
|
+
name: "".to_string(),
|
|
538
|
+
mime_types: &["text/plain"],
|
|
539
|
+
priority: 50,
|
|
540
|
+
});
|
|
541
|
+
|
|
542
|
+
let result = registry.register(extractor);
|
|
543
|
+
assert!(matches!(result, Err(KreuzbergError::Validation { .. })));
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
#[test]
|
|
547
|
+
fn test_document_extractor_invalid_name_with_spaces_logs_warning() {
|
|
548
|
+
let mut registry = DocumentExtractorRegistry::new();
|
|
549
|
+
|
|
550
|
+
let extractor = Arc::new(MockExtractor {
|
|
551
|
+
name: "invalid extractor".to_string(),
|
|
552
|
+
mime_types: &["text/plain"],
|
|
553
|
+
priority: 50,
|
|
554
|
+
});
|
|
555
|
+
|
|
556
|
+
let result = registry.register(extractor);
|
|
557
|
+
assert!(matches!(result, Err(KreuzbergError::Validation { .. })));
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
#[test]
|
|
561
|
+
fn test_document_extractor_successful_registration_logs_debug() {
|
|
562
|
+
let mut registry = DocumentExtractorRegistry::new();
|
|
563
|
+
|
|
564
|
+
let extractor = Arc::new(MockExtractor {
|
|
565
|
+
name: "valid-pdf-extractor".to_string(),
|
|
566
|
+
mime_types: &["application/pdf"],
|
|
567
|
+
priority: 100,
|
|
568
|
+
});
|
|
569
|
+
|
|
570
|
+
let result = registry.register(extractor);
|
|
571
|
+
assert!(result.is_ok());
|
|
572
|
+
assert_eq!(registry.list().len(), 1);
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
#[test]
|
|
576
|
+
fn test_document_extractor_remove_nonexistent_logs_debug() {
|
|
577
|
+
let mut registry = DocumentExtractorRegistry::new();
|
|
578
|
+
|
|
579
|
+
let result = registry.remove("nonexistent-extractor");
|
|
580
|
+
assert!(result.is_ok());
|
|
581
|
+
assert_eq!(registry.list().len(), 0);
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
#[test]
|
|
585
|
+
fn test_document_extractor_shutdown_empty_registry() {
|
|
586
|
+
let mut registry = DocumentExtractorRegistry::new();
|
|
587
|
+
let result = registry.shutdown_all();
|
|
588
|
+
assert!(result.is_ok());
|
|
589
|
+
assert_eq!(registry.list().len(), 0);
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
#[test]
|
|
593
|
+
fn test_document_extractor_shutdown_with_multiple_extractors() {
|
|
594
|
+
let mut registry = DocumentExtractorRegistry::new();
|
|
595
|
+
|
|
596
|
+
let extractor1 = Arc::new(MockExtractor {
|
|
597
|
+
name: "extractor1".to_string(),
|
|
598
|
+
mime_types: &["text/plain"],
|
|
599
|
+
priority: 50,
|
|
600
|
+
});
|
|
601
|
+
|
|
602
|
+
let extractor2 = Arc::new(MockExtractor {
|
|
603
|
+
name: "extractor2".to_string(),
|
|
604
|
+
mime_types: &["application/pdf"],
|
|
605
|
+
priority: 100,
|
|
606
|
+
});
|
|
607
|
+
|
|
608
|
+
let extractor3 = Arc::new(MockExtractor {
|
|
609
|
+
name: "extractor3".to_string(),
|
|
610
|
+
mime_types: &["image/png"],
|
|
611
|
+
priority: 75,
|
|
612
|
+
});
|
|
613
|
+
|
|
614
|
+
registry.register(extractor1).unwrap();
|
|
615
|
+
registry.register(extractor2).unwrap();
|
|
616
|
+
registry.register(extractor3).unwrap();
|
|
617
|
+
|
|
618
|
+
assert_eq!(registry.list().len(), 3);
|
|
619
|
+
|
|
620
|
+
registry.shutdown_all().unwrap();
|
|
621
|
+
assert_eq!(registry.list().len(), 0);
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
#[test]
|
|
625
|
+
fn test_document_extractor_priority_ordering_complex() {
|
|
626
|
+
let mut registry = DocumentExtractorRegistry::new();
|
|
627
|
+
|
|
628
|
+
let extractors = vec![
|
|
629
|
+
(
|
|
630
|
+
Arc::new(MockExtractor {
|
|
631
|
+
name: "priority-1".to_string(),
|
|
632
|
+
mime_types: &["application/pdf"],
|
|
633
|
+
priority: 1,
|
|
634
|
+
}),
|
|
635
|
+
1,
|
|
636
|
+
),
|
|
637
|
+
(
|
|
638
|
+
Arc::new(MockExtractor {
|
|
639
|
+
name: "priority-100".to_string(),
|
|
640
|
+
mime_types: &["application/pdf"],
|
|
641
|
+
priority: 100,
|
|
642
|
+
}),
|
|
643
|
+
100,
|
|
644
|
+
),
|
|
645
|
+
(
|
|
646
|
+
Arc::new(MockExtractor {
|
|
647
|
+
name: "priority-50".to_string(),
|
|
648
|
+
mime_types: &["application/pdf"],
|
|
649
|
+
priority: 50,
|
|
650
|
+
}),
|
|
651
|
+
50,
|
|
652
|
+
),
|
|
653
|
+
];
|
|
654
|
+
|
|
655
|
+
for (extractor, _priority) in &extractors {
|
|
656
|
+
registry.register(extractor.clone()).unwrap();
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
let retrieved = registry.get("application/pdf").unwrap();
|
|
660
|
+
assert_eq!(retrieved.name(), "priority-100");
|
|
661
|
+
}
|
|
416
662
|
}
|
|
@@ -31,6 +31,8 @@ impl OcrBackendRegistry {
|
|
|
31
31
|
/// Create a new OCR backend registry with default backends.
|
|
32
32
|
///
|
|
33
33
|
/// Registers the Tesseract backend by default if the "ocr" feature is enabled.
|
|
34
|
+
/// Logs warnings if backend initialization fails (common in containerized environments
|
|
35
|
+
/// with missing dependencies or permission issues).
|
|
34
36
|
pub fn new() -> Self {
|
|
35
37
|
#[cfg(feature = "ocr")]
|
|
36
38
|
let mut registry = Self {
|
|
@@ -45,8 +47,27 @@ impl OcrBackendRegistry {
|
|
|
45
47
|
#[cfg(feature = "ocr")]
|
|
46
48
|
{
|
|
47
49
|
use crate::ocr::tesseract_backend::TesseractBackend;
|
|
48
|
-
|
|
49
|
-
|
|
50
|
+
match TesseractBackend::new() {
|
|
51
|
+
Ok(backend) => {
|
|
52
|
+
if let Err(e) = registry.register(Arc::new(backend)) {
|
|
53
|
+
tracing::error!(
|
|
54
|
+
"Failed to register Tesseract OCR backend: {}. \
|
|
55
|
+
OCR functionality will be unavailable. \
|
|
56
|
+
Check TESSDATA_PREFIX environment variable and tessdata file permissions.",
|
|
57
|
+
e
|
|
58
|
+
);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
Err(e) => {
|
|
62
|
+
tracing::warn!(
|
|
63
|
+
"Tesseract OCR backend initialization failed: {}. \
|
|
64
|
+
OCR functionality will be unavailable. \
|
|
65
|
+
Common causes: missing TESSDATA_PREFIX env var, \
|
|
66
|
+
tessdata files not found, or permission issues in containerized environments. \
|
|
67
|
+
See https://docs.kreuzberg.dev/guides/docker/ for Kubernetes troubleshooting.",
|
|
68
|
+
e
|
|
69
|
+
);
|
|
70
|
+
}
|
|
50
71
|
}
|
|
51
72
|
}
|
|
52
73
|
|
|
@@ -290,4 +311,131 @@ mod tests {
|
|
|
290
311
|
registry.shutdown_all().unwrap();
|
|
291
312
|
assert_eq!(registry.list().len(), 0);
|
|
292
313
|
}
|
|
314
|
+
|
|
315
|
+
struct FailingOcrBackend {
|
|
316
|
+
name: String,
|
|
317
|
+
fail_on_init: bool,
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
impl Plugin for FailingOcrBackend {
|
|
321
|
+
fn name(&self) -> &str {
|
|
322
|
+
&self.name
|
|
323
|
+
}
|
|
324
|
+
fn version(&self) -> String {
|
|
325
|
+
"1.0.0".to_string()
|
|
326
|
+
}
|
|
327
|
+
fn initialize(&self) -> Result<()> {
|
|
328
|
+
if self.fail_on_init {
|
|
329
|
+
Err(KreuzbergError::Plugin {
|
|
330
|
+
message: "Backend initialization failed".to_string(),
|
|
331
|
+
plugin_name: self.name.clone(),
|
|
332
|
+
})
|
|
333
|
+
} else {
|
|
334
|
+
Ok(())
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
fn shutdown(&self) -> Result<()> {
|
|
338
|
+
Ok(())
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
#[async_trait]
|
|
343
|
+
impl OcrBackend for FailingOcrBackend {
|
|
344
|
+
async fn process_image(&self, _: &[u8], _: &OcrConfig) -> Result<ExtractionResult> {
|
|
345
|
+
Ok(ExtractionResult {
|
|
346
|
+
content: "test".to_string(),
|
|
347
|
+
mime_type: "text/plain".to_string(),
|
|
348
|
+
metadata: crate::types::Metadata::default(),
|
|
349
|
+
tables: vec![],
|
|
350
|
+
detected_languages: None,
|
|
351
|
+
chunks: None,
|
|
352
|
+
images: None,
|
|
353
|
+
djot_content: None,
|
|
354
|
+
pages: None,
|
|
355
|
+
elements: None,
|
|
356
|
+
})
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
fn supports_language(&self, _lang: &str) -> bool {
|
|
360
|
+
false
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
fn backend_type(&self) -> crate::plugins::ocr::OcrBackendType {
|
|
364
|
+
crate::plugins::ocr::OcrBackendType::Custom
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
#[test]
|
|
369
|
+
fn test_ocr_backend_initialization_failure_logs_error() {
|
|
370
|
+
let mut registry = OcrBackendRegistry::new_empty();
|
|
371
|
+
|
|
372
|
+
let backend = Arc::new(FailingOcrBackend {
|
|
373
|
+
name: "failing-ocr".to_string(),
|
|
374
|
+
fail_on_init: true,
|
|
375
|
+
});
|
|
376
|
+
|
|
377
|
+
let result = registry.register(backend);
|
|
378
|
+
assert!(result.is_err());
|
|
379
|
+
assert_eq!(registry.list().len(), 0);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
#[test]
|
|
383
|
+
fn test_ocr_backend_invalid_name_empty_logs_warning() {
|
|
384
|
+
let mut registry = OcrBackendRegistry::new_empty();
|
|
385
|
+
|
|
386
|
+
let backend = Arc::new(MockOcrBackend {
|
|
387
|
+
name: "".to_string(),
|
|
388
|
+
languages: vec!["eng".to_string()],
|
|
389
|
+
});
|
|
390
|
+
|
|
391
|
+
let result = registry.register(backend);
|
|
392
|
+
assert!(matches!(result, Err(KreuzbergError::Validation { .. })));
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
#[test]
|
|
396
|
+
fn test_ocr_backend_invalid_name_with_spaces_logs_warning() {
|
|
397
|
+
let mut registry = OcrBackendRegistry::new_empty();
|
|
398
|
+
|
|
399
|
+
let backend = Arc::new(MockOcrBackend {
|
|
400
|
+
name: "invalid ocr backend".to_string(),
|
|
401
|
+
languages: vec!["eng".to_string()],
|
|
402
|
+
});
|
|
403
|
+
|
|
404
|
+
let result = registry.register(backend);
|
|
405
|
+
assert!(matches!(result, Err(KreuzbergError::Validation { .. })));
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
#[test]
|
|
409
|
+
fn test_ocr_backend_successful_registration_logs_debug() {
|
|
410
|
+
let mut registry = OcrBackendRegistry::new_empty();
|
|
411
|
+
|
|
412
|
+
let backend = Arc::new(MockOcrBackend {
|
|
413
|
+
name: "valid-ocr".to_string(),
|
|
414
|
+
languages: vec!["eng".to_string()],
|
|
415
|
+
});
|
|
416
|
+
|
|
417
|
+
let result = registry.register(backend);
|
|
418
|
+
assert!(result.is_ok());
|
|
419
|
+
assert_eq!(registry.list().len(), 1);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
#[test]
|
|
423
|
+
fn test_ocr_backend_multiple_registrations() {
|
|
424
|
+
let mut registry = OcrBackendRegistry::new_empty();
|
|
425
|
+
|
|
426
|
+
let backend1 = Arc::new(MockOcrBackend {
|
|
427
|
+
name: "ocr-backend-1".to_string(),
|
|
428
|
+
languages: vec!["eng".to_string()],
|
|
429
|
+
});
|
|
430
|
+
|
|
431
|
+
let backend2 = Arc::new(MockOcrBackend {
|
|
432
|
+
name: "ocr-backend-2".to_string(),
|
|
433
|
+
languages: vec!["deu".to_string()],
|
|
434
|
+
});
|
|
435
|
+
|
|
436
|
+
registry.register(backend1).unwrap();
|
|
437
|
+
registry.register(backend2).unwrap();
|
|
438
|
+
|
|
439
|
+
assert_eq!(registry.list().len(), 2);
|
|
440
|
+
}
|
|
293
441
|
}
|