kreuzberg 4.1.2 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +26 -17
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/kreuzberg.gemspec +13 -1
- data/lib/kreuzberg/cli.rb +16 -6
- data/lib/kreuzberg/cli_proxy.rb +3 -1
- data/lib/kreuzberg/config.rb +121 -39
- data/lib/kreuzberg/djot_content.rb +225 -0
- data/lib/kreuzberg/extraction_api.rb +20 -4
- data/lib/kreuzberg/result.rb +12 -2
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +1 -0
- data/sig/kreuzberg.rbs +28 -12
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/batch_spec.rb +6 -5
- data/spec/binding/error_recovery_spec.rb +3 -3
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/binding/tables_spec.rb +11 -2
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/api/startup.rs +15 -1
- data/vendor/kreuzberg/src/core/config_validation/sections.rs +16 -4
- data/vendor/kreuzberg/src/core/extractor/file.rs +1 -2
- data/vendor/kreuzberg/src/core/extractor/mod.rs +2 -1
- data/vendor/kreuzberg/src/core/io.rs +7 -7
- data/vendor/kreuzberg/src/core/mime.rs +4 -4
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/extraction/pptx/parser.rs +6 -0
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/src/plugins/mod.rs +1 -0
- data/vendor/kreuzberg/src/plugins/registry/extractor.rs +251 -5
- data/vendor/kreuzberg/src/plugins/registry/ocr.rs +150 -2
- data/vendor/kreuzberg/src/plugins/registry/processor.rs +213 -5
- data/vendor/kreuzberg/src/plugins/registry/validator.rs +220 -4
- data/vendor/kreuzberg/src/plugins/startup_validation.rs +385 -0
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +416 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +57 -57
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +75 -43
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +324 -31
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +12 -2
|
@@ -32,11 +32,33 @@ impl PostProcessorRegistry {
|
|
|
32
32
|
let name = processor.name().to_string();
|
|
33
33
|
let stage = processor.processing_stage();
|
|
34
34
|
|
|
35
|
-
super::validate_plugin_name(&name)
|
|
35
|
+
if let Err(e) = super::validate_plugin_name(&name) {
|
|
36
|
+
tracing::warn!(
|
|
37
|
+
"Failed to validate post-processor name '{}': {}. \
|
|
38
|
+
Registration aborted. Plugin names must be non-empty and contain only alphanumeric characters, hyphens, and underscores.",
|
|
39
|
+
name,
|
|
40
|
+
e
|
|
41
|
+
);
|
|
42
|
+
return Err(e);
|
|
43
|
+
}
|
|
36
44
|
|
|
37
|
-
processor.initialize()
|
|
45
|
+
if let Err(e) = processor.initialize() {
|
|
46
|
+
tracing::error!(
|
|
47
|
+
"Failed to initialize post-processor '{}' for processing stage {:?} with priority {}: {}. \
|
|
48
|
+
Post-processing step will not be executed.",
|
|
49
|
+
name,
|
|
50
|
+
stage,
|
|
51
|
+
priority,
|
|
52
|
+
e
|
|
53
|
+
);
|
|
54
|
+
return Err(e);
|
|
55
|
+
}
|
|
38
56
|
|
|
39
57
|
if self.name_index.contains_key(&name) {
|
|
58
|
+
tracing::debug!(
|
|
59
|
+
"Post-processor '{}' is already registered. Removing old instance and registering new one.",
|
|
60
|
+
name
|
|
61
|
+
);
|
|
40
62
|
self.remove(&name)?;
|
|
41
63
|
}
|
|
42
64
|
|
|
@@ -47,7 +69,13 @@ impl PostProcessorRegistry {
|
|
|
47
69
|
.or_default()
|
|
48
70
|
.push(Arc::clone(&processor));
|
|
49
71
|
|
|
50
|
-
self.name_index.insert(name, (stage, priority));
|
|
72
|
+
self.name_index.insert(name.clone(), (stage, priority));
|
|
73
|
+
tracing::debug!(
|
|
74
|
+
"Registered post-processor '{}' for stage {:?} with priority {}",
|
|
75
|
+
name,
|
|
76
|
+
stage,
|
|
77
|
+
priority
|
|
78
|
+
);
|
|
51
79
|
|
|
52
80
|
Ok(())
|
|
53
81
|
}
|
|
@@ -84,7 +112,13 @@ impl PostProcessorRegistry {
|
|
|
84
112
|
pub fn remove(&mut self, name: &str) -> Result<()> {
|
|
85
113
|
let (stage, priority) = match self.name_index.remove(name) {
|
|
86
114
|
Some(location) => location,
|
|
87
|
-
None =>
|
|
115
|
+
None => {
|
|
116
|
+
tracing::debug!(
|
|
117
|
+
"Post-processor '{}' not found in registry (already removed or never registered)",
|
|
118
|
+
name
|
|
119
|
+
);
|
|
120
|
+
return Ok(());
|
|
121
|
+
}
|
|
88
122
|
};
|
|
89
123
|
|
|
90
124
|
let processor_to_shutdown = if let Some(priority_map) = self.processors.get_mut(&stage) {
|
|
@@ -110,7 +144,16 @@ impl PostProcessorRegistry {
|
|
|
110
144
|
};
|
|
111
145
|
|
|
112
146
|
if let Some(processor) = processor_to_shutdown {
|
|
113
|
-
processor.shutdown()
|
|
147
|
+
if let Err(e) = processor.shutdown() {
|
|
148
|
+
tracing::warn!(
|
|
149
|
+
"Failed to shutdown post-processor '{}': {}. \
|
|
150
|
+
Resources may not have been properly released.",
|
|
151
|
+
name,
|
|
152
|
+
e
|
|
153
|
+
);
|
|
154
|
+
return Err(e);
|
|
155
|
+
}
|
|
156
|
+
tracing::debug!("Successfully removed and shut down post-processor '{}'", name);
|
|
114
157
|
}
|
|
115
158
|
|
|
116
159
|
Ok(())
|
|
@@ -119,9 +162,19 @@ impl PostProcessorRegistry {
|
|
|
119
162
|
/// Shutdown all processors and clear the registry.
|
|
120
163
|
pub fn shutdown_all(&mut self) -> Result<()> {
|
|
121
164
|
let names = self.list();
|
|
165
|
+
let count = names.len();
|
|
166
|
+
|
|
167
|
+
if count > 0 {
|
|
168
|
+
tracing::debug!("Shutting down {} post-processors", count);
|
|
169
|
+
}
|
|
170
|
+
|
|
122
171
|
for name in names {
|
|
123
172
|
self.remove(&name)?;
|
|
124
173
|
}
|
|
174
|
+
|
|
175
|
+
if count > 0 {
|
|
176
|
+
tracing::debug!("Successfully shut down all {} post-processors", count);
|
|
177
|
+
}
|
|
125
178
|
Ok(())
|
|
126
179
|
}
|
|
127
180
|
}
|
|
@@ -301,4 +354,159 @@ mod tests {
|
|
|
301
354
|
let processors = registry.get_for_stage(ProcessingStage::Late);
|
|
302
355
|
assert_eq!(processors.len(), 0);
|
|
303
356
|
}
|
|
357
|
+
|
|
358
|
+
struct FailingPostProcessor {
|
|
359
|
+
name: String,
|
|
360
|
+
stage: ProcessingStage,
|
|
361
|
+
fail_on_init: bool,
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
impl Plugin for FailingPostProcessor {
|
|
365
|
+
fn name(&self) -> &str {
|
|
366
|
+
&self.name
|
|
367
|
+
}
|
|
368
|
+
fn version(&self) -> String {
|
|
369
|
+
"1.0.0".to_string()
|
|
370
|
+
}
|
|
371
|
+
fn initialize(&self) -> Result<()> {
|
|
372
|
+
if self.fail_on_init {
|
|
373
|
+
Err(KreuzbergError::Plugin {
|
|
374
|
+
message: "Processor initialization failed".to_string(),
|
|
375
|
+
plugin_name: self.name.clone(),
|
|
376
|
+
})
|
|
377
|
+
} else {
|
|
378
|
+
Ok(())
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
fn shutdown(&self) -> Result<()> {
|
|
382
|
+
Ok(())
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
#[async_trait]
|
|
387
|
+
impl PostProcessor for FailingPostProcessor {
|
|
388
|
+
async fn process(&self, _result: &mut ExtractionResult, _: &ExtractionConfig) -> Result<()> {
|
|
389
|
+
Ok(())
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
fn processing_stage(&self) -> ProcessingStage {
|
|
393
|
+
self.stage
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
#[test]
|
|
398
|
+
fn test_post_processor_initialization_failure_logs_error() {
|
|
399
|
+
let mut registry = PostProcessorRegistry::new();
|
|
400
|
+
|
|
401
|
+
let processor = Arc::new(FailingPostProcessor {
|
|
402
|
+
name: "failing-processor".to_string(),
|
|
403
|
+
stage: ProcessingStage::Early,
|
|
404
|
+
fail_on_init: true,
|
|
405
|
+
});
|
|
406
|
+
|
|
407
|
+
let result = registry.register(processor, 50);
|
|
408
|
+
assert!(result.is_err());
|
|
409
|
+
assert_eq!(registry.list().len(), 0);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
#[test]
|
|
413
|
+
fn test_post_processor_invalid_name_empty_logs_warning() {
|
|
414
|
+
let mut registry = PostProcessorRegistry::new();
|
|
415
|
+
|
|
416
|
+
let processor = Arc::new(MockPostProcessor {
|
|
417
|
+
name: "".to_string(),
|
|
418
|
+
stage: ProcessingStage::Early,
|
|
419
|
+
});
|
|
420
|
+
|
|
421
|
+
let result = registry.register(processor, 50);
|
|
422
|
+
assert!(matches!(result, Err(KreuzbergError::Validation { .. })));
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
#[test]
|
|
426
|
+
fn test_post_processor_invalid_name_with_spaces_logs_warning() {
|
|
427
|
+
let mut registry = PostProcessorRegistry::new();
|
|
428
|
+
|
|
429
|
+
let processor = Arc::new(MockPostProcessor {
|
|
430
|
+
name: "invalid processor".to_string(),
|
|
431
|
+
stage: ProcessingStage::Early,
|
|
432
|
+
});
|
|
433
|
+
|
|
434
|
+
let result = registry.register(processor, 50);
|
|
435
|
+
assert!(matches!(result, Err(KreuzbergError::Validation { .. })));
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
#[test]
|
|
439
|
+
fn test_post_processor_successful_registration_logs_debug() {
|
|
440
|
+
let mut registry = PostProcessorRegistry::new();
|
|
441
|
+
|
|
442
|
+
let processor = Arc::new(MockPostProcessor {
|
|
443
|
+
name: "valid-processor".to_string(),
|
|
444
|
+
stage: ProcessingStage::Early,
|
|
445
|
+
});
|
|
446
|
+
|
|
447
|
+
let result = registry.register(processor, 50);
|
|
448
|
+
assert!(result.is_ok());
|
|
449
|
+
assert_eq!(registry.list().len(), 1);
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
#[test]
|
|
453
|
+
fn test_post_processor_remove_nonexistent_logs_debug() {
|
|
454
|
+
let mut registry = PostProcessorRegistry::new();
|
|
455
|
+
|
|
456
|
+
let result = registry.remove("nonexistent-processor");
|
|
457
|
+
assert!(result.is_ok());
|
|
458
|
+
assert_eq!(registry.list().len(), 0);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
#[test]
|
|
462
|
+
fn test_post_processor_register_same_name_twice() {
|
|
463
|
+
let mut registry = PostProcessorRegistry::new();
|
|
464
|
+
|
|
465
|
+
let processor = Arc::new(MockPostProcessor {
|
|
466
|
+
name: "duplicate-processor".to_string(),
|
|
467
|
+
stage: ProcessingStage::Early,
|
|
468
|
+
});
|
|
469
|
+
|
|
470
|
+
registry.register(processor.clone(), 50).unwrap();
|
|
471
|
+
assert_eq!(registry.list().len(), 1);
|
|
472
|
+
|
|
473
|
+
registry.register(processor, 75).unwrap();
|
|
474
|
+
assert_eq!(registry.list().len(), 1);
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
#[test]
|
|
478
|
+
fn test_post_processor_multiple_stages() {
|
|
479
|
+
let mut registry = PostProcessorRegistry::new();
|
|
480
|
+
|
|
481
|
+
let early_processor = Arc::new(MockPostProcessor {
|
|
482
|
+
name: "early-proc".to_string(),
|
|
483
|
+
stage: ProcessingStage::Early,
|
|
484
|
+
});
|
|
485
|
+
|
|
486
|
+
let middle_processor = Arc::new(MockPostProcessor {
|
|
487
|
+
name: "middle-proc".to_string(),
|
|
488
|
+
stage: ProcessingStage::Middle,
|
|
489
|
+
});
|
|
490
|
+
|
|
491
|
+
let late_processor = Arc::new(MockPostProcessor {
|
|
492
|
+
name: "late-proc".to_string(),
|
|
493
|
+
stage: ProcessingStage::Late,
|
|
494
|
+
});
|
|
495
|
+
|
|
496
|
+
registry.register(early_processor, 100).unwrap();
|
|
497
|
+
registry.register(middle_processor, 50).unwrap();
|
|
498
|
+
registry.register(late_processor, 25).unwrap();
|
|
499
|
+
|
|
500
|
+
assert_eq!(registry.get_for_stage(ProcessingStage::Early).len(), 1);
|
|
501
|
+
assert_eq!(registry.get_for_stage(ProcessingStage::Middle).len(), 1);
|
|
502
|
+
assert_eq!(registry.get_for_stage(ProcessingStage::Late).len(), 1);
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
#[test]
|
|
506
|
+
fn test_post_processor_shutdown_empty_registry() {
|
|
507
|
+
let mut registry = PostProcessorRegistry::new();
|
|
508
|
+
let result = registry.shutdown_all();
|
|
509
|
+
assert!(result.is_ok());
|
|
510
|
+
assert_eq!(registry.list().len(), 0);
|
|
511
|
+
}
|
|
304
512
|
}
|
|
@@ -30,11 +30,32 @@ impl ValidatorRegistry {
|
|
|
30
30
|
let name = validator.name().to_string();
|
|
31
31
|
let priority = validator.priority();
|
|
32
32
|
|
|
33
|
-
super::validate_plugin_name(&name)
|
|
33
|
+
if let Err(e) = super::validate_plugin_name(&name) {
|
|
34
|
+
tracing::warn!(
|
|
35
|
+
"Failed to validate validator name '{}': {}. \
|
|
36
|
+
Registration aborted. Plugin names must be non-empty and contain only alphanumeric characters, hyphens, and underscores.",
|
|
37
|
+
name,
|
|
38
|
+
e
|
|
39
|
+
);
|
|
40
|
+
return Err(e);
|
|
41
|
+
}
|
|
34
42
|
|
|
35
|
-
validator.initialize()
|
|
43
|
+
if let Err(e) = validator.initialize() {
|
|
44
|
+
tracing::error!(
|
|
45
|
+
"Failed to initialize validator '{}' with priority {}: {}. \
|
|
46
|
+
Validation step will not be executed.",
|
|
47
|
+
name,
|
|
48
|
+
priority,
|
|
49
|
+
e
|
|
50
|
+
);
|
|
51
|
+
return Err(e);
|
|
52
|
+
}
|
|
36
53
|
|
|
37
|
-
self.validators
|
|
54
|
+
self.validators
|
|
55
|
+
.entry(priority)
|
|
56
|
+
.or_default()
|
|
57
|
+
.insert(name.clone(), validator);
|
|
58
|
+
tracing::debug!("Registered validator '{}' with priority {}", name, priority);
|
|
38
59
|
|
|
39
60
|
Ok(())
|
|
40
61
|
}
|
|
@@ -68,17 +89,35 @@ impl ValidatorRegistry {
|
|
|
68
89
|
/// Remove a validator from the registry.
|
|
69
90
|
pub fn remove(&mut self, name: &str) -> Result<()> {
|
|
70
91
|
let mut validator_to_shutdown: Option<Arc<dyn Validator>> = None;
|
|
92
|
+
let mut found = false;
|
|
71
93
|
|
|
72
94
|
for validators in self.validators.values_mut() {
|
|
73
95
|
if let Some(validator) = validators.shift_remove(name)
|
|
74
96
|
&& validator_to_shutdown.is_none()
|
|
75
97
|
{
|
|
76
98
|
validator_to_shutdown = Some(validator);
|
|
99
|
+
found = true;
|
|
77
100
|
}
|
|
78
101
|
}
|
|
79
102
|
|
|
103
|
+
if !found {
|
|
104
|
+
tracing::debug!(
|
|
105
|
+
"Validator '{}' not found in registry (already removed or never registered)",
|
|
106
|
+
name
|
|
107
|
+
);
|
|
108
|
+
}
|
|
109
|
+
|
|
80
110
|
if let Some(validator) = validator_to_shutdown {
|
|
81
|
-
validator.shutdown()
|
|
111
|
+
if let Err(e) = validator.shutdown() {
|
|
112
|
+
tracing::warn!(
|
|
113
|
+
"Failed to shutdown validator '{}': {}. \
|
|
114
|
+
Resources may not have been properly released.",
|
|
115
|
+
name,
|
|
116
|
+
e
|
|
117
|
+
);
|
|
118
|
+
return Err(e);
|
|
119
|
+
}
|
|
120
|
+
tracing::debug!("Successfully removed and shut down validator '{}'", name);
|
|
82
121
|
}
|
|
83
122
|
|
|
84
123
|
self.validators.retain(|_, validators| !validators.is_empty());
|
|
@@ -89,9 +128,19 @@ impl ValidatorRegistry {
|
|
|
89
128
|
/// Shutdown all validators and clear the registry.
|
|
90
129
|
pub fn shutdown_all(&mut self) -> Result<()> {
|
|
91
130
|
let names = self.list();
|
|
131
|
+
let count = names.len();
|
|
132
|
+
|
|
133
|
+
if count > 0 {
|
|
134
|
+
tracing::debug!("Shutting down {} validators", count);
|
|
135
|
+
}
|
|
136
|
+
|
|
92
137
|
for name in names {
|
|
93
138
|
self.remove(&name)?;
|
|
94
139
|
}
|
|
140
|
+
|
|
141
|
+
if count > 0 {
|
|
142
|
+
tracing::debug!("Successfully shut down all {} validators", count);
|
|
143
|
+
}
|
|
95
144
|
Ok(())
|
|
96
145
|
}
|
|
97
146
|
}
|
|
@@ -235,4 +284,171 @@ mod tests {
|
|
|
235
284
|
registry.shutdown_all().unwrap();
|
|
236
285
|
assert_eq!(registry.get_all().len(), 0);
|
|
237
286
|
}
|
|
287
|
+
|
|
288
|
+
struct FailingValidator {
|
|
289
|
+
name: String,
|
|
290
|
+
priority: i32,
|
|
291
|
+
fail_on_init: bool,
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
impl Plugin for FailingValidator {
|
|
295
|
+
fn name(&self) -> &str {
|
|
296
|
+
&self.name
|
|
297
|
+
}
|
|
298
|
+
fn version(&self) -> String {
|
|
299
|
+
"1.0.0".to_string()
|
|
300
|
+
}
|
|
301
|
+
fn initialize(&self) -> Result<()> {
|
|
302
|
+
if self.fail_on_init {
|
|
303
|
+
Err(KreuzbergError::Plugin {
|
|
304
|
+
message: "Validator initialization failed".to_string(),
|
|
305
|
+
plugin_name: self.name.clone(),
|
|
306
|
+
})
|
|
307
|
+
} else {
|
|
308
|
+
Ok(())
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
fn shutdown(&self) -> Result<()> {
|
|
312
|
+
Ok(())
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
#[async_trait]
|
|
317
|
+
impl Validator for FailingValidator {
|
|
318
|
+
async fn validate(&self, _: &ExtractionResult, _: &ExtractionConfig) -> Result<()> {
|
|
319
|
+
Ok(())
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
fn priority(&self) -> i32 {
|
|
323
|
+
self.priority
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
#[test]
|
|
328
|
+
fn test_validator_initialization_failure_logs_error() {
|
|
329
|
+
let mut registry = ValidatorRegistry::new();
|
|
330
|
+
|
|
331
|
+
let validator = Arc::new(FailingValidator {
|
|
332
|
+
name: "failing-validator".to_string(),
|
|
333
|
+
priority: 50,
|
|
334
|
+
fail_on_init: true,
|
|
335
|
+
});
|
|
336
|
+
|
|
337
|
+
let result = registry.register(validator);
|
|
338
|
+
assert!(result.is_err());
|
|
339
|
+
assert_eq!(registry.get_all().len(), 0);
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
#[test]
|
|
343
|
+
fn test_validator_invalid_name_empty_logs_warning() {
|
|
344
|
+
let mut registry = ValidatorRegistry::new();
|
|
345
|
+
|
|
346
|
+
let validator = Arc::new(MockValidator {
|
|
347
|
+
name: "".to_string(),
|
|
348
|
+
priority: 50,
|
|
349
|
+
});
|
|
350
|
+
|
|
351
|
+
let result = registry.register(validator);
|
|
352
|
+
assert!(matches!(result, Err(KreuzbergError::Validation { .. })));
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
#[test]
|
|
356
|
+
fn test_validator_successful_registration_logs_debug() {
|
|
357
|
+
let mut registry = ValidatorRegistry::new();
|
|
358
|
+
|
|
359
|
+
let validator = Arc::new(MockValidator {
|
|
360
|
+
name: "valid-validator".to_string(),
|
|
361
|
+
priority: 50,
|
|
362
|
+
});
|
|
363
|
+
|
|
364
|
+
let result = registry.register(validator);
|
|
365
|
+
assert!(result.is_ok());
|
|
366
|
+
assert_eq!(registry.get_all().len(), 1);
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
#[test]
|
|
370
|
+
fn test_validator_remove_nonexistent_logs_debug() {
|
|
371
|
+
let mut registry = ValidatorRegistry::new();
|
|
372
|
+
|
|
373
|
+
let result = registry.remove("nonexistent-validator");
|
|
374
|
+
assert!(result.is_ok());
|
|
375
|
+
assert_eq!(registry.get_all().len(), 0);
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
#[test]
|
|
379
|
+
fn test_validator_priority_ordering_reversed() {
|
|
380
|
+
let mut registry = ValidatorRegistry::new();
|
|
381
|
+
|
|
382
|
+
let high = Arc::new(MockValidator {
|
|
383
|
+
name: "high-priority".to_string(),
|
|
384
|
+
priority: 100,
|
|
385
|
+
});
|
|
386
|
+
|
|
387
|
+
let low = Arc::new(MockValidator {
|
|
388
|
+
name: "low-priority".to_string(),
|
|
389
|
+
priority: 10,
|
|
390
|
+
});
|
|
391
|
+
|
|
392
|
+
registry.register(low).unwrap();
|
|
393
|
+
registry.register(high).unwrap();
|
|
394
|
+
|
|
395
|
+
let validators = registry.get_all();
|
|
396
|
+
assert_eq!(validators.len(), 2);
|
|
397
|
+
assert_eq!(validators[0].name(), "high-priority");
|
|
398
|
+
assert_eq!(validators[1].name(), "low-priority");
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
#[test]
|
|
402
|
+
fn test_validator_multiple_same_priority() {
|
|
403
|
+
let mut registry = ValidatorRegistry::new();
|
|
404
|
+
|
|
405
|
+
let validator1 = Arc::new(MockValidator {
|
|
406
|
+
name: "validator-a".to_string(),
|
|
407
|
+
priority: 50,
|
|
408
|
+
});
|
|
409
|
+
|
|
410
|
+
let validator2 = Arc::new(MockValidator {
|
|
411
|
+
name: "validator-b".to_string(),
|
|
412
|
+
priority: 50,
|
|
413
|
+
});
|
|
414
|
+
|
|
415
|
+
let validator3 = Arc::new(MockValidator {
|
|
416
|
+
name: "validator-c".to_string(),
|
|
417
|
+
priority: 50,
|
|
418
|
+
});
|
|
419
|
+
|
|
420
|
+
registry.register(validator1).unwrap();
|
|
421
|
+
registry.register(validator2).unwrap();
|
|
422
|
+
registry.register(validator3).unwrap();
|
|
423
|
+
|
|
424
|
+
let validators = registry.get_all();
|
|
425
|
+
assert_eq!(validators.len(), 3);
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
#[test]
|
|
429
|
+
fn test_validator_shutdown_empty_registry() {
|
|
430
|
+
let mut registry = ValidatorRegistry::new();
|
|
431
|
+
let result = registry.shutdown_all();
|
|
432
|
+
assert!(result.is_ok());
|
|
433
|
+
assert_eq!(registry.get_all().len(), 0);
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
#[test]
|
|
437
|
+
fn test_validator_remove_and_readd() {
|
|
438
|
+
let mut registry = ValidatorRegistry::new();
|
|
439
|
+
|
|
440
|
+
let validator = Arc::new(MockValidator {
|
|
441
|
+
name: "test-validator".to_string(),
|
|
442
|
+
priority: 50,
|
|
443
|
+
});
|
|
444
|
+
|
|
445
|
+
registry.register(validator.clone()).unwrap();
|
|
446
|
+
assert_eq!(registry.get_all().len(), 1);
|
|
447
|
+
|
|
448
|
+
registry.remove("test-validator").unwrap();
|
|
449
|
+
assert_eq!(registry.get_all().len(), 0);
|
|
450
|
+
|
|
451
|
+
registry.register(validator).unwrap();
|
|
452
|
+
assert_eq!(registry.get_all().len(), 1);
|
|
453
|
+
}
|
|
238
454
|
}
|