kreuzberg 4.1.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/README.md +8 -5
- data/ext/kreuzberg_rb/native/Cargo.toml +2 -2
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/ext/kreuzberg_rb/native/src/config/types.rs +23 -13
- data/kreuzberg.gemspec +14 -2
- data/lib/kreuzberg/api_proxy.rb +0 -1
- data/lib/kreuzberg/cli_proxy.rb +0 -1
- data/lib/kreuzberg/config.rb +70 -35
- data/lib/kreuzberg/mcp_proxy.rb +0 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +5 -1
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +3 -3
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +55 -53
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +3 -3
- data/vendor/kreuzberg-tesseract/build.rs +4 -4
- data/vendor/kreuzberg-tesseract/src/lib.rs +6 -6
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +3 -3
- metadata +13 -2
|
@@ -146,7 +146,7 @@ async fn test_pipeline_empty_no_processors() {
|
|
|
146
146
|
};
|
|
147
147
|
let config = ExtractionConfig::default();
|
|
148
148
|
|
|
149
|
-
let processed = run_pipeline(result, &config).await.
|
|
149
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
150
150
|
assert_eq!(processed.content, "original content");
|
|
151
151
|
}
|
|
152
152
|
|
|
@@ -174,9 +174,9 @@ async fn test_pipeline_single_processor_per_stage() {
|
|
|
174
174
|
stage: ProcessingStage::Late,
|
|
175
175
|
});
|
|
176
176
|
|
|
177
|
-
reg.register(early, 50).
|
|
178
|
-
reg.register(middle, 50).
|
|
179
|
-
reg.register(late, 50).
|
|
177
|
+
reg.register(early, 50).expect("Operation failed");
|
|
178
|
+
reg.register(middle, 50).expect("Operation failed");
|
|
179
|
+
reg.register(late, 50).expect("Operation failed");
|
|
180
180
|
}
|
|
181
181
|
|
|
182
182
|
let result = ExtractionResult {
|
|
@@ -193,7 +193,7 @@ async fn test_pipeline_single_processor_per_stage() {
|
|
|
193
193
|
};
|
|
194
194
|
let config = ExtractionConfig::default();
|
|
195
195
|
|
|
196
|
-
let processed = run_pipeline(result, &config).await.
|
|
196
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
197
197
|
assert_eq!(processed.content, "start[early][middle][late]");
|
|
198
198
|
}
|
|
199
199
|
|
|
@@ -221,9 +221,9 @@ async fn test_pipeline_multiple_processors_per_stage() {
|
|
|
221
221
|
stage: ProcessingStage::Early,
|
|
222
222
|
});
|
|
223
223
|
|
|
224
|
-
reg.register(early_low, 10).
|
|
225
|
-
reg.register(early_high, 100).
|
|
226
|
-
reg.register(early_medium, 50).
|
|
224
|
+
reg.register(early_low, 10).expect("Operation failed");
|
|
225
|
+
reg.register(early_high, 100).expect("Operation failed");
|
|
226
|
+
reg.register(early_medium, 50).expect("Operation failed");
|
|
227
227
|
}
|
|
228
228
|
|
|
229
229
|
let result = ExtractionResult {
|
|
@@ -240,7 +240,7 @@ async fn test_pipeline_multiple_processors_per_stage() {
|
|
|
240
240
|
};
|
|
241
241
|
let config = ExtractionConfig::default();
|
|
242
242
|
|
|
243
|
-
let processed = run_pipeline(result, &config).await.
|
|
243
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
244
244
|
assert_eq!(processed.content, "start[early-high][early-medium][early-low]");
|
|
245
245
|
}
|
|
246
246
|
|
|
@@ -260,7 +260,7 @@ async fn test_pipeline_all_stages_enabled() {
|
|
|
260
260
|
name: format!("{:?}", stage),
|
|
261
261
|
stage,
|
|
262
262
|
});
|
|
263
|
-
reg.register(processor, 50).
|
|
263
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
264
264
|
}
|
|
265
265
|
}
|
|
266
266
|
|
|
@@ -278,7 +278,7 @@ async fn test_pipeline_all_stages_enabled() {
|
|
|
278
278
|
};
|
|
279
279
|
let config = ExtractionConfig::default();
|
|
280
280
|
|
|
281
|
-
let processed = run_pipeline(result, &config).await.
|
|
281
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
282
282
|
assert_eq!(processed.content, "start[Early][Middle][Late]");
|
|
283
283
|
}
|
|
284
284
|
|
|
@@ -297,7 +297,7 @@ async fn test_pipeline_postprocessing_disabled() {
|
|
|
297
297
|
name: "processor".to_string(),
|
|
298
298
|
stage: ProcessingStage::Early,
|
|
299
299
|
});
|
|
300
|
-
reg.register(processor, 50).
|
|
300
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
301
301
|
}
|
|
302
302
|
|
|
303
303
|
let result = ExtractionResult {
|
|
@@ -323,7 +323,7 @@ async fn test_pipeline_postprocessing_disabled() {
|
|
|
323
323
|
..Default::default()
|
|
324
324
|
};
|
|
325
325
|
|
|
326
|
-
let processed = run_pipeline(result, &config).await.
|
|
326
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
327
327
|
assert_eq!(processed.content, "start");
|
|
328
328
|
}
|
|
329
329
|
|
|
@@ -347,8 +347,8 @@ async fn test_pipeline_early_stage_runs_first() {
|
|
|
347
347
|
stage: ProcessingStage::Early,
|
|
348
348
|
});
|
|
349
349
|
|
|
350
|
-
reg.register(late, 50).
|
|
351
|
-
reg.register(early, 50).
|
|
350
|
+
reg.register(late, 50).expect("Operation failed");
|
|
351
|
+
reg.register(early, 50).expect("Operation failed");
|
|
352
352
|
}
|
|
353
353
|
|
|
354
354
|
let result = ExtractionResult {
|
|
@@ -365,7 +365,7 @@ async fn test_pipeline_early_stage_runs_first() {
|
|
|
365
365
|
};
|
|
366
366
|
let config = ExtractionConfig::default();
|
|
367
367
|
|
|
368
|
-
let processed = run_pipeline(result, &config).await.
|
|
368
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
369
369
|
assert_eq!(processed.content, "start[early][late]");
|
|
370
370
|
}
|
|
371
371
|
|
|
@@ -389,8 +389,8 @@ async fn test_pipeline_middle_stage_runs_second() {
|
|
|
389
389
|
stage: ProcessingStage::Middle,
|
|
390
390
|
});
|
|
391
391
|
|
|
392
|
-
reg.register(middle, 50).
|
|
393
|
-
reg.register(early, 50).
|
|
392
|
+
reg.register(middle, 50).expect("Operation failed");
|
|
393
|
+
reg.register(early, 50).expect("Operation failed");
|
|
394
394
|
}
|
|
395
395
|
|
|
396
396
|
let result = ExtractionResult {
|
|
@@ -407,7 +407,7 @@ async fn test_pipeline_middle_stage_runs_second() {
|
|
|
407
407
|
};
|
|
408
408
|
let config = ExtractionConfig::default();
|
|
409
409
|
|
|
410
|
-
let processed = run_pipeline(result, &config).await.
|
|
410
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
411
411
|
assert_eq!(processed.content, "start[early][middle]");
|
|
412
412
|
}
|
|
413
413
|
|
|
@@ -427,7 +427,7 @@ async fn test_pipeline_late_stage_runs_last() {
|
|
|
427
427
|
name: format!("{:?}", stage),
|
|
428
428
|
stage,
|
|
429
429
|
});
|
|
430
|
-
reg.register(processor, 50).
|
|
430
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
431
431
|
}
|
|
432
432
|
}
|
|
433
433
|
|
|
@@ -445,7 +445,7 @@ async fn test_pipeline_late_stage_runs_last() {
|
|
|
445
445
|
};
|
|
446
446
|
let config = ExtractionConfig::default();
|
|
447
447
|
|
|
448
|
-
let processed = run_pipeline(result, &config).await.
|
|
448
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
449
449
|
assert_eq!(processed.content, "start[Early][Middle][Late]");
|
|
450
450
|
}
|
|
451
451
|
|
|
@@ -465,7 +465,7 @@ async fn test_pipeline_within_stage_priority_order() {
|
|
|
465
465
|
name: name.to_string(),
|
|
466
466
|
stage: ProcessingStage::Early,
|
|
467
467
|
});
|
|
468
|
-
reg.register(processor, priority).
|
|
468
|
+
reg.register(processor, priority).expect("Operation failed");
|
|
469
469
|
}
|
|
470
470
|
}
|
|
471
471
|
|
|
@@ -483,7 +483,7 @@ async fn test_pipeline_within_stage_priority_order() {
|
|
|
483
483
|
};
|
|
484
484
|
let config = ExtractionConfig::default();
|
|
485
485
|
|
|
486
|
-
let processed = run_pipeline(result, &config).await.
|
|
486
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
487
487
|
assert_eq!(processed.content, "start[p1][p4][p3][p2]");
|
|
488
488
|
}
|
|
489
489
|
|
|
@@ -523,7 +523,10 @@ async fn test_pipeline_cross_stage_data_flow() {
|
|
|
523
523
|
impl PostProcessor for MiddleProcessor {
|
|
524
524
|
async fn process(&self, result: &mut ExtractionResult, _: &ExtractionConfig) -> Result<()> {
|
|
525
525
|
if let Some(stage) = result.metadata.additional.get("stage") {
|
|
526
|
-
result.content.push_str(&format!(
|
|
526
|
+
result.content.push_str(&format!(
|
|
527
|
+
"[saw:{}]",
|
|
528
|
+
stage.as_str().expect("Failed to extract string from value")
|
|
529
|
+
));
|
|
527
530
|
}
|
|
528
531
|
Ok(())
|
|
529
532
|
}
|
|
@@ -532,8 +535,8 @@ async fn test_pipeline_cross_stage_data_flow() {
|
|
|
532
535
|
}
|
|
533
536
|
}
|
|
534
537
|
|
|
535
|
-
reg.register(early, 50).
|
|
536
|
-
reg.register(Arc::new(MiddleProcessor), 50).
|
|
538
|
+
reg.register(early, 50).expect("Operation failed");
|
|
539
|
+
reg.register(Arc::new(MiddleProcessor), 50).expect("Operation failed");
|
|
537
540
|
}
|
|
538
541
|
|
|
539
542
|
let result = ExtractionResult {
|
|
@@ -550,7 +553,7 @@ async fn test_pipeline_cross_stage_data_flow() {
|
|
|
550
553
|
};
|
|
551
554
|
let config = ExtractionConfig::default();
|
|
552
555
|
|
|
553
|
-
let processed = run_pipeline(result, &config).await.
|
|
556
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
554
557
|
assert!(processed.content.contains("[saw:early]"));
|
|
555
558
|
}
|
|
556
559
|
|
|
@@ -592,7 +595,8 @@ async fn test_pipeline_early_stage_error_recorded() {
|
|
|
592
595
|
let mut reg = registry
|
|
593
596
|
.write()
|
|
594
597
|
.expect("Failed to acquire write lock on registry in test");
|
|
595
|
-
reg.register(Arc::new(EarlyFailingProcessor), 50)
|
|
598
|
+
reg.register(Arc::new(EarlyFailingProcessor), 50)
|
|
599
|
+
.expect("Operation failed");
|
|
596
600
|
}
|
|
597
601
|
|
|
598
602
|
let result = ExtractionResult {
|
|
@@ -636,7 +640,7 @@ async fn test_pipeline_middle_stage_error_propagation() {
|
|
|
636
640
|
error_message: "Middle stage error".to_string(),
|
|
637
641
|
});
|
|
638
642
|
|
|
639
|
-
reg.register(failing, 50).
|
|
643
|
+
reg.register(failing, 50).expect("Operation failed");
|
|
640
644
|
}
|
|
641
645
|
|
|
642
646
|
let result = ExtractionResult {
|
|
@@ -709,8 +713,8 @@ async fn test_pipeline_late_stage_error_doesnt_affect_earlier_stages() {
|
|
|
709
713
|
});
|
|
710
714
|
let late_failing = Arc::new(LateFailingProcessor);
|
|
711
715
|
|
|
712
|
-
reg.register(early, 50).
|
|
713
|
-
reg.register(late_failing, 50).
|
|
716
|
+
reg.register(early, 50).expect("Operation failed");
|
|
717
|
+
reg.register(late_failing, 50).expect("Operation failed");
|
|
714
718
|
}
|
|
715
719
|
|
|
716
720
|
let result = ExtractionResult {
|
|
@@ -792,15 +796,15 @@ async fn test_pipeline_processor_error_doesnt_stop_other_processors() {
|
|
|
792
796
|
}
|
|
793
797
|
}
|
|
794
798
|
|
|
795
|
-
reg.register(p1, 100).
|
|
799
|
+
reg.register(p1, 100).expect("Operation failed");
|
|
796
800
|
reg.register(
|
|
797
801
|
Arc::new(EarlyFailingProcessor {
|
|
798
802
|
name: "p2-failing".to_string(),
|
|
799
803
|
}),
|
|
800
804
|
50,
|
|
801
805
|
)
|
|
802
|
-
.
|
|
803
|
-
reg.register(p3, 50).
|
|
806
|
+
.expect("Operation failed");
|
|
807
|
+
reg.register(p3, 50).expect("Operation failed");
|
|
804
808
|
}
|
|
805
809
|
|
|
806
810
|
let result = ExtractionResult {
|
|
@@ -879,7 +883,7 @@ async fn test_pipeline_multiple_processor_errors() {
|
|
|
879
883
|
name: name.to_string(),
|
|
880
884
|
stage,
|
|
881
885
|
});
|
|
882
|
-
reg.register(processor, 50).
|
|
886
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
883
887
|
}
|
|
884
888
|
}
|
|
885
889
|
|
|
@@ -924,7 +928,7 @@ async fn test_pipeline_error_context_preservation() {
|
|
|
924
928
|
error_message: "Detailed error message with context".to_string(),
|
|
925
929
|
});
|
|
926
930
|
|
|
927
|
-
reg.register(failing, 50).
|
|
931
|
+
reg.register(failing, 50).expect("Operation failed");
|
|
928
932
|
}
|
|
929
933
|
|
|
930
934
|
let result = ExtractionResult {
|
|
@@ -997,8 +1001,9 @@ async fn test_pipeline_metadata_added_in_early_visible_in_middle() {
|
|
|
997
1001
|
}
|
|
998
1002
|
}
|
|
999
1003
|
|
|
1000
|
-
reg.register(early, 50).
|
|
1001
|
-
reg.register(Arc::new(MiddleReadingProcessor), 50)
|
|
1004
|
+
reg.register(early, 50).expect("Operation failed");
|
|
1005
|
+
reg.register(Arc::new(MiddleReadingProcessor), 50)
|
|
1006
|
+
.expect("Operation failed");
|
|
1002
1007
|
}
|
|
1003
1008
|
|
|
1004
1009
|
let result = ExtractionResult {
|
|
@@ -1015,15 +1020,15 @@ async fn test_pipeline_metadata_added_in_early_visible_in_middle() {
|
|
|
1015
1020
|
};
|
|
1016
1021
|
let config = ExtractionConfig::default();
|
|
1017
1022
|
|
|
1018
|
-
let processed = run_pipeline(result, &config).await.
|
|
1023
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1019
1024
|
assert_eq!(
|
|
1020
1025
|
processed
|
|
1021
1026
|
.metadata
|
|
1022
1027
|
.additional
|
|
1023
1028
|
.get("middle_saw")
|
|
1024
|
-
.
|
|
1029
|
+
.expect("Operation failed")
|
|
1025
1030
|
.as_str()
|
|
1026
|
-
.
|
|
1031
|
+
.expect("Operation failed"),
|
|
1027
1032
|
"early_value"
|
|
1028
1033
|
);
|
|
1029
1034
|
}
|
|
@@ -1070,8 +1075,9 @@ async fn test_pipeline_content_modified_in_middle_visible_in_late() {
|
|
|
1070
1075
|
}
|
|
1071
1076
|
}
|
|
1072
1077
|
|
|
1073
|
-
reg.register(middle, 50).
|
|
1074
|
-
reg.register(Arc::new(LateReadingProcessor), 50)
|
|
1078
|
+
reg.register(middle, 50).expect("Operation failed");
|
|
1079
|
+
reg.register(Arc::new(LateReadingProcessor), 50)
|
|
1080
|
+
.expect("Operation failed");
|
|
1075
1081
|
}
|
|
1076
1082
|
|
|
1077
1083
|
let result = ExtractionResult {
|
|
@@ -1088,7 +1094,7 @@ async fn test_pipeline_content_modified_in_middle_visible_in_late() {
|
|
|
1088
1094
|
};
|
|
1089
1095
|
let config = ExtractionConfig::default();
|
|
1090
1096
|
|
|
1091
|
-
let processed = run_pipeline(result, &config).await.
|
|
1097
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1092
1098
|
assert!(processed.content.contains("[middle-content]"));
|
|
1093
1099
|
assert!(processed.content.contains("[late-saw-middle]"));
|
|
1094
1100
|
}
|
|
@@ -1141,7 +1147,7 @@ async fn test_pipeline_multiple_processors_modifying_same_metadata() {
|
|
|
1141
1147
|
name: format!("proc{}", i),
|
|
1142
1148
|
value: format!("value{}", i),
|
|
1143
1149
|
});
|
|
1144
|
-
reg.register(processor, 100 - i * 10).
|
|
1150
|
+
reg.register(processor, 100 - i * 10).expect("Operation failed");
|
|
1145
1151
|
}
|
|
1146
1152
|
}
|
|
1147
1153
|
|
|
@@ -1159,15 +1165,15 @@ async fn test_pipeline_multiple_processors_modifying_same_metadata() {
|
|
|
1159
1165
|
};
|
|
1160
1166
|
let config = ExtractionConfig::default();
|
|
1161
1167
|
|
|
1162
|
-
let processed = run_pipeline(result, &config).await.
|
|
1168
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1163
1169
|
assert_eq!(
|
|
1164
1170
|
processed
|
|
1165
1171
|
.metadata
|
|
1166
1172
|
.additional
|
|
1167
1173
|
.get("shared_key")
|
|
1168
|
-
.
|
|
1174
|
+
.expect("Operation failed")
|
|
1169
1175
|
.as_str()
|
|
1170
|
-
.
|
|
1176
|
+
.expect("Operation failed"),
|
|
1171
1177
|
"value3"
|
|
1172
1178
|
);
|
|
1173
1179
|
}
|
|
@@ -1231,7 +1237,7 @@ async fn test_pipeline_processors_reading_previous_output() {
|
|
|
1231
1237
|
name: name.to_string(),
|
|
1232
1238
|
stage,
|
|
1233
1239
|
});
|
|
1234
|
-
reg.register(processor, 50).
|
|
1240
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
1235
1241
|
}
|
|
1236
1242
|
}
|
|
1237
1243
|
|
|
@@ -1249,8 +1255,17 @@ async fn test_pipeline_processors_reading_previous_output() {
|
|
|
1249
1255
|
};
|
|
1250
1256
|
let config = ExtractionConfig::default();
|
|
1251
1257
|
|
|
1252
|
-
let processed = run_pipeline(result, &config).await.
|
|
1253
|
-
assert_eq!(
|
|
1258
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1259
|
+
assert_eq!(
|
|
1260
|
+
processed
|
|
1261
|
+
.metadata
|
|
1262
|
+
.additional
|
|
1263
|
+
.get("count")
|
|
1264
|
+
.expect("Operation failed")
|
|
1265
|
+
.as_i64()
|
|
1266
|
+
.expect("Operation failed"),
|
|
1267
|
+
4
|
|
1268
|
+
);
|
|
1254
1269
|
}
|
|
1255
1270
|
|
|
1256
1271
|
#[tokio::test]
|
|
@@ -1289,7 +1304,8 @@ async fn test_pipeline_large_content_modification() {
|
|
|
1289
1304
|
let mut reg = registry
|
|
1290
1305
|
.write()
|
|
1291
1306
|
.expect("Failed to acquire write lock on registry in test");
|
|
1292
|
-
reg.register(Arc::new(LargeContentProcessor), 50)
|
|
1307
|
+
reg.register(Arc::new(LargeContentProcessor), 50)
|
|
1308
|
+
.expect("Operation failed");
|
|
1293
1309
|
}
|
|
1294
1310
|
|
|
1295
1311
|
let result = ExtractionResult {
|
|
@@ -1306,7 +1322,7 @@ async fn test_pipeline_large_content_modification() {
|
|
|
1306
1322
|
};
|
|
1307
1323
|
let config = ExtractionConfig::default();
|
|
1308
1324
|
|
|
1309
|
-
let processed = run_pipeline(result, &config).await.
|
|
1325
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1310
1326
|
assert!(processed.content.len() > 10000);
|
|
1311
1327
|
}
|
|
1312
1328
|
|
|
@@ -1326,7 +1342,7 @@ async fn test_pipeline_enabled_processors_whitelist() {
|
|
|
1326
1342
|
name: name.to_string(),
|
|
1327
1343
|
stage: ProcessingStage::Early,
|
|
1328
1344
|
});
|
|
1329
|
-
reg.register(processor, 50).
|
|
1345
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
1330
1346
|
}
|
|
1331
1347
|
}
|
|
1332
1348
|
|
|
@@ -1353,7 +1369,7 @@ async fn test_pipeline_enabled_processors_whitelist() {
|
|
|
1353
1369
|
..Default::default()
|
|
1354
1370
|
};
|
|
1355
1371
|
|
|
1356
|
-
let processed = run_pipeline(result, &config).await.
|
|
1372
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1357
1373
|
assert!(processed.content.contains("[proc1]"));
|
|
1358
1374
|
assert!(!processed.content.contains("[proc2]"));
|
|
1359
1375
|
assert!(processed.content.contains("[proc3]"));
|
|
@@ -1375,7 +1391,7 @@ async fn test_pipeline_disabled_processors_blacklist() {
|
|
|
1375
1391
|
name: name.to_string(),
|
|
1376
1392
|
stage: ProcessingStage::Early,
|
|
1377
1393
|
});
|
|
1378
|
-
reg.register(processor, 50).
|
|
1394
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
1379
1395
|
}
|
|
1380
1396
|
}
|
|
1381
1397
|
|
|
@@ -1402,7 +1418,7 @@ async fn test_pipeline_disabled_processors_blacklist() {
|
|
|
1402
1418
|
..Default::default()
|
|
1403
1419
|
};
|
|
1404
1420
|
|
|
1405
|
-
let processed = run_pipeline(result, &config).await.
|
|
1421
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1406
1422
|
assert!(processed.content.contains("[proc1]"));
|
|
1407
1423
|
assert!(!processed.content.contains("[proc2]"));
|
|
1408
1424
|
assert!(processed.content.contains("[proc3]"));
|
|
@@ -1424,7 +1440,7 @@ async fn test_pipeline_no_filtering_runs_all() {
|
|
|
1424
1440
|
name: name.to_string(),
|
|
1425
1441
|
stage: ProcessingStage::Early,
|
|
1426
1442
|
});
|
|
1427
|
-
reg.register(processor, 50).
|
|
1443
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
1428
1444
|
}
|
|
1429
1445
|
}
|
|
1430
1446
|
|
|
@@ -1442,7 +1458,7 @@ async fn test_pipeline_no_filtering_runs_all() {
|
|
|
1442
1458
|
};
|
|
1443
1459
|
let config = ExtractionConfig::default();
|
|
1444
1460
|
|
|
1445
|
-
let processed = run_pipeline(result, &config).await.
|
|
1461
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1446
1462
|
assert!(processed.content.contains("[proc1]"));
|
|
1447
1463
|
assert!(processed.content.contains("[proc2]"));
|
|
1448
1464
|
assert!(processed.content.contains("[proc3]"));
|
|
@@ -1464,7 +1480,7 @@ async fn test_pipeline_empty_whitelist_runs_none() {
|
|
|
1464
1480
|
name: name.to_string(),
|
|
1465
1481
|
stage: ProcessingStage::Early,
|
|
1466
1482
|
});
|
|
1467
|
-
reg.register(processor, 50).
|
|
1483
|
+
reg.register(processor, 50).expect("Operation failed");
|
|
1468
1484
|
}
|
|
1469
1485
|
}
|
|
1470
1486
|
|
|
@@ -1491,6 +1507,6 @@ async fn test_pipeline_empty_whitelist_runs_none() {
|
|
|
1491
1507
|
..Default::default()
|
|
1492
1508
|
};
|
|
1493
1509
|
|
|
1494
|
-
let processed = run_pipeline(result, &config).await.
|
|
1510
|
+
let processed = run_pipeline(result, &config).await.expect("Async operation failed");
|
|
1495
1511
|
assert_eq!(processed.content, "start");
|
|
1496
1512
|
}
|