kreuzberg 4.1.2 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +26 -17
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/kreuzberg.gemspec +13 -1
- data/lib/kreuzberg/cli.rb +16 -6
- data/lib/kreuzberg/cli_proxy.rb +3 -1
- data/lib/kreuzberg/config.rb +121 -39
- data/lib/kreuzberg/djot_content.rb +225 -0
- data/lib/kreuzberg/extraction_api.rb +20 -4
- data/lib/kreuzberg/result.rb +12 -2
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +1 -0
- data/sig/kreuzberg.rbs +28 -12
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/batch_spec.rb +6 -5
- data/spec/binding/error_recovery_spec.rb +3 -3
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/binding/tables_spec.rb +11 -2
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/api/startup.rs +15 -1
- data/vendor/kreuzberg/src/core/config_validation/sections.rs +16 -4
- data/vendor/kreuzberg/src/core/extractor/file.rs +1 -2
- data/vendor/kreuzberg/src/core/extractor/mod.rs +2 -1
- data/vendor/kreuzberg/src/core/io.rs +7 -7
- data/vendor/kreuzberg/src/core/mime.rs +4 -4
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/extraction/pptx/parser.rs +6 -0
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/src/plugins/mod.rs +1 -0
- data/vendor/kreuzberg/src/plugins/registry/extractor.rs +251 -5
- data/vendor/kreuzberg/src/plugins/registry/ocr.rs +150 -2
- data/vendor/kreuzberg/src/plugins/registry/processor.rs +213 -5
- data/vendor/kreuzberg/src/plugins/registry/validator.rs +220 -4
- data/vendor/kreuzberg/src/plugins/startup_validation.rs +385 -0
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +416 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +57 -57
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +75 -43
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +324 -31
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +12 -2
|
@@ -26,7 +26,7 @@ async fn test_fastembed_embedding_generation() {
|
|
|
26
26
|
let result = model.embed(texts.clone(), None);
|
|
27
27
|
assert!(result.is_ok(), "Failed to generate embeddings: {:?}", result.err());
|
|
28
28
|
|
|
29
|
-
let embeddings = result.
|
|
29
|
+
let embeddings = result.expect("Operation failed");
|
|
30
30
|
assert_eq!(embeddings.len(), 3, "Expected 3 embeddings");
|
|
31
31
|
|
|
32
32
|
for (i, embedding) in embeddings.iter().enumerate() {
|
|
@@ -64,7 +64,7 @@ async fn test_fastembed_batch_processing() {
|
|
|
64
64
|
|
|
65
65
|
assert!(result.is_ok(), "Batch embedding failed: {:?}", result.err());
|
|
66
66
|
|
|
67
|
-
let embeddings = result.
|
|
67
|
+
let embeddings = result.expect("Operation failed");
|
|
68
68
|
assert_eq!(embeddings.len(), 50, "Expected 50 embeddings");
|
|
69
69
|
|
|
70
70
|
println!(
|
|
@@ -96,7 +96,7 @@ async fn test_fastembed_different_models() {
|
|
|
96
96
|
let result = m.embed(test_text.clone(), None);
|
|
97
97
|
assert!(result.is_ok(), "Failed to generate embedding for {}", description);
|
|
98
98
|
|
|
99
|
-
let embeddings = result.
|
|
99
|
+
let embeddings = result.expect("Operation failed");
|
|
100
100
|
assert_eq!(embeddings.len(), 1);
|
|
101
101
|
assert_eq!(
|
|
102
102
|
embeddings[0].len(),
|
|
@@ -197,7 +197,7 @@ async fn test_generate_embeddings_for_chunks_basic() {
|
|
|
197
197
|
for (i, chunk) in chunks.iter().enumerate() {
|
|
198
198
|
assert!(chunk.embedding.is_some(), "Chunk {} missing embedding", i);
|
|
199
199
|
|
|
200
|
-
let embedding = chunk.embedding.as_ref().
|
|
200
|
+
let embedding = chunk.embedding.as_ref().expect("Operation failed");
|
|
201
201
|
assert_eq!(embedding.len(), 384, "Chunk {} has wrong embedding dimensions", i);
|
|
202
202
|
|
|
203
203
|
let sum: f32 = embedding.iter().sum();
|
|
@@ -269,8 +269,8 @@ async fn test_generate_embeddings_for_chunks_normalization() {
|
|
|
269
269
|
|
|
270
270
|
generate_embeddings_for_chunks(&mut chunks_norm, &config_norm).expect("Failed to generate normalized embeddings");
|
|
271
271
|
|
|
272
|
-
let embedding_no_norm = chunks_no_norm[0].embedding.as_ref().
|
|
273
|
-
let embedding_norm = chunks_norm[0].embedding.as_ref().
|
|
272
|
+
let embedding_no_norm = chunks_no_norm[0].embedding.as_ref().expect("Operation failed");
|
|
273
|
+
let embedding_norm = chunks_norm[0].embedding.as_ref().expect("Operation failed");
|
|
274
274
|
|
|
275
275
|
let magnitude_no_norm: f32 = embedding_no_norm.iter().map(|x| x * x).sum::<f32>().sqrt();
|
|
276
276
|
let magnitude_norm: f32 = embedding_norm.iter().map(|x| x * x).sum::<f32>().sqrt();
|
|
@@ -560,7 +560,7 @@ async fn test_generate_embeddings_for_chunks_batch_size() {
|
|
|
560
560
|
i
|
|
561
561
|
);
|
|
562
562
|
assert_eq!(
|
|
563
|
-
chunk.embedding.as_ref().
|
|
563
|
+
chunk.embedding.as_ref().expect("Operation failed").len(),
|
|
564
564
|
384,
|
|
565
565
|
"Chunk {} has wrong dimensions",
|
|
566
566
|
i
|
|
@@ -612,7 +612,7 @@ async fn test_generate_embeddings_chunking_integration() {
|
|
|
612
612
|
for (i, chunk) in chunking_result.chunks.iter().enumerate() {
|
|
613
613
|
assert!(chunk.embedding.is_some(), "Chunk {} missing embedding", i);
|
|
614
614
|
|
|
615
|
-
let embedding = chunk.embedding.as_ref().
|
|
615
|
+
let embedding = chunk.embedding.as_ref().expect("Operation failed");
|
|
616
616
|
assert_eq!(embedding.len(), 384, "Chunk {} has wrong embedding dimensions", i);
|
|
617
617
|
|
|
618
618
|
let magnitude: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
|
|
@@ -56,7 +56,7 @@ async fn test_simple_typst_document_extraction() {
|
|
|
56
56
|
return;
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
let extraction = result.
|
|
59
|
+
let extraction = result.expect("Operation failed");
|
|
60
60
|
|
|
61
61
|
assert_eq!(extraction.mime_type, "text/x-typst", "MIME type should be preserved");
|
|
62
62
|
|
|
@@ -145,7 +145,7 @@ async fn test_minimal_typst_document_extraction() {
|
|
|
145
145
|
return;
|
|
146
146
|
}
|
|
147
147
|
|
|
148
|
-
let extraction = result.
|
|
148
|
+
let extraction = result.expect("Operation failed");
|
|
149
149
|
|
|
150
150
|
assert!(
|
|
151
151
|
!extraction.content.is_empty(),
|
|
@@ -189,7 +189,7 @@ async fn test_heading_hierarchy_extraction() {
|
|
|
189
189
|
return;
|
|
190
190
|
}
|
|
191
191
|
|
|
192
|
-
let extraction = result.
|
|
192
|
+
let extraction = result.expect("Operation failed");
|
|
193
193
|
|
|
194
194
|
assert!(!extraction.content.is_empty(), "Document should extract content");
|
|
195
195
|
|
|
@@ -269,7 +269,7 @@ async fn test_metadata_extraction() {
|
|
|
269
269
|
return;
|
|
270
270
|
}
|
|
271
271
|
|
|
272
|
-
let extraction = result.
|
|
272
|
+
let extraction = result.expect("Operation failed");
|
|
273
273
|
|
|
274
274
|
if let Some(title) = extraction.metadata.additional.get("title") {
|
|
275
275
|
assert!(
|
|
@@ -330,7 +330,7 @@ async fn test_advanced_typst_document_extraction() {
|
|
|
330
330
|
return;
|
|
331
331
|
}
|
|
332
332
|
|
|
333
|
-
let extraction = result.
|
|
333
|
+
let extraction = result.expect("Operation failed");
|
|
334
334
|
|
|
335
335
|
assert!(
|
|
336
336
|
extraction.metadata.additional.contains_key("title"),
|
|
@@ -411,7 +411,7 @@ async fn test_typst_reader_extraction() {
|
|
|
411
411
|
return;
|
|
412
412
|
}
|
|
413
413
|
|
|
414
|
-
let extraction = result.
|
|
414
|
+
let extraction = result.expect("Operation failed");
|
|
415
415
|
|
|
416
416
|
assert!(
|
|
417
417
|
!extraction.content.is_empty(),
|
|
@@ -454,7 +454,7 @@ async fn test_undergradmath_extraction() {
|
|
|
454
454
|
return;
|
|
455
455
|
}
|
|
456
456
|
|
|
457
|
-
let extraction = result.
|
|
457
|
+
let extraction = result.expect("Operation failed");
|
|
458
458
|
|
|
459
459
|
assert!(
|
|
460
460
|
!extraction.content.is_empty(),
|
|
@@ -534,7 +534,7 @@ async fn test_formatting_preservation() {
|
|
|
534
534
|
return;
|
|
535
535
|
}
|
|
536
536
|
|
|
537
|
-
let extraction = result.
|
|
537
|
+
let extraction = result.expect("Operation failed");
|
|
538
538
|
|
|
539
539
|
assert!(
|
|
540
540
|
extraction.content.contains("*") || extraction.content.contains("bold"),
|
|
@@ -576,7 +576,7 @@ async fn test_large_document_extraction() {
|
|
|
576
576
|
return;
|
|
577
577
|
}
|
|
578
578
|
|
|
579
|
-
let extraction = result.
|
|
579
|
+
let extraction = result.expect("Operation failed");
|
|
580
580
|
|
|
581
581
|
assert!(
|
|
582
582
|
!extraction.content.is_empty(),
|
|
@@ -7,9 +7,9 @@ use kreuzberg::extraction::excel::read_excel_file;
|
|
|
7
7
|
fn test_xlsx_full_metadata_extraction() {
|
|
8
8
|
let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
|
|
9
9
|
.parent()
|
|
10
|
-
.
|
|
10
|
+
.expect("Operation failed")
|
|
11
11
|
.parent()
|
|
12
|
-
.
|
|
12
|
+
.expect("Operation failed");
|
|
13
13
|
let test_file = workspace_root.join("test_documents/office/excel.xlsx");
|
|
14
14
|
|
|
15
15
|
if !test_file.exists() {
|
|
@@ -17,7 +17,8 @@ fn test_xlsx_full_metadata_extraction() {
|
|
|
17
17
|
return;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
-
let
|
|
20
|
+
let file_path = test_file.to_str().expect("File path should be valid UTF-8");
|
|
21
|
+
let result = read_excel_file(file_path).expect("Should extract XLSX successfully");
|
|
21
22
|
|
|
22
23
|
assert!(!result.sheets.is_empty(), "Should have at least one sheet");
|
|
23
24
|
|
|
@@ -34,9 +35,9 @@ fn test_xlsx_full_metadata_extraction() {
|
|
|
34
35
|
fn test_xlsx_multi_sheet_metadata() {
|
|
35
36
|
let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
|
|
36
37
|
.parent()
|
|
37
|
-
.
|
|
38
|
+
.expect("Operation failed")
|
|
38
39
|
.parent()
|
|
39
|
-
.
|
|
40
|
+
.expect("Operation failed");
|
|
40
41
|
let test_file = workspace_root.join("test_documents/spreadsheets/excel_multi_sheet.xlsx");
|
|
41
42
|
|
|
42
43
|
if !test_file.exists() {
|
|
@@ -44,7 +45,8 @@ fn test_xlsx_multi_sheet_metadata() {
|
|
|
44
45
|
return;
|
|
45
46
|
}
|
|
46
47
|
|
|
47
|
-
let
|
|
48
|
+
let file_path = test_file.to_str().expect("File path should be valid UTF-8");
|
|
49
|
+
let result = read_excel_file(file_path).expect("Should extract multi-sheet XLSX successfully");
|
|
48
50
|
|
|
49
51
|
assert!(
|
|
50
52
|
result.sheets.len() > 1,
|
|
@@ -65,9 +67,9 @@ fn test_xlsx_multi_sheet_metadata() {
|
|
|
65
67
|
fn test_xlsx_minimal_metadata_extraction() {
|
|
66
68
|
let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
|
|
67
69
|
.parent()
|
|
68
|
-
.
|
|
70
|
+
.expect("Operation failed")
|
|
69
71
|
.parent()
|
|
70
|
-
.
|
|
72
|
+
.expect("Operation failed");
|
|
71
73
|
let test_file = workspace_root.join("test_documents/spreadsheets/test_01.xlsx");
|
|
72
74
|
|
|
73
75
|
if !test_file.exists() {
|
|
@@ -75,7 +77,8 @@ fn test_xlsx_minimal_metadata_extraction() {
|
|
|
75
77
|
return;
|
|
76
78
|
}
|
|
77
79
|
|
|
78
|
-
let
|
|
80
|
+
let file_path = test_file.to_str().expect("File path should be valid UTF-8");
|
|
81
|
+
let result = read_excel_file(file_path).expect("Should extract XLSX successfully");
|
|
79
82
|
|
|
80
83
|
assert!(!result.sheets.is_empty(), "Content should not be empty");
|
|
81
84
|
assert!(
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.1
|
|
4
|
+
version: 4.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-01-
|
|
11
|
+
date: 2026-01-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -209,6 +209,7 @@ files:
|
|
|
209
209
|
- ext/kreuzberg_rb/native/include/msvc_compat/strings.h
|
|
210
210
|
- ext/kreuzberg_rb/native/include/strings.h
|
|
211
211
|
- ext/kreuzberg_rb/native/include/unistd.h
|
|
212
|
+
- ext/kreuzberg_rb/native/libpdfium.so
|
|
212
213
|
- ext/kreuzberg_rb/native/src/batch.rs
|
|
213
214
|
- ext/kreuzberg_rb/native/src/config/mod.rs
|
|
214
215
|
- ext/kreuzberg_rb/native/src/config/types.rs
|
|
@@ -232,6 +233,7 @@ files:
|
|
|
232
233
|
- lib/kreuzberg/cli.rb
|
|
233
234
|
- lib/kreuzberg/cli_proxy.rb
|
|
234
235
|
- lib/kreuzberg/config.rb
|
|
236
|
+
- lib/kreuzberg/djot_content.rb
|
|
235
237
|
- lib/kreuzberg/error_context.rb
|
|
236
238
|
- lib/kreuzberg/errors.rb
|
|
237
239
|
- lib/kreuzberg/extraction_api.rb
|
|
@@ -271,6 +273,7 @@ files:
|
|
|
271
273
|
- spec/fixtures/config.toml
|
|
272
274
|
- spec/fixtures/config.yaml
|
|
273
275
|
- spec/fixtures/invalid_config.toml
|
|
276
|
+
- spec/serialization_spec.rb
|
|
274
277
|
- spec/smoke/package_spec.rb
|
|
275
278
|
- spec/spec_helper.rb
|
|
276
279
|
- spec/unit/config/chunking_config_spec.rb
|
|
@@ -283,6 +286,7 @@ files:
|
|
|
283
286
|
- spec/unit/config/keyword_config_spec.rb
|
|
284
287
|
- spec/unit/config/language_detection_config_spec.rb
|
|
285
288
|
- spec/unit/config/ocr_config_spec.rb
|
|
289
|
+
- spec/unit/config/output_format_spec.rb
|
|
286
290
|
- spec/unit/config/page_config_spec.rb
|
|
287
291
|
- spec/unit/config/pdf_config_spec.rb
|
|
288
292
|
- spec/unit/config/postprocessor_config_spec.rb
|
|
@@ -588,6 +592,7 @@ files:
|
|
|
588
592
|
- vendor/kreuzberg/src/plugins/registry/ocr.rs
|
|
589
593
|
- vendor/kreuzberg/src/plugins/registry/processor.rs
|
|
590
594
|
- vendor/kreuzberg/src/plugins/registry/validator.rs
|
|
595
|
+
- vendor/kreuzberg/src/plugins/startup_validation.rs
|
|
591
596
|
- vendor/kreuzberg/src/plugins/traits.rs
|
|
592
597
|
- vendor/kreuzberg/src/plugins/validator/mod.rs
|
|
593
598
|
- vendor/kreuzberg/src/plugins/validator/registry.rs
|
|
@@ -705,6 +710,7 @@ files:
|
|
|
705
710
|
- vendor/kreuzberg/stopwords/zh_stopwords.json
|
|
706
711
|
- vendor/kreuzberg/stopwords/zu_stopwords.json
|
|
707
712
|
- vendor/kreuzberg/tests/api_chunk.rs
|
|
713
|
+
- vendor/kreuzberg/tests/api_consistency.rs
|
|
708
714
|
- vendor/kreuzberg/tests/api_embed.rs
|
|
709
715
|
- vendor/kreuzberg/tests/api_extract_multipart.rs
|
|
710
716
|
- vendor/kreuzberg/tests/api_large_pdf_extraction.rs
|
|
@@ -716,9 +722,11 @@ files:
|
|
|
716
722
|
- vendor/kreuzberg/tests/batch_processing.rs
|
|
717
723
|
- vendor/kreuzberg/tests/bibtex_parity_test.rs
|
|
718
724
|
- vendor/kreuzberg/tests/concurrency_stress.rs
|
|
725
|
+
- vendor/kreuzberg/tests/config_behavioral.rs
|
|
719
726
|
- vendor/kreuzberg/tests/config_features.rs
|
|
720
727
|
- vendor/kreuzberg/tests/config_integration_test.rs
|
|
721
728
|
- vendor/kreuzberg/tests/config_loading_tests.rs
|
|
729
|
+
- vendor/kreuzberg/tests/contract_mcp.rs
|
|
722
730
|
- vendor/kreuzberg/tests/core_integration.rs
|
|
723
731
|
- vendor/kreuzberg/tests/csv_integration.rs
|
|
724
732
|
- vendor/kreuzberg/tests/data/hierarchy_ground_truth.json
|
|
@@ -740,6 +748,7 @@ files:
|
|
|
740
748
|
- vendor/kreuzberg/tests/keywords_quality.rs
|
|
741
749
|
- vendor/kreuzberg/tests/latex_extractor_tests.rs
|
|
742
750
|
- vendor/kreuzberg/tests/markdown_extractor_tests.rs
|
|
751
|
+
- vendor/kreuzberg/tests/mcp_integration.rs
|
|
743
752
|
- vendor/kreuzberg/tests/mime_detection.rs
|
|
744
753
|
- vendor/kreuzberg/tests/ocr_configuration.rs
|
|
745
754
|
- vendor/kreuzberg/tests/ocr_errors.rs
|
|
@@ -766,6 +775,7 @@ files:
|
|
|
766
775
|
- vendor/kreuzberg/tests/rst_extractor_tests.rs
|
|
767
776
|
- vendor/kreuzberg/tests/rtf_extractor_tests.rs
|
|
768
777
|
- vendor/kreuzberg/tests/security_validation.rs
|
|
778
|
+
- vendor/kreuzberg/tests/serialization_integration.rs
|
|
769
779
|
- vendor/kreuzberg/tests/stopwords_integration_test.rs
|
|
770
780
|
- vendor/kreuzberg/tests/test_fastembed.rs
|
|
771
781
|
- vendor/kreuzberg/tests/typst_behavioral_tests.rs
|