kreuzberg 4.1.2 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/kreuzberg.gemspec +13 -1
- data/lib/kreuzberg/config.rb +70 -35
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +5 -1
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +55 -53
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +10 -2
|
@@ -13,7 +13,7 @@ use tempfile::TempDir;
|
|
|
13
13
|
/// Test loading config from TOML file.
|
|
14
14
|
#[test]
|
|
15
15
|
fn test_from_file_toml_succeeds() {
|
|
16
|
-
let temp_dir = TempDir::new().
|
|
16
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
17
17
|
let config_path = temp_dir.path().join("config.toml");
|
|
18
18
|
|
|
19
19
|
let toml_content = r#"
|
|
@@ -26,16 +26,16 @@ max_chars = 1000
|
|
|
26
26
|
max_overlap = 100
|
|
27
27
|
"#;
|
|
28
28
|
|
|
29
|
-
fs::write(&config_path, toml_content).
|
|
29
|
+
fs::write(&config_path, toml_content).expect("Operation failed");
|
|
30
30
|
|
|
31
31
|
let config = ExtractionConfig::from_file(&config_path);
|
|
32
32
|
assert!(config.is_ok(), "Should load TOML config successfully");
|
|
33
33
|
|
|
34
|
-
let config = config.
|
|
34
|
+
let config = config.expect("Operation failed");
|
|
35
35
|
assert!(config.ocr.is_some(), "Should have OCR config");
|
|
36
36
|
assert!(config.chunking.is_some(), "Should have chunking config");
|
|
37
37
|
|
|
38
|
-
let chunking = config.chunking.
|
|
38
|
+
let chunking = config.chunking.expect("Operation failed");
|
|
39
39
|
assert_eq!(chunking.max_chars, 1000);
|
|
40
40
|
assert_eq!(chunking.max_overlap, 100);
|
|
41
41
|
}
|
|
@@ -43,7 +43,7 @@ max_overlap = 100
|
|
|
43
43
|
/// Test loading config from YAML file.
|
|
44
44
|
#[test]
|
|
45
45
|
fn test_from_file_yaml_succeeds() {
|
|
46
|
-
let temp_dir = TempDir::new().
|
|
46
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
47
47
|
let config_path = temp_dir.path().join("config.yaml");
|
|
48
48
|
|
|
49
49
|
let yaml_content = r#"
|
|
@@ -55,16 +55,16 @@ chunking:
|
|
|
55
55
|
max_overlap: 100
|
|
56
56
|
"#;
|
|
57
57
|
|
|
58
|
-
fs::write(&config_path, yaml_content).
|
|
58
|
+
fs::write(&config_path, yaml_content).expect("Operation failed");
|
|
59
59
|
|
|
60
60
|
let config = ExtractionConfig::from_file(&config_path);
|
|
61
61
|
assert!(config.is_ok(), "Should load YAML config successfully");
|
|
62
62
|
|
|
63
|
-
let config = config.
|
|
63
|
+
let config = config.expect("Operation failed");
|
|
64
64
|
assert!(config.ocr.is_some(), "Should have OCR config");
|
|
65
65
|
assert!(config.chunking.is_some(), "Should have chunking config");
|
|
66
66
|
|
|
67
|
-
let chunking = config.chunking.
|
|
67
|
+
let chunking = config.chunking.expect("Operation failed");
|
|
68
68
|
assert_eq!(chunking.max_chars, 1000);
|
|
69
69
|
assert_eq!(chunking.max_overlap, 100);
|
|
70
70
|
}
|
|
@@ -72,7 +72,7 @@ chunking:
|
|
|
72
72
|
/// Test loading config from JSON file.
|
|
73
73
|
#[test]
|
|
74
74
|
fn test_from_file_json_succeeds() {
|
|
75
|
-
let temp_dir = TempDir::new().
|
|
75
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
76
76
|
let config_path = temp_dir.path().join("config.json");
|
|
77
77
|
|
|
78
78
|
let json_content = r#"
|
|
@@ -88,16 +88,16 @@ fn test_from_file_json_succeeds() {
|
|
|
88
88
|
}
|
|
89
89
|
"#;
|
|
90
90
|
|
|
91
|
-
fs::write(&config_path, json_content).
|
|
91
|
+
fs::write(&config_path, json_content).expect("Operation failed");
|
|
92
92
|
|
|
93
93
|
let config = ExtractionConfig::from_file(&config_path);
|
|
94
94
|
assert!(config.is_ok(), "Should load JSON config successfully");
|
|
95
95
|
|
|
96
|
-
let config = config.
|
|
96
|
+
let config = config.expect("Operation failed");
|
|
97
97
|
assert!(config.ocr.is_some(), "Should have OCR config");
|
|
98
98
|
assert!(config.chunking.is_some(), "Should have chunking config");
|
|
99
99
|
|
|
100
|
-
let chunking = config.chunking.
|
|
100
|
+
let chunking = config.chunking.expect("Operation failed");
|
|
101
101
|
assert_eq!(chunking.max_chars, 1000);
|
|
102
102
|
assert_eq!(chunking.max_overlap, 100);
|
|
103
103
|
}
|
|
@@ -105,7 +105,7 @@ fn test_from_file_json_succeeds() {
|
|
|
105
105
|
/// Test loading config from .yml extension.
|
|
106
106
|
#[test]
|
|
107
107
|
fn test_from_file_yml_extension_succeeds() {
|
|
108
|
-
let temp_dir = TempDir::new().
|
|
108
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
109
109
|
let config_path = temp_dir.path().join("config.yml");
|
|
110
110
|
|
|
111
111
|
let yml_content = r#"
|
|
@@ -113,7 +113,7 @@ ocr:
|
|
|
113
113
|
enabled: true
|
|
114
114
|
"#;
|
|
115
115
|
|
|
116
|
-
fs::write(&config_path, yml_content).
|
|
116
|
+
fs::write(&config_path, yml_content).expect("Operation failed");
|
|
117
117
|
|
|
118
118
|
let config = ExtractionConfig::from_file(&config_path);
|
|
119
119
|
assert!(config.is_ok(), "Should load .yml config successfully");
|
|
@@ -129,7 +129,7 @@ fn test_from_file_nonexistent_path_fails() {
|
|
|
129
129
|
/// Test from_file with malformed TOML fails.
|
|
130
130
|
#[test]
|
|
131
131
|
fn test_from_file_malformed_toml_fails() {
|
|
132
|
-
let temp_dir = TempDir::new().
|
|
132
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
133
133
|
let config_path = temp_dir.path().join("config.toml");
|
|
134
134
|
|
|
135
135
|
let malformed_toml = r#"
|
|
@@ -137,7 +137,7 @@ fn test_from_file_malformed_toml_fails() {
|
|
|
137
137
|
enabled = true
|
|
138
138
|
"#;
|
|
139
139
|
|
|
140
|
-
fs::write(&config_path, malformed_toml).
|
|
140
|
+
fs::write(&config_path, malformed_toml).expect("Operation failed");
|
|
141
141
|
|
|
142
142
|
let result = ExtractionConfig::from_file(&config_path);
|
|
143
143
|
assert!(result.is_err(), "Should fail for malformed TOML: {:?}", result);
|
|
@@ -146,7 +146,7 @@ enabled = true
|
|
|
146
146
|
/// Test from_file with malformed JSON fails.
|
|
147
147
|
#[test]
|
|
148
148
|
fn test_from_file_malformed_json_fails() {
|
|
149
|
-
let temp_dir = TempDir::new().
|
|
149
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
150
150
|
let config_path = temp_dir.path().join("config.json");
|
|
151
151
|
|
|
152
152
|
let malformed_json = r#"
|
|
@@ -158,7 +158,7 @@ fn test_from_file_malformed_json_fails() {
|
|
|
158
158
|
}
|
|
159
159
|
"#;
|
|
160
160
|
|
|
161
|
-
fs::write(&config_path, malformed_json).
|
|
161
|
+
fs::write(&config_path, malformed_json).expect("Operation failed");
|
|
162
162
|
|
|
163
163
|
let result = ExtractionConfig::from_file(&config_path);
|
|
164
164
|
assert!(result.is_err(), "Should fail for malformed JSON: {:?}", result);
|
|
@@ -167,7 +167,7 @@ fn test_from_file_malformed_json_fails() {
|
|
|
167
167
|
/// Test from_file with malformed YAML fails.
|
|
168
168
|
#[test]
|
|
169
169
|
fn test_from_file_malformed_yaml_fails() {
|
|
170
|
-
let temp_dir = TempDir::new().
|
|
170
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
171
171
|
let config_path = temp_dir.path().join("config.yaml");
|
|
172
172
|
|
|
173
173
|
let malformed_yaml = r#"
|
|
@@ -176,7 +176,7 @@ ocr:
|
|
|
176
176
|
- invalid_list
|
|
177
177
|
"#;
|
|
178
178
|
|
|
179
|
-
fs::write(&config_path, malformed_yaml).
|
|
179
|
+
fs::write(&config_path, malformed_yaml).expect("Operation failed");
|
|
180
180
|
|
|
181
181
|
let result = ExtractionConfig::from_file(&config_path);
|
|
182
182
|
assert!(result.is_err(), "Should fail for malformed YAML: {:?}", result);
|
|
@@ -185,15 +185,15 @@ ocr:
|
|
|
185
185
|
/// Test from_file with empty file uses defaults.
|
|
186
186
|
#[test]
|
|
187
187
|
fn test_from_file_empty_file_uses_defaults() {
|
|
188
|
-
let temp_dir = TempDir::new().
|
|
188
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
189
189
|
let config_path = temp_dir.path().join("config.toml");
|
|
190
190
|
|
|
191
|
-
fs::write(&config_path, "").
|
|
191
|
+
fs::write(&config_path, "").expect("Operation failed");
|
|
192
192
|
|
|
193
193
|
let config = ExtractionConfig::from_file(&config_path);
|
|
194
194
|
assert!(config.is_ok(), "Should load empty file successfully");
|
|
195
195
|
|
|
196
|
-
let config = config.
|
|
196
|
+
let config = config.expect("Operation failed");
|
|
197
197
|
assert!(config.ocr.is_none(), "Default config should have no OCR");
|
|
198
198
|
assert!(config.chunking.is_none(), "Default config should have no chunking");
|
|
199
199
|
}
|
|
@@ -201,10 +201,10 @@ fn test_from_file_empty_file_uses_defaults() {
|
|
|
201
201
|
/// Test from_file with unsupported extension fails.
|
|
202
202
|
#[test]
|
|
203
203
|
fn test_from_file_unsupported_extension_fails() {
|
|
204
|
-
let temp_dir = TempDir::new().
|
|
204
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
205
205
|
let config_path = temp_dir.path().join("config.txt");
|
|
206
206
|
|
|
207
|
-
fs::write(&config_path, "ocr:\n enabled: true").
|
|
207
|
+
fs::write(&config_path, "ocr:\n enabled: true").expect("Operation failed");
|
|
208
208
|
|
|
209
209
|
let result = ExtractionConfig::from_file(&config_path);
|
|
210
210
|
assert!(result.is_err(), "Should fail for unsupported extension: {:?}", result);
|
|
@@ -222,7 +222,7 @@ fn test_from_file_unsupported_extension_fails() {
|
|
|
222
222
|
#[test]
|
|
223
223
|
#[serial_test::serial]
|
|
224
224
|
fn test_discover_finds_config_in_current_dir() {
|
|
225
|
-
let temp_dir = TempDir::new().
|
|
225
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
226
226
|
let config_path = temp_dir.path().join("kreuzberg.toml");
|
|
227
227
|
|
|
228
228
|
let toml_content = r#"
|
|
@@ -230,26 +230,29 @@ fn test_discover_finds_config_in_current_dir() {
|
|
|
230
230
|
enabled = true
|
|
231
231
|
"#;
|
|
232
232
|
|
|
233
|
-
fs::write(&config_path, toml_content).
|
|
233
|
+
fs::write(&config_path, toml_content).expect("Operation failed");
|
|
234
234
|
|
|
235
|
-
let original_dir = std::env::current_dir().
|
|
236
|
-
std::env::set_current_dir(temp_dir.path()).
|
|
235
|
+
let original_dir = std::env::current_dir().expect("Operation failed");
|
|
236
|
+
std::env::set_current_dir(temp_dir.path()).expect("Operation failed");
|
|
237
237
|
|
|
238
238
|
let result = ExtractionConfig::discover();
|
|
239
239
|
|
|
240
|
-
std::env::set_current_dir(original_dir).
|
|
240
|
+
std::env::set_current_dir(original_dir).expect("Operation failed");
|
|
241
241
|
|
|
242
242
|
assert!(result.is_ok(), "Discover should succeed");
|
|
243
|
-
let config = result.
|
|
243
|
+
let config = result.expect("Operation failed");
|
|
244
244
|
assert!(config.is_some(), "Should find config in current directory");
|
|
245
|
-
assert!(
|
|
245
|
+
assert!(
|
|
246
|
+
config.expect("Operation failed").ocr.is_some(),
|
|
247
|
+
"Should have OCR config"
|
|
248
|
+
);
|
|
246
249
|
}
|
|
247
250
|
|
|
248
251
|
/// Test discover() finds config in parent directory.
|
|
249
252
|
#[test]
|
|
250
253
|
#[serial_test::serial]
|
|
251
254
|
fn test_discover_finds_config_in_parent_dir() {
|
|
252
|
-
let temp_dir = TempDir::new().
|
|
255
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
253
256
|
let config_path = temp_dir.path().join("kreuzberg.toml");
|
|
254
257
|
|
|
255
258
|
let toml_content = r#"
|
|
@@ -257,53 +260,56 @@ fn test_discover_finds_config_in_parent_dir() {
|
|
|
257
260
|
enabled = true
|
|
258
261
|
"#;
|
|
259
262
|
|
|
260
|
-
fs::write(&config_path, toml_content).
|
|
263
|
+
fs::write(&config_path, toml_content).expect("Operation failed");
|
|
261
264
|
|
|
262
265
|
let sub_dir = temp_dir.path().join("subdir");
|
|
263
|
-
fs::create_dir(&sub_dir).
|
|
266
|
+
fs::create_dir(&sub_dir).expect("Operation failed");
|
|
264
267
|
|
|
265
|
-
let original_dir = std::env::current_dir().
|
|
266
|
-
std::env::set_current_dir(&sub_dir).
|
|
268
|
+
let original_dir = std::env::current_dir().expect("Operation failed");
|
|
269
|
+
std::env::set_current_dir(&sub_dir).expect("Operation failed");
|
|
267
270
|
|
|
268
271
|
let result = ExtractionConfig::discover();
|
|
269
272
|
|
|
270
|
-
std::env::set_current_dir(original_dir).
|
|
273
|
+
std::env::set_current_dir(original_dir).expect("Operation failed");
|
|
271
274
|
|
|
272
275
|
assert!(result.is_ok(), "Discover should succeed");
|
|
273
|
-
let config = result.
|
|
276
|
+
let config = result.expect("Operation failed");
|
|
274
277
|
assert!(config.is_some(), "Should find config in parent directory");
|
|
275
|
-
assert!(
|
|
278
|
+
assert!(
|
|
279
|
+
config.expect("Operation failed").ocr.is_some(),
|
|
280
|
+
"Should have OCR config"
|
|
281
|
+
);
|
|
276
282
|
}
|
|
277
283
|
|
|
278
284
|
/// Test discover() returns None when no config found.
|
|
279
285
|
#[test]
|
|
280
286
|
#[serial_test::serial]
|
|
281
287
|
fn test_discover_returns_none_when_not_found() {
|
|
282
|
-
let temp_dir = TempDir::new().
|
|
288
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
283
289
|
let sub_dir = temp_dir.path().join("subdir");
|
|
284
|
-
fs::create_dir(&sub_dir).
|
|
290
|
+
fs::create_dir(&sub_dir).expect("Operation failed");
|
|
285
291
|
|
|
286
|
-
let original_dir = std::env::current_dir().
|
|
287
|
-
std::env::set_current_dir(&sub_dir).
|
|
292
|
+
let original_dir = std::env::current_dir().expect("Operation failed");
|
|
293
|
+
std::env::set_current_dir(&sub_dir).expect("Operation failed");
|
|
288
294
|
|
|
289
295
|
let result = ExtractionConfig::discover();
|
|
290
296
|
|
|
291
|
-
std::env::set_current_dir(original_dir).
|
|
297
|
+
std::env::set_current_dir(original_dir).expect("Operation failed");
|
|
292
298
|
|
|
293
299
|
assert!(result.is_ok(), "Discover should succeed even when no config found");
|
|
294
|
-
let _config = result.
|
|
300
|
+
let _config = result.expect("Operation failed");
|
|
295
301
|
}
|
|
296
302
|
|
|
297
303
|
/// Test discover() prefers certain file names.
|
|
298
304
|
#[test]
|
|
299
305
|
#[serial_test::serial]
|
|
300
306
|
fn test_discover_file_name_preference() {
|
|
301
|
-
let temp_dir = TempDir::new().
|
|
307
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
302
308
|
|
|
303
|
-
fs::write(temp_dir.path().join("kreuzberg.toml"), "[ocr]\nenabled = true").
|
|
304
|
-
fs::write(temp_dir.path().join(".kreuzberg.toml"), "[ocr]\nenabled = false").
|
|
309
|
+
fs::write(temp_dir.path().join("kreuzberg.toml"), "[ocr]\nenabled = true").expect("Operation failed");
|
|
310
|
+
fs::write(temp_dir.path().join(".kreuzberg.toml"), "[ocr]\nenabled = false").expect("Operation failed");
|
|
305
311
|
|
|
306
|
-
let original_dir = std::env::current_dir().
|
|
312
|
+
let original_dir = std::env::current_dir().expect("Operation failed");
|
|
307
313
|
if std::env::set_current_dir(temp_dir.path()).is_err() {
|
|
308
314
|
return;
|
|
309
315
|
}
|
|
@@ -313,7 +319,7 @@ fn test_discover_file_name_preference() {
|
|
|
313
319
|
let _ = std::env::set_current_dir(original_dir);
|
|
314
320
|
|
|
315
321
|
assert!(result.is_ok(), "Discover should succeed");
|
|
316
|
-
let config = result.
|
|
322
|
+
let config = result.expect("Operation failed");
|
|
317
323
|
assert!(config.is_some(), "Should find a config file");
|
|
318
324
|
}
|
|
319
325
|
|
|
@@ -321,7 +327,7 @@ fn test_discover_file_name_preference() {
|
|
|
321
327
|
#[test]
|
|
322
328
|
#[serial_test::serial]
|
|
323
329
|
fn test_discover_with_nested_directories() {
|
|
324
|
-
let temp_dir = TempDir::new().
|
|
330
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
325
331
|
let config_path = temp_dir.path().join("kreuzberg.toml");
|
|
326
332
|
|
|
327
333
|
let toml_content = r#"
|
|
@@ -329,14 +335,14 @@ fn test_discover_with_nested_directories() {
|
|
|
329
335
|
enabled = true
|
|
330
336
|
"#;
|
|
331
337
|
|
|
332
|
-
fs::write(&config_path, toml_content).
|
|
338
|
+
fs::write(&config_path, toml_content).expect("Operation failed");
|
|
333
339
|
|
|
334
340
|
let level1 = temp_dir.path().join("level1");
|
|
335
341
|
let level2 = level1.join("level2");
|
|
336
342
|
let level3 = level2.join("level3");
|
|
337
|
-
fs::create_dir_all(&level3).
|
|
343
|
+
fs::create_dir_all(&level3).expect("Operation failed");
|
|
338
344
|
|
|
339
|
-
let original_dir = std::env::current_dir().
|
|
345
|
+
let original_dir = std::env::current_dir().expect("Operation failed");
|
|
340
346
|
if std::env::set_current_dir(&level3).is_err() {
|
|
341
347
|
return;
|
|
342
348
|
}
|
|
@@ -346,15 +352,18 @@ enabled = true
|
|
|
346
352
|
let _ = std::env::set_current_dir(&original_dir);
|
|
347
353
|
|
|
348
354
|
assert!(result.is_ok(), "Discover should succeed");
|
|
349
|
-
let config = result.
|
|
355
|
+
let config = result.expect("Operation failed");
|
|
350
356
|
assert!(config.is_some(), "Should find config in ancestor directory");
|
|
351
|
-
assert!(
|
|
357
|
+
assert!(
|
|
358
|
+
config.expect("Operation failed").ocr.is_some(),
|
|
359
|
+
"Should have OCR config"
|
|
360
|
+
);
|
|
352
361
|
}
|
|
353
362
|
|
|
354
363
|
/// Test config loading with all supported features.
|
|
355
364
|
#[test]
|
|
356
365
|
fn test_from_file_comprehensive_config() {
|
|
357
|
-
let temp_dir = TempDir::new().
|
|
366
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
358
367
|
let config_path = temp_dir.path().join("config.toml");
|
|
359
368
|
|
|
360
369
|
let toml_content = r#"
|
|
@@ -376,12 +385,12 @@ enabled = true
|
|
|
376
385
|
extract_images = true
|
|
377
386
|
"#;
|
|
378
387
|
|
|
379
|
-
fs::write(&config_path, toml_content).
|
|
388
|
+
fs::write(&config_path, toml_content).expect("Operation failed");
|
|
380
389
|
|
|
381
390
|
let config = ExtractionConfig::from_file(&config_path);
|
|
382
391
|
assert!(config.is_ok(), "Should load comprehensive config successfully");
|
|
383
392
|
|
|
384
|
-
let config = config.
|
|
393
|
+
let config = config.expect("Operation failed");
|
|
385
394
|
assert!(config.ocr.is_some(), "Should have OCR config");
|
|
386
395
|
assert!(config.chunking.is_some(), "Should have chunking config");
|
|
387
396
|
assert!(
|
|
@@ -396,7 +405,7 @@ extract_images = true
|
|
|
396
405
|
/// Test config validation with invalid values.
|
|
397
406
|
#[test]
|
|
398
407
|
fn test_from_file_with_invalid_values() {
|
|
399
|
-
let temp_dir = TempDir::new().
|
|
408
|
+
let temp_dir = TempDir::new().expect("Operation failed");
|
|
400
409
|
let config_path = temp_dir.path().join("config.toml");
|
|
401
410
|
|
|
402
411
|
let toml_content = r#"
|
|
@@ -405,7 +414,7 @@ max_chars = -1000
|
|
|
405
414
|
max_overlap = -100
|
|
406
415
|
"#;
|
|
407
416
|
|
|
408
|
-
fs::write(&config_path, toml_content).
|
|
417
|
+
fs::write(&config_path, toml_content).expect("Operation failed");
|
|
409
418
|
|
|
410
419
|
let result = ExtractionConfig::from_file(&config_path);
|
|
411
420
|
if let Ok(config) = result
|