kreuzberg 4.1.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -4
  3. data/README.md +8 -5
  4. data/ext/kreuzberg_rb/native/Cargo.toml +2 -2
  5. data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
  6. data/ext/kreuzberg_rb/native/src/config/types.rs +23 -13
  7. data/kreuzberg.gemspec +14 -2
  8. data/lib/kreuzberg/api_proxy.rb +0 -1
  9. data/lib/kreuzberg/cli_proxy.rb +0 -1
  10. data/lib/kreuzberg/config.rb +70 -35
  11. data/lib/kreuzberg/mcp_proxy.rb +0 -1
  12. data/lib/kreuzberg/version.rb +1 -1
  13. data/sig/kreuzberg.rbs +5 -1
  14. data/spec/binding/batch_operations_spec.rb +80 -0
  15. data/spec/binding/metadata_types_spec.rb +77 -57
  16. data/spec/serialization_spec.rb +134 -0
  17. data/spec/unit/config/output_format_spec.rb +380 -0
  18. data/vendor/Cargo.toml +1 -1
  19. data/vendor/kreuzberg/Cargo.toml +3 -3
  20. data/vendor/kreuzberg/README.md +1 -1
  21. data/vendor/kreuzberg/src/embeddings.rs +4 -4
  22. data/vendor/kreuzberg/src/mcp/format.rs +237 -39
  23. data/vendor/kreuzberg/src/mcp/params.rs +26 -33
  24. data/vendor/kreuzberg/src/mcp/server.rs +6 -3
  25. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
  26. data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
  27. data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
  28. data/vendor/kreuzberg/tests/api_embed.rs +84 -50
  29. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
  30. data/vendor/kreuzberg/tests/api_tests.rs +298 -139
  31. data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
  32. data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
  33. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
  34. data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
  35. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
  36. data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
  37. data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
  38. data/vendor/kreuzberg/tests/config_features.rs +19 -15
  39. data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
  40. data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
  41. data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
  42. data/vendor/kreuzberg/tests/core_integration.rs +55 -53
  43. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
  44. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
  45. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
  46. data/vendor/kreuzberg/tests/email_integration.rs +7 -7
  47. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
  48. data/vendor/kreuzberg/tests/error_handling.rs +13 -11
  49. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
  50. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  51. data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
  52. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
  53. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
  54. data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
  55. data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
  56. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
  57. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
  58. data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
  59. data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
  60. data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
  61. data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
  62. data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
  63. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
  64. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
  65. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
  66. data/vendor/kreuzberg/tests/page_markers.rs +1 -1
  67. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
  68. data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
  69. data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
  70. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
  71. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
  72. data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
  73. data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
  74. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
  75. data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
  76. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
  77. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
  78. data/vendor/kreuzberg/tests/security_validation.rs +20 -19
  79. data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
  80. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
  81. data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
  82. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
  83. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
  84. data/vendor/kreuzberg-tesseract/Cargo.toml +3 -3
  85. data/vendor/kreuzberg-tesseract/build.rs +4 -4
  86. data/vendor/kreuzberg-tesseract/src/lib.rs +6 -6
  87. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +3 -3
  88. metadata +13 -2
@@ -13,7 +13,7 @@ use tempfile::TempDir;
13
13
  /// Test loading config from TOML file.
14
14
  #[test]
15
15
  fn test_from_file_toml_succeeds() {
16
- let temp_dir = TempDir::new().unwrap();
16
+ let temp_dir = TempDir::new().expect("Operation failed");
17
17
  let config_path = temp_dir.path().join("config.toml");
18
18
 
19
19
  let toml_content = r#"
@@ -26,16 +26,16 @@ max_chars = 1000
26
26
  max_overlap = 100
27
27
  "#;
28
28
 
29
- fs::write(&config_path, toml_content).unwrap();
29
+ fs::write(&config_path, toml_content).expect("Operation failed");
30
30
 
31
31
  let config = ExtractionConfig::from_file(&config_path);
32
32
  assert!(config.is_ok(), "Should load TOML config successfully");
33
33
 
34
- let config = config.unwrap();
34
+ let config = config.expect("Operation failed");
35
35
  assert!(config.ocr.is_some(), "Should have OCR config");
36
36
  assert!(config.chunking.is_some(), "Should have chunking config");
37
37
 
38
- let chunking = config.chunking.unwrap();
38
+ let chunking = config.chunking.expect("Operation failed");
39
39
  assert_eq!(chunking.max_chars, 1000);
40
40
  assert_eq!(chunking.max_overlap, 100);
41
41
  }
@@ -43,7 +43,7 @@ max_overlap = 100
43
43
  /// Test loading config from YAML file.
44
44
  #[test]
45
45
  fn test_from_file_yaml_succeeds() {
46
- let temp_dir = TempDir::new().unwrap();
46
+ let temp_dir = TempDir::new().expect("Operation failed");
47
47
  let config_path = temp_dir.path().join("config.yaml");
48
48
 
49
49
  let yaml_content = r#"
@@ -55,16 +55,16 @@ chunking:
55
55
  max_overlap: 100
56
56
  "#;
57
57
 
58
- fs::write(&config_path, yaml_content).unwrap();
58
+ fs::write(&config_path, yaml_content).expect("Operation failed");
59
59
 
60
60
  let config = ExtractionConfig::from_file(&config_path);
61
61
  assert!(config.is_ok(), "Should load YAML config successfully");
62
62
 
63
- let config = config.unwrap();
63
+ let config = config.expect("Operation failed");
64
64
  assert!(config.ocr.is_some(), "Should have OCR config");
65
65
  assert!(config.chunking.is_some(), "Should have chunking config");
66
66
 
67
- let chunking = config.chunking.unwrap();
67
+ let chunking = config.chunking.expect("Operation failed");
68
68
  assert_eq!(chunking.max_chars, 1000);
69
69
  assert_eq!(chunking.max_overlap, 100);
70
70
  }
@@ -72,7 +72,7 @@ chunking:
72
72
  /// Test loading config from JSON file.
73
73
  #[test]
74
74
  fn test_from_file_json_succeeds() {
75
- let temp_dir = TempDir::new().unwrap();
75
+ let temp_dir = TempDir::new().expect("Operation failed");
76
76
  let config_path = temp_dir.path().join("config.json");
77
77
 
78
78
  let json_content = r#"
@@ -88,16 +88,16 @@ fn test_from_file_json_succeeds() {
88
88
  }
89
89
  "#;
90
90
 
91
- fs::write(&config_path, json_content).unwrap();
91
+ fs::write(&config_path, json_content).expect("Operation failed");
92
92
 
93
93
  let config = ExtractionConfig::from_file(&config_path);
94
94
  assert!(config.is_ok(), "Should load JSON config successfully");
95
95
 
96
- let config = config.unwrap();
96
+ let config = config.expect("Operation failed");
97
97
  assert!(config.ocr.is_some(), "Should have OCR config");
98
98
  assert!(config.chunking.is_some(), "Should have chunking config");
99
99
 
100
- let chunking = config.chunking.unwrap();
100
+ let chunking = config.chunking.expect("Operation failed");
101
101
  assert_eq!(chunking.max_chars, 1000);
102
102
  assert_eq!(chunking.max_overlap, 100);
103
103
  }
@@ -105,7 +105,7 @@ fn test_from_file_json_succeeds() {
105
105
  /// Test loading config from .yml extension.
106
106
  #[test]
107
107
  fn test_from_file_yml_extension_succeeds() {
108
- let temp_dir = TempDir::new().unwrap();
108
+ let temp_dir = TempDir::new().expect("Operation failed");
109
109
  let config_path = temp_dir.path().join("config.yml");
110
110
 
111
111
  let yml_content = r#"
@@ -113,7 +113,7 @@ ocr:
113
113
  enabled: true
114
114
  "#;
115
115
 
116
- fs::write(&config_path, yml_content).unwrap();
116
+ fs::write(&config_path, yml_content).expect("Operation failed");
117
117
 
118
118
  let config = ExtractionConfig::from_file(&config_path);
119
119
  assert!(config.is_ok(), "Should load .yml config successfully");
@@ -129,7 +129,7 @@ fn test_from_file_nonexistent_path_fails() {
129
129
  /// Test from_file with malformed TOML fails.
130
130
  #[test]
131
131
  fn test_from_file_malformed_toml_fails() {
132
- let temp_dir = TempDir::new().unwrap();
132
+ let temp_dir = TempDir::new().expect("Operation failed");
133
133
  let config_path = temp_dir.path().join("config.toml");
134
134
 
135
135
  let malformed_toml = r#"
@@ -137,7 +137,7 @@ fn test_from_file_malformed_toml_fails() {
137
137
  enabled = true
138
138
  "#;
139
139
 
140
- fs::write(&config_path, malformed_toml).unwrap();
140
+ fs::write(&config_path, malformed_toml).expect("Operation failed");
141
141
 
142
142
  let result = ExtractionConfig::from_file(&config_path);
143
143
  assert!(result.is_err(), "Should fail for malformed TOML: {:?}", result);
@@ -146,7 +146,7 @@ enabled = true
146
146
  /// Test from_file with malformed JSON fails.
147
147
  #[test]
148
148
  fn test_from_file_malformed_json_fails() {
149
- let temp_dir = TempDir::new().unwrap();
149
+ let temp_dir = TempDir::new().expect("Operation failed");
150
150
  let config_path = temp_dir.path().join("config.json");
151
151
 
152
152
  let malformed_json = r#"
@@ -158,7 +158,7 @@ fn test_from_file_malformed_json_fails() {
158
158
  }
159
159
  "#;
160
160
 
161
- fs::write(&config_path, malformed_json).unwrap();
161
+ fs::write(&config_path, malformed_json).expect("Operation failed");
162
162
 
163
163
  let result = ExtractionConfig::from_file(&config_path);
164
164
  assert!(result.is_err(), "Should fail for malformed JSON: {:?}", result);
@@ -167,7 +167,7 @@ fn test_from_file_malformed_json_fails() {
167
167
  /// Test from_file with malformed YAML fails.
168
168
  #[test]
169
169
  fn test_from_file_malformed_yaml_fails() {
170
- let temp_dir = TempDir::new().unwrap();
170
+ let temp_dir = TempDir::new().expect("Operation failed");
171
171
  let config_path = temp_dir.path().join("config.yaml");
172
172
 
173
173
  let malformed_yaml = r#"
@@ -176,7 +176,7 @@ ocr:
176
176
  - invalid_list
177
177
  "#;
178
178
 
179
- fs::write(&config_path, malformed_yaml).unwrap();
179
+ fs::write(&config_path, malformed_yaml).expect("Operation failed");
180
180
 
181
181
  let result = ExtractionConfig::from_file(&config_path);
182
182
  assert!(result.is_err(), "Should fail for malformed YAML: {:?}", result);
@@ -185,15 +185,15 @@ ocr:
185
185
  /// Test from_file with empty file uses defaults.
186
186
  #[test]
187
187
  fn test_from_file_empty_file_uses_defaults() {
188
- let temp_dir = TempDir::new().unwrap();
188
+ let temp_dir = TempDir::new().expect("Operation failed");
189
189
  let config_path = temp_dir.path().join("config.toml");
190
190
 
191
- fs::write(&config_path, "").unwrap();
191
+ fs::write(&config_path, "").expect("Operation failed");
192
192
 
193
193
  let config = ExtractionConfig::from_file(&config_path);
194
194
  assert!(config.is_ok(), "Should load empty file successfully");
195
195
 
196
- let config = config.unwrap();
196
+ let config = config.expect("Operation failed");
197
197
  assert!(config.ocr.is_none(), "Default config should have no OCR");
198
198
  assert!(config.chunking.is_none(), "Default config should have no chunking");
199
199
  }
@@ -201,10 +201,10 @@ fn test_from_file_empty_file_uses_defaults() {
201
201
  /// Test from_file with unsupported extension fails.
202
202
  #[test]
203
203
  fn test_from_file_unsupported_extension_fails() {
204
- let temp_dir = TempDir::new().unwrap();
204
+ let temp_dir = TempDir::new().expect("Operation failed");
205
205
  let config_path = temp_dir.path().join("config.txt");
206
206
 
207
- fs::write(&config_path, "ocr:\n enabled: true").unwrap();
207
+ fs::write(&config_path, "ocr:\n enabled: true").expect("Operation failed");
208
208
 
209
209
  let result = ExtractionConfig::from_file(&config_path);
210
210
  assert!(result.is_err(), "Should fail for unsupported extension: {:?}", result);
@@ -222,7 +222,7 @@ fn test_from_file_unsupported_extension_fails() {
222
222
  #[test]
223
223
  #[serial_test::serial]
224
224
  fn test_discover_finds_config_in_current_dir() {
225
- let temp_dir = TempDir::new().unwrap();
225
+ let temp_dir = TempDir::new().expect("Operation failed");
226
226
  let config_path = temp_dir.path().join("kreuzberg.toml");
227
227
 
228
228
  let toml_content = r#"
@@ -230,26 +230,29 @@ fn test_discover_finds_config_in_current_dir() {
230
230
  enabled = true
231
231
  "#;
232
232
 
233
- fs::write(&config_path, toml_content).unwrap();
233
+ fs::write(&config_path, toml_content).expect("Operation failed");
234
234
 
235
- let original_dir = std::env::current_dir().unwrap();
236
- std::env::set_current_dir(temp_dir.path()).unwrap();
235
+ let original_dir = std::env::current_dir().expect("Operation failed");
236
+ std::env::set_current_dir(temp_dir.path()).expect("Operation failed");
237
237
 
238
238
  let result = ExtractionConfig::discover();
239
239
 
240
- std::env::set_current_dir(original_dir).unwrap();
240
+ std::env::set_current_dir(original_dir).expect("Operation failed");
241
241
 
242
242
  assert!(result.is_ok(), "Discover should succeed");
243
- let config = result.unwrap();
243
+ let config = result.expect("Operation failed");
244
244
  assert!(config.is_some(), "Should find config in current directory");
245
- assert!(config.unwrap().ocr.is_some(), "Should have OCR config");
245
+ assert!(
246
+ config.expect("Operation failed").ocr.is_some(),
247
+ "Should have OCR config"
248
+ );
246
249
  }
247
250
 
248
251
  /// Test discover() finds config in parent directory.
249
252
  #[test]
250
253
  #[serial_test::serial]
251
254
  fn test_discover_finds_config_in_parent_dir() {
252
- let temp_dir = TempDir::new().unwrap();
255
+ let temp_dir = TempDir::new().expect("Operation failed");
253
256
  let config_path = temp_dir.path().join("kreuzberg.toml");
254
257
 
255
258
  let toml_content = r#"
@@ -257,53 +260,56 @@ fn test_discover_finds_config_in_parent_dir() {
257
260
  enabled = true
258
261
  "#;
259
262
 
260
- fs::write(&config_path, toml_content).unwrap();
263
+ fs::write(&config_path, toml_content).expect("Operation failed");
261
264
 
262
265
  let sub_dir = temp_dir.path().join("subdir");
263
- fs::create_dir(&sub_dir).unwrap();
266
+ fs::create_dir(&sub_dir).expect("Operation failed");
264
267
 
265
- let original_dir = std::env::current_dir().unwrap();
266
- std::env::set_current_dir(&sub_dir).unwrap();
268
+ let original_dir = std::env::current_dir().expect("Operation failed");
269
+ std::env::set_current_dir(&sub_dir).expect("Operation failed");
267
270
 
268
271
  let result = ExtractionConfig::discover();
269
272
 
270
- std::env::set_current_dir(original_dir).unwrap();
273
+ std::env::set_current_dir(original_dir).expect("Operation failed");
271
274
 
272
275
  assert!(result.is_ok(), "Discover should succeed");
273
- let config = result.unwrap();
276
+ let config = result.expect("Operation failed");
274
277
  assert!(config.is_some(), "Should find config in parent directory");
275
- assert!(config.unwrap().ocr.is_some(), "Should have OCR config");
278
+ assert!(
279
+ config.expect("Operation failed").ocr.is_some(),
280
+ "Should have OCR config"
281
+ );
276
282
  }
277
283
 
278
284
  /// Test discover() returns None when no config found.
279
285
  #[test]
280
286
  #[serial_test::serial]
281
287
  fn test_discover_returns_none_when_not_found() {
282
- let temp_dir = TempDir::new().unwrap();
288
+ let temp_dir = TempDir::new().expect("Operation failed");
283
289
  let sub_dir = temp_dir.path().join("subdir");
284
- fs::create_dir(&sub_dir).unwrap();
290
+ fs::create_dir(&sub_dir).expect("Operation failed");
285
291
 
286
- let original_dir = std::env::current_dir().unwrap();
287
- std::env::set_current_dir(&sub_dir).unwrap();
292
+ let original_dir = std::env::current_dir().expect("Operation failed");
293
+ std::env::set_current_dir(&sub_dir).expect("Operation failed");
288
294
 
289
295
  let result = ExtractionConfig::discover();
290
296
 
291
- std::env::set_current_dir(original_dir).unwrap();
297
+ std::env::set_current_dir(original_dir).expect("Operation failed");
292
298
 
293
299
  assert!(result.is_ok(), "Discover should succeed even when no config found");
294
- let _config = result.unwrap();
300
+ let _config = result.expect("Operation failed");
295
301
  }
296
302
 
297
303
  /// Test discover() prefers certain file names.
298
304
  #[test]
299
305
  #[serial_test::serial]
300
306
  fn test_discover_file_name_preference() {
301
- let temp_dir = TempDir::new().unwrap();
307
+ let temp_dir = TempDir::new().expect("Operation failed");
302
308
 
303
- fs::write(temp_dir.path().join("kreuzberg.toml"), "[ocr]\nenabled = true").unwrap();
304
- fs::write(temp_dir.path().join(".kreuzberg.toml"), "[ocr]\nenabled = false").unwrap();
309
+ fs::write(temp_dir.path().join("kreuzberg.toml"), "[ocr]\nenabled = true").expect("Operation failed");
310
+ fs::write(temp_dir.path().join(".kreuzberg.toml"), "[ocr]\nenabled = false").expect("Operation failed");
305
311
 
306
- let original_dir = std::env::current_dir().unwrap();
312
+ let original_dir = std::env::current_dir().expect("Operation failed");
307
313
  if std::env::set_current_dir(temp_dir.path()).is_err() {
308
314
  return;
309
315
  }
@@ -313,7 +319,7 @@ fn test_discover_file_name_preference() {
313
319
  let _ = std::env::set_current_dir(original_dir);
314
320
 
315
321
  assert!(result.is_ok(), "Discover should succeed");
316
- let config = result.unwrap();
322
+ let config = result.expect("Operation failed");
317
323
  assert!(config.is_some(), "Should find a config file");
318
324
  }
319
325
 
@@ -321,7 +327,7 @@ fn test_discover_file_name_preference() {
321
327
  #[test]
322
328
  #[serial_test::serial]
323
329
  fn test_discover_with_nested_directories() {
324
- let temp_dir = TempDir::new().unwrap();
330
+ let temp_dir = TempDir::new().expect("Operation failed");
325
331
  let config_path = temp_dir.path().join("kreuzberg.toml");
326
332
 
327
333
  let toml_content = r#"
@@ -329,14 +335,14 @@ fn test_discover_with_nested_directories() {
329
335
  enabled = true
330
336
  "#;
331
337
 
332
- fs::write(&config_path, toml_content).unwrap();
338
+ fs::write(&config_path, toml_content).expect("Operation failed");
333
339
 
334
340
  let level1 = temp_dir.path().join("level1");
335
341
  let level2 = level1.join("level2");
336
342
  let level3 = level2.join("level3");
337
- fs::create_dir_all(&level3).unwrap();
343
+ fs::create_dir_all(&level3).expect("Operation failed");
338
344
 
339
- let original_dir = std::env::current_dir().unwrap();
345
+ let original_dir = std::env::current_dir().expect("Operation failed");
340
346
  if std::env::set_current_dir(&level3).is_err() {
341
347
  return;
342
348
  }
@@ -346,15 +352,18 @@ enabled = true
346
352
  let _ = std::env::set_current_dir(&original_dir);
347
353
 
348
354
  assert!(result.is_ok(), "Discover should succeed");
349
- let config = result.unwrap();
355
+ let config = result.expect("Operation failed");
350
356
  assert!(config.is_some(), "Should find config in ancestor directory");
351
- assert!(config.unwrap().ocr.is_some(), "Should have OCR config");
357
+ assert!(
358
+ config.expect("Operation failed").ocr.is_some(),
359
+ "Should have OCR config"
360
+ );
352
361
  }
353
362
 
354
363
  /// Test config loading with all supported features.
355
364
  #[test]
356
365
  fn test_from_file_comprehensive_config() {
357
- let temp_dir = TempDir::new().unwrap();
366
+ let temp_dir = TempDir::new().expect("Operation failed");
358
367
  let config_path = temp_dir.path().join("config.toml");
359
368
 
360
369
  let toml_content = r#"
@@ -376,12 +385,12 @@ enabled = true
376
385
  extract_images = true
377
386
  "#;
378
387
 
379
- fs::write(&config_path, toml_content).unwrap();
388
+ fs::write(&config_path, toml_content).expect("Operation failed");
380
389
 
381
390
  let config = ExtractionConfig::from_file(&config_path);
382
391
  assert!(config.is_ok(), "Should load comprehensive config successfully");
383
392
 
384
- let config = config.unwrap();
393
+ let config = config.expect("Operation failed");
385
394
  assert!(config.ocr.is_some(), "Should have OCR config");
386
395
  assert!(config.chunking.is_some(), "Should have chunking config");
387
396
  assert!(
@@ -396,7 +405,7 @@ extract_images = true
396
405
  /// Test config validation with invalid values.
397
406
  #[test]
398
407
  fn test_from_file_with_invalid_values() {
399
- let temp_dir = TempDir::new().unwrap();
408
+ let temp_dir = TempDir::new().expect("Operation failed");
400
409
  let config_path = temp_dir.path().join("config.toml");
401
410
 
402
411
  let toml_content = r#"
@@ -405,7 +414,7 @@ max_chars = -1000
405
414
  max_overlap = -100
406
415
  "#;
407
416
 
408
- fs::write(&config_path, toml_content).unwrap();
417
+ fs::write(&config_path, toml_content).expect("Operation failed");
409
418
 
410
419
  let result = ExtractionConfig::from_file(&config_path);
411
420
  if let Ok(config) = result