kreuzberg 4.1.2 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
  5. data/kreuzberg.gemspec +13 -1
  6. data/lib/kreuzberg/config.rb +70 -35
  7. data/lib/kreuzberg/version.rb +1 -1
  8. data/sig/kreuzberg.rbs +5 -1
  9. data/spec/binding/batch_operations_spec.rb +80 -0
  10. data/spec/binding/metadata_types_spec.rb +77 -57
  11. data/spec/serialization_spec.rb +134 -0
  12. data/spec/unit/config/output_format_spec.rb +380 -0
  13. data/vendor/Cargo.toml +1 -1
  14. data/vendor/kreuzberg/Cargo.toml +1 -1
  15. data/vendor/kreuzberg/README.md +1 -1
  16. data/vendor/kreuzberg/src/embeddings.rs +4 -4
  17. data/vendor/kreuzberg/src/mcp/format.rs +237 -39
  18. data/vendor/kreuzberg/src/mcp/params.rs +26 -33
  19. data/vendor/kreuzberg/src/mcp/server.rs +6 -3
  20. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
  21. data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
  22. data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
  23. data/vendor/kreuzberg/tests/api_embed.rs +84 -50
  24. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
  25. data/vendor/kreuzberg/tests/api_tests.rs +298 -139
  26. data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
  27. data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
  28. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
  29. data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
  30. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
  31. data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
  32. data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
  33. data/vendor/kreuzberg/tests/config_features.rs +19 -15
  34. data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
  35. data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
  36. data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
  37. data/vendor/kreuzberg/tests/core_integration.rs +55 -53
  38. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
  39. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
  40. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
  41. data/vendor/kreuzberg/tests/email_integration.rs +7 -7
  42. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
  43. data/vendor/kreuzberg/tests/error_handling.rs +13 -11
  44. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
  45. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  46. data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
  47. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
  48. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
  49. data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
  50. data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
  51. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
  52. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
  53. data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
  54. data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
  55. data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
  56. data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
  57. data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
  58. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
  59. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
  60. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
  61. data/vendor/kreuzberg/tests/page_markers.rs +1 -1
  62. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
  63. data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
  64. data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
  65. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
  66. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
  67. data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
  68. data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
  69. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
  70. data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
  71. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
  72. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
  73. data/vendor/kreuzberg/tests/security_validation.rs +20 -19
  74. data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
  75. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
  76. data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
  77. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
  78. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
  79. data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
  80. metadata +10 -2
@@ -48,12 +48,17 @@ async fn test_mime_detection_by_extension() {
48
48
  let temp_dir = TempDir::new().expect("Should create temp dir");
49
49
  let temp_path = temp_dir.path().join(filename);
50
50
 
51
- std::fs::write(&temp_path, b"test content").unwrap();
51
+ std::fs::write(&temp_path, b"test content").expect("Operation failed");
52
52
 
53
53
  let detected = detect_mime_type(&temp_path, true);
54
54
 
55
55
  assert!(detected.is_ok(), "Should detect MIME type for {}", filename);
56
- assert_eq!(detected.unwrap(), expected_mime, "MIME type mismatch for {}", filename);
56
+ assert_eq!(
57
+ detected.expect("Operation failed"),
58
+ expected_mime,
59
+ "MIME type mismatch for {}",
60
+ filename
61
+ );
57
62
  }
58
63
  }
59
64
 
@@ -76,11 +81,11 @@ async fn test_mime_detection_case_insensitive() {
76
81
  let temp_dir = TempDir::new().expect("Should create temp dir");
77
82
  let temp_path = temp_dir.path().join(filename);
78
83
 
79
- std::fs::write(&temp_path, b"test").unwrap();
84
+ std::fs::write(&temp_path, b"test").expect("Operation failed");
80
85
 
81
86
  let detected = detect_mime_type(&temp_path, true);
82
87
  assert!(detected.is_ok(), "Should handle {} (case insensitive)", filename);
83
- assert_eq!(detected.unwrap(), expected_mime);
88
+ assert_eq!(detected.expect("Operation failed"), expected_mime);
84
89
  }
85
90
  }
86
91
 
@@ -118,11 +123,15 @@ async fn test_mime_detection_by_content() {
118
123
 
119
124
  for test_case in test_cases {
120
125
  let mut temp_file = NamedTempFile::new().expect("Should create temp file");
121
- let temp_path = temp_file.path().parent().unwrap().join(test_case.filename);
126
+ let temp_path = temp_file
127
+ .path()
128
+ .parent()
129
+ .expect("Operation failed")
130
+ .join(test_case.filename);
122
131
 
123
- temp_file.write_all(&test_case.content).unwrap();
124
- temp_file.flush().unwrap();
125
- std::fs::copy(temp_file.path(), &temp_path).unwrap();
132
+ temp_file.write_all(&test_case.content).expect("Operation failed");
133
+ temp_file.flush().expect("Operation failed");
134
+ std::fs::copy(temp_file.path(), &temp_path).expect("Operation failed");
126
135
 
127
136
  let detected = detect_mime_type(&temp_path, true);
128
137
 
@@ -170,7 +179,7 @@ async fn test_mime_type_validation() {
170
179
  for mime_type in supported {
171
180
  let result = validate_mime_type(mime_type);
172
181
  assert!(result.is_ok(), "Should validate supported MIME type: {}", mime_type);
173
- assert_eq!(result.unwrap(), mime_type);
182
+ assert_eq!(result.expect("Operation failed"), mime_type);
174
183
  }
175
184
  }
176
185
 
@@ -222,18 +231,24 @@ async fn test_unknown_mime_type() {
222
231
  #[tokio::test]
223
232
  async fn test_mime_mismatch_warning() {
224
233
  let mut temp_file = NamedTempFile::new().expect("Should create temp file");
225
- let temp_path = temp_file.path().parent().unwrap().join("document.pdf");
226
-
227
- temp_file.write_all(&[0x50, 0x4B, 0x03, 0x04]).unwrap();
228
- temp_file.flush().unwrap();
229
- std::fs::copy(temp_file.path(), &temp_path).unwrap();
234
+ let temp_path = temp_file
235
+ .path()
236
+ .parent()
237
+ .expect("Operation failed")
238
+ .join("document.pdf");
239
+
240
+ temp_file
241
+ .write_all(&[0x50, 0x4B, 0x03, 0x04])
242
+ .expect("Operation failed");
243
+ temp_file.flush().expect("Operation failed");
244
+ std::fs::copy(temp_file.path(), &temp_path).expect("Operation failed");
230
245
 
231
246
  let detected = detect_mime_type(&temp_path, true);
232
247
 
233
248
  assert!(detected.is_ok(), "Should detect MIME type even with mismatch");
234
249
 
235
250
  assert_eq!(
236
- detected.unwrap(),
251
+ detected.expect("Operation failed"),
237
252
  "application/pdf",
238
253
  "Extension-based detection should take precedence"
239
254
  );
@@ -245,18 +260,22 @@ async fn test_mime_mismatch_warning() {
245
260
  #[tokio::test]
246
261
  async fn test_extension_content_mismatch() {
247
262
  let mut temp_file = NamedTempFile::new().expect("Should create temp file");
248
- let temp_path = temp_file.path().parent().unwrap().join("document.txt");
263
+ let temp_path = temp_file
264
+ .path()
265
+ .parent()
266
+ .expect("Operation failed")
267
+ .join("document.txt");
249
268
 
250
- temp_file.write_all(b"%PDF-1.4\n").unwrap();
251
- temp_file.flush().unwrap();
252
- std::fs::copy(temp_file.path(), &temp_path).unwrap();
269
+ temp_file.write_all(b"%PDF-1.4\n").expect("Operation failed");
270
+ temp_file.flush().expect("Operation failed");
271
+ std::fs::copy(temp_file.path(), &temp_path).expect("Operation failed");
253
272
 
254
273
  let detected = detect_mime_type(&temp_path, true);
255
274
 
256
275
  assert!(detected.is_ok(), "Should detect MIME type");
257
276
 
258
277
  assert_eq!(
259
- detected.unwrap(),
278
+ detected.expect("Operation failed"),
260
279
  "text/plain",
261
280
  "Should use extension for MIME detection"
262
281
  );
@@ -268,11 +287,11 @@ async fn test_extension_content_mismatch() {
268
287
  #[tokio::test]
269
288
  async fn test_no_extension() {
270
289
  let mut temp_file = NamedTempFile::new().expect("Should create temp file");
271
- let temp_path = temp_file.path().parent().unwrap().join("testfile");
290
+ let temp_path = temp_file.path().parent().expect("Operation failed").join("testfile");
272
291
 
273
- temp_file.write_all(b"test content").unwrap();
274
- temp_file.flush().unwrap();
275
- std::fs::copy(temp_file.path(), &temp_path).unwrap();
292
+ temp_file.write_all(b"test content").expect("Operation failed");
293
+ temp_file.flush().expect("Operation failed");
294
+ std::fs::copy(temp_file.path(), &temp_path).expect("Operation failed");
276
295
 
277
296
  let detected = detect_mime_type(&temp_path, true);
278
297
 
@@ -322,23 +341,31 @@ async fn test_mime_detection_skip_existence_check() {
322
341
  let result = detect_mime_type(nonexistent_path, false);
323
342
 
324
343
  assert!(result.is_ok(), "Should succeed when skipping existence check");
325
- assert_eq!(result.unwrap(), "application/pdf");
344
+ assert_eq!(result.expect("Operation failed"), "application/pdf");
326
345
  }
327
346
 
328
347
  /// Test multiple dots in filename.
329
348
  #[tokio::test]
330
349
  async fn test_filename_multiple_dots() {
331
350
  let mut temp_file = NamedTempFile::new().expect("Should create temp file");
332
- let temp_path = temp_file.path().parent().unwrap().join("my.backup.file.pdf");
351
+ let temp_path = temp_file
352
+ .path()
353
+ .parent()
354
+ .expect("Operation failed")
355
+ .join("my.backup.file.pdf");
333
356
 
334
- temp_file.write_all(b"test").unwrap();
335
- temp_file.flush().unwrap();
336
- std::fs::copy(temp_file.path(), &temp_path).unwrap();
357
+ temp_file.write_all(b"test").expect("Operation failed");
358
+ temp_file.flush().expect("Operation failed");
359
+ std::fs::copy(temp_file.path(), &temp_path).expect("Operation failed");
337
360
 
338
361
  let detected = detect_mime_type(&temp_path, true);
339
362
 
340
363
  assert!(detected.is_ok(), "Should handle multiple dots in filename");
341
- assert_eq!(detected.unwrap(), "application/pdf", "Should use last extension");
364
+ assert_eq!(
365
+ detected.expect("Operation failed"),
366
+ "application/pdf",
367
+ "Should use last extension"
368
+ );
342
369
 
343
370
  let _ = std::fs::remove_file(&temp_path);
344
371
  }
@@ -347,16 +374,20 @@ async fn test_filename_multiple_dots() {
347
374
  #[tokio::test]
348
375
  async fn test_filename_special_characters() {
349
376
  let mut temp_file = NamedTempFile::new().expect("Should create temp file");
350
- let temp_path = temp_file.path().parent().unwrap().join("文档 (copy) [v2].pdf");
377
+ let temp_path = temp_file
378
+ .path()
379
+ .parent()
380
+ .expect("Operation failed")
381
+ .join("文档 (copy) [v2].pdf");
351
382
 
352
- temp_file.write_all(b"test").unwrap();
353
- temp_file.flush().unwrap();
354
- std::fs::copy(temp_file.path(), &temp_path).unwrap();
383
+ temp_file.write_all(b"test").expect("Operation failed");
384
+ temp_file.flush().expect("Operation failed");
385
+ std::fs::copy(temp_file.path(), &temp_path).expect("Operation failed");
355
386
 
356
387
  let detected = detect_mime_type(&temp_path, true);
357
388
 
358
389
  assert!(detected.is_ok(), "Should handle special characters in filename");
359
- assert_eq!(detected.unwrap(), "application/pdf");
390
+ assert_eq!(detected.expect("Operation failed"), "application/pdf");
360
391
 
361
392
  let _ = std::fs::remove_file(&temp_path);
362
393
  }
@@ -382,11 +413,11 @@ async fn test_pandoc_formats_mime_detection() {
382
413
 
383
414
  for (filename, expected_mime) in pandoc_formats {
384
415
  let mut temp_file = NamedTempFile::new().expect("Should create temp file");
385
- let temp_path = temp_file.path().parent().unwrap().join(filename);
416
+ let temp_path = temp_file.path().parent().expect("Operation failed").join(filename);
386
417
 
387
- temp_file.write_all(b"test content").unwrap();
388
- temp_file.flush().unwrap();
389
- std::fs::copy(temp_file.path(), &temp_path).unwrap();
418
+ temp_file.write_all(b"test content").expect("Operation failed");
419
+ temp_file.flush().expect("Operation failed");
420
+ std::fs::copy(temp_file.path(), &temp_path).expect("Operation failed");
390
421
 
391
422
  let detected = detect_mime_type(&temp_path, true);
392
423
 
@@ -396,7 +427,7 @@ async fn test_pandoc_formats_mime_detection() {
396
427
  filename
397
428
  );
398
429
  assert_eq!(
399
- detected.unwrap(),
430
+ detected.expect("Operation failed"),
400
431
  expected_mime,
401
432
  "MIME type mismatch for Pandoc format: {}",
402
433
  filename
@@ -424,6 +455,6 @@ async fn test_pandoc_mime_validation() {
424
455
  for mime_type in pandoc_mimes {
425
456
  let result = validate_mime_type(mime_type);
426
457
  assert!(result.is_ok(), "Pandoc MIME type should be supported: {}", mime_type);
427
- assert_eq!(result.unwrap(), mime_type);
458
+ assert_eq!(result.expect("Operation failed"), mime_type);
428
459
  }
429
460
  }
@@ -492,8 +492,8 @@ fn test_ocr_cache_disabled_then_enabled() {
492
492
  }
493
493
  assert!(result2.is_ok(), "Second extraction should succeed");
494
494
 
495
- assert_non_empty_content(&result1.unwrap());
496
- assert_non_empty_content(&result2.unwrap());
495
+ assert_non_empty_content(&result1.expect("Operation failed"));
496
+ assert_non_empty_content(&result2.expect("Operation failed"));
497
497
  }
498
498
 
499
499
  #[test]
@@ -548,7 +548,10 @@ fn test_ocr_concurrent_same_file() {
548
548
  handles.push(handle);
549
549
  }
550
550
 
551
- let successes: usize = handles.into_iter().map(|h| if h.join().unwrap() { 1 } else { 0 }).sum();
551
+ let successes: usize = handles
552
+ .into_iter()
553
+ .map(|h| if h.join().expect("Iterator failed") { 1 } else { 0 })
554
+ .sum();
552
555
 
553
556
  tracing::debug!("Concurrent processing: {}/5 threads succeeded", successes);
554
557
 
@@ -615,7 +618,10 @@ fn test_ocr_concurrent_different_files() {
615
618
  handles.push(handle);
616
619
  }
617
620
 
618
- let successes: usize = handles.into_iter().map(|h| if h.join().unwrap() { 1 } else { 0 }).sum();
621
+ let successes: usize = handles
622
+ .into_iter()
623
+ .map(|h| if h.join().expect("Iterator failed") { 1 } else { 0 })
624
+ .sum();
619
625
 
620
626
  assert_eq!(
621
627
  successes, 2,
@@ -120,7 +120,7 @@ fn test_registry_singleton_behavior() {
120
120
  #[test]
121
121
  fn test_easyocr_special_languages() {
122
122
  let registry = LanguageRegistry::new();
123
- let languages = registry.get_supported_languages("easyocr").unwrap();
123
+ let languages = registry.get_supported_languages("easyocr").expect("Operation failed");
124
124
 
125
125
  let special_langs = vec!["ch_sim", "ch_tra", "rs_cyrillic", "rs_latin"];
126
126
 
@@ -56,10 +56,10 @@ fn test_rayon_batch_stress_many_images() {
56
56
  success_count
57
57
  );
58
58
 
59
- let first_content = results[0].result.as_ref().unwrap().content.clone();
59
+ let first_content = results[0].result.as_ref().expect("Operation failed").content.clone();
60
60
  for (i, result) in results.iter().enumerate().skip(1) {
61
61
  assert!(result.success, "Result {} should succeed", i);
62
- let content = &result.result.as_ref().unwrap().content;
62
+ let content = &result.result.as_ref().expect("Operation failed").content;
63
63
  assert_eq!(
64
64
  content, &first_content,
65
65
  "Result {} content differs - possible race condition",
@@ -220,7 +220,7 @@ fn test_tesseract_api_thread_safety() {
220
220
  thread_id,
221
221
  result.err()
222
222
  );
223
- result.unwrap()
223
+ result.expect("Operation failed")
224
224
  }));
225
225
  }
226
226
 
@@ -26,9 +26,9 @@ mod helpers;
26
26
  fn get_test_file_path(filename: &str) -> PathBuf {
27
27
  let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
28
28
  .parent()
29
- .unwrap()
29
+ .expect("Operation failed")
30
30
  .parent()
31
- .unwrap();
31
+ .expect("Operation failed");
32
32
  workspace_root.join(format!("test_documents/odt/{}", filename))
33
33
  }
34
34
 
@@ -48,9 +48,9 @@ fn ensure_test_file_exists(path: &Path) -> bool {
48
48
  async fn test_odt_metadata_extraction() {
49
49
  let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
50
50
  .parent()
51
- .unwrap()
51
+ .expect("Operation failed")
52
52
  .parent()
53
- .unwrap();
53
+ .expect("Operation failed");
54
54
  let test_file = workspace_root.join("test_documents/metadata_test.odt");
55
55
 
56
56
  if !ensure_test_file_exists(&test_file) {
@@ -615,9 +615,9 @@ async fn test_odt_table_no_duplicate_content() {
615
615
  async fn test_odt_comprehensive_table_extraction() {
616
616
  let test_file = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
617
617
  .parent()
618
- .unwrap()
618
+ .expect("Operation failed")
619
619
  .parent()
620
- .unwrap()
620
+ .expect("Operation failed")
621
621
  .join("test_documents/extraction_test.odt");
622
622
 
623
623
  if !test_file.exists() {
@@ -22,9 +22,9 @@ mod helpers;
22
22
  fn get_test_opml_path(filename: &str) -> PathBuf {
23
23
  let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
24
24
  .parent()
25
- .unwrap()
25
+ .expect("Operation failed")
26
26
  .parent()
27
- .unwrap();
27
+ .expect("Operation failed");
28
28
  workspace_root.join(format!("test_documents/opml/{}", filename))
29
29
  }
30
30
 
@@ -24,9 +24,9 @@ use std::path::PathBuf;
24
24
  fn get_test_orgmode_path(filename: &str) -> PathBuf {
25
25
  let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
26
26
  .parent()
27
- .unwrap()
27
+ .expect("Operation failed")
28
28
  .parent()
29
- .unwrap();
29
+ .expect("Operation failed");
30
30
  workspace_root.join(format!("test_documents/orgmode/{}", filename))
31
31
  }
32
32
 
@@ -202,7 +202,7 @@ fn test_marker_appears_before_content() {
202
202
  assert!(marker_pos.is_some(), "Marker should be present");
203
203
 
204
204
  // Marker should be very early in the content (within first 50 chars)
205
- let pos = marker_pos.unwrap();
205
+ let pos = marker_pos.expect("Operation failed");
206
206
  assert!(
207
207
  pos < 50,
208
208
  "Marker for page 1 should appear at the start, but found at position {}",
@@ -64,7 +64,7 @@ async fn test_full_hierarchy_extraction() {
64
64
  "Pages should be extracted when extract_pages is enabled"
65
65
  );
66
66
 
67
- let pages = result.pages.as_ref().unwrap();
67
+ let pages = result.pages.as_ref().expect("Operation failed");
68
68
  assert!(!pages.is_empty(), "At least one page should be extracted");
69
69
 
70
70
  // Check that the first page has hierarchy information
@@ -74,7 +74,7 @@ async fn test_full_hierarchy_extraction() {
74
74
  "First page should have hierarchy information when hierarchy extraction is enabled"
75
75
  );
76
76
 
77
- let hierarchy = first_page.hierarchy.as_ref().unwrap();
77
+ let hierarchy = first_page.hierarchy.as_ref().expect("Operation failed");
78
78
 
79
79
  // Verify hierarchy structure
80
80
  assert!(hierarchy.block_count > 0, "Hierarchy should contain at least one block");
@@ -172,7 +172,7 @@ async fn test_hierarchy_disabled() {
172
172
  // Verify that pages were extracted
173
173
  assert!(result.pages.is_some(), "Pages should be extracted");
174
174
 
175
- let pages = result.pages.as_ref().unwrap();
175
+ let pages = result.pages.as_ref().expect("Operation failed");
176
176
  assert!(!pages.is_empty(), "At least one page should be extracted");
177
177
 
178
178
  // Check that the first page does NOT have hierarchy information when disabled
@@ -227,7 +227,7 @@ async fn test_hierarchy_with_explicit_disabled() {
227
227
  // Verify that pages were extracted
228
228
  assert!(result.pages.is_some(), "Pages should be extracted");
229
229
 
230
- let pages = result.pages.as_ref().unwrap();
230
+ let pages = result.pages.as_ref().expect("Operation failed");
231
231
  assert!(!pages.is_empty(), "At least one page should be extracted");
232
232
 
233
233
  // Check that the first page does NOT have hierarchy information when disabled
@@ -282,7 +282,7 @@ async fn test_hierarchy_different_k_clusters() {
282
282
 
283
283
  assert!(result.pages.is_some(), "Pages should be extracted");
284
284
 
285
- let pages = result.pages.as_ref().unwrap();
285
+ let pages = result.pages.as_ref().expect("Operation failed");
286
286
  assert!(!pages.is_empty(), "At least one page should be extracted");
287
287
 
288
288
  let first_page = &pages[0];
@@ -292,7 +292,7 @@ async fn test_hierarchy_different_k_clusters() {
292
292
  k
293
293
  );
294
294
 
295
- let hierarchy = first_page.hierarchy.as_ref().unwrap();
295
+ let hierarchy = first_page.hierarchy.as_ref().expect("Operation failed");
296
296
  eprintln!("K={}: {} hierarchy blocks extracted", k, hierarchy.block_count);
297
297
  assert!(hierarchy.block_count > 0, "Should have blocks with k={}", k);
298
298
  }
@@ -29,7 +29,7 @@ fn test_extract_chars_basic() {
29
29
  // Load PDF
30
30
  let pdfium = Pdfium;
31
31
  let document = pdfium
32
- .load_pdf_from_file(pdf_path.to_str().unwrap(), None)
32
+ .load_pdf_from_file(pdf_path.to_str().expect("Operation failed"), None)
33
33
  .expect("Failed to load test PDF");
34
34
 
35
35
  // Get first page
@@ -62,7 +62,7 @@ fn test_extract_chars_preserves_order() {
62
62
  // Load PDF
63
63
  let pdfium = Pdfium;
64
64
  let document = pdfium
65
- .load_pdf_from_file(pdf_path.to_str().unwrap(), None)
65
+ .load_pdf_from_file(pdf_path.to_str().expect("Operation failed"), None)
66
66
  .expect("Failed to load test PDF");
67
67
 
68
68
  // Get first page