kreuzberg 4.1.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -4
  3. data/README.md +8 -5
  4. data/ext/kreuzberg_rb/native/Cargo.toml +2 -2
  5. data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
  6. data/ext/kreuzberg_rb/native/src/config/types.rs +23 -13
  7. data/kreuzberg.gemspec +14 -2
  8. data/lib/kreuzberg/api_proxy.rb +0 -1
  9. data/lib/kreuzberg/cli_proxy.rb +0 -1
  10. data/lib/kreuzberg/config.rb +70 -35
  11. data/lib/kreuzberg/mcp_proxy.rb +0 -1
  12. data/lib/kreuzberg/version.rb +1 -1
  13. data/sig/kreuzberg.rbs +5 -1
  14. data/spec/binding/batch_operations_spec.rb +80 -0
  15. data/spec/binding/metadata_types_spec.rb +77 -57
  16. data/spec/serialization_spec.rb +134 -0
  17. data/spec/unit/config/output_format_spec.rb +380 -0
  18. data/vendor/Cargo.toml +1 -1
  19. data/vendor/kreuzberg/Cargo.toml +3 -3
  20. data/vendor/kreuzberg/README.md +1 -1
  21. data/vendor/kreuzberg/src/embeddings.rs +4 -4
  22. data/vendor/kreuzberg/src/mcp/format.rs +237 -39
  23. data/vendor/kreuzberg/src/mcp/params.rs +26 -33
  24. data/vendor/kreuzberg/src/mcp/server.rs +6 -3
  25. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
  26. data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
  27. data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
  28. data/vendor/kreuzberg/tests/api_embed.rs +84 -50
  29. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
  30. data/vendor/kreuzberg/tests/api_tests.rs +298 -139
  31. data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
  32. data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
  33. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
  34. data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
  35. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
  36. data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
  37. data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
  38. data/vendor/kreuzberg/tests/config_features.rs +19 -15
  39. data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
  40. data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
  41. data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
  42. data/vendor/kreuzberg/tests/core_integration.rs +55 -53
  43. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
  44. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
  45. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
  46. data/vendor/kreuzberg/tests/email_integration.rs +7 -7
  47. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
  48. data/vendor/kreuzberg/tests/error_handling.rs +13 -11
  49. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
  50. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  51. data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
  52. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
  53. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
  54. data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
  55. data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
  56. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
  57. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
  58. data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
  59. data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
  60. data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
  61. data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
  62. data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
  63. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
  64. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
  65. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
  66. data/vendor/kreuzberg/tests/page_markers.rs +1 -1
  67. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
  68. data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
  69. data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
  70. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
  71. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
  72. data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
  73. data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
  74. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
  75. data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
  76. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
  77. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
  78. data/vendor/kreuzberg/tests/security_validation.rs +20 -19
  79. data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
  80. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
  81. data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
  82. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
  83. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
  84. data/vendor/kreuzberg-tesseract/Cargo.toml +3 -3
  85. data/vendor/kreuzberg-tesseract/build.rs +4 -4
  86. data/vendor/kreuzberg-tesseract/src/lib.rs +6 -6
  87. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +3 -3
  88. metadata +13 -2
@@ -34,7 +34,7 @@ async fn test_zip_basic_extraction() {
34
34
  assert!(result.content.contains("Hello from ZIP!"));
35
35
 
36
36
  assert!(result.metadata.format.is_some());
37
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
37
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
38
38
  kreuzberg::FormatMetadata::Archive(meta) => meta,
39
39
  _ => panic!("Expected Archive metadata"),
40
40
  };
@@ -54,16 +54,16 @@ async fn test_zip_multiple_files() {
54
54
  let mut zip = ZipWriter::new(&mut cursor);
55
55
  let options = FileOptions::<'_, ()>::default();
56
56
 
57
- zip.start_file("file1.txt", options).unwrap();
58
- zip.write_all(b"Content 1").unwrap();
57
+ zip.start_file("file1.txt", options).expect("Operation failed");
58
+ zip.write_all(b"Content 1").expect("Operation failed");
59
59
 
60
- zip.start_file("file2.md", options).unwrap();
61
- zip.write_all(b"# Content 2").unwrap();
60
+ zip.start_file("file2.md", options).expect("Operation failed");
61
+ zip.write_all(b"# Content 2").expect("Operation failed");
62
62
 
63
- zip.start_file("file3.json", options).unwrap();
64
- zip.write_all(b"{\"key\": \"value\"}").unwrap();
63
+ zip.start_file("file3.json", options).expect("Operation failed");
64
+ zip.write_all(b"{\"key\": \"value\"}").expect("Operation failed");
65
65
 
66
- zip.finish().unwrap();
66
+ zip.finish().expect("Operation failed");
67
67
  }
68
68
 
69
69
  let zip_bytes = cursor.into_inner();
@@ -84,7 +84,7 @@ async fn test_zip_multiple_files() {
84
84
  assert!(result.content.contains("value"));
85
85
 
86
86
  assert!(result.metadata.format.is_some());
87
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
87
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
88
88
  kreuzberg::FormatMetadata::Archive(meta) => meta,
89
89
  _ => panic!("Expected Archive metadata"),
90
90
  };
@@ -105,16 +105,17 @@ async fn test_zip_nested_directories() {
105
105
  let mut zip = ZipWriter::new(&mut cursor);
106
106
  let options = FileOptions::<'_, ()>::default();
107
107
 
108
- zip.add_directory("dir1/", options).unwrap();
109
- zip.add_directory("dir1/subdir/", options).unwrap();
108
+ zip.add_directory("dir1/", options).expect("Operation failed");
109
+ zip.add_directory("dir1/subdir/", options).expect("Operation failed");
110
110
 
111
- zip.start_file("dir1/file.txt", options).unwrap();
112
- zip.write_all(b"File in dir1").unwrap();
111
+ zip.start_file("dir1/file.txt", options).expect("Operation failed");
112
+ zip.write_all(b"File in dir1").expect("Operation failed");
113
113
 
114
- zip.start_file("dir1/subdir/nested.txt", options).unwrap();
115
- zip.write_all(b"Nested file").unwrap();
114
+ zip.start_file("dir1/subdir/nested.txt", options)
115
+ .expect("Operation failed");
116
+ zip.write_all(b"Nested file").expect("Operation failed");
116
117
 
117
- zip.finish().unwrap();
118
+ zip.finish().expect("Operation failed");
118
119
  }
119
120
 
120
121
  let zip_bytes = cursor.into_inner();
@@ -134,7 +135,7 @@ async fn test_zip_nested_directories() {
134
135
  assert!(result.content.contains("Nested file"));
135
136
 
136
137
  assert!(result.metadata.format.is_some());
137
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
138
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
138
139
  kreuzberg::FormatMetadata::Archive(meta) => meta,
139
140
  _ => panic!("Expected Archive metadata"),
140
141
  };
@@ -172,7 +173,7 @@ async fn test_tar_extraction() {
172
173
  assert!(result.content.contains("Hello from TAR!"));
173
174
 
174
175
  assert!(result.metadata.format.is_some());
175
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
176
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
176
177
  kreuzberg::FormatMetadata::Archive(meta) => meta,
177
178
  _ => panic!("Expected Archive metadata"),
178
179
  };
@@ -202,7 +203,7 @@ async fn test_tar_gz_extraction() {
202
203
  assert!(result.content.contains("test.txt"));
203
204
 
204
205
  assert!(result.metadata.format.is_some());
205
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
206
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
206
207
  kreuzberg::FormatMetadata::Archive(meta) => meta,
207
208
  _ => panic!("Expected Archive metadata"),
208
209
  };
@@ -242,13 +243,14 @@ async fn test_nested_archive() {
242
243
  let mut zip = ZipWriter::new(&mut cursor);
243
244
  let options = FileOptions::<'_, ()>::default();
244
245
 
245
- zip.start_file("inner.zip", options).unwrap();
246
- zip.write_all(&inner_zip).unwrap();
246
+ zip.start_file("inner.zip", options).expect("Operation failed");
247
+ zip.write_all(&inner_zip).expect("Operation failed");
247
248
 
248
- zip.start_file("readme.txt", options).unwrap();
249
- zip.write_all(b"This archive contains another archive").unwrap();
249
+ zip.start_file("readme.txt", options).expect("Operation failed");
250
+ zip.write_all(b"This archive contains another archive")
251
+ .expect("Operation failed");
250
252
 
251
- zip.finish().unwrap();
253
+ zip.finish().expect("Operation failed");
252
254
  }
253
255
 
254
256
  let outer_zip_bytes = cursor.into_inner();
@@ -265,7 +267,7 @@ async fn test_nested_archive() {
265
267
  assert!(result.content.contains("This archive contains another archive"));
266
268
 
267
269
  assert!(result.metadata.format.is_some());
268
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
270
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
269
271
  kreuzberg::FormatMetadata::Archive(meta) => meta,
270
272
  _ => panic!("Expected Archive metadata"),
271
273
  };
@@ -284,19 +286,19 @@ async fn test_archive_mixed_formats() {
284
286
  let mut zip = ZipWriter::new(&mut cursor);
285
287
  let options = FileOptions::<'_, ()>::default();
286
288
 
287
- zip.start_file("document.txt", options).unwrap();
288
- zip.write_all(b"Text document").unwrap();
289
+ zip.start_file("document.txt", options).expect("Operation failed");
290
+ zip.write_all(b"Text document").expect("Operation failed");
289
291
 
290
- zip.start_file("readme.md", options).unwrap();
291
- zip.write_all(b"# README").unwrap();
292
+ zip.start_file("readme.md", options).expect("Operation failed");
293
+ zip.write_all(b"# README").expect("Operation failed");
292
294
 
293
- zip.start_file("image.png", options).unwrap();
294
- zip.write_all(&[0x89, 0x50, 0x4E, 0x47]).unwrap();
295
+ zip.start_file("image.png", options).expect("Operation failed");
296
+ zip.write_all(&[0x89, 0x50, 0x4E, 0x47]).expect("Operation failed");
295
297
 
296
- zip.start_file("document.pdf", options).unwrap();
297
- zip.write_all(b"%PDF-1.4").unwrap();
298
+ zip.start_file("document.pdf", options).expect("Operation failed");
299
+ zip.write_all(b"%PDF-1.4").expect("Operation failed");
298
300
 
299
- zip.finish().unwrap();
301
+ zip.finish().expect("Operation failed");
300
302
  }
301
303
 
302
304
  let zip_bytes = cursor.into_inner();
@@ -317,7 +319,7 @@ async fn test_archive_mixed_formats() {
317
319
  assert!(result.content.contains("# README"));
318
320
 
319
321
  assert!(result.metadata.format.is_some());
320
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
322
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
321
323
  kreuzberg::FormatMetadata::Archive(meta) => meta,
322
324
  _ => panic!("Expected Archive metadata"),
323
325
  };
@@ -373,11 +375,13 @@ async fn test_large_archive() {
373
375
  let options = FileOptions::<'_, ()>::default();
374
376
 
375
377
  for i in 0..100 {
376
- zip.start_file(format!("file_{}.txt", i), options).unwrap();
377
- zip.write_all(format!("Content {}", i).as_bytes()).unwrap();
378
+ zip.start_file(format!("file_{}.txt", i), options)
379
+ .expect("Operation failed");
380
+ zip.write_all(format!("Content {}", i).as_bytes())
381
+ .expect("Failed to convert to bytes");
378
382
  }
379
383
 
380
- zip.finish().unwrap();
384
+ zip.finish().expect("Operation failed");
381
385
  }
382
386
 
383
387
  let zip_bytes = cursor.into_inner();
@@ -390,7 +394,7 @@ async fn test_large_archive() {
390
394
  assert!(result.tables.is_empty(), "Archive should not have tables");
391
395
 
392
396
  assert!(result.metadata.format.is_some());
393
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
397
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
394
398
  kreuzberg::FormatMetadata::Archive(meta) => meta,
395
399
  _ => panic!("Expected Archive metadata"),
396
400
  };
@@ -418,16 +422,19 @@ async fn test_archive_with_special_characters() {
418
422
  let mut zip = ZipWriter::new(&mut cursor);
419
423
  let options = FileOptions::<'_, ()>::default();
420
424
 
421
- zip.start_file("测试文件.txt", options).unwrap();
422
- zip.write_all("Unicode content".as_bytes()).unwrap();
425
+ zip.start_file("测试文件.txt", options).expect("Operation failed");
426
+ zip.write_all("Unicode content".as_bytes())
427
+ .expect("Failed to convert to bytes");
423
428
 
424
- zip.start_file("file with spaces.txt", options).unwrap();
425
- zip.write_all(b"Spaces in filename").unwrap();
429
+ zip.start_file("file with spaces.txt", options)
430
+ .expect("Operation failed");
431
+ zip.write_all(b"Spaces in filename").expect("Operation failed");
426
432
 
427
- zip.start_file("file-with-dashes.txt", options).unwrap();
428
- zip.write_all(b"Dashes").unwrap();
433
+ zip.start_file("file-with-dashes.txt", options)
434
+ .expect("Operation failed");
435
+ zip.write_all(b"Dashes").expect("Operation failed");
429
436
 
430
- zip.finish().unwrap();
437
+ zip.finish().expect("Operation failed");
431
438
  }
432
439
 
433
440
  let zip_bytes = cursor.into_inner();
@@ -444,7 +451,7 @@ async fn test_archive_with_special_characters() {
444
451
  assert!(result.content.contains("file-with-dashes.txt"));
445
452
 
446
453
  assert!(result.metadata.format.is_some());
447
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
454
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
448
455
  kreuzberg::FormatMetadata::Archive(meta) => meta,
449
456
  _ => panic!("Expected Archive metadata"),
450
457
  };
@@ -463,7 +470,7 @@ async fn test_empty_archive() {
463
470
  let mut cursor = Cursor::new(Vec::new());
464
471
  {
465
472
  let zip = ZipWriter::new(&mut cursor);
466
- zip.finish().unwrap();
473
+ zip.finish().expect("Operation failed");
467
474
  }
468
475
 
469
476
  let zip_bytes = cursor.into_inner();
@@ -477,7 +484,7 @@ async fn test_empty_archive() {
477
484
 
478
485
  assert!(result.content.contains("ZIP Archive"));
479
486
  assert!(result.metadata.format.is_some());
480
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
487
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
481
488
  kreuzberg::FormatMetadata::Archive(meta) => meta,
482
489
  _ => panic!("Expected Archive metadata"),
483
490
  };
@@ -503,7 +510,7 @@ fn test_archive_extraction_sync() {
503
510
  assert!(result.content.contains("Hello from ZIP!"));
504
511
 
505
512
  assert!(result.metadata.format.is_some(), "Should have archive metadata");
506
- let archive_meta = match result.metadata.format.as_ref().unwrap() {
513
+ let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
507
514
  kreuzberg::FormatMetadata::Archive(meta) => meta,
508
515
  _ => panic!("Expected Archive metadata"),
509
516
  };
@@ -519,10 +526,10 @@ fn create_simple_zip() -> Vec<u8> {
519
526
  let mut zip = ZipWriter::new(&mut cursor);
520
527
  let options = FileOptions::<'_, ()>::default();
521
528
 
522
- zip.start_file("test.txt", options).unwrap();
523
- zip.write_all(b"Hello from ZIP!").unwrap();
529
+ zip.start_file("test.txt", options).expect("Operation failed");
530
+ zip.write_all(b"Hello from ZIP!").expect("Operation failed");
524
531
 
525
- zip.finish().unwrap();
532
+ zip.finish().expect("Operation failed");
526
533
  }
527
534
  cursor.into_inner()
528
535
  }
@@ -534,12 +541,12 @@ fn create_simple_tar() -> Vec<u8> {
534
541
 
535
542
  let data = b"Hello from TAR!";
536
543
  let mut header = tar::Header::new_gnu();
537
- header.set_path("test.txt").unwrap();
544
+ header.set_path("test.txt").expect("Operation failed");
538
545
  header.set_size(data.len() as u64);
539
546
  header.set_cksum();
540
- tar.append(&header, &data[..]).unwrap();
547
+ tar.append(&header, &data[..]).expect("Operation failed");
541
548
 
542
- tar.finish().unwrap();
549
+ tar.finish().expect("Operation failed");
543
550
  }
544
551
  cursor.into_inner()
545
552
  }
@@ -63,7 +63,7 @@ async fn test_batch_documents_parallel_execution() {
63
63
  let parallel_duration = parallel_start.elapsed();
64
64
 
65
65
  assert!(results.is_ok(), "Batch extraction should succeed");
66
- let results = results.unwrap();
66
+ let results = results.expect("Operation failed");
67
67
  assert_eq!(results.len(), 20, "Should process all 20 files");
68
68
 
69
69
  for result in &results {
@@ -102,7 +102,7 @@ async fn test_batch_documents_concurrency_limiting() {
102
102
  let results = batch_extract_file(paths, &config).await;
103
103
 
104
104
  assert!(results.is_ok());
105
- let results = results.unwrap();
105
+ let results = results.expect("Operation failed");
106
106
  assert_eq!(results.len(), 4);
107
107
  }
108
108
 
@@ -127,7 +127,7 @@ async fn test_batch_documents_default_concurrency() {
127
127
  let duration = start.elapsed();
128
128
 
129
129
  assert!(results.is_ok());
130
- let results = results.unwrap();
130
+ let results = results.expect("Operation failed");
131
131
  assert_eq!(results.len(), 50);
132
132
 
133
133
  println!("Processed 50 files in {:?}", duration);
@@ -152,7 +152,9 @@ async fn test_batch_documents_preserves_order() {
152
152
  get_test_file_path("xml/simple_note.xml"),
153
153
  ];
154
154
 
155
- let results = batch_extract_file(paths, &config).await.unwrap();
155
+ let results = batch_extract_file(paths, &config)
156
+ .await
157
+ .expect("Async operation failed");
156
158
 
157
159
  assert_eq!(results.len(), 3, "Should have 3 results");
158
160
 
@@ -201,7 +203,7 @@ async fn test_multipage_pdf_extraction() {
201
203
  let duration = start.elapsed();
202
204
 
203
205
  assert!(result.is_ok(), "Multi-page PDF extraction should succeed");
204
- let extraction = result.unwrap();
206
+ let extraction = result.expect("Operation failed");
205
207
 
206
208
  assert!(!extraction.content.is_empty(), "Should extract text from all pages");
207
209
  println!("Extracted multi-page PDF in {:?}", duration);
@@ -230,7 +232,7 @@ async fn test_concurrent_pdf_extractions() {
230
232
  let duration = start.elapsed();
231
233
 
232
234
  assert!(results.is_ok());
233
- let results = results.unwrap();
235
+ let results = results.expect("Operation failed");
234
236
  assert_eq!(results.len(), 10);
235
237
 
236
238
  println!("Processed 10 PDFs in {:?}", duration);
@@ -318,7 +320,7 @@ async fn test_batch_bytes_parallel_processing() {
318
320
  let duration = start.elapsed();
319
321
 
320
322
  assert!(results.is_ok());
321
- let results = results.unwrap();
323
+ let results = results.expect("Operation failed");
322
324
  assert_eq!(results.len(), 30);
323
325
 
324
326
  for (i, result) in results.iter().enumerate() {
@@ -350,7 +352,7 @@ async fn test_batch_bytes_mixed_valid_invalid() {
350
352
  let results = batch_extract_bytes(owned_contents, &config).await;
351
353
 
352
354
  assert!(results.is_ok());
353
- let results = results.unwrap();
355
+ let results = results.expect("Operation failed");
354
356
  assert_eq!(results.len(), 5);
355
357
 
356
358
  assert_text_content(&results[0].content, "valid content 1");
@@ -394,7 +396,7 @@ async fn test_batch_utilizes_multiple_cores() {
394
396
  let duration = start.elapsed();
395
397
 
396
398
  assert!(results.is_ok());
397
- let results = results.unwrap();
399
+ let results = results.expect("Operation failed");
398
400
  assert_eq!(results.len(), 20);
399
401
 
400
402
  println!(
@@ -437,7 +439,7 @@ async fn test_batch_memory_pressure_handling() {
437
439
  let duration = start.elapsed();
438
440
 
439
441
  assert!(results.is_ok());
440
- let results = results.unwrap();
442
+ let results = results.expect("Operation failed");
441
443
  assert_eq!(results.len(), 50);
442
444
 
443
445
  println!("Processed 50 large documents with concurrency limit in {:?}", duration);
@@ -469,7 +471,9 @@ async fn test_batch_scales_with_cpu_count() {
469
471
  .collect();
470
472
 
471
473
  let start = Instant::now();
472
- let _ = batch_extract_bytes(owned_contents_1, &config_1).await.unwrap();
474
+ let _ = batch_extract_bytes(owned_contents_1, &config_1)
475
+ .await
476
+ .expect("Async operation failed");
473
477
  let duration_1 = start.elapsed();
474
478
 
475
479
  let config_full = ExtractionConfig {
@@ -483,7 +487,9 @@ async fn test_batch_scales_with_cpu_count() {
483
487
  .collect();
484
488
 
485
489
  let start = Instant::now();
486
- let _ = batch_extract_bytes(owned_contents_full, &config_full).await.unwrap();
490
+ let _ = batch_extract_bytes(owned_contents_full, &config_full)
491
+ .await
492
+ .expect("Async operation failed");
487
493
  let duration_full = start.elapsed();
488
494
 
489
495
  println!(
@@ -522,7 +528,7 @@ async fn test_batch_mixed_document_types() {
522
528
  let results = batch_extract_file(paths, &config).await;
523
529
 
524
530
  assert!(results.is_ok());
525
- let results = results.unwrap();
531
+ let results = results.expect("Operation failed");
526
532
  assert_eq!(results.len(), 4);
527
533
 
528
534
  for (i, result) in results.iter().enumerate() {
@@ -572,7 +578,9 @@ async fn test_batch_accuracy_under_load() {
572
578
  .map(|(bytes, mime)| (bytes.to_vec(), mime.to_string()))
573
579
  .collect();
574
580
 
575
- let results = batch_extract_bytes(owned_contents, &config).await.unwrap();
581
+ let results = batch_extract_bytes(owned_contents, &config)
582
+ .await
583
+ .expect("Async operation failed");
576
584
 
577
585
  assert_eq!(results.len(), 100);
578
586
 
@@ -21,7 +21,7 @@ mod tests {
21
21
 
22
22
  let mut buffers = vec![];
23
23
  for _ in 0..3 {
24
- let buf = pool.acquire().unwrap();
24
+ let buf = pool.acquire().expect("Operation failed");
25
25
  buffers.push(buf);
26
26
  }
27
27
 
@@ -31,7 +31,7 @@ mod tests {
31
31
 
32
32
  let mut buffers = vec![];
33
33
  for _ in 0..3 {
34
- let buf = pool.acquire().unwrap();
34
+ let buf = pool.acquire().expect("Operation failed");
35
35
  buffers.push(buf);
36
36
  }
37
37
  drop(buffers);
@@ -47,8 +47,8 @@ mod tests {
47
47
  let mut results = vec![];
48
48
 
49
49
  for _i in 0..5 {
50
- let string_buf = processor.string_pool().acquire().unwrap();
51
- let byte_buf = processor.byte_pool().acquire().unwrap();
50
+ let string_buf = processor.string_pool().acquire().expect("Operation failed");
51
+ let byte_buf = processor.byte_pool().acquire().expect("Operation failed");
52
52
 
53
53
  results.push((string_buf, byte_buf));
54
54
  }
@@ -65,17 +65,17 @@ mod tests {
65
65
  let pool = create_string_buffer_pool(5, 4096);
66
66
 
67
67
  let capacity_initial = {
68
- let buf = pool.acquire().unwrap();
68
+ let buf = pool.acquire().expect("Operation failed");
69
69
  buf.capacity()
70
70
  };
71
71
 
72
72
  for _ in 0..10 {
73
- let mut buf = pool.acquire().unwrap();
73
+ let mut buf = pool.acquire().expect("Operation failed");
74
74
  buf.push_str("test data");
75
75
  }
76
76
 
77
77
  let capacity_final = {
78
- let buf = pool.acquire().unwrap();
78
+ let buf = pool.acquire().expect("Operation failed");
79
79
  buf.capacity()
80
80
  };
81
81
 
@@ -101,15 +101,15 @@ mod tests {
101
101
  let processor = BatchProcessor::new();
102
102
 
103
103
  {
104
- let _s1 = processor.string_pool().acquire().unwrap();
105
- let _s2 = processor.string_pool().acquire().unwrap();
106
- let _b1 = processor.byte_pool().acquire().unwrap();
104
+ let _s1 = processor.string_pool().acquire().expect("Operation failed");
105
+ let _s2 = processor.string_pool().acquire().expect("Operation failed");
106
+ let _b1 = processor.byte_pool().acquire().expect("Operation failed");
107
107
  }
108
108
 
109
109
  assert!(processor.string_pool_size() > 0);
110
110
  assert!(processor.byte_pool_size() > 0);
111
111
 
112
- processor.clear_pools().unwrap();
112
+ processor.clear_pools().expect("Operation failed");
113
113
 
114
114
  assert_eq!(processor.string_pool_size(), 0);
115
115
  assert_eq!(processor.byte_pool_size(), 0);
@@ -137,7 +137,7 @@ mod tests {
137
137
  }
138
138
 
139
139
  for handle in handles {
140
- handle.join().unwrap();
140
+ handle.join().expect("Operation failed");
141
141
  }
142
142
 
143
143
  assert!(processor.string_pool_size() <= 10);
@@ -148,7 +148,7 @@ mod tests {
148
148
  fn test_pool_respects_capacity_hints() {
149
149
  let pool = create_string_buffer_pool(3, 2048);
150
150
 
151
- let buf = pool.acquire().unwrap();
151
+ let buf = pool.acquire().expect("Operation failed");
152
152
  assert!(buf.capacity() >= 2048, "buffer should respect capacity hint");
153
153
  }
154
154
  }
@@ -51,7 +51,7 @@ async fn test_batch_extract_file_multiple_formats() {
51
51
  let results = batch_extract_file(paths, &config).await;
52
52
 
53
53
  assert!(results.is_ok(), "Batch extraction should succeed");
54
- let results = results.unwrap();
54
+ let results = results.expect("Operation failed");
55
55
 
56
56
  assert_eq!(results.len(), 3);
57
57
 
@@ -95,7 +95,7 @@ fn test_batch_extract_file_sync_variant() {
95
95
  let results = batch_extract_file_sync(paths, &config);
96
96
 
97
97
  assert!(results.is_ok(), "Sync batch extraction should succeed");
98
- let results = results.unwrap();
98
+ let results = results.expect("Operation failed");
99
99
 
100
100
  assert_eq!(results.len(), 2);
101
101
 
@@ -137,7 +137,7 @@ async fn test_batch_extract_bytes_multiple() {
137
137
  let results = batch_extract_bytes(owned_contents, &config).await;
138
138
 
139
139
  assert!(results.is_ok(), "Batch bytes extraction should succeed");
140
- let results = results.unwrap();
140
+ let results = results.expect("Operation failed");
141
141
 
142
142
  assert_eq!(results.len(), 3);
143
143
 
@@ -161,7 +161,11 @@ async fn test_batch_extract_empty_list() {
161
161
  let results = batch_extract_file(paths, &config).await;
162
162
 
163
163
  assert!(results.is_ok(), "Empty batch should succeed");
164
- assert_eq!(results.unwrap().len(), 0, "Should return empty vector");
164
+ assert_eq!(
165
+ results.expect("Operation failed").len(),
166
+ 0,
167
+ "Should return empty vector"
168
+ );
165
169
  }
166
170
 
167
171
  /// Test batch extraction when one file fails (others should succeed).
@@ -187,7 +191,7 @@ async fn test_batch_extract_one_file_fails() {
187
191
  let results = batch_extract_file(paths, &config).await;
188
192
 
189
193
  assert!(results.is_ok(), "Batch should succeed even with one failure");
190
- let results = results.unwrap();
194
+ let results = results.expect("Operation failed");
191
195
 
192
196
  assert_eq!(results.len(), 3);
193
197
 
@@ -216,7 +220,7 @@ async fn test_batch_extract_all_fail() {
216
220
  let results = batch_extract_file(paths, &config).await;
217
221
 
218
222
  assert!(results.is_ok(), "Batch should succeed (errors in metadata)");
219
- let results = results.unwrap();
223
+ let results = results.expect("Operation failed");
220
224
 
221
225
  assert_eq!(results.len(), 3);
222
226
 
@@ -251,7 +255,7 @@ async fn test_batch_extract_concurrent() {
251
255
  let duration = start.elapsed();
252
256
 
253
257
  assert!(results.is_ok(), "Concurrent batch should succeed");
254
- let results = results.unwrap();
258
+ let results = results.expect("Operation failed");
255
259
 
256
260
  assert_eq!(results.len(), 20);
257
261
 
@@ -289,7 +293,7 @@ async fn test_batch_extract_large_batch() {
289
293
  let results = batch_extract_file(paths, &config).await;
290
294
 
291
295
  assert!(results.is_ok(), "Large batch should succeed");
292
- let results = results.unwrap();
296
+ let results = results.expect("Operation failed");
293
297
 
294
298
  assert_eq!(results.len(), 50);
295
299
 
@@ -319,7 +323,7 @@ fn test_batch_extract_bytes_sync_variant() {
319
323
  let results = batch_extract_bytes_sync(owned_contents, &config);
320
324
 
321
325
  assert!(results.is_ok(), "Sync batch bytes extraction should succeed");
322
- let results = results.unwrap();
326
+ let results = results.expect("Operation failed");
323
327
 
324
328
  assert_eq!(results.len(), 3);
325
329
  assert_text_content(&results[0].content, "content 1");
@@ -65,7 +65,7 @@ async fn test_all_entry_types() {
65
65
  .await;
66
66
 
67
67
  assert!(result.is_ok(), "Failed to parse {} entry", expected_type);
68
- let result = result.unwrap();
68
+ let result = result.expect("Operation failed");
69
69
 
70
70
  if let Some(entry_types) = result.metadata.additional.get("entry_types") {
71
71
  assert!(entry_types.as_object().is_some(), "Entry types should be an object");
@@ -116,7 +116,7 @@ async fn test_all_common_fields() {
116
116
  .await;
117
117
 
118
118
  assert!(result.is_ok());
119
- let result = result.unwrap();
119
+ let result = result.expect("Operation failed");
120
120
 
121
121
  let content = &result.content;
122
122
 
@@ -183,7 +183,7 @@ async fn test_author_parsing() {
183
183
  .await;
184
184
 
185
185
  assert!(result.is_ok());
186
- let result = result.unwrap();
186
+ let result = result.expect("Operation failed");
187
187
 
188
188
  if let Some(authors) = result.metadata.additional.get("authors") {
189
189
  let authors_array = authors.as_array().expect("Authors should be an array");
@@ -221,7 +221,7 @@ async fn test_special_characters() {
221
221
  .await;
222
222
 
223
223
  assert!(result.is_ok());
224
- let result = result.unwrap();
224
+ let result = result.expect("Operation failed");
225
225
 
226
226
  assert_eq!(
227
227
  result.metadata.additional.get("entry_count"),
@@ -250,7 +250,7 @@ async fn test_year_range_extraction() {
250
250
  .await;
251
251
 
252
252
  assert!(result.is_ok());
253
- let result = result.unwrap();
253
+ let result = result.expect("Operation failed");
254
254
 
255
255
  if let Some(year_range) = result.metadata.additional.get("year_range") {
256
256
  assert_eq!(year_range.get("min"), Some(&serde_json::json!(1990)));
@@ -281,7 +281,7 @@ async fn test_citation_keys_extraction() {
281
281
  .await;
282
282
 
283
283
  assert!(result.is_ok());
284
- let result = result.unwrap();
284
+ let result = result.expect("Operation failed");
285
285
 
286
286
  if let Some(citation_keys) = result.metadata.additional.get("citation_keys") {
287
287
  let keys_array = citation_keys.as_array().expect("Citation keys should be an array");
@@ -316,7 +316,7 @@ async fn test_entry_type_distribution() {
316
316
  .await;
317
317
 
318
318
  assert!(result.is_ok());
319
- let result = result.unwrap();
319
+ let result = result.expect("Operation failed");
320
320
 
321
321
  if let Some(entry_types) = result.metadata.additional.get("entry_types") {
322
322
  let types_obj = entry_types.as_object().expect("Entry types should be an object");
@@ -348,7 +348,7 @@ async fn test_unicode_support() {
348
348
  .await;
349
349
 
350
350
  assert!(result.is_ok());
351
- let result = result.unwrap();
351
+ let result = result.expect("Operation failed");
352
352
 
353
353
  assert_eq!(
354
354
  result.metadata.additional.get("entry_count"),
@@ -376,7 +376,7 @@ async fn test_empty_fields() {
376
376
  .await;
377
377
 
378
378
  assert!(result.is_ok());
379
- let result = result.unwrap();
379
+ let result = result.expect("Operation failed");
380
380
  assert_eq!(
381
381
  result.metadata.additional.get("entry_count"),
382
382
  Some(&serde_json::json!(1))
@@ -397,7 +397,7 @@ async fn test_comprehensive_file() {
397
397
  .await;
398
398
 
399
399
  assert!(result.is_ok());
400
- let result = result.unwrap();
400
+ let result = result.expect("Operation failed");
401
401
 
402
402
  assert_eq!(
403
403
  result.metadata.additional.get("entry_count"),