kreuzberg 4.1.2 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
  5. data/kreuzberg.gemspec +13 -1
  6. data/lib/kreuzberg/config.rb +70 -35
  7. data/lib/kreuzberg/version.rb +1 -1
  8. data/sig/kreuzberg.rbs +5 -1
  9. data/spec/binding/batch_operations_spec.rb +80 -0
  10. data/spec/binding/metadata_types_spec.rb +77 -57
  11. data/spec/serialization_spec.rb +134 -0
  12. data/spec/unit/config/output_format_spec.rb +380 -0
  13. data/vendor/Cargo.toml +1 -1
  14. data/vendor/kreuzberg/Cargo.toml +1 -1
  15. data/vendor/kreuzberg/README.md +1 -1
  16. data/vendor/kreuzberg/src/embeddings.rs +4 -4
  17. data/vendor/kreuzberg/src/mcp/format.rs +237 -39
  18. data/vendor/kreuzberg/src/mcp/params.rs +26 -33
  19. data/vendor/kreuzberg/src/mcp/server.rs +6 -3
  20. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
  21. data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
  22. data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
  23. data/vendor/kreuzberg/tests/api_embed.rs +84 -50
  24. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
  25. data/vendor/kreuzberg/tests/api_tests.rs +298 -139
  26. data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
  27. data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
  28. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
  29. data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
  30. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
  31. data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
  32. data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
  33. data/vendor/kreuzberg/tests/config_features.rs +19 -15
  34. data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
  35. data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
  36. data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
  37. data/vendor/kreuzberg/tests/core_integration.rs +55 -53
  38. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
  39. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
  40. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
  41. data/vendor/kreuzberg/tests/email_integration.rs +7 -7
  42. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
  43. data/vendor/kreuzberg/tests/error_handling.rs +13 -11
  44. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
  45. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  46. data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
  47. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
  48. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
  49. data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
  50. data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
  51. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
  52. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
  53. data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
  54. data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
  55. data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
  56. data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
  57. data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
  58. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
  59. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
  60. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
  61. data/vendor/kreuzberg/tests/page_markers.rs +1 -1
  62. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
  63. data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
  64. data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
  65. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
  66. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
  67. data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
  68. data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
  69. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
  70. data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
  71. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
  72. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
  73. data/vendor/kreuzberg/tests/security_validation.rs +20 -19
  74. data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
  75. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
  76. data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
  77. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
  78. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
  79. data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
  80. metadata +10 -2
@@ -25,9 +25,9 @@ use zip::write::{FileOptions, ZipWriter};
25
25
  async fn test_ppsx_slideshow_extraction() {
26
26
  let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
27
27
  .parent()
28
- .unwrap()
28
+ .expect("Operation failed")
29
29
  .parent()
30
- .unwrap();
30
+ .expect("Operation failed");
31
31
  let test_file = workspace_root.join("test_documents/presentations/sample.ppsx");
32
32
 
33
33
  if !test_file.exists() {
@@ -69,9 +69,9 @@ async fn test_ppsx_slideshow_extraction() {
69
69
  async fn test_ppsx_with_explicit_mime_type() {
70
70
  let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
71
71
  .parent()
72
- .unwrap()
72
+ .expect("Operation failed")
73
73
  .parent()
74
- .unwrap();
74
+ .expect("Operation failed");
75
75
  let test_file = workspace_root.join("test_documents/presentations/sample.ppsx");
76
76
 
77
77
  if !test_file.exists() {
@@ -120,24 +120,26 @@ async fn test_pptx_with_image_placeholder_no_txbody() {
120
120
  let options: FileOptions<()> = FileOptions::default().compression_method(CompressionMethod::Stored);
121
121
 
122
122
  // Add [Content_Types].xml
123
- zip.start_file("[Content_Types].xml", options).unwrap();
123
+ zip.start_file("[Content_Types].xml", options)
124
+ .expect("Operation failed");
124
125
  zip.write_all(br#"<?xml version="1.0" encoding="UTF-8"?>
125
126
  <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
126
127
  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
127
128
  <Default Extension="xml" ContentType="application/xml"/>
128
129
  <Override PartName="/ppt/presentation.xml" ContentType="application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"/>
129
130
  <Override PartName="/ppt/slides/slide1.xml" ContentType="application/vnd.openxmlformats-officedocument.presentationml.slide+xml"/>
130
- </Types>"#).unwrap();
131
+ </Types>"#).expect("Operation failed");
131
132
 
132
133
  // Add _rels/.rels
133
- zip.start_file("_rels/.rels", options).unwrap();
134
+ zip.start_file("_rels/.rels", options).expect("Operation failed");
134
135
  zip.write_all(br#"<?xml version="1.0" encoding="UTF-8"?>
135
136
  <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
136
137
  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="ppt/presentation.xml"/>
137
- </Relationships>"#).unwrap();
138
+ </Relationships>"#).expect("Operation failed");
138
139
 
139
140
  // Add ppt/presentation.xml
140
- zip.start_file("ppt/presentation.xml", options).unwrap();
141
+ zip.start_file("ppt/presentation.xml", options)
142
+ .expect("Operation failed");
141
143
  zip.write_all(
142
144
  br#"<?xml version="1.0" encoding="UTF-8"?>
143
145
  <p:presentation xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
@@ -148,18 +150,20 @@ async fn test_pptx_with_image_placeholder_no_txbody() {
148
150
  </p:sldIdLst>
149
151
  </p:presentation>"#,
150
152
  )
151
- .unwrap();
153
+ .expect("Operation failed");
152
154
 
153
155
  // Add ppt/_rels/presentation.xml.rels
154
- zip.start_file("ppt/_rels/presentation.xml.rels", options).unwrap();
156
+ zip.start_file("ppt/_rels/presentation.xml.rels", options)
157
+ .expect("Operation failed");
155
158
  zip.write_all(br#"<?xml version="1.0" encoding="UTF-8"?>
156
159
  <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
157
160
  <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" Target="slides/slide1.xml"/>
158
- </Relationships>"#).unwrap();
161
+ </Relationships>"#).expect("Operation failed");
159
162
 
160
163
  // Add ppt/slides/slide1.xml with a shape WITHOUT txBody (image placeholder)
161
164
  // This is the critical test case - a <p:sp> element with no <p:txBody>
162
- zip.start_file("ppt/slides/slide1.xml", options).unwrap();
165
+ zip.start_file("ppt/slides/slide1.xml", options)
166
+ .expect("Operation failed");
163
167
  zip.write_all(
164
168
  br#"<?xml version="1.0" encoding="UTF-8"?>
165
169
  <p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
@@ -259,18 +263,19 @@ async fn test_pptx_with_image_placeholder_no_txbody() {
259
263
  </p:cSld>
260
264
  </p:sld>"#,
261
265
  )
262
- .unwrap();
266
+ .expect("Operation failed");
263
267
 
264
268
  // Add ppt/slides/_rels/slide1.xml.rels (empty)
265
- zip.start_file("ppt/slides/_rels/slide1.xml.rels", options).unwrap();
269
+ zip.start_file("ppt/slides/_rels/slide1.xml.rels", options)
270
+ .expect("Operation failed");
266
271
  zip.write_all(
267
272
  br#"<?xml version="1.0" encoding="UTF-8"?>
268
273
  <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
269
274
  </Relationships>"#,
270
275
  )
271
- .unwrap();
276
+ .expect("Operation failed");
272
277
 
273
- zip.finish().unwrap();
278
+ zip.finish().expect("Operation failed");
274
279
  }
275
280
 
276
281
  // Extract the PPTX file
@@ -336,24 +341,26 @@ async fn test_pptx_mixed_shapes_extraction() {
336
341
  let options: FileOptions<()> = FileOptions::default().compression_method(CompressionMethod::Stored);
337
342
 
338
343
  // Add [Content_Types].xml
339
- zip.start_file("[Content_Types].xml", options).unwrap();
344
+ zip.start_file("[Content_Types].xml", options)
345
+ .expect("Operation failed");
340
346
  zip.write_all(br#"<?xml version="1.0" encoding="UTF-8"?>
341
347
  <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
342
348
  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
343
349
  <Default Extension="xml" ContentType="application/xml"/>
344
350
  <Override PartName="/ppt/presentation.xml" ContentType="application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"/>
345
351
  <Override PartName="/ppt/slides/slide1.xml" ContentType="application/vnd.openxmlformats-officedocument.presentationml.slide+xml"/>
346
- </Types>"#).unwrap();
352
+ </Types>"#).expect("Operation failed");
347
353
 
348
354
  // Add _rels/.rels
349
- zip.start_file("_rels/.rels", options).unwrap();
355
+ zip.start_file("_rels/.rels", options).expect("Operation failed");
350
356
  zip.write_all(br#"<?xml version="1.0" encoding="UTF-8"?>
351
357
  <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
352
358
  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="ppt/presentation.xml"/>
353
- </Relationships>"#).unwrap();
359
+ </Relationships>"#).expect("Operation failed");
354
360
 
355
361
  // Add ppt/presentation.xml
356
- zip.start_file("ppt/presentation.xml", options).unwrap();
362
+ zip.start_file("ppt/presentation.xml", options)
363
+ .expect("Operation failed");
357
364
  zip.write_all(
358
365
  br#"<?xml version="1.0" encoding="UTF-8"?>
359
366
  <p:presentation xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
@@ -364,17 +371,19 @@ async fn test_pptx_mixed_shapes_extraction() {
364
371
  </p:sldIdLst>
365
372
  </p:presentation>"#,
366
373
  )
367
- .unwrap();
374
+ .expect("Operation failed");
368
375
 
369
376
  // Add ppt/_rels/presentation.xml.rels
370
- zip.start_file("ppt/_rels/presentation.xml.rels", options).unwrap();
377
+ zip.start_file("ppt/_rels/presentation.xml.rels", options)
378
+ .expect("Operation failed");
371
379
  zip.write_all(br#"<?xml version="1.0" encoding="UTF-8"?>
372
380
  <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
373
381
  <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" Target="slides/slide1.xml"/>
374
- </Relationships>"#).unwrap();
382
+ </Relationships>"#).expect("Operation failed");
375
383
 
376
384
  // Add slide with various shapes - some with txBody, some without
377
- zip.start_file("ppt/slides/slide1.xml", options).unwrap();
385
+ zip.start_file("ppt/slides/slide1.xml", options)
386
+ .expect("Operation failed");
378
387
  zip.write_all(
379
388
  br#"<?xml version="1.0" encoding="UTF-8"?>
380
389
  <p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
@@ -453,18 +462,19 @@ async fn test_pptx_mixed_shapes_extraction() {
453
462
  </p:cSld>
454
463
  </p:sld>"#,
455
464
  )
456
- .unwrap();
465
+ .expect("Operation failed");
457
466
 
458
467
  // Add empty rels
459
- zip.start_file("ppt/slides/_rels/slide1.xml.rels", options).unwrap();
468
+ zip.start_file("ppt/slides/_rels/slide1.xml.rels", options)
469
+ .expect("Operation failed");
460
470
  zip.write_all(
461
471
  br#"<?xml version="1.0" encoding="UTF-8"?>
462
472
  <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
463
473
  </Relationships>"#,
464
474
  )
465
- .unwrap();
475
+ .expect("Operation failed");
466
476
 
467
- zip.finish().unwrap();
477
+ zip.finish().expect("Operation failed");
468
478
  }
469
479
 
470
480
  let result = extract_file(
@@ -184,9 +184,9 @@ fn test_register_multiple_validators_succeeds() {
184
184
  should_fail: true,
185
185
  });
186
186
 
187
- registry.register(v1).unwrap();
188
- registry.register(v2).unwrap();
189
- registry.register(v3).unwrap();
187
+ registry.register(v1).expect("Operation failed");
188
+ registry.register(v2).expect("Operation failed");
189
+ registry.register(v3).expect("Operation failed");
190
190
 
191
191
  let list = registry.list();
192
192
  assert_eq!(list.len(), 3, "Should have three validators");
@@ -205,7 +205,7 @@ fn test_validator_unregistration_succeeds() {
205
205
  should_fail: false,
206
206
  });
207
207
 
208
- registry.register(validator).unwrap();
208
+ registry.register(validator).expect("Operation failed");
209
209
  assert_eq!(registry.list().len(), 1);
210
210
 
211
211
  let result = registry.remove("temp-validator");
@@ -298,8 +298,8 @@ fn test_clear_validators_succeeds() {
298
298
  should_fail: false,
299
299
  });
300
300
 
301
- registry.register(v1).unwrap();
302
- registry.register(v2).unwrap();
301
+ registry.register(v1).expect("Operation failed");
302
+ registry.register(v2).expect("Operation failed");
303
303
  assert_eq!(registry.list().len(), 2);
304
304
 
305
305
  let result = registry.shutdown_all();
@@ -355,9 +355,9 @@ fn test_get_all_validators_respects_priority() {
355
355
  priority: 100,
356
356
  });
357
357
 
358
- registry.register(medium).unwrap();
359
- registry.register(low).unwrap();
360
- registry.register(high).unwrap();
358
+ registry.register(medium).expect("Operation failed");
359
+ registry.register(low).expect("Operation failed");
360
+ registry.register(high).expect("Operation failed");
361
361
 
362
362
  let all = registry.get_all();
363
363
  assert_eq!(all.len(), 3, "Should have three validators");
@@ -397,11 +397,11 @@ fn test_get_extractor_by_mime_type_succeeds() {
397
397
  priority: 50,
398
398
  });
399
399
 
400
- registry.register(extractor).unwrap();
400
+ registry.register(extractor).expect("Operation failed");
401
401
 
402
402
  let result = registry.get("application/pdf");
403
403
  assert!(result.is_ok(), "Should find extractor for PDF");
404
- assert_eq!(result.unwrap().name(), "pdf-extractor");
404
+ assert_eq!(result.expect("Operation failed").name(), "pdf-extractor");
405
405
  }
406
406
 
407
407
  /// Test extractor not found for unsupported MIME type.
@@ -437,10 +437,10 @@ fn test_extractor_priority_selection() {
437
437
  priority: 100,
438
438
  });
439
439
 
440
- registry.register(low_priority).unwrap();
441
- registry.register(high_priority).unwrap();
440
+ registry.register(low_priority).expect("Operation failed");
441
+ registry.register(high_priority).expect("Operation failed");
442
442
 
443
- let result = registry.get("text/plain").unwrap();
443
+ let result = registry.get("text/plain").expect("Value not found");
444
444
  assert_eq!(
445
445
  result.name(),
446
446
  "high-priority-extractor",
@@ -459,15 +459,15 @@ fn test_extractor_wildcard_mime_matching() {
459
459
  priority: 50,
460
460
  });
461
461
 
462
- registry.register(extractor).unwrap();
462
+ registry.register(extractor).expect("Operation failed");
463
463
 
464
464
  let result = registry.get("text/plain");
465
465
  assert!(result.is_ok(), "Should match text/plain with text/*");
466
- assert_eq!(result.unwrap().name(), "text-extractor");
466
+ assert_eq!(result.expect("Operation failed").name(), "text-extractor");
467
467
 
468
468
  let result = registry.get("text/html");
469
469
  assert!(result.is_ok(), "Should match text/html with text/*");
470
- assert_eq!(result.unwrap().name(), "text-extractor");
470
+ assert_eq!(result.expect("Operation failed").name(), "text-extractor");
471
471
 
472
472
  let result = registry.get("application/pdf");
473
473
  assert!(result.is_err(), "Should not match application/pdf with text/*");
@@ -484,7 +484,7 @@ fn test_extractor_unregistration_succeeds() {
484
484
  priority: 50,
485
485
  });
486
486
 
487
- registry.register(extractor).unwrap();
487
+ registry.register(extractor).expect("Operation failed");
488
488
  assert_eq!(registry.list().len(), 1);
489
489
 
490
490
  let result = registry.remove("temp-extractor");
@@ -506,17 +506,20 @@ fn test_extractor_multiple_mime_types() {
506
506
  priority: 50,
507
507
  });
508
508
 
509
- registry.register(extractor).unwrap();
509
+ registry.register(extractor).expect("Operation failed");
510
510
 
511
511
  assert!(registry.get("application/pdf").is_ok());
512
512
  assert!(registry.get("application/vnd.ms-excel").is_ok());
513
513
  assert!(registry.get("text/csv").is_ok());
514
514
 
515
515
  assert_eq!(
516
- registry.get("application/pdf").unwrap().name(),
516
+ registry.get("application/pdf").expect("Value not found").name(),
517
+ "multi-format-extractor"
518
+ );
519
+ assert_eq!(
520
+ registry.get("text/csv").expect("Value not found").name(),
517
521
  "multi-format-extractor"
518
522
  );
519
- assert_eq!(registry.get("text/csv").unwrap().name(), "multi-format-extractor");
520
523
  }
521
524
 
522
525
  /// Test clearing all extractors.
@@ -535,8 +538,8 @@ fn test_clear_extractors_succeeds() {
535
538
  priority: 50,
536
539
  });
537
540
 
538
- registry.register(e1).unwrap();
539
- registry.register(e2).unwrap();
541
+ registry.register(e1).expect("Operation failed");
542
+ registry.register(e2).expect("Operation failed");
540
543
  assert_eq!(registry.list().len(), 2);
541
544
 
542
545
  let result = registry.shutdown_all();
@@ -686,7 +686,7 @@ async fn test_rst_extraction_no_errors() {
686
686
  result.err()
687
687
  );
688
688
 
689
- let extraction = result.unwrap();
689
+ let extraction = result.expect("Operation failed");
690
690
 
691
691
  assert!(!extraction.content.is_empty(), "Extracted content should not be empty");
692
692
 
@@ -74,7 +74,7 @@ async fn test_rtf_accent_extraction() {
74
74
  let result = extract_file(&path, Some("application/rtf"), &config).await;
75
75
 
76
76
  assert!(result.is_ok(), "RTF extraction should succeed for accent.rtf");
77
- let extraction = result.unwrap();
77
+ let extraction = result.expect("Operation failed");
78
78
 
79
79
  assert_eq!(extraction.mime_type, "application/rtf");
80
80
 
@@ -112,7 +112,7 @@ async fn test_rtf_bookmark_extraction() {
112
112
  let result = extract_file(&path, Some("application/rtf"), &config).await;
113
113
 
114
114
  assert!(result.is_ok(), "RTF extraction should succeed for bookmark.rtf");
115
- let extraction = result.unwrap();
115
+ let extraction = result.expect("Operation failed");
116
116
 
117
117
  let content = extraction.content.to_lowercase();
118
118
 
@@ -137,7 +137,7 @@ async fn test_rtf_footnote_extraction() {
137
137
  let result = extract_file(&path, Some("application/rtf"), &config).await;
138
138
 
139
139
  assert!(result.is_ok(), "RTF extraction should succeed for footnote.rtf");
140
- let extraction = result.unwrap();
140
+ let extraction = result.expect("Operation failed");
141
141
 
142
142
  assert!(!extraction.content.is_empty(), "Content should not be empty");
143
143
 
@@ -176,7 +176,7 @@ async fn test_rtf_formatting_extraction() {
176
176
  let result = extract_file(&path, Some("application/rtf"), &config).await;
177
177
 
178
178
  assert!(result.is_ok(), "RTF extraction should succeed for formatting.rtf");
179
- let extraction = result.unwrap();
179
+ let extraction = result.expect("Operation failed");
180
180
 
181
181
  assert!(!extraction.content.is_empty(), "Content should not be empty");
182
182
 
@@ -223,7 +223,7 @@ async fn test_rtf_heading_extraction() {
223
223
  let result = extract_file(&path, Some("application/rtf"), &config).await;
224
224
 
225
225
  assert!(result.is_ok(), "RTF extraction should succeed for heading.rtf");
226
- let extraction = result.unwrap();
226
+ let extraction = result.expect("Operation failed");
227
227
 
228
228
  assert!(!extraction.content.is_empty(), "Content should not be empty");
229
229
 
@@ -269,7 +269,7 @@ async fn test_rtf_image_extraction() {
269
269
  let result = extract_file(&path, Some("application/rtf"), &config).await;
270
270
 
271
271
  assert!(result.is_ok(), "RTF extraction should succeed for image.rtf");
272
- let extraction = result.unwrap();
272
+ let extraction = result.expect("Operation failed");
273
273
 
274
274
  assert!(!extraction.content.is_empty(), "Content should not be empty");
275
275
 
@@ -301,7 +301,7 @@ async fn test_rtf_link_extraction() {
301
301
  let result = extract_file(&path, Some("application/rtf"), &config).await;
302
302
 
303
303
  assert!(result.is_ok(), "RTF extraction should succeed for link.rtf");
304
- let extraction = result.unwrap();
304
+ let extraction = result.expect("Operation failed");
305
305
 
306
306
  assert!(!extraction.content.is_empty(), "Content should not be empty");
307
307
 
@@ -328,7 +328,7 @@ async fn test_rtf_list_complex_extraction() {
328
328
  let result = extract_file(&path, Some("application/rtf"), &config).await;
329
329
 
330
330
  assert!(result.is_ok(), "RTF extraction should succeed for list_complex.rtf");
331
- let extraction = result.unwrap();
331
+ let extraction = result.expect("Operation failed");
332
332
 
333
333
  assert!(!extraction.content.is_empty(), "Content should not be empty");
334
334
 
@@ -381,7 +381,7 @@ async fn test_rtf_list_simple_extraction() {
381
381
  let result = extract_file(&path, Some("application/rtf"), &config).await;
382
382
 
383
383
  assert!(result.is_ok(), "RTF extraction should succeed for list_simple.rtf");
384
- let extraction = result.unwrap();
384
+ let extraction = result.expect("Operation failed");
385
385
 
386
386
  assert!(!extraction.content.is_empty(), "Content should not be empty");
387
387
 
@@ -422,7 +422,7 @@ async fn test_rtf_table_error_codes_extraction() {
422
422
  result.is_ok(),
423
423
  "RTF extraction should succeed for table_error_codes.rtf"
424
424
  );
425
- let extraction = result.unwrap();
425
+ let extraction = result.expect("Operation failed");
426
426
 
427
427
  assert!(
428
428
  extraction.mime_type == "application/rtf",
@@ -448,7 +448,7 @@ async fn test_rtf_table_simple_extraction() {
448
448
  let result = extract_file(&path, Some("application/rtf"), &config).await;
449
449
 
450
450
  assert!(result.is_ok(), "RTF extraction should succeed for table_simple.rtf");
451
- let extraction = result.unwrap();
451
+ let extraction = result.expect("Operation failed");
452
452
 
453
453
  assert!(
454
454
  extraction.mime_type == "application/rtf",
@@ -470,7 +470,7 @@ async fn test_rtf_unicode_extraction() {
470
470
  let result = extract_file(&path, Some("application/rtf"), &config).await;
471
471
 
472
472
  assert!(result.is_ok(), "RTF extraction should succeed for unicode.rtf");
473
- let extraction = result.unwrap();
473
+ let extraction = result.expect("Operation failed");
474
474
 
475
475
  assert!(!extraction.content.is_empty(), "Content should not be empty");
476
476
 
@@ -493,8 +493,8 @@ async fn test_rtf_extraction_deterministic_unicode() {
493
493
 
494
494
  assert!(result1.is_ok() && result2.is_ok(), "Both extractions should succeed");
495
495
 
496
- let extraction1 = result1.unwrap();
497
- let extraction2 = result2.unwrap();
496
+ let extraction1 = result1.expect("Operation failed");
497
+ let extraction2 = result2.expect("Operation failed");
498
498
 
499
499
  assert_eq!(
500
500
  extraction1.content, extraction2.content,
@@ -514,8 +514,8 @@ async fn test_rtf_extraction_deterministic_list_complex() {
514
514
 
515
515
  assert!(result1.is_ok() && result2.is_ok(), "Both extractions should succeed");
516
516
 
517
- let extraction1 = result1.unwrap();
518
- let extraction2 = result2.unwrap();
517
+ let extraction1 = result1.expect("Operation failed");
518
+ let extraction2 = result2.expect("Operation failed");
519
519
 
520
520
  assert_eq!(
521
521
  extraction1.content, extraction2.content,
@@ -551,7 +551,7 @@ async fn test_rtf_no_critical_content_loss() {
551
551
  filename
552
552
  );
553
553
 
554
- let extraction = result.unwrap();
554
+ let extraction = result.expect("Operation failed");
555
555
  assert!(
556
556
  !extraction.content.is_empty(),
557
557
  "FAIL: CRITICAL - Extracted 0 bytes from {}. RTF extractor lost all content.",
@@ -582,7 +582,7 @@ async fn test_rtf_mime_type_preservation() {
582
582
 
583
583
  assert!(result.is_ok(), "Extraction should succeed for {}", filename);
584
584
 
585
- let extraction = result.unwrap();
585
+ let extraction = result.expect("Operation failed");
586
586
  assert_eq!(
587
587
  extraction.mime_type, "application/rtf",
588
588
  "FAIL: MIME type not preserved for {}",
@@ -31,11 +31,11 @@ fn test_archive_zip_bomb_detection() {
31
31
  let mut zip = ZipWriter::new(&mut cursor);
32
32
  let options = FileOptions::<'_, ()>::default();
33
33
 
34
- zip.start_file("large.txt", options).unwrap();
34
+ zip.start_file("large.txt", options).expect("Operation failed");
35
35
  let zeros = vec![0u8; 10 * 1024 * 1024];
36
- zip.write_all(&zeros).unwrap();
36
+ zip.write_all(&zeros).expect("Operation failed");
37
37
 
38
- zip.finish().unwrap();
38
+ zip.finish().expect("Operation failed");
39
39
  }
40
40
 
41
41
  let bytes = cursor.into_inner();
@@ -57,10 +57,10 @@ fn test_archive_path_traversal_zip() {
57
57
  let mut zip = ZipWriter::new(&mut cursor);
58
58
  let options = FileOptions::<'_, ()>::default();
59
59
 
60
- zip.start_file("../../etc/passwd", options).unwrap();
61
- zip.write_all(b"malicious content").unwrap();
60
+ zip.start_file("../../etc/passwd", options).expect("Operation failed");
61
+ zip.write_all(b"malicious content").expect("Operation failed");
62
62
 
63
- zip.finish().unwrap();
63
+ zip.finish().expect("Operation failed");
64
64
  }
65
65
 
66
66
  let bytes = cursor.into_inner();
@@ -97,10 +97,10 @@ fn test_archive_absolute_paths_rejected() {
97
97
  let mut zip = ZipWriter::new(&mut cursor);
98
98
  let options = FileOptions::<'_, ()>::default();
99
99
 
100
- zip.start_file("/tmp/malicious.txt", options).unwrap();
101
- zip.write_all(b"malicious content").unwrap();
100
+ zip.start_file("/tmp/malicious.txt", options).expect("Operation failed");
101
+ zip.write_all(b"malicious content").expect("Operation failed");
102
102
 
103
- zip.finish().unwrap();
103
+ zip.finish().expect("Operation failed");
104
104
  }
105
105
 
106
106
  let bytes = cursor.into_inner();
@@ -125,10 +125,10 @@ fn test_archive_deeply_nested_directories() {
125
125
  let deep_path = (0..100).map(|i| format!("dir{}", i)).collect::<Vec<_>>().join("/");
126
126
  let file_path = format!("{}/file.txt", deep_path);
127
127
 
128
- zip.start_file(&file_path, options).unwrap();
129
- zip.write_all(b"deep content").unwrap();
128
+ zip.start_file(&file_path, options).expect("Operation failed");
129
+ zip.write_all(b"deep content").expect("Operation failed");
130
130
 
131
- zip.finish().unwrap();
131
+ zip.finish().expect("Operation failed");
132
132
  }
133
133
 
134
134
  let bytes = cursor.into_inner();
@@ -149,11 +149,12 @@ fn test_archive_many_small_files() {
149
149
  let options = FileOptions::<'_, ()>::default();
150
150
 
151
151
  for i in 0..1000 {
152
- zip.start_file(format!("file{}.txt", i), options).unwrap();
153
- zip.write_all(b"small content").unwrap();
152
+ zip.start_file(format!("file{}.txt", i), options)
153
+ .expect("Operation failed");
154
+ zip.write_all(b"small content").expect("Operation failed");
154
155
  }
155
156
 
156
- zip.finish().unwrap();
157
+ zip.finish().expect("Operation failed");
157
158
  }
158
159
 
159
160
  let bytes = cursor.into_inner();
@@ -404,13 +405,13 @@ fn test_security_directory_instead_of_file() {
404
405
 
405
406
  #[test]
406
407
  fn test_security_special_file_handling() {
407
- let mut tmpfile = NamedTempFile::new().unwrap();
408
- tmpfile.write_all(b"test content").unwrap();
409
- tmpfile.flush().unwrap();
408
+ let mut tmpfile = NamedTempFile::new().expect("Operation failed");
409
+ tmpfile.write_all(b"test content").expect("Operation failed");
410
+ tmpfile.flush().expect("Operation failed");
410
411
  let path = tmpfile.path();
411
412
 
412
413
  let config = ExtractionConfig::default();
413
- let result = extract_file_sync(path.to_str().unwrap(), None, &config);
414
+ let result = extract_file_sync(path.to_str().expect("Operation failed"), None, &config);
414
415
 
415
416
  assert!(result.is_ok() || result.is_err());
416
417
  }