kreuzberg 4.1.1 → 4.2.0
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/README.md +8 -5
- data/ext/kreuzberg_rb/native/Cargo.toml +2 -2
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/ext/kreuzberg_rb/native/src/config/types.rs +23 -13
- data/kreuzberg.gemspec +14 -2
- data/lib/kreuzberg/api_proxy.rb +0 -1
- data/lib/kreuzberg/cli_proxy.rb +0 -1
- data/lib/kreuzberg/config.rb +70 -35
- data/lib/kreuzberg/mcp_proxy.rb +0 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +5 -1
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +3 -3
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +55 -53
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +3 -3
- data/vendor/kreuzberg-tesseract/build.rs +4 -4
- data/vendor/kreuzberg-tesseract/src/lib.rs +6 -6
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +3 -3
- metadata +13 -2
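
Most of the vendored-test churn below is one mechanical refactor: panicking accessors gain diagnostic messages via `.expect(...)`, and async call sites are reflowed to `.await` before unwrapping. The old-side lines are truncated to a trailing `.` in this rendering, so only the added side is certain; a minimal sketch of the pattern, assuming the removed calls were bare `.unwrap()`s:

```rust
// Sketch of the refactor pattern seen throughout the hunks below.
// Assumption: the removed (old-side) calls were bare `.unwrap()`s; the
// diff rendering truncates them, so only the new `.expect(...)` side
// is confirmed.
fn main() {
    let maybe: Option<&str> = Some("archive");

    // Old style (assumed): panics with a generic unwrap message.
    // let value = maybe.unwrap();

    // New style (confirmed by the + lines): panics with a named message,
    // which makes failures in a large test suite much easier to trace.
    let value = maybe.expect("Operation failed");
    println!("{value}");
}
```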
data/vendor/kreuzberg/tests/archive_integration.rs

@@ -34,7 +34,7 @@ async fn test_zip_basic_extraction() {
     assert!(result.content.contains("Hello from ZIP!"));

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -54,16 +54,16 @@ async fn test_zip_multiple_files() {
     let mut zip = ZipWriter::new(&mut cursor);
     let options = FileOptions::<'_, ()>::default();

-    zip.start_file("file1.txt", options).
-    zip.write_all(b"Content 1").
+    zip.start_file("file1.txt", options).expect("Operation failed");
+    zip.write_all(b"Content 1").expect("Operation failed");

-    zip.start_file("file2.md", options).
-    zip.write_all(b"# Content 2").
+    zip.start_file("file2.md", options).expect("Operation failed");
+    zip.write_all(b"# Content 2").expect("Operation failed");

-    zip.start_file("file3.json", options).
-    zip.write_all(b"{\"key\": \"value\"}").
+    zip.start_file("file3.json", options).expect("Operation failed");
+    zip.write_all(b"{\"key\": \"value\"}").expect("Operation failed");

-    zip.finish().
+    zip.finish().expect("Operation failed");
 }

 let zip_bytes = cursor.into_inner();

@@ -84,7 +84,7 @@ async fn test_zip_multiple_files() {
     assert!(result.content.contains("value"));

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -105,16 +105,17 @@ async fn test_zip_nested_directories() {
     let mut zip = ZipWriter::new(&mut cursor);
     let options = FileOptions::<'_, ()>::default();

-    zip.add_directory("dir1/", options).
-    zip.add_directory("dir1/subdir/", options).
+    zip.add_directory("dir1/", options).expect("Operation failed");
+    zip.add_directory("dir1/subdir/", options).expect("Operation failed");

-    zip.start_file("dir1/file.txt", options).
-    zip.write_all(b"File in dir1").
+    zip.start_file("dir1/file.txt", options).expect("Operation failed");
+    zip.write_all(b"File in dir1").expect("Operation failed");

-    zip.start_file("dir1/subdir/nested.txt", options)
-
+    zip.start_file("dir1/subdir/nested.txt", options)
+        .expect("Operation failed");
+    zip.write_all(b"Nested file").expect("Operation failed");

-    zip.finish().
+    zip.finish().expect("Operation failed");
 }

 let zip_bytes = cursor.into_inner();

@@ -134,7 +135,7 @@ async fn test_zip_nested_directories() {
     assert!(result.content.contains("Nested file"));

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -172,7 +173,7 @@ async fn test_tar_extraction() {
     assert!(result.content.contains("Hello from TAR!"));

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -202,7 +203,7 @@ async fn test_tar_gz_extraction() {
     assert!(result.content.contains("test.txt"));

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -242,13 +243,14 @@ async fn test_nested_archive() {
     let mut zip = ZipWriter::new(&mut cursor);
     let options = FileOptions::<'_, ()>::default();

-    zip.start_file("inner.zip", options).
-    zip.write_all(&inner_zip).
+    zip.start_file("inner.zip", options).expect("Operation failed");
+    zip.write_all(&inner_zip).expect("Operation failed");

-    zip.start_file("readme.txt", options).
-    zip.write_all(b"This archive contains another archive")
+    zip.start_file("readme.txt", options).expect("Operation failed");
+    zip.write_all(b"This archive contains another archive")
+        .expect("Operation failed");

-    zip.finish().
+    zip.finish().expect("Operation failed");
 }

 let outer_zip_bytes = cursor.into_inner();

@@ -265,7 +267,7 @@ async fn test_nested_archive() {
     assert!(result.content.contains("This archive contains another archive"));

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -284,19 +286,19 @@ async fn test_archive_mixed_formats() {
     let mut zip = ZipWriter::new(&mut cursor);
     let options = FileOptions::<'_, ()>::default();

-    zip.start_file("document.txt", options).
-    zip.write_all(b"Text document").
+    zip.start_file("document.txt", options).expect("Operation failed");
+    zip.write_all(b"Text document").expect("Operation failed");

-    zip.start_file("readme.md", options).
-    zip.write_all(b"# README").
+    zip.start_file("readme.md", options).expect("Operation failed");
+    zip.write_all(b"# README").expect("Operation failed");

-    zip.start_file("image.png", options).
-    zip.write_all(&[0x89, 0x50, 0x4E, 0x47]).
+    zip.start_file("image.png", options).expect("Operation failed");
+    zip.write_all(&[0x89, 0x50, 0x4E, 0x47]).expect("Operation failed");

-    zip.start_file("document.pdf", options).
-    zip.write_all(b"%PDF-1.4").
+    zip.start_file("document.pdf", options).expect("Operation failed");
+    zip.write_all(b"%PDF-1.4").expect("Operation failed");

-    zip.finish().
+    zip.finish().expect("Operation failed");
 }

 let zip_bytes = cursor.into_inner();

@@ -317,7 +319,7 @@ async fn test_archive_mixed_formats() {
     assert!(result.content.contains("# README"));

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -373,11 +375,13 @@ async fn test_large_archive() {
     let options = FileOptions::<'_, ()>::default();

     for i in 0..100 {
-        zip.start_file(format!("file_{}.txt", i), options)
-
+        zip.start_file(format!("file_{}.txt", i), options)
+            .expect("Operation failed");
+        zip.write_all(format!("Content {}", i).as_bytes())
+            .expect("Failed to convert to bytes");
     }

-    zip.finish().
+    zip.finish().expect("Operation failed");
 }

 let zip_bytes = cursor.into_inner();

@@ -390,7 +394,7 @@ async fn test_large_archive() {
     assert!(result.tables.is_empty(), "Archive should not have tables");

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -418,16 +422,19 @@ async fn test_archive_with_special_characters() {
     let mut zip = ZipWriter::new(&mut cursor);
     let options = FileOptions::<'_, ()>::default();

-    zip.start_file("测试文件.txt", options).
-    zip.write_all("Unicode content".as_bytes())
+    zip.start_file("测试文件.txt", options).expect("Operation failed");
+    zip.write_all("Unicode content".as_bytes())
+        .expect("Failed to convert to bytes");

-    zip.start_file("file with spaces.txt", options)
-
+    zip.start_file("file with spaces.txt", options)
+        .expect("Operation failed");
+    zip.write_all(b"Spaces in filename").expect("Operation failed");

-    zip.start_file("file-with-dashes.txt", options)
-
+    zip.start_file("file-with-dashes.txt", options)
+        .expect("Operation failed");
+    zip.write_all(b"Dashes").expect("Operation failed");

-    zip.finish().
+    zip.finish().expect("Operation failed");
 }

 let zip_bytes = cursor.into_inner();

@@ -444,7 +451,7 @@ async fn test_archive_with_special_characters() {
     assert!(result.content.contains("file-with-dashes.txt"));

     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -463,7 +470,7 @@ async fn test_empty_archive() {
     let mut cursor = Cursor::new(Vec::new());
     {
         let zip = ZipWriter::new(&mut cursor);
-        zip.finish().
+        zip.finish().expect("Operation failed");
     }

     let zip_bytes = cursor.into_inner();

@@ -477,7 +484,7 @@ async fn test_empty_archive() {

     assert!(result.content.contains("ZIP Archive"));
     assert!(result.metadata.format.is_some());
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -503,7 +510,7 @@ fn test_archive_extraction_sync() {
     assert!(result.content.contains("Hello from ZIP!"));

     assert!(result.metadata.format.is_some(), "Should have archive metadata");
-    let archive_meta = match result.metadata.format.as_ref().
+    let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
         kreuzberg::FormatMetadata::Archive(meta) => meta,
         _ => panic!("Expected Archive metadata"),
     };

@@ -519,10 +526,10 @@ fn create_simple_zip() -> Vec<u8> {
     let mut zip = ZipWriter::new(&mut cursor);
     let options = FileOptions::<'_, ()>::default();

-    zip.start_file("test.txt", options).
-    zip.write_all(b"Hello from ZIP!").
+    zip.start_file("test.txt", options).expect("Operation failed");
+    zip.write_all(b"Hello from ZIP!").expect("Operation failed");

-    zip.finish().
+    zip.finish().expect("Operation failed");
     }
     cursor.into_inner()
 }

@@ -534,12 +541,12 @@ fn create_simple_tar() -> Vec<u8> {

     let data = b"Hello from TAR!";
     let mut header = tar::Header::new_gnu();
-    header.set_path("test.txt").
+    header.set_path("test.txt").expect("Operation failed");
     header.set_size(data.len() as u64);
     header.set_cksum();
-    tar.append(&header, &data[..]).
+    tar.append(&header, &data[..]).expect("Operation failed");

-    tar.finish().
+    tar.finish().expect("Operation failed");
     }
     cursor.into_inner()
 }
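Assembled from the new-side lines of the final hunks, the patched `create_simple_zip` helper reads as below. A self-contained sketch: the `use` block is assumed (the diff omits the file's imports), though `FileOptions::<'_, ()>` matches the `zip` crate's conventional `zip::write` paths.

```rust
// create_simple_zip as reconstructed from the + lines above.
// Assumption: ZipWriter/FileOptions come from the `zip` crate's write
// module; the diff does not show the test file's use block.
use std::io::{Cursor, Write};
use zip::write::{FileOptions, ZipWriter};

fn create_simple_zip() -> Vec<u8> {
    let mut cursor = Cursor::new(Vec::new());
    {
        let mut zip = ZipWriter::new(&mut cursor);
        let options = FileOptions::<'_, ()>::default();

        zip.start_file("test.txt", options).expect("Operation failed");
        zip.write_all(b"Hello from ZIP!").expect("Operation failed");

        zip.finish().expect("Operation failed");
    }
    cursor.into_inner()
}

fn main() {
    // The tests feed these bytes to kreuzberg's extractor; here we just
    // confirm the archive is non-empty and starts with the ZIP magic.
    let bytes = create_simple_zip();
    assert_eq!(&bytes[..2], b"PK");
    println!("zip is {} bytes", bytes.len());
}
```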
data/vendor/kreuzberg/tests/batch_orchestration.rs

@@ -63,7 +63,7 @@ async fn test_batch_documents_parallel_execution() {
     let parallel_duration = parallel_start.elapsed();

     assert!(results.is_ok(), "Batch extraction should succeed");
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 20, "Should process all 20 files");

     for result in &results {

@@ -102,7 +102,7 @@ async fn test_batch_documents_concurrency_limiting() {
     let results = batch_extract_file(paths, &config).await;

     assert!(results.is_ok());
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 4);
 }

@@ -127,7 +127,7 @@ async fn test_batch_documents_default_concurrency() {
     let duration = start.elapsed();

     assert!(results.is_ok());
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 50);

     println!("Processed 50 files in {:?}", duration);

@@ -152,7 +152,9 @@ async fn test_batch_documents_preserves_order() {
         get_test_file_path("xml/simple_note.xml"),
     ];

-    let results = batch_extract_file(paths, &config)
+    let results = batch_extract_file(paths, &config)
+        .await
+        .expect("Async operation failed");

     assert_eq!(results.len(), 3, "Should have 3 results");

@@ -201,7 +203,7 @@ async fn test_multipage_pdf_extraction() {
     let duration = start.elapsed();

     assert!(result.is_ok(), "Multi-page PDF extraction should succeed");
-    let extraction = result.
+    let extraction = result.expect("Operation failed");

     assert!(!extraction.content.is_empty(), "Should extract text from all pages");
     println!("Extracted multi-page PDF in {:?}", duration);

@@ -230,7 +232,7 @@ async fn test_concurrent_pdf_extractions() {
     let duration = start.elapsed();

     assert!(results.is_ok());
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 10);

     println!("Processed 10 PDFs in {:?}", duration);

@@ -318,7 +320,7 @@ async fn test_batch_bytes_parallel_processing() {
     let duration = start.elapsed();

     assert!(results.is_ok());
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 30);

     for (i, result) in results.iter().enumerate() {

@@ -350,7 +352,7 @@ async fn test_batch_bytes_mixed_valid_invalid() {
     let results = batch_extract_bytes(owned_contents, &config).await;

     assert!(results.is_ok());
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 5);

     assert_text_content(&results[0].content, "valid content 1");

@@ -394,7 +396,7 @@ async fn test_batch_utilizes_multiple_cores() {
     let duration = start.elapsed();

     assert!(results.is_ok());
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 20);

     println!(

@@ -437,7 +439,7 @@ async fn test_batch_memory_pressure_handling() {
     let duration = start.elapsed();

     assert!(results.is_ok());
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 50);

     println!("Processed 50 large documents with concurrency limit in {:?}", duration);

@@ -469,7 +471,9 @@ async fn test_batch_scales_with_cpu_count() {
         .collect();

     let start = Instant::now();
-    let _ = batch_extract_bytes(owned_contents_1, &config_1)
+    let _ = batch_extract_bytes(owned_contents_1, &config_1)
+        .await
+        .expect("Async operation failed");
     let duration_1 = start.elapsed();

     let config_full = ExtractionConfig {

@@ -483,7 +487,9 @@ async fn test_batch_scales_with_cpu_count() {
         .collect();

     let start = Instant::now();
-    let _ = batch_extract_bytes(owned_contents_full, &config_full)
+    let _ = batch_extract_bytes(owned_contents_full, &config_full)
+        .await
+        .expect("Async operation failed");
     let duration_full = start.elapsed();

     println!(

@@ -522,7 +528,7 @@ async fn test_batch_mixed_document_types() {
     let results = batch_extract_file(paths, &config).await;

     assert!(results.is_ok());
-    let results = results.
+    let results = results.expect("Operation failed");
     assert_eq!(results.len(), 4);

     for (i, result) in results.iter().enumerate() {

@@ -572,7 +578,9 @@ async fn test_batch_accuracy_under_load() {
         .map(|(bytes, mime)| (bytes.to_vec(), mime.to_string()))
         .collect();

-    let results = batch_extract_bytes(owned_contents, &config)
+    let results = batch_extract_bytes(owned_contents, &config)
+        .await
+        .expect("Async operation failed");

     assert_eq!(results.len(), 100);
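The batch hunks above all funnel through the same two entry points. A hedged usage sketch follows: the names `batch_extract_bytes` / `ExtractionConfig`, the `(Vec<u8>, String)` content/MIME pairs, and the await-then-expect shape are taken from the diff; the crate-root import paths and the `Default` impl are assumptions.

```rust
// Hedged sketch of the batch API as these tests exercise it. Confirmed by
// the diff: the function takes a collection plus &ExtractionConfig and its
// future resolves to a Result over a Vec of results. Assumed: import paths
// and ExtractionConfig::default().
use kreuzberg::{batch_extract_bytes, ExtractionConfig};

#[tokio::main]
async fn main() {
    let config = ExtractionConfig::default();
    let owned_contents: Vec<(Vec<u8>, String)> = vec![
        (b"valid content 1".to_vec(), "text/plain".to_string()),
        (b"# heading".to_vec(), "text/markdown".to_string()),
    ];

    // The 4.2.0 style: await the future, then unwrap with a message.
    let results = batch_extract_bytes(owned_contents, &config)
        .await
        .expect("Async operation failed");
    assert_eq!(results.len(), 2);
}
```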
data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs

@@ -21,7 +21,7 @@ mod tests {

     let mut buffers = vec![];
     for _ in 0..3 {
-        let buf = pool.acquire().
+        let buf = pool.acquire().expect("Operation failed");
         buffers.push(buf);
     }

@@ -31,7 +31,7 @@ mod tests {

     let mut buffers = vec![];
     for _ in 0..3 {
-        let buf = pool.acquire().
+        let buf = pool.acquire().expect("Operation failed");
         buffers.push(buf);
     }
     drop(buffers);

@@ -47,8 +47,8 @@ mod tests {
     let mut results = vec![];

     for _i in 0..5 {
-        let string_buf = processor.string_pool().acquire().
-        let byte_buf = processor.byte_pool().acquire().
+        let string_buf = processor.string_pool().acquire().expect("Operation failed");
+        let byte_buf = processor.byte_pool().acquire().expect("Operation failed");

         results.push((string_buf, byte_buf));
     }

@@ -65,17 +65,17 @@ mod tests {
     let pool = create_string_buffer_pool(5, 4096);

     let capacity_initial = {
-        let buf = pool.acquire().
+        let buf = pool.acquire().expect("Operation failed");
         buf.capacity()
     };

     for _ in 0..10 {
-        let mut buf = pool.acquire().
+        let mut buf = pool.acquire().expect("Operation failed");
         buf.push_str("test data");
     }

     let capacity_final = {
-        let buf = pool.acquire().
+        let buf = pool.acquire().expect("Operation failed");
         buf.capacity()
     };

@@ -101,15 +101,15 @@ mod tests {
     let processor = BatchProcessor::new();

     {
-        let _s1 = processor.string_pool().acquire().
-        let _s2 = processor.string_pool().acquire().
-        let _b1 = processor.byte_pool().acquire().
+        let _s1 = processor.string_pool().acquire().expect("Operation failed");
+        let _s2 = processor.string_pool().acquire().expect("Operation failed");
+        let _b1 = processor.byte_pool().acquire().expect("Operation failed");
     }

     assert!(processor.string_pool_size() > 0);
     assert!(processor.byte_pool_size() > 0);

-    processor.clear_pools().
+    processor.clear_pools().expect("Operation failed");

     assert_eq!(processor.string_pool_size(), 0);
     assert_eq!(processor.byte_pool_size(), 0);

@@ -137,7 +137,7 @@ mod tests {
     }

     for handle in handles {
-        handle.join().
+        handle.join().expect("Operation failed");
     }

     assert!(processor.string_pool_size() <= 10);

@@ -148,7 +148,7 @@ mod tests {
 fn test_pool_respects_capacity_hints() {
     let pool = create_string_buffer_pool(3, 2048);

-    let buf = pool.acquire().
+    let buf = pool.acquire().expect("Operation failed");
     assert!(buf.capacity() >= 2048, "buffer should respect capacity hint");
 }
 }
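The pooling assertions above — pool sizes growing after guards drop, capacity hints respected, `clear_pools` emptying both pools — imply an acquire/return-on-drop design. Below is a minimal, self-contained illustration of those mechanics in plain std Rust; it is emphatically not kreuzberg's implementation, only a sketch of the behavior the tests check.

```rust
use std::sync::{Arc, Mutex};

// NOT kreuzberg's code: a std-only illustration of acquire/return-on-drop
// pooling (capacity hints, reuse after drop, pool-size accounting), which
// is the behavior the assertions above exercise.
struct StringPool {
    buffers: Arc<Mutex<Vec<String>>>,
    capacity_hint: usize,
}

struct PooledString {
    buf: Option<String>,
    home: Arc<Mutex<Vec<String>>>,
}

impl StringPool {
    fn new(capacity_hint: usize) -> Self {
        Self { buffers: Arc::new(Mutex::new(Vec::new())), capacity_hint }
    }

    // Reuse a returned buffer if one exists, else allocate to the hint.
    fn acquire(&self) -> PooledString {
        let buf = self
            .buffers
            .lock()
            .expect("Operation failed")
            .pop()
            .unwrap_or_else(|| String::with_capacity(self.capacity_hint));
        PooledString { buf: Some(buf), home: Arc::clone(&self.buffers) }
    }

    fn size(&self) -> usize {
        self.buffers.lock().expect("Operation failed").len()
    }
}

impl Drop for PooledString {
    fn drop(&mut self) {
        if let Some(mut buf) = self.buf.take() {
            buf.clear(); // reset contents but keep the allocated capacity
            self.home.lock().expect("Operation failed").push(buf);
        }
    }
}

fn main() {
    let pool = StringPool::new(2048);
    {
        let guard = pool.acquire();
        assert!(guard.buf.as_ref().expect("Operation failed").capacity() >= 2048);
    } // guard drops here, so the buffer returns to the pool
    assert_eq!(pool.size(), 1); // mirrors the string_pool_size() > 0 checks
}
```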
data/vendor/kreuzberg/tests/batch_processing.rs

@@ -51,7 +51,7 @@ async fn test_batch_extract_file_multiple_formats() {
     let results = batch_extract_file(paths, &config).await;

     assert!(results.is_ok(), "Batch extraction should succeed");
-    let results = results.
+    let results = results.expect("Operation failed");

     assert_eq!(results.len(), 3);

@@ -95,7 +95,7 @@ fn test_batch_extract_file_sync_variant() {
     let results = batch_extract_file_sync(paths, &config);

     assert!(results.is_ok(), "Sync batch extraction should succeed");
-    let results = results.
+    let results = results.expect("Operation failed");

     assert_eq!(results.len(), 2);

@@ -137,7 +137,7 @@ async fn test_batch_extract_bytes_multiple() {
     let results = batch_extract_bytes(owned_contents, &config).await;

     assert!(results.is_ok(), "Batch bytes extraction should succeed");
-    let results = results.
+    let results = results.expect("Operation failed");

     assert_eq!(results.len(), 3);

@@ -161,7 +161,11 @@ async fn test_batch_extract_empty_list() {
     let results = batch_extract_file(paths, &config).await;

     assert!(results.is_ok(), "Empty batch should succeed");
-    assert_eq!(
+    assert_eq!(
+        results.expect("Operation failed").len(),
+        0,
+        "Should return empty vector"
+    );
 }

 /// Test batch extraction when one file fails (others should succeed).

@@ -187,7 +191,7 @@ async fn test_batch_extract_one_file_fails() {
     let results = batch_extract_file(paths, &config).await;

     assert!(results.is_ok(), "Batch should succeed even with one failure");
-    let results = results.
+    let results = results.expect("Operation failed");

     assert_eq!(results.len(), 3);

@@ -216,7 +220,7 @@ async fn test_batch_extract_all_fail() {
     let results = batch_extract_file(paths, &config).await;

     assert!(results.is_ok(), "Batch should succeed (errors in metadata)");
-    let results = results.
+    let results = results.expect("Operation failed");

     assert_eq!(results.len(), 3);

@@ -251,7 +255,7 @@ async fn test_batch_extract_concurrent() {
     let duration = start.elapsed();

     assert!(results.is_ok(), "Concurrent batch should succeed");
-    let results = results.
+    let results = results.expect("Operation failed");

     assert_eq!(results.len(), 20);

@@ -289,7 +293,7 @@ async fn test_batch_extract_large_batch() {
     let results = batch_extract_file(paths, &config).await;

     assert!(results.is_ok(), "Large batch should succeed");
-    let results = results.
+    let results = results.expect("Operation failed");

     assert_eq!(results.len(), 50);

@@ -319,7 +323,7 @@ fn test_batch_extract_bytes_sync_variant() {
     let results = batch_extract_bytes_sync(owned_contents, &config);

     assert!(results.is_ok(), "Sync batch bytes extraction should succeed");
-    let results = results.
+    let results = results.expect("Operation failed");

     assert_eq!(results.len(), 3);
     assert_text_content(&results[0].content, "content 1");
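batch_processing.rs also covers the blocking twins of the async entry points. A sketch of the sync-variant call shape: `batch_extract_bytes_sync`, the `(Vec<u8>, String)` tuple payload, and the `.content` field are from the diff; the import path and `ExtractionConfig::default()` are assumptions.

```rust
// Sketch of the sync variant: same content/MIME pairs as the async API,
// but a plain Result instead of a Future. Names from the diff; import
// path and Default impl assumed.
use kreuzberg::{batch_extract_bytes_sync, ExtractionConfig};

fn main() {
    let config = ExtractionConfig::default();
    let owned_contents = vec![
        (b"content 1".to_vec(), "text/plain".to_string()),
    ];

    let results = batch_extract_bytes_sync(owned_contents, &config)
        .expect("Operation failed");
    assert_eq!(results.len(), 1);
    println!("{}", results[0].content);
}
```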
data/vendor/kreuzberg/tests/bibtex_parity_test.rs

@@ -65,7 +65,7 @@ async fn test_all_entry_types() {
         .await;

     assert!(result.is_ok(), "Failed to parse {} entry", expected_type);
-    let result = result.
+    let result = result.expect("Operation failed");

     if let Some(entry_types) = result.metadata.additional.get("entry_types") {
         assert!(entry_types.as_object().is_some(), "Entry types should be an object");

@@ -116,7 +116,7 @@ async fn test_all_common_fields() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");

     let content = &result.content;

@@ -183,7 +183,7 @@ async fn test_author_parsing() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");

     if let Some(authors) = result.metadata.additional.get("authors") {
         let authors_array = authors.as_array().expect("Authors should be an array");

@@ -221,7 +221,7 @@ async fn test_special_characters() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");

     assert_eq!(
         result.metadata.additional.get("entry_count"),

@@ -250,7 +250,7 @@ async fn test_year_range_extraction() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");

     if let Some(year_range) = result.metadata.additional.get("year_range") {
         assert_eq!(year_range.get("min"), Some(&serde_json::json!(1990)));

@@ -281,7 +281,7 @@ async fn test_citation_keys_extraction() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");

     if let Some(citation_keys) = result.metadata.additional.get("citation_keys") {
         let keys_array = citation_keys.as_array().expect("Citation keys should be an array");

@@ -316,7 +316,7 @@ async fn test_entry_type_distribution() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");

     if let Some(entry_types) = result.metadata.additional.get("entry_types") {
         let types_obj = entry_types.as_object().expect("Entry types should be an object");

@@ -348,7 +348,7 @@ async fn test_unicode_support() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");

     assert_eq!(
         result.metadata.additional.get("entry_count"),

@@ -376,7 +376,7 @@ async fn test_empty_fields() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");
     assert_eq!(
         result.metadata.additional.get("entry_count"),
         Some(&serde_json::json!(1))

@@ -397,7 +397,7 @@ async fn test_comprehensive_file() {
         .await;

     assert!(result.is_ok());
-    let result = result.
+    let result = result.expect("Operation failed");

     assert_eq!(
         result.metadata.additional.get("entry_count"),