kreuzberg 4.1.2 → 4.2.0
This diff compares the contents of publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/kreuzberg.gemspec +13 -1
- data/lib/kreuzberg/config.rb +70 -35
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +5 -1
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +55 -53
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +10 -2
data/vendor/kreuzberg/tests/concurrency_stress.rs

@@ -120,7 +120,7 @@ async fn test_concurrent_batch_extractions() {
     for handle in handles {
         let results = handle.await.expect("Task should not panic");
         assert!(results.is_ok(), "Batch extraction should succeed");
-        let results = results.
+        let results = results.expect("Operation failed");
         assert_eq!(results.len(), 20, "Should return all results");
     }
 }
@@ -147,7 +147,9 @@ async fn test_concurrent_extractions_with_cache() {
 
     let test_data = b"Cached content for concurrent access test";
 
-    let _ = extract_bytes(test_data, "text/plain", &config)
+    let _ = extract_bytes(test_data, "text/plain", &config)
+        .await
+        .expect("Async operation failed");
 
     let mut handles = vec![];
     for _ in 0..100 {
@@ -163,7 +165,7 @@ async fn test_concurrent_extractions_with_cache() {
     for handle in handles {
         let result = handle.await.expect("Task should not panic");
         assert!(result.is_ok(), "Cache read should succeed");
-        let extraction = result.
+        let extraction = result.expect("Operation failed");
         assert_text_content(&extraction.content, expected_content);
     }
 }
@@ -225,7 +227,7 @@ async fn test_concurrent_ocr_processing() {
     let mut extracted_texts = vec![];
     for result in results {
         assert!(result.is_ok(), "OCR should succeed: {:?}", result.err());
-        let extraction = result.
+        let extraction = result.expect("Operation failed");
         assert!(!extraction.content.is_empty(), "OCR should extract text");
         extracted_texts.push(extraction.content);
     }
@@ -394,7 +396,7 @@ async fn test_concurrent_pipeline_processing() {
     for handle in handles {
         let result = handle.await.expect("Task should not panic");
         assert!(result.is_ok(), "Pipeline should succeed");
-        let processed = result.
+        let processed = result.expect("Operation failed");
         assert!(processed.content.contains("[processed]"), "Processor should run");
     }
 
@@ -457,7 +459,9 @@ async fn test_extraction_throughput_scales() {
 
     let sequential_start = std::time::Instant::now();
    for _ in 0..20 {
-        let _ = extract_bytes(test_data, "text/plain", &config)
+        let _ = extract_bytes(test_data, "text/plain", &config)
+            .await
+            .expect("Async operation failed");
     }
     let sequential_duration = sequential_start.elapsed();
 
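Only fragments of the surrounding tests are visible in the hunks above. For orientation, here is a minimal sketch of the spawn-and-join pattern they modify, assuming the `extract_bytes` call shape shown in the fragments and that `ExtractionConfig` implements `Clone` and `Default` (the diff confirms neither; names here are illustrative):

use kreuzberg::core::config::ExtractionConfig;
use kreuzberg::core::extractor::extract_bytes;

#[tokio::test]
async fn sketch_concurrent_extraction() {
    let config = ExtractionConfig::default();
    // 'static byte literal, so it can move into spawned tasks.
    let test_data: &'static [u8] = b"Cached content for concurrent access test";

    let mut handles = vec![];
    for _ in 0..100 {
        let config = config.clone();
        // Each task runs one extraction; results come back through the JoinHandles.
        handles.push(tokio::spawn(async move {
            extract_bytes(test_data, "text/plain", &config).await
        }));
    }

    for handle in handles {
        // Unwrap the task join first, then the extraction Result, matching the
        // explicit expect-based style introduced by the hunks above.
        let result = handle.await.expect("Task should not panic");
        let extraction = result.expect("Operation failed");
        assert!(!extraction.content.is_empty());
    }
}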
data/vendor/kreuzberg/tests/config_behavioral.rs (new file)

@@ -0,0 +1,414 @@
+//! Config behavioral verification tests
+//!
+//! These tests verify that configuration options actually affect extraction behavior,
+//! not just that they serialize correctly.
+//!
+//! Unlike serialization tests that only check if configs deserialize, these tests verify
+//! that the configuration options actually influence the extraction process and produce
+//! observable differences in the output.
+
+use kreuzberg::core::config::ChunkingConfig;
+use kreuzberg::core::config::ExtractionConfig;
+use kreuzberg::core::config::OutputFormat;
+use kreuzberg::core::extractor::extract_bytes;
+use kreuzberg::types::OutputFormat as ResultFormat;
+
+mod helpers;
+
+/// Test output_format Plain produces text without formatting
+///
+/// Note: HTML extractors often convert to markdown internally, so this test
+/// uses plain text input to verify the output_format configuration is respected.
+#[tokio::test]
+async fn test_output_format_plain_produces_plain() {
+    let plain_text = b"Title\n\nParagraph with bold text.";
+
+    let config = ExtractionConfig {
+        output_format: OutputFormat::Plain,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(plain_text, "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    // Plain text should not have markdown or HTML formatting
+    assert!(
+        !result.content.contains("# ") && !result.content.contains("<h1>"),
+        "Plain format should not contain markdown headers or HTML tags, got: {}",
+        result.content
+    );
+    assert!(
+        result.content.contains("Title") || result.content.contains("Paragraph"),
+        "Should still contain extracted text content"
+    );
+}
+
+/// Test output_format Markdown produces markdown formatting
+#[tokio::test]
+async fn test_output_format_markdown_produces_markdown() {
+    let html = b"<h1>Title</h1><p>Paragraph with <strong>bold</strong> text.</p>";
+
+    let config = ExtractionConfig {
+        output_format: OutputFormat::Markdown,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(html, "text/html", &config)
+        .await
+        .expect("Should extract successfully");
+
+    // Verify markdown formatting is present (# for headers or ** for bold)
+    let has_markdown = result.content.contains("# ") || result.content.contains("**") || result.content.contains("*");
+
+    assert!(
+        has_markdown,
+        "Markdown format should contain # headers or ** bold, got: {}",
+        result.content
+    );
+}
+
+/// Test output_format HTML produces valid HTML content
+#[tokio::test]
+async fn test_output_format_html_produces_html() {
+    let text = "Title\n\nParagraph with bold text.";
+
+    let config = ExtractionConfig {
+        output_format: OutputFormat::Html,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    // HTML format should be safe and not contain injection vectors
+    assert!(
+        !result.content.contains("<script>"),
+        "HTML format should be safe from injection"
+    );
+    assert!(!result.content.is_empty(), "Should produce content in HTML format");
+}
+
+/// Test result_format Unified produces content in single field
+#[tokio::test]
+async fn test_result_format_unified_structure() {
+    let text = "Sample content";
+
+    let config = ExtractionConfig {
+        result_format: ResultFormat::Unified,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    // Unified format should have content in main content field
+    assert!(!result.content.is_empty(), "Unified format should have content");
+
+    // Elements should be None or empty for unified format
+    assert!(
+        result.elements.is_none() || result.elements.as_ref().unwrap().is_empty(),
+        "Unified format should not have elements"
+    );
+}
+
+/// Test result_format ElementBased produces element structure
+#[tokio::test]
+async fn test_result_format_element_based_structure() {
+    let text = "First paragraph here.\n\nSecond paragraph with more content.";
+
+    let config = ExtractionConfig {
+        result_format: ResultFormat::ElementBased,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    // Element-based format should produce elements array
+    if let Some(elements) = &result.elements {
+        assert!(!elements.is_empty(), "Element-based format should have elements");
+        // Verify elements have expected structure
+        for element in elements {
+            assert!(!element.text.is_empty(), "Elements should have non-empty text");
+        }
+    }
+}
+
+/// Test chunking max_chars actually limits chunk size
+#[tokio::test]
+#[cfg(feature = "chunking")]
+async fn test_chunking_max_chars_limits_chunk_size() {
+    let long_text = "word ".repeat(500); // ~2500 characters
+
+    let config = ExtractionConfig {
+        chunking: Some(ChunkingConfig {
+            max_chars: 100,
+            max_overlap: 20,
+            embedding: None,
+            preset: None,
+        }),
+        ..Default::default()
+    };
+
+    let result = extract_bytes(long_text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    assert!(result.chunks.is_some(), "Chunking should produce chunks");
+
+    if let Some(chunks) = result.chunks {
+        assert!(chunks.len() > 1, "Long text should produce multiple chunks");
+
+        // Verify chunk size constraint: each chunk should respect max_chars
+        for (i, chunk) in chunks.iter().enumerate() {
+            assert!(
+                chunk.content.len() <= 100 + 20,
+                "Chunk {} exceeds max_chars + overlap: length = {}",
+                i,
+                chunk.content.len()
+            );
+        }
+    }
+}
+
+/// Test chunking with overlap creates overlapping chunks
+#[tokio::test]
+#[cfg(feature = "chunking")]
+async fn test_chunking_overlap_creates_overlap() {
+    let text = "First sentence. ".repeat(30); // ~480 characters
+
+    let config = ExtractionConfig {
+        chunking: Some(ChunkingConfig {
+            max_chars: 50,
+            max_overlap: 15,
+            embedding: None,
+            preset: None,
+        }),
+        ..Default::default()
+    };
+
+    let result = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    if let Some(chunks) = result.chunks {
+        if chunks.len() >= 2 {
+            // Check if adjacent chunks have overlapping text
+            let chunk1_end = &chunks[0].content[chunks[0].content.len().saturating_sub(15)..];
+            let chunk2_start = &chunks[1].content[..chunks[1].content.len().min(15)];
+
+            // There should be some overlap in the text
+            let overlap_found = chunk1_end.chars().any(|c| c != ' ') && chunk2_start.chars().any(|c| c != ' ');
+
+            assert!(
+                overlap_found,
+                "Adjacent chunks should have overlapping non-whitespace text"
+            );
+        }
+    }
+}
+
+/// Test chunking disabled produces no chunks
+#[tokio::test]
+async fn test_chunking_disabled_produces_no_chunks() {
+    let long_text = "word ".repeat(500);
+
+    let config = ExtractionConfig {
+        chunking: None,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(long_text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    assert!(result.chunks.is_none(), "Chunking disabled should produce no chunks");
+}
+
+/// Test use_cache true allows results to be cached
+#[tokio::test]
+async fn test_cache_enabled_allows_caching() {
+    let text = "Test content for caching";
+
+    let config = ExtractionConfig {
+        use_cache: true,
+        ..Default::default()
+    };
+
+    // Extract twice with same content
+    let result1 = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("First extraction should succeed");
+
+    let result2 = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Second extraction should succeed");
+
+    // Results should be identical
+    assert_eq!(
+        result1.content, result2.content,
+        "Cache enabled should produce consistent results"
+    );
+}
+
+/// Test use_cache false disables caching without crashing
+#[tokio::test]
+async fn test_cache_disabled_does_not_crash() {
+    let text = "Test content without caching";
+
+    let config = ExtractionConfig {
+        use_cache: false,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Extraction with cache disabled should succeed");
+
+    assert!(!result.content.is_empty(), "Should still extract content");
+}
+
+/// Test quality_processing enabled produces quality score
+#[tokio::test]
+#[cfg(feature = "quality")]
+async fn test_quality_processing_enabled_produces_score() {
+    let text = "This is a well-structured document. It has proper sentences. And good formatting.";
+
+    let config = ExtractionConfig {
+        enable_quality_processing: true,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    // Quality processing should add a quality_score to metadata
+    let has_quality_score = result.metadata.additional.contains_key("quality_score");
+    assert!(
+        has_quality_score,
+        "Quality processing enabled should produce quality_score in metadata"
+    );
+}
+
+/// Test quality_processing disabled does not produce score
+#[tokio::test]
+#[cfg(feature = "quality")]
+async fn test_quality_processing_disabled_no_score() {
+    let text = "This is a document.";
+
+    let config = ExtractionConfig {
+        enable_quality_processing: false,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    assert!(
+        !result.metadata.additional.contains_key("quality_score"),
+        "Quality processing disabled should not produce quality_score"
+    );
+}
+
+/// Test output_format combinations with result_format
+#[tokio::test]
+async fn test_output_format_with_element_based() {
+    let html = b"<p>First paragraph</p><p>Second paragraph</p>";
+
+    let config = ExtractionConfig {
+        output_format: OutputFormat::Markdown,
+        result_format: ResultFormat::ElementBased,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(html, "text/html", &config)
+        .await
+        .expect("Should extract successfully");
+
+    // Should have elements
+    assert!(result.elements.is_some(), "ElementBased format should produce elements");
+
+    // Content should still be markdown formatted
+    assert!(
+        !result.content.contains("<p>"),
+        "Output format should not contain HTML tags"
+    );
+}
+
+/// Test chunking respects overlap maximum
+#[tokio::test]
+#[cfg(feature = "chunking")]
+async fn test_chunking_overlap_maximum() {
+    let text = "x".repeat(200); // Simple repeated character
+
+    let config = ExtractionConfig {
+        chunking: Some(ChunkingConfig {
+            max_chars: 60,
+            max_overlap: 10,
+            embedding: None,
+            preset: None,
+        }),
+        ..Default::default()
+    };
+
+    let result = extract_bytes(text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    if let Some(chunks) = result.chunks {
+        // Verify max_overlap is not exceeded
+        for (i, chunk) in chunks.iter().enumerate() {
+            assert!(
+                chunk.content.len() <= 60 + 10,
+                "Chunk {} size {} exceeds max_chars (60) + max_overlap (10)",
+                i,
+                chunk.content.len()
+            );
+        }
+    }
+}
+
+/// Test large document extraction with multiple config options
+#[tokio::test]
+#[cfg(feature = "chunking")]
+async fn test_large_document_with_combined_config() {
+    let large_text = "This is a paragraph. ".repeat(100); // ~2000 characters
+
+    let config = ExtractionConfig {
+        output_format: OutputFormat::Plain,
+        chunking: Some(ChunkingConfig {
+            max_chars: 200,
+            max_overlap: 30,
+            embedding: None,
+            preset: None,
+        }),
+        use_cache: true,
+        enable_quality_processing: true,
+        ..Default::default()
+    };
+
+    let result = extract_bytes(large_text.as_bytes(), "text/plain", &config)
+        .await
+        .expect("Should extract successfully");
+
+    // Should have chunks due to size
+    assert!(result.chunks.is_some(), "Should produce chunks for large text");
+
+    // Should have quality score
+    #[cfg(feature = "quality")]
+    {
+        assert!(
+            result.metadata.additional.contains_key("quality_score"),
+            "Should have quality score"
+        );
+    }
+
+    // Should have content in plain format
+    assert!(!result.content.is_empty(), "Should have content");
+}
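The new behavioral suite above doubles as usage documentation for the configuration surface. As a condensed sketch of driving that same surface from application code, reusing only the items the tests import (ExtractionConfig, ChunkingConfig, OutputFormat, extract_bytes) and assuming the "chunking" cargo feature the tests gate on; the main wrapper is illustrative, not part of the diff:

use kreuzberg::core::config::{ChunkingConfig, ExtractionConfig, OutputFormat};
use kreuzberg::core::extractor::extract_bytes;

#[tokio::main]
async fn main() {
    // Combine the options the behavioral tests exercise individually.
    let config = ExtractionConfig {
        output_format: OutputFormat::Markdown,
        chunking: Some(ChunkingConfig {
            max_chars: 200,
            max_overlap: 30,
            embedding: None,
            preset: None,
        }),
        use_cache: true,
        ..Default::default()
    };

    // Same call shape as the tests: bytes, MIME type, config reference.
    let result = extract_bytes(b"<h1>Title</h1><p>Body</p>", "text/html", &config)
        .await
        .expect("extraction failed");

    println!("content:\n{}", result.content);
    if let Some(chunks) = result.chunks {
        println!("chunks: {}", chunks.len());
    }
}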
data/vendor/kreuzberg/tests/config_features.rs

@@ -35,14 +35,14 @@ async fn test_chunking_enabled() {
         .expect("Should extract successfully");
 
     assert!(result.chunks.is_some(), "Chunks should be present");
-    let chunks = result.chunks.
+    let chunks = result.chunks.expect("Operation failed");
     assert!(chunks.len() > 1, "Should have multiple chunks");
 
     assert!(result.metadata.additional.contains_key("chunk_count"));
-    let chunk_count = result.metadata.additional.get("chunk_count").
+    let chunk_count = result.metadata.additional.get("chunk_count").expect("Value not found");
     assert_eq!(
         chunks.len(),
-        chunk_count.as_u64().
+        chunk_count.as_u64().expect("Operation failed") as usize,
         "Chunks length should match chunk_count metadata"
     );
 
@@ -78,7 +78,7 @@ async fn test_chunking_with_overlap() {
         .expect("Should extract successfully");
 
     assert!(result.chunks.is_some(), "Chunks should be present");
-    let chunks = result.chunks.
+    let chunks = result.chunks.expect("Operation failed");
     assert!(chunks.len() >= 2, "Should have at least 2 chunks");
 
     assert!(result.metadata.additional.contains_key("chunk_count"));
@@ -118,7 +118,7 @@ async fn test_chunking_custom_sizes() {
         .expect("Should extract successfully");
 
     assert!(result.chunks.is_some(), "Chunks should be present");
-    let chunks = result.chunks.
+    let chunks = result.chunks.expect("Operation failed");
     assert!(!chunks.is_empty(), "Should have at least 1 chunk");
 
     assert!(result.metadata.additional.contains_key("chunk_count"));
@@ -178,7 +178,7 @@ async fn test_language_detection_single() {
         .expect("Should extract successfully");
 
     assert!(result.detected_languages.is_some(), "Should detect language");
-    let languages = result.detected_languages.
+    let languages = result.detected_languages.expect("Operation failed");
     assert!(!languages.is_empty(), "Should detect at least one language");
     assert_eq!(languages[0], "eng", "Should detect English");
 }
@@ -205,7 +205,7 @@ async fn test_language_detection_multiple() {
         .expect("Should extract successfully");
 
     assert!(result.detected_languages.is_some(), "Should detect languages");
-    let languages = result.detected_languages.
+    let languages = result.detected_languages.expect("Operation failed");
     assert!(!languages.is_empty(), "Should detect at least one language");
 }
 
@@ -424,7 +424,7 @@ async fn test_quality_processing_enabled() {
         .expect("Should extract successfully");
 
     if let Some(score) = result.metadata.additional.get("quality_score") {
-        let score_value = score.as_f64().
+        let score_value = score.as_f64().expect("Operation failed");
         assert!((0.0..=1.0).contains(&score_value));
     }
 
@@ -463,16 +463,16 @@ async fn test_quality_threshold_filtering() {
         .metadata
         .additional
         .get("quality_score")
-        .
+        .expect("Operation failed")
         .as_f64()
-        .
+        .expect("Operation failed");
     let score_low = result_low
         .metadata
         .additional
         .get("quality_score")
-        .
+        .expect("Operation failed")
         .as_f64()
-        .
+        .expect("Operation failed");
 
     assert!((0.0..=1.0).contains(&score_high));
     assert!((0.0..=1.0).contains(&score_low));
@@ -528,7 +528,7 @@ async fn test_chunking_with_embeddings() {
         .expect("Should extract successfully");
 
     assert!(result.chunks.is_some(), "Chunks should be present");
-    let chunks = result.chunks.
+    let chunks = result.chunks.expect("Operation failed");
     assert!(chunks.len() > 1, "Should have multiple chunks");
 
     println!("Metadata: {:?}", result.metadata.additional);
@@ -542,13 +542,17 @@ async fn test_chunking_with_embeddings() {
         "Should have embeddings_generated metadata"
     );
     assert_eq!(
-        result
+        result
+            .metadata
+            .additional
+            .get("embeddings_generated")
+            .expect("Value not found"),
         &serde_json::Value::Bool(true)
     );
 
     for chunk in &chunks {
         assert!(chunk.embedding.is_some(), "Each chunk should have an embedding");
-        let embedding = chunk.embedding.as_ref().
+        let embedding = chunk.embedding.as_ref().expect("Operation failed");
         assert_eq!(
             embedding.len(),
             768,