RubyGems - kreuzberg - Versions diffs - 4.1.2 → 4.2.0 - Mend

kreuzberg 4.1.2 → 4.2.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

checksums.yaml +4 -4
data/Gemfile.lock +2 -2
data/README.md +1 -1
data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
data/kreuzberg.gemspec +13 -1
data/lib/kreuzberg/config.rb +70 -35
data/lib/kreuzberg/version.rb +1 -1
data/sig/kreuzberg.rbs +5 -1
data/spec/binding/batch_operations_spec.rb +80 -0
data/spec/binding/metadata_types_spec.rb +77 -57
data/spec/serialization_spec.rb +134 -0
data/spec/unit/config/output_format_spec.rb +380 -0
data/vendor/Cargo.toml +1 -1
data/vendor/kreuzberg/Cargo.toml +1 -1
data/vendor/kreuzberg/README.md +1 -1
data/vendor/kreuzberg/src/embeddings.rs +4 -4
data/vendor/kreuzberg/src/mcp/format.rs +237 -39
data/vendor/kreuzberg/src/mcp/params.rs +26 -33
data/vendor/kreuzberg/src/mcp/server.rs +6 -3
data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
data/vendor/kreuzberg/tests/api_embed.rs +84 -50
data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
data/vendor/kreuzberg/tests/api_tests.rs +298 -139
data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
data/vendor/kreuzberg/tests/config_features.rs +19 -15
data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
data/vendor/kreuzberg/tests/core_integration.rs +55 -53
data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
data/vendor/kreuzberg/tests/email_integration.rs +7 -7
data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
data/vendor/kreuzberg/tests/error_handling.rs +13 -11
data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
data/vendor/kreuzberg/tests/page_markers.rs +1 -1
data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
data/vendor/kreuzberg/tests/security_validation.rs +20 -19
data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
metadata +10 -2

data/vendor/kreuzberg/tests/api_tests.rs CHANGED Viewed

@@ -20,14 +20,21 @@ async fn test_health_endpoint() {
     let app = create_router(ExtractionConfig::default());
     let response = app
-        .oneshot(Request::builder().uri("/health").body(Body::empty()).unwrap())
+        .oneshot(
+            Request::builder()
+                .uri("/health")
+                .body(Body::empty())
+                .expect("Failed to create HTTP request body"),
+        )
         .await
-        .unwrap();
+        .expect("Failed to send HTTP request");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let health: HealthResponse = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let health: HealthResponse = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(health.status, "healthy");
     assert!(!health.version.is_empty());
@@ -39,14 +46,21 @@ async fn test_info_endpoint() {
     let app = create_router(ExtractionConfig::default());
     let response = app
-        .oneshot(Request::builder().uri("/info").body(Body::empty()).unwrap())
+        .oneshot(
+            Request::builder()
+                .uri("/info")
+                .body(Body::empty())
+                .expect("Failed to create HTTP request body"),
+        )
         .await
-        .unwrap();
+        .expect("Failed to send HTTP request");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let info: InfoResponse = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let info: InfoResponse = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert!(!info.version.is_empty());
     assert!(info.rust_backend);
@@ -67,10 +81,10 @@ async fn test_extract_no_files() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::BAD_REQUEST);
 }
@@ -100,19 +114,26 @@ async fn test_extract_text_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
-    assert!(results[0]["content"].as_str().unwrap().contains("Hello, world!"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("Hello, world!")
+    );
     assert!(
         results[0]["chunks"].is_null(),
@@ -158,19 +179,26 @@ async fn test_extract_with_config() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
-    assert!(results[0]["content"].as_str().unwrap().contains("Hello, world!"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("Hello, world!")
+    );
     assert!(
         results[0]["chunks"].is_null(),
@@ -214,10 +242,10 @@ async fn test_extract_invalid_config() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::BAD_REQUEST);
 }
@@ -253,19 +281,31 @@ async fn test_extract_multiple_files() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 2);
-    assert!(results[0]["content"].as_str().unwrap().contains("First file"));
-    assert!(results[1]["content"].as_str().unwrap().contains("Second file"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("First file")
+    );
+    assert!(
+        results[1]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("Second file")
+    );
     for result in &results {
         assert!(
@@ -304,19 +344,26 @@ async fn test_extract_markdown_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/markdown");
-    assert!(results[0]["content"].as_str().unwrap().contains("Heading"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("Heading")
+    );
 }
 /// Test extract endpoint with JSON content.
@@ -344,15 +391,17 @@ async fn test_extract_json_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "application/json");
@@ -384,19 +433,26 @@ async fn test_extract_xml_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "application/xml");
-    assert!(results[0]["content"].as_str().unwrap().contains("test"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("test")
+    );
 }
 /// Test extract endpoint with HTML content.
@@ -425,19 +481,26 @@ async fn test_extract_html_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/html");
-    assert!(results[0]["content"].as_str().unwrap().contains("Title"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("Title")
+    );
 }
 /// Test extract endpoint with missing Content-Type header.
@@ -451,10 +514,10 @@ async fn test_extract_missing_content_type() {
                 .method("POST")
                 .uri("/extract")
                 .body(Body::from("some data"))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert!(response.status() == StatusCode::BAD_REQUEST || response.status() == StatusCode::UNSUPPORTED_MEDIA_TYPE);
 }
@@ -484,15 +547,17 @@ async fn test_extract_empty_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
@@ -523,10 +588,10 @@ async fn test_extract_unsupported_mime_type() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert!(
         response.status() == StatusCode::UNPROCESSABLE_ENTITY || response.status() == StatusCode::INTERNAL_SERVER_ERROR
@@ -558,10 +623,10 @@ async fn test_extract_without_filename() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
 }
@@ -581,10 +646,10 @@ async fn test_extract_malformed_multipart() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert!(response.status().is_client_error() || response.status().is_server_error());
 }
@@ -595,9 +660,14 @@ async fn test_cors_headers() {
     let app = create_router(ExtractionConfig::default());
     let response = app
-        .oneshot(Request::builder().uri("/health").body(Body::empty()).unwrap())
+        .oneshot(
+            Request::builder()
+                .uri("/health")
+                .body(Body::empty())
+                .expect("Failed to create HTTP request body"),
+        )
         .await
-        .unwrap();
+        .expect("Failed to send HTTP request");
     assert_eq!(response.status(), StatusCode::OK);
@@ -618,10 +688,10 @@ async fn test_cors_preflight() {
                 .header("origin", "http://example.com")
                 .header("access-control-request-method", "POST")
                 .body(Body::empty())
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert!(response.status().is_success() || response.status() == StatusCode::NO_CONTENT);
 }
@@ -641,15 +711,17 @@ async fn test_error_response_format_validation() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::BAD_REQUEST);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let error: serde_json::Value = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let error: serde_json::Value = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert!(error["error_type"].is_string());
     assert!(error["message"].is_string());
@@ -686,18 +758,25 @@ async fn test_error_response_format_parsing() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::BAD_REQUEST);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let error: serde_json::Value = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let error: serde_json::Value = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(error["error_type"], "ValidationError");
-    assert!(error["message"].as_str().unwrap().contains("configuration"));
+    assert!(
+        error["message"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("configuration")
+    );
 }
 /// Test 404 error for non-existent endpoint.
@@ -706,9 +785,14 @@ async fn test_not_found_endpoint() {
     let app = create_router(ExtractionConfig::default());
     let response = app
-        .oneshot(Request::builder().uri("/nonexistent").body(Body::empty()).unwrap())
+        .oneshot(
+            Request::builder()
+                .uri("/nonexistent")
+                .body(Body::empty())
+                .expect("Failed to create HTTP request body"),
+        )
         .await
-        .unwrap();
+        .expect("Failed to send HTTP request");
     assert_eq!(response.status(), StatusCode::NOT_FOUND);
 }
@@ -738,15 +822,17 @@ async fn test_extract_large_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
@@ -786,7 +872,7 @@ async fn test_concurrent_requests() {
                         .uri("/extract")
                         .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                         .body(Body::from(body_clone))
-                        .unwrap(),
+                        .expect("Operation failed"),
                 )
                 .await
         });
@@ -795,7 +881,10 @@ async fn test_concurrent_requests() {
     }
     for handle in handles {
-        let response = handle.await.unwrap().unwrap();
+        let response = handle
+            .await
+            .expect("Async operation failed")
+            .expect("Async operation failed");
         assert_eq!(response.status(), StatusCode::OK);
     }
 }
@@ -806,14 +895,21 @@ async fn test_cache_stats_endpoint() {
     let app = create_router(ExtractionConfig::default());
     let response = app
-        .oneshot(Request::builder().uri("/cache/stats").body(Body::empty()).unwrap())
+        .oneshot(
+            Request::builder()
+                .uri("/cache/stats")
+                .body(Body::empty())
+                .expect("Failed to create HTTP request body"),
+        )
         .await
-        .unwrap();
+        .expect("Failed to send HTTP request");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let stats: serde_json::Value = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let stats: serde_json::Value = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert!(stats["directory"].is_string());
     assert!(stats["total_files"].is_number());
@@ -831,15 +927,17 @@ async fn test_cache_clear_endpoint() {
                 .method("DELETE")
                 .uri("/cache/clear")
                 .body(Body::empty())
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let clear_result: serde_json::Value = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let clear_result: serde_json::Value = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert!(clear_result["directory"].is_string());
     assert!(clear_result["removed_files"].is_number());
@@ -877,15 +975,17 @@ async fn test_extract_mixed_content_types() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 2);
     assert_eq!(results[0]["mime_type"], "text/plain");
@@ -921,10 +1021,10 @@ async fn test_extract_unknown_multipart_field() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
 }
@@ -953,10 +1053,10 @@ async fn test_extract_default_mime_type() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert!(
         response.status() == StatusCode::OK
@@ -997,10 +1097,10 @@ async fn test_size_limits_custom_limits() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK);
 }
@@ -1081,10 +1181,10 @@ async fn test_extract_file_larger_than_2mb() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(
         response.status(),
@@ -1092,12 +1192,19 @@ async fn test_extract_file_larger_than_2mb() {
         "3MB file should be accepted. If this fails with 400 or 413, the size limit fix is not working correctly."
     );
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
-    assert!(results[0]["content"].as_str().unwrap().contains("A"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("A")
+    );
 }
 /// Test extracting a 2MB file (just above the old Axum limit).
@@ -1128,10 +1235,10 @@ async fn test_extract_2mb_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(
         response.status(),
@@ -1139,12 +1246,19 @@ async fn test_extract_2mb_file() {
         "2MB file should be accepted (boundary case)"
     );
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
-    assert!(results[0]["content"].as_str().unwrap().contains("X"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("X")
+    );
 }
 /// Test extracting a 5MB file.
@@ -1174,19 +1288,26 @@ async fn test_extract_5mb_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK, "5MB file should be accepted");
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
-    assert!(results[0]["content"].as_str().unwrap().contains("B"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("B")
+    );
 }
 /// Test extracting a 10MB file.
@@ -1216,19 +1337,26 @@ async fn test_extract_10mb_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK, "10MB file should be accepted");
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
-    assert!(results[0]["content"].as_str().unwrap().contains("C"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("C")
+    );
 }
 /// Test extracting a 50MB file (half the default limit).
@@ -1259,19 +1387,26 @@ async fn test_extract_50mb_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(response.status(), StatusCode::OK, "50MB file should be accepted");
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
-    assert!(results[0]["content"].as_str().unwrap().contains("D"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("D")
+    );
 }
 /// Test extracting a 90MB file (near the 100MB default limit).
@@ -1302,10 +1437,10 @@ async fn test_extract_90mb_file() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(
         response.status(),
@@ -1313,12 +1448,19 @@ async fn test_extract_90mb_file() {
         "90MB file should be accepted (within default 100MB limit)"
     );
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 1);
     assert_eq!(results[0]["mime_type"], "text/plain");
-    assert!(results[0]["content"].as_str().unwrap().contains("E"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("E")
+    );
 }
 /// Test extracting a file over the 100MB default limit (HTTP 400/413).
@@ -1350,10 +1492,10 @@ async fn test_extract_file_over_default_limit() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert!(
         response.status() == StatusCode::BAD_REQUEST || response.status() == StatusCode::PAYLOAD_TOO_LARGE,
@@ -1402,10 +1544,10 @@ async fn test_extract_multiple_large_files_within_limit() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert_eq!(
         response.status(),
@@ -1413,16 +1555,33 @@ async fn test_extract_multiple_large_files_within_limit() {
         "Multiple files totaling 75MB should be accepted"
     );
-    let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
-    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).unwrap();
+    let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+        .await
+        .expect("Failed to read HTTP response body");
+    let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
     assert_eq!(results.len(), 3, "Should have 3 results");
     for result in &results {
         assert_eq!(result["mime_type"], "text/plain");
     }
-    assert!(results[0]["content"].as_str().unwrap().contains("G"));
-    assert!(results[1]["content"].as_str().unwrap().contains("H"));
-    assert!(results[2]["content"].as_str().unwrap().contains("I"));
+    assert!(
+        results[0]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("G")
+    );
+    assert!(
+        results[1]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("H")
+    );
+    assert!(
+        results[2]["content"]
+            .as_str()
+            .expect("Failed to extract string from JSON value")
+            .contains("I")
+    );
 }
 /// Test extracting multiple large files exceeding limit (HTTP 400/413).
@@ -1459,10 +1618,10 @@ async fn test_extract_multiple_large_files_exceeding_limit() {
                 .uri("/extract")
                 .header("content-type", format!("multipart/form-data; boundary={}", boundary))
                 .body(Body::from(body_content))
-                .unwrap(),
+                .expect("Operation failed"),
         )
         .await
-        .unwrap();
+        .expect("Operation failed");
     assert!(
         response.status() == StatusCode::BAD_REQUEST || response.status() == StatusCode::PAYLOAD_TOO_LARGE,