kreuzberg 4.1.2 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/Cargo.lock +26 -17
  5. data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
  6. data/kreuzberg.gemspec +13 -1
  7. data/lib/kreuzberg/cli.rb +16 -6
  8. data/lib/kreuzberg/cli_proxy.rb +3 -1
  9. data/lib/kreuzberg/config.rb +121 -39
  10. data/lib/kreuzberg/djot_content.rb +225 -0
  11. data/lib/kreuzberg/extraction_api.rb +20 -4
  12. data/lib/kreuzberg/result.rb +12 -2
  13. data/lib/kreuzberg/version.rb +1 -1
  14. data/lib/kreuzberg.rb +1 -0
  15. data/sig/kreuzberg.rbs +28 -12
  16. data/spec/binding/batch_operations_spec.rb +80 -0
  17. data/spec/binding/batch_spec.rb +6 -5
  18. data/spec/binding/error_recovery_spec.rb +3 -3
  19. data/spec/binding/metadata_types_spec.rb +77 -57
  20. data/spec/binding/tables_spec.rb +11 -2
  21. data/spec/serialization_spec.rb +134 -0
  22. data/spec/unit/config/output_format_spec.rb +380 -0
  23. data/vendor/Cargo.toml +1 -1
  24. data/vendor/kreuzberg/Cargo.toml +1 -1
  25. data/vendor/kreuzberg/README.md +1 -1
  26. data/vendor/kreuzberg/src/api/startup.rs +15 -1
  27. data/vendor/kreuzberg/src/core/config_validation/sections.rs +16 -4
  28. data/vendor/kreuzberg/src/core/extractor/file.rs +1 -2
  29. data/vendor/kreuzberg/src/core/extractor/mod.rs +2 -1
  30. data/vendor/kreuzberg/src/core/io.rs +7 -7
  31. data/vendor/kreuzberg/src/core/mime.rs +4 -4
  32. data/vendor/kreuzberg/src/embeddings.rs +4 -4
  33. data/vendor/kreuzberg/src/extraction/pptx/parser.rs +6 -0
  34. data/vendor/kreuzberg/src/mcp/format.rs +237 -39
  35. data/vendor/kreuzberg/src/mcp/params.rs +26 -33
  36. data/vendor/kreuzberg/src/mcp/server.rs +6 -3
  37. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
  38. data/vendor/kreuzberg/src/plugins/mod.rs +1 -0
  39. data/vendor/kreuzberg/src/plugins/registry/extractor.rs +251 -5
  40. data/vendor/kreuzberg/src/plugins/registry/ocr.rs +150 -2
  41. data/vendor/kreuzberg/src/plugins/registry/processor.rs +213 -5
  42. data/vendor/kreuzberg/src/plugins/registry/validator.rs +220 -4
  43. data/vendor/kreuzberg/src/plugins/startup_validation.rs +385 -0
  44. data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
  45. data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
  46. data/vendor/kreuzberg/tests/api_embed.rs +84 -50
  47. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
  48. data/vendor/kreuzberg/tests/api_tests.rs +298 -139
  49. data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
  50. data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
  51. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
  52. data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
  53. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
  54. data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
  55. data/vendor/kreuzberg/tests/config_behavioral.rs +416 -0
  56. data/vendor/kreuzberg/tests/config_features.rs +19 -15
  57. data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
  58. data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
  59. data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
  60. data/vendor/kreuzberg/tests/core_integration.rs +57 -57
  61. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
  62. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
  63. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
  64. data/vendor/kreuzberg/tests/email_integration.rs +7 -7
  65. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
  66. data/vendor/kreuzberg/tests/error_handling.rs +13 -11
  67. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
  68. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  69. data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
  70. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
  71. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
  72. data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
  73. data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
  74. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
  75. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
  76. data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
  77. data/vendor/kreuzberg/tests/mime_detection.rs +75 -43
  78. data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
  79. data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
  80. data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
  81. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
  82. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
  83. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
  84. data/vendor/kreuzberg/tests/page_markers.rs +1 -1
  85. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
  86. data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
  87. data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
  88. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
  89. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
  90. data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
  91. data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
  92. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +324 -31
  93. data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
  94. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
  95. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
  96. data/vendor/kreuzberg/tests/security_validation.rs +20 -19
  97. data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
  98. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
  99. data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
  100. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
  101. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
  102. data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
  103. metadata +12 -2
@@ -0,0 +1,385 @@
1
+ //! Startup validation for plugin registries.
2
+ //!
3
+ //! This module provides diagnostics and health checks for plugins
4
+ //! at server startup, helping operators diagnose issues in containerized
5
+ //! environments like Kubernetes.
6
+
7
+ use crate::Result;
8
+ use crate::plugins::registry::{
9
+ get_document_extractor_registry, get_ocr_backend_registry, get_post_processor_registry, get_validator_registry,
10
+ };
11
+
12
+ /// Plugin health status information.
13
+ ///
14
+ /// Snapshot of the registered plugins for each plugin type: one name list plus its length. The `*_count` fields equal the list lengths when the value is built by `PluginHealthStatus::check`.
15
+ #[derive(Debug, Clone)]
16
+ pub struct PluginHealthStatus {
17
+ /// Number of registered OCR backends (length of `ocr_backends` when built by `check`)
18
+ pub ocr_backends_count: usize,
19
+ /// Names of registered OCR backends
20
+ pub ocr_backends: Vec<String>,
21
+ /// Number of registered document extractors (length of `extractors` when built by `check`)
22
+ pub extractors_count: usize,
23
+ /// Names of registered document extractors
24
+ pub extractors: Vec<String>,
25
+ /// Number of registered post-processors (length of `post_processors` when built by `check`)
26
+ pub post_processors_count: usize,
27
+ /// Names of registered post-processors
28
+ pub post_processors: Vec<String>,
29
+ /// Number of registered validators (length of `validators` when built by `check`)
30
+ pub validators_count: usize,
31
+ /// Names of registered validators
32
+ pub validators: Vec<String>,
33
+ }
34
+
35
+ impl PluginHealthStatus {
36
+ /// Check plugin health and return status.
37
+ ///
38
+ /// Reads the OCR backend, document extractor, post-processor and validator registries and
39
+ /// collects each registry's `list()` result. It emits no logs itself; if `read()` on a registry fails, that list falls back to its default (empty) value.
40
+ ///
41
+ /// # Returns
42
+ ///
43
+ /// `PluginHealthStatus` with the listed plugins per type and each `*_count` set to the length of its list.
44
+ ///
45
+ /// # Example
46
+ ///
47
+ /// ```no_run
48
+ /// use kreuzberg::plugins::startup_validation::PluginHealthStatus;
49
+ ///
50
+ /// // `check` is synchronous, so no async runtime is required.
51
+ /// fn main() {
52
+ /// let status = PluginHealthStatus::check();
53
+ /// println!("OCR backends: {:?}", status.ocr_backends);
54
+ /// }
55
+ /// ```
56
+ pub fn check() -> Self {
57
+ let ocr_registry = get_ocr_backend_registry();
58
+ let ocr_backends = ocr_registry.read().map(|r| r.list()).unwrap_or_default();
59
+
60
+ let extractor_registry = get_document_extractor_registry();
61
+ let extractors = extractor_registry.read().map(|r| r.list()).unwrap_or_default();
62
+
63
+ let processor_registry = get_post_processor_registry();
64
+ let post_processors = processor_registry.read().map(|r| r.list()).unwrap_or_default();
65
+
66
+ let validator_registry = get_validator_registry();
67
+ let validators = validator_registry.read().map(|r| r.list()).unwrap_or_default();
68
+
69
+ let ocr_backends_count = ocr_backends.len();
70
+ let extractors_count = extractors.len();
71
+ let post_processors_count = post_processors.len();
72
+ let validators_count = validators.len();
73
+
74
+ PluginHealthStatus {
75
+ ocr_backends_count,
76
+ ocr_backends,
77
+ extractors_count,
78
+ extractors,
79
+ post_processors_count,
80
+ post_processors,
81
+ validators_count,
82
+ validators,
83
+ }
84
+ }
85
+ }
86
+
87
+ /// Validate plugin registries at startup and emit diagnostic logs.
88
+ ///
89
+ /// This function is designed to be called when the API server starts
90
+ /// to help diagnose configuration issues early. It checks:
91
+ ///
92
+ /// - Whether OCR backends are registered (warns if none, otherwise logs their names at info level)
93
+ /// - Whether document extractors are registered (warns if none, otherwise logs their names at info level)
94
+ /// - The `TESSDATA_PREFIX` and `RUST_LOG` environment variables (values logged at debug level)
95
+ /// - Whether the `TESSDATA_PREFIX` path exists and is a directory (warns otherwise)
96
+ ///
97
+ /// For Kubernetes deployments, this logs information that helps with
98
+ /// troubleshooting in the container logs. Registered post-processors and validators are logged at info level when present.
99
+ ///
100
+ /// # Returns
101
+ ///
102
+ /// - `Ok(PluginHealthStatus)` with diagnostic information
103
+ /// - `Err(KreuzbergError)` is reserved for critical issues; the current body has no error path and always returns `Ok`
104
+ ///
105
+ /// # Example
106
+ ///
107
+ /// ```no_run
108
+ /// use kreuzberg::plugins::startup_validation::validate_plugins_at_startup;
109
+ ///
110
+ /// // The function is synchronous, so no async runtime is required.
111
+ /// fn main() -> kreuzberg::Result<()> {
112
+ /// let status = validate_plugins_at_startup()?;
113
+ /// println!("Plugins ready: {} backends registered", status.ocr_backends_count);
114
+ /// Ok(())
115
+ /// }
116
+ /// ```
117
+ pub fn validate_plugins_at_startup() -> Result<PluginHealthStatus> {
118
+ let status = PluginHealthStatus::check();
119
+
120
+ // Log OCR backend status (a missing backend is only a warning, never an error)
121
+ if status.ocr_backends_count == 0 {
122
+ tracing::warn!(
123
+ "No OCR backends registered. OCR functionality will be unavailable. \
124
+ This is normal if OCR is not required. \
125
+ If OCR is needed, check that: \
126
+ 1. The 'ocr' feature is enabled in Cargo.toml \
127
+ 2. TESSDATA_PREFIX environment variable is set (e.g., /usr/share/tesseract-ocr/tessdata) \
128
+ 3. Tessdata files exist and are readable (tessdata/*.traineddata) \
129
+ 4. In containers, mount tessdata volume or install tesseract-ocr package. \
130
+ See https://docs.kreuzberg.dev/guides/docker/ for Kubernetes setup."
131
+ );
132
+ } else {
133
+ tracing::info!(
134
+ "OCR backends registered: [{}]. Ready for OCR processing.",
135
+ status.ocr_backends.join(", ")
136
+ );
137
+ }
138
+
139
+ // Log document extractor status (a missing extractor is only a warning, never an error)
140
+ if status.extractors_count == 0 {
141
+ tracing::warn!(
142
+ "No document extractors registered. \
143
+ Document extraction will fail. \
144
+ This usually indicates a configuration issue. \
145
+ Ensure extractors are properly registered during initialization."
146
+ );
147
+ } else {
148
+ tracing::info!("Document extractors registered: [{}]", status.extractors.join(", "));
149
+ }
150
+
151
+ // Log post-processor status (nothing is logged when none are registered)
152
+ if status.post_processors_count > 0 {
153
+ tracing::info!("Post-processors registered: [{}]", status.post_processors.join(", "));
154
+ }
155
+
156
+ // Log validator status (nothing is logged when none are registered)
157
+ if status.validators_count > 0 {
158
+ tracing::info!("Validators registered: [{}]", status.validators.join(", "));
159
+ }
160
+
161
+ // Check for common environment variables (log-only; does not affect the returned status)
162
+ check_environment_variables();
163
+
164
+ Ok(status)
165
+ }
166
+
167
+ /// Check and log relevant environment variables at startup.
168
+ ///
169
+ /// Logs `TESSDATA_PREFIX` (warning if `std::fs::metadata` fails for its path or the path is not a directory),
170
+ /// whether `/usr/share/tesseract-ocr/tessdata` exists, and `RUST_LOG`; all other messages are debug-level.
171
+ /// Any `std::env::var` error for `TESSDATA_PREFIX` (including a non-Unicode value) is logged as "not set". Returns nothing and never fails.
172
+ fn check_environment_variables() {
173
+ // Check TESSDATA_PREFIX for OCR
174
+ match std::env::var("TESSDATA_PREFIX") {
175
+ Ok(path) => {
176
+ tracing::debug!("TESSDATA_PREFIX={}", path);
177
+ // Verify the path exists and is a directory (only `metadata` is queried; the directory contents are not read)
178
+ if let Ok(metadata) = std::fs::metadata(&path) {
179
+ if metadata.is_dir() {
180
+ tracing::debug!(
181
+ "TESSDATA_PREFIX directory exists and is readable. \
182
+ Tesseract should find trained data files."
183
+ );
184
+ } else {
185
+ tracing::warn!(
186
+ "TESSDATA_PREFIX={} exists but is not a directory. \
187
+ Tesseract may fail to initialize.",
188
+ path
189
+ );
190
+ }
191
+ } else {
192
+ tracing::warn!(
193
+ "TESSDATA_PREFIX={} does not exist or is not readable. \
194
+ Tesseract may fail to initialize. \
195
+ Check directory permissions in containerized environments.",
196
+ path
197
+ );
198
+ }
199
+ }
200
+ Err(_) => {
201
+ tracing::debug!("TESSDATA_PREFIX not set. Tesseract will use system default paths.");
202
+ }
203
+ }
204
+
205
+ // Check the system default tessdata path (only this one fixed location is probed)
206
+ if std::path::Path::new("/usr/share/tesseract-ocr/tessdata").exists() {
207
+ tracing::debug!("Found tessdata at system default: /usr/share/tesseract-ocr/tessdata");
208
+ }
209
+
210
+ // Check RUST_LOG for debugging (silently skipped when unset or not valid Unicode)
211
+ if let Ok(log_level) = std::env::var("RUST_LOG") {
212
+ tracing::debug!("RUST_LOG={}", log_level);
213
+ }
214
+ }
215
+
216
+ #[cfg(test)]
217
+ mod tests {
218
+ use super::*;
219
+
220
+ #[test]
221
+ fn test_plugin_health_status_check() {
222
+ let status = PluginHealthStatus::check();
223
+ // Smoke test: only verifies that a status can be built and its count fields read
224
+ let _ = status.ocr_backends_count;
225
+ let _ = status.extractors_count;
226
+ }
227
+
228
+ #[test]
229
+ fn test_validate_plugins_at_startup() {
230
+ // Initialize tracing for tests (`try_init` result ignored: another test may have installed a subscriber already)
231
+ let _ = tracing_subscriber::fmt()
232
+ .with_max_level(tracing::Level::DEBUG)
233
+ .with_test_writer()
234
+ .try_init();
235
+
236
+ let result = validate_plugins_at_startup();
237
+ assert!(result.is_ok());
238
+ let status = result.unwrap();
239
+ // Smoke test: only verifies that the returned status can be read
240
+ let _ = status.ocr_backends_count;
241
+ }
242
+
243
+ #[test]
244
+ fn test_plugin_health_status_ocr_backends_empty() {
245
+ let status = PluginHealthStatus::check();
246
+ // Count must match the list length, whether or not any backends are registered
247
+ assert_eq!(status.ocr_backends.len(), status.ocr_backends_count);
248
+ }
249
+
250
+ #[test]
251
+ fn test_plugin_health_status_extractors_empty() {
252
+ let status = PluginHealthStatus::check();
253
+ // Count must match the list length, whether or not any extractors are registered
254
+ assert_eq!(status.extractors.len(), status.extractors_count);
255
+ }
256
+
257
+ #[test]
258
+ fn test_plugin_health_status_post_processors_empty() {
259
+ let status = PluginHealthStatus::check();
260
+ // Count must match the list length, whether or not any post-processors are registered
261
+ assert_eq!(status.post_processors.len(), status.post_processors_count);
262
+ }
263
+
264
+ #[test]
265
+ fn test_plugin_health_status_validators_empty() {
266
+ let status = PluginHealthStatus::check();
267
+ // Count must match the list length, whether or not any validators are registered
268
+ assert_eq!(status.validators.len(), status.validators_count);
269
+ }
270
+
271
+ #[test]
272
+ fn test_validate_plugins_at_startup_returns_status() {
273
+ let _ = tracing_subscriber::fmt()
274
+ .with_max_level(tracing::Level::DEBUG)
275
+ .with_test_writer()
276
+ .try_init();
277
+
278
+ let result = validate_plugins_at_startup();
279
+ assert!(result.is_ok());
280
+
281
+ let status = result.unwrap();
282
+ // Verify each count matches the length of its name list
283
+ assert_eq!(status.ocr_backends.len(), status.ocr_backends_count);
284
+ assert_eq!(status.extractors.len(), status.extractors_count);
285
+ assert_eq!(status.post_processors.len(), status.post_processors_count);
286
+ assert_eq!(status.validators.len(), status.validators_count);
287
+ }
288
+
289
+ #[test]
290
+ fn test_plugin_health_status_check_consistency() {
291
+ let status1 = PluginHealthStatus::check();
292
+ let status2 = PluginHealthStatus::check();
293
+
294
+ // Counts should be consistent between calls (assumes no plugin is registered concurrently)
295
+ assert_eq!(status1.ocr_backends_count, status2.ocr_backends_count);
296
+ assert_eq!(status1.extractors_count, status2.extractors_count);
297
+ assert_eq!(status1.post_processors_count, status2.post_processors_count);
298
+ assert_eq!(status1.validators_count, status2.validators_count);
299
+ }
300
+
301
+ #[test]
302
+ fn test_validate_plugins_at_startup_with_logging() {
303
+ // Initialize tracing with test writer
304
+ let _ = tracing_subscriber::fmt()
305
+ .with_max_level(tracing::Level::INFO)
306
+ .with_test_writer()
307
+ .try_init();
308
+
309
+ let result = validate_plugins_at_startup();
310
+ assert!(result.is_ok());
311
+
312
+ // An OCR backend is only expected when the `ocr` feature is compiled in; without it an empty registry is normal
313
+ let status = result.unwrap();
314
+ assert!(!cfg!(feature = "ocr") || status.ocr_backends_count > 0);
315
+ }
316
+
317
+ #[test]
318
+ fn test_plugin_health_status_all_counts_valid() {
319
+ let status = PluginHealthStatus::check();
320
+
321
+ // All counts should be valid and consistent with vectors
322
+ assert_eq!(status.ocr_backends.len(), status.ocr_backends_count);
323
+ assert_eq!(status.extractors.len(), status.extractors_count);
324
+ assert_eq!(status.post_processors.len(), status.post_processors_count);
325
+ assert_eq!(status.validators.len(), status.validators_count);
326
+ }
327
+
328
+ #[test]
329
+ fn test_plugin_health_status_vec_sizes_match_counts() {
330
+ let status = PluginHealthStatus::check();
331
+
332
+ // Vector sizes should match their counts
333
+ assert_eq!(status.ocr_backends.len(), status.ocr_backends_count);
334
+ assert_eq!(status.extractors.len(), status.extractors_count);
335
+ assert_eq!(status.post_processors.len(), status.post_processors_count);
336
+ assert_eq!(status.validators.len(), status.validators_count);
337
+ }
338
+
339
+ #[test]
340
+ fn test_validate_plugins_at_startup_logs_warnings_and_info() {
341
+ let _ = tracing_subscriber::fmt()
342
+ .with_max_level(tracing::Level::DEBUG)
343
+ .with_test_writer()
344
+ .try_init();
345
+
346
+ // Call validation, which logs warnings if nothing is registered; the log output itself is not asserted
347
+ let result = validate_plugins_at_startup();
348
+ assert!(result.is_ok());
349
+
350
+ let status = result.unwrap();
351
+ assert_eq!(status.ocr_backends.len(), status.ocr_backends_count);
352
+ }
353
+
354
+ #[test]
355
+ fn test_check_environment_variables_with_rust_log() {
356
+ let _ = tracing_subscriber::fmt()
357
+ .with_max_level(tracing::Level::DEBUG)
358
+ .with_test_writer()
359
+ .try_init();
360
+
361
+ // Verifies that check_environment_variables (reached via validate_plugins_at_startup) doesn't panic
362
+ let result = validate_plugins_at_startup();
363
+ assert!(result.is_ok());
364
+ }
365
+
366
+ #[test]
367
+ fn test_plugin_health_status_clone() {
368
+ let status1 = PluginHealthStatus::check();
369
+ let status2 = status1.clone();
370
+
371
+ // Cloned status should report the same counts as the original
372
+ assert_eq!(status1.ocr_backends_count, status2.ocr_backends_count);
373
+ assert_eq!(status1.extractors_count, status2.extractors_count);
374
+ assert_eq!(status1.post_processors_count, status2.post_processors_count);
375
+ assert_eq!(status1.validators_count, status2.validators_count);
376
+ }
377
+
378
+ #[test]
379
+ fn test_plugin_health_status_debug_format() {
380
+ let status = PluginHealthStatus::check();
381
+ let debug_str = format!("{:?}", status);
382
+ assert!(!debug_str.is_empty());
383
+ assert!(debug_str.contains("ocr_backends_count"));
384
+ }
385
+ }
@@ -26,10 +26,10 @@ async fn test_chunk_basic() {
26
26
  })
27
27
  .to_string(),
28
28
  ))
29
- .unwrap(),
29
+ .expect("Operation failed"),
30
30
  )
31
31
  .await
32
- .unwrap();
32
+ .expect("Operation failed");
33
33
 
34
34
  assert_eq!(response.status(), StatusCode::OK);
35
35
  }
@@ -44,10 +44,10 @@ async fn test_chunk_empty_text_returns_400() {
44
44
  .method("POST")
45
45
  .header("content-type", "application/json")
46
46
  .body(Body::from(json!({"text": ""}).to_string()))
47
- .unwrap(),
47
+ .expect("Operation failed"),
48
48
  )
49
49
  .await
50
- .unwrap();
50
+ .expect("Operation failed");
51
51
 
52
52
  assert_eq!(response.status(), StatusCode::BAD_REQUEST);
53
53
  }
@@ -68,10 +68,10 @@ async fn test_chunk_markdown_strategy() {
68
68
  })
69
69
  .to_string(),
70
70
  ))
71
- .unwrap(),
71
+ .expect("Operation failed"),
72
72
  )
73
73
  .await
74
- .unwrap();
74
+ .expect("Operation failed");
75
75
 
76
76
  assert_eq!(response.status(), StatusCode::OK);
77
77
  }
@@ -99,15 +99,17 @@ async fn test_chunk_response_structure() {
99
99
  })
100
100
  .to_string(),
101
101
  ))
102
- .unwrap(),
102
+ .expect("Operation failed"),
103
103
  )
104
104
  .await
105
- .unwrap();
105
+ .expect("Operation failed");
106
106
 
107
107
  assert_eq!(response.status(), StatusCode::OK);
108
108
 
109
- let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
110
- let chunk_response: ChunkResponse = serde_json::from_slice(&body).unwrap();
109
+ let body = axum::body::to_bytes(response.into_body(), usize::MAX)
110
+ .await
111
+ .expect("Failed to convert to bytes");
112
+ let chunk_response: ChunkResponse = serde_json::from_slice(&body).expect("Failed to deserialize");
111
113
 
112
114
  // Verify response structure
113
115
  assert!(chunk_response.chunk_count > 0);
@@ -143,10 +145,10 @@ async fn test_chunk_invalid_strategy_returns_400() {
143
145
  })
144
146
  .to_string(),
145
147
  ))
146
- .unwrap(),
148
+ .expect("Operation failed"),
147
149
  )
148
150
  .await
149
- .unwrap();
151
+ .expect("Operation failed");
150
152
 
151
153
  assert_eq!(response.status(), StatusCode::BAD_REQUEST);
152
154
  }
@@ -168,15 +170,17 @@ async fn test_chunk_with_defaults() {
168
170
  })
169
171
  .to_string(),
170
172
  ))
171
- .unwrap(),
173
+ .expect("Operation failed"),
172
174
  )
173
175
  .await
174
- .unwrap();
176
+ .expect("Operation failed");
175
177
 
176
178
  assert_eq!(response.status(), StatusCode::OK);
177
179
 
178
- let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
179
- let chunk_response: ChunkResponse = serde_json::from_slice(&body).unwrap();
180
+ let body = axum::body::to_bytes(response.into_body(), usize::MAX)
181
+ .await
182
+ .expect("Failed to convert to bytes");
183
+ let chunk_response: ChunkResponse = serde_json::from_slice(&body).expect("Failed to deserialize");
180
184
 
181
185
  // Verify defaults are applied
182
186
  assert_eq!(chunk_response.config.max_characters, 2000);
@@ -195,10 +199,10 @@ async fn test_chunk_malformed_json_returns_400() {
195
199
  .method("POST")
196
200
  .header("content-type", "application/json")
197
201
  .body(Body::from("{invalid json}"))
198
- .unwrap(),
202
+ .expect("Operation failed"),
199
203
  )
200
204
  .await
201
- .unwrap();
205
+ .expect("Operation failed");
202
206
 
203
207
  assert_eq!(response.status(), StatusCode::BAD_REQUEST);
204
208
  }
@@ -221,15 +225,17 @@ async fn test_chunk_case_insensitive_chunker_type() {
221
225
  })
222
226
  .to_string(),
223
227
  ))
224
- .unwrap(),
228
+ .expect("Operation failed"),
225
229
  )
226
230
  .await
227
- .unwrap();
231
+ .expect("Operation failed");
228
232
 
229
233
  assert_eq!(response.status(), StatusCode::OK);
230
234
 
231
- let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
232
- let chunk_response: ChunkResponse = serde_json::from_slice(&body).unwrap();
235
+ let body = axum::body::to_bytes(response.into_body(), usize::MAX)
236
+ .await
237
+ .expect("Failed to convert to bytes");
238
+ let chunk_response: ChunkResponse = serde_json::from_slice(&body).expect("Failed to deserialize");
233
239
 
234
240
  // Verify it's normalized to lowercase
235
241
  assert_eq!(chunk_response.chunker_type, "markdown");
@@ -258,15 +264,17 @@ async fn test_chunk_long_text() {
258
264
  })
259
265
  .to_string(),
260
266
  ))
261
- .unwrap(),
267
+ .expect("Operation failed"),
262
268
  )
263
269
  .await
264
- .unwrap();
270
+ .expect("Operation failed");
265
271
 
266
272
  assert_eq!(response.status(), StatusCode::OK);
267
273
 
268
- let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
269
- let chunk_response: ChunkResponse = serde_json::from_slice(&body).unwrap();
274
+ let body = axum::body::to_bytes(response.into_body(), usize::MAX)
275
+ .await
276
+ .expect("Failed to convert to bytes");
277
+ let chunk_response: ChunkResponse = serde_json::from_slice(&body).expect("Failed to deserialize");
270
278
 
271
279
  // Should have multiple chunks
272
280
  assert!(chunk_response.chunk_count > 1);
@@ -296,15 +304,17 @@ async fn test_chunk_custom_config() {
296
304
  })
297
305
  .to_string(),
298
306
  ))
299
- .unwrap(),
307
+ .expect("Operation failed"),
300
308
  )
301
309
  .await
302
- .unwrap();
310
+ .expect("Operation failed");
303
311
 
304
312
  assert_eq!(response.status(), StatusCode::OK);
305
313
 
306
- let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
307
- let chunk_response: ChunkResponse = serde_json::from_slice(&body).unwrap();
314
+ let body = axum::body::to_bytes(response.into_body(), usize::MAX)
315
+ .await
316
+ .expect("Failed to convert to bytes");
317
+ let chunk_response: ChunkResponse = serde_json::from_slice(&body).expect("Failed to deserialize");
308
318
 
309
319
  // Verify custom config was applied
310
320
  assert_eq!(chunk_response.config.max_characters, 30);