kreuzberg 4.1.2 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/kreuzberg.gemspec +13 -1
- data/lib/kreuzberg/config.rb +70 -35
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +5 -1
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +55 -53
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +10 -2
|
@@ -20,14 +20,21 @@ async fn test_health_endpoint() {
|
|
|
20
20
|
let app = create_router(ExtractionConfig::default());
|
|
21
21
|
|
|
22
22
|
let response = app
|
|
23
|
-
.oneshot(
|
|
23
|
+
.oneshot(
|
|
24
|
+
Request::builder()
|
|
25
|
+
.uri("/health")
|
|
26
|
+
.body(Body::empty())
|
|
27
|
+
.expect("Failed to create HTTP request body"),
|
|
28
|
+
)
|
|
24
29
|
.await
|
|
25
|
-
.
|
|
30
|
+
.expect("Failed to send HTTP request");
|
|
26
31
|
|
|
27
32
|
assert_eq!(response.status(), StatusCode::OK);
|
|
28
33
|
|
|
29
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
30
|
-
|
|
34
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
35
|
+
.await
|
|
36
|
+
.expect("Failed to read HTTP response body");
|
|
37
|
+
let health: HealthResponse = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
31
38
|
|
|
32
39
|
assert_eq!(health.status, "healthy");
|
|
33
40
|
assert!(!health.version.is_empty());
|
|
@@ -39,14 +46,21 @@ async fn test_info_endpoint() {
|
|
|
39
46
|
let app = create_router(ExtractionConfig::default());
|
|
40
47
|
|
|
41
48
|
let response = app
|
|
42
|
-
.oneshot(
|
|
49
|
+
.oneshot(
|
|
50
|
+
Request::builder()
|
|
51
|
+
.uri("/info")
|
|
52
|
+
.body(Body::empty())
|
|
53
|
+
.expect("Failed to create HTTP request body"),
|
|
54
|
+
)
|
|
43
55
|
.await
|
|
44
|
-
.
|
|
56
|
+
.expect("Failed to send HTTP request");
|
|
45
57
|
|
|
46
58
|
assert_eq!(response.status(), StatusCode::OK);
|
|
47
59
|
|
|
48
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
49
|
-
|
|
60
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
61
|
+
.await
|
|
62
|
+
.expect("Failed to read HTTP response body");
|
|
63
|
+
let info: InfoResponse = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
50
64
|
|
|
51
65
|
assert!(!info.version.is_empty());
|
|
52
66
|
assert!(info.rust_backend);
|
|
@@ -67,10 +81,10 @@ async fn test_extract_no_files() {
|
|
|
67
81
|
.uri("/extract")
|
|
68
82
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
69
83
|
.body(Body::from(body_content))
|
|
70
|
-
.
|
|
84
|
+
.expect("Operation failed"),
|
|
71
85
|
)
|
|
72
86
|
.await
|
|
73
|
-
.
|
|
87
|
+
.expect("Operation failed");
|
|
74
88
|
|
|
75
89
|
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
|
|
76
90
|
}
|
|
@@ -100,19 +114,26 @@ async fn test_extract_text_file() {
|
|
|
100
114
|
.uri("/extract")
|
|
101
115
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
102
116
|
.body(Body::from(body_content))
|
|
103
|
-
.
|
|
117
|
+
.expect("Operation failed"),
|
|
104
118
|
)
|
|
105
119
|
.await
|
|
106
|
-
.
|
|
120
|
+
.expect("Operation failed");
|
|
107
121
|
|
|
108
122
|
assert_eq!(response.status(), StatusCode::OK);
|
|
109
123
|
|
|
110
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
111
|
-
|
|
124
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
125
|
+
.await
|
|
126
|
+
.expect("Failed to read HTTP response body");
|
|
127
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
112
128
|
|
|
113
129
|
assert_eq!(results.len(), 1);
|
|
114
130
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
115
|
-
assert!(
|
|
131
|
+
assert!(
|
|
132
|
+
results[0]["content"]
|
|
133
|
+
.as_str()
|
|
134
|
+
.expect("Failed to extract string from JSON value")
|
|
135
|
+
.contains("Hello, world!")
|
|
136
|
+
);
|
|
116
137
|
|
|
117
138
|
assert!(
|
|
118
139
|
results[0]["chunks"].is_null(),
|
|
@@ -158,19 +179,26 @@ async fn test_extract_with_config() {
|
|
|
158
179
|
.uri("/extract")
|
|
159
180
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
160
181
|
.body(Body::from(body_content))
|
|
161
|
-
.
|
|
182
|
+
.expect("Operation failed"),
|
|
162
183
|
)
|
|
163
184
|
.await
|
|
164
|
-
.
|
|
185
|
+
.expect("Operation failed");
|
|
165
186
|
|
|
166
187
|
assert_eq!(response.status(), StatusCode::OK);
|
|
167
188
|
|
|
168
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
169
|
-
|
|
189
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
190
|
+
.await
|
|
191
|
+
.expect("Failed to read HTTP response body");
|
|
192
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
170
193
|
|
|
171
194
|
assert_eq!(results.len(), 1);
|
|
172
195
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
173
|
-
assert!(
|
|
196
|
+
assert!(
|
|
197
|
+
results[0]["content"]
|
|
198
|
+
.as_str()
|
|
199
|
+
.expect("Failed to extract string from JSON value")
|
|
200
|
+
.contains("Hello, world!")
|
|
201
|
+
);
|
|
174
202
|
|
|
175
203
|
assert!(
|
|
176
204
|
results[0]["chunks"].is_null(),
|
|
@@ -214,10 +242,10 @@ async fn test_extract_invalid_config() {
|
|
|
214
242
|
.uri("/extract")
|
|
215
243
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
216
244
|
.body(Body::from(body_content))
|
|
217
|
-
.
|
|
245
|
+
.expect("Operation failed"),
|
|
218
246
|
)
|
|
219
247
|
.await
|
|
220
|
-
.
|
|
248
|
+
.expect("Operation failed");
|
|
221
249
|
|
|
222
250
|
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
|
|
223
251
|
}
|
|
@@ -253,19 +281,31 @@ async fn test_extract_multiple_files() {
|
|
|
253
281
|
.uri("/extract")
|
|
254
282
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
255
283
|
.body(Body::from(body_content))
|
|
256
|
-
.
|
|
284
|
+
.expect("Operation failed"),
|
|
257
285
|
)
|
|
258
286
|
.await
|
|
259
|
-
.
|
|
287
|
+
.expect("Operation failed");
|
|
260
288
|
|
|
261
289
|
assert_eq!(response.status(), StatusCode::OK);
|
|
262
290
|
|
|
263
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
264
|
-
|
|
291
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
292
|
+
.await
|
|
293
|
+
.expect("Failed to read HTTP response body");
|
|
294
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
265
295
|
|
|
266
296
|
assert_eq!(results.len(), 2);
|
|
267
|
-
assert!(
|
|
268
|
-
|
|
297
|
+
assert!(
|
|
298
|
+
results[0]["content"]
|
|
299
|
+
.as_str()
|
|
300
|
+
.expect("Failed to extract string from JSON value")
|
|
301
|
+
.contains("First file")
|
|
302
|
+
);
|
|
303
|
+
assert!(
|
|
304
|
+
results[1]["content"]
|
|
305
|
+
.as_str()
|
|
306
|
+
.expect("Failed to extract string from JSON value")
|
|
307
|
+
.contains("Second file")
|
|
308
|
+
);
|
|
269
309
|
|
|
270
310
|
for result in &results {
|
|
271
311
|
assert!(
|
|
@@ -304,19 +344,26 @@ async fn test_extract_markdown_file() {
|
|
|
304
344
|
.uri("/extract")
|
|
305
345
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
306
346
|
.body(Body::from(body_content))
|
|
307
|
-
.
|
|
347
|
+
.expect("Operation failed"),
|
|
308
348
|
)
|
|
309
349
|
.await
|
|
310
|
-
.
|
|
350
|
+
.expect("Operation failed");
|
|
311
351
|
|
|
312
352
|
assert_eq!(response.status(), StatusCode::OK);
|
|
313
353
|
|
|
314
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
315
|
-
|
|
354
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
355
|
+
.await
|
|
356
|
+
.expect("Failed to read HTTP response body");
|
|
357
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
316
358
|
|
|
317
359
|
assert_eq!(results.len(), 1);
|
|
318
360
|
assert_eq!(results[0]["mime_type"], "text/markdown");
|
|
319
|
-
assert!(
|
|
361
|
+
assert!(
|
|
362
|
+
results[0]["content"]
|
|
363
|
+
.as_str()
|
|
364
|
+
.expect("Failed to extract string from JSON value")
|
|
365
|
+
.contains("Heading")
|
|
366
|
+
);
|
|
320
367
|
}
|
|
321
368
|
|
|
322
369
|
/// Test extract endpoint with JSON content.
|
|
@@ -344,15 +391,17 @@ async fn test_extract_json_file() {
|
|
|
344
391
|
.uri("/extract")
|
|
345
392
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
346
393
|
.body(Body::from(body_content))
|
|
347
|
-
.
|
|
394
|
+
.expect("Operation failed"),
|
|
348
395
|
)
|
|
349
396
|
.await
|
|
350
|
-
.
|
|
397
|
+
.expect("Operation failed");
|
|
351
398
|
|
|
352
399
|
assert_eq!(response.status(), StatusCode::OK);
|
|
353
400
|
|
|
354
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
355
|
-
|
|
401
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
402
|
+
.await
|
|
403
|
+
.expect("Failed to read HTTP response body");
|
|
404
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
356
405
|
|
|
357
406
|
assert_eq!(results.len(), 1);
|
|
358
407
|
assert_eq!(results[0]["mime_type"], "application/json");
|
|
@@ -384,19 +433,26 @@ async fn test_extract_xml_file() {
|
|
|
384
433
|
.uri("/extract")
|
|
385
434
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
386
435
|
.body(Body::from(body_content))
|
|
387
|
-
.
|
|
436
|
+
.expect("Operation failed"),
|
|
388
437
|
)
|
|
389
438
|
.await
|
|
390
|
-
.
|
|
439
|
+
.expect("Operation failed");
|
|
391
440
|
|
|
392
441
|
assert_eq!(response.status(), StatusCode::OK);
|
|
393
442
|
|
|
394
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
395
|
-
|
|
443
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
444
|
+
.await
|
|
445
|
+
.expect("Failed to read HTTP response body");
|
|
446
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
396
447
|
|
|
397
448
|
assert_eq!(results.len(), 1);
|
|
398
449
|
assert_eq!(results[0]["mime_type"], "application/xml");
|
|
399
|
-
assert!(
|
|
450
|
+
assert!(
|
|
451
|
+
results[0]["content"]
|
|
452
|
+
.as_str()
|
|
453
|
+
.expect("Failed to extract string from JSON value")
|
|
454
|
+
.contains("test")
|
|
455
|
+
);
|
|
400
456
|
}
|
|
401
457
|
|
|
402
458
|
/// Test extract endpoint with HTML content.
|
|
@@ -425,19 +481,26 @@ async fn test_extract_html_file() {
|
|
|
425
481
|
.uri("/extract")
|
|
426
482
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
427
483
|
.body(Body::from(body_content))
|
|
428
|
-
.
|
|
484
|
+
.expect("Operation failed"),
|
|
429
485
|
)
|
|
430
486
|
.await
|
|
431
|
-
.
|
|
487
|
+
.expect("Operation failed");
|
|
432
488
|
|
|
433
489
|
assert_eq!(response.status(), StatusCode::OK);
|
|
434
490
|
|
|
435
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
436
|
-
|
|
491
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
492
|
+
.await
|
|
493
|
+
.expect("Failed to read HTTP response body");
|
|
494
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
437
495
|
|
|
438
496
|
assert_eq!(results.len(), 1);
|
|
439
497
|
assert_eq!(results[0]["mime_type"], "text/html");
|
|
440
|
-
assert!(
|
|
498
|
+
assert!(
|
|
499
|
+
results[0]["content"]
|
|
500
|
+
.as_str()
|
|
501
|
+
.expect("Failed to extract string from JSON value")
|
|
502
|
+
.contains("Title")
|
|
503
|
+
);
|
|
441
504
|
}
|
|
442
505
|
|
|
443
506
|
/// Test extract endpoint with missing Content-Type header.
|
|
@@ -451,10 +514,10 @@ async fn test_extract_missing_content_type() {
|
|
|
451
514
|
.method("POST")
|
|
452
515
|
.uri("/extract")
|
|
453
516
|
.body(Body::from("some data"))
|
|
454
|
-
.
|
|
517
|
+
.expect("Operation failed"),
|
|
455
518
|
)
|
|
456
519
|
.await
|
|
457
|
-
.
|
|
520
|
+
.expect("Operation failed");
|
|
458
521
|
|
|
459
522
|
assert!(response.status() == StatusCode::BAD_REQUEST || response.status() == StatusCode::UNSUPPORTED_MEDIA_TYPE);
|
|
460
523
|
}
|
|
@@ -484,15 +547,17 @@ async fn test_extract_empty_file() {
|
|
|
484
547
|
.uri("/extract")
|
|
485
548
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
486
549
|
.body(Body::from(body_content))
|
|
487
|
-
.
|
|
550
|
+
.expect("Operation failed"),
|
|
488
551
|
)
|
|
489
552
|
.await
|
|
490
|
-
.
|
|
553
|
+
.expect("Operation failed");
|
|
491
554
|
|
|
492
555
|
assert_eq!(response.status(), StatusCode::OK);
|
|
493
556
|
|
|
494
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
495
|
-
|
|
557
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
558
|
+
.await
|
|
559
|
+
.expect("Failed to read HTTP response body");
|
|
560
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
496
561
|
|
|
497
562
|
assert_eq!(results.len(), 1);
|
|
498
563
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
@@ -523,10 +588,10 @@ async fn test_extract_unsupported_mime_type() {
|
|
|
523
588
|
.uri("/extract")
|
|
524
589
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
525
590
|
.body(Body::from(body_content))
|
|
526
|
-
.
|
|
591
|
+
.expect("Operation failed"),
|
|
527
592
|
)
|
|
528
593
|
.await
|
|
529
|
-
.
|
|
594
|
+
.expect("Operation failed");
|
|
530
595
|
|
|
531
596
|
assert!(
|
|
532
597
|
response.status() == StatusCode::UNPROCESSABLE_ENTITY || response.status() == StatusCode::INTERNAL_SERVER_ERROR
|
|
@@ -558,10 +623,10 @@ async fn test_extract_without_filename() {
|
|
|
558
623
|
.uri("/extract")
|
|
559
624
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
560
625
|
.body(Body::from(body_content))
|
|
561
|
-
.
|
|
626
|
+
.expect("Operation failed"),
|
|
562
627
|
)
|
|
563
628
|
.await
|
|
564
|
-
.
|
|
629
|
+
.expect("Operation failed");
|
|
565
630
|
|
|
566
631
|
assert_eq!(response.status(), StatusCode::OK);
|
|
567
632
|
}
|
|
@@ -581,10 +646,10 @@ async fn test_extract_malformed_multipart() {
|
|
|
581
646
|
.uri("/extract")
|
|
582
647
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
583
648
|
.body(Body::from(body_content))
|
|
584
|
-
.
|
|
649
|
+
.expect("Operation failed"),
|
|
585
650
|
)
|
|
586
651
|
.await
|
|
587
|
-
.
|
|
652
|
+
.expect("Operation failed");
|
|
588
653
|
|
|
589
654
|
assert!(response.status().is_client_error() || response.status().is_server_error());
|
|
590
655
|
}
|
|
@@ -595,9 +660,14 @@ async fn test_cors_headers() {
|
|
|
595
660
|
let app = create_router(ExtractionConfig::default());
|
|
596
661
|
|
|
597
662
|
let response = app
|
|
598
|
-
.oneshot(
|
|
663
|
+
.oneshot(
|
|
664
|
+
Request::builder()
|
|
665
|
+
.uri("/health")
|
|
666
|
+
.body(Body::empty())
|
|
667
|
+
.expect("Failed to create HTTP request body"),
|
|
668
|
+
)
|
|
599
669
|
.await
|
|
600
|
-
.
|
|
670
|
+
.expect("Failed to send HTTP request");
|
|
601
671
|
|
|
602
672
|
assert_eq!(response.status(), StatusCode::OK);
|
|
603
673
|
|
|
@@ -618,10 +688,10 @@ async fn test_cors_preflight() {
|
|
|
618
688
|
.header("origin", "http://example.com")
|
|
619
689
|
.header("access-control-request-method", "POST")
|
|
620
690
|
.body(Body::empty())
|
|
621
|
-
.
|
|
691
|
+
.expect("Operation failed"),
|
|
622
692
|
)
|
|
623
693
|
.await
|
|
624
|
-
.
|
|
694
|
+
.expect("Operation failed");
|
|
625
695
|
|
|
626
696
|
assert!(response.status().is_success() || response.status() == StatusCode::NO_CONTENT);
|
|
627
697
|
}
|
|
@@ -641,15 +711,17 @@ async fn test_error_response_format_validation() {
|
|
|
641
711
|
.uri("/extract")
|
|
642
712
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
643
713
|
.body(Body::from(body_content))
|
|
644
|
-
.
|
|
714
|
+
.expect("Operation failed"),
|
|
645
715
|
)
|
|
646
716
|
.await
|
|
647
|
-
.
|
|
717
|
+
.expect("Operation failed");
|
|
648
718
|
|
|
649
719
|
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
|
|
650
720
|
|
|
651
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
652
|
-
|
|
721
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
722
|
+
.await
|
|
723
|
+
.expect("Failed to read HTTP response body");
|
|
724
|
+
let error: serde_json::Value = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
653
725
|
|
|
654
726
|
assert!(error["error_type"].is_string());
|
|
655
727
|
assert!(error["message"].is_string());
|
|
@@ -686,18 +758,25 @@ async fn test_error_response_format_parsing() {
|
|
|
686
758
|
.uri("/extract")
|
|
687
759
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
688
760
|
.body(Body::from(body_content))
|
|
689
|
-
.
|
|
761
|
+
.expect("Operation failed"),
|
|
690
762
|
)
|
|
691
763
|
.await
|
|
692
|
-
.
|
|
764
|
+
.expect("Operation failed");
|
|
693
765
|
|
|
694
766
|
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
|
|
695
767
|
|
|
696
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
697
|
-
|
|
768
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
769
|
+
.await
|
|
770
|
+
.expect("Failed to read HTTP response body");
|
|
771
|
+
let error: serde_json::Value = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
698
772
|
|
|
699
773
|
assert_eq!(error["error_type"], "ValidationError");
|
|
700
|
-
assert!(
|
|
774
|
+
assert!(
|
|
775
|
+
error["message"]
|
|
776
|
+
.as_str()
|
|
777
|
+
.expect("Failed to extract string from JSON value")
|
|
778
|
+
.contains("configuration")
|
|
779
|
+
);
|
|
701
780
|
}
|
|
702
781
|
|
|
703
782
|
/// Test 404 error for non-existent endpoint.
|
|
@@ -706,9 +785,14 @@ async fn test_not_found_endpoint() {
|
|
|
706
785
|
let app = create_router(ExtractionConfig::default());
|
|
707
786
|
|
|
708
787
|
let response = app
|
|
709
|
-
.oneshot(
|
|
788
|
+
.oneshot(
|
|
789
|
+
Request::builder()
|
|
790
|
+
.uri("/nonexistent")
|
|
791
|
+
.body(Body::empty())
|
|
792
|
+
.expect("Failed to create HTTP request body"),
|
|
793
|
+
)
|
|
710
794
|
.await
|
|
711
|
-
.
|
|
795
|
+
.expect("Failed to send HTTP request");
|
|
712
796
|
|
|
713
797
|
assert_eq!(response.status(), StatusCode::NOT_FOUND);
|
|
714
798
|
}
|
|
@@ -738,15 +822,17 @@ async fn test_extract_large_file() {
|
|
|
738
822
|
.uri("/extract")
|
|
739
823
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
740
824
|
.body(Body::from(body_content))
|
|
741
|
-
.
|
|
825
|
+
.expect("Operation failed"),
|
|
742
826
|
)
|
|
743
827
|
.await
|
|
744
|
-
.
|
|
828
|
+
.expect("Operation failed");
|
|
745
829
|
|
|
746
830
|
assert_eq!(response.status(), StatusCode::OK);
|
|
747
831
|
|
|
748
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
749
|
-
|
|
832
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
833
|
+
.await
|
|
834
|
+
.expect("Failed to read HTTP response body");
|
|
835
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
750
836
|
|
|
751
837
|
assert_eq!(results.len(), 1);
|
|
752
838
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
@@ -786,7 +872,7 @@ async fn test_concurrent_requests() {
|
|
|
786
872
|
.uri("/extract")
|
|
787
873
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
788
874
|
.body(Body::from(body_clone))
|
|
789
|
-
.
|
|
875
|
+
.expect("Operation failed"),
|
|
790
876
|
)
|
|
791
877
|
.await
|
|
792
878
|
});
|
|
@@ -795,7 +881,10 @@ async fn test_concurrent_requests() {
|
|
|
795
881
|
}
|
|
796
882
|
|
|
797
883
|
for handle in handles {
|
|
798
|
-
let response = handle
|
|
884
|
+
let response = handle
|
|
885
|
+
.await
|
|
886
|
+
.expect("Async operation failed")
|
|
887
|
+
.expect("Async operation failed");
|
|
799
888
|
assert_eq!(response.status(), StatusCode::OK);
|
|
800
889
|
}
|
|
801
890
|
}
|
|
@@ -806,14 +895,21 @@ async fn test_cache_stats_endpoint() {
|
|
|
806
895
|
let app = create_router(ExtractionConfig::default());
|
|
807
896
|
|
|
808
897
|
let response = app
|
|
809
|
-
.oneshot(
|
|
898
|
+
.oneshot(
|
|
899
|
+
Request::builder()
|
|
900
|
+
.uri("/cache/stats")
|
|
901
|
+
.body(Body::empty())
|
|
902
|
+
.expect("Failed to create HTTP request body"),
|
|
903
|
+
)
|
|
810
904
|
.await
|
|
811
|
-
.
|
|
905
|
+
.expect("Failed to send HTTP request");
|
|
812
906
|
|
|
813
907
|
assert_eq!(response.status(), StatusCode::OK);
|
|
814
908
|
|
|
815
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
816
|
-
|
|
909
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
910
|
+
.await
|
|
911
|
+
.expect("Failed to read HTTP response body");
|
|
912
|
+
let stats: serde_json::Value = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
817
913
|
|
|
818
914
|
assert!(stats["directory"].is_string());
|
|
819
915
|
assert!(stats["total_files"].is_number());
|
|
@@ -831,15 +927,17 @@ async fn test_cache_clear_endpoint() {
|
|
|
831
927
|
.method("DELETE")
|
|
832
928
|
.uri("/cache/clear")
|
|
833
929
|
.body(Body::empty())
|
|
834
|
-
.
|
|
930
|
+
.expect("Operation failed"),
|
|
835
931
|
)
|
|
836
932
|
.await
|
|
837
|
-
.
|
|
933
|
+
.expect("Operation failed");
|
|
838
934
|
|
|
839
935
|
assert_eq!(response.status(), StatusCode::OK);
|
|
840
936
|
|
|
841
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
842
|
-
|
|
937
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
938
|
+
.await
|
|
939
|
+
.expect("Failed to read HTTP response body");
|
|
940
|
+
let clear_result: serde_json::Value = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
843
941
|
|
|
844
942
|
assert!(clear_result["directory"].is_string());
|
|
845
943
|
assert!(clear_result["removed_files"].is_number());
|
|
@@ -877,15 +975,17 @@ async fn test_extract_mixed_content_types() {
|
|
|
877
975
|
.uri("/extract")
|
|
878
976
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
879
977
|
.body(Body::from(body_content))
|
|
880
|
-
.
|
|
978
|
+
.expect("Operation failed"),
|
|
881
979
|
)
|
|
882
980
|
.await
|
|
883
|
-
.
|
|
981
|
+
.expect("Operation failed");
|
|
884
982
|
|
|
885
983
|
assert_eq!(response.status(), StatusCode::OK);
|
|
886
984
|
|
|
887
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
888
|
-
|
|
985
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
986
|
+
.await
|
|
987
|
+
.expect("Failed to read HTTP response body");
|
|
988
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
889
989
|
|
|
890
990
|
assert_eq!(results.len(), 2);
|
|
891
991
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
@@ -921,10 +1021,10 @@ async fn test_extract_unknown_multipart_field() {
|
|
|
921
1021
|
.uri("/extract")
|
|
922
1022
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
923
1023
|
.body(Body::from(body_content))
|
|
924
|
-
.
|
|
1024
|
+
.expect("Operation failed"),
|
|
925
1025
|
)
|
|
926
1026
|
.await
|
|
927
|
-
.
|
|
1027
|
+
.expect("Operation failed");
|
|
928
1028
|
|
|
929
1029
|
assert_eq!(response.status(), StatusCode::OK);
|
|
930
1030
|
}
|
|
@@ -953,10 +1053,10 @@ async fn test_extract_default_mime_type() {
|
|
|
953
1053
|
.uri("/extract")
|
|
954
1054
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
955
1055
|
.body(Body::from(body_content))
|
|
956
|
-
.
|
|
1056
|
+
.expect("Operation failed"),
|
|
957
1057
|
)
|
|
958
1058
|
.await
|
|
959
|
-
.
|
|
1059
|
+
.expect("Operation failed");
|
|
960
1060
|
|
|
961
1061
|
assert!(
|
|
962
1062
|
response.status() == StatusCode::OK
|
|
@@ -997,10 +1097,10 @@ async fn test_size_limits_custom_limits() {
|
|
|
997
1097
|
.uri("/extract")
|
|
998
1098
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
999
1099
|
.body(Body::from(body_content))
|
|
1000
|
-
.
|
|
1100
|
+
.expect("Operation failed"),
|
|
1001
1101
|
)
|
|
1002
1102
|
.await
|
|
1003
|
-
.
|
|
1103
|
+
.expect("Operation failed");
|
|
1004
1104
|
|
|
1005
1105
|
assert_eq!(response.status(), StatusCode::OK);
|
|
1006
1106
|
}
|
|
@@ -1081,10 +1181,10 @@ async fn test_extract_file_larger_than_2mb() {
|
|
|
1081
1181
|
.uri("/extract")
|
|
1082
1182
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1083
1183
|
.body(Body::from(body_content))
|
|
1084
|
-
.
|
|
1184
|
+
.expect("Operation failed"),
|
|
1085
1185
|
)
|
|
1086
1186
|
.await
|
|
1087
|
-
.
|
|
1187
|
+
.expect("Operation failed");
|
|
1088
1188
|
|
|
1089
1189
|
assert_eq!(
|
|
1090
1190
|
response.status(),
|
|
@@ -1092,12 +1192,19 @@ async fn test_extract_file_larger_than_2mb() {
|
|
|
1092
1192
|
"3MB file should be accepted. If this fails with 400 or 413, the size limit fix is not working correctly."
|
|
1093
1193
|
);
|
|
1094
1194
|
|
|
1095
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1096
|
-
|
|
1195
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1196
|
+
.await
|
|
1197
|
+
.expect("Failed to read HTTP response body");
|
|
1198
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
1097
1199
|
|
|
1098
1200
|
assert_eq!(results.len(), 1);
|
|
1099
1201
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
1100
|
-
assert!(
|
|
1202
|
+
assert!(
|
|
1203
|
+
results[0]["content"]
|
|
1204
|
+
.as_str()
|
|
1205
|
+
.expect("Failed to extract string from JSON value")
|
|
1206
|
+
.contains("A")
|
|
1207
|
+
);
|
|
1101
1208
|
}
|
|
1102
1209
|
|
|
1103
1210
|
/// Test extracting a 2MB file (just above the old Axum limit).
|
|
@@ -1128,10 +1235,10 @@ async fn test_extract_2mb_file() {
|
|
|
1128
1235
|
.uri("/extract")
|
|
1129
1236
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1130
1237
|
.body(Body::from(body_content))
|
|
1131
|
-
.
|
|
1238
|
+
.expect("Operation failed"),
|
|
1132
1239
|
)
|
|
1133
1240
|
.await
|
|
1134
|
-
.
|
|
1241
|
+
.expect("Operation failed");
|
|
1135
1242
|
|
|
1136
1243
|
assert_eq!(
|
|
1137
1244
|
response.status(),
|
|
@@ -1139,12 +1246,19 @@ async fn test_extract_2mb_file() {
|
|
|
1139
1246
|
"2MB file should be accepted (boundary case)"
|
|
1140
1247
|
);
|
|
1141
1248
|
|
|
1142
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1143
|
-
|
|
1249
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1250
|
+
.await
|
|
1251
|
+
.expect("Failed to read HTTP response body");
|
|
1252
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
1144
1253
|
|
|
1145
1254
|
assert_eq!(results.len(), 1);
|
|
1146
1255
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
1147
|
-
assert!(
|
|
1256
|
+
assert!(
|
|
1257
|
+
results[0]["content"]
|
|
1258
|
+
.as_str()
|
|
1259
|
+
.expect("Failed to extract string from JSON value")
|
|
1260
|
+
.contains("X")
|
|
1261
|
+
);
|
|
1148
1262
|
}
|
|
1149
1263
|
|
|
1150
1264
|
/// Test extracting a 5MB file.
|
|
@@ -1174,19 +1288,26 @@ async fn test_extract_5mb_file() {
|
|
|
1174
1288
|
.uri("/extract")
|
|
1175
1289
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1176
1290
|
.body(Body::from(body_content))
|
|
1177
|
-
.
|
|
1291
|
+
.expect("Operation failed"),
|
|
1178
1292
|
)
|
|
1179
1293
|
.await
|
|
1180
|
-
.
|
|
1294
|
+
.expect("Operation failed");
|
|
1181
1295
|
|
|
1182
1296
|
assert_eq!(response.status(), StatusCode::OK, "5MB file should be accepted");
|
|
1183
1297
|
|
|
1184
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1185
|
-
|
|
1298
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1299
|
+
.await
|
|
1300
|
+
.expect("Failed to read HTTP response body");
|
|
1301
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
1186
1302
|
|
|
1187
1303
|
assert_eq!(results.len(), 1);
|
|
1188
1304
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
1189
|
-
assert!(
|
|
1305
|
+
assert!(
|
|
1306
|
+
results[0]["content"]
|
|
1307
|
+
.as_str()
|
|
1308
|
+
.expect("Failed to extract string from JSON value")
|
|
1309
|
+
.contains("B")
|
|
1310
|
+
);
|
|
1190
1311
|
}
|
|
1191
1312
|
|
|
1192
1313
|
/// Test extracting a 10MB file.
|
|
@@ -1216,19 +1337,26 @@ async fn test_extract_10mb_file() {
|
|
|
1216
1337
|
.uri("/extract")
|
|
1217
1338
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1218
1339
|
.body(Body::from(body_content))
|
|
1219
|
-
.
|
|
1340
|
+
.expect("Operation failed"),
|
|
1220
1341
|
)
|
|
1221
1342
|
.await
|
|
1222
|
-
.
|
|
1343
|
+
.expect("Operation failed");
|
|
1223
1344
|
|
|
1224
1345
|
assert_eq!(response.status(), StatusCode::OK, "10MB file should be accepted");
|
|
1225
1346
|
|
|
1226
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1227
|
-
|
|
1347
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1348
|
+
.await
|
|
1349
|
+
.expect("Failed to read HTTP response body");
|
|
1350
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
1228
1351
|
|
|
1229
1352
|
assert_eq!(results.len(), 1);
|
|
1230
1353
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
1231
|
-
assert!(
|
|
1354
|
+
assert!(
|
|
1355
|
+
results[0]["content"]
|
|
1356
|
+
.as_str()
|
|
1357
|
+
.expect("Failed to extract string from JSON value")
|
|
1358
|
+
.contains("C")
|
|
1359
|
+
);
|
|
1232
1360
|
}
|
|
1233
1361
|
|
|
1234
1362
|
/// Test extracting a 50MB file (half the default limit).
|
|
@@ -1259,19 +1387,26 @@ async fn test_extract_50mb_file() {
|
|
|
1259
1387
|
.uri("/extract")
|
|
1260
1388
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1261
1389
|
.body(Body::from(body_content))
|
|
1262
|
-
.
|
|
1390
|
+
.expect("Operation failed"),
|
|
1263
1391
|
)
|
|
1264
1392
|
.await
|
|
1265
|
-
.
|
|
1393
|
+
.expect("Operation failed");
|
|
1266
1394
|
|
|
1267
1395
|
assert_eq!(response.status(), StatusCode::OK, "50MB file should be accepted");
|
|
1268
1396
|
|
|
1269
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1270
|
-
|
|
1397
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1398
|
+
.await
|
|
1399
|
+
.expect("Failed to read HTTP response body");
|
|
1400
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
1271
1401
|
|
|
1272
1402
|
assert_eq!(results.len(), 1);
|
|
1273
1403
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
1274
|
-
assert!(
|
|
1404
|
+
assert!(
|
|
1405
|
+
results[0]["content"]
|
|
1406
|
+
.as_str()
|
|
1407
|
+
.expect("Failed to extract string from JSON value")
|
|
1408
|
+
.contains("D")
|
|
1409
|
+
);
|
|
1275
1410
|
}
|
|
1276
1411
|
|
|
1277
1412
|
/// Test extracting a 90MB file (near the 100MB default limit).
|
|
@@ -1302,10 +1437,10 @@ async fn test_extract_90mb_file() {
|
|
|
1302
1437
|
.uri("/extract")
|
|
1303
1438
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1304
1439
|
.body(Body::from(body_content))
|
|
1305
|
-
.
|
|
1440
|
+
.expect("Operation failed"),
|
|
1306
1441
|
)
|
|
1307
1442
|
.await
|
|
1308
|
-
.
|
|
1443
|
+
.expect("Operation failed");
|
|
1309
1444
|
|
|
1310
1445
|
assert_eq!(
|
|
1311
1446
|
response.status(),
|
|
@@ -1313,12 +1448,19 @@ async fn test_extract_90mb_file() {
|
|
|
1313
1448
|
"90MB file should be accepted (within default 100MB limit)"
|
|
1314
1449
|
);
|
|
1315
1450
|
|
|
1316
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1317
|
-
|
|
1451
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1452
|
+
.await
|
|
1453
|
+
.expect("Failed to read HTTP response body");
|
|
1454
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
1318
1455
|
|
|
1319
1456
|
assert_eq!(results.len(), 1);
|
|
1320
1457
|
assert_eq!(results[0]["mime_type"], "text/plain");
|
|
1321
|
-
assert!(
|
|
1458
|
+
assert!(
|
|
1459
|
+
results[0]["content"]
|
|
1460
|
+
.as_str()
|
|
1461
|
+
.expect("Failed to extract string from JSON value")
|
|
1462
|
+
.contains("E")
|
|
1463
|
+
);
|
|
1322
1464
|
}
|
|
1323
1465
|
|
|
1324
1466
|
/// Test extracting a file over the 100MB default limit (HTTP 400/413).
|
|
@@ -1350,10 +1492,10 @@ async fn test_extract_file_over_default_limit() {
|
|
|
1350
1492
|
.uri("/extract")
|
|
1351
1493
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1352
1494
|
.body(Body::from(body_content))
|
|
1353
|
-
.
|
|
1495
|
+
.expect("Operation failed"),
|
|
1354
1496
|
)
|
|
1355
1497
|
.await
|
|
1356
|
-
.
|
|
1498
|
+
.expect("Operation failed");
|
|
1357
1499
|
|
|
1358
1500
|
assert!(
|
|
1359
1501
|
response.status() == StatusCode::BAD_REQUEST || response.status() == StatusCode::PAYLOAD_TOO_LARGE,
|
|
@@ -1402,10 +1544,10 @@ async fn test_extract_multiple_large_files_within_limit() {
|
|
|
1402
1544
|
.uri("/extract")
|
|
1403
1545
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1404
1546
|
.body(Body::from(body_content))
|
|
1405
|
-
.
|
|
1547
|
+
.expect("Operation failed"),
|
|
1406
1548
|
)
|
|
1407
1549
|
.await
|
|
1408
|
-
.
|
|
1550
|
+
.expect("Operation failed");
|
|
1409
1551
|
|
|
1410
1552
|
assert_eq!(
|
|
1411
1553
|
response.status(),
|
|
@@ -1413,16 +1555,33 @@ async fn test_extract_multiple_large_files_within_limit() {
|
|
|
1413
1555
|
"Multiple files totaling 75MB should be accepted"
|
|
1414
1556
|
);
|
|
1415
1557
|
|
|
1416
|
-
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1417
|
-
|
|
1558
|
+
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
|
|
1559
|
+
.await
|
|
1560
|
+
.expect("Failed to read HTTP response body");
|
|
1561
|
+
let results: Vec<serde_json::Value> = serde_json::from_slice(&body).expect("Failed to deserialize JSON response");
|
|
1418
1562
|
|
|
1419
1563
|
assert_eq!(results.len(), 3, "Should have 3 results");
|
|
1420
1564
|
for result in &results {
|
|
1421
1565
|
assert_eq!(result["mime_type"], "text/plain");
|
|
1422
1566
|
}
|
|
1423
|
-
assert!(
|
|
1424
|
-
|
|
1425
|
-
|
|
1567
|
+
assert!(
|
|
1568
|
+
results[0]["content"]
|
|
1569
|
+
.as_str()
|
|
1570
|
+
.expect("Failed to extract string from JSON value")
|
|
1571
|
+
.contains("G")
|
|
1572
|
+
);
|
|
1573
|
+
assert!(
|
|
1574
|
+
results[1]["content"]
|
|
1575
|
+
.as_str()
|
|
1576
|
+
.expect("Failed to extract string from JSON value")
|
|
1577
|
+
.contains("H")
|
|
1578
|
+
);
|
|
1579
|
+
assert!(
|
|
1580
|
+
results[2]["content"]
|
|
1581
|
+
.as_str()
|
|
1582
|
+
.expect("Failed to extract string from JSON value")
|
|
1583
|
+
.contains("I")
|
|
1584
|
+
);
|
|
1426
1585
|
}
|
|
1427
1586
|
|
|
1428
1587
|
/// Test extracting multiple large files exceeding limit (HTTP 400/413).
|
|
@@ -1459,10 +1618,10 @@ async fn test_extract_multiple_large_files_exceeding_limit() {
|
|
|
1459
1618
|
.uri("/extract")
|
|
1460
1619
|
.header("content-type", format!("multipart/form-data; boundary={}", boundary))
|
|
1461
1620
|
.body(Body::from(body_content))
|
|
1462
|
-
.
|
|
1621
|
+
.expect("Operation failed"),
|
|
1463
1622
|
)
|
|
1464
1623
|
.await
|
|
1465
|
-
.
|
|
1624
|
+
.expect("Operation failed");
|
|
1466
1625
|
|
|
1467
1626
|
assert!(
|
|
1468
1627
|
response.status() == StatusCode::BAD_REQUEST || response.status() == StatusCode::PAYLOAD_TOO_LARGE,
|