mdify-cli 2.6.0__tar.gz → 2.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdify_cli-2.6.0/mdify_cli.egg-info → mdify_cli-2.7.0}/PKG-INFO +1 -1
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify/__init__.py +1 -1
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify/docling_client.py +10 -2
- {mdify_cli-2.6.0 → mdify_cli-2.7.0/mdify_cli.egg-info}/PKG-INFO +1 -1
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/pyproject.toml +1 -1
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/tests/test_docling_client.py +94 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/LICENSE +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/README.md +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/assets/mdify.png +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify/__main__.py +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify/cli.py +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify/container.py +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/SOURCES.txt +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/dependency_links.txt +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/entry_points.txt +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/requires.txt +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/top_level.txt +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/setup.cfg +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/tests/test_cli.py +0 -0
- {mdify_cli-2.6.0 → mdify_cli-2.7.0}/tests/test_container.py +0 -0
|
@@ -4,6 +4,8 @@ from dataclasses import dataclass
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
+
import mimetypes
|
|
8
|
+
|
|
7
9
|
import requests
|
|
8
10
|
|
|
9
11
|
|
|
@@ -40,6 +42,12 @@ class DoclingHTTPError(DoclingClientError):
|
|
|
40
42
|
super().__init__(f"HTTP {status_code}: {message}")
|
|
41
43
|
|
|
42
44
|
|
|
45
|
+
def _get_mime_type(file_path: Path) -> str:
|
|
46
|
+
"""Get MIME type for file, with fallback for unknown types."""
|
|
47
|
+
mime_type, _ = mimetypes.guess_type(str(file_path))
|
|
48
|
+
return mime_type or "application/octet-stream"
|
|
49
|
+
|
|
50
|
+
|
|
43
51
|
def check_health(base_url: str) -> bool:
|
|
44
52
|
"""Check if docling-serve is healthy.
|
|
45
53
|
|
|
@@ -77,7 +85,7 @@ def convert_file(
|
|
|
77
85
|
with open(file_path, "rb") as f:
|
|
78
86
|
response = requests.post(
|
|
79
87
|
f"{base_url}/v1/convert/file",
|
|
80
|
-
files={"files": (file_path.name, f, "application/pdf")},
|
|
88
|
+
files={"files": (file_path.name, f, _get_mime_type(file_path))},
|
|
81
89
|
data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
|
|
82
90
|
)
|
|
83
91
|
|
|
@@ -126,7 +134,7 @@ def convert_file_async(
|
|
|
126
134
|
with open(file_path, "rb") as f:
|
|
127
135
|
response = requests.post(
|
|
128
136
|
f"{base_url}/v1/convert/file/async",
|
|
129
|
-
files={"files": (file_path.name, f, "application/pdf")},
|
|
137
|
+
files={"files": (file_path.name, f, _get_mime_type(file_path))},
|
|
130
138
|
data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
|
|
131
139
|
)
|
|
132
140
|
|
|
@@ -356,3 +356,97 @@ class TestDoclingHTTPError:
|
|
|
356
356
|
error = DoclingHTTPError(400, "Bad Request")
|
|
357
357
|
|
|
358
358
|
assert isinstance(error, Exception)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
class TestMimeTypeDetection:
|
|
362
|
+
"""Test MIME type detection in file conversion."""
|
|
363
|
+
|
|
364
|
+
def test_convert_file_sends_correct_mime_for_xlsx(self, tmp_path):
|
|
365
|
+
"""Test that .xlsx files are sent with correct MIME type."""
|
|
366
|
+
test_file = tmp_path / "test.xlsx"
|
|
367
|
+
test_file.write_bytes(b"fake xlsx content")
|
|
368
|
+
|
|
369
|
+
with patch("mdify.docling_client.requests.post") as mock_post:
|
|
370
|
+
mock_response = Mock()
|
|
371
|
+
mock_response.status_code = 200
|
|
372
|
+
mock_response.json.return_value = [
|
|
373
|
+
{"content": "# Test Spreadsheet\n\nContent here."}
|
|
374
|
+
]
|
|
375
|
+
mock_post.return_value = mock_response
|
|
376
|
+
|
|
377
|
+
convert_file("http://localhost:5001", test_file)
|
|
378
|
+
|
|
379
|
+
mock_post.assert_called_once()
|
|
380
|
+
call_args = mock_post.call_args
|
|
381
|
+
files_param = call_args[1]["files"]
|
|
382
|
+
filename, file_obj, mime_type = files_param["files"]
|
|
383
|
+
assert (
|
|
384
|
+
mime_type
|
|
385
|
+
== "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
def test_convert_file_sends_correct_mime_for_pdf(self, tmp_path):
|
|
389
|
+
"""Test that .pdf files are sent with correct MIME type (regression test)."""
|
|
390
|
+
test_file = tmp_path / "test.pdf"
|
|
391
|
+
test_file.write_bytes(b"fake pdf content")
|
|
392
|
+
|
|
393
|
+
with patch("mdify.docling_client.requests.post") as mock_post:
|
|
394
|
+
mock_response = Mock()
|
|
395
|
+
mock_response.status_code = 200
|
|
396
|
+
mock_response.json.return_value = [
|
|
397
|
+
{"content": "# Test Document\n\nContent here."}
|
|
398
|
+
]
|
|
399
|
+
mock_post.return_value = mock_response
|
|
400
|
+
|
|
401
|
+
convert_file("http://localhost:5001", test_file)
|
|
402
|
+
|
|
403
|
+
mock_post.assert_called_once()
|
|
404
|
+
call_args = mock_post.call_args
|
|
405
|
+
files_param = call_args[1]["files"]
|
|
406
|
+
filename, file_obj, mime_type = files_param["files"]
|
|
407
|
+
assert mime_type == "application/pdf"
|
|
408
|
+
|
|
409
|
+
def test_convert_file_sends_correct_mime_for_docx(self, tmp_path):
|
|
410
|
+
"""Test that .docx files are sent with correct MIME type."""
|
|
411
|
+
test_file = tmp_path / "test.docx"
|
|
412
|
+
test_file.write_bytes(b"fake docx content")
|
|
413
|
+
|
|
414
|
+
with patch("mdify.docling_client.requests.post") as mock_post:
|
|
415
|
+
mock_response = Mock()
|
|
416
|
+
mock_response.status_code = 200
|
|
417
|
+
mock_response.json.return_value = [
|
|
418
|
+
{"content": "# Test Document\n\nContent here."}
|
|
419
|
+
]
|
|
420
|
+
mock_post.return_value = mock_response
|
|
421
|
+
|
|
422
|
+
convert_file("http://localhost:5001", test_file)
|
|
423
|
+
|
|
424
|
+
mock_post.assert_called_once()
|
|
425
|
+
call_args = mock_post.call_args
|
|
426
|
+
files_param = call_args[1]["files"]
|
|
427
|
+
filename, file_obj, mime_type = files_param["files"]
|
|
428
|
+
assert (
|
|
429
|
+
mime_type
|
|
430
|
+
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
def test_convert_file_fallback_for_unknown_extension(self, tmp_path):
|
|
434
|
+
"""Test that unknown file extensions fall back to application/octet-stream."""
|
|
435
|
+
test_file = tmp_path / "test.unknownext123"
|
|
436
|
+
test_file.write_bytes(b"fake unknown content")
|
|
437
|
+
|
|
438
|
+
with patch("mdify.docling_client.requests.post") as mock_post:
|
|
439
|
+
mock_response = Mock()
|
|
440
|
+
mock_response.status_code = 200
|
|
441
|
+
mock_response.json.return_value = [
|
|
442
|
+
{"content": "# Test Content\n\nContent here."}
|
|
443
|
+
]
|
|
444
|
+
mock_post.return_value = mock_response
|
|
445
|
+
|
|
446
|
+
convert_file("http://localhost:5001", test_file)
|
|
447
|
+
|
|
448
|
+
mock_post.assert_called_once()
|
|
449
|
+
call_args = mock_post.call_args
|
|
450
|
+
files_param = call_args[1]["files"]
|
|
451
|
+
filename, file_obj, mime_type = files_param["files"]
|
|
452
|
+
assert mime_type == "application/octet-stream"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|