mdify-cli 2.5.0__tar.gz → 2.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdify_cli-2.5.0/mdify_cli.egg-info → mdify_cli-2.7.0}/PKG-INFO +1 -1
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify/__init__.py +1 -1
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify/container.py +35 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify/docling_client.py +10 -2
- {mdify_cli-2.5.0 → mdify_cli-2.7.0/mdify_cli.egg-info}/PKG-INFO +1 -1
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/pyproject.toml +1 -1
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/tests/test_container.py +104 -1
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/tests/test_docling_client.py +94 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/LICENSE +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/README.md +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/assets/mdify.png +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify/__main__.py +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify/cli.py +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/SOURCES.txt +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/dependency_links.txt +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/entry_points.txt +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/requires.txt +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/mdify_cli.egg-info/top_level.txt +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/setup.cfg +0 -0
- {mdify_cli-2.5.0 → mdify_cli-2.7.0}/tests/test_cli.py +0 -0
|
@@ -41,6 +41,39 @@ class DoclingContainer:
|
|
|
41
41
|
"""Return base URL for API requests."""
|
|
42
42
|
return f"http://localhost:{self.port}"
|
|
43
43
|
|
|
44
|
+
def _cleanup_stale_containers(self) -> None:
|
|
45
|
+
"""Stop any existing mdify-serve containers.
|
|
46
|
+
|
|
47
|
+
This handles the case where a previous run left a container running
|
|
48
|
+
(e.g., due to crash, interrupt, or timeout).
|
|
49
|
+
"""
|
|
50
|
+
# Find running containers matching mdify-serve-* pattern
|
|
51
|
+
result = subprocess.run(
|
|
52
|
+
[
|
|
53
|
+
self.runtime,
|
|
54
|
+
"ps",
|
|
55
|
+
"--filter",
|
|
56
|
+
"name=mdify-serve-",
|
|
57
|
+
"--format",
|
|
58
|
+
"{{.Names}}",
|
|
59
|
+
],
|
|
60
|
+
capture_output=True,
|
|
61
|
+
text=True,
|
|
62
|
+
check=False,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
if result.returncode != 0 or not result.stdout.strip():
|
|
66
|
+
return
|
|
67
|
+
|
|
68
|
+
# Stop each stale container
|
|
69
|
+
for container_name in result.stdout.strip().split("\n"):
|
|
70
|
+
if container_name:
|
|
71
|
+
subprocess.run(
|
|
72
|
+
[self.runtime, "stop", container_name],
|
|
73
|
+
capture_output=True,
|
|
74
|
+
check=False,
|
|
75
|
+
)
|
|
76
|
+
|
|
44
77
|
def start(self, timeout: int = 120) -> None:
|
|
45
78
|
"""Start container and wait for health check.
|
|
46
79
|
|
|
@@ -51,6 +84,8 @@ class DoclingContainer:
|
|
|
51
84
|
subprocess.CalledProcessError: If container fails to start
|
|
52
85
|
TimeoutError: If health check doesn't pass within timeout
|
|
53
86
|
"""
|
|
87
|
+
self._cleanup_stale_containers()
|
|
88
|
+
|
|
54
89
|
# Start container in detached mode
|
|
55
90
|
cmd = [
|
|
56
91
|
self.runtime,
|
|
@@ -4,6 +4,8 @@ from dataclasses import dataclass
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
+
import mimetypes
|
|
8
|
+
|
|
7
9
|
import requests
|
|
8
10
|
|
|
9
11
|
|
|
@@ -40,6 +42,12 @@ class DoclingHTTPError(DoclingClientError):
|
|
|
40
42
|
super().__init__(f"HTTP {status_code}: {message}")
|
|
41
43
|
|
|
42
44
|
|
|
45
|
+
def _get_mime_type(file_path: Path) -> str:
|
|
46
|
+
"""Get MIME type for file, with fallback for unknown types."""
|
|
47
|
+
mime_type, _ = mimetypes.guess_type(str(file_path))
|
|
48
|
+
return mime_type or "application/octet-stream"
|
|
49
|
+
|
|
50
|
+
|
|
43
51
|
def check_health(base_url: str) -> bool:
|
|
44
52
|
"""Check if docling-serve is healthy.
|
|
45
53
|
|
|
@@ -77,7 +85,7 @@ def convert_file(
|
|
|
77
85
|
with open(file_path, "rb") as f:
|
|
78
86
|
response = requests.post(
|
|
79
87
|
f"{base_url}/v1/convert/file",
|
|
80
|
-
files={"files": (file_path.name, f, "application/pdf")},
|
|
88
|
+
files={"files": (file_path.name, f, _get_mime_type(file_path))},
|
|
81
89
|
data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
|
|
82
90
|
)
|
|
83
91
|
|
|
@@ -126,7 +134,7 @@ def convert_file_async(
|
|
|
126
134
|
with open(file_path, "rb") as f:
|
|
127
135
|
response = requests.post(
|
|
128
136
|
f"{base_url}/v1/convert/file/async",
|
|
129
|
-
files={"files": (file_path.name, f, "application/pdf")},
|
|
137
|
+
files={"files": (file_path.name, f, _get_mime_type(file_path))},
|
|
130
138
|
data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
|
|
131
139
|
)
|
|
132
140
|
|
|
@@ -311,7 +311,110 @@ class TestDoclingContainerIntegration:
|
|
|
311
311
|
container1 = DoclingContainer("docker", "image1", port=5001)
|
|
312
312
|
container2 = DoclingContainer("docker", "image2", port=5002)
|
|
313
313
|
|
|
314
|
-
# Each should be independent
|
|
315
314
|
assert container1.port == 5001
|
|
316
315
|
assert container2.port == 5002
|
|
317
316
|
assert container1.container_name != container2.container_name
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
class TestDoclingContainerCleanup:
|
|
320
|
+
"""Test cleanup of stale containers."""
|
|
321
|
+
|
|
322
|
+
def test_cleanup_no_stale_containers(self):
|
|
323
|
+
"""Test cleanup runs when no stale containers exist."""
|
|
324
|
+
with patch("mdify.container.subprocess.run") as mock_run, patch(
|
|
325
|
+
"mdify.container.check_health"
|
|
326
|
+
) as mock_health:
|
|
327
|
+
ps_result = Mock()
|
|
328
|
+
ps_result.returncode = 0
|
|
329
|
+
ps_result.stdout = ""
|
|
330
|
+
|
|
331
|
+
run_result = Mock()
|
|
332
|
+
run_result.stdout = "container_id\n"
|
|
333
|
+
|
|
334
|
+
mock_run.side_effect = [ps_result, run_result]
|
|
335
|
+
mock_health.return_value = True
|
|
336
|
+
|
|
337
|
+
container = DoclingContainer("docker", "test-image")
|
|
338
|
+
container.start(timeout=5)
|
|
339
|
+
|
|
340
|
+
ps_call = mock_run.call_args_list[0][0][0]
|
|
341
|
+
assert "ps" in ps_call
|
|
342
|
+
assert "--filter" in ps_call
|
|
343
|
+
assert "name=mdify-serve-" in ps_call
|
|
344
|
+
|
|
345
|
+
def test_cleanup_stops_stale_containers(self):
|
|
346
|
+
"""Test cleanup finds and stops stale containers."""
|
|
347
|
+
with patch("mdify.container.subprocess.run") as mock_run, patch(
|
|
348
|
+
"mdify.container.check_health"
|
|
349
|
+
) as mock_health:
|
|
350
|
+
ps_result = Mock()
|
|
351
|
+
ps_result.returncode = 0
|
|
352
|
+
ps_result.stdout = "mdify-serve-abc123\nmdify-serve-def456\n"
|
|
353
|
+
|
|
354
|
+
stop_result1 = Mock()
|
|
355
|
+
stop_result2 = Mock()
|
|
356
|
+
|
|
357
|
+
run_result = Mock()
|
|
358
|
+
run_result.stdout = "container_id\n"
|
|
359
|
+
|
|
360
|
+
mock_run.side_effect = [ps_result, stop_result1, stop_result2, run_result]
|
|
361
|
+
mock_health.return_value = True
|
|
362
|
+
|
|
363
|
+
container = DoclingContainer("docker", "test-image")
|
|
364
|
+
container.start(timeout=5)
|
|
365
|
+
|
|
366
|
+
ps_call = mock_run.call_args_list[0][0][0]
|
|
367
|
+
assert "ps" in ps_call
|
|
368
|
+
|
|
369
|
+
stop_calls = [
|
|
370
|
+
call for call in mock_run.call_args_list if "stop" in str(call)
|
|
371
|
+
]
|
|
372
|
+
assert len(stop_calls) == 2
|
|
373
|
+
assert "mdify-serve-abc123" in str(stop_calls[0])
|
|
374
|
+
assert "mdify-serve-def456" in str(stop_calls[1])
|
|
375
|
+
|
|
376
|
+
def test_cleanup_handles_subprocess_error(self):
|
|
377
|
+
"""Test cleanup handles subprocess errors gracefully."""
|
|
378
|
+
with patch("mdify.container.subprocess.run") as mock_run, patch(
|
|
379
|
+
"mdify.container.check_health"
|
|
380
|
+
) as mock_health:
|
|
381
|
+
ps_result = Mock()
|
|
382
|
+
ps_result.returncode = 1
|
|
383
|
+
ps_result.stdout = ""
|
|
384
|
+
|
|
385
|
+
run_result = Mock()
|
|
386
|
+
run_result.stdout = "container_id\n"
|
|
387
|
+
|
|
388
|
+
mock_run.side_effect = [ps_result, run_result]
|
|
389
|
+
mock_health.return_value = True
|
|
390
|
+
|
|
391
|
+
container = DoclingContainer("docker", "test-image")
|
|
392
|
+
container.start(timeout=5)
|
|
393
|
+
|
|
394
|
+
assert container.container_id == "container_id"
|
|
395
|
+
|
|
396
|
+
def test_start_calls_cleanup(self):
|
|
397
|
+
"""Test that start() calls _cleanup_stale_containers()."""
|
|
398
|
+
with patch("mdify.container.subprocess.run") as mock_run, patch(
|
|
399
|
+
"mdify.container.check_health"
|
|
400
|
+
) as mock_health:
|
|
401
|
+
ps_result = Mock()
|
|
402
|
+
ps_result.returncode = 0
|
|
403
|
+
ps_result.stdout = ""
|
|
404
|
+
|
|
405
|
+
run_result = Mock()
|
|
406
|
+
run_result.stdout = "new_container_id\n"
|
|
407
|
+
|
|
408
|
+
mock_run.side_effect = [ps_result, run_result]
|
|
409
|
+
mock_health.return_value = True
|
|
410
|
+
|
|
411
|
+
container = DoclingContainer("docker", "test-image")
|
|
412
|
+
container.start(timeout=5)
|
|
413
|
+
|
|
414
|
+
all_calls = mock_run.call_args_list
|
|
415
|
+
ps_called = any("ps" in str(call) for call in all_calls[:1])
|
|
416
|
+
run_called = any("run" in str(call) for call in all_calls)
|
|
417
|
+
|
|
418
|
+
assert ps_called
|
|
419
|
+
assert run_called
|
|
420
|
+
assert "ps" in all_calls[0][0][0]
|
|
@@ -356,3 +356,97 @@ class TestDoclingHTTPError:
|
|
|
356
356
|
error = DoclingHTTPError(400, "Bad Request")
|
|
357
357
|
|
|
358
358
|
assert isinstance(error, Exception)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
class TestMimeTypeDetection:
|
|
362
|
+
"""Test MIME type detection in file conversion."""
|
|
363
|
+
|
|
364
|
+
def test_convert_file_sends_correct_mime_for_xlsx(self, tmp_path):
|
|
365
|
+
"""Test that .xlsx files are sent with correct MIME type."""
|
|
366
|
+
test_file = tmp_path / "test.xlsx"
|
|
367
|
+
test_file.write_bytes(b"fake xlsx content")
|
|
368
|
+
|
|
369
|
+
with patch("mdify.docling_client.requests.post") as mock_post:
|
|
370
|
+
mock_response = Mock()
|
|
371
|
+
mock_response.status_code = 200
|
|
372
|
+
mock_response.json.return_value = [
|
|
373
|
+
{"content": "# Test Spreadsheet\n\nContent here."}
|
|
374
|
+
]
|
|
375
|
+
mock_post.return_value = mock_response
|
|
376
|
+
|
|
377
|
+
convert_file("http://localhost:5001", test_file)
|
|
378
|
+
|
|
379
|
+
mock_post.assert_called_once()
|
|
380
|
+
call_args = mock_post.call_args
|
|
381
|
+
files_param = call_args[1]["files"]
|
|
382
|
+
filename, file_obj, mime_type = files_param["files"]
|
|
383
|
+
assert (
|
|
384
|
+
mime_type
|
|
385
|
+
== "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
def test_convert_file_sends_correct_mime_for_pdf(self, tmp_path):
|
|
389
|
+
"""Test that .pdf files are sent with correct MIME type (regression test)."""
|
|
390
|
+
test_file = tmp_path / "test.pdf"
|
|
391
|
+
test_file.write_bytes(b"fake pdf content")
|
|
392
|
+
|
|
393
|
+
with patch("mdify.docling_client.requests.post") as mock_post:
|
|
394
|
+
mock_response = Mock()
|
|
395
|
+
mock_response.status_code = 200
|
|
396
|
+
mock_response.json.return_value = [
|
|
397
|
+
{"content": "# Test Document\n\nContent here."}
|
|
398
|
+
]
|
|
399
|
+
mock_post.return_value = mock_response
|
|
400
|
+
|
|
401
|
+
convert_file("http://localhost:5001", test_file)
|
|
402
|
+
|
|
403
|
+
mock_post.assert_called_once()
|
|
404
|
+
call_args = mock_post.call_args
|
|
405
|
+
files_param = call_args[1]["files"]
|
|
406
|
+
filename, file_obj, mime_type = files_param["files"]
|
|
407
|
+
assert mime_type == "application/pdf"
|
|
408
|
+
|
|
409
|
+
def test_convert_file_sends_correct_mime_for_docx(self, tmp_path):
|
|
410
|
+
"""Test that .docx files are sent with correct MIME type."""
|
|
411
|
+
test_file = tmp_path / "test.docx"
|
|
412
|
+
test_file.write_bytes(b"fake docx content")
|
|
413
|
+
|
|
414
|
+
with patch("mdify.docling_client.requests.post") as mock_post:
|
|
415
|
+
mock_response = Mock()
|
|
416
|
+
mock_response.status_code = 200
|
|
417
|
+
mock_response.json.return_value = [
|
|
418
|
+
{"content": "# Test Document\n\nContent here."}
|
|
419
|
+
]
|
|
420
|
+
mock_post.return_value = mock_response
|
|
421
|
+
|
|
422
|
+
convert_file("http://localhost:5001", test_file)
|
|
423
|
+
|
|
424
|
+
mock_post.assert_called_once()
|
|
425
|
+
call_args = mock_post.call_args
|
|
426
|
+
files_param = call_args[1]["files"]
|
|
427
|
+
filename, file_obj, mime_type = files_param["files"]
|
|
428
|
+
assert (
|
|
429
|
+
mime_type
|
|
430
|
+
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
def test_convert_file_fallback_for_unknown_extension(self, tmp_path):
|
|
434
|
+
"""Test that unknown file extensions fall back to application/octet-stream."""
|
|
435
|
+
test_file = tmp_path / "test.unknownext123"
|
|
436
|
+
test_file.write_bytes(b"fake unknown content")
|
|
437
|
+
|
|
438
|
+
with patch("mdify.docling_client.requests.post") as mock_post:
|
|
439
|
+
mock_response = Mock()
|
|
440
|
+
mock_response.status_code = 200
|
|
441
|
+
mock_response.json.return_value = [
|
|
442
|
+
{"content": "# Test Content\n\nContent here."}
|
|
443
|
+
]
|
|
444
|
+
mock_post.return_value = mock_response
|
|
445
|
+
|
|
446
|
+
convert_file("http://localhost:5001", test_file)
|
|
447
|
+
|
|
448
|
+
mock_post.assert_called_once()
|
|
449
|
+
call_args = mock_post.call_args
|
|
450
|
+
files_param = call_args[1]["files"]
|
|
451
|
+
filename, file_obj, mime_type = files_param["files"]
|
|
452
|
+
assert mime_type == "application/octet-stream"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|