mdify-cli 2.5.0__py3-none-any.whl → 2.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdify/__init__.py +1 -1
- mdify/cli.py +176 -26
- mdify/container.py +35 -0
- mdify/docling_client.py +61 -22
- {mdify_cli-2.5.0.dist-info → mdify_cli-2.10.0.dist-info}/METADATA +51 -4
- mdify_cli-2.10.0.dist-info/RECORD +12 -0
- mdify_cli-2.5.0.dist-info/RECORD +0 -12
- {mdify_cli-2.5.0.dist-info → mdify_cli-2.10.0.dist-info}/WHEEL +0 -0
- {mdify_cli-2.5.0.dist-info → mdify_cli-2.10.0.dist-info}/entry_points.txt +0 -0
- {mdify_cli-2.5.0.dist-info → mdify_cli-2.10.0.dist-info}/licenses/LICENSE +0 -0
- {mdify_cli-2.5.0.dist-info → mdify_cli-2.10.0.dist-info}/top_level.txt +0 -0
mdify/__init__.py
CHANGED
mdify/cli.py
CHANGED
|
@@ -10,6 +10,7 @@ is lightweight and has no ML dependencies.
|
|
|
10
10
|
import argparse
|
|
11
11
|
import json
|
|
12
12
|
import os
|
|
13
|
+
import platform
|
|
13
14
|
import shutil
|
|
14
15
|
import subprocess
|
|
15
16
|
import sys
|
|
@@ -33,7 +34,9 @@ CHECK_INTERVAL_SECONDS = 86400 # 24 hours
|
|
|
33
34
|
# Container configuration
|
|
34
35
|
DEFAULT_IMAGE = "ghcr.io/docling-project/docling-serve-cpu:main"
|
|
35
36
|
GPU_IMAGE = "ghcr.io/docling-project/docling-serve-cu126:main"
|
|
36
|
-
SUPPORTED_RUNTIMES = ("docker", "podman")
|
|
37
|
+
SUPPORTED_RUNTIMES = ("docker", "podman", "orbstack", "colima", "container")
|
|
38
|
+
MACOS_RUNTIMES_PRIORITY = ("container", "orbstack", "colima", "podman", "docker")
|
|
39
|
+
OTHER_RUNTIMES_PRIORITY = ("docker", "podman")
|
|
37
40
|
|
|
38
41
|
|
|
39
42
|
# =============================================================================
|
|
@@ -151,34 +154,117 @@ def check_for_update(force: bool = False) -> None:
|
|
|
151
154
|
# =============================================================================
|
|
152
155
|
|
|
153
156
|
|
|
154
|
-
def
|
|
157
|
+
def is_daemon_running(runtime: str) -> bool:
|
|
158
|
+
"""
|
|
159
|
+
Check if a container runtime daemon is running.
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
runtime: Path to container runtime executable
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
True if daemon is running and responsive, False otherwise.
|
|
166
|
+
"""
|
|
167
|
+
try:
|
|
168
|
+
runtime_name = os.path.basename(runtime)
|
|
169
|
+
|
|
170
|
+
# Apple Container uses 'container system status' to check daemon
|
|
171
|
+
if runtime_name == "container":
|
|
172
|
+
result = subprocess.run(
|
|
173
|
+
[runtime, "system", "status"],
|
|
174
|
+
capture_output=True,
|
|
175
|
+
timeout=5,
|
|
176
|
+
check=False,
|
|
177
|
+
)
|
|
178
|
+
return result.returncode == 0
|
|
179
|
+
|
|
180
|
+
# Other runtimes use --version check
|
|
181
|
+
result = subprocess.run(
|
|
182
|
+
[runtime, "--version"],
|
|
183
|
+
capture_output=True,
|
|
184
|
+
timeout=5,
|
|
185
|
+
check=False,
|
|
186
|
+
)
|
|
187
|
+
return result.returncode == 0
|
|
188
|
+
except (OSError, subprocess.TimeoutExpired):
|
|
189
|
+
return False
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def detect_runtime(preferred: Optional[str] = None, explicit: bool = True) -> Optional[str]:
|
|
155
193
|
"""
|
|
156
194
|
Detect available container runtime.
|
|
157
195
|
|
|
196
|
+
First checks MDIFY_CONTAINER_RUNTIME environment variable for explicit override.
|
|
197
|
+
On macOS, tries native tools first (OrbStack → Colima → Podman → Docker).
|
|
198
|
+
On other platforms, tries Docker → Podman.
|
|
199
|
+
|
|
158
200
|
Args:
|
|
159
|
-
preferred: Preferred runtime (
|
|
160
|
-
explicit: If True,
|
|
161
|
-
If False, silently use alternative without warning.
|
|
162
|
-
Note: This only controls warning emission; selection order
|
|
163
|
-
is always preferred → alternative regardless of this flag.
|
|
201
|
+
preferred: Preferred runtime name override (deprecated, use MDIFY_CONTAINER_RUNTIME)
|
|
202
|
+
explicit: If True, print info about detection/fallback choices.
|
|
164
203
|
|
|
165
204
|
Returns:
|
|
166
205
|
Path to runtime executable, or None if not found.
|
|
167
206
|
"""
|
|
168
|
-
#
|
|
169
|
-
|
|
170
|
-
if
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
207
|
+
# Check for explicit environment variable override
|
|
208
|
+
env_runtime = os.environ.get("MDIFY_CONTAINER_RUNTIME", "").strip().lower()
|
|
209
|
+
if env_runtime:
|
|
210
|
+
if env_runtime not in SUPPORTED_RUNTIMES:
|
|
211
|
+
print(
|
|
212
|
+
f"Warning: MDIFY_CONTAINER_RUNTIME='{env_runtime}' is not supported. "
|
|
213
|
+
f"Supported: {', '.join(SUPPORTED_RUNTIMES)}",
|
|
214
|
+
file=sys.stderr,
|
|
215
|
+
)
|
|
216
|
+
else:
|
|
217
|
+
runtime_path = shutil.which(env_runtime)
|
|
218
|
+
if runtime_path:
|
|
219
|
+
if explicit:
|
|
220
|
+
print(f"Using runtime from MDIFY_CONTAINER_RUNTIME: {env_runtime}")
|
|
221
|
+
return runtime_path
|
|
222
|
+
else:
|
|
223
|
+
print(
|
|
224
|
+
f"Warning: MDIFY_CONTAINER_RUNTIME='{env_runtime}' specified but not found in PATH",
|
|
225
|
+
file=sys.stderr,
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
# Determine runtime priority based on OS
|
|
229
|
+
is_macos = platform.system() == "Darwin"
|
|
230
|
+
if is_macos:
|
|
231
|
+
runtime_priority = MACOS_RUNTIMES_PRIORITY
|
|
177
232
|
if explicit:
|
|
233
|
+
print(f"Detected macOS: checking for native container tools...")
|
|
234
|
+
else:
|
|
235
|
+
runtime_priority = OTHER_RUNTIMES_PRIORITY
|
|
236
|
+
|
|
237
|
+
# Try each runtime in priority order
|
|
238
|
+
found_but_not_running = []
|
|
239
|
+
for runtime_name in runtime_priority:
|
|
240
|
+
runtime_path = shutil.which(runtime_name)
|
|
241
|
+
if runtime_path:
|
|
242
|
+
# Check if daemon is running
|
|
243
|
+
if is_daemon_running(runtime_path):
|
|
244
|
+
if explicit:
|
|
245
|
+
print(f"Using container runtime: {runtime_name}")
|
|
246
|
+
return runtime_path
|
|
247
|
+
else:
|
|
248
|
+
found_but_not_running.append((runtime_name, runtime_path))
|
|
249
|
+
|
|
250
|
+
# If we found tools but none are running, warn and ask user to start one
|
|
251
|
+
if found_but_not_running:
|
|
252
|
+
print(
|
|
253
|
+
f"\nWarning: Found container runtime(s) but daemon is not running:",
|
|
254
|
+
file=sys.stderr,
|
|
255
|
+
)
|
|
256
|
+
for runtime_name, runtime_path in found_but_not_running:
|
|
257
|
+
print(f" - {runtime_name} ({runtime_path})", file=sys.stderr)
|
|
258
|
+
print(
|
|
259
|
+
"\nPlease start one of these tools before running mdify.",
|
|
260
|
+
file=sys.stderr,
|
|
261
|
+
)
|
|
262
|
+
if is_macos:
|
|
178
263
|
print(
|
|
179
|
-
|
|
264
|
+
" macOS tip: Start OrbStack, Colima, or Podman Desktop application",
|
|
265
|
+
file=sys.stderr,
|
|
180
266
|
)
|
|
181
|
-
return
|
|
267
|
+
return None
|
|
182
268
|
|
|
183
269
|
return None
|
|
184
270
|
|
|
@@ -195,6 +281,27 @@ def check_image_exists(runtime: str, image: str) -> bool:
|
|
|
195
281
|
True if image exists locally.
|
|
196
282
|
"""
|
|
197
283
|
try:
|
|
284
|
+
runtime_name = os.path.basename(runtime)
|
|
285
|
+
|
|
286
|
+
# Apple Container uses 'image list' command (two words)
|
|
287
|
+
if runtime_name == "container":
|
|
288
|
+
result = subprocess.run(
|
|
289
|
+
[runtime, "image", "list", "--format", "json"],
|
|
290
|
+
capture_output=True,
|
|
291
|
+
check=False,
|
|
292
|
+
)
|
|
293
|
+
if result.returncode == 0 and result.stdout:
|
|
294
|
+
try:
|
|
295
|
+
images = json.loads(result.stdout.decode())
|
|
296
|
+
# Check if image exists in the list
|
|
297
|
+
for img in images:
|
|
298
|
+
if img.get("name") == image or image in img.get("repoTags", []):
|
|
299
|
+
return True
|
|
300
|
+
except json.JSONDecodeError:
|
|
301
|
+
pass
|
|
302
|
+
return False
|
|
303
|
+
|
|
304
|
+
# Docker/Podman/OrbStack/Colima use standard 'image inspect'
|
|
198
305
|
result = subprocess.run(
|
|
199
306
|
[runtime, "image", "inspect", image],
|
|
200
307
|
capture_output=True,
|
|
@@ -221,6 +328,18 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
|
|
|
221
328
|
print(f"Pulling image: {image}")
|
|
222
329
|
|
|
223
330
|
try:
|
|
331
|
+
runtime_name = os.path.basename(runtime)
|
|
332
|
+
|
|
333
|
+
# Apple Container uses 'image pull' command (two words)
|
|
334
|
+
if runtime_name == "container":
|
|
335
|
+
result = subprocess.run(
|
|
336
|
+
[runtime, "image", "pull", image],
|
|
337
|
+
capture_output=quiet,
|
|
338
|
+
check=False,
|
|
339
|
+
)
|
|
340
|
+
return result.returncode == 0
|
|
341
|
+
|
|
342
|
+
# Docker/Podman/OrbStack/Colima use standard 'pull'
|
|
224
343
|
result = subprocess.run(
|
|
225
344
|
[runtime, "pull", image],
|
|
226
345
|
capture_output=quiet,
|
|
@@ -302,7 +421,7 @@ def get_free_space(path: str) -> int:
|
|
|
302
421
|
|
|
303
422
|
def get_storage_root(runtime: str) -> Optional[str]:
|
|
304
423
|
"""
|
|
305
|
-
Get the storage root directory for Docker or
|
|
424
|
+
Get the storage root directory for Docker, Podman, OrbStack, or Colima.
|
|
306
425
|
|
|
307
426
|
Args:
|
|
308
427
|
runtime: Path to container runtime executable
|
|
@@ -331,6 +450,18 @@ def get_storage_root(runtime: str) -> Optional[str]:
|
|
|
331
450
|
if result.stdout:
|
|
332
451
|
info = json.loads(result.stdout.decode())
|
|
333
452
|
return info.get("store", {}).get("graphRoot")
|
|
453
|
+
elif runtime_name == "orbstack":
|
|
454
|
+
# OrbStack stores containers in ~/.orbstack
|
|
455
|
+
home = os.path.expanduser("~")
|
|
456
|
+
return os.path.join(home, ".orbstack")
|
|
457
|
+
elif runtime_name == "colima":
|
|
458
|
+
# Colima stores containers in ~/.colima
|
|
459
|
+
home = os.path.expanduser("~")
|
|
460
|
+
return os.path.join(home, ".colima")
|
|
461
|
+
elif runtime_name == "container":
|
|
462
|
+
# Apple Container stores data in Application Support
|
|
463
|
+
home = os.path.expanduser("~")
|
|
464
|
+
return os.path.join(home, "Library", "Application Support", "com.apple.container")
|
|
334
465
|
return None
|
|
335
466
|
except (OSError, json.JSONDecodeError):
|
|
336
467
|
return None
|
|
@@ -390,8 +521,8 @@ class Spinner:
|
|
|
390
521
|
self.running = False
|
|
391
522
|
if self.thread:
|
|
392
523
|
self.thread.join(timeout=0.5)
|
|
393
|
-
# Clear the spinner line
|
|
394
|
-
print(f"\r{' ' *
|
|
524
|
+
# Clear the spinner line with enough spaces to cover the longest possible line
|
|
525
|
+
print(f"\r{' ' * 120}\r", end="", flush=True)
|
|
395
526
|
|
|
396
527
|
|
|
397
528
|
# =============================================================================
|
|
@@ -639,6 +770,7 @@ Examples:
|
|
|
639
770
|
|
|
640
771
|
def main() -> int:
|
|
641
772
|
"""Main entry point for the CLI."""
|
|
773
|
+
print(f"mdify v{__version__}", file=sys.stderr)
|
|
642
774
|
args = parse_args()
|
|
643
775
|
|
|
644
776
|
# Handle --check-update flag
|
|
@@ -660,15 +792,14 @@ def main() -> int:
|
|
|
660
792
|
return 1
|
|
661
793
|
|
|
662
794
|
# Detect container runtime
|
|
663
|
-
|
|
795
|
+
# If --runtime is specified, treat as explicit user choice
|
|
664
796
|
explicit = args.runtime is not None
|
|
665
|
-
runtime = detect_runtime(preferred, explicit=explicit)
|
|
797
|
+
runtime = detect_runtime(preferred=args.runtime, explicit=explicit)
|
|
666
798
|
if runtime is None:
|
|
667
799
|
print(
|
|
668
800
|
f"Error: Container runtime not found ({', '.join(SUPPORTED_RUNTIMES)})",
|
|
669
801
|
file=sys.stderr,
|
|
670
802
|
)
|
|
671
|
-
print("Please install Docker or Podman to use mdify.", file=sys.stderr)
|
|
672
803
|
return 2
|
|
673
804
|
|
|
674
805
|
# Handle image pull policy
|
|
@@ -752,7 +883,8 @@ def main() -> int:
|
|
|
752
883
|
return 1
|
|
753
884
|
elif args.pull == "never" and not image_exists:
|
|
754
885
|
print(f"Error: Image not found locally: {image}", file=sys.stderr)
|
|
755
|
-
|
|
886
|
+
runtime_name = os.path.basename(runtime)
|
|
887
|
+
print(f"Run with --pull=missing or pull manually: {runtime_name} pull {image}")
|
|
756
888
|
return 1
|
|
757
889
|
|
|
758
890
|
# Resolve paths (use absolute() as fallback if resolve() fails due to permissions)
|
|
@@ -786,6 +918,8 @@ def main() -> int:
|
|
|
786
918
|
|
|
787
919
|
if not args.quiet:
|
|
788
920
|
print(f"Found {total_files} file(s) to convert ({format_size(total_size)})")
|
|
921
|
+
print(f"Source: {input_path.resolve()}")
|
|
922
|
+
print(f"Output: {output_dir.resolve()}")
|
|
789
923
|
print(f"Using runtime: {runtime}")
|
|
790
924
|
print(f"Using image: {image}")
|
|
791
925
|
print()
|
|
@@ -848,6 +982,7 @@ def main() -> int:
|
|
|
848
982
|
)
|
|
849
983
|
elapsed = time.time() - start_time
|
|
850
984
|
|
|
985
|
+
# Stop spinner before any output
|
|
851
986
|
if not args.quiet:
|
|
852
987
|
spinner.stop()
|
|
853
988
|
|
|
@@ -870,12 +1005,27 @@ def main() -> int:
|
|
|
870
1005
|
except Exception as e:
|
|
871
1006
|
elapsed = time.time() - start_time
|
|
872
1007
|
failed_count += 1
|
|
1008
|
+
# Stop spinner before printing error
|
|
873
1009
|
if not args.quiet:
|
|
874
1010
|
spinner.stop()
|
|
1011
|
+
|
|
1012
|
+
# Check if container is still healthy
|
|
1013
|
+
error_msg = str(e)
|
|
1014
|
+
if "Connection refused" in error_msg or "Connection aborted" in error_msg or "RemoteDisconnected" in error_msg:
|
|
1015
|
+
if not container.is_ready():
|
|
1016
|
+
if not args.quiet:
|
|
1017
|
+
print(
|
|
1018
|
+
f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
|
|
1019
|
+
)
|
|
1020
|
+
print(f" Error: Container crashed (file may be too complex or large)", file=sys.stderr)
|
|
1021
|
+
print(f" Stopping remaining conversions", file=sys.stderr)
|
|
1022
|
+
break
|
|
1023
|
+
|
|
1024
|
+
if not args.quiet:
|
|
875
1025
|
print(
|
|
876
1026
|
f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
|
|
877
1027
|
)
|
|
878
|
-
print(f" Error: {
|
|
1028
|
+
print(f" Error: {error_msg}", file=sys.stderr)
|
|
879
1029
|
|
|
880
1030
|
total_elapsed = time.time() - conversion_start
|
|
881
1031
|
|
mdify/container.py
CHANGED
|
@@ -41,6 +41,39 @@ class DoclingContainer:
|
|
|
41
41
|
"""Return base URL for API requests."""
|
|
42
42
|
return f"http://localhost:{self.port}"
|
|
43
43
|
|
|
44
|
+
def _cleanup_stale_containers(self) -> None:
|
|
45
|
+
"""Stop any existing mdify-serve containers.
|
|
46
|
+
|
|
47
|
+
This handles the case where a previous run left a container running
|
|
48
|
+
(e.g., due to crash, interrupt, or timeout).
|
|
49
|
+
"""
|
|
50
|
+
# Find running containers matching mdify-serve-* pattern
|
|
51
|
+
result = subprocess.run(
|
|
52
|
+
[
|
|
53
|
+
self.runtime,
|
|
54
|
+
"ps",
|
|
55
|
+
"--filter",
|
|
56
|
+
"name=mdify-serve-",
|
|
57
|
+
"--format",
|
|
58
|
+
"{{.Names}}",
|
|
59
|
+
],
|
|
60
|
+
capture_output=True,
|
|
61
|
+
text=True,
|
|
62
|
+
check=False,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
if result.returncode != 0 or not result.stdout.strip():
|
|
66
|
+
return
|
|
67
|
+
|
|
68
|
+
# Stop each stale container
|
|
69
|
+
for container_name in result.stdout.strip().split("\n"):
|
|
70
|
+
if container_name:
|
|
71
|
+
subprocess.run(
|
|
72
|
+
[self.runtime, "stop", container_name],
|
|
73
|
+
capture_output=True,
|
|
74
|
+
check=False,
|
|
75
|
+
)
|
|
76
|
+
|
|
44
77
|
def start(self, timeout: int = 120) -> None:
|
|
45
78
|
"""Start container and wait for health check.
|
|
46
79
|
|
|
@@ -51,6 +84,8 @@ class DoclingContainer:
|
|
|
51
84
|
subprocess.CalledProcessError: If container fails to start
|
|
52
85
|
TimeoutError: If health check doesn't pass within timeout
|
|
53
86
|
"""
|
|
87
|
+
self._cleanup_stale_containers()
|
|
88
|
+
|
|
54
89
|
# Start container in detached mode
|
|
55
90
|
cmd = [
|
|
56
91
|
self.runtime,
|
mdify/docling_client.py
CHANGED
|
@@ -4,6 +4,8 @@ from dataclasses import dataclass
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
+
import mimetypes
|
|
8
|
+
|
|
7
9
|
import requests
|
|
8
10
|
|
|
9
11
|
|
|
@@ -40,6 +42,48 @@ class DoclingHTTPError(DoclingClientError):
|
|
|
40
42
|
super().__init__(f"HTTP {status_code}: {message}")
|
|
41
43
|
|
|
42
44
|
|
|
45
|
+
def _get_mime_type(file_path: Path) -> str:
|
|
46
|
+
"""Get MIME type for file, with fallback for unknown types."""
|
|
47
|
+
mime_type, _ = mimetypes.guess_type(str(file_path))
|
|
48
|
+
return mime_type or "application/octet-stream"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _extract_content(result_data) -> str:
|
|
52
|
+
"""Extract content from API response, supporting both old and new formats.
|
|
53
|
+
|
|
54
|
+
Supports:
|
|
55
|
+
- New format: {"document": {"md_content": "..."}}
|
|
56
|
+
- Fallback: {"document": {"content": "..."}}
|
|
57
|
+
- Old format: {"content": "..."}
|
|
58
|
+
- List format: [{"document": {...}} or {"content": "..."}]
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
result_data: Response data from docling-serve API
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Extracted content string, or empty string if not found
|
|
65
|
+
"""
|
|
66
|
+
if isinstance(result_data, dict):
|
|
67
|
+
# New format with document field
|
|
68
|
+
if "document" in result_data:
|
|
69
|
+
doc = result_data["document"]
|
|
70
|
+
# Try md_content first, then content
|
|
71
|
+
return doc.get("md_content", "") or doc.get("content", "")
|
|
72
|
+
# Old format without document field
|
|
73
|
+
return result_data.get("content", "")
|
|
74
|
+
elif isinstance(result_data, list) and len(result_data) > 0:
|
|
75
|
+
# List format - process first item
|
|
76
|
+
first_result = result_data[0]
|
|
77
|
+
if isinstance(first_result, dict):
|
|
78
|
+
if "document" in first_result:
|
|
79
|
+
doc = first_result["document"]
|
|
80
|
+
# Try md_content first, then content
|
|
81
|
+
return doc.get("md_content", "") or doc.get("content", "")
|
|
82
|
+
# Old format without document field
|
|
83
|
+
return first_result.get("content", "")
|
|
84
|
+
return ""
|
|
85
|
+
|
|
86
|
+
|
|
43
87
|
def check_health(base_url: str) -> bool:
|
|
44
88
|
"""Check if docling-serve is healthy.
|
|
45
89
|
|
|
@@ -77,7 +121,7 @@ def convert_file(
|
|
|
77
121
|
with open(file_path, "rb") as f:
|
|
78
122
|
response = requests.post(
|
|
79
123
|
f"{base_url}/v1/convert/file",
|
|
80
|
-
files={"files": (file_path.name, f,
|
|
124
|
+
files={"files": (file_path.name, f, _get_mime_type(file_path))},
|
|
81
125
|
data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
|
|
82
126
|
)
|
|
83
127
|
|
|
@@ -87,17 +131,10 @@ def convert_file(
|
|
|
87
131
|
)
|
|
88
132
|
|
|
89
133
|
result_data = response.json()
|
|
134
|
+
content = _extract_content(result_data)
|
|
90
135
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
first_result = result_data[0]
|
|
94
|
-
return ConvertResult(
|
|
95
|
-
content=first_result.get("content", ""), format=to_format, success=True
|
|
96
|
-
)
|
|
97
|
-
elif isinstance(result_data, dict):
|
|
98
|
-
return ConvertResult(
|
|
99
|
-
content=result_data.get("content", ""), format=to_format, success=True
|
|
100
|
-
)
|
|
136
|
+
if content or isinstance(result_data, (dict, list)):
|
|
137
|
+
return ConvertResult(content=content, format=to_format, success=True)
|
|
101
138
|
else:
|
|
102
139
|
raise DoclingHTTPError(200, f"Unexpected response format: {result_data}")
|
|
103
140
|
|
|
@@ -126,7 +163,7 @@ def convert_file_async(
|
|
|
126
163
|
with open(file_path, "rb") as f:
|
|
127
164
|
response = requests.post(
|
|
128
165
|
f"{base_url}/v1/convert/file/async",
|
|
129
|
-
files={"files": (file_path.name, f,
|
|
166
|
+
files={"files": (file_path.name, f, _get_mime_type(file_path))},
|
|
130
167
|
data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
|
|
131
168
|
)
|
|
132
169
|
|
|
@@ -202,19 +239,21 @@ def get_result(base_url: str, task_id: str) -> ConvertResult:
|
|
|
202
239
|
)
|
|
203
240
|
|
|
204
241
|
result_data = response.json()
|
|
242
|
+
content = _extract_content(result_data)
|
|
205
243
|
|
|
206
|
-
#
|
|
207
|
-
|
|
244
|
+
# Determine format from response, defaulting to "md"
|
|
245
|
+
result_format = "md"
|
|
246
|
+
if isinstance(result_data, dict):
|
|
247
|
+
result_format = result_data.get("format", "md")
|
|
248
|
+
elif isinstance(result_data, list) and len(result_data) > 0:
|
|
208
249
|
first_result = result_data[0]
|
|
250
|
+
if isinstance(first_result, dict):
|
|
251
|
+
result_format = first_result.get("format", "md")
|
|
252
|
+
|
|
253
|
+
if content or isinstance(result_data, (dict, list)):
|
|
209
254
|
return ConvertResult(
|
|
210
|
-
content=
|
|
211
|
-
format=
|
|
212
|
-
success=True,
|
|
213
|
-
)
|
|
214
|
-
elif isinstance(result_data, dict):
|
|
215
|
-
return ConvertResult(
|
|
216
|
-
content=result_data.get("content", ""),
|
|
217
|
-
format=result_data.get("format", "md"),
|
|
255
|
+
content=content,
|
|
256
|
+
format=result_format,
|
|
218
257
|
success=True,
|
|
219
258
|
)
|
|
220
259
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mdify-cli
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.10.0
|
|
4
4
|
Summary: Convert PDFs and document images into structured Markdown for LLM workflows
|
|
5
5
|
Author: tiroq
|
|
6
6
|
License-Expression: MIT
|
|
@@ -42,7 +42,10 @@ A lightweight CLI for converting documents to Markdown. The CLI is fast to insta
|
|
|
42
42
|
## Requirements
|
|
43
43
|
|
|
44
44
|
- **Python 3.8+**
|
|
45
|
-
- **Docker** or
|
|
45
|
+
- **Docker**, **Podman**, or native macOS container tools (for document conversion)
|
|
46
|
+
- On macOS: Supports Apple Container (macOS 26+), OrbStack, Colima, Podman, or Docker Desktop
|
|
47
|
+
- On Linux: Docker or Podman
|
|
48
|
+
- Auto-detects available tools
|
|
46
49
|
|
|
47
50
|
## Installation
|
|
48
51
|
|
|
@@ -56,6 +59,13 @@ pipx install mdify-cli
|
|
|
56
59
|
|
|
57
60
|
Restart your terminal after installation.
|
|
58
61
|
|
|
62
|
+
For containerized document conversion, install one of these (or use Docker Desktop):
|
|
63
|
+
- **Apple Container** (macOS 26+): Download from https://github.com/apple/container/releases
|
|
64
|
+
- **OrbStack** (recommended): `brew install orbstack`
|
|
65
|
+
- **Colima**: `brew install colima && colima start`
|
|
66
|
+
- **Podman**: `brew install podman && podman machine init && podman machine start`
|
|
67
|
+
- **Docker Desktop**: Available at https://www.docker.com/products/docker-desktop
|
|
68
|
+
|
|
59
69
|
### Linux
|
|
60
70
|
|
|
61
71
|
```bash
|
|
@@ -142,13 +152,50 @@ The first conversion takes longer (~30-60s) as the container loads ML models int
|
|
|
142
152
|
| `-m, --mask` | ⚠️ **Deprecated**: PII masking not supported in current version |
|
|
143
153
|
| `--gpu` | Use GPU-accelerated container (requires NVIDIA GPU and nvidia-container-toolkit) |
|
|
144
154
|
| `--port PORT` | Container port (default: 5001) |
|
|
145
|
-
| `--runtime RUNTIME` | Container runtime: docker or
|
|
155
|
+
| `--runtime RUNTIME` | Container runtime: docker, podman, orbstack, colima, or container (auto-detected) |
|
|
146
156
|
| `--image IMAGE` | Custom container image (default: ghcr.io/docling-project/docling-serve-cpu:main) |
|
|
147
157
|
| `--pull POLICY` | Image pull policy: always, missing, never (default: missing) |
|
|
148
158
|
| `--check-update` | Check for available updates and exit |
|
|
149
159
|
| `--version` | Show version and exit |
|
|
150
160
|
|
|
151
|
-
###
|
|
161
|
+
### Container Runtime Selection
|
|
162
|
+
|
|
163
|
+
mdify automatically detects and uses the best available container runtime. The detection order differs by platform:
|
|
164
|
+
|
|
165
|
+
**macOS (recommended):**
|
|
166
|
+
1. Apple Container (native, macOS 26+ required)
|
|
167
|
+
2. OrbStack (lightweight, fast)
|
|
168
|
+
3. Colima (open-source alternative)
|
|
169
|
+
4. Podman (via Podman machine)
|
|
170
|
+
5. Docker Desktop (full Docker)
|
|
171
|
+
|
|
172
|
+
**Linux:**
|
|
173
|
+
1. Docker
|
|
174
|
+
2. Podman
|
|
175
|
+
|
|
176
|
+
**Override runtime:**
|
|
177
|
+
Use the `MDIFY_CONTAINER_RUNTIME` environment variable to force a specific runtime:
|
|
178
|
+
|
|
179
|
+
```bash
|
|
180
|
+
export MDIFY_CONTAINER_RUNTIME=orbstack
|
|
181
|
+
mdify document.pdf
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Or inline:
|
|
185
|
+
```bash
|
|
186
|
+
MDIFY_CONTAINER_RUNTIME=colima mdify document.pdf
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Supported values:** `docker`, `podman`, `orbstack`, `colima`, `container`
|
|
190
|
+
|
|
191
|
+
If the selected runtime is installed but not running, mdify will display a helpful warning:
|
|
192
|
+
```
|
|
193
|
+
Warning: Found container runtime(s) but daemon is not running:
|
|
194
|
+
- orbstack (/opt/homebrew/bin/orbstack)
|
|
195
|
+
|
|
196
|
+
Please start one of these tools before running mdify.
|
|
197
|
+
macOS tip: Start OrbStack, Colima, or Podman Desktop application
|
|
198
|
+
```
|
|
152
199
|
|
|
153
200
|
With `--flat`, all output files are placed directly in the output directory. Directory paths are incorporated into filenames to prevent collisions:
|
|
154
201
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
+
mdify/__init__.py,sha256=CjafoKZ-RjxyPtnUKVPmPjquU0FlfXDBkDDWZWb1ryw,91
|
|
3
|
+
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
+
mdify/cli.py,sha256=ZQMirATkanV0l8v0-A000uXPy7BaT5BPbVFSNSnpLJM,34817
|
|
5
|
+
mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
|
|
6
|
+
mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
|
|
7
|
+
mdify_cli-2.10.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
8
|
+
mdify_cli-2.10.0.dist-info/METADATA,sha256=c2dtC7VEMeSqPhgbmQBLEM5N6zOkIXfWDPocCz9Ye8E,9623
|
|
9
|
+
mdify_cli-2.10.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
+
mdify_cli-2.10.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
11
|
+
mdify_cli-2.10.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
12
|
+
mdify_cli-2.10.0.dist-info/RECORD,,
|
mdify_cli-2.5.0.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
-
mdify/__init__.py,sha256=lH-hnX0KOG9_zJ_QZ-A_kQFPYghziohhpm7nmxVZc7w,90
|
|
3
|
-
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
-
mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
|
|
5
|
-
mdify/container.py,sha256=2oh9NyvFr9lCRb2YYpM_qKP3PPmAin0DbxvNP3m69jw,4158
|
|
6
|
-
mdify/docling_client.py,sha256=_9qjL5yOOeJahOg6an2P6Iii1xkeR6wmNJZG4Q6NRkk,6553
|
|
7
|
-
mdify_cli-2.5.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
8
|
-
mdify_cli-2.5.0.dist-info/METADATA,sha256=egwIWB2tV9F41fcUf3RvfszEJGb--AQVDN3ybI1FFt0,7923
|
|
9
|
-
mdify_cli-2.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
-
mdify_cli-2.5.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
11
|
-
mdify_cli-2.5.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
12
|
-
mdify_cli-2.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|