mdify-cli 2.7.0__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdify/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.7.0"
3
+ __version__ = "2.9.0"
mdify/cli.py CHANGED
@@ -10,6 +10,7 @@ is lightweight and has no ML dependencies.
10
10
  import argparse
11
11
  import json
12
12
  import os
13
+ import platform
13
14
  import shutil
14
15
  import subprocess
15
16
  import sys
@@ -33,7 +34,9 @@ CHECK_INTERVAL_SECONDS = 86400 # 24 hours
33
34
  # Container configuration
34
35
  DEFAULT_IMAGE = "ghcr.io/docling-project/docling-serve-cpu:main"
35
36
  GPU_IMAGE = "ghcr.io/docling-project/docling-serve-cu126:main"
36
- SUPPORTED_RUNTIMES = ("docker", "podman")
37
+ SUPPORTED_RUNTIMES = ("docker", "podman", "orbstack", "colima", "container")
38
+ MACOS_RUNTIMES_PRIORITY = ("container", "orbstack", "colima", "podman", "docker")
39
+ OTHER_RUNTIMES_PRIORITY = ("docker", "podman")
37
40
 
38
41
 
39
42
  # =============================================================================
@@ -151,34 +154,117 @@ def check_for_update(force: bool = False) -> None:
151
154
  # =============================================================================
152
155
 
153
156
 
154
- def detect_runtime(preferred: str, explicit: bool = True) -> Optional[str]:
157
+ def is_daemon_running(runtime: str) -> bool:
158
+ """
159
+ Check if a container runtime daemon is running.
160
+
161
+ Args:
162
+ runtime: Path to container runtime executable
163
+
164
+ Returns:
165
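+ True if the probe command (`container system status` for Apple Container, `<runtime> --version` otherwise) exits with status 0; False on a non-zero exit, OSError, or timeout. NOTE(review): a `--version` probe confirms the CLI runs but may not contact the daemon — confirm.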
166
+ """
167
+ try:
168
+ runtime_name = os.path.basename(runtime)
169
+
170
+ # Apple Container uses 'container system status' to check daemon
171
+ if runtime_name == "container":
172
+ result = subprocess.run(
173
+ [runtime, "system", "status"],
174
+ capture_output=True,
175
+ timeout=5,
176
+ check=False,
177
+ )
178
+ return result.returncode == 0
179
+
180
+ # Other runtimes use --version check
181
+ result = subprocess.run(
182
+ [runtime, "--version"],
183
+ capture_output=True,
184
+ timeout=5,
185
+ check=False,
186
+ )
187
+ return result.returncode == 0
188
+ except (OSError, subprocess.TimeoutExpired):
189
+ return False
190
+
191
+
192
+ def detect_runtime(preferred: Optional[str] = None, explicit: bool = True) -> Optional[str]:
155
193
  """
156
194
  Detect available container runtime.
157
195
 
196
+ First checks MDIFY_CONTAINER_RUNTIME environment variable for explicit override.
197
+ On macOS, tries native tools first (Apple Container → OrbStack → Colima → Podman → Docker).
198
+ On other platforms, tries Docker → Podman.
199
+
158
200
  Args:
159
- preferred: Preferred runtime ('docker' or 'podman')
160
- explicit: If True, warn when falling back to alternative.
161
- If False, silently use alternative without warning.
162
- Note: This only controls warning emission; selection order
163
- is always preferred → alternative regardless of this flag.
201
+ preferred: Deprecated and currently unused by this function; set MDIFY_CONTAINER_RUNTIME to force a specific runtime.
202
+ explicit: If True, print info about detection/fallback choices.
164
203
 
165
204
  Returns:
166
205
  Path to runtime executable, or None if not found.
167
206
  """
168
- # Try preferred runtime first
169
- runtime_path = shutil.which(preferred)
170
- if runtime_path:
171
- return runtime_path
172
-
173
- # Try alternative
174
- alternative = "podman" if preferred == "docker" else "docker"
175
- runtime_path = shutil.which(alternative)
176
- if runtime_path:
207
+ # Check for explicit environment variable override
208
+ env_runtime = os.environ.get("MDIFY_CONTAINER_RUNTIME", "").strip().lower()
209
+ if env_runtime:
210
+ if env_runtime not in SUPPORTED_RUNTIMES:
211
+ print(
212
+ f"Warning: MDIFY_CONTAINER_RUNTIME='{env_runtime}' is not supported. "
213
+ f"Supported: {', '.join(SUPPORTED_RUNTIMES)}",
214
+ file=sys.stderr,
215
+ )
216
+ else:
217
+ runtime_path = shutil.which(env_runtime)
218
+ if runtime_path:
219
+ if explicit:
220
+ print(f"Using runtime from MDIFY_CONTAINER_RUNTIME: {env_runtime}")
221
+ return runtime_path
222
+ else:
223
+ print(
224
+ f"Warning: MDIFY_CONTAINER_RUNTIME='{env_runtime}' specified but not found in PATH",
225
+ file=sys.stderr,
226
+ )
227
+
228
+ # Determine runtime priority based on OS
229
+ is_macos = platform.system() == "Darwin"
230
+ if is_macos:
231
+ runtime_priority = MACOS_RUNTIMES_PRIORITY
177
232
  if explicit:
233
+ print(f"Detected macOS: checking for native container tools...")
234
+ else:
235
+ runtime_priority = OTHER_RUNTIMES_PRIORITY
236
+
237
+ # Try each runtime in priority order
238
+ found_but_not_running = []
239
+ for runtime_name in runtime_priority:
240
+ runtime_path = shutil.which(runtime_name)
241
+ if runtime_path:
242
+ # Check if daemon is running
243
+ if is_daemon_running(runtime_path):
244
+ if explicit:
245
+ print(f"Using container runtime: {runtime_name}")
246
+ return runtime_path
247
+ else:
248
+ found_but_not_running.append((runtime_name, runtime_path))
249
+
250
+ # If we found tools but none are running, warn and ask user to start one
251
+ if found_but_not_running:
252
+ print(
253
+ f"\nWarning: Found container runtime(s) but daemon is not running:",
254
+ file=sys.stderr,
255
+ )
256
+ for runtime_name, runtime_path in found_but_not_running:
257
+ print(f" - {runtime_name} ({runtime_path})", file=sys.stderr)
258
+ print(
259
+ "\nPlease start one of these tools before running mdify.",
260
+ file=sys.stderr,
261
+ )
262
+ if is_macos:
178
263
  print(
179
- f"Warning: {preferred} not found, using {alternative}", file=sys.stderr
264
+ " macOS tip: Start OrbStack, Colima, or Podman Desktop application",
265
+ file=sys.stderr,
180
266
  )
181
- return runtime_path
267
+ return None
182
268
 
183
269
  return None
184
270
 
@@ -302,7 +388,7 @@ def get_free_space(path: str) -> int:
302
388
 
303
389
  def get_storage_root(runtime: str) -> Optional[str]:
304
390
  """
305
- Get the storage root directory for Docker or Podman.
391
+ Get the storage root directory for Docker, Podman, OrbStack, or Colima.
306
392
 
307
393
  Args:
308
394
  runtime: Path to container runtime executable
@@ -331,6 +417,18 @@ def get_storage_root(runtime: str) -> Optional[str]:
331
417
  if result.stdout:
332
418
  info = json.loads(result.stdout.decode())
333
419
  return info.get("store", {}).get("graphRoot")
420
+ elif runtime_name == "orbstack":
421
+ # OrbStack stores containers in ~/.orbstack
422
+ home = os.path.expanduser("~")
423
+ return os.path.join(home, ".orbstack")
424
+ elif runtime_name == "colima":
425
+ # Colima stores containers in ~/.colima
426
+ home = os.path.expanduser("~")
427
+ return os.path.join(home, ".colima")
428
+ elif runtime_name == "container":
429
+ # Apple Container stores data in Application Support
430
+ home = os.path.expanduser("~")
431
+ return os.path.join(home, "Library", "Application Support", "com.apple.container")
334
432
  return None
335
433
  except (OSError, json.JSONDecodeError):
336
434
  return None
@@ -660,15 +758,14 @@ def main() -> int:
660
758
  return 1
661
759
 
662
760
  # Detect container runtime
663
- preferred = args.runtime if args.runtime else "docker"
761
+ # If --runtime is specified, treat as explicit user choice
664
762
  explicit = args.runtime is not None
665
- runtime = detect_runtime(preferred, explicit=explicit)
763
+ runtime = detect_runtime(preferred=args.runtime, explicit=explicit)
666
764
  if runtime is None:
667
765
  print(
668
766
  f"Error: Container runtime not found ({', '.join(SUPPORTED_RUNTIMES)})",
669
767
  file=sys.stderr,
670
768
  )
671
- print("Please install Docker or Podman to use mdify.", file=sys.stderr)
672
769
  return 2
673
770
 
674
771
  # Handle image pull policy
@@ -752,7 +849,8 @@ def main() -> int:
752
849
  return 1
753
850
  elif args.pull == "never" and not image_exists:
754
851
  print(f"Error: Image not found locally: {image}", file=sys.stderr)
755
- print(f"Run with --pull=missing or pull manually: {preferred} pull {image}")
852
+ runtime_name = os.path.basename(runtime)
853
+ print(f"Run with --pull=missing or pull manually: {runtime_name} pull {image}")
756
854
  return 1
757
855
 
758
856
  # Resolve paths (use absolute() as fallback if resolve() fails due to permissions)
mdify/docling_client.py CHANGED
@@ -48,6 +48,42 @@ def _get_mime_type(file_path: Path) -> str:
48
48
  return mime_type or "application/octet-stream"
49
49
 
50
50
 
51
+ def _extract_content(result_data) -> str:
52
+ """Extract content from API response, supporting both old and new formats.
53
+
54
+ Supports:
55
+ - New format: {"document": {"md_content": "..."}}
56
+ - Fallback: {"document": {"content": "..."}}
57
+ - Old format: {"content": "..."}
58
+ - List format: [{"document": {...}} or {"content": "..."}]
59
+
60
+ Args:
61
+ result_data: Response data from docling-serve API
62
+
63
+ Returns:
64
+ Extracted content string, or empty string if not found
65
+ """
66
+ if isinstance(result_data, dict):
67
+ # New format with document field
68
+ if "document" in result_data:
69
+ doc = result_data["document"]
70
+ # Try md_content first, then content
71
+ return doc.get("md_content", "") or doc.get("content", "")
72
+ # Old format without document field
73
+ return result_data.get("content", "")
74
+ elif isinstance(result_data, list) and len(result_data) > 0:
75
+ # List format - process first item
76
+ first_result = result_data[0]
77
+ if isinstance(first_result, dict):
78
+ if "document" in first_result:
79
+ doc = first_result["document"]
80
+ # Try md_content first, then content
81
+ return doc.get("md_content", "") or doc.get("content", "")
82
+ # Old format without document field
83
+ return first_result.get("content", "")
84
+ return ""
85
+
86
+
51
87
  def check_health(base_url: str) -> bool:
52
88
  """Check if docling-serve is healthy.
53
89
 
@@ -95,17 +131,10 @@ def convert_file(
95
131
  )
96
132
 
97
133
  result_data = response.json()
134
+ content = _extract_content(result_data)
98
135
 
99
- # docling-serve returns results in a list format
100
- if isinstance(result_data, list) and len(result_data) > 0:
101
- first_result = result_data[0]
102
- return ConvertResult(
103
- content=first_result.get("content", ""), format=to_format, success=True
104
- )
105
- elif isinstance(result_data, dict):
106
- return ConvertResult(
107
- content=result_data.get("content", ""), format=to_format, success=True
108
- )
136
+ if content or isinstance(result_data, (dict, list)):
137
+ return ConvertResult(content=content, format=to_format, success=True)
109
138
  else:
110
139
  raise DoclingHTTPError(200, f"Unexpected response format: {result_data}")
111
140
 
@@ -210,19 +239,21 @@ def get_result(base_url: str, task_id: str) -> ConvertResult:
210
239
  )
211
240
 
212
241
  result_data = response.json()
242
+ content = _extract_content(result_data)
213
243
 
214
- # Similar to sync conversion, handle list or dict format
215
- if isinstance(result_data, list) and len(result_data) > 0:
244
+ # Determine format from response, defaulting to "md"
245
+ result_format = "md"
246
+ if isinstance(result_data, dict):
247
+ result_format = result_data.get("format", "md")
248
+ elif isinstance(result_data, list) and len(result_data) > 0:
216
249
  first_result = result_data[0]
250
+ if isinstance(first_result, dict):
251
+ result_format = first_result.get("format", "md")
252
+
253
+ if content or isinstance(result_data, (dict, list)):
217
254
  return ConvertResult(
218
- content=first_result.get("content", ""),
219
- format=first_result.get("format", "md"),
220
- success=True,
221
- )
222
- elif isinstance(result_data, dict):
223
- return ConvertResult(
224
- content=result_data.get("content", ""),
225
- format=result_data.get("format", "md"),
255
+ content=content,
256
+ format=result_format,
226
257
  success=True,
227
258
  )
228
259
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.7.0
3
+ Version: 2.9.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -42,7 +42,10 @@ A lightweight CLI for converting documents to Markdown. The CLI is fast to insta
42
42
  ## Requirements
43
43
 
44
44
  - **Python 3.8+**
45
- - **Docker** or **Podman** (for document conversion)
45
+ - **Docker**, **Podman**, or native macOS container tools (for document conversion)
46
+ - On macOS: Supports Apple Container (macOS 26+), OrbStack, Colima, Podman, or Docker Desktop
47
+ - On Linux: Docker or Podman
48
+ - Auto-detects available tools
46
49
 
47
50
  ## Installation
48
51
 
@@ -56,6 +59,13 @@ pipx install mdify-cli
56
59
 
57
60
  Restart your terminal after installation.
58
61
 
62
+ For containerized document conversion, install one of these:
63
+ - **Apple Container** (macOS 26+): Download from https://github.com/apple/container/releases
64
+ - **OrbStack** (recommended): `brew install orbstack`
65
+ - **Colima**: `brew install colima && colima start`
66
+ - **Podman**: `brew install podman && podman machine init && podman machine start`
67
+ - **Docker Desktop**: Available at https://www.docker.com/products/docker-desktop
68
+
59
69
  ### Linux
60
70
 
61
71
  ```bash
@@ -142,13 +152,50 @@ The first conversion takes longer (~30-60s) as the container loads ML models int
142
152
  | `-m, --mask` | ⚠️ **Deprecated**: PII masking not supported in current version |
143
153
  | `--gpu` | Use GPU-accelerated container (requires NVIDIA GPU and nvidia-container-toolkit) |
144
154
  | `--port PORT` | Container port (default: 5001) |
145
- | `--runtime RUNTIME` | Container runtime: docker or podman (auto-detected) |
155
+ | `--runtime RUNTIME` | Container runtime: docker, podman, orbstack, colima, or container (auto-detected) |
146
156
  | `--image IMAGE` | Custom container image (default: ghcr.io/docling-project/docling-serve-cpu:main) |
147
157
  | `--pull POLICY` | Image pull policy: always, missing, never (default: missing) |
148
158
  | `--check-update` | Check for available updates and exit |
149
159
  | `--version` | Show version and exit |
150
160
 
151
- ### Flat Mode
161
+ ### Container Runtime Selection
162
+
163
+ mdify automatically detects and uses the best available container runtime. The detection order differs by platform:
164
+
165
+ **macOS (recommended):**
166
+ 1. Apple Container (native, macOS 26+ required)
167
+ 2. OrbStack (lightweight, fast)
168
+ 3. Colima (open-source alternative)
169
+ 4. Podman (via Podman machine)
170
+ 5. Docker Desktop (full Docker)
171
+
172
+ **Linux:**
173
+ 1. Docker
174
+ 2. Podman
175
+
176
+ **Override runtime:**
177
+ Use the `MDIFY_CONTAINER_RUNTIME` environment variable to force a specific runtime:
178
+
179
+ ```bash
180
+ export MDIFY_CONTAINER_RUNTIME=orbstack
181
+ mdify document.pdf
182
+ ```
183
+
184
+ Or inline:
185
+ ```bash
186
+ MDIFY_CONTAINER_RUNTIME=colima mdify document.pdf
187
+ ```
188
+
189
+ **Supported values:** `docker`, `podman`, `orbstack`, `colima`, `container`
190
+
191
+ If container runtimes are installed but none of them has a running daemon, mdify prints a warning that lists them:
192
+ ```
193
+ Warning: Found container runtime(s) but daemon is not running:
194
+ - orbstack (/opt/homebrew/bin/orbstack)
195
+
196
+ Please start one of these tools before running mdify.
197
+ macOS tip: Start OrbStack, Colima, or Podman Desktop application
198
+ ```
+
+ ### Flat Mode
152
199
 
153
200
  With `--flat`, all output files are placed directly in the output directory. Directory paths are incorporated into filenames to prevent collisions:
154
201
 
@@ -0,0 +1,12 @@
1
+ assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
+ mdify/__init__.py,sha256=PSgJ3K4AOS5lkZLc88TMzg38yuZ6s4Z_q-KDUP1XyI0,90
3
+ mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
+ mdify/cli.py,sha256=Zdl1bnZ2fXdDNrnjwy2pu-g6NY5VHqDJRqopkhn5Dxk,32340
5
+ mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
6
+ mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
7
+ mdify_cli-2.9.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
8
+ mdify_cli-2.9.0.dist-info/METADATA,sha256=yu2RXWJ-V6IiN0Cdtgmn_mD9MlHy16MHamnZvB0Plo0,9622
9
+ mdify_cli-2.9.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
+ mdify_cli-2.9.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
11
+ mdify_cli-2.9.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
12
+ mdify_cli-2.9.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
- mdify/__init__.py,sha256=ymBvtqVt-BtORLCI0ZO674etO8tlMJxzghl39z6gCUg,90
3
- mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
- mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
5
- mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
6
- mdify/docling_client.py,sha256=9QWPmd0W5APzf6LeUrdDBAru6E4d89w2q8WqGVlJoHg,6807
7
- mdify_cli-2.7.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
8
- mdify_cli-2.7.0.dist-info/METADATA,sha256=4v5CMHOhZ2LKgRgH7xm7hOUUYwahYCRJSCMcGtNja5g,7923
9
- mdify_cli-2.7.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
- mdify_cli-2.7.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
11
- mdify_cli-2.7.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
12
- mdify_cli-2.7.0.dist-info/RECORD,,