mdify-cli 2.3.0__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdify/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.3.0"
3
+ __version__ = "2.6.0"
mdify/cli.py CHANGED
@@ -232,6 +232,44 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
232
232
  return False
233
233
 
234
234
 
235
def get_image_size_estimate(runtime: str, image: str) -> Optional[int]:
    """
    Estimate image size by querying registry manifest.

    Runs `<runtime> manifest inspect --verbose <image>` and sums the `size` of
    every layer listed under each entry's "OCIManifest" (all architectures are
    counted), then applies a 50% buffer for decompression.

    The JSON output is accepted either as an object carrying a "Manifests"
    list or as a bare list of such entries. Any other shape is treated as a
    failure instead of raising.
    NOTE(review): the bare-list form appears to be what Docker prints for
    multi-arch images -- confirm against the runtimes you support.

    Args:
        runtime: Path to container runtime
        image: Image name/tag

    Returns:
        Estimated size in bytes with 50% buffer (0 when no layer sizes were
        found), or None if the command fails or its output is not usable.
    """
    try:
        result = subprocess.run(
            [runtime, "manifest", "inspect", "--verbose", image],
            capture_output=True,
            check=False,
        )
        if result.returncode != 0:
            return None
        manifest_data = json.loads(result.stdout.decode())
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None

    if isinstance(manifest_data, dict):
        entries = manifest_data.get("Manifests", [])
    elif isinstance(manifest_data, list):
        entries = manifest_data
    else:
        return None
    if not isinstance(entries, list):
        # e.g. "Manifests": null -- nothing we can iterate over.
        return None

    # Sum all layer sizes across all architectures, skipping malformed items
    # rather than letting one bad entry abort the whole estimate.
    total_size = 0
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        oci_manifest = entry.get("OCIManifest")
        if not isinstance(oci_manifest, dict):
            continue
        layers = oci_manifest.get("layers")
        if not isinstance(layers, list):
            continue
        for layer in layers:
            size = layer.get("size", 0) if isinstance(layer, dict) else 0
            # bool is an int subclass; exclude it so `true` is not counted as 1.
            if isinstance(size, int) and not isinstance(size, bool) and size > 0:
                total_size += size

    # Apply 50% buffer for decompression (compressed -> uncompressed)
    return int(total_size * 1.5)
271
+
272
+
235
273
  def format_size(size_bytes: int) -> str:
236
274
  """Format file size in human-readable format."""
237
275
  for unit in ["B", "KB", "MB", "GB"]:
@@ -254,6 +292,71 @@ def format_duration(seconds: float) -> str:
254
292
  return f"{hours}h {mins}m {secs:.0f}s"
255
293
 
256
294
 
295
def get_free_space(path: str) -> int:
    """Get free disk space for the given path in bytes.

    Args:
        path: A path on the filesystem to query.

    Returns:
        Free bytes as reported by `shutil.disk_usage`, or 0 when the path
        cannot be queried (missing path, permission error, ...). Callers
        therefore cannot tell "no space left" from "could not measure".
    """
    try:
        return shutil.disk_usage(path).free
    except OSError:
        # FileNotFoundError is a subclass of OSError, so one clause suffices.
        return 0
301
+
302
+
303
def get_storage_root(runtime: str) -> Optional[str]:
    """
    Get the storage root directory for Docker or Podman.

    The runtime is recognised by the basename of `runtime` (case-insensitive,
    with an optional ".exe" suffix ignored). Docker is asked for
    `{{.DockerRootDir}}`; Podman's JSON info is read for `store.graphRoot`.

    Args:
        runtime: Path to container runtime executable

    Returns:
        Storage root path as a non-empty string, or None if the runtime is not
        Docker/Podman, the command cannot be run, or its output is blank,
        undecodable, or not shaped as expected.
    """
    # Extract runtime name from path (e.g., /usr/bin/docker -> docker)
    runtime_name = os.path.basename(runtime).lower()
    if runtime_name.endswith(".exe"):
        runtime_name = runtime_name[: -len(".exe")]

    if runtime_name == "docker":
        cmd = [runtime, "system", "info", "--format", "{{.DockerRootDir}}"]
    elif runtime_name == "podman":
        cmd = [runtime, "info", "--format", "json"]
    else:
        return None

    try:
        result = subprocess.run(cmd, capture_output=True, check=False)
        output = result.stdout.decode().strip()
        if not output:
            # Blank output means the runtime reported nothing usable.
            return None
        if runtime_name == "docker":
            return output
        info = json.loads(output)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return None

    # Guard every level: unexpected JSON shapes must yield None, not raise.
    store = info.get("store") if isinstance(info, dict) else None
    graph_root = store.get("graphRoot") if isinstance(store, dict) else None
    if isinstance(graph_root, str) and graph_root:
        return graph_root
    return None
337
+
338
+
339
def confirm_proceed(message: str, default_no: bool = True) -> bool:
    """
    Prompt user for confirmation with a y/N (or Y/n) prompt.

    Args:
        message: The confirmation message to display
        default_no: If True, shows [y/N] (default no). If False, shows [Y/n] (default yes)

    Returns:
        True if the user answered 'y' or 'yes' (case-insensitive, surrounding
        whitespace ignored). An empty answer selects the advertised default:
        False for [y/N], True for [Y/n]. Any other answer returns False.
        Returns False immediately if stdin is not a TTY (non-interactive), and
        also if stdin reaches end-of-file before an answer is given.
    """
    if not sys.stdin.isatty():
        return False

    prompt = "[y/N]" if default_no else "[Y/n]"
    print(f"{message} {prompt}", file=sys.stderr)
    try:
        response = input().strip().lower()
    except EOFError:
        # Ctrl-D / closed stdin: no answer was given, so do not proceed.
        return False

    if not response:
        # Bare Enter picks the option shown in capitals in the prompt.
        return not default_no
    return response in ("y", "yes")
358
+
359
+
257
360
  class Spinner:
258
361
  """A simple spinner to show progress during long operations."""
259
362
 
@@ -455,6 +558,13 @@ Examples:
455
558
  help="Suppress progress messages",
456
559
  )
457
560
 
561
+ parser.add_argument(
562
+ "-y",
563
+ "--yes",
564
+ action="store_true",
565
+ help="Skip confirmation prompts (for scripts/CI)",
566
+ )
567
+
458
568
  parser.add_argument(
459
569
  "-m",
460
570
  "--mask",
@@ -572,6 +682,70 @@ def main() -> int:
572
682
 
573
683
  image_exists = check_image_exists(runtime, image)
574
684
 
685
+ # NOTE: Docker Desktop on macOS/Windows uses a VM, so disk space checks may not
686
+ # accurately reflect available space in the container's filesystem. Remote Docker
687
+ # daemons (DOCKER_HOST) are also not supported. In these cases, the check will
688
+ # gracefully degrade (warn and proceed).
689
+
690
+ # Check disk space before pulling image (skip if pull=never or image exists with pull=missing)
691
+ will_pull = args.pull == "always" or (args.pull == "missing" and not image_exists)
692
+ if will_pull:
693
+ storage_root = get_storage_root(runtime)
694
+ if storage_root:
695
+ image_size = get_image_size_estimate(runtime, image)
696
+ if image_size:
697
+ free_space = get_free_space(storage_root)
698
+ if free_space < image_size:
699
+ print(
700
+ f"Warning: Not enough free disk space on {storage_root}",
701
+ file=sys.stderr,
702
+ )
703
+ print(
704
+ f" Available: {format_size(free_space)}",
705
+ file=sys.stderr,
706
+ )
707
+ print(
708
+ f" Required: {format_size(image_size)} (estimated)",
709
+ file=sys.stderr,
710
+ )
711
+ if args.yes:
712
+ print(" Proceeding anyway (--yes flag set)", file=sys.stderr)
713
+ elif not sys.stdin.isatty():
714
+ print(
715
+ " Run with --yes to proceed anyway, or free up disk space",
716
+ file=sys.stderr,
717
+ )
718
+ return 1
719
+ elif not confirm_proceed("Continue anyway?"):
720
+ return 130
721
+ elif free_space - image_size < 1024 * 1024 * 1024:
722
+ print(
723
+ f"Warning: Less than 1 GB would remain after pulling image on {storage_root}",
724
+ file=sys.stderr,
725
+ )
726
+ print(
727
+ f" Available: {format_size(free_space)}",
728
+ file=sys.stderr,
729
+ )
730
+ print(
731
+ f" Required: {format_size(image_size)} (estimated)",
732
+ file=sys.stderr,
733
+ )
734
+ print(
735
+ f" Remaining: {format_size(free_space - image_size)}",
736
+ file=sys.stderr,
737
+ )
738
+ if args.yes:
739
+ print(" Proceeding anyway (--yes flag set)", file=sys.stderr)
740
+ elif not sys.stdin.isatty():
741
+ print(
742
+ " Run with --yes to proceed anyway, or free up disk space",
743
+ file=sys.stderr,
744
+ )
745
+ return 1
746
+ elif not confirm_proceed("Continue anyway?"):
747
+ return 130
748
+
575
749
  if args.pull == "always" or (args.pull == "missing" and not image_exists):
576
750
  if not pull_image(runtime, image, args.quiet):
577
751
  print(f"Error: Failed to pull image: {image}", file=sys.stderr)
mdify/container.py CHANGED
@@ -41,6 +41,39 @@ class DoclingContainer:
41
41
  """Return base URL for API requests."""
42
42
  return f"http://localhost:{self.port}"
43
43
 
44
def _cleanup_stale_containers(self) -> None:
    """Stop any existing mdify-serve containers.

    This handles the case where a previous run left a container running
    (e.g., due to crash, interrupt, or timeout).

    Only containers whose name *starts with* "mdify-serve-" are stopped. The
    runtime's `name=` filter is re-checked here because it may match the
    pattern anywhere in the name, which would otherwise stop unrelated
    containers such as "foo-mdify-serve-1".

    NOTE(review): this also stops mdify-serve containers that belong to
    another mdify process running at the same time -- confirm that
    concurrent runs are not expected.
    """
    name_prefix = "mdify-serve-"

    # Find running containers matching mdify-serve-* pattern
    result = subprocess.run(
        [
            self.runtime,
            "ps",
            "--filter",
            f"name={name_prefix}",
            "--format",
            "{{.Names}}",
        ],
        capture_output=True,
        text=True,
        check=False,
    )

    if result.returncode != 0:
        return

    # Stop each stale container; splitlines() also copes with CRLF output.
    for line in result.stdout.splitlines():
        container_name = line.strip()
        if not container_name.startswith(name_prefix):
            continue
        subprocess.run(
            [self.runtime, "stop", container_name],
            capture_output=True,
            check=False,
        )
76
+
44
77
  def start(self, timeout: int = 120) -> None:
45
78
  """Start container and wait for health check.
46
79
 
@@ -51,6 +84,8 @@ class DoclingContainer:
51
84
  subprocess.CalledProcessError: If container fails to start
52
85
  TimeoutError: If health check doesn't pass within timeout
53
86
  """
87
+ self._cleanup_stale_containers()
88
+
54
89
  # Start container in detached mode
55
90
  cmd = [
56
91
  self.runtime,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.3.0
3
+ Version: 2.6.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -0,0 +1,12 @@
1
+ assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
+ mdify/__init__.py,sha256=4mWutp3KF_BH9sz_oEPFBoN7Ee6vamK3cHDBpUtRQVY,90
3
+ mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
+ mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
5
+ mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
6
+ mdify/docling_client.py,sha256=_9qjL5yOOeJahOg6an2P6Iii1xkeR6wmNJZG4Q6NRkk,6553
7
+ mdify_cli-2.6.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
8
+ mdify_cli-2.6.0.dist-info/METADATA,sha256=NcyfsGSLiSkz0NkRdc6g5pOervCpXJbWEIDSPnYSvFk,7923
9
+ mdify_cli-2.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
+ mdify_cli-2.6.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
11
+ mdify_cli-2.6.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
12
+ mdify_cli-2.6.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
- mdify/__init__.py,sha256=FzoeSkFpZDcp937vaz9TNjscUbhK5z4MH3d6EMV_9fA,90
3
- mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
- mdify/cli.py,sha256=5cplmrH8pk8VvW-bA6deAvGpkhH57KPLoajp4C0U7q8,22080
5
- mdify/container.py,sha256=2oh9NyvFr9lCRb2YYpM_qKP3PPmAin0DbxvNP3m69jw,4158
6
- mdify/docling_client.py,sha256=_9qjL5yOOeJahOg6an2P6Iii1xkeR6wmNJZG4Q6NRkk,6553
7
- mdify_cli-2.3.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
8
- mdify_cli-2.3.0.dist-info/METADATA,sha256=gnebNztlcGOaJGG7zN9GE-4FUZvtfij2BUJG0VId3f0,7923
9
- mdify_cli-2.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
- mdify_cli-2.3.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
11
- mdify_cli-2.3.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
12
- mdify_cli-2.3.0.dist-info/RECORD,,