PyPI - mdify-cli - Versions diffs - 1.4.1__py3-none-any.whl → 2.9.1__py3-none-any.whl - Mend

mdify-cli 1.4.1 (py3-none-any.whl) → 2.9.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

mdify/__init__.py +1 -1
mdify/cli.py +587 -219
mdify/container.py +167 -0
mdify/docling_client.py +263 -0
{mdify_cli-1.4.1.dist-info → mdify_cli-2.9.1.dist-info}/METADATA +92 -20
mdify_cli-2.9.1.dist-info/RECORD +12 -0
{mdify_cli-1.4.1.dist-info → mdify_cli-2.9.1.dist-info}/WHEEL +1 -1
mdify_cli-1.4.1.dist-info/RECORD +0 -10
{mdify_cli-1.4.1.dist-info → mdify_cli-2.9.1.dist-info}/entry_points.txt +0 -0
{mdify_cli-1.4.1.dist-info → mdify_cli-2.9.1.dist-info}/licenses/LICENSE +0 -0
{mdify_cli-1.4.1.dist-info → mdify_cli-2.9.1.dist-info}/top_level.txt +0 -0

mdify/cli.py CHANGED Viewed

@@ -10,6 +10,7 @@ is lightweight and has no ML dependencies.
 import argparse
 import json
 import os
+import platform
 import shutil
 import subprocess
 import sys
@@ -21,6 +22,8 @@ from urllib.error import URLError
 from urllib.request import urlopen
 from . import __version__
+from mdify.container import DoclingContainer
+from mdify.docling_client import convert_file
 # Configuration
 MDIFY_HOME = Path.home() / ".mdify"
@@ -29,18 +32,22 @@ PYPI_API_URL = "https://pypi.org/pypi/mdify-cli/json"
 CHECK_INTERVAL_SECONDS = 86400  # 24 hours
 # Container configuration
-DEFAULT_IMAGE = "ghcr.io/tiroq/mdify-runtime:latest"
-SUPPORTED_RUNTIMES = ("docker", "podman")
+DEFAULT_IMAGE = "ghcr.io/docling-project/docling-serve-cpu:main"
+GPU_IMAGE = "ghcr.io/docling-project/docling-serve-cu126:main"
+SUPPORTED_RUNTIMES = ("docker", "podman", "orbstack", "colima", "container")
+MACOS_RUNTIMES_PRIORITY = ("container", "orbstack", "colima", "podman", "docker")
+OTHER_RUNTIMES_PRIORITY = ("docker", "podman")
 # =============================================================================
 # Update checking functions
 # =============================================================================
 def _get_remote_version(timeout: int = 5) -> Optional[str]:
     """
     Fetch the latest version from PyPI.
     Returns:
         Version string (e.g., "1.1.0") or None if fetch failed.
     """
@@ -56,16 +63,16 @@ def _get_remote_version(timeout: int = 5) -> Optional[str]:
 def _should_check_for_update() -> bool:
     """
     Determine if we should check for updates based on last check time.
     Returns:
         True if check should be performed, False otherwise.
     """
     if os.environ.get("MDIFY_NO_UPDATE_CHECK", "").lower() in ("1", "true", "yes"):
         return False
     if not LAST_CHECK_FILE.exists():
         return True
     try:
         last_check = float(LAST_CHECK_FILE.read_text().strip())
         elapsed = time.time() - last_check
@@ -86,18 +93,18 @@ def _update_last_check_time() -> None:
 def _compare_versions(current: str, remote: str) -> bool:
     """
     Compare version strings.
     Returns:
         True if remote version is newer than current.
     """
     try:
         current_parts = [int(x) for x in current.split(".")]
         remote_parts = [int(x) for x in remote.split(".")]
         max_len = max(len(current_parts), len(remote_parts))
         current_parts.extend([0] * (max_len - len(current_parts)))
         remote_parts.extend([0] * (max_len - len(remote_parts)))
         return remote_parts > current_parts
     except (ValueError, AttributeError):
         return False
@@ -106,15 +113,15 @@ def _compare_versions(current: str, remote: str) -> bool:
 def check_for_update(force: bool = False) -> None:
     """
     Check for updates and prompt user to upgrade if available.
     Args:
         force: If True, check regardless of last check time and show errors.
     """
     if not force and not _should_check_for_update():
         return
     remote_version = _get_remote_version()
     if remote_version is None:
         if force:
             print(
@@ -124,19 +131,19 @@ def check_for_update(force: bool = False) -> None:
             )
             sys.exit(1)
         return
     _update_last_check_time()
     if not _compare_versions(__version__, remote_version):
         if force:
             print(f"mdify is up to date (version {__version__})")
         return
-    print(f"\n{'='*50}")
+    print(f"\n{'=' * 50}")
     print(f"A new version of mdify-cli is available!")
     print(f"  Current version: {__version__}")
     print(f"  Latest version:  {remote_version}")
-    print(f"{'='*50}")
+    print(f"{'=' * 50}")
     print(f"\nTo upgrade, run:")
     print(f"  pipx upgrade mdify-cli")
     print(f"  # or: pip install --upgrade mdify-cli\n")
@@ -146,43 +153,155 @@ def check_for_update(force: bool = False) -> None:
 # Container runtime functions
 # =============================================================================
-def detect_runtime(preferred: str) -> Optional[str]:
+def is_daemon_running(runtime: str) -> bool:
     """
-    Detect available container runtime.
+    Check if a container runtime daemon is running.
     Args:
-        preferred: Preferred runtime ('docker' or 'podman')
+        runtime: Path to container runtime executable
+    Returns:
+        True if daemon is running and responsive, False otherwise.
+    """
+    try:
+        runtime_name = os.path.basename(runtime)
+        # Apple Container uses 'container system status' to check daemon
+        if runtime_name == "container":
+            result = subprocess.run(
+                [runtime, "system", "status"],
+                capture_output=True,
+                timeout=5,
+                check=False,
+            )
+            return result.returncode == 0
+        # Other runtimes use --version check
+        result = subprocess.run(
+            [runtime, "--version"],
+            capture_output=True,
+            timeout=5,
+            check=False,
+        )
+        return result.returncode == 0
+    except (OSError, subprocess.TimeoutExpired):
+        return False
+def detect_runtime(preferred: Optional[str] = None, explicit: bool = True) -> Optional[str]:
+    """
+    Detect available container runtime.
+    First checks MDIFY_CONTAINER_RUNTIME environment variable for explicit override.
+    On macOS, tries native tools first (OrbStack → Colima → Podman → Docker).
+    On other platforms, tries Docker → Podman.
+    Args:
+        preferred: Preferred runtime name override (deprecated, use MDIFY_CONTAINER_RUNTIME)
+        explicit: If True, print info about detection/fallback choices.
     Returns:
         Path to runtime executable, or None if not found.
     """
-    # Try preferred runtime first
-    runtime_path = shutil.which(preferred)
-    if runtime_path:
-        return runtime_path
-    # Try alternative
-    alternative = "podman" if preferred == "docker" else "docker"
-    runtime_path = shutil.which(alternative)
-    if runtime_path:
-        print(f"Warning: {preferred} not found, using {alternative}", file=sys.stderr)
-        return runtime_path
+    # Check for explicit environment variable override
+    env_runtime = os.environ.get("MDIFY_CONTAINER_RUNTIME", "").strip().lower()
+    if env_runtime:
+        if env_runtime not in SUPPORTED_RUNTIMES:
+            print(
+                f"Warning: MDIFY_CONTAINER_RUNTIME='{env_runtime}' is not supported. "
+                f"Supported: {', '.join(SUPPORTED_RUNTIMES)}",
+                file=sys.stderr,
+            )
+        else:
+            runtime_path = shutil.which(env_runtime)
+            if runtime_path:
+                if explicit:
+                    print(f"Using runtime from MDIFY_CONTAINER_RUNTIME: {env_runtime}")
+                return runtime_path
+            else:
+                print(
+                    f"Warning: MDIFY_CONTAINER_RUNTIME='{env_runtime}' specified but not found in PATH",
+                    file=sys.stderr,
+                )
+    # Determine runtime priority based on OS
+    is_macos = platform.system() == "Darwin"
+    if is_macos:
+        runtime_priority = MACOS_RUNTIMES_PRIORITY
+        if explicit:
+            print(f"Detected macOS: checking for native container tools...")
+    else:
+        runtime_priority = OTHER_RUNTIMES_PRIORITY
+    # Try each runtime in priority order
+    found_but_not_running = []
+    for runtime_name in runtime_priority:
+        runtime_path = shutil.which(runtime_name)
+        if runtime_path:
+            # Check if daemon is running
+            if is_daemon_running(runtime_path):
+                if explicit:
+                    print(f"Using container runtime: {runtime_name}")
+                return runtime_path
+            else:
+                found_but_not_running.append((runtime_name, runtime_path))
+    # If we found tools but none are running, warn and ask user to start one
+    if found_but_not_running:
+        print(
+            f"\nWarning: Found container runtime(s) but daemon is not running:",
+            file=sys.stderr,
+        )
+        for runtime_name, runtime_path in found_but_not_running:
+            print(f"  - {runtime_name} ({runtime_path})", file=sys.stderr)
+        print(
+            "\nPlease start one of these tools before running mdify.",
+            file=sys.stderr,
+        )
+        if is_macos:
+            print(
+                "  macOS tip: Start OrbStack, Colima, or Podman Desktop application",
+                file=sys.stderr,
+            )
+        return None
     return None
 def check_image_exists(runtime: str, image: str) -> bool:
     """
     Check if container image exists locally.
     Args:
         runtime: Path to container runtime
         image: Image name/tag
     Returns:
         True if image exists locally.
     """
     try:
+        runtime_name = os.path.basename(runtime)
+        # Apple Container uses 'image-list' command
+        if runtime_name == "container":
+            result = subprocess.run(
+                [runtime, "image-list", "--format", "json"],
+                capture_output=True,
+                check=False,
+            )
+            if result.returncode == 0 and result.stdout:
+                try:
+                    images = json.loads(result.stdout.decode())
+                    # Check if image exists in the list
+                    for img in images:
+                        if img.get("name") == image or image in img.get("repoTags", []):
+                            return True
+                except json.JSONDecodeError:
+                    pass
+            return False
+        # Docker/Podman/OrbStack/Colima use standard 'image inspect'
         result = subprocess.run(
             [runtime, "image", "inspect", image],
             capture_output=True,
@@ -196,19 +315,31 @@ def check_image_exists(runtime: str, image: str) -> bool:
 def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
     """
     Pull container image.
     Args:
         runtime: Path to container runtime
         image: Image name/tag
         quiet: Suppress progress output
     Returns:
         True if pull succeeded.
     """
     if not quiet:
         print(f"Pulling image: {image}")
     try:
+        runtime_name = os.path.basename(runtime)
+        # Apple Container uses 'image-pull' command
+        if runtime_name == "container":
+            result = subprocess.run(
+                [runtime, "image-pull", image],
+                capture_output=quiet,
+                check=False,
+            )
+            return result.returncode == 0
+        # Docker/Podman/OrbStack/Colima use standard 'pull'
         result = subprocess.run(
             [runtime, "pull", image],
             capture_output=quiet,
@@ -220,11 +351,49 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
         return False
+def get_image_size_estimate(runtime: str, image: str) -> Optional[int]:
+    """
+    Estimate image size by querying registry manifest.
+    Runs `<runtime> manifest inspect --verbose <image>` and sums all layer sizes
+    across all architectures, then applies 50% buffer for decompression.
+    Args:
+        runtime: Path to container runtime
+        image: Image name/tag
+    Returns:
+        Estimated size in bytes with 50% buffer, or None if command fails.
+    """
+    try:
+        result = subprocess.run(
+            [runtime, "manifest", "inspect", "--verbose", image],
+            capture_output=True,
+            check=False,
+        )
+        if result.returncode != 0:
+            return None
+        manifest_data = json.loads(result.stdout.decode())
+        # Sum all layer sizes across all architectures
+        total_size = 0
+        for manifest in manifest_data.get("Manifests", []):
+            oci_manifest = manifest.get("OCIManifest", {})
+            for layer in oci_manifest.get("layers", []):
+                total_size += layer.get("size", 0)
+        # Apply 50% buffer for decompression (compressed -> uncompressed)
+        return int(total_size * 1.5)
+    except (json.JSONDecodeError, KeyError, ValueError, OSError):
+        return None
 def format_size(size_bytes: int) -> str:
     """Format file size in human-readable format."""
-    for unit in ['B', 'KB', 'MB', 'GB']:
+    for unit in ["B", "KB", "MB", "GB"]:
         if size_bytes < 1024:
-            return f"{size_bytes:.1f} {unit}" if unit != 'B' else f"{size_bytes} {unit}"
+            return f"{size_bytes:.1f} {unit}" if unit != "B" else f"{size_bytes} {unit}"
         size_bytes /= 1024
     return f"{size_bytes:.1f} TB"
@@ -242,31 +411,112 @@ def format_duration(seconds: float) -> str:
     return f"{hours}h {mins}m {secs:.0f}s"
+def get_free_space(path: str) -> int:
+    """Get free disk space for the given path in bytes."""
+    try:
+        return shutil.disk_usage(path).free
+    except (FileNotFoundError, OSError):
+        return 0
+def get_storage_root(runtime: str) -> Optional[str]:
+    """
+    Get the storage root directory for Docker, Podman, OrbStack, or Colima.
+    Args:
+        runtime: Path to container runtime executable
+    Returns:
+        Storage root path as string, or None if command fails.
+    """
+    try:
+        # Extract runtime name from path (e.g., /usr/bin/docker -> docker)
+        runtime_name = os.path.basename(runtime)
+        if runtime_name == "docker":
+            result = subprocess.run(
+                [runtime, "system", "info", "--format", "{{.DockerRootDir}}"],
+                capture_output=True,
+                check=False,
+            )
+            if result.stdout:
+                return result.stdout.decode().strip()
+        elif runtime_name == "podman":
+            result = subprocess.run(
+                [runtime, "info", "--format", "json"],
+                capture_output=True,
+                check=False,
+            )
+            if result.stdout:
+                info = json.loads(result.stdout.decode())
+                return info.get("store", {}).get("graphRoot")
+        elif runtime_name == "orbstack":
+            # OrbStack stores containers in ~/.orbstack
+            home = os.path.expanduser("~")
+            return os.path.join(home, ".orbstack")
+        elif runtime_name == "colima":
+            # Colima stores containers in ~/.colima
+            home = os.path.expanduser("~")
+            return os.path.join(home, ".colima")
+        elif runtime_name == "container":
+            # Apple Container stores data in Application Support
+            home = os.path.expanduser("~")
+            return os.path.join(home, "Library", "Application Support", "com.apple.container")
+        return None
+    except (OSError, json.JSONDecodeError):
+        return None
+def confirm_proceed(message: str, default_no: bool = True) -> bool:
+    """
+    Prompt user for confirmation with a y/N prompt.
+    Args:
+        message: The confirmation message to display
+        default_no: If True, shows [y/N] (default no). If False, shows [Y/n] (default yes)
+    Returns:
+        True if user entered 'y' or 'Y', False otherwise.
+        Returns False immediately if stdin is not a TTY (non-interactive).
+    """
+    if not sys.stdin.isatty():
+        return False
+    prompt = "[y/N]" if default_no else "[Y/n]"
+    print(f"{message} {prompt}", file=sys.stderr)
+    response = input()
+    return response.lower() == "y"
 class Spinner:
     """A simple spinner to show progress during long operations."""
     def __init__(self):
-        self.frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
+        self.frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
         self.running = False
         self.thread = None
         self.start_time = None
     def _spin(self):
         idx = 0
         while self.running:
             elapsed = time.time() - self.start_time
             frame = self.frames[idx % len(self.frames)]
-            print(f"\r{self.prefix} {frame} ({format_duration(elapsed)})", end="", flush=True)
+            print(
+                f"\r{self.prefix} {frame} ({format_duration(elapsed)})",
+                end="",
+                flush=True,
+            )
             idx += 1
             time.sleep(0.1)
     def start(self, prefix: str = ""):
         self.prefix = prefix
         self.running = True
         self.start_time = time.time()
         self.thread = threading.Thread(target=self._spin, daemon=True)
         self.thread.start()
     def stop(self):
         self.running = False
         if self.thread:
@@ -275,93 +525,45 @@ class Spinner:
         print(f"\r{' ' * 80}\r", end="", flush=True)
-def run_container(
-    runtime: str,
-    image: str,
-    input_file: Path,
-    output_file: Path,
-    mask_pii: bool = False,
-) -> Tuple[bool, str, float]:
-    """
-    Run container to convert a single file.
-    Args:
-        runtime: Path to container runtime
-        image: Image name/tag
-        input_file: Absolute path to input file
-        output_file: Absolute path to output file
-        mask_pii: Whether to mask PII in images
-    Returns:
-        Tuple of (success: bool, message: str, elapsed_seconds: float)
-    """
-    start_time = time.time()
-    # Ensure output directory exists
-    output_file.parent.mkdir(parents=True, exist_ok=True)
-    # Mount directories
-    input_dir = input_file.parent
-    output_dir = output_file.parent
-    # Container paths
-    container_in = f"/work/in/{input_file.name}"
-    container_out = f"/work/out/{output_file.name}"
-    cmd = [
-        runtime, "run", "--rm",
-        "-v", f"{input_dir}:/work/in:ro",
-        "-v", f"{output_dir}:/work/out",
-        image,
-        "--in", container_in,
-        "--out", container_out,
-    ]
-    if mask_pii:
-        cmd.append("--mask")
-    try:
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        elapsed = time.time() - start_time
-        if result.returncode == 0:
-            return True, "success", elapsed
-        else:
-            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
-            return False, error_msg, elapsed
-    except OSError as e:
-        elapsed = time.time() - start_time
-        return False, str(e), elapsed
 # =============================================================================
 # File handling functions
 # =============================================================================
 # Supported file extensions (based on Docling InputFormat)
 SUPPORTED_EXTENSIONS = {
-    '.pdf', '.docx', '.pptx', '.html', '.htm',
-    '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.tif',  # images
-    '.asciidoc', '.adoc', '.asc',  # asciidoc
-    '.md', '.markdown',  # markdown
-    '.csv', '.xlsx',  # spreadsheets
-    '.xml',  # XML formats
-    '.json',  # JSON docling
-    '.mp3', '.wav', '.m4a', '.flac',  # audio
-    '.vtt',  # subtitles
+    ".pdf",
+    ".docx",
+    ".pptx",
+    ".html",
+    ".htm",
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".bmp",
+    ".tiff",
+    ".tif",  # images
+    ".asciidoc",
+    ".adoc",
+    ".asc",  # asciidoc
+    ".md",
+    ".markdown",  # markdown
+    ".csv",
+    ".xlsx",  # spreadsheets
+    ".xml",  # XML formats
+    ".json",  # JSON docling
+    ".mp3",
+    ".wav",
+    ".m4a",
+    ".flac",  # audio
+    ".vtt",  # subtitles
 }
 def get_files_to_convert(input_path: Path, mask: str, recursive: bool) -> List[Path]:
     """Get list of files to convert based on input path and options."""
     files = []
     if input_path.is_file():
         files.append(input_path)
     elif input_path.is_dir():
@@ -369,19 +571,19 @@ def get_files_to_convert(input_path: Path, mask: str, recursive: bool) -> List[P
             files = list(input_path.rglob(mask))
         else:
             files = list(input_path.glob(mask))
         # Filter to only files
         files = [f for f in files if f.is_file()]
     else:
         raise FileNotFoundError(f"Input path does not exist: {input_path}")
     # Filter out hidden files and unsupported formats
     files = [
-        f for f in files
-        if not f.name.startswith('.')
-        and f.suffix.lower() in SUPPORTED_EXTENSIONS
+        f
+        for f in files
+        if not f.name.startswith(".") and f.suffix.lower() in SUPPORTED_EXTENSIONS
     ]
     return files
@@ -414,7 +616,7 @@ def get_output_path(
             output_path = output_dir / relative_path.parent / output_name
         except ValueError:
             output_path = output_dir / output_name
         return output_path
@@ -422,6 +624,7 @@ def get_output_path(
 # CLI argument parsing
 # =============================================================================
 def parse_args() -> argparse.Namespace:
     """Parse command line arguments."""
     parser = argparse.ArgumentParser(
@@ -436,74 +639,99 @@ Examples:
   mdify ./docs --runtime podman          Use Podman instead of Docker
 """,
     )
     parser.add_argument(
         "input",
         type=str,
         nargs="?",
         help="Input file or directory to convert",
     )
     parser.add_argument(
-        "-o", "--out-dir",
+        "-o",
+        "--out-dir",
         type=str,
         default="output",
         help="Output directory for converted files (default: output)",
     )
     parser.add_argument(
-        "-g", "--glob",
+        "-g",
+        "--glob",
         type=str,
         default="*",
         help="Glob pattern for filtering files in directory (default: *)",
     )
     parser.add_argument(
-        "-r", "--recursive",
+        "-r",
+        "--recursive",
         action="store_true",
         help="Recursively scan directories",
     )
     parser.add_argument(
         "--flat",
         action="store_true",
         help="Disable directory structure preservation in output",
     )
     parser.add_argument(
         "--overwrite",
         action="store_true",
         help="Overwrite existing output files",
     )
     parser.add_argument(
-        "-q", "--quiet",
+        "-q",
+        "--quiet",
         action="store_true",
         help="Suppress progress messages",
     )
     parser.add_argument(
-        "-m", "--mask",
+        "-y",
+        "--yes",
+        action="store_true",
+        help="Skip confirmation prompts (for scripts/CI)",
+    )
+    parser.add_argument(
+        "-m",
+        "--mask",
         action="store_true",
         help="Mask PII and sensitive content in document images",
     )
+    parser.add_argument(
+        "--gpu",
+        action="store_true",
+        help="Use GPU-accelerated container image (docling-serve-cu126)",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=5001,
+        help="Port for docling-serve container (default: 5001)",
+    )
     # Container options
     parser.add_argument(
         "--runtime",
         type=str,
         choices=SUPPORTED_RUNTIMES,
-        default="docker",
-        help="Container runtime to use (default: docker)",
+        default=None,
+        help="Container runtime to use (auto-detects docker or podman if not specified)",
     )
     parser.add_argument(
         "--image",
         type=str,
         default=DEFAULT_IMAGE,
         help=f"Container image to use (default: {DEFAULT_IMAGE})",
     )
     parser.add_argument(
         "--pull",
         type=str,
@@ -511,20 +739,27 @@ Examples:
         default="missing",
         help="Image pull policy: always, missing, never (default: missing)",
     )
+    parser.add_argument(
+        "--timeout",
+        type=int,
+        default=None,
+        help="Conversion timeout in seconds (default: 1200, can be set via MDIFY_TIMEOUT env var)",
+    )
     # Utility options
     parser.add_argument(
         "--check-update",
         action="store_true",
         help="Check for available updates and exit",
     )
     parser.add_argument(
         "--version",
         action="version",
         version=f"mdify {__version__}",
     )
     return parser.parse_args()
@@ -532,137 +767,270 @@ Examples:
 # Main entry point
 # =============================================================================
 def main() -> int:
     """Main entry point for the CLI."""
     args = parse_args()
     # Handle --check-update flag
     if args.check_update:
         check_for_update(force=True)
         return 0
     # Check for updates (daily, silent on errors)
     check_for_update(force=False)
+    # Resolve timeout value: CLI > env > default 1200
+    timeout = args.timeout or int(os.environ.get("MDIFY_TIMEOUT", 1200))
     # Validate input is provided
     if args.input is None:
         print("Error: Input file or directory is required", file=sys.stderr)
         print("Usage: mdify <input> [options]", file=sys.stderr)
         print("       mdify --help for more information", file=sys.stderr)
         return 1
     # Detect container runtime
-    runtime = detect_runtime(args.runtime)
+    # If --runtime is specified, treat as explicit user choice
+    explicit = args.runtime is not None
+    runtime = detect_runtime(preferred=args.runtime, explicit=explicit)
     if runtime is None:
         print(
             f"Error: Container runtime not found ({', '.join(SUPPORTED_RUNTIMES)})",
             file=sys.stderr,
         )
-        print("Please install Docker or Podman to use mdify.", file=sys.stderr)
         return 2
     # Handle image pull policy
-    image = args.image
+    # Determine image based on --gpu flag
+    if args.gpu:
+        image = GPU_IMAGE
+    elif args.image:
+        image = args.image
+    else:
+        image = DEFAULT_IMAGE
     image_exists = check_image_exists(runtime, image)
+    # NOTE: Docker Desktop on macOS/Windows uses a VM, so disk space checks may not
+    # accurately reflect available space in the container's filesystem. Remote Docker
+    # daemons (DOCKER_HOST) are also not supported. In these cases, the check will
+    # gracefully degrade (warn and proceed).
+    # Check disk space before pulling image (skip if pull=never or image exists with pull=missing)
+    will_pull = args.pull == "always" or (args.pull == "missing" and not image_exists)
+    if will_pull:
+        storage_root = get_storage_root(runtime)
+        if storage_root:
+            image_size = get_image_size_estimate(runtime, image)
+            if image_size:
+                free_space = get_free_space(storage_root)
+                if free_space < image_size:
+                    print(
+                        f"Warning: Not enough free disk space on {storage_root}",
+                        file=sys.stderr,
+                    )
+                    print(
+                        f"  Available: {format_size(free_space)}",
+                        file=sys.stderr,
+                    )
+                    print(
+                        f"  Required:  {format_size(image_size)} (estimated)",
+                        file=sys.stderr,
+                    )
+                    if args.yes:
+                        print("  Proceeding anyway (--yes flag set)", file=sys.stderr)
+                    elif not sys.stdin.isatty():
+                        print(
+                            "  Run with --yes to proceed anyway, or free up disk space",
+                            file=sys.stderr,
+                        )
+                        return 1
+                    elif not confirm_proceed("Continue anyway?"):
+                        return 130
+                elif free_space - image_size < 1024 * 1024 * 1024:
+                    print(
+                        f"Warning: Less than 1 GB would remain after pulling image on {storage_root}",
+                        file=sys.stderr,
+                    )
+                    print(
+                        f"  Available: {format_size(free_space)}",
+                        file=sys.stderr,
+                    )
+                    print(
+                        f"  Required:  {format_size(image_size)} (estimated)",
+                        file=sys.stderr,
+                    )
+                    print(
+                        f"  Remaining: {format_size(free_space - image_size)}",
+                        file=sys.stderr,
+                    )
+                    if args.yes:
+                        print("  Proceeding anyway (--yes flag set)", file=sys.stderr)
+                    elif not sys.stdin.isatty():
+                        print(
+                            "  Run with --yes to proceed anyway, or free up disk space",
+                            file=sys.stderr,
+                        )
+                        return 1
+                    elif not confirm_proceed("Continue anyway?"):
+                        return 130
     if args.pull == "always" or (args.pull == "missing" and not image_exists):
         if not pull_image(runtime, image, args.quiet):
             print(f"Error: Failed to pull image: {image}", file=sys.stderr)
             return 1
     elif args.pull == "never" and not image_exists:
         print(f"Error: Image not found locally: {image}", file=sys.stderr)
-        print(f"Run with --pull=missing or pull manually: {args.runtime} pull {image}")
+        runtime_name = os.path.basename(runtime)
+        print(f"Run with --pull=missing or pull manually: {runtime_name} pull {image}")
         return 1
-    # Resolve paths
-    input_path = Path(args.input).resolve()
-    output_dir = Path(args.out_dir).resolve()
+    # Resolve paths (use absolute() as fallback if resolve() fails due to permissions)
+    try:
+        input_path = Path(args.input).resolve()
+    except PermissionError:
+        input_path = Path(args.input).absolute()
+    try:
+        output_dir = Path(args.out_dir).resolve()
+    except PermissionError:
+        output_dir = Path(args.out_dir).absolute()
     # Validate input
     if not input_path.exists():
         print(f"Error: Input path does not exist: {input_path}", file=sys.stderr)
         return 1
     # Get files to convert
     try:
         files_to_convert = get_files_to_convert(input_path, args.glob, args.recursive)
     except Exception as e:
         print(f"Error: {e}", file=sys.stderr)
         return 1
     if not files_to_convert:
         print(f"No files found to convert in: {input_path}", file=sys.stderr)
         return 1
     total_files = len(files_to_convert)
     total_size = sum(f.stat().st_size for f in files_to_convert)
     if not args.quiet:
         print(f"Found {total_files} file(s) to convert ({format_size(total_size)})")
         print(f"Using runtime: {runtime}")
         print(f"Using image: {image}")
         print()
+    if args.mask:
+        print(
+            "Warning: --mask is not supported with docling-serve and will be ignored",
+            file=sys.stderr,
+        )
     # Determine input base for directory structure preservation
     if input_path.is_file():
         input_base = input_path.parent
     else:
         input_base = input_path
-    # Convert files
     success_count = 0
     skipped_count = 0
     failed_count = 0
-    conversion_start = time.time()
-    spinner = Spinner()
-    for idx, input_file in enumerate(files_to_convert, 1):
-        output_file = get_output_path(input_file, input_base, output_dir, args.flat)
-        file_size = input_file.stat().st_size
-        progress = f"[{idx}/{total_files}]"
-        # Check if output exists and skip if not overwriting
-        if output_file.exists() and not args.overwrite:
-            if not args.quiet:
-                print(f"{progress} Skipped (exists): {input_file.name}")
-            skipped_count += 1
-            continue
-        # Show spinner while processing
+    total_elapsed = 0.0
+    try:
         if not args.quiet:
-            spinner.start(f"{progress} Processing: {input_file.name} ({format_size(file_size)})")
-        success, result, elapsed = run_container(
-            runtime, image, input_file, output_file, args.mask
-        )
+            print(f"Starting docling-serve container...")
+            print()
+        with DoclingContainer(runtime, image, args.port, timeout=timeout) as container:
+            # Convert files
+            conversion_start = time.time()
+            spinner = Spinner()
+            for idx, input_file in enumerate(files_to_convert, 1):
+                output_file = get_output_path(
+                    input_file, input_base, output_dir, args.flat
+                )
+                file_size = input_file.stat().st_size
+                progress = f"[{idx}/{total_files}]"
+                # Check if output exists and skip if not overwriting
+                if output_file.exists() and not args.overwrite:
+                    if not args.quiet:
+                        print(f"{progress} Skipped (exists): {input_file.name}")
+                    skipped_count += 1
+                    continue
+                # Ensure output directory exists
+                output_file.parent.mkdir(parents=True, exist_ok=True)
+                # Show spinner while processing
+                if not args.quiet:
+                    spinner.start(
+                        f"{progress} Processing: {input_file.name} ({format_size(file_size)})"
+                    )
+                start_time = time.time()
+                try:
+                    # Convert via HTTP API
+                    result = convert_file(
+                        container.base_url, input_file, to_format="md"
+                    )
+                    elapsed = time.time() - start_time
+                    if not args.quiet:
+                        spinner.stop()
+                    if result.success:
+                        # Write result to output file
+                        output_file.write_text(result.content)
+                        success_count += 1
+                        if not args.quiet:
+                            print(
+                                f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})"
+                            )
+                    else:
+                        failed_count += 1
+                        error_msg = result.error or "Unknown error"
+                        if not args.quiet:
+                            print(
+                                f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
+                            )
+                            print(f"    Error: {error_msg}", file=sys.stderr)
+                except Exception as e:
+                    elapsed = time.time() - start_time
+                    failed_count += 1
+                    if not args.quiet:
+                        spinner.stop()
+                        print(
+                            f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
+                        )
+                        print(f"    Error: {str(e)}", file=sys.stderr)
+            total_elapsed = time.time() - conversion_start
+        # Print summary
         if not args.quiet:
-            spinner.stop()
-        if success:
-            success_count += 1
-            if not args.quiet:
-                print(f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})")
-        else:
-            failed_count += 1
-            if not args.quiet:
-                print(f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})")
-                print(f"    Error: {result}", file=sys.stderr)
-    total_elapsed = time.time() - conversion_start
-    # Print summary
-    if not args.quiet:
-        print()
-        print("=" * 50)
-        print("Conversion Summary:")
-        print(f"  Total files:     {total_files}")
-        print(f"  Successful:      {success_count}")
-        print(f"  Skipped:         {skipped_count}")
-        print(f"  Failed:          {failed_count}")
-        print(f"  Total time:      {format_duration(total_elapsed)}")
-        print("=" * 50)
+            print()
+            print("=" * 50)
+            print("Conversion Summary:")
+            print(f"  Total files:     {total_files}")
+            print(f"  Successful:      {success_count}")
+            print(f"  Skipped:         {skipped_count}")
+            print(f"  Failed:          {failed_count}")
+            print(f"  Total time:      {format_duration(total_elapsed)}")
+            print("=" * 50)
+    except KeyboardInterrupt:
+        if not args.quiet:
+            print("\n\nInterrupted by user. Container stopped.")
+            if success_count > 0 or skipped_count > 0 or failed_count > 0:
+                print(
+                    f"Partial progress: {success_count} successful, {failed_count} failed, {skipped_count} skipped"
+                )
+        return 130
     # Return appropriate exit code
     if failed_count > 0:
         return 1

mdify-cli 1.4.1__py3-none-any.whl → 2.9.1__py3-none-any.whl

mdify-cli 1.4.1__py3-none-any.whl → 2.9.1__py3-none-any.whl