PyPI - modelinfo-cli - Version diff - 1.4.2.tar.gz → 1.4.4.tar.gz - Mend

modelinfo-cli 1.4.2.tar.gz → 1.4.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: modelinfo-cli
-Version: 1.4.2
+Version: 1.4.4
 Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
 Author: ModelInfo Contributors
 License: MIT
@@ -164,12 +164,15 @@ Qwen2.5-0.5B       494.0M    BF16     8K         1.6 GB      ✓
 | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
 | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
 | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
+| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
 | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
 | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
 | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
 | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
 | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
 | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
+| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
+| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
 ## Architecture

{modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/README.md RENAMED Viewed

@@ -146,12 +146,15 @@ Qwen2.5-0.5B       494.0M    BF16     8K         1.6 GB      ✓
 | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
 | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
 | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
+| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
 | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
 | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
 | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
 | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
 | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
 | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
+| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
+| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
 ## Architecture

{modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "modelinfo-cli"
-version = "1.4.2"
+version = "1.4.4"
 description = "A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity."
 readme = "README.md"
 requires-python = ">=3.10"

{modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/__init__.py RENAMED Viewed

@@ -2,4 +2,4 @@
 modelinfo - A high-performance CLI utility for inspecting ML model checkpoints.
 """
-__version__ = "1.4.2"
+__version__ = "1.4.4"

{modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/cli.py RENAMED Viewed

@@ -1,9 +1,9 @@
 import argparse
 import json
+import math
 import os
 import sys
 from typing import Sequence
 from modelinfo.architecture import identify_architecture_name
 from modelinfo.calculator import calculate_footprint
 from modelinfo.parsers.gguf import parse_gguf_header
@@ -12,6 +12,43 @@ from modelinfo.parsers.safetensors import parse_safetensors_header
 from modelinfo.ui import console, print_model_info, print_compare_info
+class VersionAction(argparse.Action):
+    def __init__(self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help="show program's version number and exit"):
+        super().__init__(
+            option_strings=option_strings,
+            dest=dest,
+            default=default,
+            nargs=0,
+            help=help,
+        )
+    def __call__(self, parser, namespace, values, option_string=None):
+        from importlib.metadata import PackageNotFoundError, version
+        from modelinfo import __version__
+        try:
+            ver = version("modelinfo-cli")
+        except PackageNotFoundError:
+            ver = __version__
+        print(f"{parser.prog} {ver}")
+        parser.exit()
+def _positive_int(value: str) -> int:
+    ivalue = int(value)
+    if ivalue < 1:
+        raise argparse.ArgumentTypeError("batch size must be at least 1")
+    return ivalue
+def _positive_float(value: str) -> float:
+    fvalue = float(value)
+    if not math.isfinite(fvalue) or fvalue <= 0:
+        raise argparse.ArgumentTypeError("timeout must be a finite number greater than 0")
+    return fvalue
 def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         prog="modelinfo",
@@ -30,6 +67,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
         default=None,
         help="Context length for dynamic KV cache footprint calculation.",
     )
+    parser.add_argument(
+        "--batch-size",
+        type=_positive_int,
+        default=1,
+        help="Batch size for dynamic KV cache footprint calculation.",
+    )
     parser.add_argument(
         "--max-vram",
         type=float,
@@ -47,6 +90,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
         action="store_true",
         help="Deep dive: Fetch all remote tensor shards to display the exact tensor size breakdown.",
     )
+    parser.add_argument(
+        "--timeout",
+        type=_positive_float,
+        default=10.0,
+        help="Network timeout in seconds for remote Hugging Face fetches.",
+    )
     parser.add_argument(
         "--topology",
         type=str,
@@ -72,6 +121,11 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
         default=0.9,
         help="vLLM gpu_memory_utilization ratio (default 0.9). Reserves 10 percent for PyTorch context.",
     )
+    parser.add_argument(
+        "-v",
+        "--version",
+        action=VersionAction,
+    )
     return parser.parse_args(argv)
@@ -79,8 +133,10 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
 def analyze_model(
     file_path: str,
     context_override: int | None,
-    gpu_count: int = 1,
+    gpu_count: int = 1,
+    batch_size: int = 1,
     fetch_tensors: bool = False,
+    timeout: float = 10.0,
     topology: str = "pcie4",
     strategy: str = "tp",
     is_vllm: bool = False,
@@ -95,7 +151,9 @@ def analyze_model(
     if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
         from modelinfo.parsers.huggingface import fetch_huggingface_repo
-        tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors)
+        tensors, config, format_name, disk_size = fetch_huggingface_repo(
+            file_path, fetch_tensors=fetch_tensors, timeout=timeout
+        )
     elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
         tensors = parse_safetensors_header(file_path)
         format_name = "SafeTensors"
@@ -114,6 +172,8 @@ def analyze_model(
     elif file_path_lower.endswith(".pt") or file_path_lower.endswith(".bin"):
         tensors = parse_pytorch_header(file_path)
         format_name = "PyTorch"
+    elif os.path.isdir(file_path):
+        raise IsADirectoryError(f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory.")
     else:
         raise ValueError(f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID.")
@@ -135,6 +195,7 @@ def analyze_model(
     footprint = calculate_footprint(
         tensors,
         context_length=context_length,
+        batch_size=batch_size,
         config=config,
         gpu_count=gpu_count,
         topology=topology,
@@ -190,43 +251,39 @@ def main(argv: Sequence[str] | None = None) -> int:
         models = []
         for model_path in args.file:
-            try:
-                info = analyze_model(
-                    model_path,
-                    args.context,
-                    gpu_count,
-                    fetch_tensors=args.tensors,
-                    topology=args.topology,
-                    strategy=args.strategy,
-                    is_vllm=args.vllm,
-                    gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
-                    gpu_util=args.gpu_util
-                )
-                models.append((model_path.split("/")[-1], info))
-            except Exception as e:
-                console.print(f"[red]Error analyzing model '{model_path}': {e}[/red]")
-                return 1
+            info = analyze_model(
+                model_path,
+                args.context,
+                gpu_count=gpu_count,
+                batch_size=args.batch_size,
+                fetch_tensors=args.tensors,
+                timeout=args.timeout,
+                topology=args.topology,
+                strategy=args.strategy,
+                is_vllm=args.vllm,
+                gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
+                gpu_util=args.gpu_util
+            )
+            models.append((model_path.split("/")[-1], info))
         print_compare_info(models, gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
         return 0
     file_path = args.file[0]
-    try:
-        info = analyze_model(
-            file_path,
-            args.context,
-            gpu_count,
-            fetch_tensors=args.tensors,
-            topology=args.topology,
-            strategy=args.strategy,
-            is_vllm=args.vllm,
-            gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
-            gpu_util=args.gpu_util
-        )
-    except Exception as e:
-        console.print(f"[red]Error: {e}[/red]")
-        return 1
+    info = analyze_model(
+        file_path,
+        args.context,
+        gpu_count=gpu_count,
+        batch_size=args.batch_size,
+        fetch_tensors=args.tensors,
+        timeout=args.timeout,
+        topology=args.topology,
+        strategy=args.strategy,
+        is_vllm=args.vllm,
+        gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
+        gpu_util=args.gpu_util
+    )
     print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
     return 0

{modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/hardware.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import re
 import subprocess
-from typing import Tuple
+from typing import Optional, Tuple
 KNOWN_GPUS = {
     # --- NVIDIA Consumer (RTX 50/40/30/20/10 Series & Titans) ---
@@ -21,6 +21,7 @@ KNOWN_GPUS = {
     "rtx4060ti16gb": 16.0,
     "rtx4060ti": 8.0,
     "rtx4060": 8.0,
+    "rtx4050": 6.0,
     "rtx3090ti": 24.0,
     "rtx3090": 24.0,
     "rtx3080ti": 12.0,
@@ -31,6 +32,7 @@ KNOWN_GPUS = {
     "rtx3060ti": 8.0,
     "rtx306012gb": 12.0,
     "rtx3060": 8.0,
+    "rtx3050ti": 4.0,
     "rtx3050": 8.0,
     "rtx2080ti": 11.0,
     "rtx2080super": 8.0,
@@ -40,6 +42,11 @@ KNOWN_GPUS = {
     "rtx2060super": 8.0,
     "rtx206012gb": 12.0,
     "rtx2060": 6.0,
+    "gtx1660super": 6.0,
+    "gtx1660ti": 6.0,
+    "gtx1660": 6.0,
+    "gtx1650super": 4.0,
+    "gtx1650": 4.0,
     "gtx1080ti": 11.0,
     "gtx1080": 8.0,
     "gtx1070ti": 8.0,
@@ -50,7 +57,6 @@ KNOWN_GPUS = {
     "titanxp": 12.0,
     "titanxpascal": 12.0,
     "titanx": 12.0,
     # --- NVIDIA Data Center / Workstation ---
     "b200": 192.0,
     "b100": 192.0,
@@ -82,7 +88,6 @@ KNOWN_GPUS = {
     "rtxa4000": 16.0,
     "quadrortx8000": 48.0,
     "quadrortx6000": 24.0,
     # --- AMD Consumer (RX 9000/7000/6000 Series) ---
     "rx9070xt": 16.0,
     "rx9070": 16.0,
@@ -106,7 +111,8 @@ KNOWN_GPUS = {
     "rx6650xt": 8.0,
     "rx6600xt": 8.0,
     "rx6600": 8.0,
+    "rx580": 8.0,
+    "rx570": 4.0,
     # --- AMD Data Center / Pro ---
     "mi300x": 192.0,
     "mi250x": 128.0,
@@ -114,7 +120,6 @@ KNOWN_GPUS = {
     "prow7900": 48.0,
     "prow7800": 32.0,
     "prow6800": 32.0,
     # --- Intel Consumer & Accelerators ---
     "arcb580": 12.0,
     "b580": 12.0,
@@ -128,63 +133,162 @@ KNOWN_GPUS = {
     "gaudi2": 96.0,
 }
 def normalize_gpu_string(name: str) -> str:
     """Strips vendor fluff, spaces, and hyphens to map correctly to KNOWN_GPUS."""
     name = name.lower()
     # Remove common vendor/marketing fluff that disrupts core identifiers
-    fluff_words = ["nvidia", "geforce", "amd", "radeon", "intel", "arc", "generation", "edition", "graphics", "accelerator"]
+    fluff_words = [
+        "nvidia",
+        "geforce",
+        "amd",
+        "radeon",
+        "intel",
+        "arc",
+        "generation",
+        "edition",
+        "graphics",
+        "accelerator",
+    ]
     for word in fluff_words:
         name = name.replace(word, "")
-    return re.sub(r'[\s\-]', '', name)
-def detect_local_gpu() -> Tuple[str, float, int]:
-    # 1. NVIDIA
+    return re.sub(r"[\s\-]", "", name)
+def _detect_nvidia_gpu() -> Optional[Tuple[str, float, int]]:
     try:
         result = subprocess.run(
-            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
-            capture_output=True, text=True, check=True
+            [
+                "nvidia-smi",
+                "--query-gpu=name,memory.total",
+                "--format=csv,noheader,nounits",
+            ],
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=2.0,
         )
-        lines = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
+        lines = [
+            line.strip() for line in result.stdout.strip().split("\n") if line.strip()
+        ]
         if lines:
             total_mb = 0
             for line in lines:
-                parts = line.split(',')
+                parts = line.split(",")
                 if len(parts) >= 2:
                     total_mb += int(parts[1].strip())
             gpu_count = len(lines)
-            first_name = lines[0].split(',')[0].strip()
-            display_name = f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
+            first_name = lines[0].split(",")[0].strip()
+            display_name = (
+                f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
+            )
             return display_name, total_mb / 1024.0, gpu_count
     except Exception:
         pass
-    # 2. AMD (ROCm)
+    return None
+def _detect_amd_gpu() -> Optional[Tuple[str, float, int]]:
     try:
         result = subprocess.run(
             ["rocm-smi", "--showmeminfo", "vram"],
-            capture_output=True, text=True, check=True
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=2.0,
         )
-        lines = [line for line in result.stdout.strip().split('\n') if "Total Memory (B):" in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if "Total Memory (B):" in line
+        ]
         if lines:
             total_bytes = 0
             gpu_count = len(lines)
             for line in lines:
-                parts = line.split(':')
+                parts = line.split(":")
                 if len(parts) >= 2:
                     total_bytes += int(parts[1].strip())
-            display_name = f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
+            display_name = (
+                f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
+            )
             return display_name, total_bytes / (1024.0**3), gpu_count
     except Exception:
         pass
-    # 3. Apple Silicon
+    return None
+def _parse_intel_vram(size_str: str) -> Optional[float]:
+    match = re.search(r"([\d\.]+)\s*([a-zA-Z]*)", size_str)
+    if not match:
+        return None
+    val = float(match.group(1))
+    unit = match.group(2).lower()
+    if unit in ("gib", "gb"):
+        val *= 1024.0
+    elif unit in ("kib", "kb"):
+        val /= 1024.0
+    elif unit == "b":
+        val /= (1024.0 * 1024.0)
+    return val
+def _parse_xpu_smi_output(stdout: str) -> Tuple[list[str], float, int]:
+    gpu_names: list[str] = []
+    total_mib: float = 0.0
+    parsed_memory_entries: int = 0
+    for line in stdout.splitlines():
+        lower_line = line.lower()
+        if "device name:" in lower_line:
+            idx = lower_line.index("device name:")
+            name = line[idx + len("device name:"):].split("|")[0].strip()
+            gpu_names.append(name)
+        elif "memory physical size:" in lower_line:
+            idx = lower_line.index("memory physical size:")
+            size_str = line[idx + len("memory physical size:"):].split("|")[0].strip()
+            val = _parse_intel_vram(size_str)
+            if val is not None:
+                total_mib += val
+                parsed_memory_entries += 1
+    return gpu_names, total_mib, parsed_memory_entries
+def _detect_intel_gpu() -> Optional[Tuple[str, float, int]]:
+    try:
+        result = subprocess.run(
+            ["xpu-smi", "discovery"],
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=2.0,
+        )
+        gpu_names, total_mib, parsed_memory_entries = _parse_xpu_smi_output(result.stdout)
+        if gpu_names and parsed_memory_entries == len(gpu_names) and total_mib > 0.0:
+            gpu_count = len(gpu_names)
+            first_name = gpu_names[0]
+            display_name = (
+                f"Intel Multi-GPU ({gpu_count}x {first_name})" if gpu_count > 1 else first_name
+            )
+            return display_name, total_mib / 1024.0, gpu_count
+    except Exception:
+        pass
+    return None
+def _detect_apple_gpu() -> Optional[Tuple[str, float, int]]:
     try:
         result = subprocess.run(
             ["sysctl", "hw.memsize"],
-            capture_output=True, text=True, check=True
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=2.0,
         )
         total_bytes = int(result.stdout.strip().split()[1])
         # Apply 75% operational heuristic for Apple Silicon wire limits
@@ -192,34 +296,62 @@ def detect_local_gpu() -> Tuple[str, float, int]:
         return "Apple Silicon (Unified Memory)", vram_gb, 1
     except Exception:
         pass
+    return None
+def detect_local_gpu() -> Tuple[str, float, int]:
+    # 1. NVIDIA
+    nvidia_res = _detect_nvidia_gpu()
+    if nvidia_res is not None:
+        return nvidia_res
+    # 2. AMD (ROCm)
+    amd_res = _detect_amd_gpu()
+    if amd_res is not None:
+        return amd_res
+    # 3. Intel (xpu-smi)
+    intel_res = _detect_intel_gpu()
+    if intel_res is not None:
+        return intel_res
+    # 4. Apple Silicon
+    apple_res = _detect_apple_gpu()
+    if apple_res is not None:
+        return apple_res
     return "Unknown", 8.0, 1
 def resolve_gpu(target: str) -> Tuple[str, float, int]:
     if target.lower() == "auto":
         return detect_local_gpu()
     # Apple Silicon routing trap
     lower_target = target.lower()
-    if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(r'^m[1-4](-?(pro|max|ultra))?$', lower_target):
-        raise ValueError("Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits.")
+    if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(
+        r"^m[1-4](-?(pro|max|ultra))?$", lower_target
+    ):
+        raise ValueError(
+            "Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits."
+        )
     # Parse potential multi-GPU format e.g., "2x RTX4090"
     gpu_count = 1
-    match = re.match(r'^(\d+)x\s*(.+)$', lower_target)
+    match = re.match(r"^(\d+)x\s*(.+)$", lower_target)
     if match:
         gpu_count = int(match.group(1))
         target_name = match.group(2)
     else:
         target_name = target
     normalized = normalize_gpu_string(target_name)
     if normalized in KNOWN_GPUS:
         vram_gb = KNOWN_GPUS[normalized] * gpu_count
         display_name = f"{gpu_count}x {target_name}" if gpu_count > 1 else target_name
         return display_name, vram_gb, gpu_count
     # If the user passed a pure number, assume GB
     try:
         vram_gb = float(normalized) * gpu_count
@@ -227,5 +359,17 @@ def resolve_gpu(target: str) -> Tuple[str, float, int]:
         return display_name, vram_gb, gpu_count
     except ValueError:
         pass
-    raise ValueError(f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value.")
+    import difflib
+    matches = difflib.get_close_matches(normalized, KNOWN_GPUS.keys(), n=3, cutoff=0.6)
+    if matches:
+        suggestions = ", ".join(matches)
+        raise ValueError(
+            f"Unknown GPU target '{target}'. Did you mean: {suggestions}? "
+            f"Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
+        )
+    raise ValueError(
+        f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
+    )

modelinfo-cli 1.4.2.tar.gz → 1.4.4.tar.gz

modelinfo-cli 1.4.2.tar.gz → 1.4.4.tar.gz