PyPI - modelinfo-cli - Versions diffs - 1.4.3__tar.gz → 1.4.4__tar.gz - Mend

modelinfo-cli 1.4.3__tar.gz → 1.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: modelinfo-cli
-Version: 1.4.3
+Version: 1.4.4
 Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
 Author: ModelInfo Contributors
 License: MIT
@@ -164,12 +164,15 @@ Qwen2.5-0.5B       494.0M    BF16     8K         1.6 GB      ✓
 | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
 | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
 | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
+| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
 | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
 | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
 | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
 | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
 | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
 | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
+| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
+| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
 ## Architecture

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/README.md RENAMED Viewed

@@ -146,12 +146,15 @@ Qwen2.5-0.5B       494.0M    BF16     8K         1.6 GB      ✓
 | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
 | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
 | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
+| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
 | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
 | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
 | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
 | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
 | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
 | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
+| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
+| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
 ## Architecture

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "modelinfo-cli"
-version = "1.4.3"
+version = "1.4.4"
 description = "A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity."
 readme = "README.md"
 requires-python = ">=3.10"

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/__init__.py RENAMED Viewed

@@ -2,4 +2,4 @@
 modelinfo - A high-performance CLI utility for inspecting ML model checkpoints.
 """
-__version__ = "1.4.3"
+__version__ = "1.4.4"

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/cli.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import argparse
 import json
+import math
 import os
 import sys
 from typing import Sequence
@@ -34,6 +35,20 @@ class VersionAction(argparse.Action):
         parser.exit()
+def _positive_int(value: str) -> int:
+    ivalue = int(value)
+    if ivalue < 1:
+        raise argparse.ArgumentTypeError("batch size must be at least 1")
+    return ivalue
+def _positive_float(value: str) -> float:
+    fvalue = float(value)
+    if not math.isfinite(fvalue) or fvalue <= 0:
+        raise argparse.ArgumentTypeError("timeout must be a finite number greater than 0")
+    return fvalue
 def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         prog="modelinfo",
@@ -52,6 +67,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
         default=None,
         help="Context length for dynamic KV cache footprint calculation.",
     )
+    parser.add_argument(
+        "--batch-size",
+        type=_positive_int,
+        default=1,
+        help="Batch size for dynamic KV cache footprint calculation.",
+    )
     parser.add_argument(
         "--max-vram",
         type=float,
@@ -69,6 +90,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
         action="store_true",
         help="Deep dive: Fetch all remote tensor shards to display the exact tensor size breakdown.",
     )
+    parser.add_argument(
+        "--timeout",
+        type=_positive_float,
+        default=10.0,
+        help="Network timeout in seconds for remote Hugging Face fetches.",
+    )
     parser.add_argument(
         "--topology",
         type=str,
@@ -106,8 +133,10 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
 def analyze_model(
     file_path: str,
     context_override: int | None,
-    gpu_count: int = 1,
+    gpu_count: int = 1,
+    batch_size: int = 1,
     fetch_tensors: bool = False,
+    timeout: float = 10.0,
     topology: str = "pcie4",
     strategy: str = "tp",
     is_vllm: bool = False,
@@ -122,7 +151,9 @@ def analyze_model(
     if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
         from modelinfo.parsers.huggingface import fetch_huggingface_repo
-        tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors)
+        tensors, config, format_name, disk_size = fetch_huggingface_repo(
+            file_path, fetch_tensors=fetch_tensors, timeout=timeout
+        )
     elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
         tensors = parse_safetensors_header(file_path)
         format_name = "SafeTensors"
@@ -164,6 +195,7 @@ def analyze_model(
     footprint = calculate_footprint(
         tensors,
         context_length=context_length,
+        batch_size=batch_size,
         config=config,
         gpu_count=gpu_count,
         topology=topology,
@@ -222,8 +254,10 @@ def main(argv: Sequence[str] | None = None) -> int:
             info = analyze_model(
                 model_path,
                 args.context,
-                gpu_count,
+                gpu_count=gpu_count,
+                batch_size=args.batch_size,
                 fetch_tensors=args.tensors,
+                timeout=args.timeout,
                 topology=args.topology,
                 strategy=args.strategy,
                 is_vllm=args.vllm,
@@ -240,8 +274,10 @@ def main(argv: Sequence[str] | None = None) -> int:
     info = analyze_model(
         file_path,
         args.context,
-        gpu_count,
+        gpu_count=gpu_count,
+        batch_size=args.batch_size,
         fetch_tensors=args.tensors,
+        timeout=args.timeout,
         topology=args.topology,
         strategy=args.strategy,
         is_vllm=args.vllm,

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/hardware.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import re
 import subprocess
-from typing import Tuple
+from typing import Optional, Tuple
 KNOWN_GPUS = {
     # --- NVIDIA Consumer (RTX 50/40/30/20/10 Series & Titans) ---
@@ -21,7 +21,7 @@ KNOWN_GPUS = {
     "rtx4060ti16gb": 16.0,
     "rtx4060ti": 8.0,
     "rtx4060": 8.0,
-    "rtx4050" : 6.0,
+    "rtx4050": 6.0,
     "rtx3090ti": 24.0,
     "rtx3090": 24.0,
     "rtx3080ti": 12.0,
@@ -32,7 +32,7 @@ KNOWN_GPUS = {
     "rtx3060ti": 8.0,
     "rtx306012gb": 12.0,
     "rtx3060": 8.0,
-    "rtx3050ti" : 4.0,
+    "rtx3050ti": 4.0,
     "rtx3050": 8.0,
     "rtx2080ti": 11.0,
     "rtx2080super": 8.0,
@@ -43,7 +43,7 @@ KNOWN_GPUS = {
     "rtx206012gb": 12.0,
     "rtx2060": 6.0,
     "gtx1660super": 6.0,
-    "gtx1660ti" : 6.0,
+    "gtx1660ti": 6.0,
     "gtx1660": 6.0,
     "gtx1650super": 4.0,
     "gtx1650": 4.0,
@@ -57,7 +57,6 @@ KNOWN_GPUS = {
     "titanxp": 12.0,
     "titanxpascal": 12.0,
     "titanx": 12.0,
     # --- NVIDIA Data Center / Workstation ---
     "b200": 192.0,
     "b100": 192.0,
@@ -89,7 +88,6 @@ KNOWN_GPUS = {
     "rtxa4000": 16.0,
     "quadrortx8000": 48.0,
     "quadrortx6000": 24.0,
     # --- AMD Consumer (RX 9000/7000/6000 Series) ---
     "rx9070xt": 16.0,
     "rx9070": 16.0,
@@ -115,8 +113,6 @@ KNOWN_GPUS = {
     "rx6600": 8.0,
     "rx580": 8.0,
     "rx570": 4.0,
     # --- AMD Data Center / Pro ---
     "mi300x": 192.0,
     "mi250x": 128.0,
@@ -124,7 +120,6 @@ KNOWN_GPUS = {
     "prow7900": 48.0,
     "prow7800": 32.0,
     "prow6800": 32.0,
     # --- Intel Consumer & Accelerators ---
     "arcb580": 12.0,
     "b580": 12.0,
@@ -138,63 +133,162 @@ KNOWN_GPUS = {
     "gaudi2": 96.0,
 }
 def normalize_gpu_string(name: str) -> str:
     """Strips vendor fluff, spaces, and hyphens to map correctly to KNOWN_GPUS."""
     name = name.lower()
     # Remove common vendor/marketing fluff that disrupts core identifiers
-    fluff_words = ["nvidia", "geforce", "amd", "radeon", "intel", "arc", "generation", "edition", "graphics", "accelerator"]
+    fluff_words = [
+        "nvidia",
+        "geforce",
+        "amd",
+        "radeon",
+        "intel",
+        "arc",
+        "generation",
+        "edition",
+        "graphics",
+        "accelerator",
+    ]
     for word in fluff_words:
         name = name.replace(word, "")
-    return re.sub(r'[\s\-]', '', name)
-def detect_local_gpu() -> Tuple[str, float, int]:
-    # 1. NVIDIA
+    return re.sub(r"[\s\-]", "", name)
+def _detect_nvidia_gpu() -> Optional[Tuple[str, float, int]]:
     try:
         result = subprocess.run(
-            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
-            capture_output=True, text=True, check=True
+            [
+                "nvidia-smi",
+                "--query-gpu=name,memory.total",
+                "--format=csv,noheader,nounits",
+            ],
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=2.0,
         )
-        lines = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
+        lines = [
+            line.strip() for line in result.stdout.strip().split("\n") if line.strip()
+        ]
         if lines:
             total_mb = 0
             for line in lines:
-                parts = line.split(',')
+                parts = line.split(",")
                 if len(parts) >= 2:
                     total_mb += int(parts[1].strip())
             gpu_count = len(lines)
-            first_name = lines[0].split(',')[0].strip()
-            display_name = f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
+            first_name = lines[0].split(",")[0].strip()
+            display_name = (
+                f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
+            )
             return display_name, total_mb / 1024.0, gpu_count
     except Exception:
         pass
-    # 2. AMD (ROCm)
+    return None
+def _detect_amd_gpu() -> Optional[Tuple[str, float, int]]:
     try:
         result = subprocess.run(
             ["rocm-smi", "--showmeminfo", "vram"],
-            capture_output=True, text=True, check=True
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=2.0,
         )
-        lines = [line for line in result.stdout.strip().split('\n') if "Total Memory (B):" in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if "Total Memory (B):" in line
+        ]
         if lines:
             total_bytes = 0
             gpu_count = len(lines)
             for line in lines:
-                parts = line.split(':')
+                parts = line.split(":")
                 if len(parts) >= 2:
                     total_bytes += int(parts[1].strip())
-            display_name = f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
+            display_name = (
+                f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
+            )
             return display_name, total_bytes / (1024.0**3), gpu_count
     except Exception:
         pass
-    # 3. Apple Silicon
+    return None
+def _parse_intel_vram(size_str: str) -> Optional[float]:
+    match = re.search(r"([\d\.]+)\s*([a-zA-Z]*)", size_str)
+    if not match:
+        return None
+    val = float(match.group(1))
+    unit = match.group(2).lower()
+    if unit in ("gib", "gb"):
+        val *= 1024.0
+    elif unit in ("kib", "kb"):
+        val /= 1024.0
+    elif unit == "b":
+        val /= (1024.0 * 1024.0)
+    return val
+def _parse_xpu_smi_output(stdout: str) -> Tuple[list[str], float, int]:
+    gpu_names: list[str] = []
+    total_mib: float = 0.0
+    parsed_memory_entries: int = 0
+    for line in stdout.splitlines():
+        lower_line = line.lower()
+        if "device name:" in lower_line:
+            idx = lower_line.index("device name:")
+            name = line[idx + len("device name:"):].split("|")[0].strip()
+            gpu_names.append(name)
+        elif "memory physical size:" in lower_line:
+            idx = lower_line.index("memory physical size:")
+            size_str = line[idx + len("memory physical size:"):].split("|")[0].strip()
+            val = _parse_intel_vram(size_str)
+            if val is not None:
+                total_mib += val
+                parsed_memory_entries += 1
+    return gpu_names, total_mib, parsed_memory_entries
+def _detect_intel_gpu() -> Optional[Tuple[str, float, int]]:
+    try:
+        result = subprocess.run(
+            ["xpu-smi", "discovery"],
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=2.0,
+        )
+        gpu_names, total_mib, parsed_memory_entries = _parse_xpu_smi_output(result.stdout)
+        if gpu_names and parsed_memory_entries == len(gpu_names) and total_mib > 0.0:
+            gpu_count = len(gpu_names)
+            first_name = gpu_names[0]
+            display_name = (
+                f"Intel Multi-GPU ({gpu_count}x {first_name})" if gpu_count > 1 else first_name
+            )
+            return display_name, total_mib / 1024.0, gpu_count
+    except Exception:
+        pass
+    return None
+def _detect_apple_gpu() -> Optional[Tuple[str, float, int]]:
     try:
         result = subprocess.run(
             ["sysctl", "hw.memsize"],
-            capture_output=True, text=True, check=True
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=2.0,
         )
         total_bytes = int(result.stdout.strip().split()[1])
         # Apply 75% operational heuristic for Apple Silicon wire limits
@@ -202,34 +296,62 @@ def detect_local_gpu() -> Tuple[str, float, int]:
         return "Apple Silicon (Unified Memory)", vram_gb, 1
     except Exception:
         pass
+    return None
+def detect_local_gpu() -> Tuple[str, float, int]:
+    # 1. NVIDIA
+    nvidia_res = _detect_nvidia_gpu()
+    if nvidia_res is not None:
+        return nvidia_res
+    # 2. AMD (ROCm)
+    amd_res = _detect_amd_gpu()
+    if amd_res is not None:
+        return amd_res
+    # 3. Intel (xpu-smi)
+    intel_res = _detect_intel_gpu()
+    if intel_res is not None:
+        return intel_res
+    # 4. Apple Silicon
+    apple_res = _detect_apple_gpu()
+    if apple_res is not None:
+        return apple_res
     return "Unknown", 8.0, 1
 def resolve_gpu(target: str) -> Tuple[str, float, int]:
     if target.lower() == "auto":
         return detect_local_gpu()
     # Apple Silicon routing trap
     lower_target = target.lower()
-    if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(r'^m[1-4](-?(pro|max|ultra))?$', lower_target):
-        raise ValueError("Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits.")
+    if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(
+        r"^m[1-4](-?(pro|max|ultra))?$", lower_target
+    ):
+        raise ValueError(
+            "Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits."
+        )
     # Parse potential multi-GPU format e.g., "2x RTX4090"
     gpu_count = 1
-    match = re.match(r'^(\d+)x\s*(.+)$', lower_target)
+    match = re.match(r"^(\d+)x\s*(.+)$", lower_target)
     if match:
         gpu_count = int(match.group(1))
         target_name = match.group(2)
     else:
         target_name = target
     normalized = normalize_gpu_string(target_name)
     if normalized in KNOWN_GPUS:
         vram_gb = KNOWN_GPUS[normalized] * gpu_count
         display_name = f"{gpu_count}x {target_name}" if gpu_count > 1 else target_name
         return display_name, vram_gb, gpu_count
     # If the user passed a pure number, assume GB
     try:
         vram_gb = float(normalized) * gpu_count
@@ -237,5 +359,17 @@ def resolve_gpu(target: str) -> Tuple[str, float, int]:
         return display_name, vram_gb, gpu_count
     except ValueError:
         pass
-    raise ValueError(f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value.")
+    import difflib
+    matches = difflib.get_close_matches(normalized, KNOWN_GPUS.keys(), n=3, cutoff=0.6)
+    if matches:
+        suggestions = ", ".join(matches)
+        raise ValueError(
+            f"Unknown GPU target '{target}'. Did you mean: {suggestions}? "
+            f"Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
+        )
+    raise ValueError(
+        f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
+    )

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/huggingface.py RENAMED Viewed

@@ -3,9 +3,27 @@ import json
 import os
 import struct
 import urllib.error
+import urllib.parse
 import urllib.request
 from typing import Any, Dict, Tuple
+def _get_hf_endpoint() -> str:
+    endpoint = os.environ.get("HF_ENDPOINT", "https://huggingface.co").strip()
+    if not endpoint:
+        raise ValueError("HF_ENDPOINT is set but empty; expected a valid HTTP(S) URL")
+    endpoint = endpoint.rstrip("/")
+    if not endpoint.startswith("https://"):
+        raise ValueError(
+            f"HF_ENDPOINT must use https:// scheme, got: {endpoint}"
+        )
+    parsed = urllib.parse.urlparse(endpoint)
+    if not parsed.netloc:
+        raise ValueError(
+            f"HF_ENDPOINT must include a valid hostname, got: {endpoint}"
+        )
+    return endpoint
 def _get_hf_token() -> str | None:
     token = os.environ.get("HF_TOKEN")
     if token:
@@ -29,7 +47,12 @@ def _get_hf_token() -> str | None:
     return None
-def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None) -> bytes:
+def _make_request(
+    url: str,
+    headers: Dict[str, str] = None,
+    limit: int | None = None,
+    timeout: float = 10.0,
+) -> bytes:
     if headers is None:
         headers = {}
@@ -39,7 +62,7 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
     req = urllib.request.Request(url, headers=headers)
     try:
-        with urllib.request.urlopen(req, timeout=10) as response:
+        with urllib.request.urlopen(req, timeout=timeout) as response:
             if limit is not None:
                 return response.read(limit)
             return response.read()
@@ -50,16 +73,16 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
            raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}")
         raise
-def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
-    url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
+def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0) -> Dict[str, Any]:
+    url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/{filename}"
     # 1. Fetch the first 500KB in a single roundtrip
     headers = {"Range": "bytes=0-500000"}
     try:
-        chunk = _make_request(url, headers=headers, limit=500000)
+        chunk = _make_request(url, headers=headers, limit=500000, timeout=timeout)
     except urllib.error.HTTPError as e:
         if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB)
-            chunk = _make_request(url, limit=500000)
+            chunk = _make_request(url, limit=500000, timeout=timeout)
         else:
             raise
@@ -74,18 +97,18 @@ def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
     else:
         # 3. Double-roundtrip only if the header is massive (>500KB)
         headers = {"Range": f"bytes=8-{8+header_size-1}"}
-        json_bytes = _make_request(url, headers=headers, limit=header_size)
+        json_bytes = _make_request(url, headers=headers, limit=header_size, timeout=timeout)
     return json.loads(json_bytes)
-def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
+def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
     """
     Fetches the metadata directly from the Hugging Face Hub over the network.
     Returns: (tensors, config, format_name, disk_size)
     """
-    api_url = f"https://huggingface.co/api/models/{repo_id}"
+    api_url = f"{_get_hf_endpoint()}/api/models/{repo_id}"
     try:
-        api_data = json.loads(_make_request(api_url).decode("utf-8"))
+        api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8"))
     except urllib.error.HTTPError as e:
         if e.code == 401:
             raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}")
@@ -98,16 +121,16 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
     config = None
     if "config.json" in filenames:
-        config_url = f"https://huggingface.co/{repo_id}/resolve/main/config.json"
-        config = json.loads(_make_request(config_url).decode("utf-8"))
+        config_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/config.json"
+        config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8"))
     tensors = {}
     total_size = 0.0
     if "model.safetensors.index.json" in filenames:
         # Sharded SafeTensors
-        index_url = f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors.index.json"
-        index_data = json.loads(_make_request(index_url).decode("utf-8"))
+        index_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors.index.json"
+        index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8"))
         weight_map = index_data.get("weight_map", {})
         unique_shards = list(set(weight_map.values()))
@@ -128,7 +151,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
             }
         else:
             def fetch_shard(shard: str):
-                return shard, _fetch_safetensors_header(repo_id, shard)
+                return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout)
             with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor:
                 future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards}
@@ -149,17 +172,17 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
         # Single SafeTensors
         # Determine total size first
-        req = urllib.request.Request(f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors", method="HEAD")
+        req = urllib.request.Request(f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors", method="HEAD")
         token = _get_hf_token()
         if token:
             req.add_header("Authorization", f"Bearer {token}")
         try:
-            with urllib.request.urlopen(req) as response:
+            with urllib.request.urlopen(req, timeout=timeout) as response:
                 total_size = int(response.headers.get("Content-Length", 0))
         except Exception:
             pass
-        header = _fetch_safetensors_header(repo_id, "model.safetensors")
+        header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout)
         tensors = header
         format_name = "SafeTensors"

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: modelinfo-cli
-Version: 1.4.3
+Version: 1.4.4
 Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
 Author: ModelInfo Contributors
 License: MIT
@@ -164,12 +164,15 @@ Qwen2.5-0.5B       494.0M    BF16     8K         1.6 GB      ✓
 | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
 | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
 | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
+| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
 | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
 | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
 | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
 | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
 | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
 | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
+| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
+| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
 ## Architecture

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/SOURCES.txt RENAMED Viewed

@@ -23,4 +23,5 @@ src/modelinfo_cli.egg-info/top_level.txt
 tests/test_calculator.py
 tests/test_cli.py
 tests/test_constraints.py
+tests/test_hardware.py
 tests/test_parsers.py

modelinfo_cli-1.4.4/tests/test_cli.py ADDED Viewed

@@ -0,0 +1,179 @@
+import pytest
+import modelinfo.cli as cli
+from modelinfo import __version__
+from modelinfo.cli import parse_args
+def test_version_flag_prints_installed_version(capsys):
+    with pytest.raises(SystemExit) as exc_info:
+        parse_args(["--version"])
+    assert exc_info.value.code == 0
+    assert f"modelinfo {__version__}" in capsys.readouterr().out
+def test_batch_size_flag_defaults_to_one():
+    args = parse_args(["model.gguf"])
+    assert args.batch_size == 1
+def test_batch_size_flag_accepts_integer():
+    args = parse_args(["--batch-size", "4", "model.gguf"])
+    assert args.batch_size == 4
+def test_batch_size_flag_rejects_zero():
+    with pytest.raises(SystemExit) as exc_info:
+        parse_args(["--batch-size", "0", "model.gguf"])
+    assert exc_info.value.code == 2
+def test_batch_size_flag_rejects_negative():
+    with pytest.raises(SystemExit) as exc_info:
+        parse_args(["--batch-size", "-1", "model.gguf"])
+    assert exc_info.value.code == 2
+def test_timeout_flag_defaults_to_ten_seconds():
+    args = parse_args(["model.gguf"])
+    assert args.timeout == 10.0
+def test_timeout_flag_accepts_float():
+    args = parse_args(["--timeout", "30.5", "model.gguf"])
+    assert args.timeout == 30.5
+def test_timeout_flag_rejects_zero():
+    with pytest.raises(SystemExit) as exc_info:
+        parse_args(["--timeout", "0", "model.gguf"])
+    assert exc_info.value.code == 2
+def test_timeout_flag_rejects_negative():
+    with pytest.raises(SystemExit) as exc_info:
+        parse_args(["--timeout", "-1", "model.gguf"])
+    assert exc_info.value.code == 2
+def test_timeout_flag_rejects_nan():
+    with pytest.raises(SystemExit) as exc_info:
+        parse_args(["--timeout", "nan", "model.gguf"])
+    assert exc_info.value.code == 2
+def test_timeout_flag_rejects_inf():
+    with pytest.raises(SystemExit) as exc_info:
+        parse_args(["--timeout", "inf", "model.gguf"])
+    assert exc_info.value.code == 2
+def test_analyze_model_passes_batch_size_to_footprint(monkeypatch, tmp_path):
+    model_path = tmp_path / "model.gguf"
+    model_path.write_bytes(b"mock")
+    captured = {}
+    def fake_parse_gguf_header(file_path):
+        assert file_path == str(model_path)
+        return {
+            "model.layers.0.self_attn.k_proj.weight": {"shape": [1, 1], "dtype": "F16"}
+        }
+    def fake_calculate_footprint(tensors, *, context_length, batch_size, **kwargs):
+        captured["batch_size"] = batch_size
+        captured["context_length"] = context_length
+        return {
+            "total_params": 1,
+            "base_memory_bytes": 2.0,
+            "kv_cache_bytes": float(batch_size),
+            "overhead_bytes": 0.0,
+            "total_memory_bytes": 2.0 + batch_size,
+            "num_layers": 1,
+            "kv_dim": 1,
+            "primary_dtype": "F16",
+            "kv_is_estimate": False,
+            "penalty_percentage": 0.0,
+            "vllm_metrics": {},
+        }
+    monkeypatch.setattr(cli, "parse_gguf_header", fake_parse_gguf_header)
+    monkeypatch.setattr(cli, "calculate_footprint", fake_calculate_footprint)
+    monkeypatch.setattr(
+        cli, "identify_architecture_name", lambda tensors, num_layers, config: "Mock"
+    )
+    info = cli.analyze_model(str(model_path), context_override=128, batch_size=4)
+    assert captured == {"batch_size": 4, "context_length": 128}
+    assert info["footprint"]["kv_cache_bytes"] == 4.0
+def test_analyze_model_passes_timeout_to_huggingface(monkeypatch):
+    captured = {}
+    def fake_exists(path):
+        return False
+    def fake_fetch(repo_id, *, fetch_tensors, timeout):
+        captured["repo_id"] = repo_id
+        captured["fetch_tensors"] = fetch_tensors
+        captured["timeout"] = timeout
+        return (
+            {
+                "model.layers.0.self_attn.k_proj.weight": {
+                    "shape": [1, 1],
+                    "dtype": "F16",
+                }
+            },
+            None,
+            "SafeTensors",
+            7.0,
+        )
+    def fake_calculate_footprint(tensors, *, context_length, batch_size, **kwargs):
+        return {
+            "total_params": 1,
+            "base_memory_bytes": 2.0,
+            "kv_cache_bytes": 1.0,
+            "overhead_bytes": 0.0,
+            "total_memory_bytes": 3.0,
+            "num_layers": 1,
+            "kv_dim": 1,
+            "primary_dtype": "F16",
+            "kv_is_estimate": False,
+            "penalty_percentage": 0.0,
+            "vllm_metrics": {},
+        }
+    from modelinfo.parsers import huggingface
+    monkeypatch.setattr(cli.os.path, "exists", fake_exists)
+    monkeypatch.setattr(huggingface, "fetch_huggingface_repo", fake_fetch)
+    monkeypatch.setattr(cli, "calculate_footprint", fake_calculate_footprint)
+    monkeypatch.setattr(
+        cli, "identify_architecture_name", lambda tensors, num_layers, config: "Mock"
+    )
+    cli.analyze_model(
+        "org/model",
+        context_override=128,
+        fetch_tensors=True,
+        timeout=22.5,
+    )
+    assert captured == {
+        "repo_id": "org/model",
+        "fetch_tensors": True,
+        "timeout": 22.5,
+    }

modelinfo_cli-1.4.4/tests/test_hardware.py ADDED Viewed

@@ -0,0 +1,255 @@
+import subprocess
+import pytest
+from modelinfo import hardware
+def completed(stdout: str) -> subprocess.CompletedProcess:
+    return subprocess.CompletedProcess(args=[], returncode=0, stdout=stdout)
+def test_normalize_gpu_string_removes_vendor_fluff_and_separators():
+    assert hardware.normalize_gpu_string("NVIDIA GeForce RTX 4090") == "rtx4090"
+    assert (
+        hardware.normalize_gpu_string("AMD Radeon RX-7900 XTX Graphics") == "rx7900xtx"
+    )
+    assert hardware.normalize_gpu_string("Intel Arc A770 Edition") == "a770"
+def test_resolve_gpu_matches_known_gpu():
+    assert hardware.resolve_gpu("NVIDIA GeForce RTX 4090") == (
+        "NVIDIA GeForce RTX 4090",
+        24.0,
+        1,
+    )
+def test_resolve_gpu_handles_multi_gpu_string():
+    assert hardware.resolve_gpu("2x RTX4090") == ("2x rtx4090", 48.0, 2)
+def test_resolve_gpu_accepts_numeric_vram_target():
+    assert hardware.resolve_gpu("16") == ("Custom (16.0 GB)", 16.0, 1)
+    assert hardware.resolve_gpu("4x 12") == ("Custom (48.0 GB)", 48.0, 4)
+def test_resolve_gpu_delegates_auto_detection(monkeypatch):
+    monkeypatch.setattr(hardware, "detect_local_gpu", lambda: ("Local GPU", 12.0, 1))
+    assert hardware.resolve_gpu("auto") == ("Local GPU", 12.0, 1)
+def test_resolve_gpu_rejects_apple_silicon_shortcuts():
+    with pytest.raises(ValueError, match="Apple Silicon VRAM varies"):
+        hardware.resolve_gpu("m3-max")
+def test_resolve_gpu_rejects_unknown_gpu_name():
+    with pytest.raises(ValueError, match="Unknown GPU target 'Mystery GPU'"):
+        hardware.resolve_gpu("Mystery GPU")
+def test_resolve_gpu_suggests_close_matches():
+    with pytest.raises(
+        ValueError,
+        match="Unknown GPU target 'rtx490'\\. Did you mean:.*rtx4090",
+    ):
+        hardware.resolve_gpu("rtx490")
+def test_detect_local_gpu_reads_nvidia_smi(monkeypatch):
+    def fake_run(command, **kwargs):
+        assert command == [
+            "nvidia-smi",
+            "--query-gpu=name,memory.total",
+            "--format=csv,noheader,nounits",
+        ]
+        assert kwargs == {
+            "capture_output": True,
+            "text": True,
+            "check": True,
+            "timeout": 2.0,
+        }
+        return completed("NVIDIA GeForce RTX 4090, 24576\n")
+    monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+    assert hardware.detect_local_gpu() == ("NVIDIA GeForce RTX 4090", 24.0, 1)
+def test_detect_local_gpu_sums_multiple_nvidia_gpus(monkeypatch):
+    monkeypatch.setattr(
+        hardware.subprocess,
+        "run",
+        lambda *args, **kwargs: completed(
+            "NVIDIA GeForce RTX 4090, 24576\nNVIDIA GeForce RTX 4090, 24576\n"
+        ),
+    )
+    assert hardware.detect_local_gpu() == (
+        "Multi-GPU: 2x NVIDIA GeForce RTX 4090",
+        48.0,
+        2,
+    )
+def test_detect_local_gpu_falls_back_to_rocm_smi(monkeypatch):
+    def fake_run(command, **kwargs):
+        if command[0] == "nvidia-smi":
+            raise FileNotFoundError("nvidia-smi not installed")
+        assert command == ["rocm-smi", "--showmeminfo", "vram"]
+        return completed(
+            "Total Memory (B): 17179869184\nTotal Memory (B): 17179869184\n"
+        )
+    monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+    assert hardware.detect_local_gpu() == ("AMD Multi-GPU (2x)", 32.0, 2)
+def test_detect_local_gpu_falls_back_to_xpu_smi(monkeypatch):
+    def fake_run(command, **kwargs):
+        if command[0] in {"nvidia-smi", "rocm-smi"}:
+            raise FileNotFoundError(command[0])
+        assert command == ["xpu-smi", "discovery"]  # nosec
+        stdout = (
+            "+-----------+------------------------------------------------------+\n"
+            "| Device ID | Device Information                                   |\n"
+            "+-----------+------------------------------------------------------+\n"
+            "| 0         | Device Name: Intel(R) Arc(TM) A770 Graphics          |\n"
+            "|           | Vendor Name: Intel(R) Corporation                    |\n"
+            "|           | Memory Physical Size: 16384.00 MiB                   |\n"
+            "+-----------+------------------------------------------------------+\n"
+        )
+        return completed(stdout)
+    monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+    assert hardware.detect_local_gpu() == ("Intel(R) Arc(TM) A770 Graphics", 16.0, 1)  # nosec
+def test_detect_local_gpu_sums_multiple_intel_gpus(monkeypatch):
+    def fake_run(command, **kwargs):
+        if command[0] in {"nvidia-smi", "rocm-smi"}:
+            raise FileNotFoundError(command[0])
+        assert command == ["xpu-smi", "discovery"]  # nosec
+        stdout = (
+            "+-----------+------------------------------------------------------+\n"
+            "| Device ID | Device Information                                   |\n"
+            "+-----------+------------------------------------------------------+\n"
+            "| 0         | Device Name: Intel(R) Data Center GPU Flex 170       |\n"
+            "|           | Memory Physical Size: 16384.00 MiB                   |\n"
+            "+-----------+------------------------------------------------------+\n"
+            "| 1         | Device Name: Intel(R) Data Center GPU Flex 170       |\n"
+            "|           | Memory Physical Size: 16384.00 MiB                   |\n"
+            "+-----------+------------------------------------------------------+\n"
+        )
+        return completed(stdout)
+    monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+    assert hardware.detect_local_gpu() == (  # nosec
+        "Intel Multi-GPU (2x Intel(R) Data Center GPU Flex 170)",
+        32.0,
+        2,
+    )
+def test_detect_local_gpu_intel_unit_conversions(monkeypatch):
+    test_cases = [
+        ("16.00 GiB", 16.0),
+        ("16.00 GB", 16.0),
+        ("16777216.00 KiB", 16.0),
+        ("17179869184.00 B", 16.0),
+        ("16384.00 MiB", 16.0),
+        ("16384.00 MB", 16.0),
+        ("16384.00", 16.0),  # Default MiB unit
+    ]
+    for size_str, expected_vram in test_cases:
+        def fake_run(command, s=size_str, **kwargs):
+            if command[0] in {"nvidia-smi", "rocm-smi"}:
+                raise FileNotFoundError(command[0])
+            assert command == ["xpu-smi", "discovery"]  # nosec
+            stdout = (
+                "+-----------+------------------------------------------------------+\n"
+                "| Device ID | Device Information                                   |\n"
+                "+-----------+------------------------------------------------------+\n"
+                "| 0         | Device Name: Intel(R) Arc(TM) A770 Graphics          |\n"
+                f"|           | Memory Physical Size: {s}                     |\n"
+                "+-----------+------------------------------------------------------+\n"
+            )
+            return completed(stdout)
+        monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+        assert hardware.detect_local_gpu() == ("Intel(R) Arc(TM) A770 Graphics", expected_vram, 1)  # nosec
+def test_detect_local_gpu_falls_back_on_malformed_xpu_smi(monkeypatch):
+    def fake_run(command, **kwargs):
+        if command[0] in {"nvidia-smi", "rocm-smi"}:
+            raise FileNotFoundError(command[0])
+        if command[0] == "xpu-smi":
+            # Returns device name but no parseable memory size
+            stdout = (
+                "+-----------+------------------------------------------------------+\n"
+                "| Device ID | Device Information                                   |\n"
+                "+-----------+------------------------------------------------------+\n"
+                "| 0         | Device Name: Intel(R) Arc(TM) A770 Graphics          |\n"
+                "|           | Vendor Name: Intel(R) Corporation                    |\n"
+                "|           | Memory Physical Size: N/A                            |\n"
+                "+-----------+------------------------------------------------------+\n"
+            )
+            return completed(stdout)
+        raise FileNotFoundError(command[0])
+    monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+    # xpu-smi reported a device name but no parseable memory size, so detect_local_gpu should discard the Intel result and end at the ("Unknown", 8.0, 1) default
+    assert hardware.detect_local_gpu() == ("Unknown", 8.0, 1)  # nosec
+def test_detect_local_gpu_falls_back_on_mismatched_intel_count(monkeypatch):
+    def fake_run(command, **kwargs):
+        if command[0] in {"nvidia-smi", "rocm-smi"}:
+            raise FileNotFoundError(command[0])
+        if command[0] == "xpu-smi":
+            # 2 GPUs, but only 1 has memory size
+            stdout = (
+                "+-----------+------------------------------------------------------+\n"
+                "| Device ID | Device Information                                   |\n"
+                "+-----------+------------------------------------------------------+\n"
+                "| 0         | Device Name: Intel(R) Arc(TM) A770 Graphics          |\n"
+                "|           | Memory Physical Size: 16384.00 MiB                   |\n"
+                "+-----------+------------------------------------------------------+\n"
+                "| 1         | Device Name: Intel(R) Arc(TM) A770 Graphics          |\n"
+                "+-----------+------------------------------------------------------+\n"
+            )
+            return completed(stdout)
+        raise FileNotFoundError(command[0])
+    monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+    # Since device count (2) != memory entries count (1), the Intel result must be discarded and detection must fall back to the ("Unknown", 8.0, 1) default
+    assert hardware.detect_local_gpu() == ("Unknown", 8.0, 1)  # nosec
+def test_detect_local_gpu_falls_back_to_apple_unified_memory(monkeypatch):
+    def fake_run(command, **kwargs):
+        if command[0] in {"nvidia-smi", "rocm-smi", "xpu-smi"}:
+            raise FileNotFoundError(command[0])
+        assert command == ["sysctl", "hw.memsize"]
+        return completed("hw.memsize: 17179869184\n")
+    monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+    assert hardware.detect_local_gpu() == ("Apple Silicon (Unified Memory)", 12.0, 1)
+def test_detect_local_gpu_returns_default_when_detection_fails(monkeypatch):
+    def fake_run(command, **kwargs):
+        raise FileNotFoundError(command[0])
+    monkeypatch.setattr(hardware.subprocess, "run", fake_run)
+    assert hardware.detect_local_gpu() == ("Unknown", 8.0, 1)

{modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/tests/test_parsers.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import os
 import pytest
+from modelinfo.parsers.huggingface import _get_hf_endpoint
 from modelinfo.parsers.safetensors import parse_safetensors_header
 FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
@@ -45,3 +46,39 @@ def test_gguf_parser_metadata():
     # Verify the architecture bypass parses it to titlecase and prevents "Unknown Architecture"
     arch_name = identify_architecture_name(tensors, num_layers=1)
     assert arch_name == "Qwen2 (1 transformer layers)"
+def test_hf_endpoint_valid_https(monkeypatch):
+    """Valid https:// endpoint is accepted."""
+    monkeypatch.setenv("HF_ENDPOINT", "https://huggingface.co")
+    assert _get_hf_endpoint() == "https://huggingface.co"
+def test_hf_endpoint_default_https(monkeypatch):
+    """Default endpoint when HF_ENDPOINT is not set."""
+    monkeypatch.delenv("HF_ENDPOINT", raising=False)
+    endpoint = _get_hf_endpoint()
+    assert endpoint == "https://huggingface.co"
+def test_hf_endpoint_rejects_http(monkeypatch):
+    """http:// scheme is rejected with ValueError."""
+    monkeypatch.setenv("HF_ENDPOINT", "http://localhost:8080")
+    with pytest.raises(ValueError, match="must use https:// scheme"):
+        _get_hf_endpoint()
+def test_hf_endpoint_rejects_empty(monkeypatch):
+    """Empty string is rejected with ValueError."""
+    monkeypatch.setenv("HF_ENDPOINT", "")
+    with pytest.raises(ValueError):
+        _get_hf_endpoint()
+def test_hf_endpoint_rejects_no_hostname(monkeypatch):
+    """URL without a hostname is rejected with ValueError."""
+    monkeypatch.setenv("HF_ENDPOINT", "https:///repo")
+    with pytest.raises(ValueError, match="must include a valid hostname"):
+        _get_hf_endpoint()

modelinfo_cli-1.4.3/tests/test_cli.py DELETED Viewed

@@ -1,12 +0,0 @@
-import pytest
-from modelinfo import __version__
-from modelinfo.cli import parse_args
-def test_version_flag_prints_installed_version(capsys):
-    with pytest.raises(SystemExit) as exc_info:
-        parse_args(["--version"])
-    assert exc_info.value.code == 0
-    assert f"modelinfo {__version__}" in capsys.readouterr().out