modelinfo-cli 1.4.3__tar.gz → 1.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/PKG-INFO +4 -1
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/README.md +3 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/pyproject.toml +1 -1
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/__init__.py +1 -1
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/cli.py +40 -4
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/hardware.py +176 -42
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/huggingface.py +41 -18
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/PKG-INFO +4 -1
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/SOURCES.txt +1 -0
- modelinfo_cli-1.4.4/tests/test_cli.py +179 -0
- modelinfo_cli-1.4.4/tests/test_hardware.py +255 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/tests/test_parsers.py +37 -0
- modelinfo_cli-1.4.3/tests/test_cli.py +0 -12
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/LICENSE +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/setup.cfg +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/__main__.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/architecture.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/calculator.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/__init__.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/base.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/gguf.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/pytorch.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/safetensors.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/ui.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/dependency_links.txt +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/entry_points.txt +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/requires.txt +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/top_level.txt +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/tests/test_calculator.py +0 -0
- {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/tests/test_constraints.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelinfo-cli
|
|
3
|
-
Version: 1.4.3
|
|
3
|
+
Version: 1.4.4
|
|
4
4
|
Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
|
|
5
5
|
Author: ModelInfo Contributors
|
|
6
6
|
License: MIT
|
|
@@ -164,12 +164,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
|
|
|
164
164
|
| `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
|
|
165
165
|
| `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
|
|
166
166
|
| `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
|
|
167
|
+
| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
|
|
167
168
|
| `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
|
|
168
169
|
| `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
|
|
169
170
|
| `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
|
|
170
171
|
| `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
|
|
171
172
|
| `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
|
|
172
173
|
| `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
|
|
174
|
+
| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
|
|
175
|
+
| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
|
|
173
176
|
|
|
174
177
|
## Architecture
|
|
175
178
|
|
|
@@ -146,12 +146,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
|
|
|
146
146
|
| `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
|
|
147
147
|
| `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
|
|
148
148
|
| `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
|
|
149
|
+
| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
|
|
149
150
|
| `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
|
|
150
151
|
| `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
|
|
151
152
|
| `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
|
|
152
153
|
| `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
|
|
153
154
|
| `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
|
|
154
155
|
| `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
|
|
156
|
+
| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
|
|
157
|
+
| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
|
|
155
158
|
|
|
156
159
|
## Architecture
|
|
157
160
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "modelinfo-cli"
|
|
7
|
-
version = "1.4.3"
|
|
7
|
+
version = "1.4.4"
|
|
8
8
|
description = "A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import json
|
|
3
|
+
import math
|
|
3
4
|
import os
|
|
4
5
|
import sys
|
|
5
6
|
from typing import Sequence
|
|
@@ -34,6 +35,20 @@ class VersionAction(argparse.Action):
|
|
|
34
35
|
parser.exit()
|
|
35
36
|
|
|
36
37
|
|
|
38
|
+
def _positive_int(value: str) -> int:
|
|
39
|
+
ivalue = int(value)
|
|
40
|
+
if ivalue < 1:
|
|
41
|
+
raise argparse.ArgumentTypeError("batch size must be at least 1")
|
|
42
|
+
return ivalue
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _positive_float(value: str) -> float:
|
|
46
|
+
fvalue = float(value)
|
|
47
|
+
if not math.isfinite(fvalue) or fvalue <= 0:
|
|
48
|
+
raise argparse.ArgumentTypeError("timeout must be a finite number greater than 0")
|
|
49
|
+
return fvalue
|
|
50
|
+
|
|
51
|
+
|
|
37
52
|
def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
38
53
|
parser = argparse.ArgumentParser(
|
|
39
54
|
prog="modelinfo",
|
|
@@ -52,6 +67,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
|
52
67
|
default=None,
|
|
53
68
|
help="Context length for dynamic KV cache footprint calculation.",
|
|
54
69
|
)
|
|
70
|
+
parser.add_argument(
|
|
71
|
+
"--batch-size",
|
|
72
|
+
type=_positive_int,
|
|
73
|
+
default=1,
|
|
74
|
+
help="Batch size for dynamic KV cache footprint calculation.",
|
|
75
|
+
)
|
|
55
76
|
parser.add_argument(
|
|
56
77
|
"--max-vram",
|
|
57
78
|
type=float,
|
|
@@ -69,6 +90,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
|
69
90
|
action="store_true",
|
|
70
91
|
help="Deep dive: Fetch all remote tensor shards to display the exact tensor size breakdown.",
|
|
71
92
|
)
|
|
93
|
+
parser.add_argument(
|
|
94
|
+
"--timeout",
|
|
95
|
+
type=_positive_float,
|
|
96
|
+
default=10.0,
|
|
97
|
+
help="Network timeout in seconds for remote Hugging Face fetches.",
|
|
98
|
+
)
|
|
72
99
|
parser.add_argument(
|
|
73
100
|
"--topology",
|
|
74
101
|
type=str,
|
|
@@ -106,8 +133,10 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
|
106
133
|
def analyze_model(
|
|
107
134
|
file_path: str,
|
|
108
135
|
context_override: int | None,
|
|
109
|
-
gpu_count: int = 1,
|
|
136
|
+
gpu_count: int = 1,
|
|
137
|
+
batch_size: int = 1,
|
|
110
138
|
fetch_tensors: bool = False,
|
|
139
|
+
timeout: float = 10.0,
|
|
111
140
|
topology: str = "pcie4",
|
|
112
141
|
strategy: str = "tp",
|
|
113
142
|
is_vllm: bool = False,
|
|
@@ -122,7 +151,9 @@ def analyze_model(
|
|
|
122
151
|
|
|
123
152
|
if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
|
|
124
153
|
from modelinfo.parsers.huggingface import fetch_huggingface_repo
|
|
125
|
-
tensors, config, format_name, disk_size = fetch_huggingface_repo(
|
|
154
|
+
tensors, config, format_name, disk_size = fetch_huggingface_repo(
|
|
155
|
+
file_path, fetch_tensors=fetch_tensors, timeout=timeout
|
|
156
|
+
)
|
|
126
157
|
elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
|
|
127
158
|
tensors = parse_safetensors_header(file_path)
|
|
128
159
|
format_name = "SafeTensors"
|
|
@@ -164,6 +195,7 @@ def analyze_model(
|
|
|
164
195
|
footprint = calculate_footprint(
|
|
165
196
|
tensors,
|
|
166
197
|
context_length=context_length,
|
|
198
|
+
batch_size=batch_size,
|
|
167
199
|
config=config,
|
|
168
200
|
gpu_count=gpu_count,
|
|
169
201
|
topology=topology,
|
|
@@ -222,8 +254,10 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
222
254
|
info = analyze_model(
|
|
223
255
|
model_path,
|
|
224
256
|
args.context,
|
|
225
|
-
gpu_count,
|
|
257
|
+
gpu_count=gpu_count,
|
|
258
|
+
batch_size=args.batch_size,
|
|
226
259
|
fetch_tensors=args.tensors,
|
|
260
|
+
timeout=args.timeout,
|
|
227
261
|
topology=args.topology,
|
|
228
262
|
strategy=args.strategy,
|
|
229
263
|
is_vllm=args.vllm,
|
|
@@ -240,8 +274,10 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
240
274
|
info = analyze_model(
|
|
241
275
|
file_path,
|
|
242
276
|
args.context,
|
|
243
|
-
gpu_count,
|
|
277
|
+
gpu_count=gpu_count,
|
|
278
|
+
batch_size=args.batch_size,
|
|
244
279
|
fetch_tensors=args.tensors,
|
|
280
|
+
timeout=args.timeout,
|
|
245
281
|
topology=args.topology,
|
|
246
282
|
strategy=args.strategy,
|
|
247
283
|
is_vllm=args.vllm,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import subprocess
|
|
3
|
-
from typing import Tuple
|
|
3
|
+
from typing import Optional, Tuple
|
|
4
4
|
|
|
5
5
|
KNOWN_GPUS = {
|
|
6
6
|
# --- NVIDIA Consumer (RTX 50/40/30/20/10 Series & Titans) ---
|
|
@@ -21,7 +21,7 @@ KNOWN_GPUS = {
|
|
|
21
21
|
"rtx4060ti16gb": 16.0,
|
|
22
22
|
"rtx4060ti": 8.0,
|
|
23
23
|
"rtx4060": 8.0,
|
|
24
|
-
"rtx4050"
|
|
24
|
+
"rtx4050": 6.0,
|
|
25
25
|
"rtx3090ti": 24.0,
|
|
26
26
|
"rtx3090": 24.0,
|
|
27
27
|
"rtx3080ti": 12.0,
|
|
@@ -32,7 +32,7 @@ KNOWN_GPUS = {
|
|
|
32
32
|
"rtx3060ti": 8.0,
|
|
33
33
|
"rtx306012gb": 12.0,
|
|
34
34
|
"rtx3060": 8.0,
|
|
35
|
-
"rtx3050ti"
|
|
35
|
+
"rtx3050ti": 4.0,
|
|
36
36
|
"rtx3050": 8.0,
|
|
37
37
|
"rtx2080ti": 11.0,
|
|
38
38
|
"rtx2080super": 8.0,
|
|
@@ -43,7 +43,7 @@ KNOWN_GPUS = {
|
|
|
43
43
|
"rtx206012gb": 12.0,
|
|
44
44
|
"rtx2060": 6.0,
|
|
45
45
|
"gtx1660super": 6.0,
|
|
46
|
-
"gtx1660ti"
|
|
46
|
+
"gtx1660ti": 6.0,
|
|
47
47
|
"gtx1660": 6.0,
|
|
48
48
|
"gtx1650super": 4.0,
|
|
49
49
|
"gtx1650": 4.0,
|
|
@@ -57,7 +57,6 @@ KNOWN_GPUS = {
|
|
|
57
57
|
"titanxp": 12.0,
|
|
58
58
|
"titanxpascal": 12.0,
|
|
59
59
|
"titanx": 12.0,
|
|
60
|
-
|
|
61
60
|
# --- NVIDIA Data Center / Workstation ---
|
|
62
61
|
"b200": 192.0,
|
|
63
62
|
"b100": 192.0,
|
|
@@ -89,7 +88,6 @@ KNOWN_GPUS = {
|
|
|
89
88
|
"rtxa4000": 16.0,
|
|
90
89
|
"quadrortx8000": 48.0,
|
|
91
90
|
"quadrortx6000": 24.0,
|
|
92
|
-
|
|
93
91
|
# --- AMD Consumer (RX 9000/7000/6000 Series) ---
|
|
94
92
|
"rx9070xt": 16.0,
|
|
95
93
|
"rx9070": 16.0,
|
|
@@ -115,8 +113,6 @@ KNOWN_GPUS = {
|
|
|
115
113
|
"rx6600": 8.0,
|
|
116
114
|
"rx580": 8.0,
|
|
117
115
|
"rx570": 4.0,
|
|
118
|
-
|
|
119
|
-
|
|
120
116
|
# --- AMD Data Center / Pro ---
|
|
121
117
|
"mi300x": 192.0,
|
|
122
118
|
"mi250x": 128.0,
|
|
@@ -124,7 +120,6 @@ KNOWN_GPUS = {
|
|
|
124
120
|
"prow7900": 48.0,
|
|
125
121
|
"prow7800": 32.0,
|
|
126
122
|
"prow6800": 32.0,
|
|
127
|
-
|
|
128
123
|
# --- Intel Consumer & Accelerators ---
|
|
129
124
|
"arcb580": 12.0,
|
|
130
125
|
"b580": 12.0,
|
|
@@ -138,63 +133,162 @@ KNOWN_GPUS = {
|
|
|
138
133
|
"gaudi2": 96.0,
|
|
139
134
|
}
|
|
140
135
|
|
|
136
|
+
|
|
141
137
|
def normalize_gpu_string(name: str) -> str:
|
|
142
138
|
"""Strips vendor fluff, spaces, and hyphens to map correctly to KNOWN_GPUS."""
|
|
143
139
|
name = name.lower()
|
|
144
|
-
|
|
140
|
+
|
|
145
141
|
# Remove common vendor/marketing fluff that disrupts core identifiers
|
|
146
|
-
fluff_words = [
|
|
142
|
+
fluff_words = [
|
|
143
|
+
"nvidia",
|
|
144
|
+
"geforce",
|
|
145
|
+
"amd",
|
|
146
|
+
"radeon",
|
|
147
|
+
"intel",
|
|
148
|
+
"arc",
|
|
149
|
+
"generation",
|
|
150
|
+
"edition",
|
|
151
|
+
"graphics",
|
|
152
|
+
"accelerator",
|
|
153
|
+
]
|
|
147
154
|
for word in fluff_words:
|
|
148
155
|
name = name.replace(word, "")
|
|
149
|
-
|
|
150
|
-
return re.sub(r'[\s\-]', '', name)
|
|
151
156
|
|
|
152
|
-
|
|
153
|
-
|
|
157
|
+
return re.sub(r"[\s\-]", "", name)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _detect_nvidia_gpu() -> Optional[Tuple[str, float, int]]:
|
|
154
161
|
try:
|
|
155
162
|
result = subprocess.run(
|
|
156
|
-
[
|
|
157
|
-
|
|
163
|
+
[
|
|
164
|
+
"nvidia-smi",
|
|
165
|
+
"--query-gpu=name,memory.total",
|
|
166
|
+
"--format=csv,noheader,nounits",
|
|
167
|
+
],
|
|
168
|
+
capture_output=True,
|
|
169
|
+
text=True,
|
|
170
|
+
check=True,
|
|
171
|
+
timeout=2.0,
|
|
158
172
|
)
|
|
159
|
-
lines = [
|
|
173
|
+
lines = [
|
|
174
|
+
line.strip() for line in result.stdout.strip().split("\n") if line.strip()
|
|
175
|
+
]
|
|
160
176
|
if lines:
|
|
161
177
|
total_mb = 0
|
|
162
178
|
for line in lines:
|
|
163
|
-
parts = line.split(
|
|
179
|
+
parts = line.split(",")
|
|
164
180
|
if len(parts) >= 2:
|
|
165
181
|
total_mb += int(parts[1].strip())
|
|
166
|
-
|
|
182
|
+
|
|
167
183
|
gpu_count = len(lines)
|
|
168
|
-
first_name = lines[0].split(
|
|
169
|
-
display_name =
|
|
184
|
+
first_name = lines[0].split(",")[0].strip()
|
|
185
|
+
display_name = (
|
|
186
|
+
f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
|
|
187
|
+
)
|
|
170
188
|
return display_name, total_mb / 1024.0, gpu_count
|
|
171
189
|
except Exception:
|
|
172
190
|
pass
|
|
173
|
-
|
|
174
|
-
|
|
191
|
+
return None
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _detect_amd_gpu() -> Optional[Tuple[str, float, int]]:
|
|
175
195
|
try:
|
|
176
196
|
result = subprocess.run(
|
|
177
197
|
["rocm-smi", "--showmeminfo", "vram"],
|
|
178
|
-
capture_output=True,
|
|
198
|
+
capture_output=True,
|
|
199
|
+
text=True,
|
|
200
|
+
check=True,
|
|
201
|
+
timeout=2.0,
|
|
179
202
|
)
|
|
180
|
-
lines = [
|
|
203
|
+
lines = [
|
|
204
|
+
line
|
|
205
|
+
for line in result.stdout.strip().split("\n")
|
|
206
|
+
if "Total Memory (B):" in line
|
|
207
|
+
]
|
|
181
208
|
if lines:
|
|
182
209
|
total_bytes = 0
|
|
183
210
|
gpu_count = len(lines)
|
|
184
211
|
for line in lines:
|
|
185
|
-
parts = line.split(
|
|
212
|
+
parts = line.split(":")
|
|
186
213
|
if len(parts) >= 2:
|
|
187
214
|
total_bytes += int(parts[1].strip())
|
|
188
|
-
display_name =
|
|
215
|
+
display_name = (
|
|
216
|
+
f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
|
|
217
|
+
)
|
|
189
218
|
return display_name, total_bytes / (1024.0**3), gpu_count
|
|
190
219
|
except Exception:
|
|
191
220
|
pass
|
|
192
|
-
|
|
193
|
-
|
|
221
|
+
return None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _parse_intel_vram(size_str: str) -> Optional[float]:
|
|
225
|
+
match = re.search(r"([\d\.]+)\s*([a-zA-Z]*)", size_str)
|
|
226
|
+
if not match:
|
|
227
|
+
return None
|
|
228
|
+
val = float(match.group(1))
|
|
229
|
+
unit = match.group(2).lower()
|
|
230
|
+
if unit in ("gib", "gb"):
|
|
231
|
+
val *= 1024.0
|
|
232
|
+
elif unit in ("kib", "kb"):
|
|
233
|
+
val /= 1024.0
|
|
234
|
+
elif unit == "b":
|
|
235
|
+
val /= (1024.0 * 1024.0)
|
|
236
|
+
return val
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _parse_xpu_smi_output(stdout: str) -> Tuple[list[str], float, int]:
|
|
240
|
+
gpu_names: list[str] = []
|
|
241
|
+
total_mib: float = 0.0
|
|
242
|
+
parsed_memory_entries: int = 0
|
|
243
|
+
|
|
244
|
+
for line in stdout.splitlines():
|
|
245
|
+
lower_line = line.lower()
|
|
246
|
+
if "device name:" in lower_line:
|
|
247
|
+
idx = lower_line.index("device name:")
|
|
248
|
+
name = line[idx + len("device name:"):].split("|")[0].strip()
|
|
249
|
+
gpu_names.append(name)
|
|
250
|
+
elif "memory physical size:" in lower_line:
|
|
251
|
+
idx = lower_line.index("memory physical size:")
|
|
252
|
+
size_str = line[idx + len("memory physical size:"):].split("|")[0].strip()
|
|
253
|
+
val = _parse_intel_vram(size_str)
|
|
254
|
+
if val is not None:
|
|
255
|
+
total_mib += val
|
|
256
|
+
parsed_memory_entries += 1
|
|
257
|
+
|
|
258
|
+
return gpu_names, total_mib, parsed_memory_entries
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _detect_intel_gpu() -> Optional[Tuple[str, float, int]]:
|
|
262
|
+
try:
|
|
263
|
+
result = subprocess.run(
|
|
264
|
+
["xpu-smi", "discovery"],
|
|
265
|
+
capture_output=True,
|
|
266
|
+
text=True,
|
|
267
|
+
check=True,
|
|
268
|
+
timeout=2.0,
|
|
269
|
+
)
|
|
270
|
+
gpu_names, total_mib, parsed_memory_entries = _parse_xpu_smi_output(result.stdout)
|
|
271
|
+
|
|
272
|
+
if gpu_names and parsed_memory_entries == len(gpu_names) and total_mib > 0.0:
|
|
273
|
+
gpu_count = len(gpu_names)
|
|
274
|
+
first_name = gpu_names[0]
|
|
275
|
+
display_name = (
|
|
276
|
+
f"Intel Multi-GPU ({gpu_count}x {first_name})" if gpu_count > 1 else first_name
|
|
277
|
+
)
|
|
278
|
+
return display_name, total_mib / 1024.0, gpu_count
|
|
279
|
+
except Exception:
|
|
280
|
+
pass
|
|
281
|
+
return None
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _detect_apple_gpu() -> Optional[Tuple[str, float, int]]:
|
|
194
285
|
try:
|
|
195
286
|
result = subprocess.run(
|
|
196
287
|
["sysctl", "hw.memsize"],
|
|
197
|
-
capture_output=True,
|
|
288
|
+
capture_output=True,
|
|
289
|
+
text=True,
|
|
290
|
+
check=True,
|
|
291
|
+
timeout=2.0,
|
|
198
292
|
)
|
|
199
293
|
total_bytes = int(result.stdout.strip().split()[1])
|
|
200
294
|
# Apply 75% operational heuristic for Apple Silicon wire limits
|
|
@@ -202,34 +296,62 @@ def detect_local_gpu() -> Tuple[str, float, int]:
|
|
|
202
296
|
return "Apple Silicon (Unified Memory)", vram_gb, 1
|
|
203
297
|
except Exception:
|
|
204
298
|
pass
|
|
205
|
-
|
|
299
|
+
return None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def detect_local_gpu() -> Tuple[str, float, int]:
|
|
303
|
+
# 1. NVIDIA
|
|
304
|
+
nvidia_res = _detect_nvidia_gpu()
|
|
305
|
+
if nvidia_res is not None:
|
|
306
|
+
return nvidia_res
|
|
307
|
+
|
|
308
|
+
# 2. AMD (ROCm)
|
|
309
|
+
amd_res = _detect_amd_gpu()
|
|
310
|
+
if amd_res is not None:
|
|
311
|
+
return amd_res
|
|
312
|
+
|
|
313
|
+
# 3. Intel (xpu-smi)
|
|
314
|
+
intel_res = _detect_intel_gpu()
|
|
315
|
+
if intel_res is not None:
|
|
316
|
+
return intel_res
|
|
317
|
+
|
|
318
|
+
# 4. Apple Silicon
|
|
319
|
+
apple_res = _detect_apple_gpu()
|
|
320
|
+
if apple_res is not None:
|
|
321
|
+
return apple_res
|
|
322
|
+
|
|
206
323
|
return "Unknown", 8.0, 1
|
|
207
324
|
|
|
325
|
+
|
|
208
326
|
def resolve_gpu(target: str) -> Tuple[str, float, int]:
|
|
209
327
|
if target.lower() == "auto":
|
|
210
328
|
return detect_local_gpu()
|
|
211
|
-
|
|
329
|
+
|
|
212
330
|
# Apple Silicon routing trap
|
|
213
331
|
lower_target = target.lower()
|
|
214
|
-
if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(
|
|
215
|
-
|
|
216
|
-
|
|
332
|
+
if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(
|
|
333
|
+
r"^m[1-4](-?(pro|max|ultra))?$", lower_target
|
|
334
|
+
):
|
|
335
|
+
raise ValueError(
|
|
336
|
+
"Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits."
|
|
337
|
+
)
|
|
338
|
+
|
|
217
339
|
# Parse potential multi-GPU format e.g., "2x RTX4090"
|
|
218
340
|
gpu_count = 1
|
|
219
|
-
match = re.match(r
|
|
341
|
+
match = re.match(r"^(\d+)x\s*(.+)$", lower_target)
|
|
220
342
|
if match:
|
|
221
343
|
gpu_count = int(match.group(1))
|
|
222
344
|
target_name = match.group(2)
|
|
223
345
|
else:
|
|
224
346
|
target_name = target
|
|
225
|
-
|
|
347
|
+
|
|
226
348
|
normalized = normalize_gpu_string(target_name)
|
|
227
|
-
|
|
349
|
+
|
|
228
350
|
if normalized in KNOWN_GPUS:
|
|
229
351
|
vram_gb = KNOWN_GPUS[normalized] * gpu_count
|
|
230
352
|
display_name = f"{gpu_count}x {target_name}" if gpu_count > 1 else target_name
|
|
231
353
|
return display_name, vram_gb, gpu_count
|
|
232
|
-
|
|
354
|
+
|
|
233
355
|
# If the user passed a pure number, assume GB
|
|
234
356
|
try:
|
|
235
357
|
vram_gb = float(normalized) * gpu_count
|
|
@@ -237,5 +359,17 @@ def resolve_gpu(target: str) -> Tuple[str, float, int]:
|
|
|
237
359
|
return display_name, vram_gb, gpu_count
|
|
238
360
|
except ValueError:
|
|
239
361
|
pass
|
|
240
|
-
|
|
241
|
-
|
|
362
|
+
|
|
363
|
+
import difflib
|
|
364
|
+
|
|
365
|
+
matches = difflib.get_close_matches(normalized, KNOWN_GPUS.keys(), n=3, cutoff=0.6)
|
|
366
|
+
if matches:
|
|
367
|
+
suggestions = ", ".join(matches)
|
|
368
|
+
raise ValueError(
|
|
369
|
+
f"Unknown GPU target '{target}'. Did you mean: {suggestions}? "
|
|
370
|
+
f"Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
raise ValueError(
|
|
374
|
+
f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
|
|
375
|
+
)
|
|
@@ -3,9 +3,27 @@ import json
|
|
|
3
3
|
import os
|
|
4
4
|
import struct
|
|
5
5
|
import urllib.error
|
|
6
|
+
import urllib.parse
|
|
6
7
|
import urllib.request
|
|
7
8
|
from typing import Any, Dict, Tuple
|
|
8
9
|
|
|
10
|
+
def _get_hf_endpoint() -> str:
|
|
11
|
+
endpoint = os.environ.get("HF_ENDPOINT", "https://huggingface.co").strip()
|
|
12
|
+
if not endpoint:
|
|
13
|
+
raise ValueError("HF_ENDPOINT is set but empty; expected a valid HTTP(S) URL")
|
|
14
|
+
endpoint = endpoint.rstrip("/")
|
|
15
|
+
if not endpoint.startswith("https://"):
|
|
16
|
+
raise ValueError(
|
|
17
|
+
f"HF_ENDPOINT must use https:// scheme, got: {endpoint}"
|
|
18
|
+
)
|
|
19
|
+
parsed = urllib.parse.urlparse(endpoint)
|
|
20
|
+
if not parsed.netloc:
|
|
21
|
+
raise ValueError(
|
|
22
|
+
f"HF_ENDPOINT must include a valid hostname, got: {endpoint}"
|
|
23
|
+
)
|
|
24
|
+
return endpoint
|
|
25
|
+
|
|
26
|
+
|
|
9
27
|
def _get_hf_token() -> str | None:
|
|
10
28
|
token = os.environ.get("HF_TOKEN")
|
|
11
29
|
if token:
|
|
@@ -29,7 +47,12 @@ def _get_hf_token() -> str | None:
|
|
|
29
47
|
|
|
30
48
|
return None
|
|
31
49
|
|
|
32
|
-
def _make_request(
|
|
50
|
+
def _make_request(
|
|
51
|
+
url: str,
|
|
52
|
+
headers: Dict[str, str] = None,
|
|
53
|
+
limit: int | None = None,
|
|
54
|
+
timeout: float = 10.0,
|
|
55
|
+
) -> bytes:
|
|
33
56
|
if headers is None:
|
|
34
57
|
headers = {}
|
|
35
58
|
|
|
@@ -39,7 +62,7 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
|
|
|
39
62
|
|
|
40
63
|
req = urllib.request.Request(url, headers=headers)
|
|
41
64
|
try:
|
|
42
|
-
with urllib.request.urlopen(req, timeout=
|
|
65
|
+
with urllib.request.urlopen(req, timeout=timeout) as response:
|
|
43
66
|
if limit is not None:
|
|
44
67
|
return response.read(limit)
|
|
45
68
|
return response.read()
|
|
@@ -50,16 +73,16 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
|
|
|
50
73
|
raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}")
|
|
51
74
|
raise
|
|
52
75
|
|
|
53
|
-
def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
|
|
54
|
-
url = f"
|
|
76
|
+
def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0) -> Dict[str, Any]:
|
|
77
|
+
url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/{filename}"
|
|
55
78
|
|
|
56
79
|
# 1. Fetch the first 500KB in a single roundtrip
|
|
57
80
|
headers = {"Range": "bytes=0-500000"}
|
|
58
81
|
try:
|
|
59
|
-
chunk = _make_request(url, headers=headers, limit=500000)
|
|
82
|
+
chunk = _make_request(url, headers=headers, limit=500000, timeout=timeout)
|
|
60
83
|
except urllib.error.HTTPError as e:
|
|
61
84
|
if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB)
|
|
62
|
-
chunk = _make_request(url, limit=500000)
|
|
85
|
+
chunk = _make_request(url, limit=500000, timeout=timeout)
|
|
63
86
|
else:
|
|
64
87
|
raise
|
|
65
88
|
|
|
@@ -74,18 +97,18 @@ def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
|
|
|
74
97
|
else:
|
|
75
98
|
# 3. Double-roundtrip only if the header is massive (>500KB)
|
|
76
99
|
headers = {"Range": f"bytes=8-{8+header_size-1}"}
|
|
77
|
-
json_bytes = _make_request(url, headers=headers, limit=header_size)
|
|
100
|
+
json_bytes = _make_request(url, headers=headers, limit=header_size, timeout=timeout)
|
|
78
101
|
|
|
79
102
|
return json.loads(json_bytes)
|
|
80
103
|
|
|
81
|
-
def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
|
|
104
|
+
def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
|
|
82
105
|
"""
|
|
83
106
|
Fetches the metadata directly from the Hugging Face Hub over the network.
|
|
84
107
|
Returns: (tensors, config, format_name, disk_size)
|
|
85
108
|
"""
|
|
86
|
-
api_url = f"
|
|
109
|
+
api_url = f"{_get_hf_endpoint()}/api/models/{repo_id}"
|
|
87
110
|
try:
|
|
88
|
-
api_data = json.loads(_make_request(api_url).decode("utf-8"))
|
|
111
|
+
api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8"))
|
|
89
112
|
except urllib.error.HTTPError as e:
|
|
90
113
|
if e.code == 401:
|
|
91
114
|
raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}")
|
|
@@ -98,16 +121,16 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
|
|
|
98
121
|
|
|
99
122
|
config = None
|
|
100
123
|
if "config.json" in filenames:
|
|
101
|
-
config_url = f"
|
|
102
|
-
config = json.loads(_make_request(config_url).decode("utf-8"))
|
|
124
|
+
config_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/config.json"
|
|
125
|
+
config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8"))
|
|
103
126
|
|
|
104
127
|
tensors = {}
|
|
105
128
|
total_size = 0.0
|
|
106
129
|
|
|
107
130
|
if "model.safetensors.index.json" in filenames:
|
|
108
131
|
# Sharded SafeTensors
|
|
109
|
-
index_url = f"
|
|
110
|
-
index_data = json.loads(_make_request(index_url).decode("utf-8"))
|
|
132
|
+
index_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors.index.json"
|
|
133
|
+
index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8"))
|
|
111
134
|
|
|
112
135
|
weight_map = index_data.get("weight_map", {})
|
|
113
136
|
unique_shards = list(set(weight_map.values()))
|
|
@@ -128,7 +151,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
|
|
|
128
151
|
}
|
|
129
152
|
else:
|
|
130
153
|
def fetch_shard(shard: str):
|
|
131
|
-
return shard, _fetch_safetensors_header(repo_id, shard)
|
|
154
|
+
return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout)
|
|
132
155
|
|
|
133
156
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor:
|
|
134
157
|
future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards}
|
|
@@ -149,17 +172,17 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
|
|
|
149
172
|
# Single SafeTensors
|
|
150
173
|
|
|
151
174
|
# Determine total size first
|
|
152
|
-
req = urllib.request.Request(f"
|
|
175
|
+
req = urllib.request.Request(f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors", method="HEAD")
|
|
153
176
|
token = _get_hf_token()
|
|
154
177
|
if token:
|
|
155
178
|
req.add_header("Authorization", f"Bearer {token}")
|
|
156
179
|
try:
|
|
157
|
-
with urllib.request.urlopen(req) as response:
|
|
180
|
+
with urllib.request.urlopen(req, timeout=timeout) as response:
|
|
158
181
|
total_size = int(response.headers.get("Content-Length", 0))
|
|
159
182
|
except Exception:
|
|
160
183
|
pass
|
|
161
184
|
|
|
162
|
-
header = _fetch_safetensors_header(repo_id, "model.safetensors")
|
|
185
|
+
header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout)
|
|
163
186
|
tensors = header
|
|
164
187
|
|
|
165
188
|
format_name = "SafeTensors"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelinfo-cli
|
|
3
|
-
Version: 1.4.3
|
|
3
|
+
Version: 1.4.4
|
|
4
4
|
Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
|
|
5
5
|
Author: ModelInfo Contributors
|
|
6
6
|
License: MIT
|
|
@@ -164,12 +164,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
|
|
|
164
164
|
| `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
|
|
165
165
|
| `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
|
|
166
166
|
| `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
|
|
167
|
+
| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
|
|
167
168
|
| `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
|
|
168
169
|
| `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
|
|
169
170
|
| `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
|
|
170
171
|
| `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
|
|
171
172
|
| `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
|
|
172
173
|
| `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
|
|
174
|
+
| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
|
|
175
|
+
| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
|
|
173
176
|
|
|
174
177
|
## Architecture
|
|
175
178
|
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
import modelinfo.cli as cli
|
|
4
|
+
from modelinfo import __version__
|
|
5
|
+
from modelinfo.cli import parse_args
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_version_flag_prints_installed_version(capsys):
|
|
9
|
+
with pytest.raises(SystemExit) as exc_info:
|
|
10
|
+
parse_args(["--version"])
|
|
11
|
+
|
|
12
|
+
assert exc_info.value.code == 0
|
|
13
|
+
assert f"modelinfo {__version__}" in capsys.readouterr().out
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_batch_size_flag_defaults_to_one():
|
|
17
|
+
args = parse_args(["model.gguf"])
|
|
18
|
+
|
|
19
|
+
assert args.batch_size == 1
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_batch_size_flag_accepts_integer():
|
|
23
|
+
args = parse_args(["--batch-size", "4", "model.gguf"])
|
|
24
|
+
|
|
25
|
+
assert args.batch_size == 4
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_batch_size_flag_rejects_zero():
|
|
29
|
+
with pytest.raises(SystemExit) as exc_info:
|
|
30
|
+
parse_args(["--batch-size", "0", "model.gguf"])
|
|
31
|
+
|
|
32
|
+
assert exc_info.value.code == 2
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_batch_size_flag_rejects_negative():
|
|
36
|
+
with pytest.raises(SystemExit) as exc_info:
|
|
37
|
+
parse_args(["--batch-size", "-1", "model.gguf"])
|
|
38
|
+
|
|
39
|
+
assert exc_info.value.code == 2
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_timeout_flag_defaults_to_ten_seconds():
|
|
43
|
+
args = parse_args(["model.gguf"])
|
|
44
|
+
|
|
45
|
+
assert args.timeout == 10.0
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_timeout_flag_accepts_float():
|
|
49
|
+
args = parse_args(["--timeout", "30.5", "model.gguf"])
|
|
50
|
+
|
|
51
|
+
assert args.timeout == 30.5
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_timeout_flag_rejects_zero():
|
|
55
|
+
with pytest.raises(SystemExit) as exc_info:
|
|
56
|
+
parse_args(["--timeout", "0", "model.gguf"])
|
|
57
|
+
|
|
58
|
+
assert exc_info.value.code == 2
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_timeout_flag_rejects_negative():
|
|
62
|
+
with pytest.raises(SystemExit) as exc_info:
|
|
63
|
+
parse_args(["--timeout", "-1", "model.gguf"])
|
|
64
|
+
|
|
65
|
+
assert exc_info.value.code == 2
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_timeout_flag_rejects_nan():
|
|
69
|
+
with pytest.raises(SystemExit) as exc_info:
|
|
70
|
+
parse_args(["--timeout", "nan", "model.gguf"])
|
|
71
|
+
|
|
72
|
+
assert exc_info.value.code == 2
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_timeout_flag_rejects_inf():
|
|
76
|
+
with pytest.raises(SystemExit) as exc_info:
|
|
77
|
+
parse_args(["--timeout", "inf", "model.gguf"])
|
|
78
|
+
|
|
79
|
+
assert exc_info.value.code == 2
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_analyze_model_passes_batch_size_to_footprint(monkeypatch, tmp_path):
|
|
83
|
+
model_path = tmp_path / "model.gguf"
|
|
84
|
+
model_path.write_bytes(b"mock")
|
|
85
|
+
captured = {}
|
|
86
|
+
|
|
87
|
+
def fake_parse_gguf_header(file_path):
|
|
88
|
+
assert file_path == str(model_path)
|
|
89
|
+
return {
|
|
90
|
+
"model.layers.0.self_attn.k_proj.weight": {"shape": [1, 1], "dtype": "F16"}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
def fake_calculate_footprint(tensors, *, context_length, batch_size, **kwargs):
|
|
94
|
+
captured["batch_size"] = batch_size
|
|
95
|
+
captured["context_length"] = context_length
|
|
96
|
+
return {
|
|
97
|
+
"total_params": 1,
|
|
98
|
+
"base_memory_bytes": 2.0,
|
|
99
|
+
"kv_cache_bytes": float(batch_size),
|
|
100
|
+
"overhead_bytes": 0.0,
|
|
101
|
+
"total_memory_bytes": 2.0 + batch_size,
|
|
102
|
+
"num_layers": 1,
|
|
103
|
+
"kv_dim": 1,
|
|
104
|
+
"primary_dtype": "F16",
|
|
105
|
+
"kv_is_estimate": False,
|
|
106
|
+
"penalty_percentage": 0.0,
|
|
107
|
+
"vllm_metrics": {},
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
monkeypatch.setattr(cli, "parse_gguf_header", fake_parse_gguf_header)
|
|
111
|
+
monkeypatch.setattr(cli, "calculate_footprint", fake_calculate_footprint)
|
|
112
|
+
monkeypatch.setattr(
|
|
113
|
+
cli, "identify_architecture_name", lambda tensors, num_layers, config: "Mock"
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
info = cli.analyze_model(str(model_path), context_override=128, batch_size=4)
|
|
117
|
+
|
|
118
|
+
assert captured == {"batch_size": 4, "context_length": 128}
|
|
119
|
+
assert info["footprint"]["kv_cache_bytes"] == 4.0
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def test_analyze_model_passes_timeout_to_huggingface(monkeypatch):
|
|
123
|
+
captured = {}
|
|
124
|
+
|
|
125
|
+
def fake_exists(path):
|
|
126
|
+
return False
|
|
127
|
+
|
|
128
|
+
def fake_fetch(repo_id, *, fetch_tensors, timeout):
|
|
129
|
+
captured["repo_id"] = repo_id
|
|
130
|
+
captured["fetch_tensors"] = fetch_tensors
|
|
131
|
+
captured["timeout"] = timeout
|
|
132
|
+
return (
|
|
133
|
+
{
|
|
134
|
+
"model.layers.0.self_attn.k_proj.weight": {
|
|
135
|
+
"shape": [1, 1],
|
|
136
|
+
"dtype": "F16",
|
|
137
|
+
}
|
|
138
|
+
},
|
|
139
|
+
None,
|
|
140
|
+
"SafeTensors",
|
|
141
|
+
7.0,
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
def fake_calculate_footprint(tensors, *, context_length, batch_size, **kwargs):
|
|
145
|
+
return {
|
|
146
|
+
"total_params": 1,
|
|
147
|
+
"base_memory_bytes": 2.0,
|
|
148
|
+
"kv_cache_bytes": 1.0,
|
|
149
|
+
"overhead_bytes": 0.0,
|
|
150
|
+
"total_memory_bytes": 3.0,
|
|
151
|
+
"num_layers": 1,
|
|
152
|
+
"kv_dim": 1,
|
|
153
|
+
"primary_dtype": "F16",
|
|
154
|
+
"kv_is_estimate": False,
|
|
155
|
+
"penalty_percentage": 0.0,
|
|
156
|
+
"vllm_metrics": {},
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
from modelinfo.parsers import huggingface
|
|
160
|
+
|
|
161
|
+
monkeypatch.setattr(cli.os.path, "exists", fake_exists)
|
|
162
|
+
monkeypatch.setattr(huggingface, "fetch_huggingface_repo", fake_fetch)
|
|
163
|
+
monkeypatch.setattr(cli, "calculate_footprint", fake_calculate_footprint)
|
|
164
|
+
monkeypatch.setattr(
|
|
165
|
+
cli, "identify_architecture_name", lambda tensors, num_layers, config: "Mock"
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
cli.analyze_model(
|
|
169
|
+
"org/model",
|
|
170
|
+
context_override=128,
|
|
171
|
+
fetch_tensors=True,
|
|
172
|
+
timeout=22.5,
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
assert captured == {
|
|
176
|
+
"repo_id": "org/model",
|
|
177
|
+
"fetch_tensors": True,
|
|
178
|
+
"timeout": 22.5,
|
|
179
|
+
}
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from modelinfo import hardware
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def completed(stdout: str) -> subprocess.CompletedProcess:
|
|
9
|
+
return subprocess.CompletedProcess(args=[], returncode=0, stdout=stdout)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_normalize_gpu_string_removes_vendor_fluff_and_separators():
|
|
13
|
+
assert hardware.normalize_gpu_string("NVIDIA GeForce RTX 4090") == "rtx4090"
|
|
14
|
+
assert (
|
|
15
|
+
hardware.normalize_gpu_string("AMD Radeon RX-7900 XTX Graphics") == "rx7900xtx"
|
|
16
|
+
)
|
|
17
|
+
assert hardware.normalize_gpu_string("Intel Arc A770 Edition") == "a770"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_resolve_gpu_matches_known_gpu():
|
|
21
|
+
assert hardware.resolve_gpu("NVIDIA GeForce RTX 4090") == (
|
|
22
|
+
"NVIDIA GeForce RTX 4090",
|
|
23
|
+
24.0,
|
|
24
|
+
1,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_resolve_gpu_handles_multi_gpu_string():
|
|
29
|
+
assert hardware.resolve_gpu("2x RTX4090") == ("2x rtx4090", 48.0, 2)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_resolve_gpu_accepts_numeric_vram_target():
|
|
33
|
+
assert hardware.resolve_gpu("16") == ("Custom (16.0 GB)", 16.0, 1)
|
|
34
|
+
assert hardware.resolve_gpu("4x 12") == ("Custom (48.0 GB)", 48.0, 4)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_resolve_gpu_delegates_auto_detection(monkeypatch):
|
|
38
|
+
monkeypatch.setattr(hardware, "detect_local_gpu", lambda: ("Local GPU", 12.0, 1))
|
|
39
|
+
|
|
40
|
+
assert hardware.resolve_gpu("auto") == ("Local GPU", 12.0, 1)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_resolve_gpu_rejects_apple_silicon_shortcuts():
|
|
44
|
+
with pytest.raises(ValueError, match="Apple Silicon VRAM varies"):
|
|
45
|
+
hardware.resolve_gpu("m3-max")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_resolve_gpu_rejects_unknown_gpu_name():
|
|
49
|
+
with pytest.raises(ValueError, match="Unknown GPU target 'Mystery GPU'"):
|
|
50
|
+
hardware.resolve_gpu("Mystery GPU")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_resolve_gpu_suggests_close_matches():
|
|
54
|
+
with pytest.raises(
|
|
55
|
+
ValueError,
|
|
56
|
+
match="Unknown GPU target 'rtx490'\\. Did you mean:.*rtx4090",
|
|
57
|
+
):
|
|
58
|
+
hardware.resolve_gpu("rtx490")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_detect_local_gpu_reads_nvidia_smi(monkeypatch):
|
|
62
|
+
def fake_run(command, **kwargs):
|
|
63
|
+
assert command == [
|
|
64
|
+
"nvidia-smi",
|
|
65
|
+
"--query-gpu=name,memory.total",
|
|
66
|
+
"--format=csv,noheader,nounits",
|
|
67
|
+
]
|
|
68
|
+
assert kwargs == {
|
|
69
|
+
"capture_output": True,
|
|
70
|
+
"text": True,
|
|
71
|
+
"check": True,
|
|
72
|
+
"timeout": 2.0,
|
|
73
|
+
}
|
|
74
|
+
return completed("NVIDIA GeForce RTX 4090, 24576\n")
|
|
75
|
+
|
|
76
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
77
|
+
|
|
78
|
+
assert hardware.detect_local_gpu() == ("NVIDIA GeForce RTX 4090", 24.0, 1)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_detect_local_gpu_sums_multiple_nvidia_gpus(monkeypatch):
|
|
82
|
+
monkeypatch.setattr(
|
|
83
|
+
hardware.subprocess,
|
|
84
|
+
"run",
|
|
85
|
+
lambda *args, **kwargs: completed(
|
|
86
|
+
"NVIDIA GeForce RTX 4090, 24576\nNVIDIA GeForce RTX 4090, 24576\n"
|
|
87
|
+
),
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
assert hardware.detect_local_gpu() == (
|
|
91
|
+
"Multi-GPU: 2x NVIDIA GeForce RTX 4090",
|
|
92
|
+
48.0,
|
|
93
|
+
2,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_detect_local_gpu_falls_back_to_rocm_smi(monkeypatch):
|
|
98
|
+
def fake_run(command, **kwargs):
|
|
99
|
+
if command[0] == "nvidia-smi":
|
|
100
|
+
raise FileNotFoundError("nvidia-smi not installed")
|
|
101
|
+
assert command == ["rocm-smi", "--showmeminfo", "vram"]
|
|
102
|
+
return completed(
|
|
103
|
+
"Total Memory (B): 17179869184\nTotal Memory (B): 17179869184\n"
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
107
|
+
|
|
108
|
+
assert hardware.detect_local_gpu() == ("AMD Multi-GPU (2x)", 32.0, 2)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def test_detect_local_gpu_falls_back_to_xpu_smi(monkeypatch):
|
|
112
|
+
def fake_run(command, **kwargs):
|
|
113
|
+
if command[0] in {"nvidia-smi", "rocm-smi"}:
|
|
114
|
+
raise FileNotFoundError(command[0])
|
|
115
|
+
assert command == ["xpu-smi", "discovery"] # nosec
|
|
116
|
+
stdout = (
|
|
117
|
+
"+-----------+------------------------------------------------------+\n"
|
|
118
|
+
"| Device ID | Device Information |\n"
|
|
119
|
+
"+-----------+------------------------------------------------------+\n"
|
|
120
|
+
"| 0 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n"
|
|
121
|
+
"| | Vendor Name: Intel(R) Corporation |\n"
|
|
122
|
+
"| | Memory Physical Size: 16384.00 MiB |\n"
|
|
123
|
+
"+-----------+------------------------------------------------------+\n"
|
|
124
|
+
)
|
|
125
|
+
return completed(stdout)
|
|
126
|
+
|
|
127
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
128
|
+
|
|
129
|
+
assert hardware.detect_local_gpu() == ("Intel(R) Arc(TM) A770 Graphics", 16.0, 1) # nosec
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_detect_local_gpu_sums_multiple_intel_gpus(monkeypatch):
|
|
133
|
+
def fake_run(command, **kwargs):
|
|
134
|
+
if command[0] in {"nvidia-smi", "rocm-smi"}:
|
|
135
|
+
raise FileNotFoundError(command[0])
|
|
136
|
+
assert command == ["xpu-smi", "discovery"] # nosec
|
|
137
|
+
stdout = (
|
|
138
|
+
"+-----------+------------------------------------------------------+\n"
|
|
139
|
+
"| Device ID | Device Information |\n"
|
|
140
|
+
"+-----------+------------------------------------------------------+\n"
|
|
141
|
+
"| 0 | Device Name: Intel(R) Data Center GPU Flex 170 |\n"
|
|
142
|
+
"| | Memory Physical Size: 16384.00 MiB |\n"
|
|
143
|
+
"+-----------+------------------------------------------------------+\n"
|
|
144
|
+
"| 1 | Device Name: Intel(R) Data Center GPU Flex 170 |\n"
|
|
145
|
+
"| | Memory Physical Size: 16384.00 MiB |\n"
|
|
146
|
+
"+-----------+------------------------------------------------------+\n"
|
|
147
|
+
)
|
|
148
|
+
return completed(stdout)
|
|
149
|
+
|
|
150
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
151
|
+
|
|
152
|
+
assert hardware.detect_local_gpu() == ( # nosec
|
|
153
|
+
"Intel Multi-GPU (2x Intel(R) Data Center GPU Flex 170)",
|
|
154
|
+
32.0,
|
|
155
|
+
2,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_detect_local_gpu_intel_unit_conversions(monkeypatch):
|
|
160
|
+
test_cases = [
|
|
161
|
+
("16.00 GiB", 16.0),
|
|
162
|
+
("16.00 GB", 16.0),
|
|
163
|
+
("16777216.00 KiB", 16.0),
|
|
164
|
+
("17179869184.00 B", 16.0),
|
|
165
|
+
("16384.00 MiB", 16.0),
|
|
166
|
+
("16384.00 MB", 16.0),
|
|
167
|
+
("16384.00", 16.0), # Default MiB unit
|
|
168
|
+
]
|
|
169
|
+
for size_str, expected_vram in test_cases:
|
|
170
|
+
def fake_run(command, s=size_str, **kwargs):
|
|
171
|
+
if command[0] in {"nvidia-smi", "rocm-smi"}:
|
|
172
|
+
raise FileNotFoundError(command[0])
|
|
173
|
+
assert command == ["xpu-smi", "discovery"] # nosec
|
|
174
|
+
stdout = (
|
|
175
|
+
"+-----------+------------------------------------------------------+\n"
|
|
176
|
+
"| Device ID | Device Information |\n"
|
|
177
|
+
"+-----------+------------------------------------------------------+\n"
|
|
178
|
+
"| 0 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n"
|
|
179
|
+
f"| | Memory Physical Size: {s} |\n"
|
|
180
|
+
"+-----------+------------------------------------------------------+\n"
|
|
181
|
+
)
|
|
182
|
+
return completed(stdout)
|
|
183
|
+
|
|
184
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
185
|
+
assert hardware.detect_local_gpu() == ("Intel(R) Arc(TM) A770 Graphics", expected_vram, 1) # nosec
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def test_detect_local_gpu_falls_back_on_malformed_xpu_smi(monkeypatch):
|
|
189
|
+
def fake_run(command, **kwargs):
|
|
190
|
+
if command[0] in {"nvidia-smi", "rocm-smi"}:
|
|
191
|
+
raise FileNotFoundError(command[0])
|
|
192
|
+
if command[0] == "xpu-smi":
|
|
193
|
+
# Returns device name but no parseable memory size
|
|
194
|
+
stdout = (
|
|
195
|
+
"+-----------+------------------------------------------------------+\n"
|
|
196
|
+
"| Device ID | Device Information |\n"
|
|
197
|
+
"+-----------+------------------------------------------------------+\n"
|
|
198
|
+
"| 0 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n"
|
|
199
|
+
"| | Vendor Name: Intel(R) Corporation |\n"
|
|
200
|
+
"| | Memory Physical Size: N/A |\n"
|
|
201
|
+
"+-----------+------------------------------------------------------+\n"
|
|
202
|
+
)
|
|
203
|
+
return completed(stdout)
|
|
204
|
+
raise FileNotFoundError(command[0])
|
|
205
|
+
|
|
206
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
207
|
+
|
|
208
|
+
# Since xpu-smi didn't return valid memory, detect_local_gpu should fall back to default/next
|
|
209
|
+
assert hardware.detect_local_gpu() == ("Unknown", 8.0, 1) # nosec
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def test_detect_local_gpu_falls_back_on_mismatched_intel_count(monkeypatch):
|
|
213
|
+
def fake_run(command, **kwargs):
|
|
214
|
+
if command[0] in {"nvidia-smi", "rocm-smi"}:
|
|
215
|
+
raise FileNotFoundError(command[0])
|
|
216
|
+
if command[0] == "xpu-smi":
|
|
217
|
+
# 2 GPUs, but only 1 has memory size
|
|
218
|
+
stdout = (
|
|
219
|
+
"+-----------+------------------------------------------------------+\n"
|
|
220
|
+
"| Device ID | Device Information |\n"
|
|
221
|
+
"+-----------+------------------------------------------------------+\n"
|
|
222
|
+
"| 0 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n"
|
|
223
|
+
"| | Memory Physical Size: 16384.00 MiB |\n"
|
|
224
|
+
"+-----------+------------------------------------------------------+\n"
|
|
225
|
+
"| 1 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n"
|
|
226
|
+
"+-----------+------------------------------------------------------+\n"
|
|
227
|
+
)
|
|
228
|
+
return completed(stdout)
|
|
229
|
+
raise FileNotFoundError(command[0])
|
|
230
|
+
|
|
231
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
232
|
+
|
|
233
|
+
# Since device count (2) != memory entries count (1), it must fall back
|
|
234
|
+
assert hardware.detect_local_gpu() == ("Unknown", 8.0, 1) # nosec
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def test_detect_local_gpu_falls_back_to_apple_unified_memory(monkeypatch):
|
|
238
|
+
def fake_run(command, **kwargs):
|
|
239
|
+
if command[0] in {"nvidia-smi", "rocm-smi", "xpu-smi"}:
|
|
240
|
+
raise FileNotFoundError(command[0])
|
|
241
|
+
assert command == ["sysctl", "hw.memsize"]
|
|
242
|
+
return completed("hw.memsize: 17179869184\n")
|
|
243
|
+
|
|
244
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
245
|
+
|
|
246
|
+
assert hardware.detect_local_gpu() == ("Apple Silicon (Unified Memory)", 12.0, 1)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def test_detect_local_gpu_returns_default_when_detection_fails(monkeypatch):
|
|
250
|
+
def fake_run(command, **kwargs):
|
|
251
|
+
raise FileNotFoundError(command[0])
|
|
252
|
+
|
|
253
|
+
monkeypatch.setattr(hardware.subprocess, "run", fake_run)
|
|
254
|
+
|
|
255
|
+
assert hardware.detect_local_gpu() == ("Unknown", 8.0, 1)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import pytest
|
|
3
|
+
from modelinfo.parsers.huggingface import _get_hf_endpoint
|
|
3
4
|
from modelinfo.parsers.safetensors import parse_safetensors_header
|
|
4
5
|
|
|
5
6
|
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
|
|
@@ -45,3 +46,39 @@ def test_gguf_parser_metadata():
|
|
|
45
46
|
# Verify the architecture bypass parses it to titlecase and prevents "Unknown Architecture"
|
|
46
47
|
arch_name = identify_architecture_name(tensors, num_layers=1)
|
|
47
48
|
assert arch_name == "Qwen2 (1 transformer layers)"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_hf_endpoint_valid_https(monkeypatch):
|
|
54
|
+
"""Valid https:// endpoint is accepted."""
|
|
55
|
+
monkeypatch.setenv("HF_ENDPOINT", "https://huggingface.co")
|
|
56
|
+
assert _get_hf_endpoint() == "https://huggingface.co"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_hf_endpoint_default_https(monkeypatch):
|
|
60
|
+
"""Default endpoint when HF_ENDPOINT is not set."""
|
|
61
|
+
monkeypatch.delenv("HF_ENDPOINT", raising=False)
|
|
62
|
+
endpoint = _get_hf_endpoint()
|
|
63
|
+
assert endpoint == "https://huggingface.co"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_hf_endpoint_rejects_http(monkeypatch):
|
|
67
|
+
"""http:// scheme is rejected with ValueError."""
|
|
68
|
+
monkeypatch.setenv("HF_ENDPOINT", "http://localhost:8080")
|
|
69
|
+
with pytest.raises(ValueError, match="must use https:// scheme"):
|
|
70
|
+
_get_hf_endpoint()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_hf_endpoint_rejects_empty(monkeypatch):
|
|
74
|
+
"""Empty string is rejected with ValueError."""
|
|
75
|
+
monkeypatch.setenv("HF_ENDPOINT", "")
|
|
76
|
+
with pytest.raises(ValueError):
|
|
77
|
+
_get_hf_endpoint()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_hf_endpoint_rejects_no_hostname(monkeypatch):
|
|
81
|
+
"""URL without a hostname is rejected with ValueError."""
|
|
82
|
+
monkeypatch.setenv("HF_ENDPOINT", "https:///repo")
|
|
83
|
+
with pytest.raises(ValueError, match="must include a valid hostname"):
|
|
84
|
+
_get_hf_endpoint()
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
from modelinfo import __version__
|
|
4
|
-
from modelinfo.cli import parse_args
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def test_version_flag_prints_installed_version(capsys):
|
|
8
|
-
with pytest.raises(SystemExit) as exc_info:
|
|
9
|
-
parse_args(["--version"])
|
|
10
|
-
|
|
11
|
-
assert exc_info.value.code == 0
|
|
12
|
-
assert f"modelinfo {__version__}" in capsys.readouterr().out
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|