modelinfo-cli 1.4.3__tar.gz → 1.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/PKG-INFO +4 -1
  2. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/README.md +3 -0
  3. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/pyproject.toml +1 -1
  4. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/__init__.py +1 -1
  5. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/cli.py +40 -4
  6. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/hardware.py +176 -42
  7. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/huggingface.py +41 -18
  8. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/PKG-INFO +4 -1
  9. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/SOURCES.txt +1 -0
  10. modelinfo_cli-1.4.4/tests/test_cli.py +179 -0
  11. modelinfo_cli-1.4.4/tests/test_hardware.py +255 -0
  12. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/tests/test_parsers.py +37 -0
  13. modelinfo_cli-1.4.3/tests/test_cli.py +0 -12
  14. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/LICENSE +0 -0
  15. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/setup.cfg +0 -0
  16. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/__main__.py +0 -0
  17. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/architecture.py +0 -0
  18. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/calculator.py +0 -0
  19. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/__init__.py +0 -0
  20. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/base.py +0 -0
  21. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/gguf.py +0 -0
  22. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/pytorch.py +0 -0
  23. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/safetensors.py +0 -0
  24. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo/ui.py +0 -0
  25. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/dependency_links.txt +0 -0
  26. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/entry_points.txt +0 -0
  27. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/requires.txt +0 -0
  28. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/top_level.txt +0 -0
  29. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/tests/test_calculator.py +0 -0
  30. {modelinfo_cli-1.4.3 → modelinfo_cli-1.4.4}/tests/test_constraints.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modelinfo-cli
3
- Version: 1.4.3
3
+ Version: 1.4.4
4
4
  Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
5
5
  Author: ModelInfo Contributors
6
6
  License: MIT
@@ -164,12 +164,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
164
164
  | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
165
165
  | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
166
166
  | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
167
+ | `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
167
168
  | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
168
169
  | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
169
170
  | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
170
171
  | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
171
172
  | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
172
173
  | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
174
+ | `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
175
+ | `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
173
176
 
174
177
  ## Architecture
175
178
 
@@ -146,12 +146,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
146
146
  | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
147
147
  | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
148
148
  | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
149
+ | `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
149
150
  | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
150
151
  | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
151
152
  | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
152
153
  | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
153
154
  | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
154
155
  | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
156
+ | `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
157
+ | `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
155
158
 
156
159
  ## Architecture
157
160
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "modelinfo-cli"
7
- version = "1.4.3"
7
+ version = "1.4.4"
8
8
  description = "A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -2,4 +2,4 @@
2
2
  modelinfo - A high-performance CLI utility for inspecting ML model checkpoints.
3
3
  """
4
4
 
5
- __version__ = "1.4.3"
5
+ __version__ = "1.4.4"
@@ -1,5 +1,6 @@
1
1
  import argparse
2
2
  import json
3
+ import math
3
4
  import os
4
5
  import sys
5
6
  from typing import Sequence
@@ -34,6 +35,20 @@ class VersionAction(argparse.Action):
34
35
  parser.exit()
35
36
 
36
37
 
38
def _positive_int(value: str) -> int:
    """Argparse ``type=`` converter that accepts only integers >= 1.

    Registered as the converter for ``--batch-size``.

    Args:
        value: Raw command-line token.

    Returns:
        The parsed integer.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not an integer literal or
            is smaller than 1.
    """
    try:
        ivalue = int(value)
    except ValueError:
        # A bare ValueError makes argparse print "invalid _positive_int value",
        # leaking this private helper's name into the user-facing message.
        raise argparse.ArgumentTypeError(
            f"batch size must be an integer, got {value!r}"
        ) from None
    if ivalue < 1:
        raise argparse.ArgumentTypeError("batch size must be at least 1")
    return ivalue
43
+
44
+
45
def _positive_float(value: str) -> float:
    """Argparse ``type=`` converter that accepts only finite floats > 0.

    Registered as the converter for ``--timeout``.

    Args:
        value: Raw command-line token.

    Returns:
        The parsed float.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a number, is NaN or
            infinite, or is less than or equal to zero.
    """
    try:
        fvalue = float(value)
    except ValueError:
        # A bare ValueError makes argparse print "invalid _positive_float
        # value", leaking this private helper's name to the user.
        raise argparse.ArgumentTypeError(
            f"timeout must be a number, got {value!r}"
        ) from None
    # NaN compares false against everything, so it needs the explicit check.
    if not math.isfinite(fvalue) or fvalue <= 0:
        raise argparse.ArgumentTypeError("timeout must be a finite number greater than 0")
    return fvalue
50
+
51
+
37
52
  def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
38
53
  parser = argparse.ArgumentParser(
39
54
  prog="modelinfo",
@@ -52,6 +67,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
52
67
  default=None,
53
68
  help="Context length for dynamic KV cache footprint calculation.",
54
69
  )
70
+ parser.add_argument(
71
+ "--batch-size",
72
+ type=_positive_int,
73
+ default=1,
74
+ help="Batch size for dynamic KV cache footprint calculation.",
75
+ )
55
76
  parser.add_argument(
56
77
  "--max-vram",
57
78
  type=float,
@@ -69,6 +90,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
69
90
  action="store_true",
70
91
  help="Deep dive: Fetch all remote tensor shards to display the exact tensor size breakdown.",
71
92
  )
93
+ parser.add_argument(
94
+ "--timeout",
95
+ type=_positive_float,
96
+ default=10.0,
97
+ help="Network timeout in seconds for remote Hugging Face fetches.",
98
+ )
72
99
  parser.add_argument(
73
100
  "--topology",
74
101
  type=str,
@@ -106,8 +133,10 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
106
133
  def analyze_model(
107
134
  file_path: str,
108
135
  context_override: int | None,
109
- gpu_count: int = 1,
136
+ gpu_count: int = 1,
137
+ batch_size: int = 1,
110
138
  fetch_tensors: bool = False,
139
+ timeout: float = 10.0,
111
140
  topology: str = "pcie4",
112
141
  strategy: str = "tp",
113
142
  is_vllm: bool = False,
@@ -122,7 +151,9 @@ def analyze_model(
122
151
 
123
152
  if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
124
153
  from modelinfo.parsers.huggingface import fetch_huggingface_repo
125
- tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors)
154
+ tensors, config, format_name, disk_size = fetch_huggingface_repo(
155
+ file_path, fetch_tensors=fetch_tensors, timeout=timeout
156
+ )
126
157
  elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
127
158
  tensors = parse_safetensors_header(file_path)
128
159
  format_name = "SafeTensors"
@@ -164,6 +195,7 @@ def analyze_model(
164
195
  footprint = calculate_footprint(
165
196
  tensors,
166
197
  context_length=context_length,
198
+ batch_size=batch_size,
167
199
  config=config,
168
200
  gpu_count=gpu_count,
169
201
  topology=topology,
@@ -222,8 +254,10 @@ def main(argv: Sequence[str] | None = None) -> int:
222
254
  info = analyze_model(
223
255
  model_path,
224
256
  args.context,
225
- gpu_count,
257
+ gpu_count=gpu_count,
258
+ batch_size=args.batch_size,
226
259
  fetch_tensors=args.tensors,
260
+ timeout=args.timeout,
227
261
  topology=args.topology,
228
262
  strategy=args.strategy,
229
263
  is_vllm=args.vllm,
@@ -240,8 +274,10 @@ def main(argv: Sequence[str] | None = None) -> int:
240
274
  info = analyze_model(
241
275
  file_path,
242
276
  args.context,
243
- gpu_count,
277
+ gpu_count=gpu_count,
278
+ batch_size=args.batch_size,
244
279
  fetch_tensors=args.tensors,
280
+ timeout=args.timeout,
245
281
  topology=args.topology,
246
282
  strategy=args.strategy,
247
283
  is_vllm=args.vllm,
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  import subprocess
3
- from typing import Tuple
3
+ from typing import Optional, Tuple
4
4
 
5
5
  KNOWN_GPUS = {
6
6
  # --- NVIDIA Consumer (RTX 50/40/30/20/10 Series & Titans) ---
@@ -21,7 +21,7 @@ KNOWN_GPUS = {
21
21
  "rtx4060ti16gb": 16.0,
22
22
  "rtx4060ti": 8.0,
23
23
  "rtx4060": 8.0,
24
- "rtx4050" : 6.0,
24
+ "rtx4050": 6.0,
25
25
  "rtx3090ti": 24.0,
26
26
  "rtx3090": 24.0,
27
27
  "rtx3080ti": 12.0,
@@ -32,7 +32,7 @@ KNOWN_GPUS = {
32
32
  "rtx3060ti": 8.0,
33
33
  "rtx306012gb": 12.0,
34
34
  "rtx3060": 8.0,
35
- "rtx3050ti" : 4.0,
35
+ "rtx3050ti": 4.0,
36
36
  "rtx3050": 8.0,
37
37
  "rtx2080ti": 11.0,
38
38
  "rtx2080super": 8.0,
@@ -43,7 +43,7 @@ KNOWN_GPUS = {
43
43
  "rtx206012gb": 12.0,
44
44
  "rtx2060": 6.0,
45
45
  "gtx1660super": 6.0,
46
- "gtx1660ti" : 6.0,
46
+ "gtx1660ti": 6.0,
47
47
  "gtx1660": 6.0,
48
48
  "gtx1650super": 4.0,
49
49
  "gtx1650": 4.0,
@@ -57,7 +57,6 @@ KNOWN_GPUS = {
57
57
  "titanxp": 12.0,
58
58
  "titanxpascal": 12.0,
59
59
  "titanx": 12.0,
60
-
61
60
  # --- NVIDIA Data Center / Workstation ---
62
61
  "b200": 192.0,
63
62
  "b100": 192.0,
@@ -89,7 +88,6 @@ KNOWN_GPUS = {
89
88
  "rtxa4000": 16.0,
90
89
  "quadrortx8000": 48.0,
91
90
  "quadrortx6000": 24.0,
92
-
93
91
  # --- AMD Consumer (RX 9000/7000/6000 Series) ---
94
92
  "rx9070xt": 16.0,
95
93
  "rx9070": 16.0,
@@ -115,8 +113,6 @@ KNOWN_GPUS = {
115
113
  "rx6600": 8.0,
116
114
  "rx580": 8.0,
117
115
  "rx570": 4.0,
118
-
119
-
120
116
  # --- AMD Data Center / Pro ---
121
117
  "mi300x": 192.0,
122
118
  "mi250x": 128.0,
@@ -124,7 +120,6 @@ KNOWN_GPUS = {
124
120
  "prow7900": 48.0,
125
121
  "prow7800": 32.0,
126
122
  "prow6800": 32.0,
127
-
128
123
  # --- Intel Consumer & Accelerators ---
129
124
  "arcb580": 12.0,
130
125
  "b580": 12.0,
@@ -138,63 +133,162 @@ KNOWN_GPUS = {
138
133
  "gaudi2": 96.0,
139
134
  }
140
135
 
136
+
141
137
  def normalize_gpu_string(name: str) -> str:
142
138
  """Strips vendor fluff, spaces, and hyphens to map correctly to KNOWN_GPUS."""
143
139
  name = name.lower()
144
-
140
+
145
141
  # Remove common vendor/marketing fluff that disrupts core identifiers
146
- fluff_words = ["nvidia", "geforce", "amd", "radeon", "intel", "arc", "generation", "edition", "graphics", "accelerator"]
142
+ fluff_words = [
143
+ "nvidia",
144
+ "geforce",
145
+ "amd",
146
+ "radeon",
147
+ "intel",
148
+ "arc",
149
+ "generation",
150
+ "edition",
151
+ "graphics",
152
+ "accelerator",
153
+ ]
147
154
  for word in fluff_words:
148
155
  name = name.replace(word, "")
149
-
150
- return re.sub(r'[\s\-]', '', name)
151
156
 
152
- def detect_local_gpu() -> Tuple[str, float, int]:
153
- # 1. NVIDIA
157
+ return re.sub(r"[\s\-]", "", name)
158
+
159
+
160
+ def _detect_nvidia_gpu() -> Optional[Tuple[str, float, int]]:
154
161
  try:
155
162
  result = subprocess.run(
156
- ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
157
- capture_output=True, text=True, check=True
163
+ [
164
+ "nvidia-smi",
165
+ "--query-gpu=name,memory.total",
166
+ "--format=csv,noheader,nounits",
167
+ ],
168
+ capture_output=True,
169
+ text=True,
170
+ check=True,
171
+ timeout=2.0,
158
172
  )
159
- lines = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
173
+ lines = [
174
+ line.strip() for line in result.stdout.strip().split("\n") if line.strip()
175
+ ]
160
176
  if lines:
161
177
  total_mb = 0
162
178
  for line in lines:
163
- parts = line.split(',')
179
+ parts = line.split(",")
164
180
  if len(parts) >= 2:
165
181
  total_mb += int(parts[1].strip())
166
-
182
+
167
183
  gpu_count = len(lines)
168
- first_name = lines[0].split(',')[0].strip()
169
- display_name = f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
184
+ first_name = lines[0].split(",")[0].strip()
185
+ display_name = (
186
+ f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
187
+ )
170
188
  return display_name, total_mb / 1024.0, gpu_count
171
189
  except Exception:
172
190
  pass
173
-
174
- # 2. AMD (ROCm)
191
+ return None
192
+
193
+
194
+ def _detect_amd_gpu() -> Optional[Tuple[str, float, int]]:
175
195
  try:
176
196
  result = subprocess.run(
177
197
  ["rocm-smi", "--showmeminfo", "vram"],
178
- capture_output=True, text=True, check=True
198
+ capture_output=True,
199
+ text=True,
200
+ check=True,
201
+ timeout=2.0,
179
202
  )
180
- lines = [line for line in result.stdout.strip().split('\n') if "Total Memory (B):" in line]
203
+ lines = [
204
+ line
205
+ for line in result.stdout.strip().split("\n")
206
+ if "Total Memory (B):" in line
207
+ ]
181
208
  if lines:
182
209
  total_bytes = 0
183
210
  gpu_count = len(lines)
184
211
  for line in lines:
185
- parts = line.split(':')
212
+ parts = line.split(":")
186
213
  if len(parts) >= 2:
187
214
  total_bytes += int(parts[1].strip())
188
- display_name = f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
215
+ display_name = (
216
+ f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
217
+ )
189
218
  return display_name, total_bytes / (1024.0**3), gpu_count
190
219
  except Exception:
191
220
  pass
192
-
193
- # 3. Apple Silicon
221
+ return None
222
+
223
+
224
def _parse_intel_vram(size_str: str) -> Optional[float]:
    """Convert a memory-size string such as ``"16384.00 MiB"`` to MiB.

    The first run of digits and dots is taken as the magnitude and the
    letters that follow it (if any) as the unit. Units are matched
    case-insensitively. Decimal and binary spellings (``GB``/``GiB``) are
    both treated as binary multiples. A missing or unrecognised unit leaves
    the value untouched, i.e. it is interpreted as MiB.

    Args:
        size_str: Text containing a number optionally followed by a unit.

    Returns:
        The size in MiB, or ``None`` when no usable number is present.
    """
    match = re.search(r"([\d\.]+)\s*([a-zA-Z]*)", size_str)
    if not match:
        return None
    try:
        val = float(match.group(1))
    except ValueError:
        # The character class also matches junk such as "." or "1.2.3";
        # report that as "unparseable" instead of raising.
        return None
    unit = match.group(2).lower()
    if unit in ("tib", "tb"):
        val *= 1024.0 * 1024.0
    elif unit in ("gib", "gb"):
        val *= 1024.0
    elif unit in ("kib", "kb"):
        val /= 1024.0
    elif unit == "b":
        val /= (1024.0 * 1024.0)
    return val
237
+
238
+
239
def _parse_xpu_smi_output(stdout: str) -> Tuple[list[str], float, int]:
    """Extract device names and total memory from ``xpu-smi discovery`` text.

    Each line is scanned case-insensitively for a ``device name:`` or a
    ``memory physical size:`` label. The value is whatever follows the
    label up to the next ``|``, with surrounding whitespace removed.

    Args:
        stdout: Captured standard output of the command.

    Returns:
        ``(names, total_mib, memory_entries)``: the device names in order of
        appearance, the sum of all memory sizes that could be parsed, and
        how many memory sizes contributed to that sum.
    """
    name_label = "device name:"
    memory_label = "memory physical size:"

    names: list[str] = []
    mib_sum: float = 0.0
    memory_hits: int = 0

    for raw in stdout.splitlines():
        lowered = raw.lower()

        name_at = lowered.find(name_label)
        if name_at != -1:
            tail = raw[name_at + len(name_label):]
            names.append(tail.split("|")[0].strip())
            continue

        memory_at = lowered.find(memory_label)
        if memory_at == -1:
            continue

        tail = raw[memory_at + len(memory_label):]
        parsed = _parse_intel_vram(tail.split("|")[0].strip())
        if parsed is not None:
            mib_sum += parsed
            memory_hits += 1

    return names, mib_sum, memory_hits
259
+
260
+
261
def _detect_intel_gpu() -> Optional[Tuple[str, float, int]]:
    """Probe for Intel GPUs by running ``xpu-smi discovery``.

    Returns:
        ``(display_name, total_vram_gb, gpu_count)`` when at least one device
        was found, every device reported a parseable memory size, and the
        summed size is positive. ``None`` otherwise, including when the
        command is missing, fails, or exceeds the 2-second timeout.
    """
    try:
        proc = subprocess.run(
            ["xpu-smi", "discovery"],
            capture_output=True,
            text=True,
            check=True,
            timeout=2.0,
        )
        names, mib_sum, memory_hits = _parse_xpu_smi_output(proc.stdout)
        count = len(names)

        # Require one memory entry per device so a partial parse is not
        # reported as the full VRAM total.
        if not names or memory_hits != count or not mib_sum > 0.0:
            return None

        label = names[0]
        if count > 1:
            label = f"Intel Multi-GPU ({count}x {label})"
        return label, mib_sum / 1024.0, count
    except Exception:
        # Best-effort probe: any failure means "no Intel GPU detected".
        return None
282
+
283
+
284
+ def _detect_apple_gpu() -> Optional[Tuple[str, float, int]]:
194
285
  try:
195
286
  result = subprocess.run(
196
287
  ["sysctl", "hw.memsize"],
197
- capture_output=True, text=True, check=True
288
+ capture_output=True,
289
+ text=True,
290
+ check=True,
291
+ timeout=2.0,
198
292
  )
199
293
  total_bytes = int(result.stdout.strip().split()[1])
200
294
  # Apply 75% operational heuristic for Apple Silicon wire limits
@@ -202,34 +296,62 @@ def detect_local_gpu() -> Tuple[str, float, int]:
202
296
  return "Apple Silicon (Unified Memory)", vram_gb, 1
203
297
  except Exception:
204
298
  pass
205
-
299
+ return None
300
+
301
+
302
def detect_local_gpu() -> Tuple[str, float, int]:
    """Return the first locally detected accelerator.

    Vendor probes run in a fixed order: NVIDIA, AMD (ROCm), Intel (xpu-smi),
    then Apple Silicon. The first probe that returns a result wins.

    Returns:
        ``(display_name, total_vram_gb, gpu_count)`` from the winning probe,
        or ``("Unknown", 8.0, 1)`` when every probe returns ``None``.
    """
    probes = (
        _detect_nvidia_gpu,
        _detect_amd_gpu,
        _detect_intel_gpu,
        _detect_apple_gpu,
    )
    for probe in probes:
        found = probe()
        if found is not None:
            return found

    return "Unknown", 8.0, 1
207
324
 
325
+
208
326
  def resolve_gpu(target: str) -> Tuple[str, float, int]:
209
327
  if target.lower() == "auto":
210
328
  return detect_local_gpu()
211
-
329
+
212
330
  # Apple Silicon routing trap
213
331
  lower_target = target.lower()
214
- if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(r'^m[1-4](-?(pro|max|ultra))?$', lower_target):
215
- raise ValueError("Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits.")
216
-
332
+ if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(
333
+ r"^m[1-4](-?(pro|max|ultra))?$", lower_target
334
+ ):
335
+ raise ValueError(
336
+ "Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits."
337
+ )
338
+
217
339
  # Parse potential multi-GPU format e.g., "2x RTX4090"
218
340
  gpu_count = 1
219
- match = re.match(r'^(\d+)x\s*(.+)$', lower_target)
341
+ match = re.match(r"^(\d+)x\s*(.+)$", lower_target)
220
342
  if match:
221
343
  gpu_count = int(match.group(1))
222
344
  target_name = match.group(2)
223
345
  else:
224
346
  target_name = target
225
-
347
+
226
348
  normalized = normalize_gpu_string(target_name)
227
-
349
+
228
350
  if normalized in KNOWN_GPUS:
229
351
  vram_gb = KNOWN_GPUS[normalized] * gpu_count
230
352
  display_name = f"{gpu_count}x {target_name}" if gpu_count > 1 else target_name
231
353
  return display_name, vram_gb, gpu_count
232
-
354
+
233
355
  # If the user passed a pure number, assume GB
234
356
  try:
235
357
  vram_gb = float(normalized) * gpu_count
@@ -237,5 +359,17 @@ def resolve_gpu(target: str) -> Tuple[str, float, int]:
237
359
  return display_name, vram_gb, gpu_count
238
360
  except ValueError:
239
361
  pass
240
-
241
- raise ValueError(f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value.")
362
+
363
+ import difflib
364
+
365
+ matches = difflib.get_close_matches(normalized, KNOWN_GPUS.keys(), n=3, cutoff=0.6)
366
+ if matches:
367
+ suggestions = ", ".join(matches)
368
+ raise ValueError(
369
+ f"Unknown GPU target '{target}'. Did you mean: {suggestions}? "
370
+ f"Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
371
+ )
372
+
373
+ raise ValueError(
374
+ f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
375
+ )
@@ -3,9 +3,27 @@ import json
3
3
  import os
4
4
  import struct
5
5
  import urllib.error
6
+ import urllib.parse
6
7
  import urllib.request
7
8
  from typing import Any, Dict, Tuple
8
9
 
10
def _get_hf_endpoint() -> str:
    """Return the Hugging Face base URL, honouring the ``HF_ENDPOINT`` override.

    The value is read from the environment on every call and defaults to
    ``https://huggingface.co``. Surrounding whitespace and trailing slashes
    are removed so callers can append ``/path`` directly.

    Returns:
        The validated base URL without a trailing slash.

    Raises:
        ValueError: If ``HF_ENDPOINT`` is empty, does not use the ``https``
            scheme, or carries no hostname.
    """
    endpoint = os.environ.get("HF_ENDPOINT", "https://huggingface.co").strip()
    if not endpoint:
        # Only https is accepted below, so do not advertise plain http here.
        raise ValueError("HF_ENDPOINT is set but empty; expected a valid https:// URL")
    endpoint = endpoint.rstrip("/")
    # URL schemes are case-insensitive, so compare against a lowered copy.
    if not endpoint.lower().startswith("https://"):
        raise ValueError(
            f"HF_ENDPOINT must use https:// scheme, got: {endpoint}"
        )
    parsed = urllib.parse.urlparse(endpoint)
    # ``netloc`` is non-empty for host-less values such as "https://:443" or
    # "https://user@"; ``hostname`` is None for those.
    if not parsed.hostname:
        raise ValueError(
            f"HF_ENDPOINT must include a valid hostname, got: {endpoint}"
        )
    return endpoint
25
+
26
+
9
27
  def _get_hf_token() -> str | None:
10
28
  token = os.environ.get("HF_TOKEN")
11
29
  if token:
@@ -29,7 +47,12 @@ def _get_hf_token() -> str | None:
29
47
 
30
48
  return None
31
49
 
32
- def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None) -> bytes:
50
+ def _make_request(
51
+ url: str,
52
+ headers: Dict[str, str] = None,
53
+ limit: int | None = None,
54
+ timeout: float = 10.0,
55
+ ) -> bytes:
33
56
  if headers is None:
34
57
  headers = {}
35
58
 
@@ -39,7 +62,7 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
39
62
 
40
63
  req = urllib.request.Request(url, headers=headers)
41
64
  try:
42
- with urllib.request.urlopen(req, timeout=10) as response:
65
+ with urllib.request.urlopen(req, timeout=timeout) as response:
43
66
  if limit is not None:
44
67
  return response.read(limit)
45
68
  return response.read()
@@ -50,16 +73,16 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
50
73
  raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}")
51
74
  raise
52
75
 
53
- def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
54
- url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
76
+ def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0) -> Dict[str, Any]:
77
+ url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/{filename}"
55
78
 
56
79
  # 1. Fetch the first 500KB in a single roundtrip
57
80
  headers = {"Range": "bytes=0-500000"}
58
81
  try:
59
- chunk = _make_request(url, headers=headers, limit=500000)
82
+ chunk = _make_request(url, headers=headers, limit=500000, timeout=timeout)
60
83
  except urllib.error.HTTPError as e:
61
84
  if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB)
62
- chunk = _make_request(url, limit=500000)
85
+ chunk = _make_request(url, limit=500000, timeout=timeout)
63
86
  else:
64
87
  raise
65
88
 
@@ -74,18 +97,18 @@ def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
74
97
  else:
75
98
  # 3. Double-roundtrip only if the header is massive (>500KB)
76
99
  headers = {"Range": f"bytes=8-{8+header_size-1}"}
77
- json_bytes = _make_request(url, headers=headers, limit=header_size)
100
+ json_bytes = _make_request(url, headers=headers, limit=header_size, timeout=timeout)
78
101
 
79
102
  return json.loads(json_bytes)
80
103
 
81
- def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
104
+ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
82
105
  """
83
106
  Fetches the metadata directly from the Hugging Face Hub over the network.
84
107
  Returns: (tensors, config, format_name, disk_size)
85
108
  """
86
- api_url = f"https://huggingface.co/api/models/{repo_id}"
109
+ api_url = f"{_get_hf_endpoint()}/api/models/{repo_id}"
87
110
  try:
88
- api_data = json.loads(_make_request(api_url).decode("utf-8"))
111
+ api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8"))
89
112
  except urllib.error.HTTPError as e:
90
113
  if e.code == 401:
91
114
  raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}")
@@ -98,16 +121,16 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
98
121
 
99
122
  config = None
100
123
  if "config.json" in filenames:
101
- config_url = f"https://huggingface.co/{repo_id}/resolve/main/config.json"
102
- config = json.loads(_make_request(config_url).decode("utf-8"))
124
+ config_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/config.json"
125
+ config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8"))
103
126
 
104
127
  tensors = {}
105
128
  total_size = 0.0
106
129
 
107
130
  if "model.safetensors.index.json" in filenames:
108
131
  # Sharded SafeTensors
109
- index_url = f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors.index.json"
110
- index_data = json.loads(_make_request(index_url).decode("utf-8"))
132
+ index_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors.index.json"
133
+ index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8"))
111
134
 
112
135
  weight_map = index_data.get("weight_map", {})
113
136
  unique_shards = list(set(weight_map.values()))
@@ -128,7 +151,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
128
151
  }
129
152
  else:
130
153
  def fetch_shard(shard: str):
131
- return shard, _fetch_safetensors_header(repo_id, shard)
154
+ return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout)
132
155
 
133
156
  with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor:
134
157
  future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards}
@@ -149,17 +172,17 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
149
172
  # Single SafeTensors
150
173
 
151
174
  # Determine total size first
152
- req = urllib.request.Request(f"https://huggingface.co/{repo_id}/resolve/main/model.safetensors", method="HEAD")
175
+ req = urllib.request.Request(f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors", method="HEAD")
153
176
  token = _get_hf_token()
154
177
  if token:
155
178
  req.add_header("Authorization", f"Bearer {token}")
156
179
  try:
157
- with urllib.request.urlopen(req) as response:
180
+ with urllib.request.urlopen(req, timeout=timeout) as response:
158
181
  total_size = int(response.headers.get("Content-Length", 0))
159
182
  except Exception:
160
183
  pass
161
184
 
162
- header = _fetch_safetensors_header(repo_id, "model.safetensors")
185
+ header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout)
163
186
  tensors = header
164
187
 
165
188
  format_name = "SafeTensors"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modelinfo-cli
3
- Version: 1.4.3
3
+ Version: 1.4.4
4
4
  Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
5
5
  Author: ModelInfo Contributors
6
6
  License: MIT
@@ -164,12 +164,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
164
164
  | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
165
165
  | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
166
166
  | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
167
+ | `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
167
168
  | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
168
169
  | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
169
170
  | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
170
171
  | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
171
172
  | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
172
173
  | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
174
+ | `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
175
+ | `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
173
176
 
174
177
  ## Architecture
175
178
 
@@ -23,4 +23,5 @@ src/modelinfo_cli.egg-info/top_level.txt
23
23
  tests/test_calculator.py
24
24
  tests/test_cli.py
25
25
  tests/test_constraints.py
26
+ tests/test_hardware.py
26
27
  tests/test_parsers.py
@@ -0,0 +1,179 @@
1
+ import pytest
2
+
3
+ import modelinfo.cli as cli
4
+ from modelinfo import __version__
5
+ from modelinfo.cli import parse_args
6
+
7
+
8
def test_version_flag_prints_installed_version(capsys):
    """``--version`` exits with status 0 after printing ``modelinfo <version>``."""
    with pytest.raises(SystemExit) as exit_info:
        parse_args(["--version"])

    assert exit_info.value.code == 0

    printed = capsys.readouterr().out
    assert f"modelinfo {__version__}" in printed
14
+
15
+
16
def test_batch_size_flag_defaults_to_one():
    """Without ``--batch-size`` the parsed namespace carries a batch size of 1."""
    namespace = parse_args(["model.gguf"])
    assert namespace.batch_size == 1
20
+
21
+
22
def test_batch_size_flag_accepts_integer():
    """``--batch-size 4`` is parsed into the integer 4."""
    argv = ["--batch-size", "4", "model.gguf"]
    namespace = parse_args(argv)
    assert namespace.batch_size == 4
26
+
27
+
28
def test_batch_size_flag_rejects_zero():
    """A batch size of 0 makes ``parse_args`` exit with status 2."""
    argv = ["--batch-size", "0", "model.gguf"]
    with pytest.raises(SystemExit) as exit_info:
        parse_args(argv)

    assert exit_info.value.code == 2
33
+
34
+
35
def test_batch_size_flag_rejects_negative():
    """A negative batch size makes ``parse_args`` exit with status 2."""
    argv = ["--batch-size", "-1", "model.gguf"]
    with pytest.raises(SystemExit) as exit_info:
        parse_args(argv)

    assert exit_info.value.code == 2
40
+
41
+
42
def test_timeout_flag_defaults_to_ten_seconds():
    """Without ``--timeout`` the parsed namespace carries a timeout of 10.0."""
    namespace = parse_args(["model.gguf"])
    assert namespace.timeout == 10.0
46
+
47
+
48
def test_timeout_flag_accepts_float():
    """A fractional ``--timeout`` value is parsed as a float."""
    argv = ["--timeout", "30.5", "model.gguf"]
    namespace = parse_args(argv)
    assert namespace.timeout == 30.5
52
+
53
+
54
def test_timeout_flag_rejects_zero():
    """A timeout of 0 makes ``parse_args`` exit with status 2."""
    argv = ["--timeout", "0", "model.gguf"]
    with pytest.raises(SystemExit) as exit_info:
        parse_args(argv)

    assert exit_info.value.code == 2
59
+
60
+
61
def test_timeout_flag_rejects_negative():
    """A negative timeout makes ``parse_args`` exit with status 2."""
    argv = ["--timeout", "-1", "model.gguf"]
    with pytest.raises(SystemExit) as exit_info:
        parse_args(argv)

    assert exit_info.value.code == 2
66
+
67
+
68
def test_timeout_flag_rejects_nan():
    """The literal ``nan`` is not accepted as a timeout (exit status 2)."""
    argv = ["--timeout", "nan", "model.gguf"]
    with pytest.raises(SystemExit) as exit_info:
        parse_args(argv)

    assert exit_info.value.code == 2
73
+
74
+
75
def test_timeout_flag_rejects_inf():
    """The literal ``inf`` is not accepted as a timeout (exit status 2)."""
    argv = ["--timeout", "inf", "model.gguf"]
    with pytest.raises(SystemExit) as exit_info:
        parse_args(argv)

    assert exit_info.value.code == 2
80
+
81
+
82
def test_analyze_model_passes_batch_size_to_footprint(monkeypatch, tmp_path):
    """``analyze_model`` hands ``batch_size`` and the context override to the calculator.

    The GGUF parser, footprint calculator and architecture namer on ``cli`` are
    replaced with stubs so only the argument plumbing is exercised.
    """
    gguf_file = tmp_path / "model.gguf"
    gguf_file.write_bytes(b"mock")
    seen = {}

    def fake_parse_gguf_header(file_path):
        assert file_path == str(gguf_file)
        tensor_entry = {"shape": [1, 1], "dtype": "F16"}
        return {"model.layers.0.self_attn.k_proj.weight": tensor_entry}

    def fake_calculate_footprint(tensors, *, context_length, batch_size, **kwargs):
        # Record what the CLI forwarded so it can be compared after the call.
        seen["batch_size"] = batch_size
        seen["context_length"] = context_length
        footprint = {
            "total_params": 1,
            "base_memory_bytes": 2.0,
            "kv_cache_bytes": float(batch_size),
            "overhead_bytes": 0.0,
            "total_memory_bytes": 2.0 + batch_size,
            "num_layers": 1,
            "kv_dim": 1,
            "primary_dtype": "F16",
            "kv_is_estimate": False,
            "penalty_percentage": 0.0,
            "vllm_metrics": {},
        }
        return footprint

    def fake_identify_architecture_name(tensors, num_layers, config):
        return "Mock"

    monkeypatch.setattr(cli, "parse_gguf_header", fake_parse_gguf_header)
    monkeypatch.setattr(cli, "calculate_footprint", fake_calculate_footprint)
    monkeypatch.setattr(
        cli, "identify_architecture_name", fake_identify_architecture_name
    )

    info = cli.analyze_model(str(gguf_file), context_override=128, batch_size=4)

    assert seen == {"batch_size": 4, "context_length": 128}
    assert info["footprint"]["kv_cache_bytes"] == 4.0
120
+
121
+
122
def test_analyze_model_passes_timeout_to_huggingface(monkeypatch):
    """``analyze_model`` forwards ``timeout`` and ``fetch_tensors`` to the HF fetcher.

    ``os.path.exists`` is forced to return False and the Hugging Face fetcher
    is stubbed, so the test only checks which arguments reach it.
    """
    seen = {}

    def fake_exists(path):
        return False

    def fake_fetch(repo_id, *, fetch_tensors, timeout):
        seen.update(repo_id=repo_id, fetch_tensors=fetch_tensors, timeout=timeout)
        tensors = {
            "model.layers.0.self_attn.k_proj.weight": {
                "shape": [1, 1],
                "dtype": "F16",
            }
        }
        return (tensors, None, "SafeTensors", 7.0)

    def fake_calculate_footprint(tensors, *, context_length, batch_size, **kwargs):
        footprint = {
            "total_params": 1,
            "base_memory_bytes": 2.0,
            "kv_cache_bytes": 1.0,
            "overhead_bytes": 0.0,
            "total_memory_bytes": 3.0,
            "num_layers": 1,
            "kv_dim": 1,
            "primary_dtype": "F16",
            "kv_is_estimate": False,
            "penalty_percentage": 0.0,
            "vllm_metrics": {},
        }
        return footprint

    def fake_identify_architecture_name(tensors, num_layers, config):
        return "Mock"

    from modelinfo.parsers import huggingface

    monkeypatch.setattr(cli.os.path, "exists", fake_exists)
    monkeypatch.setattr(huggingface, "fetch_huggingface_repo", fake_fetch)
    monkeypatch.setattr(cli, "calculate_footprint", fake_calculate_footprint)
    monkeypatch.setattr(
        cli, "identify_architecture_name", fake_identify_architecture_name
    )

    cli.analyze_model(
        "org/model",
        context_override=128,
        fetch_tensors=True,
        timeout=22.5,
    )

    expected = {
        "repo_id": "org/model",
        "fetch_tensors": True,
        "timeout": 22.5,
    }
    assert seen == expected
@@ -0,0 +1,255 @@
1
+ import subprocess
2
+
3
+ import pytest
4
+
5
+ from modelinfo import hardware
6
+
7
+
8
def completed(stdout: str) -> subprocess.CompletedProcess:
    """Return a ``CompletedProcess`` with empty args, exit status 0 and *stdout*."""
    no_args: list = []
    exit_status = 0
    return subprocess.CompletedProcess(no_args, exit_status, stdout)
10
+
11
+
12
def test_normalize_gpu_string_removes_vendor_fluff_and_separators():
    """Full marketing names are reduced to short lowercase identifiers."""
    expectations = {
        "NVIDIA GeForce RTX 4090": "rtx4090",
        "AMD Radeon RX-7900 XTX Graphics": "rx7900xtx",
        "Intel Arc A770 Edition": "a770",
    }
    for raw_name, normalized in expectations.items():
        assert hardware.normalize_gpu_string(raw_name) == normalized
18
+
19
+
20
def test_resolve_gpu_matches_known_gpu():
    """A recognised GPU name resolves to (name as given, 24.0, 1)."""
    gpu_name = "NVIDIA GeForce RTX 4090"
    resolved = hardware.resolve_gpu(gpu_name)
    assert resolved == (gpu_name, 24.0, 1)
26
+
27
+
28
def test_resolve_gpu_handles_multi_gpu_string():
    """An ``Nx`` prefix multiplies the VRAM and is reported as the GPU count."""
    resolved = hardware.resolve_gpu("2x RTX4090")
    assert resolved == ("2x rtx4090", 48.0, 2)
30
+
31
+
32
def test_resolve_gpu_accepts_numeric_vram_target():
    """Bare numbers are treated as a custom VRAM size, optionally with a count prefix."""
    single = hardware.resolve_gpu("16")
    assert single == ("Custom (16.0 GB)", 16.0, 1)

    quad = hardware.resolve_gpu("4x 12")
    assert quad == ("Custom (48.0 GB)", 48.0, 4)
35
+
36
+
37
def test_resolve_gpu_delegates_auto_detection(monkeypatch):
    """``resolve_gpu("auto")`` returns whatever ``detect_local_gpu`` reports."""
    detected = ("Local GPU", 12.0, 1)

    def fake_detect_local_gpu():
        return detected

    monkeypatch.setattr(hardware, "detect_local_gpu", fake_detect_local_gpu)

    assert hardware.resolve_gpu("auto") == ("Local GPU", 12.0, 1)
41
+
42
+
43
def test_resolve_gpu_rejects_apple_silicon_shortcuts():
    """An Apple Silicon chip name raises ``ValueError`` instead of resolving."""
    expected_message = "Apple Silicon VRAM varies"
    with pytest.raises(ValueError, match=expected_message):
        hardware.resolve_gpu("m3-max")
46
+
47
+
48
def test_resolve_gpu_rejects_unknown_gpu_name():
    """An unrecognised name raises ``ValueError`` that quotes the offending input."""
    expected_message = "Unknown GPU target 'Mystery GPU'"
    with pytest.raises(ValueError, match=expected_message):
        hardware.resolve_gpu("Mystery GPU")
51
+
52
+
53
def test_resolve_gpu_suggests_close_matches():
    """A near-miss name raises ``ValueError`` whose message proposes ``rtx4090``."""
    expected_pattern = "Unknown GPU target 'rtx490'\\. Did you mean:.*rtx4090"
    with pytest.raises(ValueError, match=expected_pattern):
        hardware.resolve_gpu("rtx490")
59
+
60
+
61
def test_detect_local_gpu_reads_nvidia_smi(monkeypatch):
    """One nvidia-smi CSV row is reported as a single GPU; 24576 becomes 24.0.

    The stub also pins the exact command line and ``subprocess.run`` options
    that ``detect_local_gpu`` is expected to use.
    """
    expected_command = [
        "nvidia-smi",
        "--query-gpu=name,memory.total",
        "--format=csv,noheader,nounits",
    ]
    expected_options = {
        "capture_output": True,
        "text": True,
        "check": True,
        "timeout": 2.0,
    }

    def fake_run(command, **kwargs):
        assert command == expected_command
        assert kwargs == expected_options
        return completed("NVIDIA GeForce RTX 4090, 24576\n")

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    detected = hardware.detect_local_gpu()
    assert detected == ("NVIDIA GeForce RTX 4090", 24.0, 1)
79
+
80
+
81
def test_detect_local_gpu_sums_multiple_nvidia_gpus(monkeypatch):
    """Two identical nvidia-smi rows are summed and labelled as a multi-GPU setup."""
    two_rows = "NVIDIA GeForce RTX 4090, 24576\nNVIDIA GeForce RTX 4090, 24576\n"

    def fake_run(*args, **kwargs):
        return completed(two_rows)

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    detected = hardware.detect_local_gpu()
    assert detected == ("Multi-GPU: 2x NVIDIA GeForce RTX 4090", 48.0, 2)
95
+
96
+
97
def test_detect_local_gpu_falls_back_to_rocm_smi(monkeypatch):
    """When nvidia-smi is missing, rocm-smi output is used and its totals are summed."""
    memory_line = "Total Memory (B): 17179869184\n"

    def fake_run(command, **kwargs):
        tool = command[0]
        if tool == "nvidia-smi":
            raise FileNotFoundError("nvidia-smi not installed")
        assert command == ["rocm-smi", "--showmeminfo", "vram"]
        # Two identical lines stand in for two devices.
        return completed(memory_line + memory_line)

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    detected = hardware.detect_local_gpu()
    assert detected == ("AMD Multi-GPU (2x)", 32.0, 2)
109
+
110
+
111
def test_detect_local_gpu_falls_back_to_xpu_smi(monkeypatch):
    """With nvidia-smi and rocm-smi missing, ``xpu-smi discovery`` output is parsed."""
    border = "+-----------+------------------------------------------------------+\n"
    table_rows = [
        border,
        "| Device ID | Device Information |\n",
        border,
        "| 0 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n",
        "| | Vendor Name: Intel(R) Corporation |\n",
        "| | Memory Physical Size: 16384.00 MiB |\n",
        border,
    ]

    def fake_run(command, **kwargs):
        tool = command[0]
        if tool == "nvidia-smi" or tool == "rocm-smi":
            raise FileNotFoundError(tool)
        assert command == ["xpu-smi", "discovery"]  # nosec
        return completed("".join(table_rows))

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    detected = hardware.detect_local_gpu()
    assert detected == ("Intel(R) Arc(TM) A770 Graphics", 16.0, 1)  # nosec
130
+
131
+
132
def test_detect_local_gpu_sums_multiple_intel_gpus(monkeypatch):
    """Two devices in the xpu-smi table are summed and labelled as Intel multi-GPU."""
    border = "+-----------+------------------------------------------------------+\n"

    def device_rows(device_id):
        # One table section: device name row, memory row, closing border.
        return (
            f"| {device_id} | Device Name: Intel(R) Data Center GPU Flex 170 |\n"
            "| | Memory Physical Size: 16384.00 MiB |\n" + border
        )

    table = (
        border
        + "| Device ID | Device Information |\n"
        + border
        + device_rows(0)
        + device_rows(1)
    )

    def fake_run(command, **kwargs):
        tool = command[0]
        if tool == "nvidia-smi" or tool == "rocm-smi":
            raise FileNotFoundError(tool)
        assert command == ["xpu-smi", "discovery"]  # nosec
        return completed(table)

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    detected = hardware.detect_local_gpu()
    expected = (
        "Intel Multi-GPU (2x Intel(R) Data Center GPU Flex 170)",
        32.0,
        2,
    )
    assert detected == expected  # nosec
157
+
158
+
159
@pytest.mark.parametrize(
    ("size_str", "expected_vram"),
    [
        ("16.00 GiB", 16.0),
        ("16.00 GB", 16.0),
        ("16777216.00 KiB", 16.0),
        ("17179869184.00 B", 16.0),
        ("16384.00 MiB", 16.0),
        ("16384.00 MB", 16.0),
        ("16384.00", 16.0),  # Default MiB unit
    ],
)
def test_detect_local_gpu_intel_unit_conversions(monkeypatch, size_str, expected_vram):
    """Each unit suffix in xpu-smi's memory size field converts to the same VRAM.

    Every unit is its own parametrized case, so a failure names the offending
    size string and does not stop the remaining units from being checked.

    Args:
        monkeypatch: pytest fixture used to replace ``subprocess.run``.
        size_str: Text placed after ``Memory Physical Size:`` in the fake table.
        expected_vram: VRAM value ``detect_local_gpu`` is expected to report.
    """
    border = "+-----------+------------------------------------------------------+\n"

    def fake_run(command, **kwargs):
        # Make the NVIDIA and AMD probes look uninstalled so Intel is reached.
        if command[0] in {"nvidia-smi", "rocm-smi"}:
            raise FileNotFoundError(command[0])
        assert command == ["xpu-smi", "discovery"]  # nosec
        stdout = (
            border
            + "| Device ID | Device Information |\n"
            + border
            + "| 0 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n"
            + f"| | Memory Physical Size: {size_str} |\n"
            + border
        )
        return completed(stdout)

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    assert hardware.detect_local_gpu() == (  # nosec
        "Intel(R) Arc(TM) A770 Graphics",
        expected_vram,
        1,
    )
186
+
187
+
188
def test_detect_local_gpu_falls_back_on_malformed_xpu_smi(monkeypatch):
    """An xpu-smi table without a parseable memory size yields the default result."""
    border = "+-----------+------------------------------------------------------+\n"
    # The device name is present, but the memory field is "N/A".
    malformed_table = "".join(
        [
            border,
            "| Device ID | Device Information |\n",
            border,
            "| 0 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n",
            "| | Vendor Name: Intel(R) Corporation |\n",
            "| | Memory Physical Size: N/A |\n",
            border,
        ]
    )

    def fake_run(command, **kwargs):
        tool = command[0]
        # Every probe other than xpu-smi behaves as if the tool is not installed.
        if tool != "xpu-smi":
            raise FileNotFoundError(tool)
        return completed(malformed_table)

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    # xpu-smi gave no usable memory value and nothing else is available,
    # so detection must end at the default tuple.
    assert hardware.detect_local_gpu() == ("Unknown", 8.0, 1)  # nosec
210
+
211
+
212
def test_detect_local_gpu_falls_back_on_mismatched_intel_count(monkeypatch):
    """Two Intel devices with only one memory entry yield the default result."""
    border = "+-----------+------------------------------------------------------+\n"
    # Device 0 reports a memory size; device 1 has a name row only.
    mismatched_table = "".join(
        [
            border,
            "| Device ID | Device Information |\n",
            border,
            "| 0 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n",
            "| | Memory Physical Size: 16384.00 MiB |\n",
            border,
            "| 1 | Device Name: Intel(R) Arc(TM) A770 Graphics |\n",
            border,
        ]
    )

    def fake_run(command, **kwargs):
        tool = command[0]
        # Every probe other than xpu-smi behaves as if the tool is not installed.
        if tool != "xpu-smi":
            raise FileNotFoundError(tool)
        return completed(mismatched_table)

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    # Device count (2) differs from the number of memory entries (1),
    # so the Intel result must be discarded.
    assert hardware.detect_local_gpu() == ("Unknown", 8.0, 1)  # nosec
235
+
236
+
237
def test_detect_local_gpu_falls_back_to_apple_unified_memory(monkeypatch):
    """With no vendor tool present, ``sysctl hw.memsize`` drives the result.

    A reported 17179869184 bytes is expected to surface as 12.0 under the
    "Apple Silicon (Unified Memory)" label.
    """
    vendor_tools = ("nvidia-smi", "rocm-smi", "xpu-smi")

    def fake_run(command, **kwargs):
        tool = command[0]
        if tool in vendor_tools:
            raise FileNotFoundError(tool)
        assert command == ["sysctl", "hw.memsize"]
        return completed("hw.memsize: 17179869184\n")

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    detected = hardware.detect_local_gpu()
    assert detected == ("Apple Silicon (Unified Memory)", 12.0, 1)
247
+
248
+
249
def test_detect_local_gpu_returns_default_when_detection_fails(monkeypatch):
    """If every probe command is missing, the default ("Unknown", 8.0, 1) is returned."""

    def fake_run(command, **kwargs):
        tool = command[0]
        raise FileNotFoundError(tool)

    monkeypatch.setattr(hardware.subprocess, "run", fake_run)

    detected = hardware.detect_local_gpu()
    assert detected == ("Unknown", 8.0, 1)
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import pytest
3
+ from modelinfo.parsers.huggingface import _get_hf_endpoint
3
4
  from modelinfo.parsers.safetensors import parse_safetensors_header
4
5
 
5
6
  FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
@@ -45,3 +46,39 @@ def test_gguf_parser_metadata():
45
46
  # Verify the architecture bypass parses it to titlecase and prevents "Unknown Architecture"
46
47
  arch_name = identify_architecture_name(tensors, num_layers=1)
47
48
  assert arch_name == "Qwen2 (1 transformer layers)"
49
+
50
+
51
+
52
+
53
def test_hf_endpoint_valid_https(monkeypatch):
    """An ``https://`` value in ``HF_ENDPOINT`` is returned unchanged."""
    configured = "https://huggingface.co"
    monkeypatch.setenv("HF_ENDPOINT", configured)

    resolved = _get_hf_endpoint()
    assert resolved == "https://huggingface.co"
57
+
58
+
59
def test_hf_endpoint_default_https(monkeypatch):
    """With ``HF_ENDPOINT`` unset, the endpoint is ``https://huggingface.co``."""
    monkeypatch.delenv("HF_ENDPOINT", raising=False)

    assert _get_hf_endpoint() == "https://huggingface.co"
64
+
65
+
66
def test_hf_endpoint_rejects_http(monkeypatch):
    """A plain ``http://`` endpoint raises ``ValueError`` about the scheme."""
    monkeypatch.setenv("HF_ENDPOINT", "http://localhost:8080")

    expected_message = "must use https:// scheme"
    with pytest.raises(ValueError, match=expected_message):
        _get_hf_endpoint()
71
+
72
+
73
def test_hf_endpoint_rejects_empty(monkeypatch):
    """``HF_ENDPOINT`` set to the empty string raises ``ValueError``."""
    empty_value = ""
    monkeypatch.setenv("HF_ENDPOINT", empty_value)

    with pytest.raises(ValueError):
        _get_hf_endpoint()
78
+
79
+
80
def test_hf_endpoint_rejects_no_hostname(monkeypatch):
    """An ``https`` URL with an empty host raises ``ValueError`` about the hostname."""
    monkeypatch.setenv("HF_ENDPOINT", "https:///repo")

    expected_message = "must include a valid hostname"
    with pytest.raises(ValueError, match=expected_message):
        _get_hf_endpoint()
@@ -1,12 +0,0 @@
1
- import pytest
2
-
3
- from modelinfo import __version__
4
- from modelinfo.cli import parse_args
5
-
6
-
7
- def test_version_flag_prints_installed_version(capsys):
8
- with pytest.raises(SystemExit) as exc_info:
9
- parse_args(["--version"])
10
-
11
- assert exc_info.value.code == 0
12
- assert f"modelinfo {__version__}" in capsys.readouterr().out
File without changes
File without changes