modelinfo-cli 1.4.2__tar.gz → 1.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/PKG-INFO +4 -1
  2. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/README.md +3 -0
  3. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/pyproject.toml +1 -1
  4. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/__init__.py +1 -1
  5. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/cli.py +91 -34
  6. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/hardware.py +182 -38
  7. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/huggingface.py +48 -21
  8. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/PKG-INFO +4 -1
  9. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/SOURCES.txt +2 -0
  10. modelinfo_cli-1.4.4/tests/test_cli.py +179 -0
  11. modelinfo_cli-1.4.4/tests/test_hardware.py +255 -0
  12. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/tests/test_parsers.py +37 -0
  13. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/LICENSE +0 -0
  14. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/setup.cfg +0 -0
  15. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/__main__.py +0 -0
  16. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/architecture.py +0 -0
  17. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/calculator.py +0 -0
  18. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/__init__.py +0 -0
  19. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/base.py +0 -0
  20. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/gguf.py +0 -0
  21. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/pytorch.py +0 -0
  22. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/safetensors.py +0 -0
  23. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/ui.py +0 -0
  24. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/dependency_links.txt +0 -0
  25. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/entry_points.txt +0 -0
  26. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/requires.txt +0 -0
  27. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/top_level.txt +0 -0
  28. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/tests/test_calculator.py +0 -0
  29. {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/tests/test_constraints.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modelinfo-cli
3
- Version: 1.4.2
3
+ Version: 1.4.4
4
4
  Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
5
5
  Author: ModelInfo Contributors
6
6
  License: MIT
@@ -164,12 +164,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
164
164
  | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
165
165
  | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
166
166
  | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
167
+ | `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
167
168
  | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
168
169
  | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
169
170
  | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
170
171
  | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
171
172
  | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
172
173
  | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
174
+ | `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
175
+ | `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
173
176
 
174
177
  ## Architecture
175
178
 
@@ -146,12 +146,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
146
146
  | `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
147
147
  | `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
148
148
  | `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
149
+ | `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
149
150
  | `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
150
151
  | `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
151
152
  | `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
152
153
  | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
153
154
  | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
154
155
  | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
156
+ | `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
157
+ | `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
155
158
 
156
159
  ## Architecture
157
160
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "modelinfo-cli"
7
- version = "1.4.2"
7
+ version = "1.4.4"
8
8
  description = "A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -2,4 +2,4 @@
2
2
  modelinfo - A high-performance CLI utility for inspecting ML model checkpoints.
3
3
  """
4
4
 
5
- __version__ = "1.4.2"
5
+ __version__ = "1.4.4"
@@ -1,9 +1,9 @@
1
1
  import argparse
2
2
  import json
3
+ import math
3
4
  import os
4
5
  import sys
5
6
  from typing import Sequence
6
-
7
7
  from modelinfo.architecture import identify_architecture_name
8
8
  from modelinfo.calculator import calculate_footprint
9
9
  from modelinfo.parsers.gguf import parse_gguf_header
@@ -12,6 +12,43 @@ from modelinfo.parsers.safetensors import parse_safetensors_header
12
12
  from modelinfo.ui import console, print_model_info, print_compare_info
13
13
 
14
14
 
15
+ class VersionAction(argparse.Action):
16
+ def __init__(self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help="show program's version number and exit"):
17
+ super().__init__(
18
+ option_strings=option_strings,
19
+ dest=dest,
20
+ default=default,
21
+ nargs=0,
22
+ help=help,
23
+ )
24
+
25
+ def __call__(self, parser, namespace, values, option_string=None):
26
+ from importlib.metadata import PackageNotFoundError, version
27
+ from modelinfo import __version__
28
+
29
+ try:
30
+ ver = version("modelinfo-cli")
31
+ except PackageNotFoundError:
32
+ ver = __version__
33
+
34
+ print(f"{parser.prog} {ver}")
35
+ parser.exit()
36
+
37
+
38
+ def _positive_int(value: str) -> int:
39
+ ivalue = int(value)
40
+ if ivalue < 1:
41
+ raise argparse.ArgumentTypeError("batch size must be at least 1")
42
+ return ivalue
43
+
44
+
45
+ def _positive_float(value: str) -> float:
46
+ fvalue = float(value)
47
+ if not math.isfinite(fvalue) or fvalue <= 0:
48
+ raise argparse.ArgumentTypeError("timeout must be a finite number greater than 0")
49
+ return fvalue
50
+
51
+
15
52
  def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
16
53
  parser = argparse.ArgumentParser(
17
54
  prog="modelinfo",
@@ -30,6 +67,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
30
67
  default=None,
31
68
  help="Context length for dynamic KV cache footprint calculation.",
32
69
  )
70
+ parser.add_argument(
71
+ "--batch-size",
72
+ type=_positive_int,
73
+ default=1,
74
+ help="Batch size for dynamic KV cache footprint calculation.",
75
+ )
33
76
  parser.add_argument(
34
77
  "--max-vram",
35
78
  type=float,
@@ -47,6 +90,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
47
90
  action="store_true",
48
91
  help="Deep dive: Fetch all remote tensor shards to display the exact tensor size breakdown.",
49
92
  )
93
+ parser.add_argument(
94
+ "--timeout",
95
+ type=_positive_float,
96
+ default=10.0,
97
+ help="Network timeout in seconds for remote Hugging Face fetches.",
98
+ )
50
99
  parser.add_argument(
51
100
  "--topology",
52
101
  type=str,
@@ -72,6 +121,11 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
72
121
  default=0.9,
73
122
  help="vLLM gpu_memory_utilization ratio (default 0.9). Reserves 10 percent for PyTorch context.",
74
123
  )
124
+ parser.add_argument(
125
+ "-v",
126
+ "--version",
127
+ action=VersionAction,
128
+ )
75
129
 
76
130
  return parser.parse_args(argv)
77
131
 
@@ -79,8 +133,10 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
79
133
  def analyze_model(
80
134
  file_path: str,
81
135
  context_override: int | None,
82
- gpu_count: int = 1,
136
+ gpu_count: int = 1,
137
+ batch_size: int = 1,
83
138
  fetch_tensors: bool = False,
139
+ timeout: float = 10.0,
84
140
  topology: str = "pcie4",
85
141
  strategy: str = "tp",
86
142
  is_vllm: bool = False,
@@ -95,7 +151,9 @@ def analyze_model(
95
151
 
96
152
  if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
97
153
  from modelinfo.parsers.huggingface import fetch_huggingface_repo
98
- tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors)
154
+ tensors, config, format_name, disk_size = fetch_huggingface_repo(
155
+ file_path, fetch_tensors=fetch_tensors, timeout=timeout
156
+ )
99
157
  elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
100
158
  tensors = parse_safetensors_header(file_path)
101
159
  format_name = "SafeTensors"
@@ -114,6 +172,8 @@ def analyze_model(
114
172
  elif file_path_lower.endswith(".pt") or file_path_lower.endswith(".bin"):
115
173
  tensors = parse_pytorch_header(file_path)
116
174
  format_name = "PyTorch"
175
+ elif os.path.isdir(file_path):
176
+ raise IsADirectoryError(f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory.")
117
177
  else:
118
178
  raise ValueError(f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID.")
119
179
 
@@ -135,6 +195,7 @@ def analyze_model(
135
195
  footprint = calculate_footprint(
136
196
  tensors,
137
197
  context_length=context_length,
198
+ batch_size=batch_size,
138
199
  config=config,
139
200
  gpu_count=gpu_count,
140
201
  topology=topology,
@@ -190,43 +251,39 @@ def main(argv: Sequence[str] | None = None) -> int:
190
251
 
191
252
  models = []
192
253
  for model_path in args.file:
193
- try:
194
- info = analyze_model(
195
- model_path,
196
- args.context,
197
- gpu_count,
198
- fetch_tensors=args.tensors,
199
- topology=args.topology,
200
- strategy=args.strategy,
201
- is_vllm=args.vllm,
202
- gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
203
- gpu_util=args.gpu_util
204
- )
205
- models.append((model_path.split("/")[-1], info))
206
- except Exception as e:
207
- console.print(f"[red]Error analyzing model '{model_path}': {e}[/red]")
208
- return 1
254
+ info = analyze_model(
255
+ model_path,
256
+ args.context,
257
+ gpu_count=gpu_count,
258
+ batch_size=args.batch_size,
259
+ fetch_tensors=args.tensors,
260
+ timeout=args.timeout,
261
+ topology=args.topology,
262
+ strategy=args.strategy,
263
+ is_vllm=args.vllm,
264
+ gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
265
+ gpu_util=args.gpu_util
266
+ )
267
+ models.append((model_path.split("/")[-1], info))
209
268
 
210
269
  print_compare_info(models, gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
211
270
  return 0
212
271
 
213
272
  file_path = args.file[0]
214
273
 
215
- try:
216
- info = analyze_model(
217
- file_path,
218
- args.context,
219
- gpu_count,
220
- fetch_tensors=args.tensors,
221
- topology=args.topology,
222
- strategy=args.strategy,
223
- is_vllm=args.vllm,
224
- gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
225
- gpu_util=args.gpu_util
226
- )
227
- except Exception as e:
228
- console.print(f"[red]Error: {e}[/red]")
229
- return 1
274
+ info = analyze_model(
275
+ file_path,
276
+ args.context,
277
+ gpu_count=gpu_count,
278
+ batch_size=args.batch_size,
279
+ fetch_tensors=args.tensors,
280
+ timeout=args.timeout,
281
+ topology=args.topology,
282
+ strategy=args.strategy,
283
+ is_vllm=args.vllm,
284
+ gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
285
+ gpu_util=args.gpu_util
286
+ )
230
287
 
231
288
  print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
232
289
  return 0
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  import subprocess
3
- from typing import Tuple
3
+ from typing import Optional, Tuple
4
4
 
5
5
  KNOWN_GPUS = {
6
6
  # --- NVIDIA Consumer (RTX 50/40/30/20/10 Series & Titans) ---
@@ -21,6 +21,7 @@ KNOWN_GPUS = {
21
21
  "rtx4060ti16gb": 16.0,
22
22
  "rtx4060ti": 8.0,
23
23
  "rtx4060": 8.0,
24
+ "rtx4050": 6.0,
24
25
  "rtx3090ti": 24.0,
25
26
  "rtx3090": 24.0,
26
27
  "rtx3080ti": 12.0,
@@ -31,6 +32,7 @@ KNOWN_GPUS = {
31
32
  "rtx3060ti": 8.0,
32
33
  "rtx306012gb": 12.0,
33
34
  "rtx3060": 8.0,
35
+ "rtx3050ti": 4.0,
34
36
  "rtx3050": 8.0,
35
37
  "rtx2080ti": 11.0,
36
38
  "rtx2080super": 8.0,
@@ -40,6 +42,11 @@ KNOWN_GPUS = {
40
42
  "rtx2060super": 8.0,
41
43
  "rtx206012gb": 12.0,
42
44
  "rtx2060": 6.0,
45
+ "gtx1660super": 6.0,
46
+ "gtx1660ti": 6.0,
47
+ "gtx1660": 6.0,
48
+ "gtx1650super": 4.0,
49
+ "gtx1650": 4.0,
43
50
  "gtx1080ti": 11.0,
44
51
  "gtx1080": 8.0,
45
52
  "gtx1070ti": 8.0,
@@ -50,7 +57,6 @@ KNOWN_GPUS = {
50
57
  "titanxp": 12.0,
51
58
  "titanxpascal": 12.0,
52
59
  "titanx": 12.0,
53
-
54
60
  # --- NVIDIA Data Center / Workstation ---
55
61
  "b200": 192.0,
56
62
  "b100": 192.0,
@@ -82,7 +88,6 @@ KNOWN_GPUS = {
82
88
  "rtxa4000": 16.0,
83
89
  "quadrortx8000": 48.0,
84
90
  "quadrortx6000": 24.0,
85
-
86
91
  # --- AMD Consumer (RX 9000/7000/6000 Series) ---
87
92
  "rx9070xt": 16.0,
88
93
  "rx9070": 16.0,
@@ -106,7 +111,8 @@ KNOWN_GPUS = {
106
111
  "rx6650xt": 8.0,
107
112
  "rx6600xt": 8.0,
108
113
  "rx6600": 8.0,
109
-
114
+ "rx580": 8.0,
115
+ "rx570": 4.0,
110
116
  # --- AMD Data Center / Pro ---
111
117
  "mi300x": 192.0,
112
118
  "mi250x": 128.0,
@@ -114,7 +120,6 @@ KNOWN_GPUS = {
114
120
  "prow7900": 48.0,
115
121
  "prow7800": 32.0,
116
122
  "prow6800": 32.0,
117
-
118
123
  # --- Intel Consumer & Accelerators ---
119
124
  "arcb580": 12.0,
120
125
  "b580": 12.0,
@@ -128,63 +133,162 @@ KNOWN_GPUS = {
128
133
  "gaudi2": 96.0,
129
134
  }
130
135
 
136
+
131
137
  def normalize_gpu_string(name: str) -> str:
132
138
  """Strips vendor fluff, spaces, and hyphens to map correctly to KNOWN_GPUS."""
133
139
  name = name.lower()
134
-
140
+
135
141
  # Remove common vendor/marketing fluff that disrupts core identifiers
136
- fluff_words = ["nvidia", "geforce", "amd", "radeon", "intel", "arc", "generation", "edition", "graphics", "accelerator"]
142
+ fluff_words = [
143
+ "nvidia",
144
+ "geforce",
145
+ "amd",
146
+ "radeon",
147
+ "intel",
148
+ "arc",
149
+ "generation",
150
+ "edition",
151
+ "graphics",
152
+ "accelerator",
153
+ ]
137
154
  for word in fluff_words:
138
155
  name = name.replace(word, "")
139
-
140
- return re.sub(r'[\s\-]', '', name)
141
156
 
142
- def detect_local_gpu() -> Tuple[str, float, int]:
143
- # 1. NVIDIA
157
+ return re.sub(r"[\s\-]", "", name)
158
+
159
+
160
+ def _detect_nvidia_gpu() -> Optional[Tuple[str, float, int]]:
144
161
  try:
145
162
  result = subprocess.run(
146
- ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
147
- capture_output=True, text=True, check=True
163
+ [
164
+ "nvidia-smi",
165
+ "--query-gpu=name,memory.total",
166
+ "--format=csv,noheader,nounits",
167
+ ],
168
+ capture_output=True,
169
+ text=True,
170
+ check=True,
171
+ timeout=2.0,
148
172
  )
149
- lines = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
173
+ lines = [
174
+ line.strip() for line in result.stdout.strip().split("\n") if line.strip()
175
+ ]
150
176
  if lines:
151
177
  total_mb = 0
152
178
  for line in lines:
153
- parts = line.split(',')
179
+ parts = line.split(",")
154
180
  if len(parts) >= 2:
155
181
  total_mb += int(parts[1].strip())
156
-
182
+
157
183
  gpu_count = len(lines)
158
- first_name = lines[0].split(',')[0].strip()
159
- display_name = f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
184
+ first_name = lines[0].split(",")[0].strip()
185
+ display_name = (
186
+ f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
187
+ )
160
188
  return display_name, total_mb / 1024.0, gpu_count
161
189
  except Exception:
162
190
  pass
163
-
164
- # 2. AMD (ROCm)
191
+ return None
192
+
193
+
194
+ def _detect_amd_gpu() -> Optional[Tuple[str, float, int]]:
165
195
  try:
166
196
  result = subprocess.run(
167
197
  ["rocm-smi", "--showmeminfo", "vram"],
168
- capture_output=True, text=True, check=True
198
+ capture_output=True,
199
+ text=True,
200
+ check=True,
201
+ timeout=2.0,
169
202
  )
170
- lines = [line for line in result.stdout.strip().split('\n') if "Total Memory (B):" in line]
203
+ lines = [
204
+ line
205
+ for line in result.stdout.strip().split("\n")
206
+ if "Total Memory (B):" in line
207
+ ]
171
208
  if lines:
172
209
  total_bytes = 0
173
210
  gpu_count = len(lines)
174
211
  for line in lines:
175
- parts = line.split(':')
212
+ parts = line.split(":")
176
213
  if len(parts) >= 2:
177
214
  total_bytes += int(parts[1].strip())
178
- display_name = f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
215
+ display_name = (
216
+ f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
217
+ )
179
218
  return display_name, total_bytes / (1024.0**3), gpu_count
180
219
  except Exception:
181
220
  pass
182
-
183
- # 3. Apple Silicon
221
+ return None
222
+
223
+
224
+ def _parse_intel_vram(size_str: str) -> Optional[float]:
225
+ match = re.search(r"([\d\.]+)\s*([a-zA-Z]*)", size_str)
226
+ if not match:
227
+ return None
228
+ val = float(match.group(1))
229
+ unit = match.group(2).lower()
230
+ if unit in ("gib", "gb"):
231
+ val *= 1024.0
232
+ elif unit in ("kib", "kb"):
233
+ val /= 1024.0
234
+ elif unit == "b":
235
+ val /= (1024.0 * 1024.0)
236
+ return val
237
+
238
+
239
+ def _parse_xpu_smi_output(stdout: str) -> Tuple[list[str], float, int]:
240
+ gpu_names: list[str] = []
241
+ total_mib: float = 0.0
242
+ parsed_memory_entries: int = 0
243
+
244
+ for line in stdout.splitlines():
245
+ lower_line = line.lower()
246
+ if "device name:" in lower_line:
247
+ idx = lower_line.index("device name:")
248
+ name = line[idx + len("device name:"):].split("|")[0].strip()
249
+ gpu_names.append(name)
250
+ elif "memory physical size:" in lower_line:
251
+ idx = lower_line.index("memory physical size:")
252
+ size_str = line[idx + len("memory physical size:"):].split("|")[0].strip()
253
+ val = _parse_intel_vram(size_str)
254
+ if val is not None:
255
+ total_mib += val
256
+ parsed_memory_entries += 1
257
+
258
+ return gpu_names, total_mib, parsed_memory_entries
259
+
260
+
261
+ def _detect_intel_gpu() -> Optional[Tuple[str, float, int]]:
262
+ try:
263
+ result = subprocess.run(
264
+ ["xpu-smi", "discovery"],
265
+ capture_output=True,
266
+ text=True,
267
+ check=True,
268
+ timeout=2.0,
269
+ )
270
+ gpu_names, total_mib, parsed_memory_entries = _parse_xpu_smi_output(result.stdout)
271
+
272
+ if gpu_names and parsed_memory_entries == len(gpu_names) and total_mib > 0.0:
273
+ gpu_count = len(gpu_names)
274
+ first_name = gpu_names[0]
275
+ display_name = (
276
+ f"Intel Multi-GPU ({gpu_count}x {first_name})" if gpu_count > 1 else first_name
277
+ )
278
+ return display_name, total_mib / 1024.0, gpu_count
279
+ except Exception:
280
+ pass
281
+ return None
282
+
283
+
284
+ def _detect_apple_gpu() -> Optional[Tuple[str, float, int]]:
184
285
  try:
185
286
  result = subprocess.run(
186
287
  ["sysctl", "hw.memsize"],
187
- capture_output=True, text=True, check=True
288
+ capture_output=True,
289
+ text=True,
290
+ check=True,
291
+ timeout=2.0,
188
292
  )
189
293
  total_bytes = int(result.stdout.strip().split()[1])
190
294
  # Apply 75% operational heuristic for Apple Silicon wire limits
@@ -192,34 +296,62 @@ def detect_local_gpu() -> Tuple[str, float, int]:
192
296
  return "Apple Silicon (Unified Memory)", vram_gb, 1
193
297
  except Exception:
194
298
  pass
195
-
299
+ return None
300
+
301
+
302
+ def detect_local_gpu() -> Tuple[str, float, int]:
303
+ # 1. NVIDIA
304
+ nvidia_res = _detect_nvidia_gpu()
305
+ if nvidia_res is not None:
306
+ return nvidia_res
307
+
308
+ # 2. AMD (ROCm)
309
+ amd_res = _detect_amd_gpu()
310
+ if amd_res is not None:
311
+ return amd_res
312
+
313
+ # 3. Intel (xpu-smi)
314
+ intel_res = _detect_intel_gpu()
315
+ if intel_res is not None:
316
+ return intel_res
317
+
318
+ # 4. Apple Silicon
319
+ apple_res = _detect_apple_gpu()
320
+ if apple_res is not None:
321
+ return apple_res
322
+
196
323
  return "Unknown", 8.0, 1
197
324
 
325
+
198
326
  def resolve_gpu(target: str) -> Tuple[str, float, int]:
199
327
  if target.lower() == "auto":
200
328
  return detect_local_gpu()
201
-
329
+
202
330
  # Apple Silicon routing trap
203
331
  lower_target = target.lower()
204
- if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(r'^m[1-4](-?(pro|max|ultra))?$', lower_target):
205
- raise ValueError("Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits.")
206
-
332
+ if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(
333
+ r"^m[1-4](-?(pro|max|ultra))?$", lower_target
334
+ ):
335
+ raise ValueError(
336
+ "Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits."
337
+ )
338
+
207
339
  # Parse potential multi-GPU format e.g., "2x RTX4090"
208
340
  gpu_count = 1
209
- match = re.match(r'^(\d+)x\s*(.+)$', lower_target)
341
+ match = re.match(r"^(\d+)x\s*(.+)$", lower_target)
210
342
  if match:
211
343
  gpu_count = int(match.group(1))
212
344
  target_name = match.group(2)
213
345
  else:
214
346
  target_name = target
215
-
347
+
216
348
  normalized = normalize_gpu_string(target_name)
217
-
349
+
218
350
  if normalized in KNOWN_GPUS:
219
351
  vram_gb = KNOWN_GPUS[normalized] * gpu_count
220
352
  display_name = f"{gpu_count}x {target_name}" if gpu_count > 1 else target_name
221
353
  return display_name, vram_gb, gpu_count
222
-
354
+
223
355
  # If the user passed a pure number, assume GB
224
356
  try:
225
357
  vram_gb = float(normalized) * gpu_count
@@ -227,5 +359,17 @@ def resolve_gpu(target: str) -> Tuple[str, float, int]:
227
359
  return display_name, vram_gb, gpu_count
228
360
  except ValueError:
229
361
  pass
230
-
231
- raise ValueError(f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value.")
362
+
363
+ import difflib
364
+
365
+ matches = difflib.get_close_matches(normalized, KNOWN_GPUS.keys(), n=3, cutoff=0.6)
366
+ if matches:
367
+ suggestions = ", ".join(matches)
368
+ raise ValueError(
369
+ f"Unknown GPU target '{target}'. Did you mean: {suggestions}? "
370
+ f"Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
371
+ )
372
+
373
+ raise ValueError(
374
+ f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
375
+ )