modelinfo-cli 1.4.2__tar.gz → 1.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/PKG-INFO +4 -1
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/README.md +3 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/pyproject.toml +1 -1
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/__init__.py +1 -1
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/cli.py +91 -34
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/hardware.py +182 -38
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/huggingface.py +48 -21
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/PKG-INFO +4 -1
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/SOURCES.txt +2 -0
- modelinfo_cli-1.4.4/tests/test_cli.py +179 -0
- modelinfo_cli-1.4.4/tests/test_hardware.py +255 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/tests/test_parsers.py +37 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/LICENSE +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/setup.cfg +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/__main__.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/architecture.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/calculator.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/__init__.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/base.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/gguf.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/pytorch.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/parsers/safetensors.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo/ui.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/dependency_links.txt +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/entry_points.txt +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/requires.txt +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/src/modelinfo_cli.egg-info/top_level.txt +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/tests/test_calculator.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.4}/tests/test_constraints.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelinfo-cli
|
|
3
|
-
Version: 1.4.2
|
|
3
|
+
Version: 1.4.4
|
|
4
4
|
Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
|
|
5
5
|
Author: ModelInfo Contributors
|
|
6
6
|
License: MIT
|
|
@@ -164,12 +164,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
|
|
|
164
164
|
| `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
|
|
165
165
|
| `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
|
|
166
166
|
| `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
|
|
167
|
+
| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
|
|
167
168
|
| `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
|
|
168
169
|
| `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
|
|
169
170
|
| `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
|
|
170
171
|
| `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
|
|
171
172
|
| `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
|
|
172
173
|
| `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
|
|
174
|
+
| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
|
|
175
|
+
| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
|
|
173
176
|
|
|
174
177
|
## Architecture
|
|
175
178
|
|
|
@@ -146,12 +146,15 @@ Qwen2.5-0.5B 494.0M BF16 8K 1.6 GB ✓
|
|
|
146
146
|
| `[files...]` | `modelinfo modelA modelB` | Pass multiple files/repos to automatically render a side-by-side comparison table instead of a deep-dive summary. |
|
|
147
147
|
| `--gpu` | `--gpu rtx4090` | Check if the model fits. Accepts GPU names (`rtx4090`, `b200`, `rx7900xtx`), explicit VRAM limits in GB (`--gpu 24`), or local hardware auto-discovery (`--gpu auto`). |
|
|
148
148
|
| `--context` | `--context 32768` | Adjust the target KV cache length. Essential for calculating the dynamic memory footprint of long-context models. Defaults to `8192`. |
|
|
149
|
+
| `--batch-size` | `--batch-size 32` | Batch size for dynamic KV cache footprint calculation. Defaults to `1`. |
|
|
149
150
|
| `--max-vram` | `--max-vram 80` | Adjusts the color-coded heat mapping thresholds (Green/Yellow/Red) in the terminal output to match a specific hardware ceiling. |
|
|
150
151
|
| `--vllm` | `--vllm --gpu auto` | Switches from additive memory checking to a serving capacity simulation. Shows exactly how many tokens fit in the PagedAttention pool. |
|
|
151
152
|
| `--gpu-util` | `--gpu-util 0.9` | Sets the vLLM `gpu_memory_utilization` ratio. Defaults to `0.9` (reserves 10% for PyTorch context). |
|
|
152
153
|
| `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
|
|
153
154
|
| `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
|
|
154
155
|
| `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
|
|
156
|
+
| `--timeout` | `--timeout 30` | Network timeout in seconds for remote Hugging Face fetches. Defaults to `10`. |
|
|
157
|
+
| `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
|
|
155
158
|
|
|
156
159
|
## Architecture
|
|
157
160
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "modelinfo-cli"
|
|
7
|
-
version = "1.4.2"
|
|
7
|
+
version = "1.4.4"
|
|
8
8
|
description = "A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import json
|
|
3
|
+
import math
|
|
3
4
|
import os
|
|
4
5
|
import sys
|
|
5
6
|
from typing import Sequence
|
|
6
|
-
|
|
7
7
|
from modelinfo.architecture import identify_architecture_name
|
|
8
8
|
from modelinfo.calculator import calculate_footprint
|
|
9
9
|
from modelinfo.parsers.gguf import parse_gguf_header
|
|
@@ -12,6 +12,43 @@ from modelinfo.parsers.safetensors import parse_safetensors_header
|
|
|
12
12
|
from modelinfo.ui import console, print_model_info, print_compare_info
|
|
13
13
|
|
|
14
14
|
|
|
15
|
+
class VersionAction(argparse.Action):
|
|
16
|
+
def __init__(self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help="show program's version number and exit"):
|
|
17
|
+
super().__init__(
|
|
18
|
+
option_strings=option_strings,
|
|
19
|
+
dest=dest,
|
|
20
|
+
default=default,
|
|
21
|
+
nargs=0,
|
|
22
|
+
help=help,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
def __call__(self, parser, namespace, values, option_string=None):
|
|
26
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
27
|
+
from modelinfo import __version__
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
ver = version("modelinfo-cli")
|
|
31
|
+
except PackageNotFoundError:
|
|
32
|
+
ver = __version__
|
|
33
|
+
|
|
34
|
+
print(f"{parser.prog} {ver}")
|
|
35
|
+
parser.exit()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _positive_int(value: str) -> int:
|
|
39
|
+
ivalue = int(value)
|
|
40
|
+
if ivalue < 1:
|
|
41
|
+
raise argparse.ArgumentTypeError("batch size must be at least 1")
|
|
42
|
+
return ivalue
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _positive_float(value: str) -> float:
|
|
46
|
+
fvalue = float(value)
|
|
47
|
+
if not math.isfinite(fvalue) or fvalue <= 0:
|
|
48
|
+
raise argparse.ArgumentTypeError("timeout must be a finite number greater than 0")
|
|
49
|
+
return fvalue
|
|
50
|
+
|
|
51
|
+
|
|
15
52
|
def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
16
53
|
parser = argparse.ArgumentParser(
|
|
17
54
|
prog="modelinfo",
|
|
@@ -30,6 +67,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
|
30
67
|
default=None,
|
|
31
68
|
help="Context length for dynamic KV cache footprint calculation.",
|
|
32
69
|
)
|
|
70
|
+
parser.add_argument(
|
|
71
|
+
"--batch-size",
|
|
72
|
+
type=_positive_int,
|
|
73
|
+
default=1,
|
|
74
|
+
help="Batch size for dynamic KV cache footprint calculation.",
|
|
75
|
+
)
|
|
33
76
|
parser.add_argument(
|
|
34
77
|
"--max-vram",
|
|
35
78
|
type=float,
|
|
@@ -47,6 +90,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
|
47
90
|
action="store_true",
|
|
48
91
|
help="Deep dive: Fetch all remote tensor shards to display the exact tensor size breakdown.",
|
|
49
92
|
)
|
|
93
|
+
parser.add_argument(
|
|
94
|
+
"--timeout",
|
|
95
|
+
type=_positive_float,
|
|
96
|
+
default=10.0,
|
|
97
|
+
help="Network timeout in seconds for remote Hugging Face fetches.",
|
|
98
|
+
)
|
|
50
99
|
parser.add_argument(
|
|
51
100
|
"--topology",
|
|
52
101
|
type=str,
|
|
@@ -72,6 +121,11 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
|
72
121
|
default=0.9,
|
|
73
122
|
help="vLLM gpu_memory_utilization ratio (default 0.9). Reserves 10 percent for PyTorch context.",
|
|
74
123
|
)
|
|
124
|
+
parser.add_argument(
|
|
125
|
+
"-v",
|
|
126
|
+
"--version",
|
|
127
|
+
action=VersionAction,
|
|
128
|
+
)
|
|
75
129
|
|
|
76
130
|
return parser.parse_args(argv)
|
|
77
131
|
|
|
@@ -79,8 +133,10 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
|
79
133
|
def analyze_model(
|
|
80
134
|
file_path: str,
|
|
81
135
|
context_override: int | None,
|
|
82
|
-
gpu_count: int = 1,
|
|
136
|
+
gpu_count: int = 1,
|
|
137
|
+
batch_size: int = 1,
|
|
83
138
|
fetch_tensors: bool = False,
|
|
139
|
+
timeout: float = 10.0,
|
|
84
140
|
topology: str = "pcie4",
|
|
85
141
|
strategy: str = "tp",
|
|
86
142
|
is_vllm: bool = False,
|
|
@@ -95,7 +151,9 @@ def analyze_model(
|
|
|
95
151
|
|
|
96
152
|
if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
|
|
97
153
|
from modelinfo.parsers.huggingface import fetch_huggingface_repo
|
|
98
|
-
tensors, config, format_name, disk_size = fetch_huggingface_repo(
|
|
154
|
+
tensors, config, format_name, disk_size = fetch_huggingface_repo(
|
|
155
|
+
file_path, fetch_tensors=fetch_tensors, timeout=timeout
|
|
156
|
+
)
|
|
99
157
|
elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
|
|
100
158
|
tensors = parse_safetensors_header(file_path)
|
|
101
159
|
format_name = "SafeTensors"
|
|
@@ -114,6 +172,8 @@ def analyze_model(
|
|
|
114
172
|
elif file_path_lower.endswith(".pt") or file_path_lower.endswith(".bin"):
|
|
115
173
|
tensors = parse_pytorch_header(file_path)
|
|
116
174
|
format_name = "PyTorch"
|
|
175
|
+
elif os.path.isdir(file_path):
|
|
176
|
+
raise IsADirectoryError(f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory.")
|
|
117
177
|
else:
|
|
118
178
|
raise ValueError(f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID.")
|
|
119
179
|
|
|
@@ -135,6 +195,7 @@ def analyze_model(
|
|
|
135
195
|
footprint = calculate_footprint(
|
|
136
196
|
tensors,
|
|
137
197
|
context_length=context_length,
|
|
198
|
+
batch_size=batch_size,
|
|
138
199
|
config=config,
|
|
139
200
|
gpu_count=gpu_count,
|
|
140
201
|
topology=topology,
|
|
@@ -190,43 +251,39 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
190
251
|
|
|
191
252
|
models = []
|
|
192
253
|
for model_path in args.file:
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
console.print(f"[red]Error analyzing model '{model_path}': {e}[/red]")
|
|
208
|
-
return 1
|
|
254
|
+
info = analyze_model(
|
|
255
|
+
model_path,
|
|
256
|
+
args.context,
|
|
257
|
+
gpu_count=gpu_count,
|
|
258
|
+
batch_size=args.batch_size,
|
|
259
|
+
fetch_tensors=args.tensors,
|
|
260
|
+
timeout=args.timeout,
|
|
261
|
+
topology=args.topology,
|
|
262
|
+
strategy=args.strategy,
|
|
263
|
+
is_vllm=args.vllm,
|
|
264
|
+
gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
|
|
265
|
+
gpu_util=args.gpu_util
|
|
266
|
+
)
|
|
267
|
+
models.append((model_path.split("/")[-1], info))
|
|
209
268
|
|
|
210
269
|
print_compare_info(models, gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
|
|
211
270
|
return 0
|
|
212
271
|
|
|
213
272
|
file_path = args.file[0]
|
|
214
273
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
console.print(f"[red]Error: {e}[/red]")
|
|
229
|
-
return 1
|
|
274
|
+
info = analyze_model(
|
|
275
|
+
file_path,
|
|
276
|
+
args.context,
|
|
277
|
+
gpu_count=gpu_count,
|
|
278
|
+
batch_size=args.batch_size,
|
|
279
|
+
fetch_tensors=args.tensors,
|
|
280
|
+
timeout=args.timeout,
|
|
281
|
+
topology=args.topology,
|
|
282
|
+
strategy=args.strategy,
|
|
283
|
+
is_vllm=args.vllm,
|
|
284
|
+
gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
|
|
285
|
+
gpu_util=args.gpu_util
|
|
286
|
+
)
|
|
230
287
|
|
|
231
288
|
print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
|
|
232
289
|
return 0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import subprocess
|
|
3
|
-
from typing import Tuple
|
|
3
|
+
from typing import Optional, Tuple
|
|
4
4
|
|
|
5
5
|
KNOWN_GPUS = {
|
|
6
6
|
# --- NVIDIA Consumer (RTX 50/40/30/20/10 Series & Titans) ---
|
|
@@ -21,6 +21,7 @@ KNOWN_GPUS = {
|
|
|
21
21
|
"rtx4060ti16gb": 16.0,
|
|
22
22
|
"rtx4060ti": 8.0,
|
|
23
23
|
"rtx4060": 8.0,
|
|
24
|
+
"rtx4050": 6.0,
|
|
24
25
|
"rtx3090ti": 24.0,
|
|
25
26
|
"rtx3090": 24.0,
|
|
26
27
|
"rtx3080ti": 12.0,
|
|
@@ -31,6 +32,7 @@ KNOWN_GPUS = {
|
|
|
31
32
|
"rtx3060ti": 8.0,
|
|
32
33
|
"rtx306012gb": 12.0,
|
|
33
34
|
"rtx3060": 8.0,
|
|
35
|
+
"rtx3050ti": 4.0,
|
|
34
36
|
"rtx3050": 8.0,
|
|
35
37
|
"rtx2080ti": 11.0,
|
|
36
38
|
"rtx2080super": 8.0,
|
|
@@ -40,6 +42,11 @@ KNOWN_GPUS = {
|
|
|
40
42
|
"rtx2060super": 8.0,
|
|
41
43
|
"rtx206012gb": 12.0,
|
|
42
44
|
"rtx2060": 6.0,
|
|
45
|
+
"gtx1660super": 6.0,
|
|
46
|
+
"gtx1660ti": 6.0,
|
|
47
|
+
"gtx1660": 6.0,
|
|
48
|
+
"gtx1650super": 4.0,
|
|
49
|
+
"gtx1650": 4.0,
|
|
43
50
|
"gtx1080ti": 11.0,
|
|
44
51
|
"gtx1080": 8.0,
|
|
45
52
|
"gtx1070ti": 8.0,
|
|
@@ -50,7 +57,6 @@ KNOWN_GPUS = {
|
|
|
50
57
|
"titanxp": 12.0,
|
|
51
58
|
"titanxpascal": 12.0,
|
|
52
59
|
"titanx": 12.0,
|
|
53
|
-
|
|
54
60
|
# --- NVIDIA Data Center / Workstation ---
|
|
55
61
|
"b200": 192.0,
|
|
56
62
|
"b100": 192.0,
|
|
@@ -82,7 +88,6 @@ KNOWN_GPUS = {
|
|
|
82
88
|
"rtxa4000": 16.0,
|
|
83
89
|
"quadrortx8000": 48.0,
|
|
84
90
|
"quadrortx6000": 24.0,
|
|
85
|
-
|
|
86
91
|
# --- AMD Consumer (RX 9000/7000/6000 Series) ---
|
|
87
92
|
"rx9070xt": 16.0,
|
|
88
93
|
"rx9070": 16.0,
|
|
@@ -106,7 +111,8 @@ KNOWN_GPUS = {
|
|
|
106
111
|
"rx6650xt": 8.0,
|
|
107
112
|
"rx6600xt": 8.0,
|
|
108
113
|
"rx6600": 8.0,
|
|
109
|
-
|
|
114
|
+
"rx580": 8.0,
|
|
115
|
+
"rx570": 4.0,
|
|
110
116
|
# --- AMD Data Center / Pro ---
|
|
111
117
|
"mi300x": 192.0,
|
|
112
118
|
"mi250x": 128.0,
|
|
@@ -114,7 +120,6 @@ KNOWN_GPUS = {
|
|
|
114
120
|
"prow7900": 48.0,
|
|
115
121
|
"prow7800": 32.0,
|
|
116
122
|
"prow6800": 32.0,
|
|
117
|
-
|
|
118
123
|
# --- Intel Consumer & Accelerators ---
|
|
119
124
|
"arcb580": 12.0,
|
|
120
125
|
"b580": 12.0,
|
|
@@ -128,63 +133,162 @@ KNOWN_GPUS = {
|
|
|
128
133
|
"gaudi2": 96.0,
|
|
129
134
|
}
|
|
130
135
|
|
|
136
|
+
|
|
131
137
|
def normalize_gpu_string(name: str) -> str:
|
|
132
138
|
"""Strips vendor fluff, spaces, and hyphens to map correctly to KNOWN_GPUS."""
|
|
133
139
|
name = name.lower()
|
|
134
|
-
|
|
140
|
+
|
|
135
141
|
# Remove common vendor/marketing fluff that disrupts core identifiers
|
|
136
|
-
fluff_words = [
|
|
142
|
+
fluff_words = [
|
|
143
|
+
"nvidia",
|
|
144
|
+
"geforce",
|
|
145
|
+
"amd",
|
|
146
|
+
"radeon",
|
|
147
|
+
"intel",
|
|
148
|
+
"arc",
|
|
149
|
+
"generation",
|
|
150
|
+
"edition",
|
|
151
|
+
"graphics",
|
|
152
|
+
"accelerator",
|
|
153
|
+
]
|
|
137
154
|
for word in fluff_words:
|
|
138
155
|
name = name.replace(word, "")
|
|
139
|
-
|
|
140
|
-
return re.sub(r'[\s\-]', '', name)
|
|
141
156
|
|
|
142
|
-
|
|
143
|
-
|
|
157
|
+
return re.sub(r"[\s\-]", "", name)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _detect_nvidia_gpu() -> Optional[Tuple[str, float, int]]:
|
|
144
161
|
try:
|
|
145
162
|
result = subprocess.run(
|
|
146
|
-
[
|
|
147
|
-
|
|
163
|
+
[
|
|
164
|
+
"nvidia-smi",
|
|
165
|
+
"--query-gpu=name,memory.total",
|
|
166
|
+
"--format=csv,noheader,nounits",
|
|
167
|
+
],
|
|
168
|
+
capture_output=True,
|
|
169
|
+
text=True,
|
|
170
|
+
check=True,
|
|
171
|
+
timeout=2.0,
|
|
148
172
|
)
|
|
149
|
-
lines = [
|
|
173
|
+
lines = [
|
|
174
|
+
line.strip() for line in result.stdout.strip().split("\n") if line.strip()
|
|
175
|
+
]
|
|
150
176
|
if lines:
|
|
151
177
|
total_mb = 0
|
|
152
178
|
for line in lines:
|
|
153
|
-
parts = line.split(
|
|
179
|
+
parts = line.split(",")
|
|
154
180
|
if len(parts) >= 2:
|
|
155
181
|
total_mb += int(parts[1].strip())
|
|
156
|
-
|
|
182
|
+
|
|
157
183
|
gpu_count = len(lines)
|
|
158
|
-
first_name = lines[0].split(
|
|
159
|
-
display_name =
|
|
184
|
+
first_name = lines[0].split(",")[0].strip()
|
|
185
|
+
display_name = (
|
|
186
|
+
f"Multi-GPU: {gpu_count}x {first_name}" if gpu_count > 1 else first_name
|
|
187
|
+
)
|
|
160
188
|
return display_name, total_mb / 1024.0, gpu_count
|
|
161
189
|
except Exception:
|
|
162
190
|
pass
|
|
163
|
-
|
|
164
|
-
|
|
191
|
+
return None
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _detect_amd_gpu() -> Optional[Tuple[str, float, int]]:
|
|
165
195
|
try:
|
|
166
196
|
result = subprocess.run(
|
|
167
197
|
["rocm-smi", "--showmeminfo", "vram"],
|
|
168
|
-
capture_output=True,
|
|
198
|
+
capture_output=True,
|
|
199
|
+
text=True,
|
|
200
|
+
check=True,
|
|
201
|
+
timeout=2.0,
|
|
169
202
|
)
|
|
170
|
-
lines = [
|
|
203
|
+
lines = [
|
|
204
|
+
line
|
|
205
|
+
for line in result.stdout.strip().split("\n")
|
|
206
|
+
if "Total Memory (B):" in line
|
|
207
|
+
]
|
|
171
208
|
if lines:
|
|
172
209
|
total_bytes = 0
|
|
173
210
|
gpu_count = len(lines)
|
|
174
211
|
for line in lines:
|
|
175
|
-
parts = line.split(
|
|
212
|
+
parts = line.split(":")
|
|
176
213
|
if len(parts) >= 2:
|
|
177
214
|
total_bytes += int(parts[1].strip())
|
|
178
|
-
display_name =
|
|
215
|
+
display_name = (
|
|
216
|
+
f"AMD Multi-GPU ({gpu_count}x)" if gpu_count > 1 else "AMD GPU"
|
|
217
|
+
)
|
|
179
218
|
return display_name, total_bytes / (1024.0**3), gpu_count
|
|
180
219
|
except Exception:
|
|
181
220
|
pass
|
|
182
|
-
|
|
183
|
-
|
|
221
|
+
return None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _parse_intel_vram(size_str: str) -> Optional[float]:
|
|
225
|
+
match = re.search(r"([\d\.]+)\s*([a-zA-Z]*)", size_str)
|
|
226
|
+
if not match:
|
|
227
|
+
return None
|
|
228
|
+
val = float(match.group(1))
|
|
229
|
+
unit = match.group(2).lower()
|
|
230
|
+
if unit in ("gib", "gb"):
|
|
231
|
+
val *= 1024.0
|
|
232
|
+
elif unit in ("kib", "kb"):
|
|
233
|
+
val /= 1024.0
|
|
234
|
+
elif unit == "b":
|
|
235
|
+
val /= (1024.0 * 1024.0)
|
|
236
|
+
return val
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _parse_xpu_smi_output(stdout: str) -> Tuple[list[str], float, int]:
|
|
240
|
+
gpu_names: list[str] = []
|
|
241
|
+
total_mib: float = 0.0
|
|
242
|
+
parsed_memory_entries: int = 0
|
|
243
|
+
|
|
244
|
+
for line in stdout.splitlines():
|
|
245
|
+
lower_line = line.lower()
|
|
246
|
+
if "device name:" in lower_line:
|
|
247
|
+
idx = lower_line.index("device name:")
|
|
248
|
+
name = line[idx + len("device name:"):].split("|")[0].strip()
|
|
249
|
+
gpu_names.append(name)
|
|
250
|
+
elif "memory physical size:" in lower_line:
|
|
251
|
+
idx = lower_line.index("memory physical size:")
|
|
252
|
+
size_str = line[idx + len("memory physical size:"):].split("|")[0].strip()
|
|
253
|
+
val = _parse_intel_vram(size_str)
|
|
254
|
+
if val is not None:
|
|
255
|
+
total_mib += val
|
|
256
|
+
parsed_memory_entries += 1
|
|
257
|
+
|
|
258
|
+
return gpu_names, total_mib, parsed_memory_entries
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _detect_intel_gpu() -> Optional[Tuple[str, float, int]]:
|
|
262
|
+
try:
|
|
263
|
+
result = subprocess.run(
|
|
264
|
+
["xpu-smi", "discovery"],
|
|
265
|
+
capture_output=True,
|
|
266
|
+
text=True,
|
|
267
|
+
check=True,
|
|
268
|
+
timeout=2.0,
|
|
269
|
+
)
|
|
270
|
+
gpu_names, total_mib, parsed_memory_entries = _parse_xpu_smi_output(result.stdout)
|
|
271
|
+
|
|
272
|
+
if gpu_names and parsed_memory_entries == len(gpu_names) and total_mib > 0.0:
|
|
273
|
+
gpu_count = len(gpu_names)
|
|
274
|
+
first_name = gpu_names[0]
|
|
275
|
+
display_name = (
|
|
276
|
+
f"Intel Multi-GPU ({gpu_count}x {first_name})" if gpu_count > 1 else first_name
|
|
277
|
+
)
|
|
278
|
+
return display_name, total_mib / 1024.0, gpu_count
|
|
279
|
+
except Exception:
|
|
280
|
+
pass
|
|
281
|
+
return None
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _detect_apple_gpu() -> Optional[Tuple[str, float, int]]:
|
|
184
285
|
try:
|
|
185
286
|
result = subprocess.run(
|
|
186
287
|
["sysctl", "hw.memsize"],
|
|
187
|
-
capture_output=True,
|
|
288
|
+
capture_output=True,
|
|
289
|
+
text=True,
|
|
290
|
+
check=True,
|
|
291
|
+
timeout=2.0,
|
|
188
292
|
)
|
|
189
293
|
total_bytes = int(result.stdout.strip().split()[1])
|
|
190
294
|
# Apply 75% operational heuristic for Apple Silicon wire limits
|
|
@@ -192,34 +296,62 @@ def detect_local_gpu() -> Tuple[str, float, int]:
|
|
|
192
296
|
return "Apple Silicon (Unified Memory)", vram_gb, 1
|
|
193
297
|
except Exception:
|
|
194
298
|
pass
|
|
195
|
-
|
|
299
|
+
return None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def detect_local_gpu() -> Tuple[str, float, int]:
|
|
303
|
+
# 1. NVIDIA
|
|
304
|
+
nvidia_res = _detect_nvidia_gpu()
|
|
305
|
+
if nvidia_res is not None:
|
|
306
|
+
return nvidia_res
|
|
307
|
+
|
|
308
|
+
# 2. AMD (ROCm)
|
|
309
|
+
amd_res = _detect_amd_gpu()
|
|
310
|
+
if amd_res is not None:
|
|
311
|
+
return amd_res
|
|
312
|
+
|
|
313
|
+
# 3. Intel (xpu-smi)
|
|
314
|
+
intel_res = _detect_intel_gpu()
|
|
315
|
+
if intel_res is not None:
|
|
316
|
+
return intel_res
|
|
317
|
+
|
|
318
|
+
# 4. Apple Silicon
|
|
319
|
+
apple_res = _detect_apple_gpu()
|
|
320
|
+
if apple_res is not None:
|
|
321
|
+
return apple_res
|
|
322
|
+
|
|
196
323
|
return "Unknown", 8.0, 1
|
|
197
324
|
|
|
325
|
+
|
|
198
326
|
def resolve_gpu(target: str) -> Tuple[str, float, int]:
|
|
199
327
|
if target.lower() == "auto":
|
|
200
328
|
return detect_local_gpu()
|
|
201
|
-
|
|
329
|
+
|
|
202
330
|
# Apple Silicon routing trap
|
|
203
331
|
lower_target = target.lower()
|
|
204
|
-
if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(
|
|
205
|
-
|
|
206
|
-
|
|
332
|
+
if lower_target in ["m1", "m2", "m3", "m4", "apple", "mac"] or re.match(
|
|
333
|
+
r"^m[1-4](-?(pro|max|ultra))?$", lower_target
|
|
334
|
+
):
|
|
335
|
+
raise ValueError(
|
|
336
|
+
"Apple Silicon VRAM varies by machine configuration. Please use '--gpu auto' to calculate your specific Unified Memory limits."
|
|
337
|
+
)
|
|
338
|
+
|
|
207
339
|
# Parse potential multi-GPU format e.g., "2x RTX4090"
|
|
208
340
|
gpu_count = 1
|
|
209
|
-
match = re.match(r
|
|
341
|
+
match = re.match(r"^(\d+)x\s*(.+)$", lower_target)
|
|
210
342
|
if match:
|
|
211
343
|
gpu_count = int(match.group(1))
|
|
212
344
|
target_name = match.group(2)
|
|
213
345
|
else:
|
|
214
346
|
target_name = target
|
|
215
|
-
|
|
347
|
+
|
|
216
348
|
normalized = normalize_gpu_string(target_name)
|
|
217
|
-
|
|
349
|
+
|
|
218
350
|
if normalized in KNOWN_GPUS:
|
|
219
351
|
vram_gb = KNOWN_GPUS[normalized] * gpu_count
|
|
220
352
|
display_name = f"{gpu_count}x {target_name}" if gpu_count > 1 else target_name
|
|
221
353
|
return display_name, vram_gb, gpu_count
|
|
222
|
-
|
|
354
|
+
|
|
223
355
|
# If the user passed a pure number, assume GB
|
|
224
356
|
try:
|
|
225
357
|
vram_gb = float(normalized) * gpu_count
|
|
@@ -227,5 +359,17 @@ def resolve_gpu(target: str) -> Tuple[str, float, int]:
|
|
|
227
359
|
return display_name, vram_gb, gpu_count
|
|
228
360
|
except ValueError:
|
|
229
361
|
pass
|
|
230
|
-
|
|
231
|
-
|
|
362
|
+
|
|
363
|
+
import difflib
|
|
364
|
+
|
|
365
|
+
matches = difflib.get_close_matches(normalized, KNOWN_GPUS.keys(), n=3, cutoff=0.6)
|
|
366
|
+
if matches:
|
|
367
|
+
suggestions = ", ".join(matches)
|
|
368
|
+
raise ValueError(
|
|
369
|
+
f"Unknown GPU target '{target}'. Did you mean: {suggestions}? "
|
|
370
|
+
f"Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
raise ValueError(
|
|
374
|
+
f"Unknown GPU target '{target}'. Use '--gpu auto' to detect automatically, or provide a known name (e.g., 'RTX4090') or a numeric GB value."
|
|
375
|
+
)
|