modelinfo-cli 1.4.2__tar.gz → 1.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modelinfo_cli-1.4.2/src/modelinfo_cli.egg-info → modelinfo_cli-1.4.3}/PKG-INFO +1 -1
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/pyproject.toml +1 -1
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/__init__.py +1 -1
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/cli.py +53 -32
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/hardware.py +10 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/parsers/huggingface.py +11 -7
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3/src/modelinfo_cli.egg-info}/PKG-INFO +1 -1
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo_cli.egg-info/SOURCES.txt +1 -0
- modelinfo_cli-1.4.3/tests/test_cli.py +12 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/LICENSE +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/README.md +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/setup.cfg +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/__main__.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/architecture.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/calculator.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/parsers/__init__.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/parsers/base.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/parsers/gguf.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/parsers/pytorch.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/parsers/safetensors.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo/ui.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo_cli.egg-info/dependency_links.txt +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo_cli.egg-info/entry_points.txt +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo_cli.egg-info/requires.txt +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/src/modelinfo_cli.egg-info/top_level.txt +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/tests/test_calculator.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/tests/test_constraints.py +0 -0
- {modelinfo_cli-1.4.2 → modelinfo_cli-1.4.3}/tests/test_parsers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelinfo-cli
|
|
3
|
-
Version: 1.4.2
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
|
|
5
5
|
Author: ModelInfo Contributors
|
|
6
6
|
License: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "modelinfo-cli"
|
|
7
|
-
version = "1.4.2"
|
|
7
|
+
version = "1.4.3"
|
|
8
8
|
description = "A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -3,7 +3,6 @@ import json
|
|
|
3
3
|
import os
|
|
4
4
|
import sys
|
|
5
5
|
from typing import Sequence
|
|
6
|
-
|
|
7
6
|
from modelinfo.architecture import identify_architecture_name
|
|
8
7
|
from modelinfo.calculator import calculate_footprint
|
|
9
8
|
from modelinfo.parsers.gguf import parse_gguf_header
|
|
@@ -12,6 +11,29 @@ from modelinfo.parsers.safetensors import parse_safetensors_header
|
|
|
12
11
|
from modelinfo.ui import console, print_model_info, print_compare_info
|
|
13
12
|
|
|
14
13
|
|
|
14
|
+
class VersionAction(argparse.Action):
|
|
15
|
+
def __init__(self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help="show program's version number and exit"):
|
|
16
|
+
super().__init__(
|
|
17
|
+
option_strings=option_strings,
|
|
18
|
+
dest=dest,
|
|
19
|
+
default=default,
|
|
20
|
+
nargs=0,
|
|
21
|
+
help=help,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
def __call__(self, parser, namespace, values, option_string=None):
|
|
25
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
26
|
+
from modelinfo import __version__
|
|
27
|
+
|
|
28
|
+
try:
|
|
29
|
+
ver = version("modelinfo-cli")
|
|
30
|
+
except PackageNotFoundError:
|
|
31
|
+
ver = __version__
|
|
32
|
+
|
|
33
|
+
print(f"{parser.prog} {ver}")
|
|
34
|
+
parser.exit()
|
|
35
|
+
|
|
36
|
+
|
|
15
37
|
def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
16
38
|
parser = argparse.ArgumentParser(
|
|
17
39
|
prog="modelinfo",
|
|
@@ -72,6 +94,11 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
|
72
94
|
default=0.9,
|
|
73
95
|
help="vLLM gpu_memory_utilization ratio (default 0.9). Reserves 10 percent for PyTorch context.",
|
|
74
96
|
)
|
|
97
|
+
parser.add_argument(
|
|
98
|
+
"-v",
|
|
99
|
+
"--version",
|
|
100
|
+
action=VersionAction,
|
|
101
|
+
)
|
|
75
102
|
|
|
76
103
|
return parser.parse_args(argv)
|
|
77
104
|
|
|
@@ -114,6 +141,8 @@ def analyze_model(
|
|
|
114
141
|
elif file_path_lower.endswith(".pt") or file_path_lower.endswith(".bin"):
|
|
115
142
|
tensors = parse_pytorch_header(file_path)
|
|
116
143
|
format_name = "PyTorch"
|
|
144
|
+
elif os.path.isdir(file_path):
|
|
145
|
+
raise IsADirectoryError(f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory.")
|
|
117
146
|
else:
|
|
118
147
|
raise ValueError(f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID.")
|
|
119
148
|
|
|
@@ -190,43 +219,35 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
190
219
|
|
|
191
220
|
models = []
|
|
192
221
|
for model_path in args.file:
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
models.append((model_path.split("/")[-1], info))
|
|
206
|
-
except Exception as e:
|
|
207
|
-
console.print(f"[red]Error analyzing model '{model_path}': {e}[/red]")
|
|
208
|
-
return 1
|
|
222
|
+
info = analyze_model(
|
|
223
|
+
model_path,
|
|
224
|
+
args.context,
|
|
225
|
+
gpu_count,
|
|
226
|
+
fetch_tensors=args.tensors,
|
|
227
|
+
topology=args.topology,
|
|
228
|
+
strategy=args.strategy,
|
|
229
|
+
is_vllm=args.vllm,
|
|
230
|
+
gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
|
|
231
|
+
gpu_util=args.gpu_util
|
|
232
|
+
)
|
|
233
|
+
models.append((model_path.split("/")[-1], info))
|
|
209
234
|
|
|
210
235
|
print_compare_info(models, gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
|
|
211
236
|
return 0
|
|
212
237
|
|
|
213
238
|
file_path = args.file[0]
|
|
214
239
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
)
|
|
227
|
-
except Exception as e:
|
|
228
|
-
console.print(f"[red]Error: {e}[/red]")
|
|
229
|
-
return 1
|
|
240
|
+
info = analyze_model(
|
|
241
|
+
file_path,
|
|
242
|
+
args.context,
|
|
243
|
+
gpu_count,
|
|
244
|
+
fetch_tensors=args.tensors,
|
|
245
|
+
topology=args.topology,
|
|
246
|
+
strategy=args.strategy,
|
|
247
|
+
is_vllm=args.vllm,
|
|
248
|
+
gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
|
|
249
|
+
gpu_util=args.gpu_util
|
|
250
|
+
)
|
|
230
251
|
|
|
231
252
|
print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
|
|
232
253
|
return 0
|
|
@@ -21,6 +21,7 @@ KNOWN_GPUS = {
|
|
|
21
21
|
"rtx4060ti16gb": 16.0,
|
|
22
22
|
"rtx4060ti": 8.0,
|
|
23
23
|
"rtx4060": 8.0,
|
|
24
|
+
"rtx4050" : 6.0,
|
|
24
25
|
"rtx3090ti": 24.0,
|
|
25
26
|
"rtx3090": 24.0,
|
|
26
27
|
"rtx3080ti": 12.0,
|
|
@@ -31,6 +32,7 @@ KNOWN_GPUS = {
|
|
|
31
32
|
"rtx3060ti": 8.0,
|
|
32
33
|
"rtx306012gb": 12.0,
|
|
33
34
|
"rtx3060": 8.0,
|
|
35
|
+
"rtx3050ti" : 4.0,
|
|
34
36
|
"rtx3050": 8.0,
|
|
35
37
|
"rtx2080ti": 11.0,
|
|
36
38
|
"rtx2080super": 8.0,
|
|
@@ -40,6 +42,11 @@ KNOWN_GPUS = {
|
|
|
40
42
|
"rtx2060super": 8.0,
|
|
41
43
|
"rtx206012gb": 12.0,
|
|
42
44
|
"rtx2060": 6.0,
|
|
45
|
+
"gtx1660super": 6.0,
|
|
46
|
+
"gtx1660ti" : 6.0,
|
|
47
|
+
"gtx1660": 6.0,
|
|
48
|
+
"gtx1650super": 4.0,
|
|
49
|
+
"gtx1650": 4.0,
|
|
43
50
|
"gtx1080ti": 11.0,
|
|
44
51
|
"gtx1080": 8.0,
|
|
45
52
|
"gtx1070ti": 8.0,
|
|
@@ -106,6 +113,9 @@ KNOWN_GPUS = {
|
|
|
106
113
|
"rx6650xt": 8.0,
|
|
107
114
|
"rx6600xt": 8.0,
|
|
108
115
|
"rx6600": 8.0,
|
|
116
|
+
"rx580": 8.0,
|
|
117
|
+
"rx570": 4.0,
|
|
118
|
+
|
|
109
119
|
|
|
110
120
|
# --- AMD Data Center / Pro ---
|
|
111
121
|
"mi300x": 192.0,
|
|
@@ -29,7 +29,7 @@ def _get_hf_token() -> str | None:
|
|
|
29
29
|
|
|
30
30
|
return None
|
|
31
31
|
|
|
32
|
-
def _make_request(url: str, headers: Dict[str, str] = None) -> bytes:
|
|
32
|
+
def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None) -> bytes:
|
|
33
33
|
if headers is None:
|
|
34
34
|
headers = {}
|
|
35
35
|
|
|
@@ -40,12 +40,14 @@ def _make_request(url: str, headers: Dict[str, str] = None) -> bytes:
|
|
|
40
40
|
req = urllib.request.Request(url, headers=headers)
|
|
41
41
|
try:
|
|
42
42
|
with urllib.request.urlopen(req, timeout=10) as response:
|
|
43
|
+
if limit is not None:
|
|
44
|
+
return response.read(limit)
|
|
43
45
|
return response.read()
|
|
44
46
|
except urllib.error.HTTPError as e:
|
|
45
47
|
if e.code == 401:
|
|
46
|
-
raise PermissionError(f"Gated Model or Invalid Token
|
|
48
|
+
raise PermissionError(f"Gated/Private Model or Invalid Token (401 Unauthorized). Set the HF_TOKEN environment variable to access {url}")
|
|
47
49
|
if e.code == 404:
|
|
48
|
-
|
|
50
|
+
raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}")
|
|
49
51
|
raise
|
|
50
52
|
|
|
51
53
|
def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
|
|
@@ -54,10 +56,10 @@ def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
|
|
|
54
56
|
# 1. Fetch the first 500KB in a single roundtrip
|
|
55
57
|
headers = {"Range": "bytes=0-500000"}
|
|
56
58
|
try:
|
|
57
|
-
chunk = _make_request(url, headers=headers)
|
|
59
|
+
chunk = _make_request(url, headers=headers, limit=500000)
|
|
58
60
|
except urllib.error.HTTPError as e:
|
|
59
61
|
if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB)
|
|
60
|
-
chunk = _make_request(url)
|
|
62
|
+
chunk = _make_request(url, limit=500000)
|
|
61
63
|
else:
|
|
62
64
|
raise
|
|
63
65
|
|
|
@@ -72,7 +74,7 @@ def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
|
|
|
72
74
|
else:
|
|
73
75
|
# 3. Double-roundtrip only if the header is massive (>500KB)
|
|
74
76
|
headers = {"Range": f"bytes=8-{8+header_size-1}"}
|
|
75
|
-
json_bytes = _make_request(url, headers=headers)
|
|
77
|
+
json_bytes = _make_request(url, headers=headers, limit=header_size)
|
|
76
78
|
|
|
77
79
|
return json.loads(json_bytes)
|
|
78
80
|
|
|
@@ -86,7 +88,9 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
|
|
|
86
88
|
api_data = json.loads(_make_request(api_url).decode("utf-8"))
|
|
87
89
|
except urllib.error.HTTPError as e:
|
|
88
90
|
if e.code == 401:
|
|
89
|
-
raise PermissionError(f"Gated Model
|
|
91
|
+
raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}")
|
|
92
|
+
if e.code == 404:
|
|
93
|
+
raise FileNotFoundError(f"Could not find repository on Hugging Face (404 Not Found): {repo_id}")
|
|
90
94
|
raise
|
|
91
95
|
|
|
92
96
|
siblings = api_data.get("siblings", [])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelinfo-cli
|
|
3
|
-
Version: 1.4.2
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
|
|
5
5
|
Author: ModelInfo Contributors
|
|
6
6
|
License: MIT
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from modelinfo import __version__
|
|
4
|
+
from modelinfo.cli import parse_args
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_version_flag_prints_installed_version(capsys):
|
|
8
|
+
with pytest.raises(SystemExit) as exc_info:
|
|
9
|
+
parse_args(["--version"])
|
|
10
|
+
|
|
11
|
+
assert exc_info.value.code == 0
|
|
12
|
+
assert f"modelinfo {__version__}" in capsys.readouterr().out
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|