modelinfo-cli 1.4.1__tar.gz → 1.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modelinfo_cli-1.4.1/src/modelinfo_cli.egg-info → modelinfo_cli-1.4.2}/PKG-INFO +1 -1
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/pyproject.toml +1 -1
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/__init__.py +1 -1
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/architecture.py +15 -17
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/cli.py +2 -2
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/parsers/huggingface.py +1 -1
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2/src/modelinfo_cli.egg-info}/PKG-INFO +1 -1
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/LICENSE +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/README.md +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/setup.cfg +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/__main__.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/calculator.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/hardware.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/parsers/__init__.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/parsers/base.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/parsers/gguf.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/parsers/pytorch.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/parsers/safetensors.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo/ui.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo_cli.egg-info/SOURCES.txt +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo_cli.egg-info/dependency_links.txt +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo_cli.egg-info/entry_points.txt +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo_cli.egg-info/requires.txt +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/src/modelinfo_cli.egg-info/top_level.txt +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/tests/test_calculator.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/tests/test_constraints.py +0 -0
- {modelinfo_cli-1.4.1 → modelinfo_cli-1.4.2}/tests/test_parsers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelinfo-cli
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
|
|
5
5
|
Author: ModelInfo Contributors
|
|
6
6
|
License: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "modelinfo-cli"
|
|
7
|
-
version = "1.4.1"
|
|
7
|
+
version = "1.4.2"
|
|
8
8
|
description = "A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import json
|
|
3
1
|
from typing import Any, Dict, Tuple
|
|
4
2
|
|
|
5
3
|
def extract_architecture(tensors: Dict[str, Any], config: Dict[str, Any] = None) -> Tuple[int, int, bool]:
|
|
@@ -10,16 +8,16 @@ def extract_architecture(tensors: Dict[str, Any], config: Dict[str, Any] = None)
|
|
|
10
8
|
num_layers = 0
|
|
11
9
|
kv_dim = 0
|
|
12
10
|
is_estimate = False
|
|
13
|
-
|
|
11
|
+
|
|
14
12
|
metadata = tensors.get("__metadata__", {})
|
|
15
13
|
gen_arch = metadata.get("general.architecture")
|
|
16
|
-
|
|
14
|
+
|
|
17
15
|
# 1. Attempt explicit GGUF metadata
|
|
18
16
|
if gen_arch:
|
|
19
17
|
arch_str = str(gen_arch)
|
|
20
18
|
num_layers = metadata.get(f"{arch_str}.block_count", 0)
|
|
21
19
|
kv_heads = metadata.get(f"{arch_str}.attention.head_count_kv", 0)
|
|
22
|
-
|
|
20
|
+
|
|
23
21
|
key_length = metadata.get(f"{arch_str}.attention.key_length")
|
|
24
22
|
if not key_length:
|
|
25
23
|
embed_len = metadata.get(f"{arch_str}.embedding_length", 0)
|
|
@@ -28,7 +26,7 @@ def extract_architecture(tensors: Dict[str, Any], config: Dict[str, Any] = None)
|
|
|
28
26
|
key_length = embed_len // q_heads
|
|
29
27
|
else:
|
|
30
28
|
key_length = 0
|
|
31
|
-
|
|
29
|
+
|
|
32
30
|
if kv_heads > 0 and key_length > 0:
|
|
33
31
|
kv_dim = kv_heads * key_length
|
|
34
32
|
if num_layers > 0:
|
|
@@ -40,7 +38,7 @@ def extract_architecture(tensors: Dict[str, Any], config: Dict[str, Any] = None)
|
|
|
40
38
|
num_attention_heads = config.get("num_attention_heads", 1)
|
|
41
39
|
num_key_value_heads = config.get("num_key_value_heads", num_attention_heads)
|
|
42
40
|
hidden_size = config.get("hidden_size", 0)
|
|
43
|
-
|
|
41
|
+
|
|
44
42
|
if num_attention_heads > 0:
|
|
45
43
|
head_dim = hidden_size // num_attention_heads
|
|
46
44
|
kv_dim = num_key_value_heads * head_dim
|
|
@@ -51,11 +49,11 @@ def extract_architecture(tensors: Dict[str, Any], config: Dict[str, Any] = None)
|
|
|
51
49
|
layers_set = set()
|
|
52
50
|
found_fused = False
|
|
53
51
|
found_k_proj = False
|
|
54
|
-
|
|
52
|
+
|
|
55
53
|
for name, meta in tensors.items():
|
|
56
54
|
if name == "__metadata__":
|
|
57
55
|
continue
|
|
58
|
-
|
|
56
|
+
|
|
59
57
|
parts = name.split(".")
|
|
60
58
|
if "layers" in parts:
|
|
61
59
|
idx = parts.index("layers")
|
|
@@ -71,7 +69,7 @@ def extract_architecture(tensors: Dict[str, Any], config: Dict[str, Any] = None)
|
|
|
71
69
|
shape = meta.get("shape", [])
|
|
72
70
|
if len(shape) >= 2:
|
|
73
71
|
kv_dim = shape[0]
|
|
74
|
-
|
|
72
|
+
|
|
75
73
|
if "qkv_proj.weight" in name or "c_attn.weight" in name:
|
|
76
74
|
found_fused = True
|
|
77
75
|
if not found_k_proj:
|
|
@@ -82,7 +80,7 @@ def extract_architecture(tensors: Dict[str, Any], config: Dict[str, Any] = None)
|
|
|
82
80
|
num_layers = len(layers_set)
|
|
83
81
|
if found_fused and not found_k_proj and kv_dim > 0:
|
|
84
82
|
is_estimate = True
|
|
85
|
-
|
|
83
|
+
|
|
86
84
|
return num_layers, kv_dim, is_estimate
|
|
87
85
|
|
|
88
86
|
def identify_architecture_name(tensors: Dict[str, Any], num_layers: int, config: Dict[str, Any] = None) -> str:
|
|
@@ -90,18 +88,18 @@ def identify_architecture_name(tensors: Dict[str, Any], num_layers: int, config:
|
|
|
90
88
|
if config and "architectures" in config and config["architectures"]:
|
|
91
89
|
arch_title = config["architectures"][0]
|
|
92
90
|
return f"{arch_title} ({num_layers} layers)" if num_layers else arch_title
|
|
93
|
-
|
|
91
|
+
|
|
94
92
|
metadata = tensors.get("__metadata__", {})
|
|
95
93
|
gen_arch = metadata.get("general.architecture")
|
|
96
|
-
|
|
94
|
+
|
|
97
95
|
if gen_arch:
|
|
98
96
|
arch_title = str(gen_arch).title()
|
|
99
97
|
return f"{arch_title} ({num_layers} transformer layers)" if num_layers else arch_title
|
|
100
|
-
|
|
98
|
+
|
|
101
99
|
for name in tensors.keys():
|
|
102
100
|
if name == "__metadata__":
|
|
103
101
|
continue
|
|
104
|
-
|
|
102
|
+
|
|
105
103
|
name_lower = name.lower()
|
|
106
104
|
if "llama" in name_lower:
|
|
107
105
|
return f"Llama ({num_layers} transformer layers)" if num_layers else "Llama"
|
|
@@ -109,5 +107,5 @@ def identify_architecture_name(tensors: Dict[str, Any], num_layers: int, config:
|
|
|
109
107
|
return f"Mistral ({num_layers} transformer layers)" if num_layers else "Mistral"
|
|
110
108
|
if "qwen" in name_lower:
|
|
111
109
|
return f"Qwen ({num_layers} transformer layers)" if num_layers else "Qwen"
|
|
112
|
-
|
|
113
|
-
return f"Generic Transformer ({num_layers} layers)" if num_layers > 0 else "Unknown Architecture"
|
|
110
|
+
|
|
111
|
+
return f"Generic Transformer ({num_layers} layers)" if num_layers > 0 else "Unknown Architecture"
|
|
@@ -207,7 +207,7 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
207
207
|
console.print(f"[red]Error analyzing model '{model_path}': {e}[/red]")
|
|
208
208
|
return 1
|
|
209
209
|
|
|
210
|
-
print_compare_info(models, args.max_vram, gpu_name=gpu_name_display)
|
|
210
|
+
print_compare_info(models, gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
|
|
211
211
|
return 0
|
|
212
212
|
|
|
213
213
|
file_path = args.file[0]
|
|
@@ -228,7 +228,7 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
228
228
|
console.print(f"[red]Error: {e}[/red]")
|
|
229
229
|
return 1
|
|
230
230
|
|
|
231
|
-
print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else
|
|
231
|
+
print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
|
|
232
232
|
return 0
|
|
233
233
|
|
|
234
234
|
|
|
@@ -126,7 +126,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
|
|
|
126
126
|
def fetch_shard(shard: str):
|
|
127
127
|
return shard, _fetch_safetensors_header(repo_id, shard)
|
|
128
128
|
|
|
129
|
-
with concurrent.futures.ThreadPoolExecutor(max_workers=min(8, len(unique_shards))) as executor:
|
|
129
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor:
|
|
130
130
|
future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards}
|
|
131
131
|
for future in concurrent.futures.as_completed(future_to_shard):
|
|
132
132
|
shard, shard_header = future.result()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelinfo-cli
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: A CLI tool to inspect ML checkpoints (.safetensors, .gguf, .pt) and calculate inference VRAM, multi-GPU memory splits, and vLLM serving capacity.
|
|
5
5
|
Author: ModelInfo Contributors
|
|
6
6
|
License: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|