model-explorer-gguf 1.0.0.post1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_explorer_gguf/__init__.py +10 -0
- model_explorer_gguf/_version.py +1 -0
- model_explorer_gguf/bin/__init__.py +0 -0
- model_explorer_gguf/bin/ggufvis.py +115 -0
- model_explorer_gguf/compare.py +102 -0
- model_explorer_gguf/gguf_parser.py +376 -0
- model_explorer_gguf/graph_builder.py +947 -0
- model_explorer_gguf/main.py +80 -0
- model_explorer_gguf/settings.py +39 -0
- model_explorer_gguf/themes/gguf.json +122 -0
- model_explorer_gguf-1.0.0.post1.dev0.dist-info/METADATA +127 -0
- model_explorer_gguf-1.0.0.post1.dev0.dist-info/RECORD +17 -0
- model_explorer_gguf-1.0.0.post1.dev0.dist-info/WHEEL +5 -0
- model_explorer_gguf-1.0.0.post1.dev0.dist-info/entry_points.txt +2 -0
- model_explorer_gguf-1.0.0.post1.dev0.dist-info/licenses/LICENSE +201 -0
- model_explorer_gguf-1.0.0.post1.dev0.dist-info/licenses/NOTICE +16 -0
- model_explorer_gguf-1.0.0.post1.dev0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Version string of the model-explorer-gguf distribution.
__version__ = "1.0.0.post1.dev0"
|
|
File without changes
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""A shortcut to run model explorer with GGUF extension."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_config(config_path, model_path):
    """Load settings from the first usable ``.ggufvis.json`` config file.

    Candidates are tried in order: the explicit ``config_path`` (if given),
    ``.ggufvis.json`` in the current working directory, then ``.ggufvis.json``
    next to the model file. A candidate that is missing, unreadable, not
    valid UTF-8/JSON, or whose top-level value is not a JSON object is
    skipped and the next candidate is tried.

    Args:
        config_path: Optional path to a config file; falsy values are ignored.
        model_path: Path to the model file; its parent directory is searched.

    Returns:
        The parsed settings dict, or an empty dict if no candidate is usable.
    """
    candidates = []
    if config_path:
        candidates.append(Path(config_path))
    candidates.append(Path.cwd() / ".ggufvis.json")
    candidates.append(Path(model_path).parent / ".ggufvis.json")

    for path in candidates:
        if not path.is_file():
            continue
        try:
            # Read as UTF-8 explicitly instead of depending on the platform's
            # locale encoding.
            loaded = json.loads(path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        # The caller merges the result with ``{**config}``, so only a JSON
        # object is usable; arrays, strings, numbers etc. are skipped.
        if isinstance(loaded, dict):
            return loaded
    return {}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_settings_args(settings_args):
    """Convert ``KEY=VALUE`` command-line tokens into a settings dict.

    Tokens without ``=`` are ignored. Keys and values are stripped of
    surrounding whitespace. A value is stored as ``bool`` when it is
    ``true``/``false`` (case-insensitive), else as ``int`` if it parses as
    one, else as ``float`` if it parses as one, else as the stripped string.

    Args:
        settings_args: Iterable of ``KEY=VALUE`` strings, or a falsy value.

    Returns:
        Dict mapping each key to its converted value (empty for falsy input).
    """
    parsed = {}
    for token in settings_args or ():
        name, separator, raw = token.partition("=")
        if not separator:
            continue
        name = name.strip()
        raw = raw.strip()
        lowered = raw.lower()
        if lowered in ("true", "false"):
            parsed[name] = lowered == "true"
            continue
        # Try the numeric conversions in order; keep the text if both fail.
        for convert in (int, float):
            try:
                parsed[name] = convert(raw)
                break
            except ValueError:
                continue
        else:
            parsed[name] = raw
    return parsed
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def format_stats_text(stats):
    """Render a stats mapping as text, one ``key: value`` line per entry.

    Args:
        stats: Mapping of statistic name to value.

    Returns:
        The lines joined with newlines (an empty string for an empty mapping).
    """
    lines = []
    for label, value in stats.items():
        lines.append(f" {label}: {value}")
    return "\n".join(lines)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def main():
    """Run the ``ggufvis`` command-line interface.

    With ``--stats`` the model is parsed in-process and a summary is printed
    to stdout. Otherwise ``model-explorer`` is launched as a subprocess with
    the GGUF extension enabled; the merged settings (config file values
    overridden by ``--settings``, plus the ``--compare`` path) are passed to
    it as JSON in the ``GGUFVIS_SETTINGS`` environment variable.

    Returns:
        Exit status for the process: 0 after ``--stats`` or when interrupted
        with Ctrl-C, 127 when ``model-explorer`` cannot be started, otherwise
        the exit status of the ``model-explorer`` subprocess.
    """
    parser = argparse.ArgumentParser(description="Visualize GGUF models with Model Explorer")
    parser.add_argument("model", help="Path to .gguf file")
    parser.add_argument("--compare", metavar="MODEL", help="Path to second .gguf for comparison")
    parser.add_argument("--stats", action="store_true", help="Print model statistics to stdout")
    parser.add_argument(
        "--format",
        choices=["json", "text"],
        default="text",
        help="Output format for --stats (default: text)",
    )
    parser.add_argument("--config", metavar="PATH", help="Path to config file")
    parser.add_argument(
        "--settings",
        nargs="*",
        metavar="KEY=VALUE",
        help="Override settings (e.g., show_tensor_values=true)",
    )
    args = parser.parse_args()

    file_config = load_config(args.config, args.model)
    cli_settings = parse_settings_args(args.settings)
    # Command-line settings take precedence over values from the config file.
    merged = {**file_config, **cli_settings}

    if args.stats:
        # Imported lazily so the parser is only loaded when stats are requested.
        from model_explorer_gguf.gguf_parser import GGUFParser

        gguf_parser = GGUFParser(args.model)
        gguf_parser.parse()
        stats = {
            "model_name": gguf_parser.get_model_name(),
            "architecture": gguf_parser.get_architecture(),
            "context_length": gguf_parser.get_context_length(),
            "embedding_length": gguf_parser.get_embedding_length(),
            "block_count": gguf_parser.get_block_count(),
            "tensor_count": gguf_parser.header.tensor_count,
            "gguf_version": gguf_parser.header.version,
        }
        if args.format == "json":
            print(json.dumps(stats, indent=2))
        else:
            print(format_stats_text(stats))
        return 0

    cmd = ["model-explorer", "--extensions", "model_explorer_gguf", args.model]
    if args.compare:
        merged["comparison_model_path"] = args.compare

    env = os.environ.copy()
    if merged:
        env["GGUFVIS_SETTINGS"] = json.dumps(merged)

    try:
        completed = subprocess.run(cmd, env=env)
    except KeyboardInterrupt:
        # Ctrl-C while the subprocess runs is treated as a clean exit.
        return 0
    except FileNotFoundError:
        print(
            "ggufvis: could not start 'model-explorer'; is it installed and on PATH?",
            file=sys.stderr,
        )
        return 127
    # Propagate the subprocess status instead of always reporting success.
    return completed.returncode


if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Model comparison and diff view for GGUF models."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
from model_explorer.graph_builder import Graph
|
|
6
|
+
|
|
7
|
+
from . import graph_builder as _gb
|
|
8
|
+
from .gguf_parser import GGUFParser
|
|
9
|
+
from .settings import Settings
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def compare_models(
    model_a_path: str, model_b_path: str, settings: Optional[Settings] = None
) -> Dict[str, Any]:
    """Build a side-by-side comparison of two GGUF models.

    Both files are parsed (and their tensor data loaded when
    ``settings.show_tensor_values`` is set), a graph is built for each, and
    the diff summary from ``_diff_metadata`` is attached to both graphs.

    Args:
        model_a_path: Path to the first GGUF file.
        model_b_path: Path to the second GGUF file.
        settings: Settings to use; a default ``Settings()`` when ``None``.

    Returns:
        A dict with a single ``graphCollections`` entry whose label is
        ``"<name a> vs <name b>"`` and whose ``graphs`` list holds the two
        graphs in (a, b) order.
    """
    active = Settings() if settings is None else settings

    # Construct and parse model A completely before touching model B.
    parsers = []
    for model_path in (model_a_path, model_b_path):
        current = GGUFParser(model_path)
        current.parse()
        parsers.append(current)

    if active.show_tensor_values:
        for current in parsers:
            current.load_all_tensor_data()

    graphs = [_gb.build_graph(current, settings=active) for current in parsers]

    summary = _diff_metadata(*parsers)
    for graph in graphs:
        _apply_diff_metadata(graph, summary)

    first_name, second_name = (current.get_model_name() for current in parsers)
    collection = {
        "label": f"{first_name} vs {second_name}",
        "graphs": graphs,
    }
    return {"graphCollections": [collection]}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _apply_diff_metadata(graph: Graph, diff_attrs: Dict[str, str]) -> None:
|
|
53
|
+
if graph.groupNodeAttributes:
|
|
54
|
+
graph.groupNodeAttributes[""].update(diff_attrs)
|
|
55
|
+
else:
|
|
56
|
+
graph.groupNodeAttributes = {"": diff_attrs}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _diff_metadata(parser_a: GGUFParser, parser_b: GGUFParser) -> Dict[str, str]:
|
|
60
|
+
"""Compute diff summary between two parsers for GroupNodeAttributes."""
|
|
61
|
+
names_a = {t.name for t in parser_a.tensors}
|
|
62
|
+
names_b = {t.name for t in parser_b.tensors}
|
|
63
|
+
|
|
64
|
+
matching = names_a & names_b
|
|
65
|
+
only_a = names_a - names_b
|
|
66
|
+
only_b = names_b - names_a
|
|
67
|
+
|
|
68
|
+
attrs: Dict[str, str] = {
|
|
69
|
+
"diff.matching_count": str(len(matching)),
|
|
70
|
+
"diff.only_in_a_count": str(len(only_a)),
|
|
71
|
+
"diff.only_in_b_count": str(len(only_b)),
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if only_a:
|
|
75
|
+
attrs["diff.only_in_a"] = ", ".join(sorted(only_a)[:20])
|
|
76
|
+
if only_b:
|
|
77
|
+
attrs["diff.only_in_b"] = ", ".join(sorted(only_b)[:20])
|
|
78
|
+
|
|
79
|
+
shape_diffs = []
|
|
80
|
+
type_diffs = []
|
|
81
|
+
tensor_a_map = {t.name: t for t in parser_a.tensors}
|
|
82
|
+
tensor_b_map = {t.name: t for t in parser_b.tensors}
|
|
83
|
+
|
|
84
|
+
for name in sorted(matching):
|
|
85
|
+
ta = tensor_a_map[name]
|
|
86
|
+
tb = tensor_b_map[name]
|
|
87
|
+
if ta.dimensions != tb.dimensions:
|
|
88
|
+
shape_diffs.append(name)
|
|
89
|
+
if ta.ggml_type != tb.ggml_type:
|
|
90
|
+
type_diffs.append(name)
|
|
91
|
+
|
|
92
|
+
if shape_diffs:
|
|
93
|
+
attrs["diff.shape_diffs"] = ", ".join(shape_diffs[:20])
|
|
94
|
+
attrs["diff.shape_diffs_count"] = str(len(shape_diffs))
|
|
95
|
+
if type_diffs:
|
|
96
|
+
attrs["diff.type_diffs"] = ", ".join(type_diffs[:20])
|
|
97
|
+
attrs["diff.type_diffs_count"] = str(len(type_diffs))
|
|
98
|
+
|
|
99
|
+
attrs["diff.tensor_count_a"] = str(len(parser_a.tensors))
|
|
100
|
+
attrs["diff.tensor_count_b"] = str(len(parser_b.tensors))
|
|
101
|
+
|
|
102
|
+
return attrs
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GGUF file format parser.
|
|
3
|
+
|
|
4
|
+
This module implements a complete parser for the GGUF (GGML Universal File) format
|
|
5
|
+
as specified in the llama.cpp project.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import glob
|
|
9
|
+
import re
|
|
10
|
+
import struct
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from enum import IntEnum
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Union
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class GGUFValueType(IntEnum):
    """GGUF metadata value types.

    Each member is an integer type code. ``GGUFParser._read_value`` compares
    the ``value_type`` it is given against these members to choose how the
    value is decoded.
    """

    # Integers up to 32 bits and the 32-bit float.
    UINT8 = 0
    INT8 = 1
    UINT16 = 2
    INT16 = 3
    UINT32 = 4
    INT32 = 5
    FLOAT32 = 6
    # Non-numeric and container types.
    BOOL = 7
    STRING = 8
    ARRAY = 9
    # The 64-bit types are numbered after ARRAY.
    UINT64 = 10
    INT64 = 11
    FLOAT64 = 12
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class GGMLQuantizationType(IntEnum):
    """GGML quantization types supported in GGUF.

    Each member is the integer type code stored for a tensor; the same codes
    are the keys of ``GGML_TYPE_NAMES``.
    """

    F32 = 0
    F16 = 1
    Q4_0 = 2
    Q4_1 = 3
    # Codes 4 and 5 have no member in this enum.
    Q5_0 = 6
    Q5_1 = 7
    Q8_0 = 8
    Q8_1 = 9
    Q2_K = 10
    Q3_K = 11
    Q4_K = 12
    Q5_K = 13
    Q6_K = 14
    Q8_K = 15
    IQ2_XXS = 16
    IQ2_XS = 17
    IQ3_XXS = 18
    IQ1_S = 19
    IQ4_NL = 20
    IQ3_S = 21
    IQ2_S = 22
    IQ4_XS = 23
    I8 = 24
    I16 = 25
    I32 = 26
    I64 = 27
    F64 = 28
    IQ1_M = 29
    BF16 = 30
    Q4_0_4_4 = 31
    Q4_0_4_8 = 32
    Q4_0_8_8 = 33
    TQ1_0 = 34
    TQ2_0 = 35
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Display name for each GGML tensor type: every member of GGMLQuantizationType
# maps to its own member name (e.g. GGMLQuantizationType.Q4_K -> "Q4_K"), in
# declaration order.
GGML_TYPE_NAMES = {
    ggml_type: ggml_type.name for ggml_type in GGMLQuantizationType
}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass
class GGUFHeader:
    """GGUF file header.

    Holds the four values that ``GGUFParser._parse_header`` reads from the
    start of the file, in the order they are read.
    """

    # The 4 magic bytes read from the start of the file.
    magic: bytes
    # Format version, read as a little-endian unsigned 32-bit integer.
    version: int
    # Number of tensor info records, read as a little-endian unsigned 64-bit integer.
    tensor_count: int
    # Number of metadata key/value pairs, read as a little-endian unsigned 64-bit integer.
    metadata_kv_count: int
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass
class GGUFTensorInfo:
    """GGUF tensor information.

    One record per tensor, as read by ``GGUFParser._parse_tensor_info``.
    """

    # Tensor name (length-prefixed UTF-8 string in the file).
    name: str
    # Number of dimensions; equals len(dimensions) as parsed.
    n_dimensions: int
    # Size of each dimension, in the order stored in the file.
    dimensions: List[int]
    # Integer tensor type code (see GGMLQuantizationType / GGML_TYPE_NAMES).
    ggml_type: int
    # Offset value stored for the tensor, kept exactly as read from the file.
    offset: int
    # NOTE(review): presumably the index and path of the shard file holding this
    # tensor; nothing in this module assigns them, so they keep their defaults
    # here - confirm against callers.
    shard_idx: int = 0
    shard_file: str = ""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class GGUFParser:
    """Parser for GGUF files.

    Reads the fixed header, the metadata key/value section and the tensor
    info table of a GGUF file. Tensor payloads are only read on demand by
    ``load_tensor_data`` / ``load_all_tensor_data``, which delegate to the
    ``gguf`` package.
    """

    GGUF_MAGIC = b"GGUF"
    GGUF_VERSION = 3  # highest format version accepted by _parse_header
    DEFAULT_ALIGNMENT = 32  # used when metadata has no "general.alignment"

    def __init__(self, file_path: str):
        """Initialize the GGUF parser.

        Nothing is read from disk until one of the ``parse*`` methods is called.

        Args:
            file_path: Path to the GGUF file
        """
        self.file_path = Path(file_path)
        self.header: Optional[GGUFHeader] = None
        self.metadata: Dict[str, Any] = {}
        self.tensors: List[GGUFTensorInfo] = []
        self.alignment = self.DEFAULT_ALIGNMENT
        self.tensor_data_offset = 0
        self.tensor_data: Optional[dict] = None

    def parse(self) -> None:
        """Parse the header, metadata and tensor info of the GGUF file.

        Calling this more than once re-reads the file and replaces the
        previously parsed metadata and tensor list.

        Raises:
            ValueError: On a bad magic number, an unsupported version, an
                unknown metadata value type, a truncated string or an
                invalid ``general.alignment`` value.
            struct.error: If the file ends in the middle of a fixed-size field.
            OSError: If the file cannot be opened or read.
        """
        with open(self.file_path, "rb") as f:
            self._parse_header(f)
            self._parse_metadata(f)
            self._parse_tensor_info(f)

    @staticmethod
    def _unpack(f: BinaryIO, fmt: str) -> Any:
        """Read exactly one ``struct`` value of format ``fmt`` from ``f``.

        Raises:
            struct.error: If fewer bytes than the format needs are available.
        """
        return struct.unpack(fmt, f.read(struct.calcsize(fmt)))[0]

    def _parse_header(self, f: BinaryIO) -> None:
        """Parse the GGUF header and store it in ``self.header``.

        Raises:
            ValueError: If the magic bytes are wrong or the version is newer
                than ``GGUF_VERSION``.
        """
        magic = f.read(4)
        if magic != self.GGUF_MAGIC:
            raise ValueError(f"Invalid GGUF magic number: {magic}")

        version = self._unpack(f, "<I")
        if version > self.GGUF_VERSION:
            raise ValueError(f"Unsupported GGUF version: {version}")

        tensor_count = self._unpack(f, "<Q")
        metadata_kv_count = self._unpack(f, "<Q")

        self.header = GGUFHeader(
            magic=magic,
            version=version,
            tensor_count=tensor_count,
            metadata_kv_count=metadata_kv_count,
        )

    def _read_string(self, f: BinaryIO) -> str:
        """Read a GGUF string (unsigned 64-bit length prefix + UTF-8 bytes).

        Raises:
            ValueError: If the file ends before the announced number of bytes
                (instead of silently returning a shortened string), or if the
                bytes are not valid UTF-8 (``UnicodeDecodeError``).
        """
        length = self._unpack(f, "<Q")
        raw = f.read(length)
        if len(raw) != length:
            raise ValueError(
                f"Truncated GGUF string: expected {length} bytes, got {len(raw)}"
            )
        return raw.decode("utf-8")

    def _read_value(self, f: BinaryIO, value_type: int) -> Any:
        """Read a metadata value based on its type.

        Args:
            f: File positioned at the start of the value.
            value_type: Integer type code (a ``GGUFValueType`` value).

        Raises:
            ValueError: If ``value_type`` is not a known type code.
        """
        if value_type == GGUFValueType.UINT8:
            return self._unpack(f, "<B")
        if value_type == GGUFValueType.INT8:
            return self._unpack(f, "<b")
        if value_type == GGUFValueType.UINT16:
            return self._unpack(f, "<H")
        if value_type == GGUFValueType.INT16:
            return self._unpack(f, "<h")
        if value_type == GGUFValueType.UINT32:
            return self._unpack(f, "<I")
        if value_type == GGUFValueType.INT32:
            return self._unpack(f, "<i")
        if value_type == GGUFValueType.FLOAT32:
            return self._unpack(f, "<f")
        if value_type == GGUFValueType.UINT64:
            return self._unpack(f, "<Q")
        if value_type == GGUFValueType.INT64:
            return self._unpack(f, "<q")
        if value_type == GGUFValueType.FLOAT64:
            return self._unpack(f, "<d")
        if value_type == GGUFValueType.BOOL:
            return bool(self._unpack(f, "<B"))
        if value_type == GGUFValueType.STRING:
            return self._read_string(f)
        if value_type == GGUFValueType.ARRAY:
            return self._read_array(f)
        raise ValueError(f"Unknown value type: {value_type}")

    def _read_array(self, f: BinaryIO) -> List[Any]:
        """Read an array value: element type code, element count, elements."""
        element_type = self._unpack(f, "<I")
        length = self._unpack(f, "<Q")
        return [self._read_value(f, element_type) for _ in range(length)]

    def _parse_metadata(self, f: BinaryIO) -> None:
        """Parse the metadata key-value pairs into ``self.metadata``.

        Requires ``self.header`` to be set. Existing entries are discarded
        first so that parsing again does not mix old and new values.
        """
        self.metadata.clear()
        for _ in range(self.header.metadata_kv_count):
            key = self._read_string(f)
            value_type = self._unpack(f, "<I")
            self.metadata[key] = self._read_value(f, value_type)

        self._apply_alignment()

    def _apply_alignment(self) -> None:
        """Take ``self.alignment`` from ``general.alignment`` metadata if present.

        Raises:
            ValueError: If the value is not a positive integer; such a value
                would otherwise make ``_align_offset`` divide by zero or
                produce a meaningless offset.
        """
        if "general.alignment" not in self.metadata:
            return
        alignment = self.metadata["general.alignment"]
        if isinstance(alignment, bool) or not isinstance(alignment, int) or alignment <= 0:
            raise ValueError(f"Invalid general.alignment value: {alignment!r}")
        self.alignment = alignment

    def _parse_tensor_info(self, f: BinaryIO) -> None:
        """Parse the tensor info table into ``self.tensors``.

        Requires ``self.header`` to be set. The list is emptied first so that
        parsing again does not append duplicate entries. Afterwards
        ``self.tensor_data_offset`` is set to the current file position
        rounded up to ``self.alignment``.
        """
        self.tensors.clear()
        for _ in range(self.header.tensor_count):
            name = self._read_string(f)
            n_dimensions = self._unpack(f, "<I")
            dimensions = [self._unpack(f, "<Q") for _ in range(n_dimensions)]
            ggml_type = self._unpack(f, "<I")
            offset = self._unpack(f, "<Q")

            self.tensors.append(
                GGUFTensorInfo(
                    name=name,
                    n_dimensions=n_dimensions,
                    dimensions=dimensions,
                    ggml_type=ggml_type,
                    offset=offset,
                )
            )

        # Calculate tensor data offset
        self.tensor_data_offset = self._align_offset(f.tell(), self.alignment)

    def _align_offset(self, offset: int, alignment: int) -> int:
        """Round ``offset`` up to the next multiple of ``alignment``."""
        return (offset + alignment - 1) // alignment * alignment

    def get_architecture(self) -> str:
        """Return ``general.architecture`` from metadata, or ``"unknown"``."""
        return self.metadata.get("general.architecture", "unknown")

    def get_model_name(self) -> str:
        """Return ``general.name`` from metadata, or ``"unknown"``."""
        return self.metadata.get("general.name", "unknown")

    def get_context_length(self) -> int:
        """Return ``<architecture>.context_length`` from metadata, or 0."""
        arch = self.get_architecture()
        return self.metadata.get(f"{arch}.context_length", 0)

    def get_embedding_length(self) -> int:
        """Return ``<architecture>.embedding_length`` from metadata, or 0."""
        arch = self.get_architecture()
        return self.metadata.get(f"{arch}.embedding_length", 0)

    def get_block_count(self) -> int:
        """Return ``<architecture>.block_count`` from metadata, or 0."""
        arch = self.get_architecture()
        return self.metadata.get(f"{arch}.block_count", 0)

    def get_tensor_type_name(self, ggml_type: int) -> str:
        """Return the name of a tensor type code, or ``UNKNOWN_<code>``."""
        return GGML_TYPE_NAMES.get(ggml_type, f"UNKNOWN_{ggml_type}")

    def parse_headers_only(self) -> None:
        """Parse only the header and metadata, skipping tensor info.

        This is a fast path for quick metadata inspection. It reads:
        - Magic (4 bytes) + version (4 bytes) + tensor_count (8 bytes) +
          metadata_kv_count (8 bytes) = 24 bytes fixed header
        - Metadata KV pairs (bounded by metadata_kv_count)

        ``self.alignment`` is still taken from the metadata when present, but
        ``self.tensors`` and ``self.tensor_data_offset`` are left untouched.
        """
        with open(self.file_path, "rb") as f:
            self._parse_header(f)
            self._parse_metadata(f)

    def parse_multi_shard(self) -> None:
        """Parse a model that may be split over several shard files.

        Shard files are located with ``discover_shards``. With at most one
        shard this is the same as ``parse()``. Otherwise ``self.file_path``
        is switched to the first shard, that shard is parsed, and the paths of
        all shards are stored in ``self._shard_paths``.

        NOTE(review): only the first shard's header, metadata and tensor info
        are read here; tensors described in later shards are not added to
        ``self.tensors`` and ``shard_idx`` / ``shard_file`` are not filled in.
        Confirm whether callers read the remaining shards themselves.
        """
        shards = discover_shards(str(self.file_path))

        if len(shards) <= 1:
            self.parse()
            return

        self.file_path = Path(shards[0])
        with open(self.file_path, "rb") as f:
            self._parse_header(f)
            self._parse_metadata(f)
            self._parse_tensor_info(f)

        self._shard_paths = [Path(s) for s in shards]

    def load_tensor_data(self, tensor_name: str) -> "np.ndarray":
        """Load and dequantize a single tensor's data using gguf.GGUFReader.

        Args:
            tensor_name: Name of the tensor to load from ``self.file_path``.

        Raises:
            KeyError: If the file has no tensor with that name.
        """
        # Imported lazily so the gguf package is only needed for tensor data.
        from gguf import GGUFReader as _GGUFReader
        from gguf import dequantize as _dequantize

        reader = _GGUFReader(str(self.file_path))
        for rt in reader.tensors:
            if rt.name == tensor_name:
                return _dequantize(rt.data, rt.tensor_type)
        raise KeyError(f"Tensor '{tensor_name}' not found")

    def load_all_tensor_data(self) -> dict:
        """Load and dequantize every tensor in ``self.file_path``.

        The result is also stored in ``self.tensor_data``.

        Returns:
            Dict mapping tensor name to its dequantized array.
        """
        # Imported lazily so the gguf package is only needed for tensor data.
        from gguf import GGUFReader as _GGUFReader
        from gguf import dequantize as _dequantize

        reader = _GGUFReader(str(self.file_path))
        result = {rt.name: _dequantize(rt.data, rt.tensor_type) for rt in reader.tensors}
        self.tensor_data = result
        return result
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def discover_shards(model_path: str) -> list:
    """Find all shard files for a split GGUF model.

    A shard file name contains ``.gguf-split.`` and ends in
    ``.<index>-of-<total>``. When ``model_path`` has that form, the files
    in the same directory that share its name up to the index and announce
    the same total are returned, sorted numerically by index. Files
    belonging to other models, or not following the naming scheme, are
    ignored.

    Args:
        model_path: Path to a GGUF file or to any one shard of a split model.

    Returns:
        List of shard paths as strings, or ``[model_path]`` when the path is
        not a shard name, its directory cannot be listed, or nothing matches.
    """
    path = Path(model_path)
    shard_name = re.compile(r"(.*\.)(\d+)-of-(\d+)")

    own = shard_name.fullmatch(path.name)
    if ".gguf-split." not in path.name or own is None:
        return [model_path]
    base, total = own.group(1), int(own.group(3))

    parent = path.parent
    try:
        # Listing the directory (rather than globbing) keeps directory names
        # containing glob metacharacters such as "[" working.
        names = [entry.name for entry in parent.iterdir()]
    except OSError:
        return [model_path]

    indexed = []
    for name in names:
        match = shard_name.fullmatch(name)
        if match and match.group(1) == base and int(match.group(3)) == total:
            indexed.append((int(match.group(2)), name))
    indexed.sort()

    shards = [str(parent / name) for _, name in indexed]
    return shards if shards else [model_path]
|