haoline 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haoline/.streamlit/config.toml +10 -0
- haoline/__init__.py +248 -0
- haoline/analyzer.py +935 -0
- haoline/cli.py +2712 -0
- haoline/compare.py +811 -0
- haoline/compare_visualizations.py +1564 -0
- haoline/edge_analysis.py +525 -0
- haoline/eval/__init__.py +131 -0
- haoline/eval/adapters.py +844 -0
- haoline/eval/cli.py +390 -0
- haoline/eval/comparison.py +542 -0
- haoline/eval/deployment.py +633 -0
- haoline/eval/schemas.py +833 -0
- haoline/examples/__init__.py +15 -0
- haoline/examples/basic_inspection.py +74 -0
- haoline/examples/compare_models.py +117 -0
- haoline/examples/hardware_estimation.py +78 -0
- haoline/format_adapters.py +1001 -0
- haoline/formats/__init__.py +123 -0
- haoline/formats/coreml.py +250 -0
- haoline/formats/gguf.py +483 -0
- haoline/formats/openvino.py +255 -0
- haoline/formats/safetensors.py +273 -0
- haoline/formats/tflite.py +369 -0
- haoline/hardware.py +2307 -0
- haoline/hierarchical_graph.py +462 -0
- haoline/html_export.py +1573 -0
- haoline/layer_summary.py +769 -0
- haoline/llm_summarizer.py +465 -0
- haoline/op_icons.py +618 -0
- haoline/operational_profiling.py +1492 -0
- haoline/patterns.py +1116 -0
- haoline/pdf_generator.py +265 -0
- haoline/privacy.py +250 -0
- haoline/pydantic_models.py +241 -0
- haoline/report.py +1923 -0
- haoline/report_sections.py +539 -0
- haoline/risks.py +521 -0
- haoline/schema.py +523 -0
- haoline/streamlit_app.py +2024 -0
- haoline/tests/__init__.py +4 -0
- haoline/tests/conftest.py +123 -0
- haoline/tests/test_analyzer.py +868 -0
- haoline/tests/test_compare_visualizations.py +293 -0
- haoline/tests/test_edge_analysis.py +243 -0
- haoline/tests/test_eval.py +604 -0
- haoline/tests/test_format_adapters.py +460 -0
- haoline/tests/test_hardware.py +237 -0
- haoline/tests/test_hardware_recommender.py +90 -0
- haoline/tests/test_hierarchical_graph.py +326 -0
- haoline/tests/test_html_export.py +180 -0
- haoline/tests/test_layer_summary.py +428 -0
- haoline/tests/test_llm_patterns.py +540 -0
- haoline/tests/test_llm_summarizer.py +339 -0
- haoline/tests/test_patterns.py +774 -0
- haoline/tests/test_pytorch.py +327 -0
- haoline/tests/test_report.py +383 -0
- haoline/tests/test_risks.py +398 -0
- haoline/tests/test_schema.py +417 -0
- haoline/tests/test_tensorflow.py +380 -0
- haoline/tests/test_visualizations.py +316 -0
- haoline/universal_ir.py +856 -0
- haoline/visualizations.py +1086 -0
- haoline/visualize_yolo.py +44 -0
- haoline/web.py +110 -0
- haoline-0.3.0.dist-info/METADATA +471 -0
- haoline-0.3.0.dist-info/RECORD +70 -0
- haoline-0.3.0.dist-info/WHEEL +4 -0
- haoline-0.3.0.dist-info/entry_points.txt +5 -0
- haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
haoline/formats/gguf.py
ADDED
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
# Copyright (c) 2025 HaoLine Contributors
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
GGUF format reader for llama.cpp models.
|
|
6
|
+
|
|
7
|
+
GGUF (GGML Universal Format) is the standard format for llama.cpp
|
|
8
|
+
and other GGML-based inference engines. This reader extracts:
|
|
9
|
+
- Model metadata (architecture, context length, etc.)
|
|
10
|
+
- Tensor information (names, shapes, quantization types)
|
|
11
|
+
- Memory footprint estimates
|
|
12
|
+
|
|
13
|
+
Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import struct
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from enum import IntEnum
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any, BinaryIO
|
|
23
|
+
|
|
24
|
+
# Four-byte signature expected at the very start of a GGUF file; both the
# reader and the is_gguf_file() sniffing helper compare against it.
GGUF_MAGIC = b"GGUF"

# Inclusive range of GGUF format versions the reader accepts; any other
# version number in the header is rejected with ValueError.
GGUF_VERSION_MIN = 2
GGUF_VERSION_MAX = 3
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class GGMLType(IntEnum):
    """Numeric tensor type ids as stored in a GGUF tensor table.

    The values mirror ggml's ``enum ggml_type``. Ids 4 and 5 are intentionally
    absent (they belonged to quantization formats that were removed upstream).
    """

    F32 = 0
    F16 = 1
    Q4_0 = 2
    Q4_1 = 3
    Q5_0 = 6
    Q5_1 = 7
    Q8_0 = 8
    Q8_1 = 9
    Q2_K = 10
    Q3_K = 11
    Q4_K = 12
    Q5_K = 13
    Q6_K = 14
    Q8_K = 15
    IQ2_XXS = 16
    IQ2_XS = 17
    IQ3_XXS = 18
    IQ1_S = 19
    IQ4_NL = 20
    IQ3_S = 21
    IQ2_S = 22
    IQ4_XS = 23
    I8 = 24
    I16 = 25
    I32 = 26
    I64 = 27
    F64 = 28
    # Id 29 is IQ1_M in ggml; BF16 is 30. Mapping BF16 to 29 mislabels IQ1_M
    # tensors as BF16 and leaves real BF16 tensors reported as unknown.
    IQ1_M = 29
    BF16 = 30


# Average storage cost in bits per weight for each tensor type.
# For block-quantized types this is (block size in bytes * 8) / weights per
# block, so it already includes the per-block scales and offsets.
GGML_TYPE_BITS: dict[int, float] = {
    GGMLType.F32: 32.0,
    GGMLType.F16: 16.0,
    GGMLType.BF16: 16.0,
    GGMLType.Q4_0: 4.5,  # 4 bits + 0.5 for scales
    GGMLType.Q4_1: 5.0,
    GGMLType.Q5_0: 5.5,
    GGMLType.Q5_1: 6.0,
    GGMLType.Q8_0: 8.5,
    GGMLType.Q8_1: 9.0,
    GGMLType.Q2_K: 2.625,  # 84-byte block / 256 weights
    GGMLType.Q3_K: 3.4375,
    GGMLType.Q4_K: 4.5,
    GGMLType.Q5_K: 5.5,
    GGMLType.Q6_K: 6.5625,
    GGMLType.Q8_K: 9.125,  # 292-byte block / 256 weights
    GGMLType.IQ2_XXS: 2.0625,
    GGMLType.IQ2_XS: 2.3125,
    GGMLType.IQ3_XXS: 3.0625,
    GGMLType.IQ1_S: 1.5625,  # 50-byte block / 256 weights
    GGMLType.IQ4_NL: 4.5,
    GGMLType.IQ3_S: 3.4375,
    GGMLType.IQ2_S: 2.5625,  # 82-byte block / 256 weights
    GGMLType.IQ4_XS: 4.25,
    GGMLType.IQ1_M: 1.75,  # 56-byte block / 256 weights
    GGMLType.I8: 8.0,
    GGMLType.I16: 16.0,
    GGMLType.I32: 32.0,
    GGMLType.I64: 64.0,
    GGMLType.F64: 64.0,
}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def ggml_type_name(type_id: int) -> str:
    """Translate a numeric GGML type id into its symbolic name.

    Args:
        type_id: Type id as stored in a tensor table entry.

    Returns:
        The matching ``GGMLType`` member name, or ``UNKNOWN_<id>`` when the
        id has no corresponding member.
    """
    try:
        member = GGMLType(type_id)
    except ValueError:
        # Not a known member: keep the raw id visible instead of failing.
        return f"UNKNOWN_{type_id}"
    return member.name
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class GGUFValueType(IntEnum):
    """Type tags for GGUF metadata values.

    A tag is read as a uint32 in front of every metadata value and as the
    element type of every array. Note that the numeric order is not a width
    order: the 64-bit types come after ARRAY.
    """

    UINT8 = 0  # unsigned 8-bit integer
    INT8 = 1  # signed 8-bit integer
    UINT16 = 2  # unsigned 16-bit integer
    INT16 = 3  # signed 16-bit integer
    UINT32 = 4  # unsigned 32-bit integer
    INT32 = 5  # signed 32-bit integer
    FLOAT32 = 6  # 32-bit float
    BOOL = 7  # single byte; the reader treats any non-zero value as True
    STRING = 8  # uint64 byte length followed by UTF-8 encoded bytes
    ARRAY = 9  # uint32 element type, uint64 element count, then the elements
    UINT64 = 10  # unsigned 64-bit integer
    INT64 = 11  # signed 64-bit integer
    FLOAT64 = 12  # 64-bit float
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass
class TensorInfo:
    """Descriptor for one entry of a GGUF file's tensor table.

    The fields hold the raw values read from the file; the properties derive
    a readable type name, the element count and a size estimate from them.
    """

    name: str
    n_dims: int
    dims: tuple[int, ...]
    type_id: int
    offset: int

    @property
    def type_name(self) -> str:
        """Symbolic name for ``type_id`` as produced by ``ggml_type_name``."""
        return ggml_type_name(self.type_id)

    @property
    def n_elements(self) -> int:
        """Product of all entries of ``dims`` (1 when ``dims`` is empty)."""
        count = 1
        for extent in self.dims:
            count = count * extent
        return count

    @property
    def bits_per_element(self) -> float:
        """Bits of storage per element; types without an entry count as 32."""
        fallback_bits = 32.0
        return GGML_TYPE_BITS.get(self.type_id, fallback_bits)

    @property
    def size_bytes(self) -> int:
        """Estimated tensor size in bytes, truncated to a whole number."""
        total_bits = self.n_elements * self.bits_per_element
        return int(total_bits / 8)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass
class GGUFInfo:
    """Parsed description of a GGUF file: header, metadata and tensor table.

    Attributes:
        path: Location of the file this information was read from.
        version: GGUF format version from the header.
        tensor_count: Number of tensors declared in the header.
        metadata: Metadata key/value pairs as read from the file.
        tensors: One ``TensorInfo`` per tensor table entry.
    """

    path: Path
    version: int
    tensor_count: int
    metadata: dict[str, Any] = field(default_factory=dict)
    tensors: list[TensorInfo] = field(default_factory=list)

    def _arch_value(self, suffix: str) -> Any:
        """Return the metadata value stored under ``<architecture>.<suffix>``.

        Returns None when the key is absent.
        """
        return self.metadata.get(f"{self.architecture}.{suffix}")

    @staticmethod
    def _as_count(value: Any) -> int | None:
        """Reduce a metadata value to one positive integer, or None.

        Some files store a per-layer array where a scalar is usual. The
        largest entry is used in that case so that estimates derived from it
        err on the high side. Anything that is not a positive int yields None
        so callers can substitute a default with ``or``.
        """
        if isinstance(value, (list, tuple)):
            value = max((v for v in value if isinstance(v, int)), default=None)
        if isinstance(value, int) and value > 0:
            return value
        return None

    @property
    def architecture(self) -> str:
        """Model architecture (e.g., 'llama', 'mistral'); 'unknown' if unset."""
        return str(self.metadata.get("general.architecture", "unknown"))

    @property
    def model_name(self) -> str:
        """Model name from metadata, falling back to the file name stem."""
        return str(self.metadata.get("general.name", self.path.stem))

    @property
    def context_length(self) -> int | None:
        """Maximum context length, or None if the metadata does not state it."""
        return self._arch_value("context_length")

    @property
    def embedding_length(self) -> int | None:
        """Hidden size / embedding dimension, or None if not stated."""
        return self._arch_value("embedding_length")

    @property
    def block_count(self) -> int | None:
        """Number of transformer blocks/layers, or None if not stated."""
        return self._arch_value("block_count")

    @property
    def head_count(self) -> int | None:
        """Number of attention heads, or None if not stated."""
        return self._arch_value("attention.head_count")

    @property
    def head_count_kv(self) -> int | None:
        """Number of KV heads (for GQA/MQA), or None if not stated."""
        return self._arch_value("attention.head_count_kv")

    @property
    def vocab_size(self) -> int | None:
        """Vocabulary size.

        ``<architecture>.vocab_size`` is used when present. Because that key
        is optional, the length of the ``tokenizer.ggml.tokens`` list is used
        as a fallback. Returns None when neither is available.
        """
        declared = self._arch_value("vocab_size")
        if declared is not None:
            return declared
        tokens = self.metadata.get("tokenizer.ggml.tokens")
        if isinstance(tokens, list) and tokens:
            return len(tokens)
        return None

    @property
    def total_params(self) -> int:
        """Total parameter count (sum of element counts over all tensors)."""
        return sum(t.n_elements for t in self.tensors)

    @property
    def total_size_bytes(self) -> int:
        """Total model size in bytes (sum of per-tensor size estimates)."""
        return sum(t.size_bytes for t in self.tensors)

    @property
    def quantization_breakdown(self) -> dict[str, int]:
        """Count of tensors by quantization type name."""
        breakdown: dict[str, int] = {}
        for t in self.tensors:
            type_name = t.type_name
            breakdown[type_name] = breakdown.get(type_name, 0) + 1
        return breakdown

    @property
    def size_breakdown(self) -> dict[str, int]:
        """Estimated size in bytes by quantization type name."""
        breakdown: dict[str, int] = {}
        for t in self.tensors:
            type_name = t.type_name
            breakdown[type_name] = breakdown.get(type_name, 0) + t.size_bytes
        return breakdown

    def estimate_vram(self, context_length: int | None = None) -> dict[str, int]:
        """
        Estimate VRAM requirements.

        The KV cache is sized as
        ``ctx * (KV heads summed over layers) * (key dim + value dim) * 2``
        bytes, i.e. fp16 keys and values. Missing metadata falls back to
        32 layers, hidden size 4096 and 32 heads.

        Args:
            context_length: Context length to use for KV cache estimation.
                Defaults to model's context_length metadata, then 2048.

        Returns:
            Dict with 'weights', 'kv_cache', and 'total' in bytes.
        """
        ctx = context_length or self.context_length or 2048
        weights = self.total_size_bytes

        n_layers = self._as_count(self.block_count) or 32
        hidden = self._as_count(self.embedding_length) or 4096
        n_heads = self._as_count(self.head_count) or 32

        kv_meta = self.head_count_kv
        if isinstance(kv_meta, (list, tuple)) and kv_meta:
            # Per-layer KV head counts: add them up instead of assuming
            # every layer has the same number.
            kv_heads_all_layers = sum(
                h for h in kv_meta if isinstance(h, int) and h > 0
            )
        else:
            kv_heads_all_layers = n_layers * (self._as_count(kv_meta) or n_heads)

        # Explicit key/value head sizes take precedence; hidden // heads is
        # only the default when the file does not state them.
        default_head_dim = hidden // n_heads
        key_dim = (
            self._as_count(self._arch_value("attention.key_length"))
            or default_head_dim
        )
        value_dim = (
            self._as_count(self._arch_value("attention.value_length"))
            or default_head_dim
        )

        bytes_per_value = 2  # fp16 cache entries
        kv_cache = ctx * kv_heads_all_layers * (key_dim + value_dim) * bytes_per_value

        return {
            "weights": weights,
            "kv_cache": kv_cache,
            "total": weights + kv_cache,
        }

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        Metadata keys starting with ``tokenizer.`` are left out of the
        ``metadata`` entry because they can be very large.
        """
        return {
            "path": str(self.path),
            "version": self.version,
            "architecture": self.architecture,
            "model_name": self.model_name,
            "tensor_count": self.tensor_count,
            "total_params": self.total_params,
            "total_size_bytes": self.total_size_bytes,
            "context_length": self.context_length,
            "embedding_length": self.embedding_length,
            "block_count": self.block_count,
            "head_count": self.head_count,
            "head_count_kv": self.head_count_kv,
            "vocab_size": self.vocab_size,
            "quantization_breakdown": self.quantization_breakdown,
            "size_breakdown": self.size_breakdown,
            "metadata": {
                k: v
                for k, v in self.metadata.items()
                if not k.startswith("tokenizer.")  # Skip large tokenizer data
            },
        }
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class GGUFReader:
    """Reader for the header, metadata and tensor table of a GGUF file.

    Only descriptive information is parsed; tensor payload bytes are never
    read. All multi-byte fields are decoded as little-endian. Malformed or
    truncated input is reported as ``ValueError``.
    """

    # Safety bound on arrays-of-arrays so a crafted file cannot exhaust the
    # Python recursion limit. The value is an arbitrary generous cap.
    _MAX_ARRAY_NESTING = 16

    def __init__(self, path: str | Path):
        """
        Initialize reader with file path.

        Args:
            path: Path to the GGUF file.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
        """
        self.path = Path(path)
        if not self.path.exists():
            raise FileNotFoundError(f"GGUF file not found: {self.path}")

    def read(self) -> GGUFInfo:
        """
        Read and parse the GGUF file.

        Returns:
            GGUFInfo with parsed metadata and tensor information.

        Raises:
            ValueError: If the file is not a valid GGUF file.
        """
        with open(self.path, "rb") as f:
            return self._parse(f)

    def _parse(self, f: BinaryIO) -> GGUFInfo:
        """Parse magic, version, counts, metadata and tensor table from ``f``."""
        magic = f.read(4)
        if magic != GGUF_MAGIC:
            raise ValueError(f"Not a GGUF file: expected {GGUF_MAGIC!r}, got {magic!r}")

        version = self._unpack(f, "I")[0]
        if version < GGUF_VERSION_MIN or version > GGUF_VERSION_MAX:
            raise ValueError(
                f"Unsupported GGUF version {version} "
                f"(supported: {GGUF_VERSION_MIN}-{GGUF_VERSION_MAX})"
            )

        tensor_count, metadata_kv_count = self._unpack(f, "QQ")

        metadata: dict[str, Any] = {}
        for _ in range(metadata_kv_count):
            # The key precedes its value in the file, so it must be read
            # first; a one-line subscript assignment would read the value first.
            key = self._read_string(f)
            metadata[key] = self._read_value(f)

        tensors = [self._read_tensor_info(f) for _ in range(tensor_count)]

        return GGUFInfo(
            path=self.path,
            version=version,
            tensor_count=tensor_count,
            metadata=metadata,
            tensors=tensors,
        )

    def _read_exact(self, f: BinaryIO, size: int) -> bytes:
        """Read exactly ``size`` bytes from ``f``.

        Raises:
            ValueError: If fewer bytes are available (truncated file) or the
                requested size cannot be satisfied at all (corrupt length).
        """
        try:
            data = f.read(size)
        except (OverflowError, MemoryError) as exc:
            # A corrupt length field can ask for more than can be allocated.
            raise ValueError(
                f"Corrupt GGUF file: implausible read of {size} bytes"
            ) from exc
        if len(data) != size:
            raise ValueError(
                f"Truncated GGUF file: expected {size} bytes, got {len(data)}"
            )
        return data

    def _unpack(self, f: BinaryIO, fmt: str) -> tuple[Any, ...]:
        """Read and decode one little-endian record described by ``fmt``.

        ``fmt`` is a ``struct`` format string without the byte-order prefix.

        Raises:
            ValueError: If the format is too large to build or the file ends
                before the record is complete.
        """
        try:
            layout = struct.Struct(f"<{fmt}")
        except struct.error as exc:
            # Raised when a corrupt element count makes the record too large.
            raise ValueError(f"Corrupt GGUF file: invalid record layout {fmt!r}") from exc
        return layout.unpack(self._read_exact(f, layout.size))

    @staticmethod
    def _scalar_format(value_type: int) -> str | None:
        """Return the struct format character for a fixed-width value type.

        Returns None for STRING, ARRAY and unknown type tags.
        """
        formats = {
            GGUFValueType.UINT8: "B",
            GGUFValueType.INT8: "b",
            GGUFValueType.UINT16: "H",
            GGUFValueType.INT16: "h",
            GGUFValueType.UINT32: "I",
            GGUFValueType.INT32: "i",
            GGUFValueType.FLOAT32: "f",
            GGUFValueType.BOOL: "?",  # one byte; any non-zero value is True
            GGUFValueType.UINT64: "Q",
            GGUFValueType.INT64: "q",
            GGUFValueType.FLOAT64: "d",
        }
        return formats.get(value_type)

    def _read_string(self, f: BinaryIO) -> str:
        """Read a string stored as a uint64 byte length plus UTF-8 bytes.

        Raises:
            ValueError: If the file ends early or the bytes are not valid
                UTF-8 (``UnicodeDecodeError`` is a ``ValueError``).
        """
        length = self._unpack(f, "Q")[0]
        return self._read_exact(f, length).decode("utf-8")

    def _read_value(self, f: BinaryIO) -> Any:
        """Read one metadata value: a uint32 type tag followed by the payload.

        Raises:
            ValueError: If the type tag is not a known ``GGUFValueType``.
        """
        value_type = self._unpack(f, "I")[0]

        if value_type == GGUFValueType.STRING:
            return self._read_string(f)
        if value_type == GGUFValueType.ARRAY:
            return self._read_array(f)

        fmt = self._scalar_format(value_type)
        if fmt is None:
            raise ValueError(f"Unknown value type: {value_type}")
        return self._unpack(f, fmt)[0]

    def _read_array(self, f: BinaryIO, _depth: int = 0) -> list[Any]:
        """Read an array: uint32 element type, uint64 count, then elements.

        Args:
            f: Stream positioned at the array's element type.
            _depth: Current nesting level; internal, used for recursion only.

        Raises:
            ValueError: If the element type is unknown or arrays are nested
                too deeply. Returning an empty list here instead would leave
                the stream at the wrong position for everything that follows.
        """
        element_type, length = self._unpack(f, "IQ")

        if element_type == GGUFValueType.STRING:
            return [self._read_string(f) for _ in range(length)]

        if element_type == GGUFValueType.ARRAY:
            if _depth >= self._MAX_ARRAY_NESTING:
                raise ValueError("Corrupt GGUF file: arrays nested too deeply")
            return [self._read_array(f, _depth + 1) for _ in range(length)]

        fmt = self._scalar_format(element_type)
        if fmt is None:
            raise ValueError(f"Unknown array element type: {element_type}")
        # All fixed-width elements are decoded with a single unpack call.
        return list(self._unpack(f, f"{length}{fmt}"))

    def _read_tensor_info(self, f: BinaryIO) -> TensorInfo:
        """Read one tensor table entry.

        Layout: name string, uint32 dimension count, one uint64 per
        dimension, uint32 type id, uint64 data offset.
        """
        name = self._read_string(f)
        n_dims = self._unpack(f, "I")[0]
        dims = self._unpack(f, f"{n_dims}Q")
        type_id, offset = self._unpack(f, "IQ")

        return TensorInfo(
            name=name,
            n_dims=n_dims,
            dims=dims,
            type_id=type_id,
            offset=offset,
        )
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def is_gguf_file(path: str | Path) -> bool:
    """
    Check if a file is a valid GGUF file.

    Only the four magic bytes are inspected; the rest of the file is not
    validated. Filesystem errors (missing file, permissions, unreadable
    path) are reported as False rather than raised.

    Args:
        path: Path to check.

    Returns:
        True if the path is a regular file that starts with GGUF magic bytes.
    """
    candidate = Path(path)
    try:
        # is_file() already implies existence; keeping it inside the try
        # means a failing stat cannot escape as an exception.
        if not candidate.is_file():
            return False
        with open(candidate, "rb") as f:
            return f.read(4) == GGUF_MAGIC
    except (OSError, ValueError):
        # OSError: I/O and permission problems. ValueError: paths the OS
        # layer rejects outright (e.g. an embedded null byte).
        return False
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def format_size(size_bytes: int) -> str:
    """Render a byte count with two decimals and a binary (1024-based) unit.

    Units run from B up to PB; values beyond the PB range stay in PB.
    """
    units = ("B", "KB", "MB", "GB", "TB", "PB")
    last_index = len(units) - 1

    amount = float(size_bytes)
    index = 0
    # Step up one unit at a time until the value drops below 1024 or the
    # largest unit is reached.
    while index < last_index and not amount < 1024:
        amount /= 1024
        index += 1

    return f"{amount:.2f} {units[index]}"
|