hyperglyph-codec 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hyperglyph/__init__.py +19 -0
- hyperglyph/blocks.py +49 -0
- hyperglyph/cli.py +112 -0
- hyperglyph/codec.py +200 -0
- hyperglyph/config.py +37 -0
- hyperglyph/exceptions.py +9 -0
- hyperglyph/hdc.py +69 -0
- hyperglyph/metrics.py +60 -0
- hyperglyph/prototypes.py +60 -0
- hyperglyph/py.typed +0 -0
- hyperglyph/residual.py +49 -0
- hyperglyph/serialization.py +75 -0
- hyperglyph/torch_adapter.py +67 -0
- hyperglyph_codec-0.1.0.dist-info/METADATA +627 -0
- hyperglyph_codec-0.1.0.dist-info/RECORD +18 -0
- hyperglyph_codec-0.1.0.dist-info/WHEEL +4 -0
- hyperglyph_codec-0.1.0.dist-info/entry_points.txt +2 -0
- hyperglyph_codec-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Serialization helpers for compressed models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import zipfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Mapping
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
from .codec import CompressedModel, CompressedTensor
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def save_compressed(compressed_model: CompressedModel, path: str | Path) -> None:
    """Write ``compressed_model`` to a .hwz zip archive at ``path``.

    Missing parent directories are created first. The archive receives
    ``metadata.json`` (the format version plus one ``tensor_to_dict`` entry
    per tensor) and, when the model holds at least one tensor,
    ``prototypes.npz`` with every prototype matrix stored under the key
    ``"<tensor name>_prototypes"``.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(target, "w", compression=zipfile.ZIP_DEFLATED) as bundle:
        version = compressed_model.format_version
        tensor_entries = {
            key: tensor_to_dict(item) for key, item in compressed_model.tensors.items()
        }
        header = {"format_version": version, "tensors": tensor_entries}
        bundle.writestr("metadata.json", json.dumps(header, indent=2))

        matrices = {
            f"{key}_prototypes": item.prototype_matrix
            for key, item in compressed_model.tensors.items()
        }
        if matrices:
            # np.savez writes straight into the zip member opened for writing.
            with bundle.open("prototypes.npz", "w") as stream:
                np.savez(stream, **matrices)  # type: ignore[arg-type]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_compressed(path: str | Path) -> CompressedModel:
    """Read a .hwz archive and rebuild the compressed model it describes.

    Only ``metadata.json`` is consulted: every entry under its ``"tensors"``
    key is passed through ``dict_to_tensor``. The returned model has an empty
    ``payload`` and takes ``format_version`` from the metadata, falling back
    to ``"0.1"`` when the key is absent.
    """
    with zipfile.ZipFile(Path(path), "r") as bundle:
        header = json.loads(bundle.read("metadata.json"))
        restored: dict[str, CompressedTensor] = {
            key: dict_to_tensor(entry) for key, entry in header.get("tensors", {}).items()
        }
        version = header.get("format_version", "0.1")
        return CompressedModel(tensors=restored, payload=b"", format_version=version)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def tensor_to_dict(tensor: CompressedTensor) -> dict[str, Any]:
    """Flatten a compressed tensor into a plain dictionary for the metadata file.

    ``shape`` is copied into a list and ``prototype_matrix`` is expanded with
    ``tolist()``; every other field is passed through unchanged, so the
    result is only JSON-serialisable if those fields already are.
    """
    record: dict[str, Any] = {"name": tensor.name, "shape": list(tensor.shape)}
    # These fields are stored exactly as they appear on the tensor.
    for field in ("block_size", "prototype_ids", "scales", "residuals"):
        record[field] = getattr(tensor, field)
    record["prototype_matrix"] = tensor.prototype_matrix.tolist()
    record["seed"] = tensor.seed
    record["codec_config"] = tensor.codec_config
    return record
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def dict_to_tensor(payload: Mapping[str, Any]) -> CompressedTensor:
    """Rebuild a ``CompressedTensor`` from a mapping with the ``tensor_to_dict`` keys.

    Each field is coerced on the way in: ``shape`` to a tuple of ints,
    ``prototype_ids`` to a list of ints, ``scales`` to a list of floats,
    ``residuals`` to a list of dicts, ``prototype_matrix`` to a float32
    array, and ``codec_config`` to a fresh dict. A missing key raises
    ``KeyError``.
    """
    label = str(payload["name"])
    dims = tuple(map(int, payload["shape"]))
    block = int(payload["block_size"])
    ids = list(map(int, payload["prototype_ids"]))
    factors = list(map(float, payload["scales"]))
    leftovers = list(map(dict, payload["residuals"]))
    matrix = np.asarray(payload["prototype_matrix"], dtype=np.float32)
    rng_seed = int(payload["seed"])
    settings = dict(payload["codec_config"])
    return CompressedTensor(
        name=label,
        shape=dims,
        block_size=block,
        prototype_ids=ids,
        scales=factors,
        residuals=leftovers,
        prototype_matrix=matrix,
        seed=rng_seed,
        codec_config=settings,
    )
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Optional PyTorch adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Mapping
|
|
6
|
+
|
|
7
|
+
from .config import HyperGlyphConfig
|
|
8
|
+
from .exceptions import OptionalDependencyError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def is_torch_available() -> bool:
    """Report whether ``import torch`` succeeds in the current environment."""
    try:
        import torch  # noqa: F401
    except ImportError:
        available = False
    else:
        available = True
    return available
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def tensor_to_numpy(tensor: Any) -> Any:
    """Turn a torch tensor into a NumPy array, passing other values through.

    Torch tensors are converted with ``detach().cpu().numpy()``; any other
    object is returned unchanged.

    Raises:
        OptionalDependencyError: If torch cannot be imported.
    """
    if not is_torch_available():
        raise OptionalDependencyError("torch is required for tensor_to_numpy")
    import torch

    return tensor.detach().cpu().numpy() if isinstance(tensor, torch.Tensor) else tensor
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def numpy_to_tensor(array: Any, reference_tensor: Any | None = None) -> Any:
    """Build a torch tensor from ``array`` with ``torch.tensor``.

    When ``reference_tensor`` is a torch tensor, its ``dtype`` and ``device``
    are applied to the result; otherwise ``torch.tensor`` picks its defaults.

    Raises:
        OptionalDependencyError: If torch cannot be imported.
    """
    if not is_torch_available():
        raise OptionalDependencyError("torch is required for numpy_to_tensor")
    import torch

    placement: dict[str, Any] = {}
    # isinstance() is already False for None, so no separate None check is needed.
    if isinstance(reference_tensor, torch.Tensor):
        placement = {"dtype": reference_tensor.dtype, "device": reference_tensor.device}
    return torch.tensor(array, **placement)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def compress_state_dict(
    state_dict: Mapping[str, Any], config: HyperGlyphConfig | None = None
) -> Any:
    """Compress a torch ``state_dict`` with ``HyperGlyphCodec``.

    Every value is first passed through ``tensor_to_numpy``, then the
    resulting mapping is handed to ``HyperGlyphCodec(config).compress_state_dict``
    and its result is returned as-is.

    Raises:
        OptionalDependencyError: If torch cannot be imported.
    """
    if not is_torch_available():
        raise OptionalDependencyError("torch is required for compress_state_dict")
    from .codec import HyperGlyphCodec

    encoder = HyperGlyphCodec(config)
    arrays: dict[str, Any] = {}
    for key, entry in state_dict.items():
        arrays[key] = tensor_to_numpy(entry)
    return encoder.compress_state_dict(arrays)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def decompress_state_dict(
    compressed_model: Any, reference_state_dict: Mapping[str, Any] | None = None
) -> dict[str, Any]:
    """Decompress a compressed model into NumPy arrays or torch tensors.

    Args:
        compressed_model: Object accepted by
            ``HyperGlyphCodec().decompress_state_dict``.
        reference_state_dict: Optional mapping of names to reference values.
            When given, each restored entry is converted with
            ``numpy_to_tensor`` using the reference entry of the same name.

    Returns:
        The codec's restored dictionary unchanged when ``reference_state_dict``
        is ``None``; otherwise a new dictionary of ``numpy_to_tensor`` results.

    Raises:
        OptionalDependencyError: If torch cannot be imported.
    """
    if not is_torch_available():
        raise OptionalDependencyError("torch is required for decompress_state_dict")
    from .codec import HyperGlyphCodec

    restored = HyperGlyphCodec().decompress_state_dict(compressed_model)
    if reference_state_dict is None:
        return restored
    # A restored name absent from the reference yields None here, so
    # numpy_to_tensor falls back to torch's defaults instead of KeyError.
    return {
        name: numpy_to_tensor(restored[name], reference_state_dict.get(name))
        for name in restored
    }
|