hyperglyph-codec 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ """Serialization helpers for compressed models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import zipfile
7
+ from pathlib import Path
8
+ from typing import Any, Mapping
9
+
10
+ import numpy as np
11
+
12
+ from .codec import CompressedModel, CompressedTensor
13
+
14
+
15
def save_compressed(compressed_model: CompressedModel, path: str | Path) -> None:
    """Write a compressed model to a ``.hwz`` zip archive.

    Missing parent directories of ``path`` are created first. The archive is
    opened in write mode with DEFLATE compression and receives:

    * ``metadata.json`` -- the model's ``format_version`` plus one
      ``tensor_to_dict`` dictionary per tensor, keyed by tensor name.
    * ``prototypes.npz`` -- each tensor's ``prototype_matrix`` stored under
      the key ``"<name>_prototypes"``; only written when the model has at
      least one tensor.

    Args:
        compressed_model: Model whose ``format_version`` and ``tensors`` are
            serialized.
        path: Destination file of the archive.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(target, "w", compression=zipfile.ZIP_DEFLATED) as bundle:
        manifest = {
            "format_version": compressed_model.format_version,
            "tensors": {
                key: tensor_to_dict(entry)
                for key, entry in compressed_model.tensors.items()
            },
        }
        bundle.writestr("metadata.json", json.dumps(manifest, indent=2))

        arrays = {
            f"{key}_prototypes": entry.prototype_matrix
            for key, entry in compressed_model.tensors.items()
        }
        # An empty model produces no prototypes member at all.
        if not arrays:
            return
        with bundle.open("prototypes.npz", "w") as stream:
            np.savez(stream, **arrays)  # type: ignore[arg-type]
33
+
34
+
35
def load_compressed(path: str | Path) -> CompressedModel:
    """Load a compressed model from a ``.hwz`` archive.

    Only the ``metadata.json`` member is read. Every entry under its
    ``"tensors"`` key is rebuilt with ``dict_to_tensor``; a missing
    ``"tensors"`` key yields a model without tensors.

    Args:
        path: Location of the archive to read.

    Returns:
        A ``CompressedModel`` with the restored tensors, an empty ``payload``
        (``b""``) and the stored ``format_version`` (``"0.1"`` when the
        metadata does not carry one).
    """
    with zipfile.ZipFile(Path(path), "r") as bundle:
        manifest = json.loads(bundle.read("metadata.json"))

    restored: dict[str, CompressedTensor] = {
        key: dict_to_tensor(entry)
        for key, entry in manifest.get("tensors", {}).items()
    }
    version = manifest.get("format_version", "0.1")
    return CompressedModel(tensors=restored, payload=b"", format_version=version)
46
+
47
+
48
def tensor_to_dict(tensor: CompressedTensor) -> dict[str, Any]:
    """Convert a compressed tensor to a JSON-safe dictionary.

    ``shape``, ``prototype_ids`` and ``scales`` are coerced element by element
    to built-in ``int``/``float`` values so that NumPy scalars or arrays,
    which ``json.dumps`` rejects, cannot leak into the result. These are the
    same coercions ``dict_to_tensor`` applies when reading the dictionary
    back. ``prototype_matrix`` is converted with ``tolist()``.

    ``name``, ``block_size``, ``residuals``, ``seed`` and ``codec_config`` are
    passed through unchanged; the caller is responsible for them being
    JSON-compatible.

    Args:
        tensor: The compressed tensor to serialize.

    Returns:
        A dictionary with the keys ``name``, ``shape``, ``block_size``,
        ``prototype_ids``, ``scales``, ``residuals``, ``prototype_matrix``,
        ``seed`` and ``codec_config``.
    """
    return {
        "name": tensor.name,
        "shape": [int(dim) for dim in tensor.shape],
        "block_size": tensor.block_size,
        # NumPy integer scalars (and ndarrays) are not JSON serializable.
        "prototype_ids": [int(index) for index in tensor.prototype_ids],
        # np.float32 is not a ``float`` subclass, so json.dumps rejects it.
        "scales": [float(scale) for scale in tensor.scales],
        "residuals": tensor.residuals,
        "prototype_matrix": tensor.prototype_matrix.tolist(),
        "seed": tensor.seed,
        "codec_config": tensor.codec_config,
    }
61
+
62
+
63
def dict_to_tensor(payload: Mapping[str, Any]) -> CompressedTensor:
    """Rebuild a ``CompressedTensor`` from its JSON-safe dictionary form.

    Every field is normalised while it is read: ``shape`` becomes a tuple of
    ints, ``prototype_ids`` a list of ints, ``scales`` a list of floats,
    ``residuals`` a list of dicts, ``prototype_matrix`` a ``float32`` NumPy
    array and ``codec_config`` a dict; ``name``, ``block_size`` and ``seed``
    are cast with ``str``/``int``.

    Args:
        payload: Mapping holding the keys written by ``tensor_to_dict``.

    Returns:
        The reconstructed ``CompressedTensor``.

    Raises:
        KeyError: If a required key is missing from ``payload``.
    """
    # Keys are read in constructor-argument order so the first missing key
    # is the one reported.
    label = str(payload["name"])
    dims = tuple(map(int, payload["shape"]))
    block = int(payload["block_size"])
    ids = list(map(int, payload["prototype_ids"]))
    factors = list(map(float, payload["scales"]))
    corrections = list(map(dict, payload["residuals"]))
    prototypes = np.asarray(payload["prototype_matrix"], dtype=np.float32)
    rng_seed = int(payload["seed"])
    settings = dict(payload["codec_config"])

    return CompressedTensor(
        name=label,
        shape=dims,
        block_size=block,
        prototype_ids=ids,
        scales=factors,
        residuals=corrections,
        prototype_matrix=prototypes,
        seed=rng_seed,
        codec_config=settings,
    )
@@ -0,0 +1,67 @@
1
+ """Optional PyTorch adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Mapping
6
+
7
+ from .config import HyperGlyphConfig
8
+ from .exceptions import OptionalDependencyError
9
+
10
+
11
def is_torch_available() -> bool:
    """Report whether ``torch`` can be imported.

    Returns:
        ``True`` when ``import torch`` succeeds, ``False`` when it raises
        ``ImportError``.
    """
    try:
        import torch  # noqa: F401
    except ImportError:
        importable = False
    else:
        importable = True
    return importable
18
+
19
+
20
def tensor_to_numpy(tensor: Any) -> Any:
    """Convert a torch tensor to a NumPy array.

    Args:
        tensor: Value to convert. Anything that is not a ``torch.Tensor`` is
            returned unchanged.

    Returns:
        For a ``torch.Tensor``, the result of detaching it, moving it to the
        CPU and calling ``numpy()``; otherwise ``tensor`` itself.

    Raises:
        OptionalDependencyError: If torch cannot be imported.
    """
    if not is_torch_available():
        raise OptionalDependencyError("torch is required for tensor_to_numpy")
    import torch

    if not isinstance(tensor, torch.Tensor):
        return tensor

    detached = tensor.detach()
    on_host = detached.cpu()
    return on_host.numpy()
29
+
30
+
31
def numpy_to_tensor(array: Any, reference_tensor: Any | None = None) -> Any:
    """Convert a NumPy array back to a torch tensor.

    Args:
        array: Data handed to ``torch.tensor``.
        reference_tensor: Optional tensor whose ``dtype`` and ``device`` the
            result should use. Ignored unless it is a ``torch.Tensor``.

    Returns:
        A tensor built with ``torch.tensor``.

    Raises:
        OptionalDependencyError: If torch cannot be imported.
    """
    if not is_torch_available():
        raise OptionalDependencyError("torch is required for numpy_to_tensor")
    import torch

    # isinstance() is already False for None, so one check covers both cases.
    placement: dict[str, Any] = {}
    if isinstance(reference_tensor, torch.Tensor):
        placement = {
            "dtype": reference_tensor.dtype,
            "device": reference_tensor.device,
        }
    return torch.tensor(array, **placement)
40
+
41
+
42
def compress_state_dict(
    state_dict: Mapping[str, Any], config: HyperGlyphConfig | None = None
) -> Any:
    """Compress a torch ``state_dict``.

    Each value is passed through ``tensor_to_numpy`` before the mapping is
    handed to ``HyperGlyphCodec.compress_state_dict``.

    Args:
        state_dict: Mapping from parameter name to value.
        config: Optional configuration forwarded to ``HyperGlyphCodec``.

    Returns:
        Whatever ``HyperGlyphCodec.compress_state_dict`` returns.

    Raises:
        OptionalDependencyError: If torch cannot be imported.
    """
    if not is_torch_available():
        raise OptionalDependencyError("torch is required for compress_state_dict")
    from .codec import HyperGlyphCodec

    encoder = HyperGlyphCodec(config)
    arrays = {key: tensor_to_numpy(entry) for key, entry in state_dict.items()}
    return encoder.compress_state_dict(arrays)
53
+
54
+
55
def decompress_state_dict(
    compressed_model: Any, reference_state_dict: Mapping[str, Any] | None = None
) -> dict[str, Any]:
    """Decompress a compressed model into a dictionary of arrays or tensors.

    Args:
        compressed_model: Object handed to
            ``HyperGlyphCodec.decompress_state_dict``.
        reference_state_dict: Optional mapping from name to a reference value.
            When given, every restored entry is converted with
            ``numpy_to_tensor`` using the reference entry of the same name.

    Returns:
        The codec's output unchanged when ``reference_state_dict`` is
        ``None``; otherwise a new dictionary of converted values.

    Raises:
        OptionalDependencyError: If torch cannot be imported.
        KeyError: If ``reference_state_dict`` lacks a restored name.
    """
    if not is_torch_available():
        raise OptionalDependencyError("torch is required for decompress_state_dict")
    from .codec import HyperGlyphCodec

    arrays = HyperGlyphCodec().decompress_state_dict(compressed_model)
    if reference_state_dict is None:
        return arrays

    converted: dict[str, Any] = {}
    for key in arrays:
        converted[key] = numpy_to_tensor(arrays[key], reference_state_dict[key])
    return converted