hyperglyph-codec 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hyperglyph_codec-0.2.0/CHANGELOG.md +18 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/PKG-INFO +51 -15
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/README.md +50 -14
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/docs/algorithm.md +6 -1
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/docs/api.md +8 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/docs/cli.md +10 -1
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/docs/index.md +5 -1
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/docs/roadmap.md +9 -6
- hyperglyph_codec-0.2.0/examples/artifacts/sample-v0.2-benchmark.md +13 -0
- hyperglyph_codec-0.2.0/examples/artifacts/sample-v0.2.hwz +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/pyproject.toml +1 -1
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/__init__.py +5 -1
- hyperglyph_codec-0.2.0/src/hyperglyph/benchmark.py +105 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/cli.py +32 -5
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/codec.py +77 -7
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/config.py +6 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/metrics.py +32 -1
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/residual.py +28 -5
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/serialization.py +22 -7
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/torch_adapter.py +4 -1
- hyperglyph_codec-0.2.0/tests/test_benchmark.py +19 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/tests/test_cli.py +1 -1
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/tests/test_codec.py +21 -1
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/tests/test_residual.py +13 -1
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/tests/test_serialization.py +2 -2
- hyperglyph_codec-0.1.0/CHANGELOG.md +0 -8
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/.gitattributes +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/.github/workflows/ci.yml +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/.github/workflows/publish-testpypi.yml +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/.github/workflows/publish.yml +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/.gitignore +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/.pre-commit-config.yaml +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/CONTRIBUTING.md +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/LICENSE +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/RELEASE.md +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/examples/compress_mlp.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/examples/compress_state_dict.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/examples/mnist_demo.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/hyperglyph.png +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/blocks.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/exceptions.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/hdc.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/prototypes.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/src/hyperglyph/py.typed +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/tests/test_blocks.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/tests/test_hdc.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/tests/test_prototypes.py +0 -0
- {hyperglyph_codec-0.1.0 → hyperglyph_codec-0.2.0}/tests/test_torch_adapter.py +0 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.2.0
|
|
4
|
+
|
|
5
|
+
- Added int8 sparse residual quantization.
|
|
6
|
+
- Added prototype scale modes for per-block, per-tensor, and per-channel scaling.
|
|
7
|
+
- Added benchmark reports with FP32, FP16 estimate, INT8 estimate, and Hyper Glyph comparisons.
|
|
8
|
+
- Added markdown benchmark export through the Python API and CLI.
|
|
9
|
+
- Added an example compressed `.hwz` artifact and benchmark report.
|
|
10
|
+
- Improved `.hwz` serialization so prototype arrays are stored once in `prototypes.npz`.
|
|
11
|
+
- Preserved skipped tensors when restoring PyTorch state dicts with a reference state dict.
|
|
12
|
+
|
|
13
|
+
## 0.1.0
|
|
14
|
+
|
|
15
|
+
- Initial public release.
|
|
16
|
+
- Added NumPy compression path.
|
|
17
|
+
- Added optional PyTorch adapter.
|
|
18
|
+
- Added CLI and .hwz serialization.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hyperglyph-codec
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Hyperdimensional symbolic residual compression for neural network weights
|
|
5
5
|
Author: Robert McMenemy
|
|
6
6
|
License-Expression: MIT
|
|
@@ -60,11 +60,14 @@ Description-Content-Type: text/markdown
|
|
|
60
60
|
- **Block-level tensor compression** for NumPy arrays and neural network weights.
|
|
61
61
|
- **Symbolic prototype assignment** to represent repeated weight patterns compactly.
|
|
62
62
|
- **Sparse residual repair** to preserve reconstruction fidelity after prototype decoding.
|
|
63
|
+
- **Int8 residual quantization** to reduce sparse repair payload size.
|
|
64
|
+
- **Per-block, per-tensor, and per-channel prototype scales** for tuning reconstruction behavior.
|
|
63
65
|
- **Configurable compression controls** for block size, prototype count, residual size, and tensor filtering.
|
|
64
66
|
- **State dict compression** for model-like parameter dictionaries.
|
|
65
67
|
- **Optional PyTorch support** for loading, compressing, restoring, and benchmarking `.pt` state dicts.
|
|
66
68
|
- **`.hwz` serialization** for saving compressed models as portable archives.
|
|
67
69
|
- **Compression reports** with size ratio, tensor counts, and reconstruction error metrics.
|
|
70
|
+
- **Markdown benchmark export** with FP32, FP16 estimate, INT8 estimate, and Hyper Glyph comparisons.
|
|
68
71
|
- **A small CLI** for compressing, decompressing, inspecting, and benchmarking model archives.
|
|
69
72
|
- **Typed Python API** designed for research, experimentation, and extension.
|
|
70
73
|
|
|
@@ -114,10 +117,22 @@ That is the core job: encode large weight tensors as reusable symbolic
|
|
|
114
117
|
prototypes plus a small residual correction, then report the size and
|
|
115
118
|
reconstruction tradeoff.
|
|
116
119
|
|
|
117
|
-
Hyper Glyph v0.1.0 is an experimental research codec. It is intended for
|
|
120
|
+
Hyper Glyph v0.2.0 is an experimental research codec. It is intended for
|
|
118
121
|
testing ideas around hyperdimensional and symbolic weight compression rather
|
|
119
122
|
than guaranteed production compression.
|
|
120
123
|
|
|
124
|
+
Sample v0.2.0 benchmark from `examples/artifacts/sample-v0.2-benchmark.md`:
|
|
125
|
+
|
|
126
|
+
| Representation | Bytes | Ratio vs FP32 | MSE | MAE | Max abs error |
|
|
127
|
+
| --- | ---: | ---: | ---: | ---: | ---: |
|
|
128
|
+
| FP32 | 24576 | 1.00x | 0 | 0 | 0 |
|
|
129
|
+
| FP16 estimate | 12288 | 2.00x | - | - | - |
|
|
130
|
+
| INT8 estimate | 6144 | 4.00x | - | - | - |
|
|
131
|
+
| Hyper Glyph | 22032 | 1.12x | 0.00266153 | 0.0405458 | 0.197096 |
|
|
132
|
+
|
|
133
|
+
The matching compressed artifact is `examples/artifacts/sample-v0.2.hwz`
|
|
134
|
+
and is 26,318 bytes on disk in the current zip-based archive format.
|
|
135
|
+
|
|
121
136
|
---
|
|
122
137
|
|
|
123
138
|
## Why Hyperdimensional Weight Compression?
|
|
@@ -132,7 +147,7 @@ Weight tensors
|
|
|
132
147
|
-> Learn reusable prototype blocks
|
|
133
148
|
-> Assign each block to a prototype
|
|
134
149
|
-> Store per-block scales
|
|
135
|
-
-> Store sparse top-k residual corrections
|
|
150
|
+
-> Store sparse top-k residual corrections as int8 or float32
|
|
136
151
|
|
|
137
152
|
-> Save compressed archive
|
|
138
153
|
-> Restore approximate tensors
|
|
@@ -145,6 +160,7 @@ tradeoff directly:
|
|
|
145
160
|
- **Large weight matrices** that can be split into repeated local blocks.
|
|
146
161
|
- **Prototype-based compression** where blocks share learned representatives.
|
|
147
162
|
- **Sparse residual repair** where only the largest reconstruction corrections are stored.
|
|
163
|
+
- **Scale modes** for per-block, per-tensor, or per-channel prototype scaling.
|
|
148
164
|
- **Approximate reconstruction** with measurable MSE, MAE, and max absolute error.
|
|
149
165
|
- **State dict workflows** that match common PyTorch model storage patterns.
|
|
150
166
|
- **Portable archive output** for saving and inspecting compressed runs.
|
|
@@ -177,7 +193,7 @@ Compression
|
|
|
177
193
|
- prototype learning
|
|
178
194
|
- prototype assignment
|
|
179
195
|
- scale calculation
|
|
180
|
-
- sparse residual encoding
|
|
196
|
+
- int8 or float32 sparse residual encoding
|
|
181
197
|
|
|
|
182
198
|
v
|
|
183
199
|
CompressedModel
|
|
@@ -270,6 +286,8 @@ config = HyperGlyphConfig(
|
|
|
270
286
|
block_size=16,
|
|
271
287
|
n_prototypes=16,
|
|
272
288
|
residual_k=4,
|
|
289
|
+
residual_dtype="int8",
|
|
290
|
+
scale_mode="block",
|
|
273
291
|
)
|
|
274
292
|
|
|
275
293
|
codec = HyperGlyphCodec(config)
|
|
@@ -323,6 +341,8 @@ hyperglyph compress model.pt model.hwz \
|
|
|
323
341
|
--n-buckets 16 \
|
|
324
342
|
--n-prototypes 128 \
|
|
325
343
|
--residual-k 8 \
|
|
344
|
+
--residual-dtype int8 \
|
|
345
|
+
--scale-mode channel \
|
|
326
346
|
--min-tensor-size 256
|
|
327
347
|
```
|
|
328
348
|
|
|
@@ -344,6 +364,12 @@ Benchmark compression and reconstruction:
|
|
|
344
364
|
hyperglyph benchmark model.pt
|
|
345
365
|
```
|
|
346
366
|
|
|
367
|
+
Export the benchmark as markdown:
|
|
368
|
+
|
|
369
|
+
```bash
|
|
370
|
+
hyperglyph benchmark model.pt --markdown-output benchmark.md
|
|
371
|
+
```
|
|
372
|
+
|
|
347
373
|
---
|
|
348
374
|
|
|
349
375
|
## Benchmark Example
|
|
@@ -354,17 +380,14 @@ A small practical benchmark is enough to see the current codec behavior:
|
|
|
354
380
|
hyperglyph benchmark model.pt
|
|
355
381
|
```
|
|
356
382
|
|
|
357
|
-
Example
|
|
383
|
+
Example markdown output:
|
|
358
384
|
|
|
359
385
|
```text
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
total_mse
|
|
366
|
-
total_mae
|
|
367
|
-
max_abs_error
|
|
386
|
+
| Representation | Bytes | Ratio vs FP32 | MSE | MAE | Max abs error |
|
|
387
|
+
| FP32 | 24576 | 1.00x | 0 | 0 | 0 |
|
|
388
|
+
| FP16 estimate | 12288 | 2.00x | - | - | - |
|
|
389
|
+
| INT8 estimate | 6144 | 4.00x | - | - | - |
|
|
390
|
+
| Hyper Glyph | 22032 | 1.12x | 0.00266153 | 0.0405458 | 0.197096 |
|
|
368
391
|
```
|
|
369
392
|
|
|
370
393
|
The current package focuses on transparent compression experiments rather than
|
|
@@ -388,6 +411,8 @@ config = HyperGlyphConfig(
|
|
|
388
411
|
n_buckets=16,
|
|
389
412
|
n_prototypes=128,
|
|
390
413
|
residual_k=8,
|
|
414
|
+
residual_dtype="int8",
|
|
415
|
+
scale_mode="channel",
|
|
391
416
|
seed=42,
|
|
392
417
|
min_tensor_size=256,
|
|
393
418
|
compress_bias=False,
|
|
@@ -434,6 +459,9 @@ block ~= prototype[prototype_id] * scale + sparse_residual
|
|
|
434
459
|
Increase `residual_k` for better reconstruction fidelity, or reduce it for a
|
|
435
460
|
smaller compressed representation.
|
|
436
461
|
|
|
462
|
+
Set `residual_dtype="int8"` to quantize sparse residual values. Use
|
|
463
|
+
`residual_dtype="float32"` when you want unquantized residual repairs.
|
|
464
|
+
|
|
437
465
|
### 5. **Serialization**
|
|
438
466
|
|
|
439
467
|
Save compressed models as `.hwz` zip archives:
|
|
@@ -485,6 +513,8 @@ config = HyperGlyphConfig(
|
|
|
485
513
|
n_buckets=16,
|
|
486
514
|
n_prototypes=128,
|
|
487
515
|
residual_k=8,
|
|
516
|
+
residual_dtype="int8",
|
|
517
|
+
scale_mode="block",
|
|
488
518
|
seed=42,
|
|
489
519
|
min_tensor_size=256,
|
|
490
520
|
compress_bias=False,
|
|
@@ -498,6 +528,8 @@ Key settings:
|
|
|
498
528
|
- **`block_size`** controls how many flattened weights are grouped together.
|
|
499
529
|
- **`n_prototypes`** controls how many reusable block representatives are learned.
|
|
500
530
|
- **`residual_k`** controls how many residual correction values are stored per block.
|
|
531
|
+
- **`residual_dtype`** controls whether sparse residual values are stored as `int8` or `float32`.
|
|
532
|
+
- **`scale_mode`** controls whether prototype scales are calculated per `block`, per `tensor`, or per `channel`.
|
|
501
533
|
- **`min_tensor_size`** skips tensors too small to benefit from compression.
|
|
502
534
|
- **`compress_bias`** enables compression for bias tensors, which are skipped by default.
|
|
503
535
|
- **`seed`** makes prototype selection deterministic.
|
|
@@ -521,6 +553,8 @@ codec = HyperGlyphCodec(
|
|
|
521
553
|
block_size=16,
|
|
522
554
|
n_prototypes=64,
|
|
523
555
|
residual_k=8,
|
|
556
|
+
residual_dtype="int8",
|
|
557
|
+
scale_mode="channel",
|
|
524
558
|
)
|
|
525
559
|
)
|
|
526
560
|
|
|
@@ -568,6 +602,9 @@ examples/
|
|
|
568
602
|
compress_mlp.py # PyTorch MLP compression example
|
|
569
603
|
compress_state_dict.py # NumPy state dict compression example
|
|
570
604
|
mnist_demo.py # MNIST-oriented demo
|
|
605
|
+
artifacts/
|
|
606
|
+
sample-v0.2.hwz # Example compressed archive
|
|
607
|
+
sample-v0.2-benchmark.md # Markdown benchmark report
|
|
571
608
|
hyperglyph.png # Project logo
|
|
572
609
|
pyproject.toml # Package metadata and dependencies
|
|
573
610
|
CHANGELOG.md # Release history
|
|
@@ -621,7 +658,6 @@ If you use Hyper Glyph in research, please cite:
|
|
|
621
658
|
title={Hyper Glyph: Hyperdimensional Symbolic Residual Compression for Neural Network Weights},
|
|
622
659
|
author={Robert McMenemy},
|
|
623
660
|
year={2026},
|
|
624
|
-
version={0.1.0},
|
|
661
|
+
version={0.2.0},
|
|
625
662
|
}
|
|
626
663
|
```
|
|
627
|
-
|
|
@@ -25,11 +25,14 @@
|
|
|
25
25
|
- **Block-level tensor compression** for NumPy arrays and neural network weights.
|
|
26
26
|
- **Symbolic prototype assignment** to represent repeated weight patterns compactly.
|
|
27
27
|
- **Sparse residual repair** to preserve reconstruction fidelity after prototype decoding.
|
|
28
|
+
- **Int8 residual quantization** to reduce sparse repair payload size.
|
|
29
|
+
- **Per-block, per-tensor, and per-channel prototype scales** for tuning reconstruction behavior.
|
|
28
30
|
- **Configurable compression controls** for block size, prototype count, residual size, and tensor filtering.
|
|
29
31
|
- **State dict compression** for model-like parameter dictionaries.
|
|
30
32
|
- **Optional PyTorch support** for loading, compressing, restoring, and benchmarking `.pt` state dicts.
|
|
31
33
|
- **`.hwz` serialization** for saving compressed models as portable archives.
|
|
32
34
|
- **Compression reports** with size ratio, tensor counts, and reconstruction error metrics.
|
|
35
|
+
- **Markdown benchmark export** with FP32, FP16 estimate, INT8 estimate, and Hyper Glyph comparisons.
|
|
33
36
|
- **A small CLI** for compressing, decompressing, inspecting, and benchmarking model archives.
|
|
34
37
|
- **Typed Python API** designed for research, experimentation, and extension.
|
|
35
38
|
|
|
@@ -79,10 +82,22 @@ That is the core job: encode large weight tensors as reusable symbolic
|
|
|
79
82
|
prototypes plus a small residual correction, then report the size and
|
|
80
83
|
reconstruction tradeoff.
|
|
81
84
|
|
|
82
|
-
Hyper Glyph v0.1.0 is an experimental research codec. It is intended for
|
|
85
|
+
Hyper Glyph v0.2.0 is an experimental research codec. It is intended for
|
|
83
86
|
testing ideas around hyperdimensional and symbolic weight compression rather
|
|
84
87
|
than guaranteed production compression.
|
|
85
88
|
|
|
89
|
+
Sample v0.2.0 benchmark from `examples/artifacts/sample-v0.2-benchmark.md`:
|
|
90
|
+
|
|
91
|
+
| Representation | Bytes | Ratio vs FP32 | MSE | MAE | Max abs error |
|
|
92
|
+
| --- | ---: | ---: | ---: | ---: | ---: |
|
|
93
|
+
| FP32 | 24576 | 1.00x | 0 | 0 | 0 |
|
|
94
|
+
| FP16 estimate | 12288 | 2.00x | - | - | - |
|
|
95
|
+
| INT8 estimate | 6144 | 4.00x | - | - | - |
|
|
96
|
+
| Hyper Glyph | 22032 | 1.12x | 0.00266153 | 0.0405458 | 0.197096 |
|
|
97
|
+
|
|
98
|
+
The matching compressed artifact is `examples/artifacts/sample-v0.2.hwz`
|
|
99
|
+
and is 26,318 bytes on disk in the current zip-based archive format.
|
|
100
|
+
|
|
86
101
|
---
|
|
87
102
|
|
|
88
103
|
## Why Hyperdimensional Weight Compression?
|
|
@@ -97,7 +112,7 @@ Weight tensors
|
|
|
97
112
|
-> Learn reusable prototype blocks
|
|
98
113
|
-> Assign each block to a prototype
|
|
99
114
|
-> Store per-block scales
|
|
100
|
-
-> Store sparse top-k residual corrections
|
|
115
|
+
-> Store sparse top-k residual corrections as int8 or float32
|
|
101
116
|
|
|
102
117
|
-> Save compressed archive
|
|
103
118
|
-> Restore approximate tensors
|
|
@@ -110,6 +125,7 @@ tradeoff directly:
|
|
|
110
125
|
- **Large weight matrices** that can be split into repeated local blocks.
|
|
111
126
|
- **Prototype-based compression** where blocks share learned representatives.
|
|
112
127
|
- **Sparse residual repair** where only the largest reconstruction corrections are stored.
|
|
128
|
+
- **Scale modes** for per-block, per-tensor, or per-channel prototype scaling.
|
|
113
129
|
- **Approximate reconstruction** with measurable MSE, MAE, and max absolute error.
|
|
114
130
|
- **State dict workflows** that match common PyTorch model storage patterns.
|
|
115
131
|
- **Portable archive output** for saving and inspecting compressed runs.
|
|
@@ -142,7 +158,7 @@ Compression
|
|
|
142
158
|
- prototype learning
|
|
143
159
|
- prototype assignment
|
|
144
160
|
- scale calculation
|
|
145
|
-
- sparse residual encoding
|
|
161
|
+
- int8 or float32 sparse residual encoding
|
|
146
162
|
|
|
|
147
163
|
v
|
|
148
164
|
CompressedModel
|
|
@@ -235,6 +251,8 @@ config = HyperGlyphConfig(
|
|
|
235
251
|
block_size=16,
|
|
236
252
|
n_prototypes=16,
|
|
237
253
|
residual_k=4,
|
|
254
|
+
residual_dtype="int8",
|
|
255
|
+
scale_mode="block",
|
|
238
256
|
)
|
|
239
257
|
|
|
240
258
|
codec = HyperGlyphCodec(config)
|
|
@@ -288,6 +306,8 @@ hyperglyph compress model.pt model.hwz \
|
|
|
288
306
|
--n-buckets 16 \
|
|
289
307
|
--n-prototypes 128 \
|
|
290
308
|
--residual-k 8 \
|
|
309
|
+
--residual-dtype int8 \
|
|
310
|
+
--scale-mode channel \
|
|
291
311
|
--min-tensor-size 256
|
|
292
312
|
```
|
|
293
313
|
|
|
@@ -309,6 +329,12 @@ Benchmark compression and reconstruction:
|
|
|
309
329
|
hyperglyph benchmark model.pt
|
|
310
330
|
```
|
|
311
331
|
|
|
332
|
+
Export the benchmark as markdown:
|
|
333
|
+
|
|
334
|
+
```bash
|
|
335
|
+
hyperglyph benchmark model.pt --markdown-output benchmark.md
|
|
336
|
+
```
|
|
337
|
+
|
|
312
338
|
---
|
|
313
339
|
|
|
314
340
|
## Benchmark Example
|
|
@@ -319,17 +345,14 @@ A small practical benchmark is enough to see the current codec behavior:
|
|
|
319
345
|
hyperglyph benchmark model.pt
|
|
320
346
|
```
|
|
321
347
|
|
|
322
|
-
Example
|
|
348
|
+
Example markdown output:
|
|
323
349
|
|
|
324
350
|
```text
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
total_mse
|
|
331
|
-
total_mae
|
|
332
|
-
max_abs_error
|
|
351
|
+
| Representation | Bytes | Ratio vs FP32 | MSE | MAE | Max abs error |
|
|
352
|
+
| FP32 | 24576 | 1.00x | 0 | 0 | 0 |
|
|
353
|
+
| FP16 estimate | 12288 | 2.00x | - | - | - |
|
|
354
|
+
| INT8 estimate | 6144 | 4.00x | - | - | - |
|
|
355
|
+
| Hyper Glyph | 22032 | 1.12x | 0.00266153 | 0.0405458 | 0.197096 |
|
|
333
356
|
```
|
|
334
357
|
|
|
335
358
|
The current package focuses on transparent compression experiments rather than
|
|
@@ -353,6 +376,8 @@ config = HyperGlyphConfig(
|
|
|
353
376
|
n_buckets=16,
|
|
354
377
|
n_prototypes=128,
|
|
355
378
|
residual_k=8,
|
|
379
|
+
residual_dtype="int8",
|
|
380
|
+
scale_mode="channel",
|
|
356
381
|
seed=42,
|
|
357
382
|
min_tensor_size=256,
|
|
358
383
|
compress_bias=False,
|
|
@@ -399,6 +424,9 @@ block ~= prototype[prototype_id] * scale + sparse_residual
|
|
|
399
424
|
Increase `residual_k` for better reconstruction fidelity, or reduce it for a
|
|
400
425
|
smaller compressed representation.
|
|
401
426
|
|
|
427
|
+
Set `residual_dtype="int8"` to quantize sparse residual values. Use
|
|
428
|
+
`residual_dtype="float32"` when you want unquantized residual repairs.
|
|
429
|
+
|
|
402
430
|
### 5. **Serialization**
|
|
403
431
|
|
|
404
432
|
Save compressed models as `.hwz` zip archives:
|
|
@@ -450,6 +478,8 @@ config = HyperGlyphConfig(
|
|
|
450
478
|
n_buckets=16,
|
|
451
479
|
n_prototypes=128,
|
|
452
480
|
residual_k=8,
|
|
481
|
+
residual_dtype="int8",
|
|
482
|
+
scale_mode="block",
|
|
453
483
|
seed=42,
|
|
454
484
|
min_tensor_size=256,
|
|
455
485
|
compress_bias=False,
|
|
@@ -463,6 +493,8 @@ Key settings:
|
|
|
463
493
|
- **`block_size`** controls how many flattened weights are grouped together.
|
|
464
494
|
- **`n_prototypes`** controls how many reusable block representatives are learned.
|
|
465
495
|
- **`residual_k`** controls how many residual correction values are stored per block.
|
|
496
|
+
- **`residual_dtype`** controls whether sparse residual values are stored as `int8` or `float32`.
|
|
497
|
+
- **`scale_mode`** controls whether prototype scales are calculated per `block`, per `tensor`, or per `channel`.
|
|
466
498
|
- **`min_tensor_size`** skips tensors too small to benefit from compression.
|
|
467
499
|
- **`compress_bias`** enables compression for bias tensors, which are skipped by default.
|
|
468
500
|
- **`seed`** makes prototype selection deterministic.
|
|
@@ -486,6 +518,8 @@ codec = HyperGlyphCodec(
|
|
|
486
518
|
block_size=16,
|
|
487
519
|
n_prototypes=64,
|
|
488
520
|
residual_k=8,
|
|
521
|
+
residual_dtype="int8",
|
|
522
|
+
scale_mode="channel",
|
|
489
523
|
)
|
|
490
524
|
)
|
|
491
525
|
|
|
@@ -533,6 +567,9 @@ examples/
|
|
|
533
567
|
compress_mlp.py # PyTorch MLP compression example
|
|
534
568
|
compress_state_dict.py # NumPy state dict compression example
|
|
535
569
|
mnist_demo.py # MNIST-oriented demo
|
|
570
|
+
artifacts/
|
|
571
|
+
sample-v0.2.hwz # Example compressed archive
|
|
572
|
+
sample-v0.2-benchmark.md # Markdown benchmark report
|
|
536
573
|
hyperglyph.png # Project logo
|
|
537
574
|
pyproject.toml # Package metadata and dependencies
|
|
538
575
|
CHANGELOG.md # Release history
|
|
@@ -586,7 +623,6 @@ If you use Hyper Glyph in research, please cite:
|
|
|
586
623
|
title={Hyper Glyph: Hyperdimensional Symbolic Residual Compression for Neural Network Weights},
|
|
587
624
|
author={Robert McMenemy},
|
|
588
625
|
year={2026},
|
|
589
|
-
version={0.1.0},
|
|
626
|
+
version={0.2.0},
|
|
590
627
|
}
|
|
591
628
|
```
|
|
592
|
-
|
|
@@ -11,4 +11,9 @@ The reconstruction is:
|
|
|
11
11
|
|
|
12
12
|
$$W \approx \text{Decode}(\text{prototype}) + \text{sparse residual}$$
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
In v0.2, prototype scales can be calculated per block, per tensor, or per
|
|
15
|
+
channel. Sparse residual values can be stored as float32 or quantized to int8
|
|
16
|
+
with a residual scale for decoding.
|
|
17
|
+
|
|
18
|
+
The implementation is intentionally simple and leaves room for learned decoders
|
|
19
|
+
and richer codecs later.
|
|
@@ -4,6 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
Configuration for the codec.
|
|
6
6
|
|
|
7
|
+
- `residual_dtype`: `int8` or `float32` sparse residual storage.
|
|
8
|
+
- `scale_mode`: `block`, `tensor`, or `channel` prototype scaling.
|
|
9
|
+
|
|
7
10
|
## HyperGlyphCodec
|
|
8
11
|
|
|
9
12
|
- compress_array(name, array)
|
|
@@ -12,6 +15,11 @@ Configuration for the codec.
|
|
|
12
15
|
- decompress_state_dict(compressed_model)
|
|
13
16
|
- report(compressed_model, original_state_dict, restored_state_dict)
|
|
14
17
|
|
|
18
|
+
## Benchmark helpers
|
|
19
|
+
|
|
20
|
+
- benchmark_state_dict(state_dict, codec=None)
|
|
21
|
+
- BenchmarkReport.to_markdown()
|
|
22
|
+
|
|
15
23
|
## Serialization helpers
|
|
16
24
|
|
|
17
25
|
- save_compressed(compressed_model, path)
|
|
@@ -5,7 +5,9 @@
|
|
|
5
5
|
Compress a PyTorch state dict into a .hwz archive.
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
|
-
hyperglyph compress model.pt model.hwz
|
|
8
|
+
hyperglyph compress model.pt model.hwz \
|
|
9
|
+
--residual-dtype int8 \
|
|
10
|
+
--scale-mode channel
|
|
9
11
|
```
|
|
10
12
|
|
|
11
13
|
## decompress
|
|
@@ -31,3 +33,10 @@ Benchmark compression and reconstruction for a state dict.
|
|
|
31
33
|
```bash
|
|
32
34
|
hyperglyph benchmark model.pt
|
|
33
35
|
```
|
|
36
|
+
|
|
37
|
+
Write a markdown benchmark report with FP32, FP16 estimate, INT8 estimate, and
|
|
38
|
+
Hyper Glyph comparisons:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
hyperglyph benchmark model.pt --markdown-output benchmark.md
|
|
42
|
+
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Hyper Glyph
|
|
2
2
|
|
|
3
|
-
Hyper Glyph is an experimental package for compressing neural network weights with symbolic hyperdimensional prototypes and sparse residual repair.
|
|
3
|
+
Hyper Glyph is an experimental package for compressing neural network weights with symbolic hyperdimensional prototypes, configurable prototype scales, and sparse residual repair.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -17,6 +17,10 @@ config = HyperGlyphConfig(block_size=16, n_prototypes=32, residual_k=4)
|
|
|
17
17
|
codec = HyperGlyphCodec(config)
|
|
18
18
|
```
|
|
19
19
|
|
|
20
|
+
v0.2 adds int8 residual quantization, per-block/per-tensor/per-channel scale
|
|
21
|
+
modes, markdown benchmark reports, and baseline comparisons against FP32, FP16
|
|
22
|
+
estimate, and INT8 estimate sizes.
|
|
23
|
+
|
|
20
24
|
## Notes
|
|
21
25
|
|
|
22
26
|
The codec is intended for research and experimentation rather than guaranteed production compression.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Roadmap
|
|
2
2
|
|
|
3
|
-
## Version 0.1.0
|
|
3
|
+
## Version 0.1.0 - Proof of concept
|
|
4
4
|
|
|
5
5
|
- src layout package
|
|
6
6
|
- HyperGlyphConfig
|
|
@@ -14,17 +14,20 @@
|
|
|
14
14
|
- CI
|
|
15
15
|
- PyPI publish workflow
|
|
16
16
|
|
|
17
|
-
## Version 0.2.0
|
|
17
|
+
## Version 0.2.0 - Residual quantization and reports
|
|
18
18
|
|
|
19
|
-
- per-channel scale
|
|
19
|
+
- per-block, per-tensor, and per-channel scale modes
|
|
20
20
|
- residual quantization int8
|
|
21
|
-
-
|
|
22
|
-
-
|
|
21
|
+
- markdown benchmark export
|
|
22
|
+
- baseline comparisons against FP32, FP16 estimate, and INT8 estimate
|
|
23
|
+
- example compressed .hwz artifact
|
|
23
24
|
- improved compression report
|
|
24
25
|
|
|
25
|
-
## Version 0.3.0
|
|
26
|
+
## Version 0.3.0 - PyTorch integration
|
|
26
27
|
|
|
27
28
|
- compress_model(model)
|
|
28
29
|
- decompress_into_model(model, compressed)
|
|
29
30
|
- calibration pass
|
|
30
31
|
- layer include/exclude filters
|
|
32
|
+
- optional int4 residual packing
|
|
33
|
+
- entropy-coded residual indices
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Hyper Glyph Benchmark
|
|
2
|
+
|
|
3
|
+
| Representation | Bytes | Ratio vs FP32 | MSE | MAE | Max abs error |
|
|
4
|
+
| --- | ---: | ---: | ---: | ---: | ---: |
|
|
5
|
+
| FP32 | 24576 | 1.00x | 0 | 0 | 0 |
|
|
6
|
+
| FP16 estimate | 12288 | 2.00x | - | - | - |
|
|
7
|
+
| INT8 estimate | 6144 | 4.00x | - | - | - |
|
|
8
|
+
| Hyper Glyph | 22032 | 1.12x | 0.00266153 | 0.0405458 | 0.197096 |
|
|
9
|
+
|
|
10
|
+
## Tensor Summary
|
|
11
|
+
|
|
12
|
+
- Tensors compressed: 2
|
|
13
|
+
- Tensors skipped: 0
|
|
Binary file
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Hyper Glyph package."""
|
|
2
2
|
|
|
3
|
+
from .benchmark import BaselineComparison, BenchmarkReport, benchmark_state_dict
|
|
3
4
|
from .codec import CompressedModel, CompressedTensor, CompressionReport, HyperGlyphCodec
|
|
4
5
|
from .config import HyperGlyphConfig
|
|
5
6
|
from .serialization import load_compressed, save_compressed
|
|
@@ -8,6 +9,8 @@ from .torch_adapter import compress_state_dict, decompress_state_dict
|
|
|
8
9
|
__all__ = [
|
|
9
10
|
"HyperGlyphCodec",
|
|
10
11
|
"HyperGlyphConfig",
|
|
12
|
+
"BaselineComparison",
|
|
13
|
+
"BenchmarkReport",
|
|
11
14
|
"CompressionReport",
|
|
12
15
|
"CompressedModel",
|
|
13
16
|
"CompressedTensor",
|
|
@@ -15,5 +18,6 @@ __all__ = [
|
|
|
15
18
|
"decompress_state_dict",
|
|
16
19
|
"save_compressed",
|
|
17
20
|
"load_compressed",
|
|
21
|
+
"benchmark_state_dict",
|
|
18
22
|
]
|
|
19
|
-
__version__ = "0.1.0"
|
|
23
|
+
__version__ = "0.2.0"
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Benchmark reporting helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any, Mapping
|
|
7
|
+
|
|
8
|
+
from .codec import CompressionReport, HyperGlyphCodec
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(slots=True)
|
|
12
|
+
class BaselineComparison:
|
|
13
|
+
"""A size comparison against a baseline representation."""
|
|
14
|
+
|
|
15
|
+
name: str
|
|
16
|
+
bytes: int
|
|
17
|
+
ratio_vs_fp32: float
|
|
18
|
+
mse: float | None = None
|
|
19
|
+
mae: float | None = None
|
|
20
|
+
max_abs_error: float | None = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(slots=True)
|
|
24
|
+
class BenchmarkReport:
|
|
25
|
+
"""A benchmark report with baseline and Hyper Glyph metrics."""
|
|
26
|
+
|
|
27
|
+
compression: CompressionReport
|
|
28
|
+
baselines: list[BaselineComparison]
|
|
29
|
+
|
|
30
|
+
def to_markdown(self) -> str:
|
|
31
|
+
"""Export the benchmark as a markdown table."""
|
|
32
|
+
lines = [
|
|
33
|
+
"# Hyper Glyph Benchmark",
|
|
34
|
+
"",
|
|
35
|
+
"| Representation | Bytes | Ratio vs FP32 | MSE | MAE | Max abs error |",
|
|
36
|
+
"| --- | ---: | ---: | ---: | ---: | ---: |",
|
|
37
|
+
]
|
|
38
|
+
for baseline in self.baselines:
|
|
39
|
+
lines.append(
|
|
40
|
+
"| "
|
|
41
|
+
f"{baseline.name} | "
|
|
42
|
+
f"{baseline.bytes} | "
|
|
43
|
+
f"{baseline.ratio_vs_fp32:.2f}x | "
|
|
44
|
+
f"{_format_optional(baseline.mse)} | "
|
|
45
|
+
f"{_format_optional(baseline.mae)} | "
|
|
46
|
+
f"{_format_optional(baseline.max_abs_error)} |"
|
|
47
|
+
)
|
|
48
|
+
lines.extend(
|
|
49
|
+
[
|
|
50
|
+
"",
|
|
51
|
+
"## Tensor Summary",
|
|
52
|
+
"",
|
|
53
|
+
f"- Tensors compressed: {self.compression.tensors_compressed}",
|
|
54
|
+
f"- Tensors skipped: {self.compression.tensors_skipped}",
|
|
55
|
+
]
|
|
56
|
+
)
|
|
57
|
+
return "\n".join(lines) + "\n"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def benchmark_state_dict(
|
|
61
|
+
state_dict: Mapping[str, Any],
|
|
62
|
+
codec: HyperGlyphCodec | None = None,
|
|
63
|
+
) -> BenchmarkReport:
|
|
64
|
+
"""Compress a state dict and return baseline comparisons."""
|
|
65
|
+
active_codec = codec or HyperGlyphCodec()
|
|
66
|
+
compressed = active_codec.compress_state_dict(state_dict)
|
|
67
|
+
restored = active_codec.decompress_state_dict(compressed)
|
|
68
|
+
compression = active_codec.report(compressed, state_dict, restored)
|
|
69
|
+
fp32_bytes = compression.original_bytes
|
|
70
|
+
baselines = [
|
|
71
|
+
BaselineComparison("FP32", fp32_bytes, 1.0, 0.0, 0.0, 0.0),
|
|
72
|
+
BaselineComparison(
|
|
73
|
+
"FP16 estimate",
|
|
74
|
+
compression.fp16_estimate_bytes,
|
|
75
|
+
_ratio(fp32_bytes, compression.fp16_estimate_bytes),
|
|
76
|
+
),
|
|
77
|
+
BaselineComparison(
|
|
78
|
+
"INT8 estimate",
|
|
79
|
+
compression.int8_estimate_bytes,
|
|
80
|
+
_ratio(fp32_bytes, compression.int8_estimate_bytes),
|
|
81
|
+
),
|
|
82
|
+
BaselineComparison(
|
|
83
|
+
"Hyper Glyph",
|
|
84
|
+
compression.compressed_bytes,
|
|
85
|
+
compression.compression_ratio,
|
|
86
|
+
compression.total_mse,
|
|
87
|
+
compression.total_mae,
|
|
88
|
+
compression.max_abs_error,
|
|
89
|
+
),
|
|
90
|
+
]
|
|
91
|
+
return BenchmarkReport(compression=compression, baselines=baselines)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _ratio(original_bytes: int, compressed_bytes: int) -> float:
|
|
95
|
+
if compressed_bytes <= 0:
|
|
96
|
+
return float("inf")
|
|
97
|
+
return original_bytes / compressed_bytes
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _format_optional(value: float | None) -> str:
|
|
101
|
+
if value is None:
|
|
102
|
+
return "-"
|
|
103
|
+
if value == 0.0:
|
|
104
|
+
return "0"
|
|
105
|
+
return f"{value:.6g}"
|
|
@@ -11,6 +11,7 @@ try:
|
|
|
11
11
|
except ImportError: # pragma: no cover - optional dependency path
|
|
12
12
|
torch = None
|
|
13
13
|
|
|
14
|
+
from .benchmark import benchmark_state_dict
|
|
14
15
|
from .codec import HyperGlyphCodec
|
|
15
16
|
from .config import HyperGlyphConfig
|
|
16
17
|
from .serialization import load_compressed, save_compressed
|
|
@@ -29,6 +30,10 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
29
30
|
compress_parser.add_argument("--n-buckets", type=int, default=16)
|
|
30
31
|
compress_parser.add_argument("--n-prototypes", type=int, default=128)
|
|
31
32
|
compress_parser.add_argument("--residual-k", type=int, default=8)
|
|
33
|
+
compress_parser.add_argument("--residual-dtype", choices=["float32", "int8"], default="int8")
|
|
34
|
+
compress_parser.add_argument(
|
|
35
|
+
"--scale-mode", choices=["block", "tensor", "channel"], default="block"
|
|
36
|
+
)
|
|
32
37
|
compress_parser.add_argument("--seed", type=int, default=42)
|
|
33
38
|
compress_parser.add_argument("--compress-bias", action="store_true")
|
|
34
39
|
compress_parser.add_argument("--min-tensor-size", type=int, default=256)
|
|
@@ -42,6 +47,16 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
42
47
|
|
|
43
48
|
benchmark_parser = subparsers.add_parser("benchmark")
|
|
44
49
|
benchmark_parser.add_argument("input", help="Input torch state dict file (.pt)")
|
|
50
|
+
benchmark_parser.add_argument(
|
|
51
|
+
"--markdown-output", help="Write benchmark report to a markdown file"
|
|
52
|
+
)
|
|
53
|
+
benchmark_parser.add_argument("--block-size", type=int, default=16)
|
|
54
|
+
benchmark_parser.add_argument("--n-prototypes", type=int, default=128)
|
|
55
|
+
benchmark_parser.add_argument("--residual-k", type=int, default=8)
|
|
56
|
+
benchmark_parser.add_argument("--residual-dtype", choices=["float32", "int8"], default="int8")
|
|
57
|
+
benchmark_parser.add_argument(
|
|
58
|
+
"--scale-mode", choices=["block", "tensor", "channel"], default="block"
|
|
59
|
+
)
|
|
45
60
|
|
|
46
61
|
return parser
|
|
47
62
|
|
|
@@ -61,6 +76,8 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
61
76
|
n_buckets=args.n_buckets,
|
|
62
77
|
n_prototypes=args.n_prototypes,
|
|
63
78
|
residual_k=args.residual_k,
|
|
79
|
+
residual_dtype=args.residual_dtype,
|
|
80
|
+
scale_mode=args.scale_mode,
|
|
64
81
|
seed=args.seed,
|
|
65
82
|
compress_bias=args.compress_bias,
|
|
66
83
|
min_tensor_size=args.min_tensor_size,
|
|
@@ -97,11 +114,21 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
97
114
|
if torch is None:
|
|
98
115
|
raise SystemExit("PyTorch is required for benchmark CLI commands")
|
|
99
116
|
state_dict = torch.load(args.input, map_location="cpu")
|
|
100
|
-
codec = HyperGlyphCodec(
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
117
|
+
codec = HyperGlyphCodec(
|
|
118
|
+
HyperGlyphConfig(
|
|
119
|
+
block_size=args.block_size,
|
|
120
|
+
n_prototypes=args.n_prototypes,
|
|
121
|
+
residual_k=args.residual_k,
|
|
122
|
+
residual_dtype=args.residual_dtype,
|
|
123
|
+
scale_mode=args.scale_mode,
|
|
124
|
+
)
|
|
125
|
+
)
|
|
126
|
+
report = benchmark_state_dict(state_dict, codec)
|
|
127
|
+
markdown = report.to_markdown()
|
|
128
|
+
if args.markdown_output:
|
|
129
|
+
with open(args.markdown_output, "w", encoding="utf-8") as handle:
|
|
130
|
+
handle.write(markdown)
|
|
131
|
+
print(markdown)
|
|
105
132
|
return 0
|
|
106
133
|
|
|
107
134
|
parser.error("unknown command")
|
|
@@ -11,6 +11,7 @@ import numpy as np
|
|
|
11
11
|
from .blocks import restore_tensor_shape, split_array_blocks
|
|
12
12
|
from .config import HyperGlyphConfig
|
|
13
13
|
from .metrics import (
|
|
14
|
+
baseline_size_bytes,
|
|
14
15
|
compressed_size_bytes,
|
|
15
16
|
compression_ratio,
|
|
16
17
|
mae,
|
|
@@ -43,7 +44,7 @@ class CompressedModel:
|
|
|
43
44
|
|
|
44
45
|
tensors: dict[str, CompressedTensor]
|
|
45
46
|
payload: bytes = field(default_factory=bytes)
|
|
46
|
-
format_version: str = "0.
|
|
47
|
+
format_version: str = "0.2"
|
|
47
48
|
|
|
48
49
|
|
|
49
50
|
@dataclass(slots=True)
|
|
@@ -53,6 +54,10 @@ class CompressionReport:
|
|
|
53
54
|
original_bytes: int
|
|
54
55
|
compressed_bytes: int
|
|
55
56
|
compression_ratio: float
|
|
57
|
+
fp16_estimate_bytes: int
|
|
58
|
+
int8_estimate_bytes: int
|
|
59
|
+
fp16_compression_ratio: float
|
|
60
|
+
int8_compression_ratio: float
|
|
56
61
|
tensors_compressed: int
|
|
57
62
|
tensors_skipped: int
|
|
58
63
|
total_mse: float
|
|
@@ -82,16 +87,18 @@ class HyperGlyphCodec:
|
|
|
82
87
|
reconstructed_prototypes = reconstruct_from_prototypes(assignments, prototypes)
|
|
83
88
|
|
|
84
89
|
prototype_ids: list[int] = [int(idx) for idx in assignments]
|
|
85
|
-
scales
|
|
90
|
+
scales = self._block_scales(array, blocks, reconstructed_prototypes)
|
|
86
91
|
residuals: list[dict[str, Any]] = []
|
|
87
92
|
for idx, block in enumerate(blocks):
|
|
88
93
|
proto = reconstructed_prototypes[idx]
|
|
89
|
-
|
|
90
|
-
proto_norm = max(float(np.linalg.norm(proto)), 1e-6)
|
|
91
|
-
scale = block_norm / proto_norm
|
|
92
|
-
scales.append(scale)
|
|
94
|
+
scale = scales[idx]
|
|
93
95
|
proto_scaled = proto * scale
|
|
94
|
-
residual = compute_topk_residual(
|
|
96
|
+
residual = compute_topk_residual(
|
|
97
|
+
block,
|
|
98
|
+
proto_scaled,
|
|
99
|
+
self.config.residual_k,
|
|
100
|
+
dtype=self.config.residual_dtype,
|
|
101
|
+
)
|
|
95
102
|
residuals.append(serialize_residual(residual))
|
|
96
103
|
|
|
97
104
|
return CompressedTensor(
|
|
@@ -109,6 +116,8 @@ class HyperGlyphCodec:
|
|
|
109
116
|
"n_buckets": self.config.n_buckets,
|
|
110
117
|
"n_prototypes": self.config.n_prototypes,
|
|
111
118
|
"residual_k": self.config.residual_k,
|
|
119
|
+
"residual_dtype": self.config.residual_dtype,
|
|
120
|
+
"scale_mode": self.config.scale_mode,
|
|
112
121
|
"seed": self.config.seed,
|
|
113
122
|
"dtype": self.config.dtype,
|
|
114
123
|
"device": self.config.device,
|
|
@@ -161,6 +170,8 @@ class HyperGlyphCodec:
|
|
|
161
170
|
original_bytes = original_size_bytes(original_state_dict or {})
|
|
162
171
|
compressed_bytes = compressed_size_bytes(compressed_model)
|
|
163
172
|
ratio = compression_ratio(original_bytes, compressed_bytes)
|
|
173
|
+
fp16_bytes = baseline_size_bytes(original_state_dict or {}, bytes_per_value=2)
|
|
174
|
+
int8_bytes = baseline_size_bytes(original_state_dict or {}, bytes_per_value=1)
|
|
164
175
|
tensors_compressed = len(compressed_model.tensors)
|
|
165
176
|
tensors_skipped = 0
|
|
166
177
|
if original_state_dict is not None:
|
|
@@ -183,6 +194,10 @@ class HyperGlyphCodec:
|
|
|
183
194
|
original_bytes=original_bytes,
|
|
184
195
|
compressed_bytes=compressed_bytes,
|
|
185
196
|
compression_ratio=ratio,
|
|
197
|
+
fp16_estimate_bytes=fp16_bytes,
|
|
198
|
+
int8_estimate_bytes=int8_bytes,
|
|
199
|
+
fp16_compression_ratio=compression_ratio(original_bytes, fp16_bytes),
|
|
200
|
+
int8_compression_ratio=compression_ratio(original_bytes, int8_bytes),
|
|
186
201
|
tensors_compressed=tensors_compressed,
|
|
187
202
|
tensors_skipped=tensors_skipped,
|
|
188
203
|
total_mse=total_mse,
|
|
@@ -198,3 +213,58 @@ class HyperGlyphCodec:
|
|
|
198
213
|
return (
|
|
199
214
|
"bias" not in name.lower() and int(np.prod(tensor.shape)) >= self.config.min_tensor_size
|
|
200
215
|
)
|
|
216
|
+
|
|
217
|
+
def _block_scales(
|
|
218
|
+
self,
|
|
219
|
+
array: np.ndarray,
|
|
220
|
+
blocks: list[np.ndarray],
|
|
221
|
+
reconstructed_prototypes: np.ndarray,
|
|
222
|
+
) -> list[float]:
|
|
223
|
+
"""Calculate block, tensor, or channel scale values for prototype decoding."""
|
|
224
|
+
if self.config.scale_mode == "tensor":
|
|
225
|
+
block_matrix = np.stack(blocks, axis=0).astype(np.float32)
|
|
226
|
+
block_norm = float(np.linalg.norm(block_matrix))
|
|
227
|
+
proto_norm = max(float(np.linalg.norm(reconstructed_prototypes)), 1e-6)
|
|
228
|
+
return [block_norm / proto_norm for _ in blocks]
|
|
229
|
+
|
|
230
|
+
if self.config.scale_mode == "channel":
|
|
231
|
+
channel_scales = self._channel_scales(array, reconstructed_prototypes)
|
|
232
|
+
channel_ids = self._block_channel_ids(array.shape, len(blocks))
|
|
233
|
+
return [channel_scales[channel_id] for channel_id in channel_ids]
|
|
234
|
+
|
|
235
|
+
scales: list[float] = []
|
|
236
|
+
for idx, block in enumerate(blocks):
|
|
237
|
+
block_norm = float(np.linalg.norm(block))
|
|
238
|
+
proto_norm = max(float(np.linalg.norm(reconstructed_prototypes[idx])), 1e-6)
|
|
239
|
+
scales.append(block_norm / proto_norm)
|
|
240
|
+
return scales
|
|
241
|
+
|
|
242
|
+
def _channel_scales(
|
|
243
|
+
self, array: np.ndarray, reconstructed_prototypes: np.ndarray
|
|
244
|
+
) -> list[float]:
|
|
245
|
+
if array.ndim == 0:
|
|
246
|
+
return [1.0]
|
|
247
|
+
channel_count = int(array.shape[0]) if array.ndim > 0 else 1
|
|
248
|
+
channel_size = int(np.prod(array.shape[1:])) if array.ndim > 1 else 1
|
|
249
|
+
flat_original = np.asarray(array, dtype=np.float32).reshape(-1)
|
|
250
|
+
flat_reconstructed = reconstructed_prototypes.reshape(-1)[: flat_original.size]
|
|
251
|
+
scales: list[float] = []
|
|
252
|
+
for channel in range(channel_count):
|
|
253
|
+
start = channel * channel_size
|
|
254
|
+
end = min(start + channel_size, flat_original.size)
|
|
255
|
+
original_norm = float(np.linalg.norm(flat_original[start:end]))
|
|
256
|
+
proto_norm = max(float(np.linalg.norm(flat_reconstructed[start:end])), 1e-6)
|
|
257
|
+
scales.append(original_norm / proto_norm)
|
|
258
|
+
return scales
|
|
259
|
+
|
|
260
|
+
def _block_channel_ids(self, shape: tuple[int, ...], block_count: int) -> list[int]:
|
|
261
|
+
if not shape:
|
|
262
|
+
return [0 for _ in range(block_count)]
|
|
263
|
+
channel_count = int(shape[0])
|
|
264
|
+
channel_size = int(np.prod(shape[1:])) if len(shape) > 1 else 1
|
|
265
|
+
ids: list[int] = []
|
|
266
|
+
for block_index in range(block_count):
|
|
267
|
+
flat_index = block_index * self.config.block_size
|
|
268
|
+
channel_id = min(flat_index // max(channel_size, 1), channel_count - 1)
|
|
269
|
+
ids.append(int(channel_id))
|
|
270
|
+
return ids
|
|
@@ -14,6 +14,8 @@ class HyperGlyphConfig:
|
|
|
14
14
|
n_buckets: int = 16
|
|
15
15
|
n_prototypes: int = 128
|
|
16
16
|
residual_k: int = 8
|
|
17
|
+
residual_dtype: str = "int8"
|
|
18
|
+
scale_mode: str = "block"
|
|
17
19
|
seed: int = 42
|
|
18
20
|
min_tensor_size: int = 256
|
|
19
21
|
compress_bias: bool = False
|
|
@@ -31,6 +33,10 @@ class HyperGlyphConfig:
|
|
|
31
33
|
raise ValueError("n_prototypes must be positive")
|
|
32
34
|
if self.residual_k < 0:
|
|
33
35
|
raise ValueError("residual_k must be non-negative")
|
|
36
|
+
if self.residual_dtype not in {"float32", "int8"}:
|
|
37
|
+
raise ValueError("residual_dtype must be 'float32' or 'int8'")
|
|
38
|
+
if self.scale_mode not in {"block", "tensor", "channel"}:
|
|
39
|
+
raise ValueError("scale_mode must be 'block', 'tensor', or 'channel'")
|
|
34
40
|
if self.min_tensor_size <= 0:
|
|
35
41
|
raise ValueError("min_tensor_size must be positive")
|
|
36
42
|
if self.dtype not in {"float32", "float64"}:
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import Mapping
|
|
5
|
+
from typing import Any, Mapping
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
|
|
@@ -15,13 +15,44 @@ def original_size_bytes(state_dict: Mapping[str, np.ndarray]) -> int:
|
|
|
15
15
|
return total
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
def baseline_size_bytes(state_dict: Mapping[str, Any], bytes_per_value: int) -> int:
|
|
19
|
+
"""Estimate a dense baseline size with a fixed number of bytes per value."""
|
|
20
|
+
total = 0
|
|
21
|
+
for tensor in state_dict.values():
|
|
22
|
+
total += int(np.asarray(tensor).size) * bytes_per_value
|
|
23
|
+
return total
|
|
24
|
+
|
|
25
|
+
|
|
18
26
|
def compressed_size_bytes(compressed_model: object) -> int:
|
|
19
27
|
"""Estimate the compressed size in bytes."""
|
|
20
28
|
if isinstance(compressed_model, Mapping):
|
|
21
29
|
return len(compressed_model.get("payload", b""))
|
|
30
|
+
tensors = getattr(compressed_model, "tensors", None)
|
|
31
|
+
if isinstance(tensors, Mapping):
|
|
32
|
+
total = 0
|
|
33
|
+
for tensor in tensors.values():
|
|
34
|
+
total += compressed_tensor_size_bytes(tensor)
|
|
35
|
+
return total
|
|
22
36
|
return 0
|
|
23
37
|
|
|
24
38
|
|
|
39
|
+
def compressed_tensor_size_bytes(tensor: object) -> int:
|
|
40
|
+
"""Estimate the byte size of a compressed tensor payload."""
|
|
41
|
+
prototype_matrix = np.asarray(getattr(tensor, "prototype_matrix", np.asarray([])))
|
|
42
|
+
prototype_bytes = int(prototype_matrix.size) * 4
|
|
43
|
+
prototype_id_bytes = len(getattr(tensor, "prototype_ids", [])) * 4
|
|
44
|
+
scale_bytes = len(getattr(tensor, "scales", [])) * 4
|
|
45
|
+
shape_bytes = len(getattr(tensor, "shape", ())) * 4
|
|
46
|
+
residual_bytes = 0
|
|
47
|
+
for residual in getattr(tensor, "residuals", []):
|
|
48
|
+
indices = residual.get("indices", [])
|
|
49
|
+
values = residual.get("values", [])
|
|
50
|
+
residual_bytes += len(indices) * 2
|
|
51
|
+
residual_bytes += len(values) if residual.get("dtype") == "int8" else len(values) * 4
|
|
52
|
+
residual_bytes += 4
|
|
53
|
+
return prototype_bytes + prototype_id_bytes + scale_bytes + shape_bytes + residual_bytes
|
|
54
|
+
|
|
55
|
+
|
|
25
56
|
def compression_ratio(original_bytes: int, compressed_bytes: int) -> float:
|
|
26
57
|
"""Compute compression ratio as original / compressed."""
|
|
27
58
|
if compressed_bytes <= 0:
|
|
@@ -8,11 +8,16 @@ import numpy as np
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def compute_topk_residual(
|
|
11
|
-
original_block: np.ndarray,
|
|
11
|
+
original_block: np.ndarray,
|
|
12
|
+
reconstructed_block: np.ndarray,
|
|
13
|
+
k: int,
|
|
14
|
+
dtype: str = "int8",
|
|
12
15
|
) -> dict[str, Any]:
|
|
13
16
|
"""Return the indices and values of the top-k residual entries."""
|
|
14
17
|
if k < 0:
|
|
15
18
|
raise ValueError("k must be non-negative")
|
|
19
|
+
if dtype not in {"float32", "int8"}:
|
|
20
|
+
raise ValueError("dtype must be 'float32' or 'int8'")
|
|
16
21
|
if original_block.shape != reconstructed_block.shape:
|
|
17
22
|
raise ValueError("blocks must have the same shape")
|
|
18
23
|
|
|
@@ -20,24 +25,40 @@ def compute_topk_residual(
|
|
|
20
25
|
reconstructed_block, dtype=np.float32
|
|
21
26
|
)
|
|
22
27
|
if k == 0:
|
|
23
|
-
return {"indices": [], "values": []}
|
|
28
|
+
return {"indices": [], "values": [], "dtype": dtype}
|
|
24
29
|
|
|
25
30
|
if diff.size == 0:
|
|
26
|
-
return {"indices": [], "values": []}
|
|
31
|
+
return {"indices": [], "values": [], "dtype": dtype}
|
|
27
32
|
|
|
28
33
|
flat = diff.reshape(-1)
|
|
29
34
|
topk_idx = np.argsort(np.abs(flat))[-k:][::-1]
|
|
35
|
+
values: np.ndarray = np.asarray([float(flat[index]) for index in topk_idx], dtype=np.float32)
|
|
36
|
+
if dtype == "int8":
|
|
37
|
+
scale = float(np.max(np.abs(values)) / 127.0) if values.size else 1.0
|
|
38
|
+
if scale == 0.0:
|
|
39
|
+
scale = 1.0
|
|
40
|
+
quantized: np.ndarray = np.clip(np.rint(values / scale), -127, 127).astype(np.int8)
|
|
41
|
+
return {
|
|
42
|
+
"indices": [int(index) for index in topk_idx],
|
|
43
|
+
"values": [int(value) for value in quantized],
|
|
44
|
+
"scale": scale,
|
|
45
|
+
"dtype": "int8",
|
|
46
|
+
}
|
|
30
47
|
return {
|
|
31
48
|
"indices": [int(index) for index in topk_idx],
|
|
32
|
-
"values": [float(
|
|
49
|
+
"values": [float(value) for value in values],
|
|
50
|
+
"dtype": "float32",
|
|
33
51
|
}
|
|
34
52
|
|
|
35
53
|
|
|
36
54
|
def apply_residual(block: np.ndarray, residual: dict[str, Any]) -> np.ndarray:
|
|
37
55
|
"""Apply sparse residual values to a block."""
|
|
38
56
|
result = np.asarray(block, dtype=np.float32).reshape(-1).copy()
|
|
57
|
+
scale = float(residual.get("scale", 1.0))
|
|
58
|
+
dtype = str(residual.get("dtype", "float32"))
|
|
39
59
|
for index, value in zip(residual.get("indices", []), residual.get("values", [])):
|
|
40
|
-
|
|
60
|
+
decoded_value = float(value) * scale if dtype == "int8" else float(value)
|
|
61
|
+
result[int(index)] += decoded_value
|
|
41
62
|
return result.reshape(block.shape)
|
|
42
63
|
|
|
43
64
|
|
|
@@ -46,4 +67,6 @@ def serialize_residual(residual: dict[str, Any]) -> dict[str, Any]:
|
|
|
46
67
|
return {
|
|
47
68
|
"indices": list(residual.get("indices", [])),
|
|
48
69
|
"values": list(residual.get("values", [])),
|
|
70
|
+
"scale": float(residual.get("scale", 1.0)),
|
|
71
|
+
"dtype": str(residual.get("dtype", "float32")),
|
|
49
72
|
}
|
|
@@ -20,7 +20,8 @@ def save_compressed(compressed_model: CompressedModel, path: str | Path) -> None
|
|
|
20
20
|
metadata = {
|
|
21
21
|
"format_version": compressed_model.format_version,
|
|
22
22
|
"tensors": {
|
|
23
|
-
name: tensor_to_dict(tensor
|
|
23
|
+
name: tensor_to_dict(tensor, include_prototype_matrix=False)
|
|
24
|
+
for name, tensor in compressed_model.tensors.items()
|
|
24
25
|
},
|
|
25
26
|
}
|
|
26
27
|
archive.writestr("metadata.json", json.dumps(metadata, indent=2))
|
|
@@ -37,31 +38,45 @@ def load_compressed(path: str | Path) -> CompressedModel:
|
|
|
37
38
|
archive_path = Path(path)
|
|
38
39
|
with zipfile.ZipFile(archive_path, "r") as archive:
|
|
39
40
|
metadata = json.loads(archive.read("metadata.json"))
|
|
41
|
+
prototype_arrays: Mapping[str, Any] = {}
|
|
42
|
+
if "prototypes.npz" in archive.namelist():
|
|
43
|
+
with archive.open("prototypes.npz") as handle:
|
|
44
|
+
prototype_arrays = dict(np.load(handle))
|
|
40
45
|
tensors: dict[str, CompressedTensor] = {}
|
|
41
46
|
for name, value in metadata.get("tensors", {}).items():
|
|
42
|
-
|
|
47
|
+
prototype_key = f"{name}_prototypes"
|
|
48
|
+
tensors[name] = dict_to_tensor(value, prototype_arrays.get(prototype_key))
|
|
43
49
|
return CompressedModel(
|
|
44
50
|
tensors=tensors, payload=b"", format_version=metadata.get("format_version", "0.1")
|
|
45
51
|
)
|
|
46
52
|
|
|
47
53
|
|
|
48
|
-
def tensor_to_dict(
|
|
54
|
+
def tensor_to_dict(
|
|
55
|
+
tensor: CompressedTensor, include_prototype_matrix: bool = True
|
|
56
|
+
) -> dict[str, Any]:
|
|
49
57
|
"""Convert a compressed tensor to a JSON-safe dictionary."""
|
|
50
|
-
|
|
58
|
+
payload: dict[str, Any] = {
|
|
51
59
|
"name": tensor.name,
|
|
52
60
|
"shape": list(tensor.shape),
|
|
53
61
|
"block_size": tensor.block_size,
|
|
54
62
|
"prototype_ids": tensor.prototype_ids,
|
|
55
63
|
"scales": tensor.scales,
|
|
56
64
|
"residuals": tensor.residuals,
|
|
57
|
-
"prototype_matrix": tensor.prototype_matrix.tolist(),
|
|
58
65
|
"seed": tensor.seed,
|
|
59
66
|
"codec_config": tensor.codec_config,
|
|
60
67
|
}
|
|
68
|
+
if include_prototype_matrix:
|
|
69
|
+
payload["prototype_matrix"] = tensor.prototype_matrix.tolist()
|
|
70
|
+
return payload
|
|
61
71
|
|
|
62
72
|
|
|
63
|
-
def dict_to_tensor(
|
|
73
|
+
def dict_to_tensor(
|
|
74
|
+
payload: Mapping[str, Any], prototype_matrix: np.ndarray | None = None
|
|
75
|
+
) -> CompressedTensor:
|
|
64
76
|
"""Convert a JSON-safe dictionary back to a CompressedTensor."""
|
|
77
|
+
matrix = prototype_matrix
|
|
78
|
+
if matrix is None:
|
|
79
|
+
matrix = np.asarray(payload.get("prototype_matrix", []), dtype=np.float32)
|
|
65
80
|
return CompressedTensor(
|
|
66
81
|
name=str(payload["name"]),
|
|
67
82
|
shape=tuple(int(value) for value in payload["shape"]),
|
|
@@ -69,7 +84,7 @@ def dict_to_tensor(payload: Mapping[str, Any]) -> CompressedTensor:
|
|
|
69
84
|
prototype_ids=[int(value) for value in payload["prototype_ids"]],
|
|
70
85
|
scales=[float(value) for value in payload["scales"]],
|
|
71
86
|
residuals=[dict(value) for value in payload["residuals"]],
|
|
72
|
-
prototype_matrix=np.asarray(
|
|
87
|
+
prototype_matrix=np.asarray(matrix, dtype=np.float32),
|
|
73
88
|
seed=int(payload["seed"]),
|
|
74
89
|
codec_config=dict(payload["codec_config"]),
|
|
75
90
|
)
|
|
@@ -64,4 +64,7 @@ def decompress_state_dict(
|
|
|
64
64
|
restored = codec.decompress_state_dict(compressed_model)
|
|
65
65
|
if reference_state_dict is None:
|
|
66
66
|
return restored
|
|
67
|
-
|
|
67
|
+
merged: dict[str, Any] = dict(reference_state_dict)
|
|
68
|
+
for name, value in restored.items():
|
|
69
|
+
merged[name] = numpy_to_tensor(value, reference_state_dict[name])
|
|
70
|
+
return merged
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from hyperglyph import HyperGlyphCodec, HyperGlyphConfig, benchmark_state_dict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_benchmark_report_exports_markdown_with_baselines() -> None:
|
|
7
|
+
state_dict = {"weight": np.arange(256, dtype=np.float32).reshape(16, 16)}
|
|
8
|
+
codec = HyperGlyphCodec(
|
|
9
|
+
HyperGlyphConfig(block_size=8, n_prototypes=8, residual_k=2, min_tensor_size=4)
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
report = benchmark_state_dict(state_dict, codec)
|
|
13
|
+
markdown = report.to_markdown()
|
|
14
|
+
|
|
15
|
+
assert "FP32" in markdown
|
|
16
|
+
assert "FP16 estimate" in markdown
|
|
17
|
+
assert "INT8 estimate" in markdown
|
|
18
|
+
assert "Hyper Glyph" in markdown
|
|
19
|
+
assert report.compression.compressed_bytes > 0
|
|
@@ -14,7 +14,7 @@ def test_inspect_command_exits_successfully(tmp_path) -> None:
|
|
|
14
14
|
|
|
15
15
|
path = tmp_path / "model.hwz"
|
|
16
16
|
save_compressed(
|
|
17
|
-
CompressedModel(tensors={"weight": compressed}, payload=b"", format_version="0.
|
|
17
|
+
CompressedModel(tensors={"weight": compressed}, payload=b"", format_version="0.2"),
|
|
18
18
|
path,
|
|
19
19
|
)
|
|
20
20
|
|
|
@@ -22,6 +22,9 @@ def test_report_returns_valid_compression_fields() -> None:
|
|
|
22
22
|
CompressedModelWrapper(compressed), {"weight": data}, {"weight": restored}
|
|
23
23
|
)
|
|
24
24
|
assert report.tensors_compressed == 1
|
|
25
|
+
assert report.compressed_bytes > 0
|
|
26
|
+
assert report.fp16_estimate_bytes == data.size * 2
|
|
27
|
+
assert report.int8_estimate_bytes == data.size
|
|
25
28
|
|
|
26
29
|
|
|
27
30
|
def test_small_tensors_are_skipped_when_below_threshold() -> None:
|
|
@@ -34,8 +37,25 @@ def test_small_tensors_are_skipped_when_below_threshold() -> None:
|
|
|
34
37
|
assert "too small" in str(exc)
|
|
35
38
|
|
|
36
39
|
|
|
40
|
+
def test_tensor_and_channel_scale_modes_compress() -> None:
|
|
41
|
+
data = np.arange(64, dtype=np.float32).reshape(8, 8)
|
|
42
|
+
for scale_mode in ("tensor", "channel"):
|
|
43
|
+
config = HyperGlyphConfig(
|
|
44
|
+
block_size=8,
|
|
45
|
+
n_prototypes=4,
|
|
46
|
+
residual_k=2,
|
|
47
|
+
min_tensor_size=4,
|
|
48
|
+
scale_mode=scale_mode,
|
|
49
|
+
)
|
|
50
|
+
codec = HyperGlyphCodec(config)
|
|
51
|
+
compressed = codec.compress_array("weight", data)
|
|
52
|
+
restored = codec.decompress_array(compressed)
|
|
53
|
+
assert restored.shape == data.shape
|
|
54
|
+
assert compressed.codec_config["scale_mode"] == scale_mode
|
|
55
|
+
|
|
56
|
+
|
|
37
57
|
class CompressedModelWrapper:
|
|
38
58
|
def __init__(self, compressed: object) -> None:
|
|
39
59
|
self.tensors = {"weight": compressed}
|
|
40
60
|
self.payload = b""
|
|
41
|
-
self.format_version = "0.
|
|
61
|
+
self.format_version = "0.2"
|
|
@@ -20,4 +20,16 @@ def test_applying_residual_changes_reconstructed_block_correctly() -> None:
|
|
|
20
20
|
|
|
21
21
|
def test_k_zero_works() -> None:
|
|
22
22
|
residual = compute_topk_residual(np.ones(4), np.zeros(4), 0)
|
|
23
|
-
assert residual == {"indices": [], "values": []}
|
|
23
|
+
assert residual == {"indices": [], "values": [], "dtype": "int8"}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_int8_residual_quantization_round_trips_close_values() -> None:
|
|
27
|
+
original = np.array([1.0, 2.0, 3.0], dtype=np.float32)
|
|
28
|
+
reconstructed = np.array([1.1, 1.8, 3.2], dtype=np.float32)
|
|
29
|
+
residual = compute_topk_residual(original, reconstructed, 2, dtype="int8")
|
|
30
|
+
|
|
31
|
+
assert residual["dtype"] == "int8"
|
|
32
|
+
assert all(isinstance(value, int) for value in residual["values"])
|
|
33
|
+
|
|
34
|
+
updated = apply_residual(reconstructed, residual)
|
|
35
|
+
assert np.max(np.abs(updated - original)) < 0.21
|
|
@@ -15,7 +15,7 @@ def test_save_and_load_hwz(tmp_path) -> None:
|
|
|
15
15
|
save_compressed(compressed_model_from_tensor(compressed), path)
|
|
16
16
|
loaded = load_compressed(path)
|
|
17
17
|
|
|
18
|
-
assert loaded.format_version == "0.
|
|
18
|
+
assert loaded.format_version == "0.2"
|
|
19
19
|
assert "weight" in loaded.tensors
|
|
20
20
|
assert loaded.tensors["weight"].name == "weight"
|
|
21
21
|
|
|
@@ -35,4 +35,4 @@ def test_metadata_format_version_exists(tmp_path) -> None:
|
|
|
35
35
|
def compressed_model_from_tensor(compressed):
|
|
36
36
|
from hyperglyph.codec import CompressedModel
|
|
37
37
|
|
|
38
|
-
return CompressedModel(tensors={"weight": compressed}, payload=b"", format_version="0.
|
|
38
|
+
return CompressedModel(tensors={"weight": compressed}, payload=b"", format_version="0.2")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|