superlocalmemory 3.3.7 → 3.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.3.7",
|
|
3
|
+
"version": "3.3.9",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -310,6 +310,7 @@ class PolarQuantConfig:
|
|
|
310
310
|
dimension: int = 768
|
|
311
311
|
rotation_matrix_path: str = "" # empty = ~/.superlocalmemory/polar_rotation.npy
|
|
312
312
|
seed: int = 42 # reproducible rotation matrix
|
|
313
|
+
codebook_method: str = "turbo" # "turbo" (default) or "polar_legacy"
|
|
313
314
|
|
|
314
315
|
|
|
315
316
|
@dataclass(frozen=True)
|
|
@@ -338,7 +339,7 @@ class QuantizationConfig:
|
|
|
338
339
|
eap_enabled: bool = True
|
|
339
340
|
keep_float32_backup: bool = True
|
|
340
341
|
auto_compact_interval_hours: int = 6
|
|
341
|
-
polar_search_penalty: float = 0.95
|
|
342
|
+
polar_search_penalty: float = 0.97 # V3.3.8: 0.95→0.97, TurboQuant has lower MSE
|
|
342
343
|
|
|
343
344
|
|
|
344
345
|
@dataclass(frozen=True)
|
|
@@ -83,13 +83,23 @@ class PolarQuantEncoder:
|
|
|
83
83
|
HR-09: Angle indices as uint8, packed into bytes.
|
|
84
84
|
"""
|
|
85
85
|
|
|
86
|
-
__slots__ = ("_config", "_d", "_S", "_codebooks")
|
|
86
|
+
__slots__ = ("_config", "_d", "_S", "_codebooks", "_turbo", "_use_turbo")
|
|
87
87
|
|
|
88
88
|
def __init__(self, config: PolarQuantConfig) -> None:
    """Set up the encoder for either the TurboQuant or the legacy polar path.

    The path is chosen from ``config.codebook_method``; a config object
    without that attribute is treated as ``"turbo"``. Any value other than
    ``"turbo"`` selects the legacy PolarQuant path.

    Args:
        config: Quantizer configuration; ``dimension`` and (optionally)
            ``codebook_method`` are read here.
    """
    self._config = config
    self._d = config.dimension

    method = getattr(config, "codebook_method", "turbo")
    self._use_turbo = method == "turbo"

    if not self._use_turbo:
        # Legacy path: this encoder owns its rotation matrix and codebooks.
        self._turbo = None
        self._S = self._load_or_create_rotation_matrix()
        self._codebooks = self._generate_uniform_codebooks()
        return

    # Imported lazily, at the point of use, rather than at module import time.
    from superlocalmemory.math.turbo_quant import TurboQuantEncoder

    self._turbo = TurboQuantEncoder(config)
    # Share the TurboQuant rotation so both decode paths use one matrix.
    self._S = self._turbo._S
    # Uniform codebooks are still needed to decode legacy (non-TQ) blobs.
    self._codebooks = self._generate_uniform_codebooks()
|
|
93
103
|
|
|
94
104
|
# -- Rotation matrix (HR-01, HR-02) ------------------------------------
|
|
95
105
|
|
|
@@ -156,14 +166,14 @@ class PolarQuantEncoder:
|
|
|
156
166
|
# -- Encode ------------------------------------------------------------
|
|
157
167
|
|
|
158
168
|
def encode(self, embedding: NDArray, bit_width: int = 4) -> QuantizedEmbedding:
|
|
159
|
-
"""Encode a float32 embedding into quantized
|
|
169
|
+
"""Encode a float32 embedding into quantized representation.
|
|
160
170
|
|
|
161
171
|
Args:
|
|
162
172
|
embedding: 1-D float vector of dimension self._d.
|
|
163
173
|
bit_width: 2, 4, or 8.
|
|
164
174
|
|
|
165
175
|
Returns:
|
|
166
|
-
QuantizedEmbedding with packed
|
|
176
|
+
QuantizedEmbedding with packed indices.
|
|
167
177
|
|
|
168
178
|
Raises:
|
|
169
179
|
ValueError: Invalid bit_width or dimension mismatch.
|
|
@@ -177,13 +187,25 @@ class PolarQuantEncoder:
|
|
|
177
187
|
f"shape mismatch: expected ({self._d},), got {embedding.shape}"
|
|
178
188
|
)
|
|
179
189
|
|
|
180
|
-
#
|
|
181
|
-
|
|
190
|
+
# V3.3.8: TurboQuant path (default)
|
|
191
|
+
if self._use_turbo:
|
|
192
|
+
result = self._turbo.encode(embedding, bit_width)
|
|
193
|
+
return QuantizedEmbedding(
|
|
194
|
+
fact_id="",
|
|
195
|
+
radius=result.radius,
|
|
196
|
+
angle_indices=result.indices,
|
|
197
|
+
bit_width=result.bit_width,
|
|
198
|
+
qjl_bits=None,
|
|
199
|
+
)
|
|
182
200
|
|
|
183
|
-
#
|
|
201
|
+
# Legacy PolarQuant path
|
|
202
|
+
return self._encode_polar(embedding, bit_width)
|
|
203
|
+
|
|
204
|
+
def _encode_polar(self, embedding: NDArray, bit_width: int) -> QuantizedEmbedding:
|
|
205
|
+
"""Legacy PolarQuant encode (polar coordinate transform)."""
|
|
206
|
+
v_rot = self._S @ embedding
|
|
184
207
|
r = float(np.linalg.norm(v_rot))
|
|
185
208
|
|
|
186
|
-
# Degenerate zero vector
|
|
187
209
|
if r < 1e-12:
|
|
188
210
|
zero_angles = np.zeros(self._d - 1, dtype=np.uint8)
|
|
189
211
|
if bit_width == 8:
|
|
@@ -200,17 +222,11 @@ class PolarQuantEncoder:
|
|
|
200
222
|
qjl_bits=None,
|
|
201
223
|
)
|
|
202
224
|
|
|
203
|
-
# Step 3: Normalize
|
|
204
225
|
v_unit = v_rot / r
|
|
205
|
-
|
|
206
|
-
# Step 4: Cartesian to polar angles
|
|
207
226
|
angles = _cartesian_to_polar_angles(v_unit)
|
|
208
|
-
|
|
209
|
-
# Step 5: Quantize angles using codebook
|
|
210
227
|
cb = self._codebooks[bit_width]
|
|
211
228
|
indices = np.digitize(angles, cb["boundaries"][1:-1]).astype(np.uint8)
|
|
212
229
|
|
|
213
|
-
# Step 6: Pack into bytes
|
|
214
230
|
if bit_width == 8:
|
|
215
231
|
packed = indices.tobytes()
|
|
216
232
|
elif bit_width == 4:
|
|
@@ -228,18 +244,43 @@ class PolarQuantEncoder:
|
|
|
228
244
|
|
|
229
245
|
# -- Decode ------------------------------------------------------------
|
|
230
246
|
|
|
247
|
+
# TQ magic prefix for format detection (HR-MIG-02)
|
|
248
|
+
_TQ_MAGIC = b"\x54\x51"
|
|
249
|
+
|
|
231
250
|
def decode(self, qe: QuantizedEmbedding) -> NDArray:
    """Decode a QuantizedEmbedding back to float64 vector.

    V3.3.8: Routes between the TurboQuant and legacy PolarQuant decode
    paths. A blob is treated as TurboQuant only when it both starts with
    the "TQ" prefix (0x54, 0x51) and has exactly the TurboQuant length:
    the prefix plus ``self._d`` indices packed at ``qe.bit_width`` bits
    each. Checking the length as well keeps a legacy blob whose first two
    bytes happen to equal the prefix from being misrouted.

    Args:
        qe: Quantized embedding produced by encode().

    Returns:
        Reconstructed vector of dimension self._d.
    """
    blob = qe.angle_indices
    magic = self._TQ_MAGIC

    # ceil(d * bits / 8) payload bytes follow the magic prefix.
    turbo_len = len(magic) + (self._d * qe.bit_width + 7) // 8

    if len(blob) == turbo_len and blob[:len(magic)] == magic:
        return self._decode_turbo(qe)
    return self._decode_polar(qe)
|
|
266
|
+
|
|
267
|
+
def _decode_turbo(self, qe: QuantizedEmbedding) -> NDArray:
    """Decode a TurboQuant-encoded BLOB (one carrying the TQ prefix).

    If this encoder was built without a TurboQuant delegate, one is
    created from the stored config on first use and kept for later calls.

    Args:
        qe: Quantized embedding whose ``angle_indices`` hold a TQ blob.

    Returns:
        Whatever the TurboQuant encoder's ``decode`` returns for the blob.
    """
    from superlocalmemory.math.turbo_quant import (
        TurboQuantEncoder,
        TurboQuantResult,
    )

    encoder = self._turbo
    if encoder is None:
        encoder = TurboQuantEncoder(self._config)
        self._turbo = encoder

    # Re-wrap the stored fields in the type the TurboQuant decoder expects.
    payload = TurboQuantResult(
        bit_width=qe.bit_width,
        indices=qe.angle_indices,
        radius=qe.radius,
    )
    return encoder.decode(payload)
|
|
279
|
+
|
|
280
|
+
def _decode_polar(self, qe: QuantizedEmbedding) -> NDArray:
|
|
281
|
+
"""Decode legacy PolarQuant BLOB (no TQ prefix)."""
|
|
240
282
|
n_angles = self._d - 1
|
|
241
283
|
|
|
242
|
-
# Step 1: Unpack angle indices
|
|
243
284
|
if qe.bit_width == 8:
|
|
244
285
|
indices = np.frombuffer(qe.angle_indices, dtype=np.uint8).copy()
|
|
245
286
|
elif qe.bit_width == 4:
|
|
@@ -247,19 +288,12 @@ class PolarQuantEncoder:
|
|
|
247
288
|
else:
|
|
248
289
|
indices = self.unpack_2bit(qe.angle_indices, n_angles)
|
|
249
290
|
|
|
250
|
-
# Step 2: Dequantize -- map indices to centroid angles
|
|
251
291
|
centroids = self._codebooks[qe.bit_width]["centroids"]
|
|
252
|
-
# Clip indices to valid range
|
|
253
292
|
indices = np.clip(indices, 0, len(centroids) - 1)
|
|
254
293
|
angles = centroids[indices]
|
|
255
294
|
|
|
256
|
-
# Step 3: Polar to Cartesian
|
|
257
295
|
v_unit = _polar_to_cartesian(angles, self._d)
|
|
258
|
-
|
|
259
|
-
# Step 4: Scale by radius
|
|
260
296
|
v_rot = v_unit * qe.radius
|
|
261
|
-
|
|
262
|
-
# Step 5: Inverse rotation (S is orthogonal, so S^T = S^{-1})
|
|
263
297
|
v_orig = self._S.T @ v_rot
|
|
264
298
|
|
|
265
299
|
return v_orig
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""TurboQuant embedding quantization (ICLR 2026).
|
|
6
|
+
|
|
7
|
+
Per-coordinate Lloyd-Max scalar quantization after random orthogonal rotation.
|
|
8
|
+
D_mse <= sqrt(3*pi/2) / 4^b. No scipy (HR-SCIPY-01). 2-byte "TQ" prefix on
|
|
9
|
+
all BLOBs (HR-MIG-02). Bit-widths: 2, 4, 8 only (HR-3BIT-01).
|
|
10
|
+
|
|
11
|
+
References: TurboQuant (arXiv 2504.19874), PolarQuant (arXiv 2502.02617).
|
|
12
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj | License: MIT
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
import math
|
|
19
|
+
import shutil
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
from numpy.typing import NDArray
|
|
25
|
+
|
|
26
|
+
from superlocalmemory.core.config import PolarQuantConfig
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
TQ_MAGIC = b"\x54\x51" # 2-byte prefix for TurboQuant BLOBs (HR-MIG-02)
|
|
31
|
+
SUPPORTED_BIT_WIDTHS: frozenset[int] = frozenset({2, 4, 8})
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
# Data types
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class TurboQuantResult:
    """Frozen record holding one TurboQuant-encoded embedding.

    Attributes:
        radius: Norm of the rotated vector, stored as a Python float.
        indices: ``TQ_MAGIC`` followed by the bit-packed codebook indices.
        bit_width: Bits used per index.
    """

    radius: float
    indices: bytes  # TQ_MAGIC + packed codebook indices
    bit_width: int
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Lloyd-Max codebook (HR-SCIPY-01: math.erf + math.exp only)
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
_SQRT_2PI = math.sqrt(2.0 * math.pi)
_SQRT_2 = math.sqrt(2.0)


def _std_normal_pdf(x: float) -> float:
    """Return the standard normal density at *x*."""
    return math.exp(-0.5 * x * x) / _SQRT_2PI


def _std_normal_cdf(x: float) -> float:
    """Return the standard normal CDF at *x*, computed with ``math.erf``."""
    return 0.5 * (1.0 + math.erf(x / _SQRT_2))


def _compute_lloyd_max_gaussian(
    sigma: float, n_levels: int, max_iter: int = 100, tol: float = 1e-10,
) -> NDArray:
    """Lloyd-Max codebook for N(0, sigma^2) on [-1, 1]. Deterministic (HR-CB-01).

    The cell boundaries span [-1, 1] (the full range of a unit-vector
    coordinate) rather than [-5*sigma, 5*sigma]: the Gaussian only drives
    centroid placement, while the boundaries must cover every possible value.

    Args:
        sigma: Standard deviation of the Gaussian; must be finite and > 0.
        n_levels: Number of quantization levels; must be >= 1.
        max_iter: Upper bound on Lloyd iterations.
        tol: Stop once no centroid moves by ``tol`` or more in an iteration.

    Returns:
        Array of ``n_levels`` centroids in ascending order.

    Raises:
        ValueError: If ``sigma`` is not a finite positive number or
            ``n_levels`` is smaller than 1.
    """
    if not (math.isfinite(sigma) and sigma > 0.0):
        raise ValueError(f"sigma must be a finite positive number, got {sigma}")
    if n_levels < 1:
        raise ValueError(f"n_levels must be >= 1, got {n_levels}")

    lo, hi = -1.0, 1.0  # Full unit-sphere coordinate range
    boundaries = np.linspace(lo, hi, n_levels + 1)
    # Start from the midpoint of every (initially uniform) cell.
    centroids = 0.5 * (boundaries[:-1] + boundaries[1:])

    for _ in range(max_iter):
        previous = centroids.copy()

        for k in range(n_levels):
            a_k = float(boundaries[k]) / sigma
            b_k = float(boundaries[k + 1]) / sigma
            mass = _std_normal_cdf(b_k) - _std_normal_cdf(a_k)
            if mass > 1e-15:
                # Mean of the Gaussian truncated to cell k.
                centroids[k] = (
                    sigma * (_std_normal_pdf(a_k) - _std_normal_pdf(b_k)) / mass
                )
            else:
                # Tail region with negligible mass: use the cell midpoint so
                # rare extreme values still map to a sensible level.
                centroids[k] = 0.5 * (boundaries[k] + boundaries[k + 1])

        # Interior boundaries move to the midpoints between adjacent
        # centroids; the outer boundaries stay pinned at lo and hi.
        boundaries[1:-1] = 0.5 * (centroids[:-1] + centroids[1:])

        if float(np.max(np.abs(centroids - previous))) < tol:
            break

    return np.sort(centroids)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
# Bit packing
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _pack_8bit(indices: NDArray) -> bytes:
    """Serialize *indices* as raw bytes, one unsigned byte per index."""
    as_uint8 = indices.astype(np.uint8)
    return as_uint8.tobytes()
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _unpack_8bit(data: bytes, length: int) -> NDArray:
    """Return the first *length* bytes of *data* as a fresh uint8 array."""
    view = np.frombuffer(data, dtype=np.uint8)
    # frombuffer yields a read-only view over the bytes; copy so the result
    # is an independent, writable array.
    return np.array(view[:length])
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _pack_4bit(indices: NDArray) -> bytes:
    """Pack indices two per byte, first index in the high nibble.

    Values are clamped to 0..15; an odd-length input is padded with a
    trailing zero nibble.
    """
    count = len(indices)
    nibbles = np.zeros(count + count % 2, dtype=np.uint8)
    nibbles[:count] = np.clip(indices, 0, 15)

    high = nibbles[0::2] << 4
    low = nibbles[1::2]
    return (high | low).tobytes()
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _unpack_4bit(data: bytes, length: int) -> NDArray:
    """Split each byte into (high nibble, low nibble) and keep *length* values."""
    raw = np.frombuffer(data, dtype=np.uint8)
    # Column 0 holds the high nibbles, column 1 the low ones; flattening
    # row-major interleaves them back into the original index order.
    pairs = np.stack((raw >> 4, raw & 0x0F), axis=1)
    return pairs.reshape(-1)[:length]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _pack_2bit(indices: NDArray) -> bytes:
    """Pack indices four per byte, first index in the two highest bits.

    Values are clamped to 0..3; the input is zero-padded up to a multiple
    of four entries.
    """
    count = len(indices)
    pad = -count % 4
    crumbs = np.zeros(count + pad, dtype=np.uint8)
    crumbs[:count] = np.clip(indices, 0, 3)

    # One row per output byte, most significant pair first.
    quads = crumbs.reshape(-1, 4)
    packed = (
        (quads[:, 0] << 6)
        | (quads[:, 1] << 4)
        | (quads[:, 2] << 2)
        | quads[:, 3]
    )
    return packed.tobytes()
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _unpack_2bit(data: bytes, length: int) -> NDArray:
    """Split each byte into four 2-bit values (high bits first); keep *length*."""
    raw = np.frombuffer(data, dtype=np.uint8)
    shifts = np.array([6, 4, 2, 0], dtype=np.uint8)
    # Broadcasting gives one row per input byte and one column per 2-bit
    # slot; row-major flattening restores the original index order.
    crumbs = (raw[:, None] >> shifts) & 0x03
    return crumbs.reshape(-1)[:length]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
_PACKERS: dict[int, tuple] = {
|
|
147
|
+
8: (_pack_8bit, _unpack_8bit),
|
|
148
|
+
4: (_pack_4bit, _unpack_4bit),
|
|
149
|
+
2: (_pack_2bit, _unpack_2bit),
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
# ---------------------------------------------------------------------------
|
|
153
|
+
# TurboQuantEncoder
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class TurboQuantEncoder:
    """Per-coordinate Lloyd-Max quantizer with random rotation.

    HR-ROT-01: Same rotation matrix for encode/decode.
    HR-CB-02: Codebooks computed ONCE at __init__.
    HR-SCIPY-01: No scipy dependency.
    """

    __slots__ = ("_config", "_d", "_S", "_codebooks")

    def __init__(self, config: PolarQuantConfig) -> None:
        """Load (or create) the rotation matrix and precompute the codebooks.

        Args:
            config: Supplies ``dimension``, ``rotation_matrix_path`` and ``seed``.
        """
        self._config = config
        self._d = config.dimension
        self._S = self._load_or_create_rotation_matrix()
        self._codebooks = self._compute_codebooks()

    def _load_or_create_rotation_matrix(self) -> NDArray:
        """Load/create rotation matrix with copy-on-detect (AUDIT C4-MED-01).

        Resolution order:
          1. The TurboQuant matrix file, if it loads with shape (d, d).
          2. An existing polar rotation matrix of shape (d, d), which is
             copied to the TurboQuant path.
          3. A freshly generated matrix, saved to the TurboQuant path.

        Step 2 is attempted whenever step 1 produced no usable matrix. That
        includes a TurboQuant file that exists but is unreadable or has the
        wrong shape, so a damaged file does not make the encoder drop the
        polar matrix in favour of a newly generated one.

        Returns:
            The (d, d) rotation matrix.
        """
        d = self._d
        slm_dir = Path.home() / ".superlocalmemory"

        configured = self._config.rotation_matrix_path
        turbo_path = Path(configured or str(slm_dir / f"turbo_rotation_{d}.npy"))

        if turbo_path.exists():
            try:
                S = np.load(str(turbo_path))
                if S.shape == (d, d):
                    return S
                logger.warning("Turbo rotation shape %s != (%d,%d)", S.shape, d, d)
            except Exception as exc:
                # Deliberately broad: any unreadable file means "fall through".
                logger.warning("Corrupt turbo rotation: %s", exc)

        # Copy-on-detect: reuse existing polar rotation matrix
        polar_path = slm_dir / f"polar_rotation_{d}.npy"
        if polar_path.exists():
            try:
                S = np.load(str(polar_path))
                if S.shape == (d, d):
                    turbo_path.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(str(polar_path), str(turbo_path))
                    logger.info("Copied polar rotation matrix for TurboQuant compatibility")
                    return S
                logger.warning("Polar rotation shape %s != (%d,%d)", S.shape, d, d)
            except Exception as exc:
                # Best-effort reuse; generation below is the fallback.
                logger.warning("Could not copy polar rotation: %s", exc)

        # Generate new via Mezzadri-corrected QR
        rng = np.random.default_rng(self._config.seed)
        H = rng.standard_normal((d, d))
        Q, R = np.linalg.qr(H)
        S = Q @ np.diag(np.sign(np.diag(R)))

        turbo_path.parent.mkdir(parents=True, exist_ok=True)
        np.save(str(turbo_path), S)
        logger.info("Generated TurboQuant rotation (%d x %d) at %s", d, d, turbo_path)
        return S

    def _compute_codebooks(self) -> dict[int, NDArray]:
        """Pre-compute Lloyd-Max codebooks for 2/4/8-bit.

        Returns:
            Mapping from bit width to its ascending centroid array, built
            with sigma = 1 / sqrt(d).
        """
        sigma = 1.0 / math.sqrt(self._d)
        codebooks: dict[int, NDArray] = {}
        for bw in sorted(SUPPORTED_BIT_WIDTHS):
            centroids = _compute_lloyd_max_gaussian(sigma, 2 ** bw)
            # Internal invariants that encode() relies on for searchsorted.
            assert len(centroids) == 2 ** bw
            assert np.all(centroids[1:] >= centroids[:-1])
            codebooks[bw] = centroids
        return codebooks

    def encode(self, embedding: NDArray, bit_width: int = 4) -> TurboQuantResult:
        """Encode embedding. HR-ENC-01: pure. HR-ENC-02: radius=float16.

        Args:
            embedding: 1-D vector of shape ``(self._d,)``.
            bit_width: 2, 4, or 8.

        Returns:
            TurboQuantResult whose ``indices`` start with ``TQ_MAGIC``.

        Raises:
            ValueError: Unsupported bit_width or shape mismatch.
        """
        if bit_width not in SUPPORTED_BIT_WIDTHS:
            raise ValueError(f"bit_width must be 2, 4, or 8, got {bit_width}")
        if embedding.shape != (self._d,):
            raise ValueError(f"shape mismatch: expected ({self._d},), got {embedding.shape}")

        pack_fn, _ = _PACKERS[bit_width]

        y = self._S @ embedding
        r = float(np.linalg.norm(y))

        if r < 1e-12:
            # Degenerate (near-zero) vector: store all-zero indices, radius 0.
            packed = TQ_MAGIC + pack_fn(np.zeros(self._d, dtype=np.uint8))
            return TurboQuantResult(radius=0.0, indices=packed, bit_width=bit_width)

        y_unit = y / r
        centroids = self._codebooks[bit_width]

        # Nearest-centroid lookup: searchsorted gives the insertion point, so
        # the nearest centroid is either that entry or its left neighbour.
        idx = np.searchsorted(centroids, y_unit)
        idx = np.clip(idx, 0, len(centroids) - 1)
        left = np.clip(idx - 1, 0, len(centroids) - 1)
        use_left = np.abs(y_unit - centroids[left]) < np.abs(y_unit - centroids[idx])
        idx = np.where(use_left, left, idx).astype(np.uint8)

        packed = TQ_MAGIC + pack_fn(idx)

        return TurboQuantResult(
            radius=float(np.float16(r)), indices=packed, bit_width=bit_width,
        )

    def decode(self, result: TurboQuantResult) -> NDArray:
        """Decode with format detection: TurboQuant blob -> turbo, else -> legacy polar.

        A blob is treated as TurboQuant when it starts with ``TQ_MAGIC`` and,
        for a supported bit width, also has exactly the length encode()
        produces (prefix plus ``self._d`` packed indices). The length check
        keeps a legacy blob that happens to begin with the magic bytes from
        being decoded as TurboQuant.

        Args:
            result: Encoded embedding (TurboQuant or legacy polar blob).

        Returns:
            Reconstructed vector of dimension ``self._d``.
        """
        blob = result.indices
        bit_width = result.bit_width
        prefix_len = len(TQ_MAGIC)

        is_turbo = blob[:prefix_len] == TQ_MAGIC
        if is_turbo and bit_width in SUPPORTED_BIT_WIDTHS:
            # ceil(d * bits / 8) payload bytes follow the prefix.
            expected_len = prefix_len + (self._d * bit_width + 7) // 8
            is_turbo = len(blob) == expected_len

        if not is_turbo:
            return self._decode_legacy_polar(result)

        data = blob[prefix_len:]
        _, unpack_fn = _PACKERS[bit_width]
        indices = unpack_fn(data, self._d)
        centroids = self._codebooks[bit_width]
        y_unit_approx = centroids[np.clip(indices, 0, len(centroids) - 1)]
        # S is used as an orthogonal rotation, so its transpose undoes it.
        return self._S.T @ (y_unit_approx * result.radius)

    def _decode_legacy_polar(self, result: TurboQuantResult) -> NDArray:
        """Decode a legacy PolarQuant BLOB (no TQ prefix).

        Indices are unpacked as ``self._d - 1`` angles, mapped to the
        midpoints of uniform bins on [0, pi], converted back to Cartesian
        coordinates, scaled by the radius and rotated back.
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import with
        # polar_quant, which imports this module - confirm.
        from superlocalmemory.math.polar_quant import PolarQuantEncoder, _polar_to_cartesian

        n_angles = self._d - 1
        if result.bit_width == 8:
            indices = np.frombuffer(result.indices, dtype=np.uint8).copy()
        elif result.bit_width == 4:
            indices = PolarQuantEncoder.unpack_4bit(result.indices, n_angles)
        else:
            indices = PolarQuantEncoder.unpack_2bit(result.indices, n_angles)

        levels = 2 ** result.bit_width
        boundaries = np.linspace(0.0, math.pi, levels + 1)
        centroids = (boundaries[:-1] + boundaries[1:]) / 2.0
        angles = centroids[np.clip(indices, 0, len(centroids) - 1)]

        v_unit = _polar_to_cartesian(angles, self._d)
        return self._S.T @ (v_unit * result.radius)

    def approximate_similarity(self, query: NDArray, result: TurboQuantResult) -> float:
        """Cosine similarity via decode. Returns 0.0 on degenerate inputs."""
        decoded = self.decode(result)
        denom = np.linalg.norm(query) * np.linalg.norm(decoded)
        if denom < 1e-12:
            return 0.0
        sim = float(np.dot(query, decoded) / denom)
        return 0.0 if (math.isnan(sim) or math.isinf(sim)) else sim

    # Static pack/unpack (backward compat with PolarQuantEncoder API)

    @staticmethod
    def pack_4bit(indices: NDArray) -> bytes:
        """Pack indices two per byte (delegates to ``_pack_4bit``)."""
        return _pack_4bit(indices)

    @staticmethod
    def unpack_4bit(data: bytes, length: int) -> NDArray:
        """Unpack *length* 4-bit indices (delegates to ``_unpack_4bit``)."""
        return _unpack_4bit(data, length)

    @staticmethod
    def pack_2bit(indices: NDArray) -> bytes:
        """Pack indices four per byte (delegates to ``_pack_2bit``)."""
        return _pack_2bit(indices)

    @staticmethod
    def unpack_2bit(data: bytes, length: int) -> NDArray:
        """Unpack *length* 2-bit indices (delegates to ``_unpack_2bit``)."""
        return _unpack_2bit(data, length)
|