superlocalmemory 3.2.3 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -1
- package/README.md +106 -71
- package/package.json +1 -2
- package/pyproject.toml +16 -1
- package/src/superlocalmemory/cli/commands.py +309 -0
- package/src/superlocalmemory/cli/main.py +44 -0
- package/src/superlocalmemory/core/config.py +276 -4
- package/src/superlocalmemory/core/consolidation_engine.py +37 -0
- package/src/superlocalmemory/core/engine.py +21 -0
- package/src/superlocalmemory/core/engine_wiring.py +58 -8
- package/src/superlocalmemory/dynamics/activation_guided_quantization.py +374 -0
- package/src/superlocalmemory/dynamics/eap_scheduler.py +276 -0
- package/src/superlocalmemory/dynamics/ebbinghaus_langevin_coupling.py +171 -0
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +804 -0
- package/src/superlocalmemory/hooks/auto_invoker.py +46 -8
- package/src/superlocalmemory/hooks/auto_parameterize.py +147 -0
- package/src/superlocalmemory/infra/heartbeat_monitor.py +140 -0
- package/src/superlocalmemory/infra/pid_manager.py +193 -0
- package/src/superlocalmemory/infra/process_reaper.py +572 -0
- package/src/superlocalmemory/learning/consolidation_quantization_worker.py +115 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +263 -0
- package/src/superlocalmemory/learning/quantization_scheduler.py +320 -0
- package/src/superlocalmemory/math/ebbinghaus.py +309 -0
- package/src/superlocalmemory/math/fisher_quantized.py +251 -0
- package/src/superlocalmemory/math/hopfield.py +279 -0
- package/src/superlocalmemory/math/polar_quant.py +379 -0
- package/src/superlocalmemory/math/qjl.py +115 -0
- package/src/superlocalmemory/mcp/server.py +2 -0
- package/src/superlocalmemory/mcp/tools_v3.py +10 -0
- package/src/superlocalmemory/mcp/tools_v33.py +351 -0
- package/src/superlocalmemory/parameterization/__init__.py +47 -0
- package/src/superlocalmemory/parameterization/pattern_extractor.py +534 -0
- package/src/superlocalmemory/parameterization/pii_filter.py +106 -0
- package/src/superlocalmemory/parameterization/prompt_injector.py +216 -0
- package/src/superlocalmemory/parameterization/prompt_lifecycle.py +275 -0
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +425 -0
- package/src/superlocalmemory/retrieval/engine.py +21 -3
- package/src/superlocalmemory/retrieval/forgetting_filter.py +145 -0
- package/src/superlocalmemory/retrieval/hopfield_channel.py +335 -0
- package/src/superlocalmemory/retrieval/quantization_aware_search.py +133 -0
- package/src/superlocalmemory/retrieval/strategy.py +16 -6
- package/src/superlocalmemory/server/routes/agents.py +68 -8
- package/src/superlocalmemory/server/routes/learning.py +18 -1
- package/src/superlocalmemory/server/routes/lifecycle.py +36 -17
- package/src/superlocalmemory/server/routes/v3_api.py +503 -1
- package/src/superlocalmemory/storage/database.py +206 -0
- package/src/superlocalmemory/storage/embedding_migrator.py +178 -0
- package/src/superlocalmemory/storage/migration_v33.py +140 -0
- package/src/superlocalmemory/storage/quantized_store.py +261 -0
- package/src/superlocalmemory/storage/schema_v32.py +137 -0
- package/conftest.py +0 -5
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""Modern Continuous Hopfield Network (Ramsauer et al., 2020).
|
|
6
|
+
|
|
7
|
+
Implementation of the continuous Hopfield energy and update rules
|
|
8
|
+
from "Hopfield Networks is All You Need" (arXiv 2008.02217).
|
|
9
|
+
|
|
10
|
+
This is the mathematical foundation for the 6th retrieval channel.
|
|
11
|
+
The Hopfield update is equivalent to single-head self-attention:
|
|
12
|
+
xi_new = X' @ softmax(beta * X @ xi)
|
|
13
|
+
|
|
14
|
+
Key properties:
|
|
15
|
+
- Energy function: E(xi) = -logsumexp(beta * X @ xi) + beta/2 * ||xi||^2
|
|
16
|
+
- Update rule: xi_new = X' @ softmax(beta * X @ xi)
|
|
17
|
+
- Beta: 1/sqrt(d) (inverse temperature)
|
|
18
|
+
- Storage capacity: O(exp(d/2)) -- exponential in dimension
|
|
19
|
+
- Convergence: 1 step for well-separated patterns
|
|
20
|
+
|
|
21
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
22
|
+
License: MIT
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import logging
|
|
28
|
+
import math
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
import numpy as np
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Configuration (local definition; Delivery Lead moves to core/config.py)
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class HopfieldConfig:
    """Immutable settings for the Modern Continuous Hopfield Network.

    Formulas follow Ramsauer et al. (2020), "Hopfield Networks is All You
    Need", as implemented in this module:

        Energy: E(xi) = -log(sum_i exp(B * xi' * x_i)) + B/2 * ||xi||^2
        Update: xi_new = X' @ softmax(B * X @ xi)
        Beta:   B = 1/sqrt(d), where d is ``dimension``

    The paper gives a storage capacity that grows exponentially with the
    dimension, so d=768 leaves ample headroom for millions of patterns.
    """

    # NOTE(review): not read anywhere in this module; presumably the
    # retrieval channel checks it before using the network -- confirm.
    enabled: bool = True

    # Embedding dimension d. Fixes beta = 1/sqrt(d) and the length of the
    # zero vectors returned for an empty memory matrix.
    dimension: int = 768

    # Default number of update steps used by ``retrieve`` when the caller
    # does not pass a positive override.
    max_iterations: int = 1

    # ``retrieve`` stops early once the energy changes by less than this
    # amount between two consecutive steps.
    convergence_epsilon: float = 1e-6

    # NOTE(review): the four settings below are not read in this module;
    # the names suggest candidate pre-filtering, a size cut-off and result
    # caching in the caller -- verify against the retrieval channel.
    prefilter_threshold: int = 10_000
    prefilter_candidates: int = 1000
    skip_threshold: int = 100_000
    cache_ttl_seconds: float = 60.0
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
# Hopfield State (immutable result)
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
@dataclass(frozen=True)
class HopfieldState:
    """Immutable outcome of a Hopfield retrieval.

    NOTE(review): the dataclass-generated ``__eq__`` / ``__hash__`` operate
    on the NumPy array fields, so comparing or hashing two instances is
    likely to raise -- confirm that no caller relies on either.
    """

    # Completed pattern, d-dimensional.
    retrieved_pattern: np.ndarray

    # Softmax weights over the stored patterns, n-dimensional.
    attention_weights: np.ndarray

    # E(xi) of the query before any update was applied.
    energy_before: float

    # E(xi_new) of the pattern after the update(s).
    energy_after: float

    # True when energy_after <= energy_before (the energy did not rise).
    converged: bool

    # Number of update steps taken.
    iterations: int
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
# Modern Continuous Hopfield Network
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
class ModernHopfieldNetwork:
    """Modern Continuous Hopfield Network (Ramsauer et al., 2020).

    Offers energy evaluation, a single update step, iterated retrieval with
    an energy-based stopping rule, and attention scoring.

    Usage::

        net = ModernHopfieldNetwork(HopfieldConfig(dimension=768))
        state = net.retrieve(query_vec, memory_matrix)
        scores = net.attention_scores(query_vec, memory_matrix)
    """

    def __init__(self, config: HopfieldConfig) -> None:
        """Store the config and derive beta = 1/sqrt(d).

        HR-01: Beta MUST be 1/sqrt(d), no other values.

        Args:
            config: Settings; ``dimension``, ``max_iterations`` and
                ``convergence_epsilon`` are the fields read by this class.
        """
        self._config = config
        self._beta: float = 1.0 / math.sqrt(config.dimension)  # HR-01

    # -- Public API ---------------------------------------------------------

    def energy(self, xi: np.ndarray, memory_matrix: np.ndarray) -> float:
        """Compute the Modern Hopfield energy of ``xi``.

        E(xi) = -logsumexp(beta * X @ xi) + beta/2 * ||xi||^2

        HR-04: Uses numerically stable logsumexp (shift by max).

        Args:
            xi: Query vector, shape (d,).
            memory_matrix: Stored patterns, shape (n, d). Rows are expected
                to be L2-normalized; this is not checked here.

        Returns:
            Energy value (float). Lower = better match to stored patterns.
            Returns 0.0 for an empty memory matrix or a NaN result.
        """
        # Guard: empty matrix
        if memory_matrix.shape[0] == 0:
            return 0.0

        # HR-11: float64 for energy. copy=False skips the copy when the
        # input already is float64; neither array is modified below.
        xi_64 = xi.astype(np.float64, copy=False)
        mem_64 = memory_matrix.astype(np.float64, copy=False)

        # Logits: beta * X @ xi, shape (n,)
        logits = self._beta * (mem_64 @ xi_64)

        # HR-04: shifting by the max keeps exp() from overflowing.
        max_logit = float(np.max(logits))
        lse = max_logit + float(np.log(np.sum(np.exp(logits - max_logit))))

        energy = -lse + (self._beta / 2.0) * float(np.dot(xi_64, xi_64))

        # Guard against NaN from degenerate inputs.
        if math.isnan(energy):
            return 0.0

        return energy

    def update(self, xi: np.ndarray, memory_matrix: np.ndarray) -> np.ndarray:
        """Apply one Hopfield update step.

        xi_new = X' @ softmax(beta * X @ xi)

        HR-02: Uses numerically stable softmax (shift by max).

        Args:
            xi: Query vector, shape (d,).
            memory_matrix: Stored patterns, shape (n, d). Rows are expected
                to be L2-normalized; this is not checked here.

        Returns:
            Updated vector xi_new as float32, shape (d,). Returns zeros of
            length ``config.dimension`` if the matrix is empty.
        """
        # Guard: empty matrix
        if memory_matrix.shape[0] == 0:
            return np.zeros(self._config.dimension, dtype=np.float32)

        # Logits: beta * X @ xi, shape (n,)
        logits = self._beta * (memory_matrix @ xi)

        # HR-02: numerically stable softmax
        attention = self._softmax(logits)

        # Pattern completion: X' @ attention, shape (d,)
        xi_new = memory_matrix.T @ attention

        return xi_new.astype(np.float32)

    def retrieve(
        self,
        query: np.ndarray,
        memory_matrix: np.ndarray,
        max_iterations: int = 0,
    ) -> HopfieldState:
        """Run retrieval with energy tracking and early stopping.

        The update rule is applied until the energy changes by less than
        ``config.convergence_epsilon`` between consecutive steps or the
        iteration budget is used up.

        Args:
            query: Query vector, shape (d,).
            memory_matrix: Stored patterns, shape (n, d).
            max_iterations: Override for config.max_iterations. 0 = use config.

        Returns:
            HopfieldState with the retrieved pattern, the attention weights
            evaluated at that pattern, the energies before and after, the
            ``converged`` flag (final energy not above the initial one) and
            the number of update steps actually executed.
        """
        d = self._config.dimension

        # Guard: empty matrix
        if memory_matrix.shape[0] == 0:
            return HopfieldState(
                retrieved_pattern=np.zeros(d, dtype=np.float32),
                attention_weights=np.array([], dtype=np.float32),
                energy_before=0.0,
                energy_after=0.0,
                converged=False,
                iterations=0,
            )

        iters = max_iterations if max_iterations > 0 else self._config.max_iterations

        # HR-11: float32 for memory and state; energy() widens to float64
        # internally. The matrix is only read, so skip a redundant copy.
        mem_f32 = memory_matrix.astype(np.float32, copy=False)
        xi = query.astype(np.float32)

        energy_initial = self.energy(xi, mem_f32)
        e_current = energy_initial

        # Count the steps explicitly: deriving the count from the loop
        # variable reported one step even when the budget was zero and no
        # update had run.
        steps = 0
        for _ in range(iters):
            xi_next = self.update(xi, mem_f32)
            e_next = self.energy(xi_next, mem_f32)
            steps += 1

            settled = abs(e_next - e_current) < self._config.convergence_epsilon

            # The newest pattern is always the one returned, whether or not
            # the stopping rule fired on this step.
            xi, e_current = xi_next, e_next
            if settled:
                break

        # e_current already is the energy of the returned pattern, so it
        # does not have to be evaluated again.
        e_final = e_current

        # Attention weights evaluated at the returned pattern.
        attention = self._softmax(self._beta * (mem_f32 @ xi))

        return HopfieldState(
            retrieved_pattern=xi,
            attention_weights=attention.astype(np.float32),
            energy_before=energy_initial,
            energy_after=e_final,
            converged=(e_final <= energy_initial + 1e-9),
            iterations=steps,
        )

    def attention_scores(
        self,
        query: np.ndarray,
        memory_matrix: np.ndarray,
    ) -> np.ndarray:
        """Compute Hopfield attention weights WITHOUT the pattern update.

        Used for scoring/ranking without pattern completion.

        Args:
            query: Query vector, shape (d,).
            memory_matrix: Stored patterns, shape (n, d).

        Returns:
            Attention weights, shape (n,), summing to 1.0. An empty float32
            array when the memory matrix has no rows.
        """
        if memory_matrix.shape[0] == 0:
            return np.array([], dtype=np.float32)

        logits = self._beta * (memory_matrix @ query)
        return self._softmax(logits)

    # -- Private helpers ----------------------------------------------------

    def _softmax(self, logits: np.ndarray) -> np.ndarray:
        """Numerically stable softmax.

        HR-02: Shift by max to prevent overflow.
            softmax(x) = exp(x - max(x)) / sum(exp(x - max(x)))

        Args:
            logits: 1-D array of scores.

        Returns:
            Weights with the shape of ``logits``; an empty float32 array
            when ``logits`` is empty.
        """
        # np.max has no identity for zero-size input and would raise;
        # an empty score vector simply has no weights.
        if logits.size == 0:
            return np.array([], dtype=np.float32)

        shifted = logits - np.max(logits)  # HR-02
        exp_vals = np.exp(shifted)
        return exp_vals / np.sum(exp_vals)
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""PolarQuant embedding quantization.
|
|
6
|
+
|
|
7
|
+
Implements random orthogonal rotation + recursive polar coordinate
|
|
8
|
+
transform + scalar quantization for ultra-compact embedding storage.
|
|
9
|
+
|
|
10
|
+
Pipeline:
|
|
11
|
+
1. Random rotation (Mezzadri-corrected QR) -- preserves angles
|
|
12
|
+
2. Cartesian -> hyperspherical polar coordinates
|
|
13
|
+
3. Scalar quantization of angles (uniform codebook)
|
|
14
|
+
4. Byte packing (8/4/2-bit)
|
|
15
|
+
|
|
16
|
+
Reconstruction:
|
|
17
|
+
1. Unpack bytes -> indices
|
|
18
|
+
2. Map indices -> centroid angles via codebook
|
|
19
|
+
3. Polar -> Cartesian
|
|
20
|
+
4. Inverse rotation (S^T since S is orthogonal)
|
|
21
|
+
|
|
22
|
+
References:
|
|
23
|
+
- PolarQuant (arXiv 2502.02617)
|
|
24
|
+
- TurboQuant (ICLR 2026, arXiv 2504.19874)
|
|
25
|
+
- Mezzadri F (2007). How to generate random matrices from
|
|
26
|
+
the classical compact groups.
|
|
27
|
+
|
|
28
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
29
|
+
License: MIT
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import logging
|
|
35
|
+
import math
|
|
36
|
+
import os
|
|
37
|
+
from dataclasses import dataclass
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
|
|
40
|
+
import numpy as np
|
|
41
|
+
from numpy.typing import NDArray
|
|
42
|
+
|
|
43
|
+
from superlocalmemory.core.config import PolarQuantConfig
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Data types
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
class QuantizedEmbedding:
    """Frozen record holding one quantized embedding."""

    # Linked atomic fact; the encoder leaves this as an empty string.
    fact_id: str

    # L2 norm of the embedding rounded to float16 precision. It is the only
    # norm kept and is what decoding scales the unit vector by.
    radius: float

    # Quantized angle indices, packed into bytes.
    angle_indices: bytes

    # Quantization level: 2, 4, or 8 bits per angle.
    bit_width: int

    # Optional QJL residual correction bits; the encoder stores None.
    qjl_bits: bytes | None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# PolarQuantEncoder
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class PolarQuantEncoder:
    """Random-rotation + polar-coordinate embedding quantizer.

    HR-01: Rotation matrix generated ONCE and reused for ALL embeddings.
    HR-02: Same rotation matrix for all embeddings in a profile.
    HR-08: No new pip dependencies (numpy + stdlib only).
    HR-09: Angle indices as uint8, packed into bytes.
    """

    __slots__ = ("_config", "_d", "_S", "_codebooks")

    def __init__(self, config: PolarQuantConfig) -> None:
        """Load (or create and persist) the rotation and build the codebooks.

        Args:
            config: Settings; ``dimension``, ``rotation_matrix_path`` and
                ``seed`` are the fields read by this class.
        """
        self._config = config
        self._d = config.dimension
        self._S = self._load_or_create_rotation_matrix()
        self._codebooks = self._generate_uniform_codebooks()

    # -- Rotation matrix (HR-01, HR-02) ------------------------------------

    def _load_or_create_rotation_matrix(self) -> NDArray:
        """Load or create the Mezzadri-corrected random orthogonal matrix.

        Mezzadri correction (B-CRIT-01):
            S = Q @ diag(sign(diag(R)))
        ensures UNIFORM sampling from O(d). Plain QR gives Haar-random
        only up to sign flips.

        The matrix is read from ``config.rotation_matrix_path`` (default:
        ``~/.superlocalmemory/polar_rotation.npy``). If the file is missing,
        unreadable, or has the wrong shape, a new matrix is drawn from
        ``config.seed`` and written to that exact path.

        NOTE: decoding multiplies by the transpose of this matrix, so a
        regenerated matrix that differs from the one used at encode time
        (possible when no fixed seed is configured) cannot reconstruct
        embeddings encoded earlier.

        Returns:
            Orthogonal matrix of shape (d, d).
        """
        path_str = self._config.rotation_matrix_path
        if not path_str:
            path_str = str(Path.home() / ".superlocalmemory" / "polar_rotation.npy")

        path = Path(path_str)

        if path.exists():
            try:
                S = np.load(str(path))
                if S.shape == (self._d, self._d):
                    return S
                logger.warning(
                    "Rotation matrix shape %s != expected (%d,%d), regenerating",
                    S.shape, self._d, self._d,
                )
            except Exception as exc:
                # Deliberate best effort: any load failure falls through to
                # regeneration instead of aborting start-up.
                logger.warning("Corrupt rotation matrix, regenerating: %s", exc)

        # Generate new rotation matrix with Mezzadri correction
        rng = np.random.default_rng(self._config.seed)
        H = rng.standard_normal((self._d, self._d))
        Q, R = np.linalg.qr(H)
        # Mezzadri correction: ensures uniform sampling from O(d)
        S = Q @ np.diag(np.sign(np.diag(R)))

        path.parent.mkdir(parents=True, exist_ok=True)
        # Save through a file handle: given a path string, np.save appends
        # ".npy" when the suffix is missing, so the file would land beside
        # the configured path, the exists() check above would never find it,
        # and a new matrix would be generated on every start (breaking HR-01
        # whenever the seed is not fixed).
        with open(path, "wb") as fh:
            np.save(fh, S)
        logger.info("Generated rotation matrix (%d x %d) at %s", self._d, self._d, path)
        return S

    # -- Codebook generation -----------------------------------------------

    def _generate_uniform_codebooks(self) -> dict[int, dict[str, NDArray]]:
        """Generate uniform codebooks for 2/4/8-bit quantization.

        Each codebook splits [0, pi] into 2**bit_width equal bins and keeps
        the bin edges ("boundaries") and the bin midpoints ("centroids").

        Author's rationale (not verifiable from this file): at d=768 the
        Beta(d/2-k, 1/2) angle distribution concentrates around pi/2 so
        tightly that uniform and Lloyd-Max converge (KL < 0.01 bits).

        Returns:
            Mapping bit_width -> {"boundaries": edges, "centroids": midpoints}.
        """
        codebooks: dict[int, dict[str, NDArray]] = {}
        for bit_width in (2, 4, 8):
            levels = 2 ** bit_width
            boundaries = np.linspace(0.0, math.pi, levels + 1)
            centroids = (boundaries[:-1] + boundaries[1:]) / 2.0
            codebooks[bit_width] = {
                "boundaries": boundaries,
                "centroids": centroids,
            }
        return codebooks

    # -- Encode ------------------------------------------------------------

    def _pack_indices(self, indices: NDArray, bit_width: int) -> bytes:
        """Pack uint8 angle indices into bytes for a validated bit width."""
        if bit_width == 8:
            return indices.tobytes()
        if bit_width == 4:
            return self.pack_4bit(indices)
        return self.pack_2bit(indices)

    def encode(self, embedding: NDArray, bit_width: int = 4) -> QuantizedEmbedding:
        """Encode a float embedding into quantized polar representation.

        Args:
            embedding: 1-D float vector of dimension self._d.
            bit_width: 2, 4, or 8.

        Returns:
            QuantizedEmbedding with packed angle indices, an empty
            ``fact_id`` and no QJL bits. A (near-)zero vector yields radius
            0.0 with all indices zero.

        Raises:
            ValueError: Invalid bit_width or dimension mismatch.
        """
        if bit_width not in (2, 4, 8):
            raise ValueError(
                f"bit_width must be 2, 4, or 8, got {bit_width}"
            )
        if embedding.shape != (self._d,):
            raise ValueError(
                f"shape mismatch: expected ({self._d},), got {embedding.shape}"
            )

        # Step 1: Random rotation
        v_rot = self._S @ embedding

        # Step 2: Compute radius
        r = float(np.linalg.norm(v_rot))

        # Degenerate zero vector: no direction to encode.
        if r < 1e-12:
            zero_angles = np.zeros(self._d - 1, dtype=np.uint8)
            return QuantizedEmbedding(
                fact_id="",
                radius=0.0,
                angle_indices=self._pack_indices(zero_angles, bit_width),
                bit_width=bit_width,
                qjl_bits=None,
            )

        # Step 3: Normalize
        v_unit = v_rot / r

        # Step 4: Cartesian to polar angles
        angles = _cartesian_to_polar_angles(v_unit)

        # Step 5: Quantize angles. Digitizing against the interior edges
        # only yields indices 0 .. levels-1.
        cb = self._codebooks[bit_width]
        indices = np.digitize(angles, cb["boundaries"][1:-1]).astype(np.uint8)

        # Step 6: Pack into bytes; the radius is kept at float16 precision.
        return QuantizedEmbedding(
            fact_id="",
            radius=float(np.float16(r)),
            angle_indices=self._pack_indices(indices, bit_width),
            bit_width=bit_width,
            qjl_bits=None,
        )

    # -- Decode ------------------------------------------------------------

    def decode(self, qe: QuantizedEmbedding) -> NDArray:
        """Decode a QuantizedEmbedding back to a float vector.

        NOTE: every dequantized angle is a bin midpoint inside (0, pi), so
        the last rotated coordinate, rebuilt as a product of sines, always
        comes back non-negative; its original sign is not stored.

        Args:
            qe: Quantized embedding produced by encode().

        Returns:
            Reconstructed vector of dimension self._d.
        """
        n_angles = self._d - 1

        # Step 1: Unpack angle indices
        if qe.bit_width == 8:
            indices = np.frombuffer(qe.angle_indices, dtype=np.uint8).copy()
        elif qe.bit_width == 4:
            indices = self.unpack_4bit(qe.angle_indices, n_angles)
        else:
            indices = self.unpack_2bit(qe.angle_indices, n_angles)

        # Step 2: Dequantize -- map indices to centroid angles
        centroids = self._codebooks[qe.bit_width]["centroids"]
        # Clip indices to valid range
        indices = np.clip(indices, 0, len(centroids) - 1)
        angles = centroids[indices]

        # Step 3: Polar to Cartesian
        v_unit = _polar_to_cartesian(angles, self._d)

        # Step 4: Scale by radius
        v_rot = v_unit * qe.radius

        # Step 5: Inverse rotation (S is orthogonal, so S^T = S^{-1})
        return self._S.T @ v_rot

    # -- Similarity --------------------------------------------------------

    def approximate_similarity(
        self, query: NDArray, qe: QuantizedEmbedding,
    ) -> float:
        """Compute approximate cosine similarity via decode.

        Args:
            query: Query vector (float32/64).
            qe: Quantized embedding.

        Returns:
            Cosine similarity in [-1, 1]. Returns 0.0 on degenerate inputs.
        """
        v_decoded = self.decode(qe)
        denom = np.linalg.norm(query) * np.linalg.norm(v_decoded)
        if denom < 1e-12:
            return 0.0
        sim = float(np.dot(query, v_decoded) / denom)
        # NaN guard
        if math.isnan(sim) or math.isinf(sim):
            return 0.0
        # Rounding can push the ratio marginally outside the cosine range.
        return max(-1.0, min(1.0, sim))

    # -- Bit packing (static methods) --------------------------------------

    @staticmethod
    def pack_4bit(indices: NDArray) -> bytes:
        """Pack uint8 indices (0-15) into 4-bit pairs.

        Two indices per byte: high nibble | low nibble.
        Values are clipped to 0-15; an odd count is padded with a zero.
        """
        n = len(indices)
        padded = np.zeros(n + (n % 2), dtype=np.uint8)
        padded[:n] = np.clip(indices, 0, 15)
        packed = (padded[0::2] << 4) | padded[1::2]
        return packed.tobytes()

    @staticmethod
    def unpack_4bit(data: bytes, length: int) -> NDArray:
        """Unpack 4-bit pairs back to the first ``length`` uint8 indices."""
        packed = np.frombuffer(data, dtype=np.uint8)
        result = np.empty(len(packed) * 2, dtype=np.uint8)
        result[0::2] = packed >> 4
        result[1::2] = packed & 0x0F
        return result[:length]

    @staticmethod
    def pack_2bit(indices: NDArray) -> bytes:
        """Pack uint8 indices (0-3) into 2-bit quads.

        Four indices per byte: [b7b6 | b5b4 | b3b2 | b1b0].
        Values are clipped to 0-3; the tail is zero-padded to a multiple of 4.
        """
        n = len(indices)
        pad_len = (4 - n % 4) % 4
        padded = np.zeros(n + pad_len, dtype=np.uint8)
        padded[:n] = np.clip(indices, 0, 3)
        packed = (
            (padded[0::4] << 6)
            | (padded[1::4] << 4)
            | (padded[2::4] << 2)
            | padded[3::4]
        )
        return packed.tobytes()

    @staticmethod
    def unpack_2bit(data: bytes, length: int) -> NDArray:
        """Unpack 2-bit quads back to the first ``length`` uint8 indices."""
        packed = np.frombuffer(data, dtype=np.uint8)
        result = np.empty(len(packed) * 4, dtype=np.uint8)
        result[0::4] = (packed >> 6) & 0x03
        result[1::4] = (packed >> 4) & 0x03
        result[2::4] = (packed >> 2) & 0x03
        result[3::4] = packed & 0x03
        return result[:length]
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
# ---------------------------------------------------------------------------
|
|
347
|
+
# Coordinate conversion helpers (module-level for reuse)
|
|
348
|
+
# ---------------------------------------------------------------------------
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _cartesian_to_polar_angles(v_unit: NDArray) -> NDArray:
    """Convert a unit vector to its d-1 polar angles in O(d) time.

    Uses the recursive polar decomposition:
        v[i] = cos(theta_i) * product(sin(theta_j) for j < i)
    so theta_i = arccos(v[i] / ||v[i:]||).

    All tail norms ||v[i:]|| come from a single reverse cumulative sum of
    squares; recomputing the norm of every suffix inside a loop would cost
    O(d^2).

    Every angle lies in [0, pi]. The sign of the final component is
    therefore not captured by the angles.

    Args:
        v_unit: 1-D vector of length d, expected to have unit norm.

    Returns:
        float64 array of d-1 angles (empty when d < 2). Where the remaining
        tail norm falls below 1e-12 the angle is set to pi/2.
    """
    v = np.asarray(v_unit, dtype=np.float64)

    # tail_norms[i] == ||v[i:]||, accumulated from the last component up.
    tail_norms = np.sqrt(np.cumsum(np.square(v[::-1]))[::-1])

    heads = v[:-1]
    tails = tail_norms[:-1]

    # Tail norms never increase with i, so once one drops below the
    # threshold all later ones do too; those angles carry no direction.
    degenerate = tails < 1e-12
    safe_tails = np.where(degenerate, 1.0, tails)  # avoid dividing by ~0

    # Clip guards arccos against ratios that round just outside [-1, 1].
    angles = np.arccos(np.clip(heads / safe_tails, -1.0, 1.0))
    angles[degenerate] = math.pi / 2
    return angles
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _polar_to_cartesian(angles: NDArray, d: int) -> NDArray:
    """Rebuild a d-dimensional vector from its first d-1 polar angles.

    Inverse of _cartesian_to_polar_angles: component i is cos(theta_i)
    times the product of sin(theta_j) for all j < i, and the final
    component is the product of all d-1 sines.

    Args:
        angles: Sequence holding at least d-1 angles; extras are ignored.
        d: Dimension of the vector to produce.

    Returns:
        float64 array of length d.
    """
    coords = np.empty(d)
    last = d - 1
    running_sin = 1.0  # product of the sines of all angles consumed so far
    axis = 0
    while axis < last:
        theta = angles[axis]
        coords[axis] = running_sin * math.cos(theta)
        running_sin = running_sin * math.sin(theta)
        axis += 1
    # Whatever sine mass is left belongs to the final component.
    coords[last] = running_sin
    return coords
|