superlocalmemory 3.2.2 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -1
- package/README.md +106 -71
- package/package.json +1 -2
- package/pyproject.toml +16 -1
- package/src/superlocalmemory/cli/commands.py +309 -0
- package/src/superlocalmemory/cli/main.py +44 -0
- package/src/superlocalmemory/core/config.py +282 -11
- package/src/superlocalmemory/core/consolidation_engine.py +37 -0
- package/src/superlocalmemory/core/engine.py +21 -0
- package/src/superlocalmemory/core/engine_wiring.py +58 -8
- package/src/superlocalmemory/dynamics/activation_guided_quantization.py +374 -0
- package/src/superlocalmemory/dynamics/eap_scheduler.py +276 -0
- package/src/superlocalmemory/dynamics/ebbinghaus_langevin_coupling.py +171 -0
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +804 -0
- package/src/superlocalmemory/hooks/auto_invoker.py +46 -8
- package/src/superlocalmemory/hooks/auto_parameterize.py +147 -0
- package/src/superlocalmemory/infra/heartbeat_monitor.py +140 -0
- package/src/superlocalmemory/infra/pid_manager.py +193 -0
- package/src/superlocalmemory/infra/process_reaper.py +572 -0
- package/src/superlocalmemory/learning/consolidation_quantization_worker.py +115 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +263 -0
- package/src/superlocalmemory/learning/quantization_scheduler.py +320 -0
- package/src/superlocalmemory/math/ebbinghaus.py +309 -0
- package/src/superlocalmemory/math/fisher_quantized.py +251 -0
- package/src/superlocalmemory/math/hopfield.py +279 -0
- package/src/superlocalmemory/math/polar_quant.py +379 -0
- package/src/superlocalmemory/math/qjl.py +115 -0
- package/src/superlocalmemory/mcp/server.py +2 -0
- package/src/superlocalmemory/mcp/tools_v3.py +10 -0
- package/src/superlocalmemory/mcp/tools_v33.py +351 -0
- package/src/superlocalmemory/parameterization/__init__.py +47 -0
- package/src/superlocalmemory/parameterization/pattern_extractor.py +534 -0
- package/src/superlocalmemory/parameterization/pii_filter.py +106 -0
- package/src/superlocalmemory/parameterization/prompt_injector.py +216 -0
- package/src/superlocalmemory/parameterization/prompt_lifecycle.py +275 -0
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +425 -0
- package/src/superlocalmemory/retrieval/engine.py +21 -3
- package/src/superlocalmemory/retrieval/forgetting_filter.py +145 -0
- package/src/superlocalmemory/retrieval/hopfield_channel.py +335 -0
- package/src/superlocalmemory/retrieval/quantization_aware_search.py +133 -0
- package/src/superlocalmemory/retrieval/spreading_activation.py +1 -1
- package/src/superlocalmemory/retrieval/strategy.py +16 -6
- package/src/superlocalmemory/retrieval/vector_store.py +1 -1
- package/src/superlocalmemory/server/routes/agents.py +68 -8
- package/src/superlocalmemory/server/routes/learning.py +18 -1
- package/src/superlocalmemory/server/routes/lifecycle.py +36 -17
- package/src/superlocalmemory/server/routes/v3_api.py +503 -1
- package/src/superlocalmemory/storage/database.py +206 -0
- package/src/superlocalmemory/storage/embedding_migrator.py +178 -0
- package/src/superlocalmemory/storage/migration_v33.py +140 -0
- package/src/superlocalmemory/storage/quantized_store.py +261 -0
- package/src/superlocalmemory/storage/schema_v32.py +137 -0
- package/conftest.py +0 -5
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""Spreading Activation-Guided Quantization (SAGQ) engine.
|
|
6
|
+
|
|
7
|
+
Novel contribution: uses graph centrality (PageRank + degree + SA frequency)
|
|
8
|
+
to allocate embedding precision. Well-connected memories keep higher precision
|
|
9
|
+
because they serve as hubs for spreading activation retrieval.
|
|
10
|
+
|
|
11
|
+
Core formula:
|
|
12
|
+
centrality(i) = w_pr * pr_norm + w_deg * deg_norm + w_sa * sa_freq_norm
|
|
13
|
+
sagq_bw = b_min + (b_max - b_min) * centrality, ceil-snapped to valid set
|
|
14
|
+
|
|
15
|
+
Conflict resolution with Phase A EAP:
|
|
16
|
+
final_bw = max(eap_bw, sagq_bw) -- safety first, never over-quantize
|
|
17
|
+
|
|
18
|
+
HR-02: Conflict resolution is ALWAYS max().
|
|
19
|
+
HR-03: Centrality scores always in [0.0, 1.0].
|
|
20
|
+
HR-04: Bit-width always from valid_bit_widths.
|
|
21
|
+
HR-05: All SQL uses parameterized queries.
|
|
22
|
+
HR-07: No-op when config.enabled=False.
|
|
23
|
+
|
|
24
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
25
|
+
License: MIT
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import logging
|
|
31
|
+
import math
|
|
32
|
+
from dataclasses import dataclass
|
|
33
|
+
from typing import Any, Callable, TYPE_CHECKING
|
|
34
|
+
|
|
35
|
+
if TYPE_CHECKING:
|
|
36
|
+
from superlocalmemory.storage.database import DatabaseManager
|
|
37
|
+
from superlocalmemory.core.config import SAGQConfig
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Data classes (frozen -- all immutable, Rule 10)
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class CentralityScore:
    """Immutable centrality record for one memory node.

    Attributes:
        fact_id: Identifier of the fact the scores belong to.
        pagerank_norm: PageRank score divided by the profile maximum,
            intended range [0, 1].
        degree_norm: Degree centrality divided by the profile maximum,
            intended range [0, 1].
        sa_freq_norm: Spreading-activation frequency divided by the profile
            maximum, intended range [0, 1].
        combined_centrality: Weighted sum of the three normalized signals,
            clamped to [0, 1] by the code that builds this record.
    """

    fact_id: str
    pagerank_norm: float
    degree_norm: float
    sa_freq_norm: float
    combined_centrality: float
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(frozen=True)
class SAGQPrecision:
    """Immutable precision recommendation for one memory.

    Attributes:
        fact_id: Identifier of the fact being evaluated.
        centrality: Combined centrality in [0, 1] used for the SAGQ signal.
        sagq_bit_width: Bit-width recommended by the SAGQ (centrality) signal.
        eap_bit_width: Bit-width recommended by the EAP signal (Phase A).
        final_bit_width: ``max(sagq_bit_width, eap_bit_width)``.
        current_bit_width: Bit-width currently recorded in
            ``embedding_metadata``.
        action: One of ``"upgrade"``, ``"downgrade"`` or ``"skip"``.
    """

    fact_id: str
    centrality: float
    sagq_bit_width: int
    eap_bit_width: int
    final_bit_width: int
    current_bit_width: int
    action: str
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# ActivationGuidedQuantizer
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ActivationGuidedQuantizer:
    """SAGQ engine: maps graph centrality to embedding precision.

    Reads PageRank and degree scores from ``fact_importance`` and recent
    activation counts from ``activation_cache``, blends them into a
    centrality score in [0, 1], and maps that score to a bit-width by linear
    interpolation followed by a ceiling snap onto ``valid_bit_widths``.

    The ``config`` object is read for ``enabled``, ``w_pagerank``,
    ``w_degree``, ``w_sa_freq``, ``sa_frequency_window_days``, ``b_min``,
    ``b_max`` and ``valid_bit_widths``. ``db`` must expose
    ``execute(sql, params)`` returning rows convertible with ``dict(row)``.
    """

    def __init__(self, db: Any, config: Any) -> None:
        """Store the collaborators. No side effects, no DB calls."""
        self._db = db
        self._config = config

    # -- Internal helpers --------------------------------------------------

    @staticmethod
    def _as_score(value: Any) -> float:
        """Coerce a DB score to float; NULL, NaN and unparsable become 0.0."""
        if value is None:
            return 0.0
        try:
            score = float(value)
        except (TypeError, ValueError):
            return 0.0
        return 0.0 if math.isnan(score) else score

    @staticmethod
    def _denominator(max_value: float) -> float:
        """Return a safe normalization divisor (1.0 when the max is <= 0)."""
        return max_value if max_value > 0.0 else 1.0

    def _sa_window(self) -> str:
        """Build the SQLite datetime modifier for the SA frequency window."""
        return f"-{self._config.sa_frequency_window_days} days"

    def _combine(
        self, fact_id: str, pr_norm: float, deg_norm: float, sa_norm: float,
    ) -> float:
        """Blend the normalized signals into a centrality in [0, 1] (HR-03)."""
        cfg = self._config
        combined = (
            cfg.w_pagerank * pr_norm
            + cfg.w_degree * deg_norm
            + cfg.w_sa_freq * sa_norm
        )
        if math.isnan(combined):
            logger.warning("SAGQ: NaN centrality for %s, defaulting to 0.0", fact_id)
            return 0.0
        return max(0.0, min(1.0, combined))

    # -- Public API --------------------------------------------------------

    def compute_centrality_batch(
        self, profile_id: str,
    ) -> list[CentralityScore]:
        """Compute centrality for every fact of a profile.

        Args:
            profile_id: Profile whose ``fact_importance`` rows are scored.

        Returns:
            One CentralityScore per ``fact_importance`` row, each with
            ``combined_centrality`` in [0, 1]. Empty list when the feature is
            disabled, the importance query fails, or there are no rows.
        """
        if not self._config.enabled:
            return []

        # Importance signals (Q1).
        try:
            rows = self._db.execute(
                "SELECT fact_id, pagerank_score, degree_centrality "
                "FROM fact_importance "
                "WHERE profile_id = ?",
                (profile_id,),
            )
        except Exception as exc:
            logger.warning("SAGQ: fact_importance query failed: %s", exc)
            return []

        if not rows:
            logger.info("SAGQ: no importance data yet for profile %s, skipping", profile_id)
            return []

        # Convert each row once; NULL scores count as 0.0 instead of crashing
        # the max()/division below.
        fact_data: list[tuple[str, float, float]] = []
        for row in rows:
            record = dict(row)
            fact_data.append((
                record["fact_id"],
                self._as_score(record["pagerank_score"]),
                self._as_score(record["degree_centrality"]),
            ))

        max_pr = self._denominator(max(pr for _, pr, _ in fact_data))
        max_deg = self._denominator(max(deg for _, _, deg in fact_data))

        # Spreading-activation frequency inside the window (Q2). This signal
        # is optional: on failure every fact simply gets frequency 0.
        try:
            sa_rows = self._db.execute(
                "SELECT node_id, COUNT(*) as activation_count "
                "FROM activation_cache "
                "WHERE profile_id = ? "
                "AND created_at > datetime('now', ?) "
                "GROUP BY node_id",
                (profile_id, self._sa_window()),
            )
        except Exception as exc:
            logger.debug("SAGQ: activation_cache query failed: %s", exc)
            sa_rows = []

        sa_freq_map: dict[str, int] = {}
        for sa_row in sa_rows:
            record = dict(sa_row)
            sa_freq_map[record["node_id"]] = int(record["activation_count"])
        max_sa = max(sa_freq_map.values()) if sa_freq_map else 1

        result: list[CentralityScore] = []
        for fact_id, pr_score, deg_score in fact_data:
            pr_norm = pr_score / max_pr
            deg_norm = deg_score / max_deg
            sa_freq_norm = sa_freq_map.get(fact_id, 0) / max_sa
            result.append(CentralityScore(
                fact_id=fact_id,
                pagerank_norm=pr_norm,
                degree_norm=deg_norm,
                sa_freq_norm=sa_freq_norm,
                combined_centrality=self._combine(
                    fact_id, pr_norm, deg_norm, sa_freq_norm,
                ),
            ))
        return result

    def centrality_to_bit_width(self, centrality: float) -> int:
        """Map a centrality score to a SAGQ bit-width.

        Interpolates linearly from [0, 1] to [b_min, b_max] and snaps UP to
        the smallest valid bit-width that is >= the interpolated value
        (Decision D1: SAGQ is a preservation signal). The result does not
        depend on the ordering of ``valid_bit_widths``.

        Args:
            centrality: Score to map; values outside [0, 1] are clamped.

        Returns:
            One of ``valid_bit_widths`` (HR-04); the largest one when the
            interpolated value exceeds them all.

        Raises:
            ValueError: If ``valid_bit_widths`` is empty.
        """
        cfg = self._config
        centrality = max(0.0, min(1.0, centrality))
        raw_bw = cfg.b_min + (cfg.b_max - cfg.b_min) * centrality

        widths = cfg.valid_bit_widths
        candidates = [width for width in widths if width >= raw_bw]
        # No candidate means raw_bw exceeds every valid value: cap at the max.
        return min(candidates) if candidates else max(widths)

    def compute_sagq_precision_batch(
        self,
        profile_id: str,
        eap_precision_fn: Callable[[str], int],
    ) -> list[SAGQPrecision]:
        """Compute combined SAGQ + EAP precision for all facts in a profile.

        Args:
            profile_id: Profile to process.
            eap_precision_fn: Callable(fact_id) -> EAP bit-width (Phase A).
                Exceptions raised by it propagate to the caller.

        Returns:
            One SAGQPrecision per scored fact, with ``action`` set to
            ``"upgrade"``, ``"downgrade"`` or ``"skip"``. Empty list when the
            feature is disabled or no centrality data is available.
        """
        if not self._config.enabled:
            return []

        centrality_scores = self.compute_centrality_batch(profile_id)
        if not centrality_scores:
            return []

        # Current bit-widths (Q3). On failure every fact is treated as 32-bit.
        # NOTE(review): the EAP scheduler in this package records bit_width in
        # embedding_quantization_metadata -- confirm embedding_metadata is the
        # intended table here.
        try:
            bw_rows = self._db.execute(
                "SELECT fact_id, COALESCE(bit_width, 32) as bit_width "
                "FROM embedding_metadata "
                "WHERE profile_id = ?",
                (profile_id,),
            )
        except Exception as exc:
            logger.warning("SAGQ: embedding_metadata query failed: %s", exc)
            bw_rows = []

        current_bw_map: dict[str, int] = {}
        for row in bw_rows:
            record = dict(row)
            current_bw_map[record["fact_id"]] = int(record["bit_width"])

        result: list[SAGQPrecision] = []
        for cs in centrality_scores:
            sagq_bw = self.centrality_to_bit_width(cs.combined_centrality)
            eap_bw = eap_precision_fn(cs.fact_id)

            # HR-02: conflict resolution is ALWAYS max() -- never over-quantize.
            final_bw = max(sagq_bw, eap_bw)
            current_bw = current_bw_map.get(cs.fact_id, 32)

            if final_bw < current_bw:
                action = "downgrade"
            elif final_bw > current_bw:
                action = "upgrade"
            else:
                action = "skip"

            result.append(SAGQPrecision(
                fact_id=cs.fact_id,
                centrality=cs.combined_centrality,
                sagq_bit_width=sagq_bw,
                eap_bit_width=eap_bw,
                final_bit_width=final_bw,
                current_bit_width=current_bw,
                action=action,
            ))
        return result

    def get_centrality_for_fact(
        self, fact_id: str, profile_id: str,
    ) -> float:
        """Get the combined centrality of a single fact.

        Used by Phase E (CCQ) for centrality-aware consolidation. Uses the
        same normalization and weighting as compute_centrality_batch.

        Args:
            fact_id: Fact to score.
            profile_id: Profile the fact belongs to.

        Returns:
            Centrality in [0, 1]; 0.0 when disabled, when the fact has no
            ``fact_importance`` row, or when an importance query fails.
        """
        if not self._config.enabled:
            return 0.0

        # Importance of this fact (Q11).
        try:
            rows = self._db.execute(
                "SELECT pagerank_score, degree_centrality "
                "FROM fact_importance "
                "WHERE fact_id = ? AND profile_id = ?",
                (fact_id, profile_id),
            )
        except Exception as exc:
            logger.debug("SAGQ: single fact query failed: %s", exc)
            return 0.0

        if not rows:
            return 0.0

        record = dict(rows[0])
        pr_score = self._as_score(record["pagerank_score"])
        deg_score = self._as_score(record["degree_centrality"])

        # Profile-wide maxima for normalization (Q4).
        try:
            max_rows = self._db.execute(
                "SELECT "
                " COALESCE(MAX(pagerank_score), 0.0) as max_pr, "
                " COALESCE(MAX(degree_centrality), 0.0) as max_deg "
                "FROM fact_importance "
                "WHERE profile_id = ?",
                (profile_id,),
            )
        except Exception as exc:
            logger.debug("SAGQ: max query failed: %s", exc)
            return 0.0

        if not max_rows:
            return 0.0

        maxima = dict(max_rows[0])
        pr_norm = pr_score / self._denominator(self._as_score(maxima["max_pr"]))
        deg_norm = deg_score / self._denominator(self._as_score(maxima["max_deg"]))

        # SA frequency of this fact inside the window (Q6); optional signal.
        sa_window = self._sa_window()
        try:
            sa_rows = self._db.execute(
                "SELECT COUNT(*) as cnt "
                "FROM activation_cache "
                "WHERE node_id = ? AND profile_id = ? "
                "AND created_at > datetime('now', ?)",
                (fact_id, profile_id, sa_window),
            )
            sa_cnt = int(dict(sa_rows[0])["cnt"]) if sa_rows else 0
        except Exception as exc:
            logger.debug("SAGQ: SA frequency query failed: %s", exc)
            sa_cnt = 0

        # Highest per-node SA frequency in the profile (Q5).
        try:
            max_sa_rows = self._db.execute(
                "SELECT COALESCE(MAX(cnt), 1) as max_cnt FROM ("
                " SELECT COUNT(*) as cnt "
                " FROM activation_cache "
                " WHERE profile_id = ? "
                " AND created_at > datetime('now', ?) "
                " GROUP BY node_id"
                ")",
                (profile_id, sa_window),
            )
            max_sa = int(dict(max_sa_rows[0])["max_cnt"]) if max_sa_rows else 1
        except Exception as exc:
            logger.debug("SAGQ: max SA frequency query failed: %s", exc)
            max_sa = 1

        sa_norm = sa_cnt / max(max_sa, 1)

        return self._combine(fact_id, pr_norm, deg_norm, sa_norm)
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""EAP Scheduler -- Embedding-Aware Precision.
|
|
6
|
+
|
|
7
|
+
Couples Ebbinghaus retention scores to embedding bit-width:
|
|
8
|
+
R > 0.8 -> 32 (float32, full precision)
|
|
9
|
+
R > 0.5 -> 8 (int8, sqlite-vec native)
|
|
10
|
+
R > 0.2 -> 4 (polar 4-bit)
|
|
11
|
+
R > 0.05 -> 2 (polar 2-bit)
|
|
12
|
+
R <= 0.05 -> 0 (forgotten, delete embedding)
|
|
13
|
+
|
|
14
|
+
The EAP cycle:
|
|
15
|
+
1. Fetch all facts with retention data + current bit_width
|
|
16
|
+
2. Map retention to target bit_width
|
|
17
|
+
3. Execute downgrades (compress) and upgrades (restore)
|
|
18
|
+
4. Return stats
|
|
19
|
+
|
|
20
|
+
HR-03: Original float32 NEVER deleted unless keep_float32_backup=False
|
|
21
|
+
AND fact is in archive/forgotten zone.
|
|
22
|
+
HR-04: Quantization ONLY via EAP scheduler (not ad-hoc).
|
|
23
|
+
|
|
24
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
25
|
+
License: MIT
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import json
|
|
31
|
+
import logging
|
|
32
|
+
from typing import TYPE_CHECKING
|
|
33
|
+
|
|
34
|
+
import numpy as np
|
|
35
|
+
from numpy.typing import NDArray
|
|
36
|
+
|
|
37
|
+
from superlocalmemory.core.config import QuantizationConfig
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
from superlocalmemory.math.ebbinghaus import EbbinghausCurve
|
|
41
|
+
from superlocalmemory.storage.database import DatabaseManager
|
|
42
|
+
from superlocalmemory.storage.quantized_store import QuantizedEmbeddingStore
|
|
43
|
+
|
|
44
|
+
logger = logging.getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Retention -> bit-width mapping (Section 10, Pattern 4)
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def retention_to_bit_width(retention: float) -> int:
    """Translate an Ebbinghaus retention score into an embedding bit-width.

    Each tier is selected by a strict ``>`` comparison against its lower
    bound, so a score sitting exactly on a bound falls into the tier below.
    A NaN score fails every comparison and therefore yields 0.

    Args:
        retention: Retention score to classify.

    Returns:
        32, 8, 4, 2, or 0 (deleted).
    """
    # (exclusive lower bound, bit-width), highest precision first.
    tiers = ((0.8, 32), (0.5, 8), (0.2, 4), (0.05, 2))
    return next((bits for bound, bits in tiers if retention > bound), 0)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# EAPScheduler
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class EAPScheduler:
    """Embedding-Aware Precision scheduler.

    Runs periodic cycles that adjust embedding precision based on how well
    each fact is retained in memory. Precision changes are recorded in
    ``embedding_quantization_metadata``; re-quantization itself is delegated
    to the quantized store's ``compress_fact``.
    """

    # No __slots__: allows mock patching of _get_fact_embedding in tests

    def __init__(
        self,
        db: DatabaseManager,
        ebbinghaus: EbbinghausCurve,
        quantized_store: QuantizedEmbeddingStore,
        config: QuantizationConfig,
    ) -> None:
        """Store the collaborators.

        No side effects, no DB calls, no file I/O.
        """
        self._db = db
        self._ebbinghaus = ebbinghaus
        self._quantized_store = quantized_store
        self._config = config

    def run_eap_cycle(self, profile_id: str) -> dict:
        """Execute one EAP cycle for a profile.

        Steps:
            1. Fetch all facts with retention data and current bit_width.
            2. Map retention -> target bit_width.
            3. Execute deletions, downgrades and upgrades.
            4. Return stats.

        A row whose retention score is NULL, unparsable or NaN is counted
        under ``errors`` and left untouched, so corrupt data can neither
        abort the cycle nor cause an embedding to be marked as deleted.

        Args:
            profile_id: Profile whose ``fact_retention`` rows are processed.

        Returns:
            ``{total, downgrades, upgrades, skipped, deleted, errors}``.
            When the initial query fails, ``errors`` is 1 and all other
            counters are 0.
        """
        stats = {
            "total": 0,
            "downgrades": 0,
            "upgrades": 0,
            "skipped": 0,
            "deleted": 0,
            "errors": 0,
        }

        # Step 1: retention plus current bit_width (32 when no metadata row).
        try:
            rows = self._db.execute(
                "SELECT r.fact_id, r.retention_score, r.lifecycle_zone, "
                " COALESCE(eqm.bit_width, 32) as current_bw "
                "FROM fact_retention r "
                "LEFT JOIN embedding_quantization_metadata eqm "
                " ON r.fact_id = eqm.fact_id "
                "WHERE r.profile_id = ?",
                (profile_id,),
            )
        except Exception as exc:
            logger.error("EAP cycle query failed: %s", exc)
            stats["errors"] = 1
            return stats

        # Steps 2+3: process each fact independently.
        for row in rows or ():
            record = dict(row)
            fact_id = record["fact_id"]
            stats["total"] += 1

            try:
                retention = float(record["retention_score"])
                current_bw = int(record["current_bw"])
            except (TypeError, ValueError):
                retention = float("nan")
                current_bw = 0
            if np.isnan(retention):
                logger.warning(
                    "EAP: invalid retention data for %s, leaving untouched",
                    fact_id,
                )
                stats["errors"] += 1
                continue

            target_bw = retention_to_bit_width(retention)

            if target_bw == current_bw:
                stats["skipped"] += 1
                continue

            if target_bw == 0:
                # Forgotten -- mark as deleted; a failed write is an error,
                # not a deletion.
                if self._handle_deletion(fact_id, profile_id):
                    stats["deleted"] += 1
                else:
                    stats["errors"] += 1
                continue

            if target_bw < current_bw:
                # Downgrade -- compress to lower precision.
                if self._handle_downgrade(fact_id, profile_id, target_bw):
                    stats["downgrades"] += 1
                else:
                    stats["errors"] += 1
            elif self._handle_upgrade(fact_id, profile_id, target_bw):
                # Upgrade -- restored to higher precision.
                stats["upgrades"] += 1
            else:
                # Can't upgrade without the source embedding.
                stats["skipped"] += 1

        return stats

    # -- Handlers ----------------------------------------------------------

    def _handle_downgrade(
        self, fact_id: str, profile_id: str, target_bw: int,
    ) -> bool:
        """Compress a fact to a lower bit_width.

        Loads the stored embedding and hands it to the quantized store.

        Returns:
            The result of ``compress_fact``, or False when no embedding
            could be loaded for the fact.
        """
        embedding = self._get_fact_embedding(fact_id)
        if embedding is None:
            logger.info(
                "No float32 embedding for %s, cannot compress", fact_id,
            )
            return False

        return self._quantized_store.compress_fact(
            fact_id, profile_id, embedding, target_bw,
        )

    def _handle_upgrade(
        self, fact_id: str, profile_id: str, target_bw: int,
    ) -> bool:
        """Restore a fact to a higher precision.

        For a 32-bit target only the metadata row is upserted. For any other
        target the stored embedding is re-compressed at ``target_bw``.

        Returns:
            True on success; False when the metadata write fails or no
            embedding could be loaded for re-compression.
        """
        if target_bw == 32:
            # NOTE(review): this path does not verify that the original
            # embedding still exists before flagging the fact as float32.
            try:
                self._db.execute(
                    "INSERT INTO embedding_quantization_metadata "
                    "(fact_id, profile_id, quantization_level, bit_width, created_at) "
                    "VALUES (?, ?, 'float32', 32, datetime('now')) "
                    "ON CONFLICT(fact_id) DO UPDATE SET "
                    " quantization_level = 'float32', "
                    " bit_width = 32",
                    (fact_id, profile_id),
                )
                return True
            except Exception as exc:
                logger.error("Upgrade to float32 failed for %s: %s", fact_id, exc)
                return False

        # Intermediate target (e.g. int8 from polar): re-compress from source.
        embedding = self._get_fact_embedding(fact_id)
        if embedding is None:
            return False

        return self._quantized_store.compress_fact(
            fact_id, profile_id, embedding, target_bw,
        )

    def _handle_deletion(self, fact_id: str, profile_id: str) -> bool:
        """Mark an embedding as deleted (forgotten) in the metadata table.

        Only the ``embedding_quantization_metadata`` row is upserted
        (``quantization_level='deleted'``, ``bit_width=0``). This method does
        not remove any embedding data and does not consult
        ``keep_float32_backup``.

        Returns:
            True when the metadata write succeeded, False when it raised
            (the error is logged).
        """
        try:
            self._db.execute(
                "INSERT INTO embedding_quantization_metadata "
                "(fact_id, profile_id, quantization_level, bit_width, created_at) "
                "VALUES (?, ?, 'deleted', 0, datetime('now')) "
                "ON CONFLICT(fact_id) DO UPDATE SET "
                " quantization_level = 'deleted', "
                " bit_width = 0",
                (fact_id, profile_id),
            )
        except Exception as exc:
            logger.error("Delete metadata failed for %s: %s", fact_id, exc)
            return False
        return True

    def _get_fact_embedding(self, fact_id: str) -> NDArray | None:
        """Load the stored embedding for a fact.

        Reads the ``atomic_facts.embedding`` column, decoding it from JSON
        when it is a string. No other table is consulted.

        Returns:
            The embedding as a float64 array, or None when the fact has no
            row, the column is empty/``"null"``, or loading fails (the
            failure is logged at debug level).
        """
        try:
            rows = self._db.execute(
                "SELECT embedding FROM atomic_facts WHERE fact_id = ?",
                (fact_id,),
            )
            if rows:
                raw = dict(rows[0]).get("embedding")
                if raw and raw != "null":
                    data = json.loads(raw) if isinstance(raw, str) else raw
                    if data:
                        return np.array(data, dtype=np.float64)
        except Exception as exc:
            logger.debug(
                "Could not load embedding from atomic_facts for %s: %s",
                fact_id, exc,
            )

        return None
|