superlocalmemory 3.4.9 → 3.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -3
- package/docs/cloud-backup.md +174 -0
- package/docs/skill-evolution.md +256 -0
- package/ide/hooks/tool-event-hook.sh +101 -11
- package/package.json +1 -1
- package/pyproject.toml +3 -2
- package/src/superlocalmemory/cli/commands.py +359 -0
- package/src/superlocalmemory/cli/ingest_cmd.py +81 -29
- package/src/superlocalmemory/cli/main.py +32 -0
- package/src/superlocalmemory/cli/setup_wizard.py +54 -11
- package/src/superlocalmemory/core/config.py +35 -0
- package/src/superlocalmemory/core/consolidation_engine.py +138 -0
- package/src/superlocalmemory/core/embedding_worker.py +1 -1
- package/src/superlocalmemory/core/engine.py +19 -0
- package/src/superlocalmemory/core/fact_consolidator.py +425 -0
- package/src/superlocalmemory/core/graph_pruner.py +290 -0
- package/src/superlocalmemory/core/maintenance_scheduler.py +44 -3
- package/src/superlocalmemory/core/recall_pipeline.py +9 -0
- package/src/superlocalmemory/core/tier_manager.py +325 -0
- package/src/superlocalmemory/encoding/entity_resolver.py +96 -28
- package/src/superlocalmemory/evolution/__init__.py +29 -0
- package/src/superlocalmemory/evolution/blind_verifier.py +115 -0
- package/src/superlocalmemory/evolution/evolution_store.py +302 -0
- package/src/superlocalmemory/evolution/mutation_generator.py +181 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +555 -0
- package/src/superlocalmemory/evolution/triggers.py +367 -0
- package/src/superlocalmemory/evolution/types.py +92 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +13 -0
- package/src/superlocalmemory/infra/backup.py +63 -20
- package/src/superlocalmemory/infra/cloud_backup.py +703 -0
- package/src/superlocalmemory/learning/skill_performance_miner.py +422 -0
- package/src/superlocalmemory/mcp/server.py +4 -0
- package/src/superlocalmemory/mcp/tools_evolution.py +338 -0
- package/src/superlocalmemory/retrieval/engine.py +64 -4
- package/src/superlocalmemory/retrieval/forgetting_filter.py +22 -7
- package/src/superlocalmemory/retrieval/strategy.py +2 -2
- package/src/superlocalmemory/server/routes/backup.py +512 -8
- package/src/superlocalmemory/server/routes/behavioral.py +39 -17
- package/src/superlocalmemory/server/routes/evolution.py +213 -0
- package/src/superlocalmemory/server/routes/tiers.py +195 -0
- package/src/superlocalmemory/server/unified_daemon.py +36 -5
- package/src/superlocalmemory/storage/schema_v3410.py +159 -0
- package/src/superlocalmemory/storage/schema_v3411.py +149 -0
- package/src/superlocalmemory/ui/index.html +59 -3
- package/src/superlocalmemory/ui/js/core.js +3 -0
- package/src/superlocalmemory/ui/js/lifecycle.js +83 -0
- package/src/superlocalmemory/ui/js/ng-entities.js +27 -3
- package/src/superlocalmemory/ui/js/ng-shell.js +33 -0
- package/src/superlocalmemory/ui/js/ng-skills.js +611 -0
- package/src/superlocalmemory/ui/js/settings.js +311 -1
- package/src/superlocalmemory.egg-info/PKG-INFO +16 -1
- package/src/superlocalmemory.egg-info/SOURCES.txt +18 -0
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""SuperLocalMemory V3.4.11 "Scale-Ready" — Graph Pruning Engine.
|
|
6
|
+
|
|
7
|
+
Reduces graph_edges count without losing meaningful connections:
|
|
8
|
+
1. Orphan removal: edges where source/target no longer exists
|
|
9
|
+
2. Self-loop removal: edges where source == target
|
|
10
|
+
3. Duplicate removal: keeps highest-weight edge per (source, target, type)
|
|
11
|
+
4. Supersedes chain collapse: A→B→C becomes A→B + A→C (B→C removed)
|
|
12
|
+
|
|
13
|
+
CRITICAL: Never deletes facts. Only prunes graph EDGES.
|
|
14
|
+
All operations are profile-scoped and idempotent.
|
|
15
|
+
|
|
16
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
import sqlite3
|
|
23
|
+
import time
|
|
24
|
+
import uuid
|
|
25
|
+
from datetime import datetime, UTC
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger("superlocalmemory.graph_pruner")
|
|
29
|
+
|
|
30
|
+
# Upper bound on the number of supersedes chain pairs fetched (SQL LIMIT) by
# one _collapse_supersedes_chains call; anything beyond it is left for a
# later pruning run.
_CHAIN_BATCH_LIMIT = 10_000
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def prune_graph(
    db_path: str | Path,
    profile_id: str = "default",
    dry_run: bool = False,
) -> dict:
    """Run all graph pruning strategies for a specific profile.

    The strategies run in this order inside one explicit transaction:
    orphan edges, self-loops, duplicate edges, supersedes-chain collapse.
    Only rows of ``graph_edges`` are modified.

    Args:
        db_path: Location of the SQLite database file.
        profile_id: Profile whose edges are pruned.
        dry_run: When true the transaction is rolled back instead of
            committed, so nothing is persisted and ``total_after`` equals
            ``total_before``.

    Returns:
        Stats dict with ``orphans_removed``, ``supersedes_collapsed``,
        ``self_loops_removed``, ``duplicates_removed``, ``total_before`` and
        ``total_after``. If pruning fails, the transaction is rolled back,
        the message is stored under ``"error"`` and the dict is still
        returned (the exception is logged, not re-raised).

    Raises:
        sqlite3.Error: Only if the connection itself cannot be opened.
    """
    stats = {
        "orphans_removed": 0,
        "supersedes_collapsed": 0,
        "self_loops_removed": 0,
        "duplicates_removed": 0,
        "total_before": 0,
        "total_after": 0,
    }
    count_sql = "SELECT COUNT(*) as cnt FROM graph_edges WHERE profile_id = ?"

    conn = sqlite3.connect(str(db_path))
    try:
        # Set the busy timeout first so the journal-mode switch below also
        # waits for a competing writer instead of failing immediately.
        conn.execute("PRAGMA busy_timeout=10000")
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row

        c = conn.cursor()
        c.execute(count_sql, (profile_id,))
        stats["total_before"] = c.fetchone()["cnt"]

        # Monotonic clock: elapsed time must not jump with wall-clock changes.
        start = time.monotonic()

        # Explicit transaction for atomicity
        c.execute("BEGIN")

        stats["orphans_removed"] = _remove_orphan_edges(c, profile_id, dry_run)
        stats["self_loops_removed"] = _remove_self_loops(c, profile_id, dry_run)
        stats["duplicates_removed"] = _remove_duplicate_edges(c, profile_id, dry_run)
        stats["supersedes_collapsed"] = _collapse_supersedes_chains(
            c, profile_id, dry_run,
        )

        c.execute("ROLLBACK" if dry_run else "COMMIT")

        c.execute(count_sql, (profile_id,))
        stats["total_after"] = c.fetchone()["cnt"]

        elapsed = time.monotonic() - start
        total_removed = stats["total_before"] - stats["total_after"]
        pct = round(total_removed / max(stats["total_before"], 1) * 100, 1)

        prefix = "(dry-run) " if dry_run else ""
        logger.info(
            "%sGraph pruning: removed %d edges (%.1f%%) in %.1fs — "
            "orphans=%d, supersedes=%d, self_loops=%d, duplicates=%d",
            prefix, total_removed, pct, elapsed,
            stats["orphans_removed"], stats["supersedes_collapsed"],
            stats["self_loops_removed"], stats["duplicates_removed"],
        )

    except Exception as exc:
        logger.error("Graph pruning failed: %s", exc, exc_info=True)
        stats["error"] = str(exc)
        try:
            # Connection-level rollback: works even if no cursor was created
            # and is a no-op when no transaction is open.
            conn.rollback()
        except sqlite3.Error as rollback_exc:
            logger.debug("Rollback after failed pruning also failed: %s", rollback_exc)
    finally:
        conn.close()

    return stats
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _remove_orphan_edges(
    c: sqlite3.Cursor,
    profile_id: str,
    dry_run: bool,
) -> int:
    """Remove edges where source OR target no longer exists in facts/entities.

    An endpoint counts as existing when its id appears either as
    ``atomic_facts.fact_id`` or as ``canonical_entities.entity_id``.
    Scoped to profile_id. Checks both source_id and target_id.

    The existence test uses ``NOT EXISTS`` rather than ``NOT IN (subquery)``:
    ``NOT IN`` evaluates to NULL (never true) as soon as the subquery yields a
    NULL, which would silently disable orphan detection.

    Args:
        c: Cursor on a connection whose row factory supports access by
            column name (``sqlite3.Row``).
        profile_id: Profile whose edges are inspected.
        dry_run: When true, only count the orphan edges.

    Returns:
        Number of orphan edges deleted, or found when ``dry_run`` is true.
    """

    def _endpoint_missing(column: str) -> str:
        # `column` is one of two fixed literals below, never caller input.
        return (
            "(NOT EXISTS (SELECT 1 FROM atomic_facts"
            f" WHERE atomic_facts.fact_id = graph_edges.{column})"
            " AND NOT EXISTS (SELECT 1 FROM canonical_entities"
            f" WHERE canonical_entities.entity_id = graph_edges.{column}))"
        )

    # One predicate shared by the count and the delete so they cannot drift.
    orphan_where = (
        "WHERE profile_id = ? AND ("
        f"{_endpoint_missing('source_id')} OR {_endpoint_missing('target_id')}"
        ")"
    )

    if dry_run:
        c.execute(
            f"SELECT COUNT(*) as cnt FROM graph_edges {orphan_where}",
            (profile_id,),
        )
        return c.fetchone()["cnt"]

    c.execute(f"DELETE FROM graph_edges {orphan_where}", (profile_id,))
    return c.rowcount
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _remove_self_loops(
    c: sqlite3.Cursor,
    profile_id: str,
    dry_run: bool,
) -> int:
    """Delete (or, for a dry run, count) edges that point at their own source.

    Only edges of ``profile_id`` are considered. Returns the number of
    self-loop edges deleted, or found when ``dry_run`` is true.
    """
    params = (profile_id,)

    if not dry_run:
        c.execute(
            "DELETE FROM graph_edges WHERE source_id = target_id AND profile_id = ?",
            params,
        )
        return c.rowcount

    # Dry run: report how many rows the DELETE above would hit.
    c.execute(
        "SELECT COUNT(*) as cnt FROM graph_edges "
        "WHERE source_id = target_id AND profile_id = ?",
        params,
    )
    found = c.fetchone()
    return found["cnt"]
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _remove_duplicate_edges(
    c: sqlite3.Cursor,
    profile_id: str,
    dry_run: bool,
) -> int:
    """Remove duplicate edges (same source+target+type), keeping highest weight.

    Exactly one edge survives per (source_id, target_id, edge_type) group:
    the one with the highest weight, ties broken by the smallest edge_id.

    NULL weights are handled explicitly. The survivor is matched with ``IS``
    instead of ``=`` because ``weight = NULL`` is never true: with ``=``, a
    group whose weights are all NULL would keep no edge at all and even a
    unique edge with a NULL weight would be deleted.

    Uses correlated subquery for SQLite 3.22+ compatibility (no window functions).

    Args:
        c: Cursor on a connection whose row factory supports access by
            column name (``sqlite3.Row``).
        profile_id: Profile whose edges are de-duplicated.
        dry_run: When true, only count the surplus edges.

    Returns:
        Number of surplus edges deleted, or found when ``dry_run`` is true.
    """
    if dry_run:
        # Count actual edges to be deleted (total - groups = excess edges)
        c.execute("""
            SELECT
                (SELECT COUNT(*) FROM graph_edges WHERE profile_id = ?) -
                (SELECT COUNT(*) FROM (
                    SELECT source_id, target_id, edge_type
                    FROM graph_edges WHERE profile_id = ?
                    GROUP BY source_id, target_id, edge_type
                )) as cnt
        """, (profile_id, profile_id))
        return max(c.fetchone()["cnt"], 0)

    # Keep the edge with highest weight per (source, target, type).
    # Portable: no ROW_NUMBER() OVER, works on SQLite 3.22+.
    c.execute("""
        DELETE FROM graph_edges
        WHERE profile_id = ?
        AND edge_id NOT IN (
            SELECT MIN(ge1.edge_id) FROM graph_edges ge1
            WHERE ge1.profile_id = ?
            AND ge1.weight IS (
                SELECT MAX(ge2.weight) FROM graph_edges ge2
                WHERE ge2.source_id = ge1.source_id
                AND ge2.target_id = ge1.target_id
                AND ge2.edge_type IS ge1.edge_type
                AND ge2.profile_id = ge1.profile_id
            )
            GROUP BY ge1.source_id, ge1.target_id, ge1.edge_type
        )
    """, (profile_id, profile_id))
    return c.rowcount
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _collapse_supersedes_chains(
    c: sqlite3.Cursor,
    profile_id: str,
    dry_run: bool,
) -> int:
    """Collapse supersedes chains: if A supersedes B and B supersedes C,
    remove B→C edge AND create A→C shortcut edge.

    Preserves reachability: A can still reach C via the new direct edge.

    Pairs that would do damage are skipped:
    * self-loop edges on either side of the pair, and
    * two-node cycles (A→B together with B→A), where the "shortcut" would be
      a self-loop A→A and both real edges would be deleted.

    At most ``_CHAIN_BATCH_LIMIT`` pairs are handled per call; a warning is
    logged when the limit is reached.

    Args:
        c: Cursor on a connection whose row factory supports access by
            column name (``sqlite3.Row``).
        profile_id: Profile whose supersedes edges are collapsed.
        dry_run: When true, nothing is written.

    Returns:
        Number of distinct B→C edges removed (or that would be removed when
        ``dry_run`` is true).
    """
    c.execute("""
        SELECT e1.edge_id as e1_id, e1.source_id as a, e1.target_id as b,
               e1.weight as e1_weight,
               e2.edge_id as e2_id, e2.target_id as c
        FROM graph_edges e1
        JOIN graph_edges e2 ON e1.target_id = e2.source_id
        WHERE e1.edge_type = 'supersedes'
          AND e2.edge_type = 'supersedes'
          AND e1.profile_id = ?
          AND e2.profile_id = ?
          AND e1.source_id != e1.target_id
          AND e2.source_id != e2.target_id
          AND e1.source_id != e2.target_id
        LIMIT ?
    """, (profile_id, profile_id, _CHAIN_BATCH_LIMIT))

    chains = c.fetchall()
    if not chains:
        return 0

    if len(chains) >= _CHAIN_BATCH_LIMIT:
        logger.warning(
            "Supersedes chain collapse hit limit (%d). "
            "More chains may exist — will process in next cycle.",
            _CHAIN_BATCH_LIMIT,
        )

    # The same B→C edge shows up once per predecessor of B; delete and count
    # it only once (dict.fromkeys keeps first-seen order).
    delete_ids: list[str] = list(dict.fromkeys(row["e2_id"] for row in chains))

    if dry_run:
        return len(delete_ids)

    # One shortcut per (A, C) pair, carrying the highest weight seen for it,
    # so this pass does not itself create duplicate edges.
    shortcut_weights: dict[tuple, float] = {}
    for row in chains:
        pair = (row["a"], row["c"])
        weight = row["e1_weight"] or 1.0
        if pair not in shortcut_weights or weight > shortcut_weights[pair]:
            shortcut_weights[pair] = weight

    now = datetime.now(UTC).isoformat()
    insert_rows: list[tuple] = [
        (uuid.uuid4().hex[:16], profile_id, a_id, c_id, "supersedes", weight, now)
        for (a_id, c_id), weight in shortcut_weights.items()
    ]

    # Batch DELETE: remove all B→C intermediate edges
    for i in range(0, len(delete_ids), 500):
        batch = delete_ids[i:i + 500]
        placeholders = ",".join("?" * len(batch))
        c.execute(
            f"DELETE FROM graph_edges WHERE edge_id IN ({placeholders})",
            batch,
        )

    # Batch INSERT: add all A→C shortcut edges
    c.executemany(
        "INSERT OR IGNORE INTO graph_edges "
        "(edge_id, profile_id, source_id, target_id, edge_type, weight, created_at) "
        "VALUES (?, ?, ?, ?, ?, ?, ?)",
        insert_rows,
    )

    return len(delete_ids)
|
|
@@ -75,7 +75,7 @@ class MaintenanceScheduler:
|
|
|
75
75
|
self._timer.start()
|
|
76
76
|
|
|
77
77
|
def _run(self) -> None:
|
|
78
|
-
"""Execute maintenance
|
|
78
|
+
"""Execute maintenance + auto-backup check, then schedule next run."""
|
|
79
79
|
if not self._running:
|
|
80
80
|
return
|
|
81
81
|
try:
|
|
@@ -84,8 +84,49 @@ class MaintenanceScheduler:
|
|
|
84
84
|
logger.info("Scheduled maintenance complete: %s", counts)
|
|
85
85
|
except Exception as exc:
|
|
86
86
|
logger.warning("Scheduled maintenance failed: %s", exc)
|
|
87
|
-
|
|
88
|
-
|
|
87
|
+
|
|
88
|
+
# V3.4.11: Graph pruning (orphan, self-loop and duplicate edges; supersedes chains)
|
|
89
|
+
try:
|
|
90
|
+
from superlocalmemory.core.graph_pruner import prune_graph
|
|
91
|
+
prune_stats = prune_graph(self._db.db_path, self._profile_id)
|
|
92
|
+
removed = prune_stats["total_before"] - prune_stats["total_after"]
|
|
93
|
+
if removed > 0:
|
|
94
|
+
logger.info("Graph pruning: %d edges removed", removed)
|
|
95
|
+
except Exception as exc:
|
|
96
|
+
logger.debug("Graph pruning skipped: %s", exc)
|
|
97
|
+
|
|
98
|
+
# V3.4.11: Run tier evaluation (demote old facts)
|
|
99
|
+
try:
|
|
100
|
+
from superlocalmemory.core.tier_manager import evaluate_tiers
|
|
101
|
+
stats = evaluate_tiers(self._db, self._profile_id)
|
|
102
|
+
demoted = stats["demoted_to_warm"] + stats["demoted_to_cold"] + stats["demoted_to_archive"]
|
|
103
|
+
if demoted > 0:
|
|
104
|
+
logger.info("Tier evaluation: %d facts demoted", demoted)
|
|
105
|
+
except Exception as exc:
|
|
106
|
+
logger.debug("Tier evaluation skipped: %s", exc)
|
|
107
|
+
|
|
108
|
+
# V3.4.10: Check if auto-backup is due
|
|
109
|
+
try:
|
|
110
|
+
from superlocalmemory.infra.backup import BackupManager
|
|
111
|
+
manager = BackupManager(db_path=self._db.db_path)
|
|
112
|
+
filename = manager.check_and_backup()
|
|
113
|
+
if filename:
|
|
114
|
+
logger.info("Auto-backup created: %s", filename)
|
|
115
|
+
self._sync_cloud_destinations(manager)
|
|
116
|
+
except Exception as exc:
|
|
117
|
+
logger.debug("Auto-backup check skipped: %s", exc)
|
|
118
|
+
|
|
119
|
+
self._schedule_next()
|
|
120
|
+
|
|
121
|
+
def _sync_cloud_destinations(self, manager: object) -> None:
    """Push the latest backup to the configured cloud destinations.

    Best-effort: an ImportError (for instance when the cloud_backup module
    is not available) is ignored silently, and any other failure is logged
    as a warning. ``manager`` is accepted but not used by this method.
    """
    try:
        from superlocalmemory.infra.cloud_backup import (
            sync_all_destinations as push_to_cloud,
        )
        push_to_cloud(self._db.db_path)
    except ImportError:
        # cloud_backup module not available yet
        return
    except Exception as exc:
        logger.warning("Cloud sync failed (non-critical): %s", exc)
|
|
89
130
|
|
|
90
131
|
def __del__(self) -> None:
|
|
91
132
|
try:
|
|
@@ -226,6 +226,15 @@ def run_recall(
|
|
|
226
226
|
except Exception as exc:
|
|
227
227
|
logger.debug("Access log batch store failed: %s", exc)
|
|
228
228
|
|
|
229
|
+
# V3.4.11: Promote recalled facts back to active tier (single batch UPDATE)
|
|
230
|
+
if response.results:
|
|
231
|
+
try:
|
|
232
|
+
from superlocalmemory.core.tier_manager import promote_on_access_batch
|
|
233
|
+
fact_ids = [r.fact.fact_id for r in response.results[:10]]
|
|
234
|
+
promote_on_access_batch(db, fact_ids)
|
|
235
|
+
except Exception:
|
|
236
|
+
pass # tier_manager not available yet — graceful
|
|
237
|
+
|
|
229
238
|
# V3.3.16: Behavioral tracking + spaced repetition use module-level
|
|
230
239
|
# singletons to avoid creating new objects per recall (was causing
|
|
231
240
|
# object accumulation across 304 benchmark recalls).
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""SuperLocalMemory V3.4.11 "Scale-Ready" — Tier Manager.
|
|
6
|
+
|
|
7
|
+
Manages the lifecycle tiers of atomic facts:
|
|
8
|
+
- active (hot): Recent, frequently accessed. Full retrieval priority.
|
|
9
|
+
- warm: Consolidated or aging. Reduced retrieval weight (0.7x).
|
|
10
|
+
- cold: Old, rarely accessed. Low retrieval weight (0.3x).
|
|
11
|
+
- archived: Superseded or consolidated. Excluded from default retrieval,
|
|
12
|
+
but searchable via deep recall.
|
|
13
|
+
|
|
14
|
+
CRITICAL RULE: Facts are NEVER deleted. Only moved between tiers.
|
|
15
|
+
The forgetting curve affects RETRIEVAL RANKING, not data existence.
|
|
16
|
+
|
|
17
|
+
Demotion logic: A fact is demoted based on time since last access
|
|
18
|
+
(via fact_retention.last_accessed_at), NOT total age from created_at.
|
|
19
|
+
If no access record exists, created_at is used as fallback.
|
|
20
|
+
|
|
21
|
+
Runs on the maintenance scheduler alongside Ebbinghaus/Langevin.
|
|
22
|
+
|
|
23
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
from datetime import datetime, timedelta, UTC
|
|
30
|
+
from typing import TYPE_CHECKING
|
|
31
|
+
|
|
32
|
+
if TYPE_CHECKING:
|
|
33
|
+
from superlocalmemory.storage.database import DatabaseManager
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
# Tier thresholds (configurable via SLMConfig in future)
# ---------------------------------------------------------------------------

# Base idle periods in days. _demote_tier multiplies the relevant one by a
# boost factor (see below) before comparing it with a fact's idle time.
WARM_AFTER_DAYS = 30      # active → warm after 30 days without access
COLD_AFTER_DAYS = 180     # warm → cold after 180 days without access
ARCHIVE_AFTER_DAYS = 365  # cold → archived after 365 days without access

# Facts accessed at least this many times get their idle period multiplied.
ACCESS_BOOST_THRESHOLD = 5  # 5+ accesses: boost demotion timer
ACCESS_BOOST_MULTIPLIER = 2.0

# Facts with at least this importance get their idle period multiplied.
IMPORTANCE_RESIST_THRESHOLD = 0.8  # importance >= 0.8: boost demotion timer
IMPORTANCE_RESIST_MULTIPLIER = 3.0

# Cap: when both boosts apply, use max (not multiplicative) to prevent
# 6x suppression (which would delay archival for 6+ years).
MAX_COMBINED_MULTIPLIER = 3.0

# Number of rows _demote_tier fetches per query page.
_BATCH_SIZE = 1000  # Process facts in batches to prevent OOM at scale
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def evaluate_tiers(
    db: DatabaseManager,
    profile_id: str = "default",
    dry_run: bool = False,
) -> dict[str, int]:
    """Evaluate and update lifecycle tiers for all facts in a profile.

    Rules:
    1. Pinned facts ALWAYS stay 'active' regardless of age/access.
    2. Recently accessed facts resist demotion (access_count boost).
    3. High-importance facts resist demotion (importance boost).
    4. Boosts cap at MAX_COMBINED_MULTIPLIER (3x), not multiplicative.
    5. NEVER delete facts. NEVER.

    Args:
        db: Database handle; ``db.execute(sql, params)`` is expected to
            return rows addressable by column name.
        profile_id: Profile whose facts are evaluated.
        dry_run: When true, demotions are counted but not written.

    Returns:
        Dict with ``demoted_to_warm``, ``demoted_to_cold``,
        ``demoted_to_archive``, ``pinned_protected`` (number of pinned facts
        in the profile) and ``total_evaluated`` (non-archived facts present
        when the evaluation started).
    """
    stats = {
        "demoted_to_warm": 0,
        "demoted_to_cold": 0,
        "demoted_to_archive": 0,
        "pinned_protected": 0,
        "total_evaluated": 0,
    }

    now = datetime.now(UTC)
    pinned_ids = _get_pinned_fact_ids(db, profile_id)

    # Count only non-archived facts (archived are not inspected by
    # _demote_tier). Counted BEFORE demoting: afterwards the facts archived by
    # this very run would be missing, and dry-run and real runs would disagree.
    rows = db.execute(
        "SELECT COUNT(*) as c FROM atomic_facts "
        "WHERE profile_id = ? AND lifecycle != 'archived'",
        (profile_id,),
    )
    stats["total_evaluated"] = rows[0]["c"] if rows else 0

    # Demotion steps run hottest tier first, in the original order.
    transitions = (
        ("demoted_to_warm", "active", "warm", WARM_AFTER_DAYS),
        ("demoted_to_cold", "warm", "cold", COLD_AFTER_DAYS),
        ("demoted_to_archive", "cold", "archived", ARCHIVE_AFTER_DAYS),
    )
    for stat_key, from_tier, to_tier, base_days in transitions:
        stats[stat_key] = _demote_tier(
            db, profile_id, from_tier, to_tier,
            base_days, pinned_ids, now, dry_run,
        )

    stats["pinned_protected"] = len(pinned_ids)

    total_demoted = (
        stats["demoted_to_warm"]
        + stats["demoted_to_cold"]
        + stats["demoted_to_archive"]
    )
    if total_demoted > 0:
        logger.info(
            "Tier evaluation: %d demoted (warm=%d, cold=%d, archive=%d), %d pinned",
            total_demoted, stats["demoted_to_warm"],
            stats["demoted_to_cold"], stats["demoted_to_archive"],
            stats["pinned_protected"],
        )

    return stats
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def promote_on_access_batch(db: DatabaseManager, fact_ids: list[str]) -> int:
    """Batch-promote facts back to 'active' when accessed during recall.

    Only facts currently in the 'warm' or 'cold' tier are changed; 'archived'
    and 'active' facts are left alone.

    IDs are de-duplicated and sent in chunks of 500 so that a large list
    cannot exceed SQLite's bound-parameter limit. For the small lists used on
    the recall hot path this is still a single UPDATE.

    Args:
        db: Database handle exposing ``execute(sql, params)``.
        fact_ids: IDs of the facts that were just recalled.

    Returns:
        ``len(fact_ids)`` — the number of IDs submitted, NOT the number of
        rows whose tier actually changed. ``0`` for an empty list.
    """
    if not fact_ids:
        return 0

    # dict.fromkeys drops duplicates while keeping first-seen order.
    unique_ids = list(dict.fromkeys(fact_ids))

    for start in range(0, len(unique_ids), 500):
        chunk = unique_ids[start:start + 500]
        placeholders = ",".join("?" * len(chunk))
        db.execute(
            f"UPDATE atomic_facts SET lifecycle = 'active' "
            f"WHERE fact_id IN ({placeholders}) AND lifecycle IN ('warm', 'cold')",
            tuple(chunk),
        )

    return len(fact_ids)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def promote_on_access(db: DatabaseManager, fact_id: str) -> None:
    """Move one recalled fact back to the 'active' tier.

    Only a fact currently in 'warm' or 'cold' is changed. Retained for
    backward compatibility; promote_on_access_batch is the preferred entry
    point.
    """
    promote_sql = (
        "UPDATE atomic_facts SET lifecycle = 'active' "
        "WHERE fact_id = ? AND lifecycle IN ('warm', 'cold')"
    )
    params = (fact_id,)
    db.execute(promote_sql, params)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def pin_fact(
|
|
151
|
+
db: DatabaseManager,
|
|
152
|
+
fact_id: str,
|
|
153
|
+
profile_id: str,
|
|
154
|
+
reason: str = "",
|
|
155
|
+
) -> bool:
|
|
156
|
+
"""Pin a fact to stay in active tier forever.
|
|
157
|
+
|
|
158
|
+
Both the pin record and lifecycle update are scoped to profile_id.
|
|
159
|
+
"""
|
|
160
|
+
now = datetime.now(UTC).isoformat()
|
|
161
|
+
try:
|
|
162
|
+
db.execute(
|
|
163
|
+
"INSERT OR REPLACE INTO pinned_facts "
|
|
164
|
+
"(fact_id, profile_id, pinned_at, reason) VALUES (?, ?, ?, ?)",
|
|
165
|
+
(fact_id, profile_id, now, reason),
|
|
166
|
+
)
|
|
167
|
+
db.execute(
|
|
168
|
+
"UPDATE atomic_facts SET lifecycle = 'active' "
|
|
169
|
+
"WHERE fact_id = ? AND profile_id = ?",
|
|
170
|
+
(fact_id, profile_id),
|
|
171
|
+
)
|
|
172
|
+
return True
|
|
173
|
+
except Exception as exc:
|
|
174
|
+
logger.warning("Failed to pin fact %s: %s", fact_id, exc, exc_info=True)
|
|
175
|
+
return False
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def unpin_fact(
    db: DatabaseManager,
    fact_id: str,
    profile_id: str | None = None,
) -> bool:
    """Unpin a fact, allowing normal tier demotion to resume.

    Args:
        db: Database handle exposing ``execute(sql, params)``.
        fact_id: ID of the fact to unpin.
        profile_id: When given, only the pin belonging to this profile is
            removed, mirroring the profile scoping of ``pin_fact``. When
            omitted (the original behaviour), every pin record for
            ``fact_id`` is removed.

    Returns:
        True when the DELETE ran (also when no pin existed), False when it
        raised; the failure is logged as a warning and not re-raised.
    """
    try:
        if profile_id is None:
            db.execute("DELETE FROM pinned_facts WHERE fact_id = ?", (fact_id,))
        else:
            db.execute(
                "DELETE FROM pinned_facts WHERE fact_id = ? AND profile_id = ?",
                (fact_id, profile_id),
            )
        return True
    except Exception as exc:
        logger.warning("Failed to unpin fact %s: %s", fact_id, exc, exc_info=True)
        return False
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def get_tier_stats(db: DatabaseManager, profile_id: str = "default") -> dict:
    """Summarise how a profile's facts are spread over the lifecycle tiers.

    Returns a dict with the fact count per tier (``active``, ``warm``,
    ``cold``, ``archived``), the overall ``total``, the number of ``pinned``
    facts and ``active_pct`` (share of active facts, one decimal place).
    """
    tier_rows = db.execute(
        "SELECT lifecycle, COUNT(*) as cnt FROM atomic_facts "
        "WHERE profile_id = ? GROUP BY lifecycle",
        (profile_id,),
    )
    counts = {}
    for tier_row in tier_rows:
        counts[tier_row["lifecycle"]] = tier_row["cnt"]

    pin_rows = db.execute(
        "SELECT COUNT(*) as c FROM pinned_facts WHERE profile_id = ?",
        (profile_id,),
    )
    if pin_rows:
        pinned_count = pin_rows[0]["c"]
    else:
        pinned_count = 0

    grand_total = sum(counts.values())
    active_count = counts.get("active", 0)
    # max(..., 1) keeps the percentage defined for a profile without facts.
    active_share = round(active_count / max(grand_total, 1) * 100, 1)

    return {
        "active": active_count,
        "warm": counts.get("warm", 0),
        "cold": counts.get("cold", 0),
        "archived": counts.get("archived", 0),
        "total": grand_total,
        "pinned": pinned_count,
        "active_pct": active_share,
    }
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# ---------------------------------------------------------------------------
|
|
216
|
+
# Internal helpers
|
|
217
|
+
# ---------------------------------------------------------------------------
|
|
218
|
+
|
|
219
|
+
def _get_pinned_fact_ids(db: DatabaseManager, profile_id: str) -> frozenset[str]:
    """Return the IDs of every fact pinned in ``profile_id``.

    If the lookup raises, the failure is logged as a warning and an empty
    set is returned, so callers then see no pinned facts at all.
    """
    try:
        pinned_rows = db.execute(
            "SELECT fact_id FROM pinned_facts WHERE profile_id = ?",
            (profile_id,),
        )
        pinned = frozenset([pinned_row["fact_id"] for pinned_row in pinned_rows])
    except Exception as exc:
        logger.warning(
            "Failed to load pinned facts for profile %s: %s",
            profile_id, exc, exc_info=True,
        )
        pinned = frozenset()
    return pinned
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _demote_tier(
    db: DatabaseManager,
    profile_id: str,
    from_tier: str,
    to_tier: str,
    base_days: int,
    pinned_ids: frozenset[str],
    now: datetime,
    dry_run: bool,
) -> int:
    """Demote facts from one tier to the next based on idle time.

    Uses last_accessed_at from fact_retention as the reference date
    (time since last access). Falls back to created_at if no access record.
    Rows are read in pages of _BATCH_SIZE, ordered by fact_id: LIMIT/OFFSET
    paging is only stable when the row order is fixed.

    A fact is demoted when its idle time reaches ``base_days`` multiplied by
    its boost factor: the larger of the access boost and the importance
    boost, capped at MAX_COMBINED_MULTIPLIER. Pinned facts, facts without a
    reference timestamp and facts whose timestamp cannot be parsed are
    skipped.

    Args:
        db: Database handle; ``execute(sql, params)`` is expected to return
            rows addressable by column name.
        profile_id: Profile whose facts are inspected.
        from_tier: Lifecycle value to demote from.
        to_tier: Lifecycle value to demote to.
        base_days: Idle period in days before boosts are applied.
        pinned_ids: Fact IDs that must never be demoted.
        now: Timezone-aware "current time" the idle period is measured from.
        dry_run: When true, qualifying facts are counted but not updated.

    Returns:
        Number of distinct facts that qualified for demotion.
    """
    # Dict used as an ordered set: the LEFT JOIN may yield the same fact more
    # than once, but each fact must be updated and counted only once.
    demoted: dict[str, None] = {}
    offset = 0

    while True:
        rows = db.execute(
            "SELECT af.fact_id, af.access_count, af.importance, "
            "       af.created_at, fr.last_accessed_at "
            "FROM atomic_facts af "
            "LEFT JOIN fact_retention fr ON af.fact_id = fr.fact_id "
            "WHERE af.profile_id = ? AND af.lifecycle = ? "
            "ORDER BY af.fact_id "
            "LIMIT ? OFFSET ?",
            (profile_id, from_tier, _BATCH_SIZE, offset),
        )

        if not rows:
            break

        for row in rows:
            fid = row["fact_id"]

            if fid in pinned_ids or fid in demoted:
                continue

            access_count = row["access_count"] or 0
            access_mult = (
                ACCESS_BOOST_MULTIPLIER
                if access_count >= ACCESS_BOOST_THRESHOLD
                else 1.0
            )

            importance = row["importance"] or 0.5
            importance_mult = (
                IMPORTANCE_RESIST_MULTIPLIER
                if importance >= IMPORTANCE_RESIST_THRESHOLD
                else 1.0
            )

            # Cap: use max of the two boosts, not multiplicative
            effective_days = float(base_days) * min(
                max(access_mult, importance_mult), MAX_COMBINED_MULTIPLIER,
            )

            # Reference: last access time, fallback to created_at
            ref_str = row["last_accessed_at"] or row["created_at"] or ""
            if not ref_str:
                continue

            try:
                ref_date = datetime.fromisoformat(ref_str.replace("Z", "+00:00"))
            except (ValueError, TypeError, AttributeError):
                # Unparseable or non-string timestamp: leave the fact alone.
                continue

            # Naive timestamps are interpreted as UTC so they can be compared
            # with the timezone-aware `now`.
            if ref_date.tzinfo is None:
                ref_date = ref_date.replace(tzinfo=UTC)

            if now - ref_date < timedelta(days=effective_days):
                continue

            demoted[fid] = None

        if len(rows) < _BATCH_SIZE:
            break
        offset += _BATCH_SIZE

    demoted_ids = list(demoted)

    if demoted_ids and not dry_run:
        # Batch UPDATE in chunks of 500
        for i in range(0, len(demoted_ids), 500):
            batch = demoted_ids[i:i + 500]
            placeholders = ",".join("?" * len(batch))
            db.execute(
                f"UPDATE atomic_facts SET lifecycle = ? "
                f"WHERE fact_id IN ({placeholders}) AND lifecycle = ?",
                (to_tier, *batch, from_tier),
            )

    return len(demoted_ids)
|