superlocalmemory 3.2.3 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -1
- package/README.md +106 -71
- package/package.json +1 -2
- package/pyproject.toml +16 -1
- package/src/superlocalmemory/cli/commands.py +309 -0
- package/src/superlocalmemory/cli/main.py +44 -0
- package/src/superlocalmemory/core/config.py +276 -4
- package/src/superlocalmemory/core/consolidation_engine.py +37 -0
- package/src/superlocalmemory/core/engine.py +21 -0
- package/src/superlocalmemory/core/engine_wiring.py +58 -8
- package/src/superlocalmemory/dynamics/activation_guided_quantization.py +374 -0
- package/src/superlocalmemory/dynamics/eap_scheduler.py +276 -0
- package/src/superlocalmemory/dynamics/ebbinghaus_langevin_coupling.py +171 -0
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +804 -0
- package/src/superlocalmemory/hooks/auto_invoker.py +46 -8
- package/src/superlocalmemory/hooks/auto_parameterize.py +147 -0
- package/src/superlocalmemory/infra/heartbeat_monitor.py +140 -0
- package/src/superlocalmemory/infra/pid_manager.py +193 -0
- package/src/superlocalmemory/infra/process_reaper.py +572 -0
- package/src/superlocalmemory/learning/consolidation_quantization_worker.py +115 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +263 -0
- package/src/superlocalmemory/learning/quantization_scheduler.py +320 -0
- package/src/superlocalmemory/math/ebbinghaus.py +309 -0
- package/src/superlocalmemory/math/fisher_quantized.py +251 -0
- package/src/superlocalmemory/math/hopfield.py +279 -0
- package/src/superlocalmemory/math/polar_quant.py +379 -0
- package/src/superlocalmemory/math/qjl.py +115 -0
- package/src/superlocalmemory/mcp/server.py +2 -0
- package/src/superlocalmemory/mcp/tools_v3.py +10 -0
- package/src/superlocalmemory/mcp/tools_v33.py +351 -0
- package/src/superlocalmemory/parameterization/__init__.py +47 -0
- package/src/superlocalmemory/parameterization/pattern_extractor.py +534 -0
- package/src/superlocalmemory/parameterization/pii_filter.py +106 -0
- package/src/superlocalmemory/parameterization/prompt_injector.py +216 -0
- package/src/superlocalmemory/parameterization/prompt_lifecycle.py +275 -0
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +425 -0
- package/src/superlocalmemory/retrieval/engine.py +21 -3
- package/src/superlocalmemory/retrieval/forgetting_filter.py +145 -0
- package/src/superlocalmemory/retrieval/hopfield_channel.py +335 -0
- package/src/superlocalmemory/retrieval/quantization_aware_search.py +133 -0
- package/src/superlocalmemory/retrieval/strategy.py +16 -6
- package/src/superlocalmemory/server/routes/agents.py +68 -8
- package/src/superlocalmemory/server/routes/learning.py +18 -1
- package/src/superlocalmemory/server/routes/lifecycle.py +36 -17
- package/src/superlocalmemory/server/routes/v3_api.py +503 -1
- package/src/superlocalmemory/storage/database.py +206 -0
- package/src/superlocalmemory/storage/embedding_migrator.py +178 -0
- package/src/superlocalmemory/storage/migration_v33.py +140 -0
- package/src/superlocalmemory/storage/quantized_store.py +261 -0
- package/src/superlocalmemory/storage/schema_v32.py +137 -0
- package/conftest.py +0 -5
|
@@ -962,3 +962,209 @@ class DatabaseManager:
|
|
|
962
962
|
"DELETE FROM core_memory_blocks WHERE profile_id = ?",
|
|
963
963
|
(profile_id,),
|
|
964
964
|
)
|
|
965
|
+
|
|
966
|
+
# ------------------------------------------------------------------
|
|
967
|
+
# Phase A: Fact Retention CRUD (Forgetting Brain)
|
|
968
|
+
# ------------------------------------------------------------------
|
|
969
|
+
|
|
970
|
+
def get_retention(self, fact_id: str, profile_id: str) -> dict | None:
    """Fetch the retention record of one fact.

    Queries ``fact_retention`` for the row matching both ``fact_id`` and
    ``profile_id``; every value is passed as a bound parameter.

    Returns:
        A dict keyed by column name built from the first matching row,
        or ``None`` when nothing matches.
    """
    query = (
        "SELECT fact_id, retention_score, memory_strength, access_count, "
        " last_accessed_at, lifecycle_zone, last_computed_at "
        "FROM fact_retention WHERE fact_id = ? AND profile_id = ?"
    )
    matches = self.execute(query, (fact_id, profile_id))
    if not matches:
        return None
    return dict(matches[0])
|
|
983
|
+
|
|
984
|
+
def batch_get_retention(
    self, fact_ids: list[str], profile_id: str,
) -> list[dict]:
    """Fetch retention data for a batch of facts.

    The ``IN`` clause is built from ``?`` placeholders only (never from
    the values themselves). Because SQLite limits how many parameters a
    single statement may bind, the ids are queried in chunks rather than
    in one unbounded statement.

    Args:
        fact_ids: Fact ids to look up. Duplicates are collapsed so each
            stored row is returned at most once.
        profile_id: Profile the rows must belong to.

    Returns:
        One dict (``fact_id``, ``retention_score``, ``lifecycle_zone``)
        per row found. Ids without a row are simply absent.
    """
    if not fact_ids:
        return []

    # Order-preserving de-duplication: a repeated id landing in two
    # different chunks would otherwise produce the same row twice.
    unique_ids = list(dict.fromkeys(fact_ids))

    # Stay well below the 999-parameter default of older SQLite builds
    # (chunk + 1 for profile_id).
    chunk_size = 500
    results: list[dict] = []
    for start in range(0, len(unique_ids), chunk_size):
        chunk = unique_ids[start:start + chunk_size]
        placeholders = ",".join("?" * len(chunk))
        rows = self.execute(
            "SELECT fact_id, retention_score, lifecycle_zone "
            "FROM fact_retention "
            f"WHERE fact_id IN ({placeholders}) AND profile_id = ?",
            (*chunk, profile_id),
        )
        results.extend(dict(r) for r in rows)
    return results
|
|
1003
|
+
|
|
1004
|
+
def upsert_retention(
    self,
    fact_id: str,
    profile_id: str,
    retention_score: float,
    memory_strength: float,
    access_count: int,
    last_accessed_at: str,
    lifecycle_zone: str,
) -> None:
    """Insert or update the retention row of a fact.

    A new row is inserted when ``fact_id`` is unknown; otherwise the
    existing row's score, strength, access count, last-access time and
    zone are overwritten with the supplied values. ``last_computed_at``
    is always set to the database's current time.

    The conflict branch also refreshes ``last_accessed_at``; previously
    that argument was only honoured on first insert, so an updated row
    kept a stale access time next to a fresh ``access_count``.

    All values are passed as bound parameters.

    NOTE(review): the conflict target is ``fact_id`` alone, so
    ``profile_id`` is not changed on update — confirm fact ids are
    unique across profiles. Any retry-on-busy behaviour lives in
    ``execute()`` and is not visible here.
    """
    self.execute(
        "INSERT INTO fact_retention "
        "(fact_id, profile_id, retention_score, memory_strength, "
        " access_count, last_accessed_at, lifecycle_zone, last_computed_at) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now')) "
        "ON CONFLICT(fact_id) DO UPDATE SET "
        " retention_score = excluded.retention_score, "
        " memory_strength = excluded.memory_strength, "
        " access_count = excluded.access_count, "
        " last_accessed_at = excluded.last_accessed_at, "
        " lifecycle_zone = excluded.lifecycle_zone, "
        " last_computed_at = excluded.last_computed_at",
        (fact_id, profile_id, retention_score, memory_strength,
         access_count, last_accessed_at, lifecycle_zone),
    )
|
|
1033
|
+
|
|
1034
|
+
def batch_upsert_retention(
    self, facts: list[dict], profile_id: str,
) -> int:
    """Upsert many retention records inside one ``self.transaction()``.

    Every entry of ``facts`` must provide the keys ``fact_id``,
    ``retention``, ``strength``, ``access_count``, ``last_accessed_at``
    and ``zone``; a missing key raises ``KeyError`` from inside the
    transaction block.

    Returns:
        The number of entries handed to ``upsert_retention``.
    """
    # (upsert_retention keyword, key in the incoming dict), in lookup order.
    key_map = (
        ("fact_id", "fact_id"),
        ("retention_score", "retention"),
        ("memory_strength", "strength"),
        ("access_count", "access_count"),
        ("last_accessed_at", "last_accessed_at"),
        ("lifecycle_zone", "zone"),
    )
    written = 0
    with self.transaction():
        for record in facts:
            values = {param: record[key] for param, key in key_map}
            self.upsert_retention(profile_id=profile_id, **values)
            written += 1
    return written
|
|
1058
|
+
|
|
1059
|
+
def get_facts_needing_decay(self, profile_id: str) -> list[dict]:
    """Return the profile's facts that are subject to decay computation.

    Selects ``fact_id``, ``created_at`` and ``profile_id`` from
    ``atomic_facts`` for the profile, leaving out every fact whose id is
    listed in the JSON array ``source_fact_ids`` of one of the profile's
    ``core_memory_blocks`` (core-memory facts are exempt from
    forgetting).

    NULL entries of those arrays are ignored: with a NULL in the
    subquery result, ``NOT IN`` evaluates to NULL for every non-matching
    fact and the query would return no rows at all.

    The former ``LEFT JOIN fact_retention`` was dropped; it neither
    selected nor filtered on retention columns.

    All values are passed as bound parameters.
    """
    rows = self.execute(
        "SELECT f.fact_id, f.created_at, f.profile_id "
        "FROM atomic_facts f "
        "WHERE f.profile_id = ? "
        "AND f.fact_id NOT IN ("
        " SELECT json_each.value "
        " FROM core_memory_blocks, json_each(core_memory_blocks.source_fact_ids) "
        " WHERE core_memory_blocks.profile_id = ? "
        " AND json_each.value IS NOT NULL"
        ")",
        (profile_id, profile_id),
    )
    return [dict(r) for r in rows]
|
|
1078
|
+
|
|
1079
|
+
def soft_delete_fact(self, fact_id: str, profile_id: str) -> None:
    """Mark a fact as forgotten without removing any row.

    When ``fact_retention`` holds a row for ``(fact_id, profile_id)``:
    that row gets ``lifecycle_zone = 'forgotten'`` and
    ``retention_score = 0.0``, and the matching ``atomic_facts`` row gets
    ``lifecycle = 'archived'``. No DELETE statement is issued.

    When no retention row exists, a warning is logged and nothing is
    changed, so calling this for an unknown fact is harmless.
    """
    key = (fact_id, profile_id)

    tracked = self.execute(
        "SELECT fact_id FROM fact_retention WHERE fact_id = ? AND profile_id = ?",
        key,
    )
    if tracked:
        # Retention side: zero the score and move to the 'forgotten' zone.
        self.execute(
            "UPDATE fact_retention SET lifecycle_zone = 'forgotten', "
            " retention_score = 0.0 "
            "WHERE fact_id = ? AND profile_id = ?",
            key,
        )
        # Fact side: flag the fact itself as archived.
        self.execute(
            "UPDATE atomic_facts SET lifecycle = 'archived' "
            "WHERE fact_id = ? AND profile_id = ?",
            key,
        )
        return

    logger.warning(
        "soft_delete_fact: fact_id=%s not found in fact_retention, skipping",
        fact_id,
    )
|
|
1114
|
+
|
|
1115
|
+
# ------------------------------------------------------------------
|
|
1116
|
+
# Phase E: CCQ Consolidated Blocks & Audit CRUD
|
|
1117
|
+
# ------------------------------------------------------------------
|
|
1118
|
+
|
|
1119
|
+
def store_ccq_block(
    self,
    block_id: str,
    profile_id: str,
    content: str,
    source_fact_ids: str,
    gist_embedding_rowid: int | None,
    char_count: int,
    cluster_id: str,
) -> None:
    """Insert one row into ``ccq_consolidated_blocks``.

    ``compiled_by`` is always written as the literal ``'ccq'`` and
    ``created_at`` as the database's current time; every other column
    comes from the arguments, passed as bound parameters.
    """
    insert_sql = (
        "INSERT INTO ccq_consolidated_blocks "
        "(block_id, profile_id, content, source_fact_ids, "
        " gist_embedding_rowid, char_count, compiled_by, cluster_id, created_at) "
        "VALUES (?, ?, ?, ?, ?, ?, 'ccq', ?, datetime('now'))"
    )
    values = (
        block_id,
        profile_id,
        content,
        source_fact_ids,
        gist_embedding_rowid,
        char_count,
        cluster_id,
    )
    self.execute(insert_sql, values)
|
|
1138
|
+
|
|
1139
|
+
def get_ccq_blocks(self, profile_id: str) -> list[dict]:
    """Return every ``ccq_consolidated_blocks`` row of a profile.

    Rows come back newest first (``created_at`` descending), each
    converted to a dict keyed by column name.
    """
    query = (
        "SELECT * FROM ccq_consolidated_blocks "
        "WHERE profile_id = ? ORDER BY created_at DESC"
    )
    return list(map(dict, self.execute(query, (profile_id,))))
|
|
1147
|
+
|
|
1148
|
+
def store_ccq_audit(self, entry: dict) -> None:
    """Insert one row into ``ccq_audit_log`` from ``entry``.

    ``entry`` must contain the keys ``audit_id``, ``profile_id``,
    ``cluster_id``, ``block_id``, ``fact_ids``, ``fact_count``,
    ``gist_text``, ``extraction_mode``, ``bytes_before``,
    ``bytes_after``, ``compression_ratio`` and ``shared_entities``; a
    missing key raises ``KeyError`` before any SQL runs. ``created_at``
    is set to the database's current time. Values are bound parameters.
    """
    # Same order as the column list of the INSERT below.
    fields = (
        "audit_id", "profile_id", "cluster_id",
        "block_id", "fact_ids", "fact_count",
        "gist_text", "extraction_mode",
        "bytes_before", "bytes_after",
        "compression_ratio", "shared_entities",
    )
    values = tuple(entry[name] for name in fields)
    self.execute(
        "INSERT INTO ccq_audit_log "
        "(audit_id, profile_id, cluster_id, block_id, fact_ids, fact_count, "
        " gist_text, extraction_mode, bytes_before, bytes_after, "
        " compression_ratio, shared_entities, created_at) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))",
        values,
    )
|
|
1162
|
+
|
|
1163
|
+
def get_ccq_audit(self, profile_id: str, limit: int = 50) -> list[dict]:
    """Return the most recent ``ccq_audit_log`` rows of a profile.

    Rows are ordered by ``created_at`` descending and capped by
    ``limit``, which is passed to SQL ``LIMIT`` as a bound parameter.
    Each row is converted to a dict keyed by column name.
    """
    query = (
        "SELECT * FROM ccq_audit_log "
        "WHERE profile_id = ? ORDER BY created_at DESC LIMIT ?"
    )
    return list(map(dict, self.execute(query, (profile_id, limit))))
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""Embedding migration on mode/model switch.
|
|
6
|
+
|
|
7
|
+
When a user switches modes (e.g., Mode B Ollama -> Mode A sentence-transformers),
|
|
8
|
+
the embeddings live in different vector spaces. This module detects the mismatch
|
|
9
|
+
and flags facts for progressive re-embedding.
|
|
10
|
+
|
|
11
|
+
Key table: ``embedding_metadata.model_name`` stores the model used for each fact.
|
|
12
|
+
A config-level field in ``config.json`` stores the current model signature.
|
|
13
|
+
|
|
14
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import TYPE_CHECKING, Any
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from superlocalmemory.core.config import SLMConfig
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
# Sentinel stored in config.json when no model has been set yet.
|
|
30
|
+
_NO_MODEL = ""
|
|
31
|
+
|
|
32
|
+
# Batch size for progressive re-embedding.
|
|
33
|
+
_REINDEX_BATCH_SIZE = 50
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _model_signature(config: SLMConfig) -> str:
    """Build the signature string of the active embedding setup.

    The result is ``provider::model_name::dimension`` taken from
    ``config.embedding``, so a change to any of the three yields a
    different signature.
    """
    embedding = config.embedding
    provider = embedding.provider
    model_name = embedding.model_name
    dimension = embedding.dimension
    return f"{provider}::{model_name}::{dimension}"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _read_stored_signature(config_dir: Path) -> str:
    """Read the last-used embedding signature from ``config.json``.

    Args:
        config_dir: Directory expected to contain ``config.json``.

    Returns:
        The string stored under ``"embedding_signature"``, or the
        ``_NO_MODEL`` sentinel when the file is missing or unreadable,
        is not valid UTF-8 JSON, does not hold a JSON object, or holds a
        non-string value under that key.
    """
    config_path = config_dir / "config.json"
    try:
        # JSON is UTF-8 by specification; do not depend on the locale.
        data = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers
        # both json.JSONDecodeError and UnicodeDecodeError.
        return _NO_MODEL

    if not isinstance(data, dict):
        # e.g. a top-level list: there is no key to look up.
        return _NO_MODEL

    signature = data.get("embedding_signature", _NO_MODEL)
    return signature if isinstance(signature, str) else _NO_MODEL
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _write_stored_signature(config_dir: Path, signature: str) -> None:
    """Persist the embedding signature into ``config.json``.

    Other keys of an existing JSON object are kept. If the existing file
    cannot be read or decoded, or does not contain a JSON object, a
    warning is logged and the file is rewritten with only the signature
    (the same best-effort outcome as before, but no longer silent and no
    longer a ``TypeError`` for a non-object top level).

    Args:
        config_dir: Directory holding ``config.json``; created if absent.
        signature: Signature string to store under
            ``"embedding_signature"``.
    """
    config_path = config_dir / "config.json"
    data: dict[str, Any] = {}
    try:
        loaded = json.loads(config_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # Nothing stored yet: start from an empty object.
        pass
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.warning(
            "Could not load %s (%s); rewriting it with only the "
            "embedding signature.",
            config_path, exc,
        )
    else:
        if isinstance(loaded, dict):
            data = loaded
        else:
            logger.warning(
                "%s does not contain a JSON object; rewriting it with "
                "only the embedding signature.",
                config_path,
            )

    data["embedding_signature"] = signature
    config_path.parent.mkdir(parents=True, exist_ok=True)
    config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def check_embedding_migration(config: SLMConfig) -> bool:
    """Report whether the embedding model differs from the stored one.

    Compares the signature of ``config`` with the one persisted under
    ``config.base_dir``.

    Returns:
        ``True`` when a different signature was stored previously
        (re-indexing needed). ``False`` when the signatures are equal,
        or when nothing was stored yet — in that case the current
        signature is written out as the baseline.
    """
    active = _model_signature(config)
    previous = _read_stored_signature(config.base_dir)

    if previous == _NO_MODEL:
        # No baseline yet: record the current model; nothing to migrate.
        _write_stored_signature(config.base_dir, active)
        logger.info("Embedding signature initialized: %s", active)
        return False

    changed = previous != active
    if changed:
        logger.warning(
            "Embedding model changed: %s -> %s. Re-indexing required.",
            previous, active,
        )
    return changed
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def run_embedding_migration(
    config: SLMConfig,
    db: Any,
    embedder: Any,
) -> int:
    """Re-embed the active profile's facts with the current model.

    Facts are read from ``atomic_facts`` (oldest first) and embedded in
    batches of ``_REINDEX_BATCH_SIZE``. For each fact that gets a vector,
    ``atomic_facts.embedding`` is overwritten with its JSON form and
    ``embedding_metadata.model_name`` is set to the configured model.

    The stored signature is updated only when every batch was embedded.
    If ``embed_batch`` raises, the loop stops and the signature is left
    untouched, so the next ``check_embedding_migration`` still reports a
    mismatch and the migration is retried. (Previously the signature was
    written even after an aborted run, marking a partial migration as
    finished.)

    Args:
        config: Provides ``active_profile``, ``base_dir`` and
            ``embedding.model_name``.
        db: Object with ``execute(sql, params)`` returning rows
            convertible with ``dict(row)``.
        embedder: Object with ``embed_batch(texts)`` returning one vector
            (or ``None``) per text, or ``None`` to skip the migration.

    Returns:
        Number of facts whose embedding was rewritten.
    """
    if embedder is None:
        logger.warning("No embedder available. Skipping re-indexing.")
        return 0

    current_sig = _model_signature(config)
    profile_id = config.active_profile

    # Every fact of the profile is re-embedded, not just mismatching ones.
    rows = db.execute(
        "SELECT fact_id, content FROM atomic_facts "
        "WHERE profile_id = ? ORDER BY created_at",
        (profile_id,),
    )
    facts = [(row["fact_id"], row["content"]) for row in map(dict, rows)]
    total = len(facts)

    if total == 0:
        # Nothing to convert: the new model becomes the baseline.
        _write_stored_signature(config.base_dir, current_sig)
        return 0

    logger.info(
        "Re-embedding %d facts with model %s (batch_size=%d)",
        total, current_sig, _REINDEX_BATCH_SIZE,
    )

    model_name = config.embedding.model_name
    reindexed = 0
    completed = True
    for start in range(0, total, _REINDEX_BATCH_SIZE):
        batch = facts[start:start + _REINDEX_BATCH_SIZE]
        texts = [content for _, content in batch]

        try:
            vectors = embedder.embed_batch(texts)
        except Exception as exc:
            logger.error(
                "Re-embedding batch %d-%d failed: %s. Stopping migration.",
                start, start + len(batch), exc,
            )
            completed = False
            break

        for (fid, _), vec in zip(batch, vectors):
            if vec is None:
                continue
            try:
                # Array-like vectors (anything exposing tolist()) are not
                # JSON-serialisable as-is; plain lists pass through.
                payload = vec.tolist() if hasattr(vec, "tolist") else vec
                embedding_json = json.dumps(payload)
                db.execute(
                    "UPDATE atomic_facts SET embedding = ? WHERE fact_id = ?",
                    (embedding_json, fid),
                )
                db.execute(
                    "UPDATE embedding_metadata SET model_name = ? "
                    "WHERE fact_id = ?",
                    (model_name, fid),
                )
                reindexed += 1
            except Exception as exc:
                # Best effort per fact: log and carry on with the rest.
                logger.warning(
                    "Failed to update embedding for fact %s: %s",
                    fid[:16], exc,
                )

    if not completed:
        logger.warning(
            "Embedding migration incomplete: %d/%d facts re-embedded. "
            "Stored signature left unchanged so the next run retries.",
            reindexed, total,
        )
        return reindexed

    _write_stored_signature(config.base_dir, current_sig)
    logger.info(
        "Embedding migration complete: %d/%d facts re-embedded.",
        reindexed, total,
    )
    return reindexed
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""SLM 3.2.3 -> 3.3 migration: idempotent schema upgrade.
|
|
6
|
+
|
|
7
|
+
Detects a 3.2.3 database (missing 3.3 tables) and creates
|
|
8
|
+
new tables via CREATE TABLE IF NOT EXISTS. Safe to call on
|
|
9
|
+
any DB version — existing tables are never altered.
|
|
10
|
+
|
|
11
|
+
New tables in 3.3 (all from Phases A-G):
|
|
12
|
+
- fact_retention (Phase A: Ebbinghaus forgetting)
|
|
13
|
+
- polar_embeddings (Phase B: PolarQuant quantization)
|
|
14
|
+
- embedding_quantization_metadata (Phase B: EAP bit-width tracking)
|
|
15
|
+
- ccq_consolidated_blocks (Phase E: Cognitive Consolidation)
|
|
16
|
+
- ccq_audit_log (Phase E: CCQ audit trail)
|
|
17
|
+
- soft_prompt_templates (Phase F: Learning Brain)
|
|
18
|
+
|
|
19
|
+
All six are already defined in schema_v32.py V32_DDL and created
|
|
20
|
+
by create_all_tables(). This migration module detects their absence
|
|
21
|
+
and creates them for databases that were created before SLM 3.3.
|
|
22
|
+
|
|
23
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
24
|
+
License: MIT
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import logging
|
|
30
|
+
import sqlite3
|
|
31
|
+
from dataclasses import dataclass, field
|
|
32
|
+
from typing import Final
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Tables introduced or required by SLM 3.3 phases
|
|
38
|
+
_V33_TABLES: Final[tuple[str, ...]] = (
|
|
39
|
+
"fact_retention",
|
|
40
|
+
"polar_embeddings",
|
|
41
|
+
"embedding_quantization_metadata",
|
|
42
|
+
"ccq_consolidated_blocks",
|
|
43
|
+
"ccq_audit_log",
|
|
44
|
+
"soft_prompt_templates",
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True)
class MigrationReport:
    """Immutable summary of one migration run."""

    # Number of tables the migration looked at.
    tables_checked: int = 0
    # Tables that were created during this run.
    tables_created: tuple[str, ...] = ()
    # Tables that were already present.
    tables_existed: tuple[str, ...] = ()
    # Human-readable messages for anything that went wrong.
    errors: tuple[str, ...] = ()

    @property
    def is_clean(self) -> bool:
        """Whether the run finished without recording an error."""
        return not self.errors
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _get_existing_tables(conn: sqlite3.Connection) -> frozenset[str]:
    """Collect the names of all tables listed in ``sqlite_master``."""
    cursor = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table'"
    )
    names = [record[0] for record in cursor.fetchall()]
    return frozenset(names)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def detect_v323_database(conn: sqlite3.Connection) -> bool:
    """Tell whether the database lacks any of the 3.3 tables.

    Returns:
        ``True`` if one or more names in ``_V33_TABLES`` are absent from
        the database, ``False`` when all of them exist.
    """
    present = _get_existing_tables(conn)
    missing = [name for name in _V33_TABLES if name not in present]
    return bool(missing)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def migrate_v323_to_v33(conn: sqlite3.Connection) -> MigrationReport:
    """Create whichever SLM 3.3 tables are missing from the database.

    For every name in ``_V33_TABLES`` that is not yet present, the DDL
    entry of ``V32_DDL`` containing
    ``CREATE TABLE IF NOT EXISTS <name>`` is executed. The table name is
    matched as a whole word, so a table whose name merely starts with
    the wanted one (``foo`` vs ``foo_archive``) can no longer be picked
    by mistake. Existing tables are left untouched, which makes repeated
    calls harmless.

    Args:
        conn: Open SQLite connection.

    Returns:
        MigrationReport listing created tables, pre-existing tables and
        error messages (SQLite failures and tables without a DDL entry).

    NOTE(review): ``sqlite3.Connection.executescript`` may implicitly
    COMMIT a pending transaction before running the script (legacy
    transaction control), so callers should not rely on being able to
    roll back work done before this call — confirm against the caller.
    """
    import re

    from superlocalmemory.storage.schema_v32 import V32_DDL

    existing = _get_existing_tables(conn)
    created: list[str] = []
    existed: list[str] = []
    errors: list[str] = []

    for table in _V33_TABLES:
        if table in existing:
            existed.append(table)
            logger.debug("Table %s already exists — skipping", table)
            continue

        # First DDL entry that creates exactly this table; \b rejects
        # longer names sharing the same prefix.
        pattern = re.compile(
            rf"CREATE TABLE IF NOT EXISTS {re.escape(table)}\b"
        )
        ddl = next((stmt for stmt in V32_DDL if pattern.search(stmt)), None)
        if ddl is None:
            msg = f"No DDL found for table: {table}"
            errors.append(msg)
            logger.warning(msg)
            continue

        try:
            conn.executescript(ddl)
        except sqlite3.Error as exc:
            msg = f"Failed to create {table}: {exc}"
            errors.append(msg)
            logger.error(msg)
        else:
            created.append(table)
            logger.info("Created table: %s", table)

    report = MigrationReport(
        tables_checked=len(_V33_TABLES),
        tables_created=tuple(created),
        tables_existed=tuple(existed),
        errors=tuple(errors),
    )

    logger.info(
        "Migration report: checked=%d, created=%d, existed=%d, errors=%d",
        report.tables_checked,
        len(report.tables_created),
        len(report.tables_existed),
        len(report.errors),
    )

    return report
|