superlocalmemory 3.3.19 → 3.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +9 -1
- package/src/superlocalmemory/cli/commands.py +140 -23
- package/src/superlocalmemory/cli/daemon.py +372 -0
- package/src/superlocalmemory/cli/main.py +10 -2
- package/src/superlocalmemory/cli/pending_store.py +158 -0
- package/src/superlocalmemory/cli/setup_wizard.py +39 -6
- package/src/superlocalmemory/code_graph/__init__.py +46 -0
- package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
- package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
- package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
- package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
- package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
- package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
- package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
- package/src/superlocalmemory/code_graph/changes.py +363 -0
- package/src/superlocalmemory/code_graph/communities.py +299 -0
- package/src/superlocalmemory/code_graph/config.py +88 -0
- package/src/superlocalmemory/code_graph/database.py +482 -0
- package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
- package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
- package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
- package/src/superlocalmemory/code_graph/flows.py +350 -0
- package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
- package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
- package/src/superlocalmemory/code_graph/graph_store.py +158 -0
- package/src/superlocalmemory/code_graph/incremental.py +200 -0
- package/src/superlocalmemory/code_graph/models.py +130 -0
- package/src/superlocalmemory/code_graph/parser.py +507 -0
- package/src/superlocalmemory/code_graph/resolver.py +321 -0
- package/src/superlocalmemory/code_graph/search.py +460 -0
- package/src/superlocalmemory/code_graph/service.py +95 -0
- package/src/superlocalmemory/code_graph/watcher.py +207 -0
- package/src/superlocalmemory/core/config.py +4 -3
- package/src/superlocalmemory/core/embedding_worker.py +4 -2
- package/src/superlocalmemory/core/embeddings.py +8 -2
- package/src/superlocalmemory/core/engine.py +32 -0
- package/src/superlocalmemory/core/engine_wiring.py +5 -0
- package/src/superlocalmemory/core/recall_pipeline.py +7 -3
- package/src/superlocalmemory/core/store_pipeline.py +23 -1
- package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
- package/src/superlocalmemory/infra/event_bus.py +5 -0
- package/src/superlocalmemory/mcp/server.py +23 -0
- package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
- package/src/superlocalmemory/retrieval/agentic.py +89 -17
- package/src/superlocalmemory/retrieval/engine.py +137 -2
- package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
- package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
- package/src/superlocalmemory/retrieval/strategy.py +16 -0
- package/src/superlocalmemory/server/api.py +4 -2
- package/src/superlocalmemory/server/ui.py +5 -2
- package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
- package/src/superlocalmemory/ui/index.html +1879 -0
- package/src/superlocalmemory/ui/js/agents.js +192 -0
- package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
- package/src/superlocalmemory/ui/js/behavioral.js +276 -0
- package/src/superlocalmemory/ui/js/clusters.js +206 -0
- package/src/superlocalmemory/ui/js/compliance.js +252 -0
- package/src/superlocalmemory/ui/js/core.js +246 -0
- package/src/superlocalmemory/ui/js/dashboard.js +110 -0
- package/src/superlocalmemory/ui/js/events.js +178 -0
- package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
- package/src/superlocalmemory/ui/js/feedback.js +333 -0
- package/src/superlocalmemory/ui/js/graph-core.js +447 -0
- package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
- package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
- package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
- package/src/superlocalmemory/ui/js/ide-status.js +102 -0
- package/src/superlocalmemory/ui/js/init.js +45 -0
- package/src/superlocalmemory/ui/js/learning.js +435 -0
- package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
- package/src/superlocalmemory/ui/js/math-health.js +98 -0
- package/src/superlocalmemory/ui/js/memories.js +264 -0
- package/src/superlocalmemory/ui/js/modal.js +357 -0
- package/src/superlocalmemory/ui/js/patterns.js +93 -0
- package/src/superlocalmemory/ui/js/profiles.js +236 -0
- package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
- package/src/superlocalmemory/ui/js/search.js +59 -0
- package/src/superlocalmemory/ui/js/settings.js +224 -0
- package/src/superlocalmemory/ui/js/timeline.js +32 -0
- package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
|
@@ -31,7 +31,10 @@ logger = logging.getLogger(__name__)
|
|
|
31
31
|
|
|
32
32
|
_MAX_ROUNDS = 2
|
|
33
33
|
_SUFFICIENCY_SCORE_THRESHOLD = 0.6
|
|
34
|
-
|
|
34
|
+
# V3.3.19: Removed "temporal" from skip list. S15's lesson was with
|
|
35
|
+
# weak alias expansion. The new rule-based decomposer (v3.3.19) helps
|
|
36
|
+
# temporal queries by generating entity+action sub-queries.
|
|
37
|
+
_SKIP_TYPES: frozenset[str] = frozenset() # No types skipped
|
|
35
38
|
|
|
36
39
|
_SUFFICIENCY_SYSTEM = (
|
|
37
40
|
"You evaluate whether retrieved context is sufficient to answer a query. "
|
|
@@ -241,22 +244,91 @@ class AgenticRetriever:
|
|
|
241
244
|
def _heuristic_expand(
|
|
242
245
|
self, query: str, profile_id: str,
|
|
243
246
|
) -> list[str]:
|
|
244
|
-
"""Mode A:
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
for
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
247
|
+
"""Mode A: rule-based query decomposition (no LLM).
|
|
248
|
+
|
|
249
|
+
V3.3.19: Full rewrite. Generates targeted sub-queries by:
|
|
250
|
+
1. Extracting person/place names (real proper nouns only)
|
|
251
|
+
2. Extracting action/event keywords (non-stopwords minus entities)
|
|
252
|
+
3. Combining entity + action for focused retrieval
|
|
253
|
+
4. Entity-only and action-only lookups for broader context
|
|
254
|
+
|
|
255
|
+
For LoCoMo "When did [Person] [Action]?" patterns, this generates:
|
|
256
|
+
"Caroline LGBTQ support group" (entity + action)
|
|
257
|
+
"Caroline" (entity only)
|
|
258
|
+
"LGBTQ support group" (action only)
|
|
259
|
+
"""
|
|
260
|
+
sub_queries: list[str] = []
|
|
261
|
+
|
|
262
|
+
# Extract REAL proper nouns from original query (not title-cased)
|
|
263
|
+
# This avoids the extract_query_entities trap where "Support Group"
|
|
264
|
+
# from title-casing gets treated as entities.
|
|
265
|
+
_STARTERS = {
|
|
266
|
+
"What", "Where", "Who", "Which", "How", "When", "Does", "Did",
|
|
267
|
+
"Can", "Could", "Would", "Should", "Are", "Is", "Was", "Were",
|
|
268
|
+
"Has", "Have", "The", "Tell", "Do",
|
|
269
|
+
}
|
|
270
|
+
entities = [
|
|
271
|
+
m for m in re.findall(r"\b[A-Z][a-z]{2,}\b", query)
|
|
272
|
+
if m not in _STARTERS
|
|
273
|
+
]
|
|
274
|
+
# Also grab all-caps abbreviations (LGBTQ, MIT, NYC)
|
|
275
|
+
abbrevs = re.findall(r"\b[A-Z]{2,}\b", query)
|
|
276
|
+
entities.extend(abbrevs)
|
|
277
|
+
|
|
278
|
+
# Extract action/event keywords (remove question words + entity names)
|
|
279
|
+
_STOP = {
|
|
280
|
+
"when", "did", "does", "do", "what", "where", "who", "which",
|
|
281
|
+
"how", "is", "was", "were", "are", "has", "have", "had",
|
|
282
|
+
"the", "a", "an", "to", "for", "of", "in", "on", "at",
|
|
283
|
+
"and", "or", "but", "with", "from", "about", "that", "this",
|
|
284
|
+
"it", "they", "she", "he", "her", "his", "their", "its",
|
|
285
|
+
"been", "being", "would", "could", "should", "will", "can",
|
|
286
|
+
"may", "might", "not", "no", "so", "if", "by", "up",
|
|
287
|
+
"go", "going", "went", "get", "got", "ago",
|
|
288
|
+
"many", "much", "some", "any", "ever",
|
|
289
|
+
}
|
|
290
|
+
entity_lower = {e.lower() for e in entities}
|
|
291
|
+
words = re.sub(r"[^\w\s]", "", query.lower()).split()
|
|
292
|
+
action_words = [
|
|
293
|
+
w for w in words
|
|
294
|
+
if w not in _STOP and w not in entity_lower and len(w) > 2
|
|
295
|
+
]
|
|
296
|
+
|
|
297
|
+
# Strategy 1: Entity + action keywords (most targeted)
|
|
298
|
+
if entities and action_words:
|
|
299
|
+
action_phrase = " ".join(action_words)
|
|
300
|
+
for ent in entities[:2]:
|
|
301
|
+
sub_queries.append(f"{ent} {action_phrase}")
|
|
302
|
+
|
|
303
|
+
# Strategy 2: Action keywords only (finds the event regardless of entity)
|
|
304
|
+
if action_words:
|
|
305
|
+
sub_queries.append(" ".join(action_words))
|
|
306
|
+
|
|
307
|
+
# Strategy 3: Entity-only lookup (broad context)
|
|
308
|
+
for ent in entities[:2]:
|
|
309
|
+
sub_queries.append(ent)
|
|
310
|
+
|
|
311
|
+
# Strategy 4: Alias expansion (original approach, still useful)
|
|
312
|
+
if self._db is not None:
|
|
313
|
+
for name in entities[:2]:
|
|
314
|
+
entity = self._db.get_entity_by_name(name, profile_id)
|
|
315
|
+
if entity:
|
|
316
|
+
try:
|
|
317
|
+
aliases = self._db.get_aliases_for_entity(entity.entity_id)
|
|
318
|
+
for a in aliases[:2]:
|
|
319
|
+
sub_queries.append(f"{a.alias} {' '.join(action_words)}")
|
|
320
|
+
except Exception:
|
|
321
|
+
pass
|
|
322
|
+
|
|
323
|
+
# Deduplicate, limit to 3 sub-queries (keep round 2 fast)
|
|
324
|
+
seen: set[str] = set()
|
|
325
|
+
unique: list[str] = []
|
|
326
|
+
for sq in sub_queries:
|
|
327
|
+
sq_lower = sq.strip().lower()
|
|
328
|
+
if sq_lower and sq_lower not in seen and sq_lower != query.lower():
|
|
329
|
+
seen.add(sq_lower)
|
|
330
|
+
unique.append(sq.strip())
|
|
331
|
+
return unique[:3]
|
|
260
332
|
|
|
261
333
|
|
|
262
334
|
# ---------------------------------------------------------------------------
|
|
@@ -134,7 +134,7 @@ class RetrievalEngine:
|
|
|
134
134
|
profile_hits = []
|
|
135
135
|
|
|
136
136
|
# Dynamic top-k for aggregation queries
|
|
137
|
-
effective_limit =
|
|
137
|
+
effective_limit = 100 if strat.query_type == "aggregation" else limit
|
|
138
138
|
|
|
139
139
|
# 3. Run 4 channels
|
|
140
140
|
ch_results = self._run_channels(query, profile_id, strat)
|
|
@@ -145,6 +145,14 @@ class RetrievalEngine:
|
|
|
145
145
|
# 3. Single-pass RRF fusion
|
|
146
146
|
fused = weighted_rrf(ch_results, strat.weights, k=self._config.rrf_k)
|
|
147
147
|
|
|
148
|
+
# V3.3.21: Cross-channel intersection boost for multi-hop queries (gated on query_type == "multi_hop").
|
|
149
|
+
# Problem: channels work in ISOLATION. "When did Caroline go to X?" needs
|
|
150
|
+
# entity(Caroline) ∩ temporal(date). RRF averages scores but doesn't enforce
|
|
151
|
+
# the intersection constraint. Fix: boost facts that appear in 2+ signal-type
|
|
152
|
+
# channels (entity+temporal, entity+semantic, temporal+semantic).
|
|
153
|
+
if strat.query_type == "multi_hop" and len(ch_results) >= 2:
|
|
154
|
+
fused = self._apply_cross_channel_intersection(fused, ch_results, strat)
|
|
155
|
+
|
|
148
156
|
# Bridge discovery for multi-hop queries
|
|
149
157
|
# V3.3.19: Only bridge.discover() (86ms). Removed bridge.spreading_activation()
|
|
150
158
|
# which did per-node SQL queries across 254K edges → 78s latency.
|
|
@@ -184,9 +192,23 @@ class RetrievalEngine:
|
|
|
184
192
|
top = fused[:pool]
|
|
185
193
|
facts = self._load_facts(top, profile_id)
|
|
186
194
|
|
|
195
|
+
# V3.3.21: Session diversity for aggregation queries.
|
|
196
|
+
# Cat 1 (single-hop/aggregation) needs facts from MULTIPLE sessions.
|
|
197
|
+
# Without diversity enforcement, top-20 may all come from 1-2 sessions,
|
|
198
|
+
# missing scattered mentions across 19+ sessions.
|
|
199
|
+
if strat.query_type == "aggregation" and facts:
|
|
200
|
+
top = self._enforce_session_diversity(top, facts, min_sessions=3, top_k=20)
|
|
201
|
+
|
|
187
202
|
# 5. Cross-encoder rerank (optional)
|
|
188
203
|
# Bug 4 fix: reduced alpha for multi-hop/temporal to preserve diversity
|
|
189
|
-
|
|
204
|
+
# V3.3.21: Skip reranker if worker isn't ready yet (cold start).
|
|
205
|
+
# Returns results without CE reranking (~5-10pp lower quality) but instant
|
|
206
|
+
# instead of blocking 15-19s on first recall. Worker warms up in background.
|
|
207
|
+
reranker_ready = (
|
|
208
|
+
self._reranker is not None
|
|
209
|
+
and getattr(self._reranker, '_worker_ready', False)
|
|
210
|
+
)
|
|
211
|
+
if reranker_ready and facts:
|
|
190
212
|
ce_alpha = 0.5 if strat.query_type in ("multi_hop", "temporal") else 0.75
|
|
191
213
|
top = self._apply_reranker(query, top, facts, alpha=ce_alpha)
|
|
192
214
|
|
|
@@ -199,6 +221,119 @@ class RetrievalEngine:
|
|
|
199
221
|
total_candidates=total, retrieval_time_ms=ms,
|
|
200
222
|
)
|
|
201
223
|
|
|
224
|
+
# -- Cross-channel intersection boost -----------------------------------
|
|
225
|
+
|
|
226
|
+
@staticmethod
def _apply_cross_channel_intersection(
    fused: list[FusionResult],
    ch_results: dict[str, list[tuple[str, float]]],
    strat: QueryStrategy,
) -> list[FusionResult]:
    """Re-rank fused results in favour of facts seen by several signal groups.

    Every retrieval channel is mapped to a signal group; a channel name
    that is not in the table counts as its own group:

    - content: semantic, bm25, profile
    - structure: entity_graph, spreading_activation
    - temporal: temporal
    - associative: hopfield

    The fused score of each fact is multiplied by a factor that depends
    on how many distinct groups returned that fact:

    - 3 or more groups: 2.0
    - exactly 2 groups, temporal + structure: 1.8
    - any other pair of groups: 1.5
    - 0 or 1 group: 1.0 (unchanged)

    Args:
        fused: Fusion results to rescore; the input list is not modified.
        ch_results: Channel name -> list of ``(fact_id, score)`` hits.
        strat: Query strategy; not read by the current implementation.

    Returns:
        New ``FusionResult`` objects sorted by boosted score, descending.
    """
    group_of_channel = {
        "semantic": "content", "bm25": "content",
        "entity_graph": "structure", "spreading_activation": "structure",
        "temporal": "temporal",
        "hopfield": "associative",
        "profile": "content",
    }

    # fact_id -> distinct signal groups whose channels returned it.
    signals_by_fact: dict[str, set[str]] = {}
    for channel, hits in ch_results.items():
        signal = group_of_channel.get(channel, channel)
        for fact_id, _ in hits:
            signals_by_fact.setdefault(fact_id, set()).add(signal)

    def multiplier(signals: set[str]) -> float:
        """Return the score factor for a fact seen by ``signals``."""
        if len(signals) >= 3:
            return 2.0
        if len(signals) == 2:
            # temporal + structure is the intersection this boost targets.
            if "temporal" in signals and "structure" in signals:
                return 1.8
            return 1.5
        return 1.0

    rescored = [
        FusionResult(
            fact_id=item.fact_id,
            fused_score=item.fused_score
            * multiplier(signals_by_fact.get(item.fact_id, set())),
            channel_ranks=item.channel_ranks,
            channel_scores=item.channel_scores,
        )
        for item in fused
    ]
    # sorted() is stable, so ties keep their pre-boost relative order.
    return sorted(rescored, key=lambda r: r.fused_score, reverse=True)
|
|
287
|
+
|
|
288
|
+
# -- Session diversity enforcement ----------------------------------------
|
|
289
|
+
|
|
290
|
+
@staticmethod
|
|
291
|
+
def _enforce_session_diversity(
|
|
292
|
+
fused: list[FusionResult],
|
|
293
|
+
fact_map: dict[str, AtomicFact],
|
|
294
|
+
min_sessions: int = 3,
|
|
295
|
+
top_k: int = 20,
|
|
296
|
+
) -> list[FusionResult]:
|
|
297
|
+
"""Ensure top-k results span at least min_sessions different session_ids.
|
|
298
|
+
|
|
299
|
+
V3.3.21: Category 1 (aggregation) needs facts from MULTIPLE sessions —
|
|
300
|
+
95.7% of cat 1 questions require cross-session evidence. Without this,
|
|
301
|
+
top-20 may cluster around 1-2 sessions, missing scattered mentions.
|
|
302
|
+
|
|
303
|
+
Algorithm: if top-k has < min_sessions, promote the highest-scored facts
|
|
304
|
+
from underrepresented sessions into the top-k window.
|
|
305
|
+
"""
|
|
306
|
+
if len(fused) <= top_k:
|
|
307
|
+
return fused
|
|
308
|
+
|
|
309
|
+
top = fused[:top_k]
|
|
310
|
+
rest = fused[top_k:]
|
|
311
|
+
|
|
312
|
+
sessions_in_top: set[str] = set()
|
|
313
|
+
for fr in top:
|
|
314
|
+
fact = fact_map.get(fr.fact_id)
|
|
315
|
+
if fact and fact.session_id:
|
|
316
|
+
sessions_in_top.add(fact.session_id)
|
|
317
|
+
|
|
318
|
+
if len(sessions_in_top) >= min_sessions:
|
|
319
|
+
return fused
|
|
320
|
+
|
|
321
|
+
promoted: list[FusionResult] = []
|
|
322
|
+
for fr in rest:
|
|
323
|
+
fact = fact_map.get(fr.fact_id)
|
|
324
|
+
if fact and fact.session_id and fact.session_id not in sessions_in_top:
|
|
325
|
+
sessions_in_top.add(fact.session_id)
|
|
326
|
+
promoted.append(fr)
|
|
327
|
+
if len(sessions_in_top) >= min_sessions:
|
|
328
|
+
break
|
|
329
|
+
|
|
330
|
+
if not promoted:
|
|
331
|
+
return fused
|
|
332
|
+
|
|
333
|
+
promoted_ids = {fr.fact_id for fr in promoted}
|
|
334
|
+
remaining = [fr for fr in rest if fr.fact_id not in promoted_ids]
|
|
335
|
+
return top + promoted + remaining
|
|
336
|
+
|
|
202
337
|
# -- Channel execution --------------------------------------------------
|
|
203
338
|
|
|
204
339
|
def _embed_query(self, query: str) -> list[float] | None:
|
|
@@ -183,8 +183,12 @@ class SemanticChannel:
|
|
|
183
183
|
for fact in facts:
|
|
184
184
|
cos_sim = knn_scores.get(fact.fact_id, 0.0)
|
|
185
185
|
|
|
186
|
-
#
|
|
187
|
-
|
|
186
|
+
# V3.3.21: Fisher-Rao ramp with minimum floor.
|
|
187
|
+
# Bug fix: access_count=0 for fresh facts → Fisher weight=0 → metric DEAD.
|
|
188
|
+
# Paper 2's +12pp on multi-hop came from Fisher-Rao. A 0.15 floor ensures
|
|
189
|
+
# fresh facts still benefit from variance-weighted similarity, while
|
|
190
|
+
# frequently accessed facts get progressively stronger Fisher influence.
|
|
191
|
+
fisher_weight = max(0.15, min(1.2, (fact.access_count or 0) / 10.0 * 1.2))
|
|
188
192
|
|
|
189
193
|
if (fisher_weight > 0.01
|
|
190
194
|
and fact.fisher_variance is not None
|
|
@@ -46,10 +46,12 @@ class SpreadingActivationConfig:
|
|
|
46
46
|
alpha: float = 1.0 # Seed scaling factor
|
|
47
47
|
delta: float = 0.5 # Node retention / self-decay per iteration
|
|
48
48
|
spreading_factor: float = 0.8 # S: energy diffusion rate
|
|
49
|
-
|
|
50
|
-
|
|
49
|
+
# V3.3.20: Recalibrated for SLM graph density (254K edges, 768d).
|
|
50
|
+
# SYNAPSE defaults (theta=0.5, top_m=7) were for 384d sparse graphs.
|
|
51
|
+
theta: float = 0.2 # Activation threshold for sigmoid (was 0.5)
|
|
52
|
+
top_m: int = 20 # Lateral inhibition: max active nodes (was 7)
|
|
51
53
|
max_iterations: int = 3 # T: propagation depth
|
|
52
|
-
tau_gate: float = 0.
|
|
54
|
+
tau_gate: float = 0.05 # FOK confidence gate (was 0.12)
|
|
53
55
|
enabled: bool = True # Ships enabled by default
|
|
54
56
|
|
|
55
57
|
|
|
@@ -66,8 +66,21 @@ _CAUSAL_TEMPORAL_WORDS: frozenset[str] = frozenset({
|
|
|
66
66
|
_AGGREGATION_WORDS: frozenset[str] = frozenset({
|
|
67
67
|
"all", "list", "every", "everything", "various", "different",
|
|
68
68
|
"many", "several", "multiple", "summarize", "overview",
|
|
69
|
+
# V3.3.21 R5: LoCoMo cat 1 patterns — "What X does/did Y Z?" needs aggregation.
|
|
70
|
+
# "What activities does Melanie partake in?" = aggregation, not factual.
|
|
71
|
+
"activities", "events", "hobbies", "instruments", "types",
|
|
72
|
+
"things", "places", "jobs", "skills", "interests", "pets",
|
|
69
73
|
})
|
|
70
74
|
|
|
75
|
+
# V3.3.21 R5: Plural noun patterns that signal aggregation queries.
|
|
76
|
+
# "What [noun]s has/does [entity] [verb]?" = needs cross-session aggregation.
|
|
77
|
+
_AGGREGATION_PATTERNS: tuple[str, ...] = (
|
|
78
|
+
r"what (?:\w+ )?(?:activities|events|hobbies|types|things|places|jobs)",
|
|
79
|
+
r"what (?:\w+ )?has .+ (?:done|visited|attended|participated|played|practiced)",
|
|
80
|
+
r"how many (?:\w+ )?(?:times|events|things|places)",
|
|
81
|
+
r"what are .+(?:'s|s') (?:\w+ )?(?:hobbies|interests|activities|skills)",
|
|
82
|
+
)
|
|
83
|
+
|
|
71
84
|
_OPINION_WORDS: tuple[str, ...] = (
|
|
72
85
|
"think", "feel", "opinion", "prefer", "favorite", "best", "worst",
|
|
73
86
|
"believe", "like about", "dislike", "enjoy", "hate", "love",
|
|
@@ -126,6 +139,9 @@ class QueryStrategyClassifier:
|
|
|
126
139
|
return "temporal"
|
|
127
140
|
if words & _AGGREGATION_WORDS:
|
|
128
141
|
return "aggregation"
|
|
142
|
+
# V3.3.21 R5: Regex patterns for aggregation questions
|
|
143
|
+
if any(re.search(p, q) for p in _AGGREGATION_PATTERNS):
|
|
144
|
+
return "aggregation"
|
|
129
145
|
if any(w in q for w in _OPINION_WORDS):
|
|
130
146
|
return "opinion"
|
|
131
147
|
if len(proper_nouns) >= 2:
|
|
@@ -30,8 +30,10 @@ logger = logging.getLogger("superlocalmemory.api_server")
|
|
|
30
30
|
# V3 paths
|
|
31
31
|
MEMORY_DIR = Path.home() / ".superlocalmemory"
|
|
32
32
|
DB_PATH = MEMORY_DIR / "memory.db"
|
|
33
|
-
#
|
|
34
|
-
|
|
33
|
+
# V3.3.21: UI shipped inside the package for pip/npm installs.
|
|
34
|
+
_PKG_UI = Path(__file__).resolve().parent.parent / "ui"
|
|
35
|
+
_REPO_UI = Path(__file__).resolve().parent.parent.parent.parent / "ui"
|
|
36
|
+
UI_DIR = _PKG_UI if (_PKG_UI / "index.html").exists() else _REPO_UI
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
# ============================================================================
|
|
@@ -48,8 +48,11 @@ from superlocalmemory.server.security_middleware import SecurityHeadersMiddlewar
|
|
|
48
48
|
# V3 Paths (migrated from ~/.claude-memory to ~/.superlocalmemory)
|
|
49
49
|
MEMORY_DIR = Path.home() / ".superlocalmemory"
|
|
50
50
|
DB_PATH = MEMORY_DIR / "memory.db"
|
|
51
|
-
#
|
|
52
|
-
|
|
51
|
+
# V3.3.21: UI shipped inside the package for pip/npm installs.
|
|
52
|
+
# Check package location first, then fall back to repo root for dev mode.
|
|
53
|
+
_PKG_UI = Path(__file__).resolve().parent.parent / "ui"
|
|
54
|
+
_REPO_UI = Path(__file__).resolve().parent.parent.parent.parent / "ui"
|
|
55
|
+
UI_DIR = _PKG_UI if (_PKG_UI / "index.html").exists() else _REPO_UI
|
|
53
56
|
|
|
54
57
|
|
|
55
58
|
def create_app() -> FastAPI:
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""DDL for the code_graph.db database.
|
|
6
|
+
|
|
7
|
+
Single source of truth for all CodeGraph tables.
|
|
8
|
+
No other module should contain CREATE TABLE statements.
|
|
9
|
+
|
|
10
|
+
Tables:
|
|
11
|
+
1. graph_nodes — Code entities (functions, classes, files, modules)
|
|
12
|
+
2. graph_edges — Relationships (calls, imports, inherits, contains, tested_by)
|
|
13
|
+
3. graph_files — File tracking for incremental updates
|
|
14
|
+
4. graph_metadata — Key-value store for graph-level config
|
|
15
|
+
5. code_memory_links — Bridge table linking code nodes to SLM memory facts
|
|
16
|
+
6. code_node_embeddings — vec0 virtual table for semantic search (optional)
|
|
17
|
+
7. graph_nodes_fts — FTS5 virtual table for text search
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
import sqlite3
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# DDL Statements (executed in order)
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
_DDL_STATEMENTS: tuple[str, ...] = (
|
|
33
|
+
# ── Table 1: graph_nodes ──────────────────────────────────────────
|
|
34
|
+
"""
|
|
35
|
+
CREATE TABLE IF NOT EXISTS graph_nodes (
|
|
36
|
+
node_id TEXT PRIMARY KEY,
|
|
37
|
+
kind TEXT NOT NULL CHECK (kind IN ('file', 'class', 'function', 'method', 'module')),
|
|
38
|
+
name TEXT NOT NULL,
|
|
39
|
+
qualified_name TEXT NOT NULL UNIQUE,
|
|
40
|
+
file_path TEXT NOT NULL,
|
|
41
|
+
line_start INTEGER NOT NULL DEFAULT 0,
|
|
42
|
+
line_end INTEGER NOT NULL DEFAULT 0,
|
|
43
|
+
language TEXT NOT NULL DEFAULT '',
|
|
44
|
+
parent_name TEXT,
|
|
45
|
+
signature TEXT,
|
|
46
|
+
docstring TEXT,
|
|
47
|
+
is_test INTEGER NOT NULL DEFAULT 0,
|
|
48
|
+
content_hash TEXT,
|
|
49
|
+
community_id INTEGER,
|
|
50
|
+
extra_json TEXT NOT NULL DEFAULT '{}',
|
|
51
|
+
created_at REAL NOT NULL,
|
|
52
|
+
updated_at REAL NOT NULL
|
|
53
|
+
)
|
|
54
|
+
""",
|
|
55
|
+
|
|
56
|
+
# ── Table 2: graph_edges ──────────────────────────────────────────
|
|
57
|
+
"""
|
|
58
|
+
CREATE TABLE IF NOT EXISTS graph_edges (
|
|
59
|
+
edge_id TEXT PRIMARY KEY,
|
|
60
|
+
kind TEXT NOT NULL CHECK (kind IN ('calls', 'imports', 'inherits', 'contains', 'tested_by', 'depends_on')),
|
|
61
|
+
source_node_id TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
|
|
62
|
+
target_node_id TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
|
|
63
|
+
file_path TEXT NOT NULL,
|
|
64
|
+
line INTEGER NOT NULL DEFAULT 0,
|
|
65
|
+
confidence REAL NOT NULL DEFAULT 1.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
|
|
66
|
+
extra_json TEXT NOT NULL DEFAULT '{}',
|
|
67
|
+
created_at REAL NOT NULL,
|
|
68
|
+
updated_at REAL NOT NULL
|
|
69
|
+
)
|
|
70
|
+
""",
|
|
71
|
+
|
|
72
|
+
# ── Table 3: graph_files ──────────────────────────────────────────
|
|
73
|
+
"""
|
|
74
|
+
CREATE TABLE IF NOT EXISTS graph_files (
|
|
75
|
+
file_path TEXT PRIMARY KEY,
|
|
76
|
+
content_hash TEXT NOT NULL,
|
|
77
|
+
mtime REAL NOT NULL,
|
|
78
|
+
language TEXT NOT NULL,
|
|
79
|
+
node_count INTEGER NOT NULL DEFAULT 0,
|
|
80
|
+
edge_count INTEGER NOT NULL DEFAULT 0,
|
|
81
|
+
last_indexed REAL NOT NULL
|
|
82
|
+
)
|
|
83
|
+
""",
|
|
84
|
+
|
|
85
|
+
# ── Table 4: graph_metadata ───────────────────────────────────────
|
|
86
|
+
"""
|
|
87
|
+
CREATE TABLE IF NOT EXISTS graph_metadata (
|
|
88
|
+
key TEXT PRIMARY KEY,
|
|
89
|
+
value TEXT NOT NULL,
|
|
90
|
+
updated_at REAL NOT NULL
|
|
91
|
+
)
|
|
92
|
+
""",
|
|
93
|
+
|
|
94
|
+
# ── Table 5: code_memory_links ────────────────────────────────────
|
|
95
|
+
"""
|
|
96
|
+
CREATE TABLE IF NOT EXISTS code_memory_links (
|
|
97
|
+
link_id TEXT PRIMARY KEY,
|
|
98
|
+
code_node_id TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
|
|
99
|
+
slm_fact_id TEXT NOT NULL,
|
|
100
|
+
slm_entity_id TEXT,
|
|
101
|
+
link_type TEXT NOT NULL CHECK (link_type IN (
|
|
102
|
+
'mentions', 'decision_about', 'bug_fix', 'refactor', 'design_rationale'
|
|
103
|
+
)),
|
|
104
|
+
confidence REAL NOT NULL DEFAULT 0.8 CHECK (confidence >= 0.0 AND confidence <= 1.0),
|
|
105
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
106
|
+
last_verified TEXT,
|
|
107
|
+
is_stale INTEGER NOT NULL DEFAULT 0
|
|
108
|
+
)
|
|
109
|
+
""",
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
# Indexes (separate from tables for clarity)
|
|
113
|
+
_INDEX_STATEMENTS: tuple[str, ...] = (
|
|
114
|
+
# graph_nodes indexes
|
|
115
|
+
"CREATE INDEX IF NOT EXISTS idx_gn_file_path ON graph_nodes(file_path)",
|
|
116
|
+
"CREATE INDEX IF NOT EXISTS idx_gn_kind ON graph_nodes(kind)",
|
|
117
|
+
"CREATE INDEX IF NOT EXISTS idx_gn_name ON graph_nodes(name)",
|
|
118
|
+
"CREATE INDEX IF NOT EXISTS idx_gn_qualified ON graph_nodes(qualified_name)",
|
|
119
|
+
"CREATE INDEX IF NOT EXISTS idx_gn_parent ON graph_nodes(parent_name)",
|
|
120
|
+
"CREATE INDEX IF NOT EXISTS idx_gn_language ON graph_nodes(language)",
|
|
121
|
+
"CREATE INDEX IF NOT EXISTS idx_gn_community ON graph_nodes(community_id)",
|
|
122
|
+
# graph_edges indexes
|
|
123
|
+
"CREATE INDEX IF NOT EXISTS idx_ge_source ON graph_edges(source_node_id)",
|
|
124
|
+
"CREATE INDEX IF NOT EXISTS idx_ge_target ON graph_edges(target_node_id)",
|
|
125
|
+
"CREATE INDEX IF NOT EXISTS idx_ge_kind ON graph_edges(kind)",
|
|
126
|
+
"CREATE INDEX IF NOT EXISTS idx_ge_file ON graph_edges(file_path)",
|
|
127
|
+
"CREATE INDEX IF NOT EXISTS idx_ge_source_kind ON graph_edges(source_node_id, kind)",
|
|
128
|
+
"CREATE INDEX IF NOT EXISTS idx_ge_target_kind ON graph_edges(target_node_id, kind)",
|
|
129
|
+
# code_memory_links indexes
|
|
130
|
+
"CREATE INDEX IF NOT EXISTS idx_cml_node ON code_memory_links(code_node_id)",
|
|
131
|
+
"CREATE INDEX IF NOT EXISTS idx_cml_fact ON code_memory_links(slm_fact_id)",
|
|
132
|
+
"CREATE INDEX IF NOT EXISTS idx_cml_entity ON code_memory_links(slm_entity_id)",
|
|
133
|
+
"CREATE INDEX IF NOT EXISTS idx_cml_type ON code_memory_links(link_type)",
|
|
134
|
+
"CREATE INDEX IF NOT EXISTS idx_cml_stale ON code_memory_links(is_stale)",
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
# FTS5 virtual table + sync triggers
|
|
138
|
+
_FTS5_STATEMENTS: tuple[str, ...] = (
|
|
139
|
+
"""
|
|
140
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS graph_nodes_fts USING fts5(
|
|
141
|
+
name,
|
|
142
|
+
qualified_name,
|
|
143
|
+
file_path,
|
|
144
|
+
signature,
|
|
145
|
+
content='graph_nodes',
|
|
146
|
+
content_rowid='rowid',
|
|
147
|
+
tokenize='porter unicode61'
|
|
148
|
+
)
|
|
149
|
+
""",
|
|
150
|
+
# Auto-sync trigger: INSERT
|
|
151
|
+
"""
|
|
152
|
+
CREATE TRIGGER IF NOT EXISTS trg_gn_fts_insert AFTER INSERT ON graph_nodes
|
|
153
|
+
BEGIN
|
|
154
|
+
INSERT INTO graph_nodes_fts(rowid, name, qualified_name, file_path, signature)
|
|
155
|
+
VALUES (NEW.rowid, NEW.name, NEW.qualified_name, NEW.file_path, NEW.signature);
|
|
156
|
+
END
|
|
157
|
+
""",
|
|
158
|
+
# Auto-sync trigger: DELETE
|
|
159
|
+
"""
|
|
160
|
+
CREATE TRIGGER IF NOT EXISTS trg_gn_fts_delete AFTER DELETE ON graph_nodes
|
|
161
|
+
BEGIN
|
|
162
|
+
INSERT INTO graph_nodes_fts(graph_nodes_fts, rowid, name, qualified_name, file_path, signature)
|
|
163
|
+
VALUES ('delete', OLD.rowid, OLD.name, OLD.qualified_name, OLD.file_path, OLD.signature);
|
|
164
|
+
END
|
|
165
|
+
""",
|
|
166
|
+
# Auto-sync trigger: UPDATE
|
|
167
|
+
"""
|
|
168
|
+
CREATE TRIGGER IF NOT EXISTS trg_gn_fts_update AFTER UPDATE ON graph_nodes
|
|
169
|
+
BEGIN
|
|
170
|
+
INSERT INTO graph_nodes_fts(graph_nodes_fts, rowid, name, qualified_name, file_path, signature)
|
|
171
|
+
VALUES ('delete', OLD.rowid, OLD.name, OLD.qualified_name, OLD.file_path, OLD.signature);
|
|
172
|
+
INSERT INTO graph_nodes_fts(rowid, name, qualified_name, file_path, signature)
|
|
173
|
+
VALUES (NEW.rowid, NEW.name, NEW.qualified_name, NEW.file_path, NEW.signature);
|
|
174
|
+
END
|
|
175
|
+
""",
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
# Public API (matches SLM's schema.py pattern)
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
def create_all_tables(conn: sqlite3.Connection) -> None:
    """Create all CodeGraph tables, indexes, and triggers.

    Idempotent: every statement uses IF NOT EXISTS, so calling this
    repeatedly is safe. Core tables and indexes are mandatory and any
    failure there propagates. FTS5 and vec0 are optional; when either is
    unavailable a warning is logged and setup continues without it.

    Args:
        conn: Open connection to the code graph database. Foreign-key
            enforcement is switched on for this connection and the
            schema changes are committed before returning.
    """
    cursor = conn.cursor()

    # Enable foreign keys
    cursor.execute("PRAGMA foreign_keys = ON")

    # Core tables
    for ddl in _DDL_STATEMENTS:
        cursor.execute(ddl)

    # Indexes
    for idx in _INDEX_STATEMENTS:
        cursor.execute(idx)

    # FTS5 + triggers (may fail if SQLite lacks FTS5 — non-fatal).
    # The sync triggers write into graph_nodes_fts, so stop at the first
    # failure: installing them without the virtual table would leave
    # triggers on graph_nodes that target a table that does not exist.
    for stmt in _FTS5_STATEMENTS:
        try:
            cursor.execute(stmt)
        except sqlite3.OperationalError as exc:
            logger.warning("FTS5 setup failed (non-fatal): %s", exc)
            break

    # vec0 virtual table for embeddings (may fail if sqlite-vec not loaded)
    try:
        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS code_node_embeddings USING vec0(
                node_id TEXT PRIMARY KEY,
                embedding float[768] distance_metric=cosine
            )
        """)
    except sqlite3.OperationalError as exc:
        logger.warning("vec0 setup failed (non-fatal, embeddings disabled): %s", exc)

    conn.commit()
    logger.info("CodeGraph schema initialized (%d tables, %d indexes)",
                len(_DDL_STATEMENTS), len(_INDEX_STATEMENTS))
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def drop_all_tables(conn: sqlite3.Connection) -> None:
    """Drop all CodeGraph tables. Used in tests only.

    Virtual tables are dropped first, then the bridge/metadata tables,
    then the core graph tables. A table that cannot be dropped is
    skipped with a debug log so the remaining tables are still removed.
    The FTS sync triggers are dropped explicitly afterwards and the
    change is committed.

    Args:
        conn: Open connection to the code graph database.
    """
    cursor = conn.cursor()
    for table in (
        "graph_nodes_fts", "code_node_embeddings",
        "code_memory_links", "graph_metadata",
        "graph_files", "graph_edges", "graph_nodes",
    ):
        try:
            # Names come from the fixed tuple above, never from input.
            cursor.execute(f"DROP TABLE IF EXISTS {table}")
        except sqlite3.OperationalError as exc:
            # Best-effort teardown: record the failure and keep going.
            logger.debug("Could not drop table %s: %s", table, exc)
    # Drop triggers
    for trigger in ("trg_gn_fts_insert", "trg_gn_fts_delete", "trg_gn_fts_update"):
        cursor.execute(f"DROP TRIGGER IF EXISTS {trigger}")
    conn.commit()
|