superlocalmemory 3.3.19 → 3.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/package.json +1 -1
  2. package/pyproject.toml +9 -1
  3. package/src/superlocalmemory/cli/commands.py +140 -23
  4. package/src/superlocalmemory/cli/daemon.py +372 -0
  5. package/src/superlocalmemory/cli/main.py +10 -2
  6. package/src/superlocalmemory/cli/pending_store.py +158 -0
  7. package/src/superlocalmemory/cli/setup_wizard.py +39 -6
  8. package/src/superlocalmemory/code_graph/__init__.py +46 -0
  9. package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
  10. package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
  11. package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
  12. package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
  13. package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
  14. package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
  15. package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
  16. package/src/superlocalmemory/code_graph/changes.py +363 -0
  17. package/src/superlocalmemory/code_graph/communities.py +299 -0
  18. package/src/superlocalmemory/code_graph/config.py +88 -0
  19. package/src/superlocalmemory/code_graph/database.py +482 -0
  20. package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
  21. package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
  22. package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
  23. package/src/superlocalmemory/code_graph/flows.py +350 -0
  24. package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
  25. package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
  26. package/src/superlocalmemory/code_graph/graph_store.py +158 -0
  27. package/src/superlocalmemory/code_graph/incremental.py +200 -0
  28. package/src/superlocalmemory/code_graph/models.py +130 -0
  29. package/src/superlocalmemory/code_graph/parser.py +507 -0
  30. package/src/superlocalmemory/code_graph/resolver.py +321 -0
  31. package/src/superlocalmemory/code_graph/search.py +460 -0
  32. package/src/superlocalmemory/code_graph/service.py +95 -0
  33. package/src/superlocalmemory/code_graph/watcher.py +207 -0
  34. package/src/superlocalmemory/core/config.py +4 -3
  35. package/src/superlocalmemory/core/embedding_worker.py +4 -2
  36. package/src/superlocalmemory/core/embeddings.py +8 -2
  37. package/src/superlocalmemory/core/engine.py +32 -0
  38. package/src/superlocalmemory/core/engine_wiring.py +5 -0
  39. package/src/superlocalmemory/core/recall_pipeline.py +7 -3
  40. package/src/superlocalmemory/core/store_pipeline.py +23 -1
  41. package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
  42. package/src/superlocalmemory/infra/event_bus.py +5 -0
  43. package/src/superlocalmemory/mcp/server.py +23 -0
  44. package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
  45. package/src/superlocalmemory/retrieval/agentic.py +89 -17
  46. package/src/superlocalmemory/retrieval/engine.py +137 -2
  47. package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
  48. package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
  49. package/src/superlocalmemory/retrieval/strategy.py +16 -0
  50. package/src/superlocalmemory/server/api.py +4 -2
  51. package/src/superlocalmemory/server/ui.py +5 -2
  52. package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
  53. package/src/superlocalmemory/ui/index.html +1879 -0
  54. package/src/superlocalmemory/ui/js/agents.js +192 -0
  55. package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
  56. package/src/superlocalmemory/ui/js/behavioral.js +276 -0
  57. package/src/superlocalmemory/ui/js/clusters.js +206 -0
  58. package/src/superlocalmemory/ui/js/compliance.js +252 -0
  59. package/src/superlocalmemory/ui/js/core.js +246 -0
  60. package/src/superlocalmemory/ui/js/dashboard.js +110 -0
  61. package/src/superlocalmemory/ui/js/events.js +178 -0
  62. package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
  63. package/src/superlocalmemory/ui/js/feedback.js +333 -0
  64. package/src/superlocalmemory/ui/js/graph-core.js +447 -0
  65. package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
  66. package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
  67. package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
  68. package/src/superlocalmemory/ui/js/ide-status.js +102 -0
  69. package/src/superlocalmemory/ui/js/init.js +45 -0
  70. package/src/superlocalmemory/ui/js/learning.js +435 -0
  71. package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
  72. package/src/superlocalmemory/ui/js/math-health.js +98 -0
  73. package/src/superlocalmemory/ui/js/memories.js +264 -0
  74. package/src/superlocalmemory/ui/js/modal.js +357 -0
  75. package/src/superlocalmemory/ui/js/patterns.js +93 -0
  76. package/src/superlocalmemory/ui/js/profiles.js +236 -0
  77. package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
  78. package/src/superlocalmemory/ui/js/search.js +59 -0
  79. package/src/superlocalmemory/ui/js/settings.js +224 -0
  80. package/src/superlocalmemory/ui/js/timeline.js +32 -0
  81. package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
@@ -31,7 +31,10 @@ logger = logging.getLogger(__name__)
31
31
 
32
32
  _MAX_ROUNDS = 2
33
33
  _SUFFICIENCY_SCORE_THRESHOLD = 0.6
34
- _SKIP_TYPES = frozenset({"temporal"}) # S15: agentic harms temporal queries
34
+ # V3.3.19: Removed "temporal" from skip list. S15's lesson was with
35
+ # weak alias expansion. The new rule-based decomposer (v3.3.19) helps
36
+ # temporal queries by generating entity+action sub-queries.
37
+ _SKIP_TYPES: frozenset[str] = frozenset() # No types skipped
35
38
 
36
39
  _SUFFICIENCY_SYSTEM = (
37
40
  "You evaluate whether retrieved context is sufficient to answer a query. "
@@ -241,22 +244,91 @@ class AgenticRetriever:
241
244
  def _heuristic_expand(
242
245
  self, query: str, profile_id: str,
243
246
  ) -> list[str]:
244
- """Mode A: expand query with entity aliases (no LLM)."""
245
- if self._db is None:
246
- return []
247
-
248
- expanded_parts: list[str] = []
249
- entities = re.findall(r"\b[A-Z][a-z]{2,}\b", query)
250
- for name in entities:
251
- entity = self._db.get_entity_by_name(name, profile_id)
252
- if entity:
253
- aliases = self._db.get_aliases_for_entity(entity.entity_id)
254
- for a in aliases[:3]:
255
- expanded_parts.append(a.alias)
256
-
257
- if expanded_parts:
258
- return [query + " " + " ".join(expanded_parts)]
259
- return []
247
+ """Mode A: rule-based query decomposition (no LLM).
248
+
249
+ V3.3.19: Full rewrite. Generates targeted sub-queries by:
250
+ 1. Extracting person/place names (real proper nouns only)
251
+ 2. Extracting action/event keywords (non-stopwords minus entities)
252
+ 3. Combining entity + action for focused retrieval
253
+ 4. Entity-only and action-only lookups for broader context
254
+
255
+ For LoCoMo "When did [Person] [Action]?" patterns, this generates:
256
+ "Caroline LGBTQ support group" (entity + action)
257
+ "Caroline" (entity only)
258
+ "LGBTQ support group" (action only)
259
+ """
260
+ sub_queries: list[str] = []
261
+
262
+ # Extract REAL proper nouns from original query (not title-cased)
263
+ # This avoids the extract_query_entities trap where "Support Group"
264
+ # from title-casing gets treated as entities.
265
+ _STARTERS = {
266
+ "What", "Where", "Who", "Which", "How", "When", "Does", "Did",
267
+ "Can", "Could", "Would", "Should", "Are", "Is", "Was", "Were",
268
+ "Has", "Have", "The", "Tell", "Do",
269
+ }
270
+ entities = [
271
+ m for m in re.findall(r"\b[A-Z][a-z]{2,}\b", query)
272
+ if m not in _STARTERS
273
+ ]
274
+ # Also grab all-caps abbreviations (LGBTQ, MIT, NYC)
275
+ abbrevs = re.findall(r"\b[A-Z]{2,}\b", query)
276
+ entities.extend(abbrevs)
277
+
278
+ # Extract action/event keywords (remove question words + entity names)
279
+ _STOP = {
280
+ "when", "did", "does", "do", "what", "where", "who", "which",
281
+ "how", "is", "was", "were", "are", "has", "have", "had",
282
+ "the", "a", "an", "to", "for", "of", "in", "on", "at",
283
+ "and", "or", "but", "with", "from", "about", "that", "this",
284
+ "it", "they", "she", "he", "her", "his", "their", "its",
285
+ "been", "being", "would", "could", "should", "will", "can",
286
+ "may", "might", "not", "no", "so", "if", "by", "up",
287
+ "go", "going", "went", "get", "got", "ago",
288
+ "many", "much", "some", "any", "ever",
289
+ }
290
+ entity_lower = {e.lower() for e in entities}
291
+ words = re.sub(r"[^\w\s]", "", query.lower()).split()
292
+ action_words = [
293
+ w for w in words
294
+ if w not in _STOP and w not in entity_lower and len(w) > 2
295
+ ]
296
+
297
+ # Strategy 1: Entity + action keywords (most targeted)
298
+ if entities and action_words:
299
+ action_phrase = " ".join(action_words)
300
+ for ent in entities[:2]:
301
+ sub_queries.append(f"{ent} {action_phrase}")
302
+
303
+ # Strategy 2: Action keywords only (finds the event regardless of entity)
304
+ if action_words:
305
+ sub_queries.append(" ".join(action_words))
306
+
307
+ # Strategy 3: Entity-only lookup (broad context)
308
+ for ent in entities[:2]:
309
+ sub_queries.append(ent)
310
+
311
+ # Strategy 4: Alias expansion (original approach, still useful)
312
+ if self._db is not None:
313
+ for name in entities[:2]:
314
+ entity = self._db.get_entity_by_name(name, profile_id)
315
+ if entity:
316
+ try:
317
+ aliases = self._db.get_aliases_for_entity(entity.entity_id)
318
+ for a in aliases[:2]:
319
+ sub_queries.append(f"{a.alias} {' '.join(action_words)}")
320
+ except Exception:
321
+ pass
322
+
323
+ # Deduplicate, limit to 3 sub-queries (keep round 2 fast)
324
+ seen: set[str] = set()
325
+ unique: list[str] = []
326
+ for sq in sub_queries:
327
+ sq_lower = sq.strip().lower()
328
+ if sq_lower and sq_lower not in seen and sq_lower != query.lower():
329
+ seen.add(sq_lower)
330
+ unique.append(sq.strip())
331
+ return unique[:3]
260
332
 
261
333
 
262
334
  # ---------------------------------------------------------------------------
@@ -134,7 +134,7 @@ class RetrievalEngine:
134
134
  profile_hits = []
135
135
 
136
136
  # Dynamic top-k for aggregation queries
137
- effective_limit = 50 if strat.query_type == "aggregation" else limit
137
+ effective_limit = 100 if strat.query_type == "aggregation" else limit
138
138
 
139
139
  # 3. Run 4 channels
140
140
  ch_results = self._run_channels(query, profile_id, strat)
@@ -145,6 +145,14 @@ class RetrievalEngine:
145
145
  # 3. Single-pass RRF fusion
146
146
  fused = weighted_rrf(ch_results, strat.weights, k=self._config.rrf_k)
147
147
 
148
+ # V3.3.21: Cross-channel intersection boost for multi-hop/temporal queries.
149
+ # Problem: channels work in ISOLATION. "When did Caroline go to X?" needs
150
+ # entity(Caroline) ∩ temporal(date). RRF averages scores but doesn't enforce
151
+ # the intersection constraint. Fix: boost facts that appear in 2+ signal-type
152
+ # channels (entity+temporal, entity+semantic, temporal+semantic).
153
+ if strat.query_type == "multi_hop" and len(ch_results) >= 2:
154
+ fused = self._apply_cross_channel_intersection(fused, ch_results, strat)
155
+
148
156
  # Bridge discovery for multi-hop queries
149
157
  # V3.3.19: Only bridge.discover() (86ms). Removed bridge.spreading_activation()
150
158
  # which did per-node SQL queries across 254K edges → 78s latency.
@@ -184,9 +192,23 @@ class RetrievalEngine:
184
192
  top = fused[:pool]
185
193
  facts = self._load_facts(top, profile_id)
186
194
 
195
+ # V3.3.21: Session diversity for aggregation queries.
196
+ # Cat 1 (single-hop/aggregation) needs facts from MULTIPLE sessions.
197
+ # Without diversity enforcement, top-20 may all come from 1-2 sessions,
198
+ # missing scattered mentions across 19+ sessions.
199
+ if strat.query_type == "aggregation" and facts:
200
+ top = self._enforce_session_diversity(top, facts, min_sessions=3, top_k=20)
201
+
187
202
  # 5. Cross-encoder rerank (optional)
188
203
  # Bug 4 fix: reduced alpha for multi-hop/temporal to preserve diversity
189
- if self._reranker is not None and facts:
204
+ # V3.3.21: Skip reranker if worker isn't ready yet (cold start).
205
+ # Returns results without CE reranking (~5-10pp lower quality) but instant
206
+ # instead of blocking 15-19s on first recall. Worker warms up in background.
207
+ reranker_ready = (
208
+ self._reranker is not None
209
+ and getattr(self._reranker, '_worker_ready', False)
210
+ )
211
+ if reranker_ready and facts:
190
212
  ce_alpha = 0.5 if strat.query_type in ("multi_hop", "temporal") else 0.75
191
213
  top = self._apply_reranker(query, top, facts, alpha=ce_alpha)
192
214
 
@@ -199,6 +221,119 @@ class RetrievalEngine:
199
221
  total_candidates=total, retrieval_time_ms=ms,
200
222
  )
201
223
 
224
+ # -- Cross-channel intersection boost -----------------------------------
225
+
226
@staticmethod
def _apply_cross_channel_intersection(
    fused: list[FusionResult],
    ch_results: dict[str, list[tuple[str, float]]],
    strat: QueryStrategy,
) -> list[FusionResult]:
    """Re-score fused results by how many signal groups retrieved each fact.

    Every channel in ``ch_results`` is mapped to a signal group; a channel
    name that is not in the mapping counts as its own group:

    - content: semantic, bm25, profile
    - structure: entity_graph, spreading_activation
    - temporal: temporal
    - associative: hopfield

    Multiplier applied to ``fused_score``:

    - 3 or more groups: 2.0x
    - exactly 2 groups: 1.8x when they are temporal + structure, else 1.5x
    - fewer than 2 groups: 1.0x (unchanged)

    Args:
        fused: Fusion results to re-score; the input objects are not mutated.
        ch_results: Channel name -> list of ``(fact_id, score)`` hits.
        strat: Query strategy; accepted for interface parity, not read here.

    Returns:
        New ``FusionResult`` objects sorted by boosted score, descending.
    """
    group_of_channel = {
        "semantic": "content", "bm25": "content",
        "entity_graph": "structure", "spreading_activation": "structure",
        "temporal": "temporal",
        "hopfield": "associative",
        "profile": "content",
    }

    # fact_id -> the distinct signal groups that surfaced it
    groups_by_fact: dict[str, set[str]] = {}
    for channel, hits in ch_results.items():
        signal_group = group_of_channel.get(channel, channel)
        for fact_id, _score in hits:
            groups_by_fact.setdefault(fact_id, set()).add(signal_group)

    def _multiplier(groups: set[str]) -> float:
        """Translate cross-group coverage into a score multiplier."""
        coverage = len(groups)
        if coverage >= 3:
            return 2.0
        if coverage < 2:
            return 1.0
        # Two groups: the temporal+structure pairing gets the larger boost.
        if "temporal" in groups and "structure" in groups:
            return 1.8
        return 1.5

    rescored = [
        FusionResult(
            fact_id=item.fact_id,
            fused_score=item.fused_score * _multiplier(
                groups_by_fact.get(item.fact_id, set()),
            ),
            channel_ranks=item.channel_ranks,
            channel_scores=item.channel_scores,
        )
        for item in fused
    ]
    # Stable descending sort: ties keep their incoming fused order.
    return sorted(rescored, key=lambda r: r.fused_score, reverse=True)
287
+
288
+ # -- Session diversity enforcement ----------------------------------------
289
+
290
@staticmethod
def _enforce_session_diversity(
    fused: list[FusionResult],
    fact_map: dict[str, AtomicFact],
    min_sessions: int = 3,
    top_k: int = 20,
) -> list[FusionResult]:
    """Make the first ``top_k`` results span at least ``min_sessions`` sessions.

    When the top-k window already covers enough distinct ``session_id``
    values (or ``fused`` has no entries beyond the window) the input list is
    returned unchanged. Otherwise the highest-ranked facts from sessions not
    yet represented are moved INTO the window, displacing the lowest-ranked
    window entries that are redundant (their session still has another
    representative in the window, or they carry no session id). Displaced
    entries are placed directly after the window; all other entries keep
    their relative order, and no entry is dropped or duplicated.

    The previous implementation appended promoted facts after the window,
    which left the first ``top_k`` entries exactly as they were.

    Args:
        fused: Ranked fusion results, best first.
        fact_map: ``fact_id`` -> fact object exposing ``session_id``.
        min_sessions: Minimum number of distinct sessions wanted in the window.
        top_k: Size of the window whose diversity is enforced.

    Returns:
        ``fused`` itself when no change is needed, else a new reordered list.
    """
    if top_k <= 0 or len(fused) <= top_k:
        return fused

    def _session_of(fr: FusionResult) -> str | None:
        """Return the session id behind ``fr``, or None when unknown."""
        fact = fact_map.get(fr.fact_id)
        return (fact.session_id or None) if fact else None

    window = fused[:top_k]
    tail = fused[top_k:]

    # How many window entries each session contributes.
    session_counts: dict[str, int] = {}
    for fr in window:
        sid = _session_of(fr)
        if sid:
            session_counts[sid] = session_counts.get(sid, 0) + 1

    if len(session_counts) >= min_sessions:
        return fused

    # Best-ranked fact from each session that is missing from the window.
    covered = set(session_counts)
    promoted: list[FusionResult] = []
    for fr in tail:
        sid = _session_of(fr)
        if sid and sid not in covered:
            covered.add(sid)
            promoted.append(fr)
            if len(covered) >= min_sessions:
                break

    if not promoted:
        return fused

    # Walk the window from the bottom and free one slot per promoted fact,
    # never evicting the only representative of a session.
    displaced_idx: set[int] = set()
    for idx in range(len(window) - 1, -1, -1):
        if len(displaced_idx) == len(promoted):
            break
        sid = _session_of(window[idx])
        if not sid:
            displaced_idx.add(idx)
        elif session_counts[sid] > 1:
            session_counts[sid] -= 1
            displaced_idx.add(idx)

    kept = [fr for i, fr in enumerate(window) if i not in displaced_idx]
    displaced = [fr for i, fr in enumerate(window) if i in displaced_idx]

    promoted_ids = {fr.fact_id for fr in promoted}
    remaining = [fr for fr in tail if fr.fact_id not in promoted_ids]
    # If fewer slots could be freed than facts promoted (only possible when
    # top_k < min_sessions), the surplus promoted facts follow the window.
    return kept + promoted + displaced + remaining
336
+
202
337
  # -- Channel execution --------------------------------------------------
203
338
 
204
339
  def _embed_query(self, query: str) -> list[float] | None:
@@ -183,8 +183,12 @@ class SemanticChannel:
183
183
  for fact in facts:
184
184
  cos_sim = knn_scores.get(fact.fact_id, 0.0)
185
185
 
186
- # Graduated Fisher-Rao ramp (preserved from original)
187
- fisher_weight = min(1.2, (fact.access_count or 0) / 10.0 * 1.2)
186
+ # V3.3.21: Fisher-Rao ramp with minimum floor.
187
+ # Bug fix: access_count=0 for fresh facts → Fisher weight=0 metric DEAD.
188
+ # Paper 2's +12pp on multi-hop came from Fisher-Rao. A 0.15 floor ensures
189
+ # fresh facts still benefit from variance-weighted similarity, while
190
+ # frequently accessed facts get progressively stronger Fisher influence.
191
+ fisher_weight = max(0.15, min(1.2, (fact.access_count or 0) / 10.0 * 1.2))
188
192
 
189
193
  if (fisher_weight > 0.01
190
194
  and fact.fisher_variance is not None
@@ -46,10 +46,12 @@ class SpreadingActivationConfig:
46
46
  alpha: float = 1.0 # Seed scaling factor
47
47
  delta: float = 0.5 # Node retention / self-decay per iteration
48
48
  spreading_factor: float = 0.8 # S: energy diffusion rate
49
- theta: float = 0.5 # Activation threshold for sigmoid
50
- top_m: int = 7 # Lateral inhibition: max active nodes
49
+ # V3.3.20: Recalibrated for SLM graph density (254K edges, 768d).
50
+ # SYNAPSE defaults (theta=0.5, top_m=7) were for 384d sparse graphs.
51
+ theta: float = 0.2 # Activation threshold for sigmoid (was 0.5)
52
+ top_m: int = 20 # Lateral inhibition: max active nodes (was 7)
51
53
  max_iterations: int = 3 # T: propagation depth
52
- tau_gate: float = 0.12 # FOK confidence gate
54
+ tau_gate: float = 0.05 # FOK confidence gate (was 0.12)
53
55
  enabled: bool = True # Ships enabled by default
54
56
 
55
57
 
@@ -66,8 +66,21 @@ _CAUSAL_TEMPORAL_WORDS: frozenset[str] = frozenset({
66
66
  _AGGREGATION_WORDS: frozenset[str] = frozenset({
67
67
  "all", "list", "every", "everything", "various", "different",
68
68
  "many", "several", "multiple", "summarize", "overview",
69
+ # V3.3.21 R5: LoCoMo cat 1 patterns — "What X does/did Y Z?" needs aggregation.
70
+ # "What activities does Melanie partake in?" = aggregation, not factual.
71
+ "activities", "events", "hobbies", "instruments", "types",
72
+ "things", "places", "jobs", "skills", "interests", "pets",
69
73
  })
70
74
 
75
+ # V3.3.21 R5: Plural noun patterns that signal aggregation queries.
76
+ # "What [noun]s has/does [entity] [verb]?" = needs cross-session aggregation.
77
+ _AGGREGATION_PATTERNS: tuple[str, ...] = (
78
+ r"what (?:\w+ )?(?:activities|events|hobbies|types|things|places|jobs)",
79
+ r"what (?:\w+ )?has .+ (?:done|visited|attended|participated|played|practiced)",
80
+ r"how many (?:\w+ )?(?:times|events|things|places)",
81
+ r"what are .+(?:'s|s') (?:\w+ )?(?:hobbies|interests|activities|skills)",
82
+ )
83
+
71
84
  _OPINION_WORDS: tuple[str, ...] = (
72
85
  "think", "feel", "opinion", "prefer", "favorite", "best", "worst",
73
86
  "believe", "like about", "dislike", "enjoy", "hate", "love",
@@ -126,6 +139,9 @@ class QueryStrategyClassifier:
126
139
  return "temporal"
127
140
  if words & _AGGREGATION_WORDS:
128
141
  return "aggregation"
142
+ # V3.3.21 R5: Regex patterns for aggregation questions
143
+ if any(re.search(p, q) for p in _AGGREGATION_PATTERNS):
144
+ return "aggregation"
129
145
  if any(w in q for w in _OPINION_WORDS):
130
146
  return "opinion"
131
147
  if len(proper_nouns) >= 2:
@@ -30,8 +30,10 @@ logger = logging.getLogger("superlocalmemory.api_server")
30
30
  # V3 paths
31
31
  MEMORY_DIR = Path.home() / ".superlocalmemory"
32
32
  DB_PATH = MEMORY_DIR / "memory.db"
33
- # ui/ is at repo root, 4 levels up from src/superlocalmemory/server/api.py
34
- UI_DIR = Path(__file__).resolve().parent.parent.parent.parent / "ui"
33
+ # V3.3.21: UI shipped inside the package for pip/npm installs.
34
+ _PKG_UI = Path(__file__).resolve().parent.parent / "ui"
35
+ _REPO_UI = Path(__file__).resolve().parent.parent.parent.parent / "ui"
36
+ UI_DIR = _PKG_UI if (_PKG_UI / "index.html").exists() else _REPO_UI
35
37
 
36
38
 
37
39
  # ============================================================================
@@ -48,8 +48,11 @@ from superlocalmemory.server.security_middleware import SecurityHeadersMiddlewar
48
48
  # V3 Paths (migrated from ~/.claude-memory to ~/.superlocalmemory)
49
49
  MEMORY_DIR = Path.home() / ".superlocalmemory"
50
50
  DB_PATH = MEMORY_DIR / "memory.db"
51
- # ui/ is at repo root, 4 levels up from src/superlocalmemory/server/ui.py
52
- UI_DIR = Path(__file__).resolve().parent.parent.parent.parent / "ui"
51
+ # V3.3.21: UI shipped inside the package for pip/npm installs.
52
+ # Check package location first, then fall back to repo root for dev mode.
53
+ _PKG_UI = Path(__file__).resolve().parent.parent / "ui"
54
+ _REPO_UI = Path(__file__).resolve().parent.parent.parent.parent / "ui"
55
+ UI_DIR = _PKG_UI if (_PKG_UI / "index.html").exists() else _REPO_UI
53
56
 
54
57
 
55
58
  def create_app() -> FastAPI:
@@ -0,0 +1,239 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4 — CodeGraph Module
4
+
5
+ """DDL for the code_graph.db database.
6
+
7
+ Single source of truth for all CodeGraph tables.
8
+ No other module should contain CREATE TABLE statements.
9
+
10
+ Tables:
11
+ 1. graph_nodes — Code entities (functions, classes, files, modules)
12
+ 2. graph_edges — Relationships (calls, imports, inherits, contains, tested_by)
13
+ 3. graph_files — File tracking for incremental updates
14
+ 4. graph_metadata — Key-value store for graph-level config
15
+ 5. code_memory_links — Bridge table linking code nodes to SLM memory facts
16
+ 6. code_node_embeddings — vec0 virtual table for semantic search (optional)
17
+ 7. graph_nodes_fts — FTS5 virtual table for text search
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ import sqlite3
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # DDL Statements (executed in order)
30
+ # ---------------------------------------------------------------------------
31
+
32
+ _DDL_STATEMENTS: tuple[str, ...] = (
33
+ # ── Table 1: graph_nodes ──────────────────────────────────────────
34
+ """
35
+ CREATE TABLE IF NOT EXISTS graph_nodes (
36
+ node_id TEXT PRIMARY KEY,
37
+ kind TEXT NOT NULL CHECK (kind IN ('file', 'class', 'function', 'method', 'module')),
38
+ name TEXT NOT NULL,
39
+ qualified_name TEXT NOT NULL UNIQUE,
40
+ file_path TEXT NOT NULL,
41
+ line_start INTEGER NOT NULL DEFAULT 0,
42
+ line_end INTEGER NOT NULL DEFAULT 0,
43
+ language TEXT NOT NULL DEFAULT '',
44
+ parent_name TEXT,
45
+ signature TEXT,
46
+ docstring TEXT,
47
+ is_test INTEGER NOT NULL DEFAULT 0,
48
+ content_hash TEXT,
49
+ community_id INTEGER,
50
+ extra_json TEXT NOT NULL DEFAULT '{}',
51
+ created_at REAL NOT NULL,
52
+ updated_at REAL NOT NULL
53
+ )
54
+ """,
55
+
56
+ # ── Table 2: graph_edges ──────────────────────────────────────────
57
+ """
58
+ CREATE TABLE IF NOT EXISTS graph_edges (
59
+ edge_id TEXT PRIMARY KEY,
60
+ kind TEXT NOT NULL CHECK (kind IN ('calls', 'imports', 'inherits', 'contains', 'tested_by', 'depends_on')),
61
+ source_node_id TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
62
+ target_node_id TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
63
+ file_path TEXT NOT NULL,
64
+ line INTEGER NOT NULL DEFAULT 0,
65
+ confidence REAL NOT NULL DEFAULT 1.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
66
+ extra_json TEXT NOT NULL DEFAULT '{}',
67
+ created_at REAL NOT NULL,
68
+ updated_at REAL NOT NULL
69
+ )
70
+ """,
71
+
72
+ # ── Table 3: graph_files ──────────────────────────────────────────
73
+ """
74
+ CREATE TABLE IF NOT EXISTS graph_files (
75
+ file_path TEXT PRIMARY KEY,
76
+ content_hash TEXT NOT NULL,
77
+ mtime REAL NOT NULL,
78
+ language TEXT NOT NULL,
79
+ node_count INTEGER NOT NULL DEFAULT 0,
80
+ edge_count INTEGER NOT NULL DEFAULT 0,
81
+ last_indexed REAL NOT NULL
82
+ )
83
+ """,
84
+
85
+ # ── Table 4: graph_metadata ───────────────────────────────────────
86
+ """
87
+ CREATE TABLE IF NOT EXISTS graph_metadata (
88
+ key TEXT PRIMARY KEY,
89
+ value TEXT NOT NULL,
90
+ updated_at REAL NOT NULL
91
+ )
92
+ """,
93
+
94
+ # ── Table 5: code_memory_links ────────────────────────────────────
95
+ """
96
+ CREATE TABLE IF NOT EXISTS code_memory_links (
97
+ link_id TEXT PRIMARY KEY,
98
+ code_node_id TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
99
+ slm_fact_id TEXT NOT NULL,
100
+ slm_entity_id TEXT,
101
+ link_type TEXT NOT NULL CHECK (link_type IN (
102
+ 'mentions', 'decision_about', 'bug_fix', 'refactor', 'design_rationale'
103
+ )),
104
+ confidence REAL NOT NULL DEFAULT 0.8 CHECK (confidence >= 0.0 AND confidence <= 1.0),
105
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
106
+ last_verified TEXT,
107
+ is_stale INTEGER NOT NULL DEFAULT 0
108
+ )
109
+ """,
110
+ )
111
+
112
+ # Indexes (separate from tables for clarity)
113
+ _INDEX_STATEMENTS: tuple[str, ...] = (
114
+ # graph_nodes indexes
115
+ "CREATE INDEX IF NOT EXISTS idx_gn_file_path ON graph_nodes(file_path)",
116
+ "CREATE INDEX IF NOT EXISTS idx_gn_kind ON graph_nodes(kind)",
117
+ "CREATE INDEX IF NOT EXISTS idx_gn_name ON graph_nodes(name)",
118
+ "CREATE INDEX IF NOT EXISTS idx_gn_qualified ON graph_nodes(qualified_name)",
119
+ "CREATE INDEX IF NOT EXISTS idx_gn_parent ON graph_nodes(parent_name)",
120
+ "CREATE INDEX IF NOT EXISTS idx_gn_language ON graph_nodes(language)",
121
+ "CREATE INDEX IF NOT EXISTS idx_gn_community ON graph_nodes(community_id)",
122
+ # graph_edges indexes
123
+ "CREATE INDEX IF NOT EXISTS idx_ge_source ON graph_edges(source_node_id)",
124
+ "CREATE INDEX IF NOT EXISTS idx_ge_target ON graph_edges(target_node_id)",
125
+ "CREATE INDEX IF NOT EXISTS idx_ge_kind ON graph_edges(kind)",
126
+ "CREATE INDEX IF NOT EXISTS idx_ge_file ON graph_edges(file_path)",
127
+ "CREATE INDEX IF NOT EXISTS idx_ge_source_kind ON graph_edges(source_node_id, kind)",
128
+ "CREATE INDEX IF NOT EXISTS idx_ge_target_kind ON graph_edges(target_node_id, kind)",
129
+ # code_memory_links indexes
130
+ "CREATE INDEX IF NOT EXISTS idx_cml_node ON code_memory_links(code_node_id)",
131
+ "CREATE INDEX IF NOT EXISTS idx_cml_fact ON code_memory_links(slm_fact_id)",
132
+ "CREATE INDEX IF NOT EXISTS idx_cml_entity ON code_memory_links(slm_entity_id)",
133
+ "CREATE INDEX IF NOT EXISTS idx_cml_type ON code_memory_links(link_type)",
134
+ "CREATE INDEX IF NOT EXISTS idx_cml_stale ON code_memory_links(is_stale)",
135
+ )
136
+
137
+ # FTS5 virtual table + sync triggers
138
+ _FTS5_STATEMENTS: tuple[str, ...] = (
139
+ """
140
+ CREATE VIRTUAL TABLE IF NOT EXISTS graph_nodes_fts USING fts5(
141
+ name,
142
+ qualified_name,
143
+ file_path,
144
+ signature,
145
+ content='graph_nodes',
146
+ content_rowid='rowid',
147
+ tokenize='porter unicode61'
148
+ )
149
+ """,
150
+ # Auto-sync trigger: INSERT
151
+ """
152
+ CREATE TRIGGER IF NOT EXISTS trg_gn_fts_insert AFTER INSERT ON graph_nodes
153
+ BEGIN
154
+ INSERT INTO graph_nodes_fts(rowid, name, qualified_name, file_path, signature)
155
+ VALUES (NEW.rowid, NEW.name, NEW.qualified_name, NEW.file_path, NEW.signature);
156
+ END
157
+ """,
158
+ # Auto-sync trigger: DELETE
159
+ """
160
+ CREATE TRIGGER IF NOT EXISTS trg_gn_fts_delete AFTER DELETE ON graph_nodes
161
+ BEGIN
162
+ INSERT INTO graph_nodes_fts(graph_nodes_fts, rowid, name, qualified_name, file_path, signature)
163
+ VALUES ('delete', OLD.rowid, OLD.name, OLD.qualified_name, OLD.file_path, OLD.signature);
164
+ END
165
+ """,
166
+ # Auto-sync trigger: UPDATE
167
+ """
168
+ CREATE TRIGGER IF NOT EXISTS trg_gn_fts_update AFTER UPDATE ON graph_nodes
169
+ BEGIN
170
+ INSERT INTO graph_nodes_fts(graph_nodes_fts, rowid, name, qualified_name, file_path, signature)
171
+ VALUES ('delete', OLD.rowid, OLD.name, OLD.qualified_name, OLD.file_path, OLD.signature);
172
+ INSERT INTO graph_nodes_fts(rowid, name, qualified_name, file_path, signature)
173
+ VALUES (NEW.rowid, NEW.name, NEW.qualified_name, NEW.file_path, NEW.signature);
174
+ END
175
+ """,
176
+ )
177
+
178
+
179
+ # ---------------------------------------------------------------------------
180
+ # Public API (matches SLM's schema.py pattern)
181
+ # ---------------------------------------------------------------------------
182
+
183
def create_all_tables(conn: sqlite3.Connection) -> None:
    """Create all CodeGraph tables, indexes, and triggers.

    Idempotent — safe to call multiple times (all DDL uses IF NOT EXISTS).

    Core tables and indexes are mandatory: any error from them propagates.
    The FTS5 table with its sync triggers, and the vec0 embedding table, are
    optional: an ``sqlite3.OperationalError`` from either is logged as a
    warning and setup continues without that feature.

    Args:
        conn: Open SQLite connection; the changes are committed before return.
    """
    cursor = conn.cursor()

    # Enable foreign keys
    cursor.execute("PRAGMA foreign_keys = ON")

    # Core tables
    for ddl in _DDL_STATEMENTS:
        cursor.execute(ddl)

    # Indexes
    for idx in _INDEX_STATEMENTS:
        cursor.execute(idx)

    # FTS5 + triggers (may fail if SQLite lacks FTS5 — non-fatal).
    # Stop at the first failure: the sync triggers write to graph_nodes_fts,
    # so installing them without that table would make every later write to
    # graph_nodes fail and turn a "non-fatal" problem into a fatal one.
    for stmt in _FTS5_STATEMENTS:
        try:
            cursor.execute(stmt)
        except sqlite3.OperationalError as exc:
            logger.warning("FTS5 setup failed (non-fatal): %s", exc)
            break

    # vec0 virtual table for embeddings (may fail if sqlite-vec not loaded)
    try:
        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS code_node_embeddings USING vec0(
                node_id TEXT PRIMARY KEY,
                embedding float[768] distance_metric=cosine
            )
        """)
    except sqlite3.OperationalError as exc:
        logger.warning("vec0 setup failed (non-fatal, embeddings disabled): %s", exc)

    conn.commit()
    logger.info("CodeGraph schema initialized (%d tables, %d indexes)",
                len(_DDL_STATEMENTS), len(_INDEX_STATEMENTS))
222
+
223
+
224
def drop_all_tables(conn: sqlite3.Connection) -> None:
    """Remove every CodeGraph table and FTS sync trigger. Tests only.

    Table drops are best-effort: an ``sqlite3.OperationalError`` on one
    table is ignored and the next table is attempted. Trigger drops are not
    guarded. The connection is committed at the end.
    """
    # Virtual tables first, then dependants, with graph_nodes last.
    table_names = (
        "graph_nodes_fts", "code_node_embeddings",
        "code_memory_links", "graph_metadata",
        "graph_files", "graph_edges", "graph_nodes",
    )
    trigger_names = ("trg_gn_fts_insert", "trg_gn_fts_delete", "trg_gn_fts_update")

    cur = conn.cursor()
    for name in table_names:
        try:
            cur.execute(f"DROP TABLE IF EXISTS {name}")
        except sqlite3.OperationalError:
            # Ignore and move on to the next table.
            continue

    # Drop triggers
    for name in trigger_names:
        cur.execute(f"DROP TRIGGER IF EXISTS {name}")

    conn.commit()