superlocalmemory 3.1.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -84,7 +84,7 @@ slm status
84
84
  }
85
85
  ```
86
86
 
87
- 24 MCP tools available. Works with Claude Code, Cursor, Windsurf, VS Code Copilot, Continue, Cody, ChatGPT Desktop, Gemini CLI, JetBrains, Zed, and 17+ AI tools.
87
+ 27 MCP tools + 7 resources available. Works with Claude Code, Cursor, Windsurf, VS Code Copilot, Continue, Cody, ChatGPT Desktop, Gemini CLI, JetBrains, Zed, and 17+ AI tools. **V3.1: Active Memory tools auto-learn your patterns.**
88
88
 
89
89
  ### Dual Interface: MCP + CLI
90
90
 
@@ -247,6 +247,42 @@ slm dashboard # Opens at http://localhost:8765
247
247
 
248
248
  ---
249
249
 
250
+ ## Active Memory (V3.1) — Memory That Learns
251
+
252
+ Most AI memory systems are passive databases — you store, you search, you get results. **SuperLocalMemory learns.**
253
+
254
+ Every recall you make generates learning signals. Over time, the system adapts to your patterns:
255
+
256
+ | Phase | Signals | What Happens |
257
+ |-------|---------|-------------|
258
+ | **Baseline** | 0-19 | Cross-encoder ranking (default behavior) |
259
+ | **Rule-Based** | 20+ | Heuristic boosts: recency, access count, trust score |
260
+ | **ML Model** | 200+ | LightGBM model trained on YOUR usage patterns |
261
+
262
+ ### Zero-Cost Learning Signals
263
+ No LLM tokens spent. Four mathematical signals computed locally:
264
+ - **Co-Retrieval** — memories retrieved together strengthen their connections
265
+ - **Confidence Lifecycle** — accessed facts get boosted, unused facts decay
266
+ - **Channel Performance** — tracks which retrieval channel works best for your queries
267
+ - **Entropy Gap** — surprising content gets prioritized for deeper indexing
268
+
269
+ ### Auto-Capture & Auto-Recall
270
+ ```bash
271
+ slm hooks install # Install Claude Code hooks for invisible injection
272
+ slm observe "We decided to use PostgreSQL" # Auto-detects decisions, bugs, preferences
273
+ slm session-context # Get relevant context at session start
274
+ ```
275
+
276
+ ### MCP Active Memory Tools
277
+ Three new tools for AI assistants:
278
+ - `session_init` — call at session start, get relevant project context automatically
279
+ - `observe` — send conversation content, auto-captures decisions/bugs/preferences
280
+ - `report_feedback` — explicit feedback for faster learning
281
+
282
+ **No competitor learns at zero token cost.** Mem0, Zep, and Letta all require cloud LLM calls for their learning loops. SLM learns through mathematics.
283
+
284
+ ---
285
+
250
286
  ## Features
251
287
 
252
288
  ### Retrieval
@@ -2,7 +2,7 @@
2
2
  > SuperLocalMemory V3 Documentation
3
3
  > https://superlocalmemory.com | Part of Qualixar
4
4
 
5
- Get your AI's memory system running in under 5 minutes.
5
+ Get your AI's memory system running in under 5 minutes. **V3.1: Now with Active Memory — your memory learns from your usage and gets smarter over time, at zero token cost.**
6
6
 
7
7
  ---
8
8
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.1.0",
3
+ "version": "3.1.1",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.1.0"
3
+ version = "3.1.1"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -291,6 +291,12 @@ def cmd_recall(args: Namespace) -> None:
291
291
  ])
292
292
  return
293
293
 
294
+ # Record learning signals (CLI path — works without MCP)
295
+ try:
296
+ _cli_record_signals(config, args.query, response.results)
297
+ except Exception:
298
+ pass
299
+
294
300
  if not response.results:
295
301
  print("No memories found.")
296
302
  return
@@ -298,6 +304,26 @@ def cmd_recall(args: Namespace) -> None:
298
304
  print(f" {i}. [{r.score:.2f}] {r.fact.content[:120]}")
299
305
 
300
306
 
307
+ def _cli_record_signals(config, query, results):
308
+ """Record learning signals from CLI recall (no MCP dependency)."""
309
+ from pathlib import Path
310
+ from superlocalmemory.learning.feedback import FeedbackCollector
311
+ from superlocalmemory.learning.signals import LearningSignals
312
+ slm_dir = Path.home() / ".superlocalmemory"
313
+ pid = config.active_profile
314
+ fact_ids = [r.fact.fact_id for r in results[:10]]
315
+ if not fact_ids:
316
+ return
317
+ FeedbackCollector(slm_dir / "learning.db").record_implicit(
318
+ profile_id=pid, query=query,
319
+ fact_ids_returned=fact_ids, fact_ids_available=fact_ids,
320
+ )
321
+ signals = LearningSignals(slm_dir / "learning.db")
322
+ signals.record_co_retrieval(pid, fact_ids)
323
+ for fid in fact_ids[:5]:
324
+ LearningSignals.boost_confidence(str(slm_dir / "memory.db"), fid)
325
+
326
+
301
327
  def cmd_forget(args: Namespace) -> None:
302
328
  """Delete memories matching a query."""
303
329
  from superlocalmemory.core.engine import MemoryEngine
@@ -314,3 +314,6 @@ class FeedbackCollector:
314
314
  }
315
315
  finally:
316
316
  conn.close()
317
+
318
+ # Alias used by dashboard routes
319
+ get_feedback_summary = get_summary
@@ -17,10 +17,37 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
17
17
  from __future__ import annotations
18
18
 
19
19
  import logging
20
+ from pathlib import Path
20
21
  from typing import Callable
21
22
 
22
23
  logger = logging.getLogger(__name__)
23
24
 
25
+ MEMORY_DIR = Path.home() / ".superlocalmemory"
26
+ DB_PATH = MEMORY_DIR / "memory.db"
27
+
28
+
29
+ def _emit_event(event_type: str, payload: dict | None = None,
30
+ source_agent: str = "mcp_client") -> None:
31
+ """Emit an event to the EventBus (best-effort, never raises)."""
32
+ try:
33
+ from superlocalmemory.infra.event_bus import EventBus
34
+ bus = EventBus.get_instance(str(DB_PATH))
35
+ bus.emit(event_type, payload=payload, source_agent=source_agent,
36
+ source_protocol="mcp")
37
+ except Exception:
38
+ pass
39
+
40
+
41
+ def _register_agent(agent_id: str, profile_id: str) -> None:
42
+ """Register an agent in the AgentRegistry (best-effort)."""
43
+ try:
44
+ from superlocalmemory.core.registry import AgentRegistry
45
+ registry_path = MEMORY_DIR / "agents.json"
46
+ registry = AgentRegistry(persist_path=registry_path)
47
+ registry.register_agent(agent_id, profile_id)
48
+ except Exception:
49
+ pass
50
+
24
51
 
25
52
  def register_active_tools(server, get_engine: Callable) -> None:
26
53
  """Register 3 active memory tools on *server*."""
@@ -78,6 +105,14 @@ def register_active_tools(server, get_engine: Callable) -> None:
78
105
  except Exception:
79
106
  pass
80
107
 
108
+ # Register agent + emit event
109
+ _register_agent("mcp_client", pid)
110
+ _emit_event("agent.connected", {
111
+ "agent_id": "mcp_client",
112
+ "project_path": project_path,
113
+ "memory_count": len(memories),
114
+ })
115
+
81
116
  return {
82
117
  "success": True,
83
118
  "context": context,
@@ -148,6 +183,14 @@ def register_active_tools(server, get_engine: Callable) -> None:
148
183
  metadata={"agent_id": agent_id, "source": "auto-observe"},
149
184
  )
150
185
 
186
+ if stored:
187
+ _emit_event("memory.created", {
188
+ "agent_id": agent_id,
189
+ "category": decision.category,
190
+ "content_preview": content[:80],
191
+ "source": "auto-observe",
192
+ }, source_agent=agent_id)
193
+
151
194
  return {
152
195
  "captured": stored,
153
196
  "category": decision.category,
@@ -191,6 +234,13 @@ def register_active_tools(server, get_engine: Callable) -> None:
191
234
 
192
235
  count = engine._adaptive_learner.get_feedback_count(pid)
193
236
 
237
+ _emit_event("pattern.learned", {
238
+ "fact_id": fact_id,
239
+ "feedback": feedback,
240
+ "total_signals": count,
241
+ "phase": 1 if count < 50 else (2 if count < 200 else 3),
242
+ })
243
+
194
244
  return {
195
245
  "success": True,
196
246
  "feedback_id": record.feedback_id,
@@ -15,10 +15,25 @@ from __future__ import annotations
15
15
 
16
16
  import json
17
17
  import logging
18
+ from pathlib import Path
18
19
  from typing import Any, Callable
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
23
+ _DB_PATH = str(Path.home() / ".superlocalmemory" / "memory.db")
24
+
25
+
26
+ def _emit_event(event_type: str, payload: dict | None = None,
27
+ source_agent: str = "mcp_client") -> None:
28
+ """Emit an event to the EventBus (best-effort, never raises)."""
29
+ try:
30
+ from superlocalmemory.infra.event_bus import EventBus
31
+ bus = EventBus.get_instance(_DB_PATH)
32
+ bus.emit(event_type, payload=payload, source_agent=source_agent,
33
+ source_protocol="mcp")
34
+ except Exception:
35
+ pass
36
+
22
37
 
23
38
  def _record_recall_hits(get_engine: Callable, query: str, results: list[dict]) -> None:
24
39
  """Record implicit feedback + learning signals for each recall.
@@ -89,6 +104,11 @@ def register_core_tools(server, get_engine: Callable) -> None:
89
104
  "session_id": session_id,
90
105
  })
91
106
  if result.get("ok"):
107
+ _emit_event("memory.created", {
108
+ "content_preview": content[:80],
109
+ "agent_id": agent_id,
110
+ "fact_count": result.get("count", 0),
111
+ }, source_agent=agent_id)
92
112
  return {"success": True, "fact_ids": result.get("fact_ids", []), "count": result.get("count", 0)}
93
113
  return {"success": False, "error": result.get("error", "Store failed")}
94
114
  except Exception as exc:
@@ -108,6 +128,12 @@ def register_core_tools(server, get_engine: Callable) -> None:
108
128
  _record_recall_hits(get_engine, query, result.get("results", []))
109
129
  except Exception:
110
130
  pass # Feedback is non-critical, never block recall
131
+ _emit_event("memory.recalled", {
132
+ "query": query[:80],
133
+ "result_count": result.get("result_count", 0),
134
+ "query_type": result.get("query_type", "unknown"),
135
+ "agent_id": agent_id,
136
+ }, source_agent=agent_id)
111
137
  return {
112
138
  "success": True,
113
139
  "results": result.get("results", []),
@@ -362,6 +388,10 @@ def register_core_tools(server, get_engine: Callable) -> None:
362
388
  })
363
389
  if result.get("ok"):
364
390
  logger.info("Memory deleted: %s by agent: %s", fact_id[:16], agent_id)
391
+ _emit_event("memory.deleted", {
392
+ "fact_id": fact_id,
393
+ "agent_id": agent_id,
394
+ }, source_agent=agent_id)
365
395
  return {"success": True, "deleted": fact_id, "agent_id": agent_id}
366
396
  return {"success": False, "error": result.get("error", "Delete failed")}
367
397
  except Exception as exc:
@@ -43,13 +43,15 @@ async def get_agents(
43
43
  if not REGISTRY_AVAILABLE:
44
44
  return {"agents": [], "count": 0, "message": "Agent registry not available"}
45
45
  try:
46
- engine = getattr(request.app.state, "engine", None)
47
- if engine and hasattr(engine, '_db'):
48
- registry = AgentRegistry(engine._db)
49
- agents = registry.list_agents(protocol=protocol, limit=limit)
50
- stats = registry.get_stats()
51
- return {"agents": agents, "count": len(agents), "stats": stats}
52
- return {"agents": [], "count": 0, "message": "Engine not initialized"}
46
+ from pathlib import Path
47
+ registry_path = Path.home() / ".superlocalmemory" / "agents.json"
48
+ registry = AgentRegistry(persist_path=registry_path)
49
+ agents = registry.list_agents()
50
+ return {
51
+ "agents": agents,
52
+ "count": len(agents),
53
+ "stats": {"total_agents": len(agents)},
54
+ }
53
55
  except Exception as e:
54
56
  raise HTTPException(status_code=500, detail=f"Agent registry error: {str(e)}")
55
57
 
@@ -60,11 +62,11 @@ async def get_agent_stats(request: Request):
60
62
  if not REGISTRY_AVAILABLE:
61
63
  return {"total_agents": 0, "message": "Agent registry not available"}
62
64
  try:
63
- engine = getattr(request.app.state, "engine", None)
64
- if engine and hasattr(engine, '_db'):
65
- registry = AgentRegistry(engine._db)
66
- return registry.get_stats()
67
- return {"total_agents": 0, "message": "Engine not initialized"}
65
+ from pathlib import Path
66
+ registry_path = Path.home() / ".superlocalmemory" / "agents.json"
67
+ registry = AgentRegistry(persist_path=registry_path)
68
+ agents = registry.list_agents()
69
+ return {"total_agents": len(agents)}
68
70
  except Exception as e:
69
71
  raise HTTPException(status_code=500, detail=f"Agent stats error: {str(e)}")
70
72
 
@@ -104,7 +104,7 @@ async def learning_status():
104
104
  feedback = _get_feedback()
105
105
  if feedback:
106
106
  try:
107
- old_stats = feedback.get_feedback_summary()
107
+ old_stats = feedback.get_feedback_summary(active_profile)
108
108
  if isinstance(old_stats, dict):
109
109
  old_stats["feedback_count"] = signal_count
110
110
  old_stats["active_profile"] = active_profile
@@ -274,8 +274,9 @@ async def feedback_stats():
274
274
  by_type = {}
275
275
 
276
276
  if feedback:
277
- summary = feedback.get_feedback_summary()
278
- total = summary.get("total_signals", 0)
277
+ profile = get_active_profile()
278
+ summary = feedback.get_feedback_summary(profile)
279
+ total = summary.get("total", summary.get("total_signals", 0))
279
280
  by_channel = summary.get("by_channel", {})
280
281
  by_type = summary.get("by_type", {})
281
282
 
@@ -38,32 +38,32 @@ async def lifecycle_status():
38
38
  conn = sqlite3.connect(str(DB_PATH))
39
39
  conn.row_factory = sqlite3.Row
40
40
 
41
- # Try V3 schema first (atomic_facts with lifecycle_state)
41
+ # Try V3 schema first (atomic_facts with lifecycle column)
42
42
  states = {}
43
43
  try:
44
44
  rows = conn.execute(
45
- "SELECT lifecycle_state, COUNT(*) as cnt "
46
- "FROM atomic_facts WHERE profile_id = ? GROUP BY lifecycle_state",
45
+ "SELECT lifecycle, COUNT(*) as cnt "
46
+ "FROM atomic_facts WHERE profile_id = ? GROUP BY lifecycle",
47
47
  (profile,),
48
48
  ).fetchall()
49
49
  states = {
50
- (row['lifecycle_state'] or 'active'): row['cnt']
50
+ (row['lifecycle'] or 'active'): row['cnt']
51
51
  for row in rows
52
52
  }
53
53
  except sqlite3.OperationalError:
54
54
  # V2 fallback: memories table
55
55
  try:
56
56
  rows = conn.execute(
57
- "SELECT lifecycle_state, COUNT(*) as cnt "
58
- "FROM memories WHERE profile = ? GROUP BY lifecycle_state",
57
+ "SELECT lifecycle, COUNT(*) as cnt "
58
+ "FROM memories WHERE profile = ? GROUP BY lifecycle",
59
59
  (profile,),
60
60
  ).fetchall()
61
61
  states = {
62
- (row['lifecycle_state'] or 'active'): row['cnt']
62
+ (row['lifecycle'] or 'active'): row['cnt']
63
63
  for row in rows
64
64
  }
65
65
  except sqlite3.OperationalError:
66
- # No lifecycle_state column at all
66
+ # No lifecycle column at all — count everything as active
67
67
  total = conn.execute(
68
68
  "SELECT COUNT(*) FROM atomic_facts WHERE profile_id = ?",
69
69
  (profile,),
@@ -80,7 +80,7 @@ async def lifecycle_status():
80
80
  "SELECT AVG(julianday('now') - julianday(created_at)) as avg_age, "
81
81
  "MIN(julianday('now') - julianday(created_at)) as min_age, "
82
82
  "MAX(julianday('now') - julianday(created_at)) as max_age "
83
- "FROM atomic_facts WHERE profile_id = ? AND lifecycle_state = ?",
83
+ "FROM atomic_facts WHERE profile_id = ? AND lifecycle = ?",
84
84
  (profile, state),
85
85
  ).fetchone()
86
86
  if row and row['avg_age'] is not None:
@@ -46,53 +46,37 @@ def _fetch_graph_data(
46
46
  ) -> tuple[list, list, list]:
47
47
  """Fetch graph nodes, links, clusters from V3 or V2 schema."""
48
48
  if use_v3:
49
- # Graph-first: fetch edges, then get connected nodes, then fill slots
49
+ # Recency-first: get the most recent nodes, then find their edges
50
50
  cursor.execute("""
51
- SELECT source_id as source, target_id as target,
52
- weight, edge_type as relationship_type
53
- FROM graph_edges WHERE profile_id = ?
54
- ORDER BY weight DESC
55
- """, (profile,))
56
- all_links = cursor.fetchall()
51
+ SELECT fact_id as id, content, fact_type as category,
52
+ confidence as importance, session_id as project_name,
53
+ created_at
54
+ FROM atomic_facts
55
+ WHERE profile_id = ? AND confidence >= ?
56
+ ORDER BY created_at DESC
57
+ LIMIT ?
58
+ """, (profile, min_importance / 10.0, max_nodes))
59
+ nodes = cursor.fetchall()
57
60
 
58
- connected_ids = set()
59
- for lk in all_links:
60
- connected_ids.add(lk['source'])
61
- connected_ids.add(lk['target'])
61
+ node_ids = {n['id'] for n in nodes}
62
62
 
63
- # Fetch connected nodes first (these have edges to display)
64
- connected_nodes: list = []
65
- if connected_ids:
66
- ph = ','.join('?' * len(connected_ids))
63
+ # Fetch edges between these nodes
64
+ if node_ids:
65
+ ph = ','.join('?' * len(node_ids))
66
+ id_list = list(node_ids)
67
67
  cursor.execute(f"""
68
- SELECT fact_id as id, content, fact_type as category,
69
- confidence as importance, session_id as project_name,
70
- created_at
71
- FROM atomic_facts
72
- WHERE profile_id = ? AND fact_id IN ({ph})
73
- """, [profile] + list(connected_ids))
74
- connected_nodes = cursor.fetchall()
75
-
76
- # Fill remaining slots with top-confidence unconnected nodes
77
- remaining = max_nodes - len(connected_nodes)
78
- if remaining > 0:
79
- existing = {n['id'] for n in connected_nodes}
80
- cursor.execute("""
81
- SELECT fact_id as id, content, fact_type as category,
82
- confidence as importance, session_id as project_name,
83
- created_at
84
- FROM atomic_facts
85
- WHERE profile_id = ? AND confidence >= ?
86
- ORDER BY confidence DESC, created_at DESC
87
- LIMIT ?
88
- """, (profile, min_importance / 10.0, remaining + len(existing)))
89
- for n in cursor.fetchall():
90
- if n['id'] not in existing:
91
- connected_nodes.append(n)
92
- if len(connected_nodes) >= max_nodes:
93
- break
94
-
95
- nodes = connected_nodes[:max_nodes]
68
+ SELECT source_id as source, target_id as target,
69
+ weight, edge_type as relationship_type
70
+ FROM graph_edges
71
+ WHERE profile_id = ?
72
+ AND source_id IN ({ph}) AND target_id IN ({ph})
73
+ ORDER BY weight DESC
74
+ """, [profile] + id_list + id_list)
75
+ all_links = cursor.fetchall()
76
+ else:
77
+ all_links = []
78
+
79
+ links = all_links
96
80
  for n in nodes:
97
81
  n['entities'] = []
98
82
  n['content_preview'] = _preview(n.get('content'))
@@ -101,7 +85,33 @@ def _fetch_graph_data(
101
85
  node_ids = {n['id'] for n in nodes}
102
86
  links = [lk for lk in all_links
103
87
  if lk['source'] in node_ids and lk['target'] in node_ids]
104
- return nodes, links, []
88
+
89
+ # Compute clusters from memory_scenes
90
+ clusters = []
91
+ try:
92
+ cursor.execute("""
93
+ SELECT scene_id, theme, fact_ids_json
94
+ FROM memory_scenes WHERE profile_id = ?
95
+ """, (profile,))
96
+ for row in cursor.fetchall():
97
+ fact_ids = []
98
+ try:
99
+ fact_ids = json.loads(row.get('fact_ids_json', '[]') or '[]')
100
+ except (json.JSONDecodeError, TypeError):
101
+ pass
102
+ # Only include clusters that overlap with displayed nodes
103
+ overlap = [fid for fid in fact_ids if fid in node_ids]
104
+ if overlap:
105
+ clusters.append({
106
+ 'cluster_id': row['scene_id'],
107
+ 'size': len(fact_ids),
108
+ 'visible_size': len(overlap),
109
+ 'theme': row.get('theme', ''),
110
+ })
111
+ except Exception:
112
+ pass
113
+
114
+ return nodes, links, clusters
105
115
 
106
116
  # V2 fallback
107
117
  try:
@@ -362,15 +372,54 @@ async def get_clusters(request: Request):
362
372
  profile = get_active_profile()
363
373
  unclustered = 0
364
374
 
365
- if _has_table(cursor, 'scene_facts'):
375
+ # V3 schema: memory_scenes stores fact_ids_json (JSON array)
376
+ if _has_table(cursor, 'memory_scenes'):
366
377
  cursor.execute("""
367
- SELECT s.scene_id as cluster_id, COUNT(sf.fact_id) as member_count,
368
- s.summary, s.created_at as first_memory
369
- FROM scenes s JOIN scene_facts sf ON s.scene_id = sf.scene_id
370
- WHERE s.profile_id = ? GROUP BY s.scene_id ORDER BY member_count DESC
378
+ SELECT scene_id as cluster_id, theme, fact_ids_json,
379
+ entity_ids_json, created_at as first_memory
380
+ FROM memory_scenes WHERE profile_id = ?
381
+ ORDER BY created_at DESC
371
382
  """, (profile,))
372
- clusters = [dict(r, top_entities=[]) for r in cursor.fetchall()]
383
+ raw_scenes = cursor.fetchall()
384
+ clusters = []
385
+ for scene in raw_scenes:
386
+ fact_ids = []
387
+ try:
388
+ fact_ids = json.loads(scene.get('fact_ids_json', '[]') or '[]')
389
+ except (json.JSONDecodeError, TypeError):
390
+ pass
391
+ entity_ids = []
392
+ try:
393
+ entity_ids = json.loads(scene.get('entity_ids_json', '[]') or '[]')
394
+ except (json.JSONDecodeError, TypeError):
395
+ pass
396
+ clusters.append({
397
+ 'cluster_id': scene['cluster_id'],
398
+ 'member_count': len(fact_ids),
399
+ 'categories': scene.get('theme', ''),
400
+ 'summary': scene.get('theme', ''),
401
+ 'first_memory': scene.get('first_memory', ''),
402
+ 'top_entities': entity_ids[:5],
403
+ })
404
+ # Filter out empty clusters
405
+ clusters = [c for c in clusters if c['member_count'] > 0]
406
+ clusters.sort(key=lambda c: c['member_count'], reverse=True)
407
+
408
+ # Count facts not in any scene
409
+ all_scene_fact_ids = set()
410
+ for scene in raw_scenes:
411
+ try:
412
+ ids = json.loads(scene.get('fact_ids_json', '[]') or '[]')
413
+ all_scene_fact_ids.update(ids)
414
+ except (json.JSONDecodeError, TypeError):
415
+ pass
416
+ total_facts = cursor.execute(
417
+ "SELECT COUNT(*) as c FROM atomic_facts WHERE profile_id = ?",
418
+ (profile,),
419
+ ).fetchone()['c']
420
+ unclustered = total_facts - len(all_scene_fact_ids)
373
421
  else:
422
+ # V2 fallback
374
423
  try:
375
424
  cursor.execute("""
376
425
  SELECT cluster_id, COUNT(*) as member_count,
@@ -382,8 +431,14 @@ async def get_clusters(request: Request):
382
431
  clusters = [dict(r, top_entities=[]) for r in cursor.fetchall()]
383
432
  except Exception:
384
433
  clusters = []
385
- cursor.execute("SELECT COUNT(*) as c FROM memories WHERE cluster_id IS NULL AND profile = ?", (profile,))
386
- unclustered = cursor.fetchone()['c']
434
+ try:
435
+ cursor.execute(
436
+ "SELECT COUNT(*) as c FROM memories WHERE cluster_id IS NULL AND profile = ?",
437
+ (profile,),
438
+ )
439
+ unclustered = cursor.fetchone()['c']
440
+ except Exception:
441
+ unclustered = 0
387
442
 
388
443
  conn.close()
389
444
  return {"clusters": clusters, "total_clusters": len(clusters), "unclustered_count": unclustered}
@@ -392,21 +447,41 @@ async def get_clusters(request: Request):
392
447
 
393
448
 
394
449
  @router.get("/api/clusters/{cluster_id}")
395
- async def get_cluster_detail(request: Request, cluster_id: int, limit: int = Query(50, ge=1, le=200)):
396
- """Get detailed view of a specific cluster."""
450
+ async def get_cluster_detail(request: Request, cluster_id: str, limit: int = Query(50, ge=1, le=200)):
451
+ """Get detailed view of a specific cluster (scene)."""
397
452
  try:
398
453
  conn = get_db_connection()
399
454
  conn.row_factory = dict_factory
400
455
  cursor = conn.cursor()
401
456
  profile = get_active_profile()
402
457
 
403
- if _has_table(cursor, 'scene_facts'):
404
- cursor.execute("""
405
- SELECT f.fact_id as id, f.content, f.fact_type as category,
406
- f.confidence as importance, f.created_at
407
- FROM atomic_facts f JOIN scene_facts sf ON f.fact_id = sf.fact_id
408
- WHERE sf.scene_id = ? AND f.profile_id = ? ORDER BY f.confidence DESC LIMIT ?
409
- """, (str(cluster_id), profile, limit))
458
+ if _has_table(cursor, 'memory_scenes'):
459
+ # Get fact IDs from the scene's JSON array
460
+ cursor.execute(
461
+ "SELECT fact_ids_json, theme FROM memory_scenes "
462
+ "WHERE scene_id = ? AND profile_id = ?",
463
+ (cluster_id, profile),
464
+ )
465
+ scene_row = cursor.fetchone()
466
+ if scene_row:
467
+ fact_ids = []
468
+ try:
469
+ fact_ids = json.loads(scene_row.get('fact_ids_json', '[]') or '[]')
470
+ except (json.JSONDecodeError, TypeError):
471
+ pass
472
+ if fact_ids:
473
+ ph = ','.join('?' * min(len(fact_ids), limit))
474
+ cursor.execute(f"""
475
+ SELECT fact_id as id, content, fact_type as category,
476
+ confidence as importance, created_at
477
+ FROM atomic_facts
478
+ WHERE profile_id = ? AND fact_id IN ({ph})
479
+ ORDER BY confidence DESC
480
+ """, [profile] + fact_ids[:limit])
481
+ else:
482
+ cursor.execute("SELECT 1 WHERE 0") # empty result
483
+ else:
484
+ cursor.execute("SELECT 1 WHERE 0") # empty result
410
485
  else:
411
486
  cursor.execute("""
412
487
  SELECT id, content, summary, category, project_name, importance, created_at, tags