nexo-brain 7.24.0 → 7.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.24.0",
3
+ "version": "7.25.0",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,7 +18,9 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.24.0` is the current packaged-runtime line. Minor release over v7.23.13 - Home Agents, cognitive quality controls, English operational copy, and non-blocking task-open context are integrated into main.
21
+ Version `7.25.0` is the current packaged-runtime line. Minor release over v7.24.0 - Memory Fabric links transcript lookup, historical backup diary recovery, unified search and knowledge graph evidence so memories are not available only inside expiring snapshots.
22
+
23
+ Previously in `7.24.0`: minor release over v7.23.13 - Home Agents, cognitive quality controls, English operational copy, and non-blocking task-open context are integrated into main.
22
24
 
23
25
  Previously in `7.23.13`: patch over v7.23.12 - release guardrails now audit publish workflows for masked failures and add minimal-delta coverage for punctual UI edits.
24
26
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.24.0",
3
+ "version": "7.25.0",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -4715,6 +4715,11 @@ def _run_runtime_post_sync(dest: Path = NEXO_HOME, progress_fn=None) -> tuple[bo
4715
4715
  "reconcile_scripts = getattr(script_registry, 'reconcile_personal_scripts', None); "
4716
4716
  "result = reconcile_scripts(dry_run=False) if callable(reconcile_scripts) else {}; "
4717
4717
  "result = result if isinstance(result, dict) else {}; "
4718
+ "exec(\"try:\\n"
4719
+ " import memory_fabric\\n"
4720
+ " result['memory_fabric'] = memory_fabric.repair_memory_fabric(transcript_limit=1000, backup_limit=5000)\\n"
4721
+ "except Exception as exc:\\n"
4722
+ " result['memory_fabric_error'] = repr(exc)\"); "
4718
4723
  "result['retired_superseded_scripts'] = retired; "
4719
4724
  "result['retired_superseded_skills'] = retired_skills; "
4720
4725
  "print(json.dumps(result))"
@@ -4732,6 +4737,31 @@ def _run_runtime_post_sync(dest: Path = NEXO_HOME, progress_fn=None) -> tuple[bo
4732
4737
  reconcile_payload = _parse_runtime_init_payload(init_result.stdout or "")
4733
4738
  extra_actions, reconcile_message = _personal_schedule_reconcile_summary(reconcile_payload)
4734
4739
  actions.extend(extra_actions)
4740
+ memory_fabric_result = reconcile_payload.get("memory_fabric")
4741
+ if isinstance(memory_fabric_result, dict):
4742
+ transcript_indexed = int((memory_fabric_result.get("transcripts") or {}).get("indexed") or 0)
4743
+ historical_inserted = int((memory_fabric_result.get("backups") or {}).get("inserted") or 0)
4744
+ health = memory_fabric_result.get("health") or {}
4745
+ health_issues = health.get("issues") or []
4746
+ historical_health = health.get("historical_diaries") or {}
4747
+ unreconciled = int(historical_health.get("backup_rows_unreconciled") or 0)
4748
+ if transcript_indexed or historical_inserted:
4749
+ actions.append(f"memory-fabric-repaired:{transcript_indexed + historical_inserted}")
4750
+ _emit_progress(
4751
+ progress_fn,
4752
+ f"Memory Fabric: indexed {transcript_indexed} transcript(s), reconciled {historical_inserted} historical diary row(s).",
4753
+ )
4754
+ else:
4755
+ actions.append("memory-fabric-checked")
4756
+ if unreconciled:
4757
+ actions.append(f"memory-fabric-unreconciled:{unreconciled}")
4758
+ if memory_fabric_result.get("ok") is False or any(
4759
+ isinstance(issue, dict) and issue.get("code") == "backup_diaries_not_reconciled"
4760
+ for issue in health_issues
4761
+ ):
4762
+ actions.append("memory-fabric-warning")
4763
+ elif reconcile_payload.get("memory_fabric_error"):
4764
+ actions.append("memory-fabric-warning")
4735
4765
  if reconcile_message:
4736
4766
  _emit_progress(progress_fn, reconcile_message)
4737
4767
  except Exception as e:
@@ -302,6 +302,19 @@
302
302
  "run_on_boot": true,
303
303
  "run_on_wake": true
304
304
  },
305
+ {
306
+ "id": "memory-fabric",
307
+ "script": "scripts/nexo-memory-fabric.py",
308
+ "schedule": {"hour": 2, "minute": 35},
309
+ "description": "Daily Memory Fabric maintenance — refresh transcript search, historical backup diaries, and graph links",
310
+ "core": true,
311
+ "recovery_policy": "catchup",
312
+ "idempotent": true,
313
+ "max_catchup_age": 172800,
314
+ "stuck_after_seconds": 3600,
315
+ "run_on_boot": true,
316
+ "run_on_wake": true
317
+ },
305
318
  {
306
319
  "id": "local-index",
307
320
  "script": "scripts/nexo-local-index.py",
package/src/db/_fts.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """NEXO DB — Fts module."""
2
- import os, pathlib, sqlite3, threading, datetime
2
+ import os, pathlib, re, sqlite3, threading, datetime
3
3
  import paths
4
4
  from db._core import get_db, now_epoch, DB_PATH
5
5
 
@@ -328,22 +328,26 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
328
328
  limit: Max results (default 20)
329
329
  """
330
330
  conn = get_db()
331
- words = query.strip().split()
331
+ raw_query = query.strip()
332
+ words = raw_query.split()
332
333
  if not words:
333
334
  return []
334
335
 
335
336
  # Expand with synonyms for cross-language matching
336
337
  all_words = _expand_synonyms(words)
337
338
 
338
- # Build FTS5 query: each word as quoted term with OR for broad matching
339
+ # Build FTS5 query: each word as quoted term with OR for broad matching.
340
+ # Symbol-heavy identifiers (emails, paths, refs) need deterministic token
341
+ # boundaries so FTS5 never treats punctuation as query syntax.
339
342
  fts_terms = []
340
343
  for w in all_words:
341
344
  # Strip FTS5 special chars to avoid syntax errors
342
- safe = w.replace('"', '').replace("'", '').replace('*', '').replace('^', '').replace('-', ' ').strip()
345
+ safe = w.replace('"', '').replace("'", '').replace('*', '').replace('^', '').strip()
346
+ safe = re.sub(r"[-@/\\:]+", " ", safe)
343
347
  if not safe:
344
348
  continue
345
- # Split on dots (e.g., "capabilities.json" "capabilities" + "json")
346
- parts = [p.strip() for p in safe.split('.') if p.strip()]
349
+ # Split on dots and punctuation boundaries (e.g., emails, paths, files).
350
+ parts = [p.strip() for p in re.split(r"[.\s]+", safe) if p.strip()]
347
351
  for part in parts:
348
352
  fts_terms.append(f'"{part}"')
349
353
  # Add prefix search for camelCase/code identifiers (contains uppercase mid-word)
@@ -361,6 +365,24 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
361
365
  params.append(limit)
362
366
 
363
367
  try:
368
+ exact_rows = []
369
+ if re.search(r"[@/\\:.-]", raw_query):
370
+ exact_where = ""
371
+ exact_params = [f"%{raw_query}%", f"%{raw_query}%", f"%{raw_query}%"]
372
+ if source_filter:
373
+ exact_where = "AND source = ?"
374
+ exact_params.append(source_filter)
375
+ exact_params.append(limit)
376
+ exact_rows = conn.execute(f"""
377
+ SELECT source, source_id, title,
378
+ substr(body, 1, 240) AS snippet,
379
+ category, updated_at, -100.0 AS rank
380
+ FROM unified_search
381
+ WHERE (title LIKE ? OR body LIKE ? OR source_id LIKE ?) {exact_where}
382
+ ORDER BY updated_at DESC
383
+ LIMIT ?
384
+ """, exact_params).fetchall()
385
+
364
386
  rows = conn.execute(f"""
365
387
  SELECT source, source_id, title,
366
388
  snippet(unified_search, 3, '»', '«', '...', 40) AS snippet,
@@ -370,7 +392,16 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
370
392
  ORDER BY rank
371
393
  LIMIT ?
372
394
  """, params).fetchall()
373
- return [dict(r) for r in rows]
395
+ merged = []
396
+ seen = set()
397
+ for row in list(exact_rows) + list(rows):
398
+ item = dict(row)
399
+ key = (item.get("source"), item.get("source_id"))
400
+ if key in seen:
401
+ continue
402
+ seen.add(key)
403
+ merged.append(item)
404
+ return merged[:limit]
374
405
  except Exception:
375
406
  return []
376
407
 
@@ -403,4 +434,3 @@ def _migrate_add_index(conn, index_name: str, table: str, column: str):
403
434
  """Create index if it doesn't exist (idempotent)."""
404
435
  conn.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}({column})")
405
436
  conn.commit()
406
-
package/src/db/_schema.py CHANGED
@@ -2080,6 +2080,51 @@ def _m67_diary_quality_backfill_repair(conn):
2080
2080
  _migrate_add_index(conn, "idx_diary_archive_quality", "diary_archive", "quality_tier, quality_score, created_at")
2081
2081
 
2082
2082
 
2083
+ def _m68_memory_fabric_index(conn):
2084
+ """Memory Fabric v1 index tables for historical backup memory."""
2085
+ conn.executescript(
2086
+ """
2087
+ CREATE TABLE IF NOT EXISTS memory_fabric_sources (
2088
+ source_id TEXT PRIMARY KEY,
2089
+ source_type TEXT NOT NULL,
2090
+ source_ref TEXT NOT NULL,
2091
+ status TEXT NOT NULL DEFAULT 'active',
2092
+ item_count INTEGER NOT NULL DEFAULT 0,
2093
+ last_indexed_at TEXT DEFAULT '',
2094
+ metadata_json TEXT NOT NULL DEFAULT '{}'
2095
+ );
2096
+
2097
+ CREATE TABLE IF NOT EXISTS historical_diary_index (
2098
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
2099
+ source_backup_path TEXT NOT NULL,
2100
+ source_table TEXT NOT NULL DEFAULT 'session_diary',
2101
+ source_row_id INTEGER NOT NULL,
2102
+ session_id TEXT NOT NULL DEFAULT '',
2103
+ created_at TEXT NOT NULL DEFAULT '',
2104
+ domain TEXT NOT NULL DEFAULT '',
2105
+ summary TEXT NOT NULL DEFAULT '',
2106
+ decisions TEXT NOT NULL DEFAULT '',
2107
+ pending TEXT NOT NULL DEFAULT '',
2108
+ context_next TEXT NOT NULL DEFAULT '',
2109
+ mental_state TEXT NOT NULL DEFAULT '',
2110
+ self_critique TEXT NOT NULL DEFAULT '',
2111
+ source TEXT NOT NULL DEFAULT '',
2112
+ content_hash TEXT NOT NULL UNIQUE,
2113
+ indexed_at TEXT DEFAULT (datetime('now')),
2114
+ metadata_json TEXT NOT NULL DEFAULT '{}',
2115
+ UNIQUE(source_backup_path, source_table, source_row_id)
2116
+ );
2117
+
2118
+ CREATE INDEX IF NOT EXISTS idx_historical_diary_session
2119
+ ON historical_diary_index(session_id);
2120
+ CREATE INDEX IF NOT EXISTS idx_historical_diary_created
2121
+ ON historical_diary_index(created_at);
2122
+ CREATE INDEX IF NOT EXISTS idx_historical_diary_domain
2123
+ ON historical_diary_index(domain);
2124
+ """
2125
+ )
2126
+
2127
+
2083
2128
  MIGRATIONS = [
2084
2129
  (1, "learnings_columns", _m1_learnings_columns),
2085
2130
  (2, "followups_reasoning", _m2_followups_reasoning),
@@ -2148,6 +2193,7 @@ MIGRATIONS = [
2148
2193
  (65, "diary_quality", _m65_diary_quality),
2149
2194
  (66, "transcript_index", _m66_transcript_index),
2150
2195
  (67, "diary_quality_backfill_repair", _m67_diary_quality_backfill_repair),
2196
+ (68, "memory_fabric_index", _m68_memory_fabric_index),
2151
2197
  ]
2152
2198
 
2153
2199
 
@@ -3900,6 +3900,74 @@ def check_local_index_hygiene(fix: bool = False) -> DoctorCheck:
3900
3900
  )
3901
3901
 
3902
3902
 
3903
def check_memory_fabric_health(fix: bool = False) -> DoctorCheck:
    """Doctor check: report whether Memory Fabric coverage needs repair.

    With ``fix=True`` the check first calls
    ``memory_fabric.repair_memory_fabric`` and then reads the health report.
    The check is "degraded" when the report lists a
    ``transcript_index_empty`` or ``backup_diaries_not_reconciled`` issue and
    "healthy" otherwise. Any exception (including a failed import of
    ``memory_fabric``) is converted into a degraded result rather than raised.
    """
    blocking_codes = {"transcript_index_empty", "backup_diaries_not_reconciled"}
    try:
        import memory_fabric

        repair = None
        if fix:
            repair = memory_fabric.repair_memory_fabric(
                transcript_hours=720,
                transcript_limit=1000,
                backup_limit=5000,
            )
        report = memory_fabric.memory_fabric_health(include_backup_scan=True)
        issues = report.get("issues") or []

        # One evidence line per report section, then at most six issue lines.
        evidence = [
            f"{section}=" + json.dumps(report.get(section) or {}, sort_keys=True)
            for section in ("transcripts", "historical_diaries", "local_context", "knowledge_graph")
        ]
        for item in issues[:6]:
            if isinstance(item, dict):
                evidence.append(
                    f"issue={item.get('severity')}:{item.get('code')}:{item.get('message')}"
                )
        if repair:
            repair_summary = {
                "transcripts_indexed": (repair.get("transcripts") or {}).get("indexed"),
                "historical_diaries_inserted": (repair.get("backups") or {}).get("inserted"),
            }
            evidence.append("repair=" + json.dumps(repair_summary, sort_keys=True))

        needs_repair = any(
            isinstance(item, dict) and item.get("code") in blocking_codes
            for item in issues
        )
        if needs_repair:
            return DoctorCheck(
                id="runtime.memory_fabric",
                tier="runtime",
                status="degraded",
                severity="warn",
                summary="Memory Fabric coverage needs repair",
                evidence=evidence,
                repair_plan=["Run `nexo doctor --tier runtime --fix` or `nexo update` to warm transcript and historical backup indexes"],
                escalation_prompt="Some memory sources exist outside the active query indexes, so exact historical lookup may fall back to slow raw scans.",
                fixed=bool(repair),
            )
        return DoctorCheck(
            id="runtime.memory_fabric",
            tier="runtime",
            status="healthy",
            severity="info",
            summary="Memory Fabric coverage is queryable",
            evidence=evidence,
            repair_plan=[],
            fixed=bool(repair),
        )
    except Exception as exc:
        return DoctorCheck(
            id="runtime.memory_fabric",
            tier="runtime",
            status="degraded",
            severity="warn",
            summary="Memory Fabric health could not be checked",
            evidence=[str(exc)],
            repair_plan=["Inspect memory_fabric.py and DB migrations"],
            escalation_prompt="Support cannot verify unified memory coverage.",
        )
3969
+
3970
+
3903
3971
  def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
3904
3972
  """Run all runtime-tier checks. Read-only by default."""
3905
3973
  return [
@@ -3922,6 +3990,7 @@ def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
3922
3990
  safe_check(check_automation_caller_coverage),
3923
3991
  safe_check(check_state_watchers),
3924
3992
  safe_check(check_local_index_hygiene, fix=fix),
3993
+ safe_check(check_memory_fabric_health, fix=fix),
3925
3994
  safe_check(check_release_artifact_sync),
3926
3995
  safe_check(check_release_trace_hygiene),
3927
3996
  safe_check(check_launchagent_inventory),
@@ -0,0 +1,536 @@
1
+ from __future__ import annotations
2
+
3
+ """Memory Fabric release helpers.
4
+
5
+ This module is the product-owned bridge between existing memory islands:
6
+ transcript metadata, historical diary backups, local-context embeddings and the
7
+ cognitive knowledge graph. It does not copy raw transcripts into the DB.
8
+ """
9
+
10
+ import hashlib
11
+ import json
12
+ import re
13
+ import sqlite3
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ import paths
18
+ from db import get_db
19
+ from transcript_index import ensure_transcript_index
20
+ from transcript_utils import (
21
+ MAX_TRANSCRIPT_HOURS,
22
+ find_claude_session_files,
23
+ find_codex_session_files,
24
+ )
25
+
26
+ HISTORICAL_DIARY_SOURCE = "historical_diary"
27
+ HASH_EMBEDDING_MODEL = "nexo-local-hash-embedding"
28
+ EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
29
+
30
+
31
+ def ensure_memory_fabric_schema(conn: sqlite3.Connection | None = None) -> None:
32
+ db = conn or get_db()
33
+ db.executescript(
34
+ """
35
+ CREATE TABLE IF NOT EXISTS memory_fabric_sources (
36
+ source_id TEXT PRIMARY KEY,
37
+ source_type TEXT NOT NULL,
38
+ source_ref TEXT NOT NULL,
39
+ status TEXT NOT NULL DEFAULT 'active',
40
+ item_count INTEGER NOT NULL DEFAULT 0,
41
+ last_indexed_at TEXT DEFAULT '',
42
+ metadata_json TEXT NOT NULL DEFAULT '{}'
43
+ );
44
+
45
+ CREATE TABLE IF NOT EXISTS historical_diary_index (
46
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
47
+ source_backup_path TEXT NOT NULL,
48
+ source_table TEXT NOT NULL DEFAULT 'session_diary',
49
+ source_row_id INTEGER NOT NULL,
50
+ session_id TEXT NOT NULL DEFAULT '',
51
+ created_at TEXT NOT NULL DEFAULT '',
52
+ domain TEXT NOT NULL DEFAULT '',
53
+ summary TEXT NOT NULL DEFAULT '',
54
+ decisions TEXT NOT NULL DEFAULT '',
55
+ pending TEXT NOT NULL DEFAULT '',
56
+ context_next TEXT NOT NULL DEFAULT '',
57
+ mental_state TEXT NOT NULL DEFAULT '',
58
+ self_critique TEXT NOT NULL DEFAULT '',
59
+ source TEXT NOT NULL DEFAULT '',
60
+ content_hash TEXT NOT NULL UNIQUE,
61
+ indexed_at TEXT DEFAULT (datetime('now')),
62
+ metadata_json TEXT NOT NULL DEFAULT '{}',
63
+ UNIQUE(source_backup_path, source_table, source_row_id)
64
+ );
65
+
66
+ CREATE INDEX IF NOT EXISTS idx_historical_diary_session
67
+ ON historical_diary_index(session_id);
68
+ CREATE INDEX IF NOT EXISTS idx_historical_diary_created
69
+ ON historical_diary_index(created_at);
70
+ CREATE INDEX IF NOT EXISTS idx_historical_diary_domain
71
+ ON historical_diary_index(domain);
72
+ """
73
+ )
74
+ if conn is None:
75
+ db.commit()
76
+
77
+
78
+ def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
79
+ row = conn.execute(
80
+ "SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1",
81
+ (table,),
82
+ ).fetchone()
83
+ return bool(row)
84
+
85
+
86
+ def _fts_upsert_with_conn(
87
+ conn: sqlite3.Connection,
88
+ source: str,
89
+ source_id: str,
90
+ title: str,
91
+ body: str,
92
+ category: str = "",
93
+ ) -> None:
94
+ conn.execute("DELETE FROM unified_search WHERE source = ? AND source_id = ?", (source, str(source_id)))
95
+ conn.execute(
96
+ """
97
+ INSERT INTO unified_search(source, source_id, title, body, category, updated_at)
98
+ VALUES (?, ?, ?, ?, ?, datetime('now'))
99
+ """,
100
+ (source, str(source_id), str(title)[:200], body or "", category or ""),
101
+ )
102
+
103
+
104
+ def _row_value(row: sqlite3.Row | dict[str, Any], key: str, default: str = "") -> str:
105
+ try:
106
+ if isinstance(row, sqlite3.Row) and key not in row.keys():
107
+ return default
108
+ value = row[key]
109
+ except Exception:
110
+ return default
111
+ return "" if value is None else str(value)
112
+
113
+
114
def _historical_diary_hash(backup_path: Path, row: sqlite3.Row | dict[str, Any]) -> str:
    """Return a SHA-256 hex digest identifying a diary row by its content.

    The digest covers the row's id, session id, creation time, summary,
    decisions, pending items and next-context text. ``backup_path`` is accepted
    but does not take part in the hash, so an identical row found in several
    backups yields the same digest.
    """
    hashed_fields = (
        "id",
        "session_id",
        "created_at",
        "summary",
        "decisions",
        "pending",
        "context_next",
    )
    payload = {name: _row_value(row, name) for name in hashed_fields}
    encoded = json.dumps(payload, sort_keys=True).encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()
125
+
126
+
127
def _diary_body(row: sqlite3.Row | dict[str, Any]) -> str:
    """Join the non-empty narrative fields of a diary row with `` | ``."""
    narrative_fields = (
        "summary",
        "decisions",
        "pending",
        "context_next",
        "mental_state",
        "self_critique",
        "user_signals",
    )
    values = (_row_value(row, name) for name in narrative_fields)
    # filter(None, ...) drops the empty strings, keeping field order.
    return " | ".join(filter(None, values))
141
+
142
+
143
def _link_historical_diary_to_kg(hist: sqlite3.Row, row: sqlite3.Row | dict[str, Any]) -> int:
    """Mirror one historical diary row into the knowledge graph.

    Upserts a ``diary`` node for the indexed row and links it to its session
    (when the row has a session id), its area (the row's domain, or
    ``"general"`` when empty) and up to 12 distinct e-mail addresses found in
    the diary text.

    Args:
        hist: The ``historical_diary_index`` row; ``id`` is required and
            ``source_backup_path`` is recorded on the node when present.
        row: The original backup ``session_diary`` row.

    Returns:
        The number of edges upserted, or 0 if anything fails. Linking is
        best-effort, so every exception (including a missing
        ``knowledge_graph`` module) is swallowed.
    """
    try:
        import knowledge_graph as kg

        diary_ref = f"historical_diary:{hist['id']}"
        session_id = _row_value(row, "session_id")
        domain = _row_value(row, "domain") or "general"
        label = _row_value(row, "summary") or session_id or diary_ref
        kg.upsert_node(
            "diary",
            diary_ref,
            label,
            {
                "created_at": _row_value(row, "created_at"),
                "session_id": session_id,
                "source": "backup",
                "backup_path": _row_value(hist, "source_backup_path"),
            },
        )

        def link(relation: str, target_type: str, target_ref: str, confidence: float) -> None:
            kg.upsert_edge(
                "diary",
                diary_ref,
                relation,
                target_type,
                target_ref,
                confidence=confidence,
                source_memory_id=diary_ref,
            )

        edges = 0
        if session_id:
            kg.upsert_node("session", f"session:{session_id}", session_id, {"source": "historical_diary"})
            link("describes_session", "session", f"session:{session_id}", 0.95)
            edges += 1

        # `domain` always has a value here because of the "general" fallback.
        kg.upsert_node("area", f"area:{domain}", domain, {"source": "historical_diary"})
        link("belongs_to_area", "area", f"area:{domain}", 0.8)
        edges += 1

        # Lower-case before de-duplicating so case variants of one address are
        # linked (and counted) once and do not use up the 12-address cap.
        emails = sorted({found.lower() for found in EMAIL_RE.findall(_diary_body(row))})[:12]
        for email in emails:
            kg.upsert_node("email", f"email:{email}", email, {"source": "historical_diary"})
            link("mentions_email", "email", f"email:{email}", 0.75)
            edges += 1
        return edges
    except Exception:
        return 0
203
+
204
+
205
+ def _backup_db_paths(backups_root: str | Path | None = None, *, max_files: int = 40) -> list[Path]:
206
+ root = Path(backups_root) if backups_root is not None else paths.backups_dir()
207
+ if not root.exists():
208
+ return []
209
+ candidates: list[Path] = []
210
+ for path in root.rglob("*.db"):
211
+ name = path.name.lower()
212
+ if name.endswith("-wal") or name.endswith("-shm"):
213
+ continue
214
+ candidates.append(path)
215
+ def sort_key(item: Path) -> tuple[int, float]:
216
+ try:
217
+ mtime = item.stat().st_mtime if item.exists() else 0.0
218
+ except OSError:
219
+ mtime = 0.0
220
+ weekly_priority = 1 if item.name.startswith("weekly-") or "weekly" in item.parts else 0
221
+ return (weekly_priority, mtime)
222
+
223
+ candidates.sort(key=sort_key, reverse=True)
224
+ return candidates[: max(1, int(max_files or 1))]
225
+
226
+
227
+ def _connect_backup(path: Path) -> sqlite3.Connection | None:
228
+ try:
229
+ uri = f"file:{path.resolve().as_posix()}?mode=ro"
230
+ conn = sqlite3.connect(uri, uri=True, timeout=1.0)
231
+ conn.row_factory = sqlite3.Row
232
+ return conn
233
+ except Exception:
234
+ return None
235
+
236
+
237
def _active_diary_keys(conn: sqlite3.Connection) -> set[tuple[str, str]]:
    """Collect ``(session_id, created_at)`` pairs already in active memory.

    Reads the ``session_diary`` and ``diary_archive`` tables, skipping either
    one if it does not exist. NULL values are normalised to empty strings.
    """
    found: set[tuple[str, str]] = set()
    for table in ("session_diary", "diary_archive"):
        if _table_exists(conn, table):
            # Table names come from the fixed tuple above, never from input.
            records = conn.execute(f"SELECT session_id, created_at FROM {table}").fetchall()
            found.update(
                (str(record["session_id"] or ""), str(record["created_at"] or ""))
                for record in records
            )
    return found
245
+
246
+
247
def reconcile_backup_diaries(
    *,
    backups_root: str | Path | None = None,
    max_backup_files: int = 40,
    limit: int = 5000,
) -> dict[str, Any]:
    """Index missing session diaries from technical backups into active search.

    Rows are copied into a historical index, not into active `session_diary`.
    That keeps provenance intact and avoids overwriting current memory.

    Args:
        backups_root: Directory holding backup databases; defaults to
            ``paths.backups_dir()``.
        max_backup_files: Maximum number of backup files to open.
        limit: Maximum number of backup rows to scan across all files. Rows
            that are skipped still count toward this budget.

    Returns:
        A dict with ``ok`` plus counters: ``scanned_backups``,
        ``unreadable_backups``, ``scanned_rows``, ``skipped_active``,
        ``inserted``, ``fts_rows`` and ``kg_edges``.
    """
    conn = get_db()
    ensure_memory_fabric_schema(conn)
    active_keys = _active_diary_keys(conn)
    scanned_backups = 0
    unreadable_backups = 0
    scanned_rows = 0
    skipped_active = 0
    inserted = 0
    fts_rows = 0
    kg_edges = 0

    for backup_path in _backup_db_paths(backups_root, max_files=max_backup_files):
        if scanned_rows >= limit:
            break
        backup_conn = _connect_backup(backup_path)
        if backup_conn is None:
            continue
        try:
            # Only reads from the backup are guarded: one corrupt or locked
            # backup must not abort the whole run, while errors on the active
            # database below still propagate.
            try:
                if not _table_exists(backup_conn, "session_diary"):
                    continue
                rows = backup_conn.execute(
                    "SELECT * FROM session_diary ORDER BY created_at DESC LIMIT ?",
                    (max(1, int(limit - scanned_rows)),),
                ).fetchall()
            except sqlite3.Error:
                unreadable_backups += 1
                continue
            scanned_backups += 1
            for row in rows:
                scanned_rows += 1
                key = (_row_value(row, "session_id"), _row_value(row, "created_at"))
                if key in active_keys:
                    skipped_active += 1
                    continue
                content_hash = _historical_diary_hash(backup_path, row)
                metadata = {
                    "backup_name": backup_path.name,
                    "quality_tier": _row_value(row, "quality_tier"),
                    "quality_score": _row_value(row, "quality_score"),
                }
                # total_changes only grows when INSERT OR IGNORE really inserts.
                before = conn.total_changes
                conn.execute(
                    """
                    INSERT OR IGNORE INTO historical_diary_index (
                        source_backup_path, source_table, source_row_id,
                        session_id, created_at, domain, summary, decisions,
                        pending, context_next, mental_state, self_critique,
                        source, content_hash, metadata_json
                    )
                    VALUES (?, 'session_diary', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        str(backup_path),
                        int(_row_value(row, "id", "0") or 0),
                        _row_value(row, "session_id"),
                        _row_value(row, "created_at"),
                        _row_value(row, "domain"),
                        _row_value(row, "summary"),
                        _row_value(row, "decisions"),
                        _row_value(row, "pending"),
                        _row_value(row, "context_next"),
                        _row_value(row, "mental_state"),
                        _row_value(row, "self_critique"),
                        _row_value(row, "source"),
                        content_hash,
                        json.dumps(metadata, ensure_ascii=False, sort_keys=True),
                    ),
                )
                if conn.total_changes > before:
                    inserted += 1
                # source_backup_path is selected so the knowledge-graph node
                # can record which backup the diary came from.
                hist = conn.execute(
                    "SELECT id, summary, domain, source_backup_path "
                    "FROM historical_diary_index WHERE content_hash=?",
                    (content_hash,),
                ).fetchone()
                if hist:
                    title = str(hist["summary"] or _row_value(row, "session_id") or "Historical diary")
                    _fts_upsert_with_conn(
                        conn,
                        HISTORICAL_DIARY_SOURCE,
                        str(hist["id"]),
                        title,
                        _diary_body(row),
                        str(hist["domain"] or "backup"),
                    )
                    fts_rows += 1
                    kg_edges += _link_historical_diary_to_kg(hist, row)
        finally:
            backup_conn.close()

    conn.execute(
        """
        INSERT INTO memory_fabric_sources(source_id, source_type, source_ref, status, item_count, last_indexed_at, metadata_json)
        VALUES ('historical_diary_backups', 'backup', ?, 'active', ?, datetime('now'), ?)
        ON CONFLICT(source_id) DO UPDATE SET
            source_ref=excluded.source_ref,
            item_count=excluded.item_count,
            last_indexed_at=excluded.last_indexed_at,
            metadata_json=excluded.metadata_json
        """,
        (
            str(Path(backups_root) if backups_root is not None else paths.backups_dir()),
            int(conn.execute("SELECT COUNT(*) AS total FROM historical_diary_index").fetchone()["total"] or 0),
            json.dumps({"scanned_backups": scanned_backups, "scanned_rows": scanned_rows}, sort_keys=True),
        ),
    )
    conn.commit()
    return {
        "ok": True,
        "scanned_backups": scanned_backups,
        "unreadable_backups": unreadable_backups,
        "scanned_rows": scanned_rows,
        "skipped_active": skipped_active,
        "inserted": inserted,
        "fts_rows": fts_rows,
        "kg_edges": kg_edges,
    }
369
+
370
+
371
def _count_transcript_files() -> dict[str, int]:
    """Count the session files reported by each transcript finder."""
    claude_files = find_claude_session_files()
    codex_files = find_codex_session_files()
    return {"claude_code": len(claude_files), "codex": len(codex_files)}
376
+
377
+
378
def _local_context_embedding_stats() -> dict[str, Any]:
    """Summarise the embeddings stored in the local-context database.

    Returns ``{"exists": False}`` when the database file is missing, zeroed
    counters when the ``local_embeddings`` table is absent, and otherwise the
    per-``model_id:dimension`` counts, their total, and how many belong to the
    hash fallback model. Any failure is reported as
    ``{"exists": False, "error": ...}`` instead of raising.
    """
    try:
        from local_context.db import local_context_db_path

        db_path = local_context_db_path()
        if not db_path.is_file():
            return {"exists": False}

        uri = f"file:{db_path.resolve().as_posix()}?mode=ro"
        conn = sqlite3.connect(uri, uri=True, timeout=1.0)
        conn.row_factory = sqlite3.Row
        try:
            if not _table_exists(conn, "local_embeddings"):
                return {"exists": True, "embeddings": 0, "models": {}}

            grouped = conn.execute(
                "SELECT model_id, dimension, COUNT(*) AS total FROM local_embeddings GROUP BY model_id, dimension"
            ).fetchall()
            models: dict[str, int] = {}
            for entry in grouped:
                models[f"{entry['model_id']}:{entry['dimension']}"] = int(entry["total"] or 0)

            hash_prefix = HASH_EMBEDDING_MODEL + ":"
            hash_total = sum(count for name, count in models.items() if name.startswith(hash_prefix))
            return {
                "exists": True,
                "embeddings": sum(models.values()),
                "models": models,
                "hash_embeddings": hash_total,
            }
        finally:
            conn.close()
    except Exception as exc:
        return {"exists": False, "error": str(exc)}
409
+
410
+
411
def _cognitive_kg_stats() -> dict[str, Any]:
    """Count knowledge-graph nodes and edges in the cognitive database.

    Returns ``{"exists": False}`` when the database file is missing and
    ``{"exists": False, "error": ...}`` when anything fails (for example the
    ``kg_nodes``/``kg_edges`` tables are absent).
    """
    try:
        from cognitive_paths import resolve_cognitive_db

        db_path = resolve_cognitive_db(for_write=False)
        if not db_path.is_file():
            return {"exists": False}

        uri = f"file:{db_path.resolve().as_posix()}?mode=ro"
        conn = sqlite3.connect(uri, uri=True, timeout=1.0)
        try:
            node_total = conn.execute("SELECT COUNT(*) FROM kg_nodes").fetchone()[0]
            edge_total = conn.execute("SELECT COUNT(*) FROM kg_edges").fetchone()[0]
            return {
                "exists": True,
                "nodes": int(node_total or 0),
                "edges": int(edge_total or 0),
            }
        finally:
            conn.close()
    except Exception as exc:
        return {"exists": False, "error": str(exc)}
427
+
428
+
429
def memory_fabric_health(
    *,
    include_backup_scan: bool = True,
    backups_root: str | Path | None = None,
) -> dict[str, Any]:
    """Build a coverage report across the Memory Fabric sources.

    Compares transcript files against ``transcript_index``, optionally scans
    up to 12 backup databases (the newest 1000 diary rows of each) for rows
    present neither in active memory nor in ``historical_diary_index``, and
    adds local-context embedding and knowledge-graph statistics.

    Args:
        include_backup_scan: When False, backup databases are not opened and
            the backup counters stay at zero.
        backups_root: Directory holding backup databases; defaults to
            ``paths.backups_dir()``.

    Returns:
        A dict with ``ok`` (False only if an issue has severity ``"error"``),
        the ``issues`` list, and the ``transcripts``, ``historical_diaries``,
        ``local_context`` and ``knowledge_graph`` sections.
    """
    ensure_memory_fabric_schema()
    conn = get_db()
    transcript_files = _count_transcript_files()
    transcript_index_count = int(conn.execute("SELECT COUNT(*) AS total FROM transcript_index").fetchone()["total"] or 0)
    historical_count = int(conn.execute("SELECT COUNT(*) AS total FROM historical_diary_index").fetchone()["total"] or 0)
    issues: list[dict[str, str]] = []

    if sum(transcript_files.values()) > 0 and transcript_index_count == 0:
        issues.append({
            "code": "transcript_index_empty",
            "severity": "warn",
            "message": "Transcript files exist but compact transcript_index is empty.",
        })

    backup_rows = 0
    backup_files = 0
    backup_files_unreadable = 0
    backup_unreconciled = 0
    if include_backup_scan:
        active_keys = _active_diary_keys(conn)
        historical_hashes = {
            str(row["content_hash"] or "")
            for row in conn.execute("SELECT content_hash FROM historical_diary_index").fetchall()
        }
        for backup_path in _backup_db_paths(backups_root, max_files=12):
            backup_conn = _connect_backup(backup_path)
            if backup_conn is None:
                continue
            try:
                # A corrupt or locked backup is counted and skipped so that
                # one bad file cannot make the whole health report fail.
                try:
                    if not _table_exists(backup_conn, "session_diary"):
                        continue
                    rows = backup_conn.execute(
                        "SELECT * FROM session_diary ORDER BY created_at DESC LIMIT 1000"
                    ).fetchall()
                except sqlite3.Error:
                    backup_files_unreadable += 1
                    continue
                backup_files += 1
                backup_rows += len(rows)
                for row in rows:
                    key = (_row_value(row, "session_id"), _row_value(row, "created_at"))
                    if key in active_keys:
                        continue
                    if _historical_diary_hash(backup_path, row) in historical_hashes:
                        continue
                    backup_unreconciled += 1
            finally:
                backup_conn.close()
        if backup_unreconciled > 0:
            issues.append({
                "code": "backup_diaries_not_reconciled",
                "severity": "warn",
                "message": "Backup session diaries exist outside active memory and historical index.",
            })

    embeddings = _local_context_embedding_stats()
    if int(embeddings.get("hash_embeddings") or 0) > 0:
        issues.append({
            "code": "hash_embeddings_present",
            "severity": "info",
            "message": "Local context still has deterministic fallback embeddings; re-embedding is recommended.",
        })

    kg = _cognitive_kg_stats()
    if kg.get("exists") and int(kg.get("nodes") or 0) == 0:
        issues.append({
            "code": "kg_empty",
            "severity": "info",
            "message": "Knowledge graph tables exist but have no nodes.",
        })

    return {
        "ok": not any(issue["severity"] == "error" for issue in issues),
        "issues": issues,
        "transcripts": {
            "files": transcript_files,
            "index_rows": transcript_index_count,
        },
        "historical_diaries": {
            "index_rows": historical_count,
            "backup_files_scanned": backup_files,
            "backup_files_unreadable": backup_files_unreadable,
            "backup_rows_seen": backup_rows,
            "backup_rows_unreconciled": backup_unreconciled,
        },
        "local_context": embeddings,
        "knowledge_graph": kg,
    }
515
+
516
+
517
def repair_memory_fabric(
    *,
    transcript_hours: int = MAX_TRANSCRIPT_HOURS,
    transcript_limit: int = 1000,
    backup_limit: int = 5000,
) -> dict[str, Any]:
    """Refresh the transcript index, reconcile backup diaries, then report health.

    Args:
        transcript_hours: Look-back window passed to ``ensure_transcript_index``.
        transcript_limit: Transcript cap passed to ``ensure_transcript_index``.
        backup_limit: Row budget passed to ``reconcile_backup_diaries``.

    Returns:
        A dict with ``ok`` (always True), the two step results under
        ``transcripts`` and ``backups``, and the health report taken after
        both steps under ``health``.
    """
    outcome: dict[str, Any] = {"ok": True}
    outcome["transcripts"] = ensure_transcript_index(
        hours=transcript_hours,
        limit=transcript_limit,
        min_user_messages=1,
        force=True,
    )
    outcome["backups"] = reconcile_backup_diaries(limit=backup_limit)
    outcome["health"] = memory_fabric_health(include_backup_scan=True)
    return outcome
@@ -1100,10 +1100,11 @@ def _source_diary(request: SourceRequest) -> SourceResult:
1100
1100
 
1101
1101
  def _source_transcripts(request: SourceRequest) -> SourceResult:
1102
1102
  try:
1103
- from transcript_index import index_recent_transcripts, search_transcript_index
1103
+ from transcript_index import ensure_transcript_index, search_transcript_index
1104
+ from transcript_utils import MAX_TRANSCRIPT_HOURS
1104
1105
 
1105
- index_recent_transcripts(hours=72, limit=120, min_user_messages=1)
1106
- indexed_rows = search_transcript_index(request.query, hours=72, limit=4)
1106
+ ensure_transcript_index(hours=MAX_TRANSCRIPT_HOURS, limit=1000, min_user_messages=1)
1107
+ indexed_rows = search_transcript_index(request.query, hours=MAX_TRANSCRIPT_HOURS, limit=4)
1107
1108
  if indexed_rows:
1108
1109
  indexed_result = _rows_result(
1109
1110
  "transcript_index",
@@ -2,6 +2,8 @@
2
2
  # NEXO DB hourly backup — crontab: 0 * * * * $NEXO_HOME/core/scripts/nexo-backup.sh
3
3
  NEXO_HOME="${NEXO_HOME:-$HOME/.nexo}"
4
4
  NEXO_DIR="$NEXO_HOME"
5
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
6
+ CORE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
5
7
  BACKUP_DIR="$NEXO_HOME/runtime/backups"
6
8
  if [ ! -d "$BACKUP_DIR" ] && [ -d "$NEXO_HOME/backups" ]; then
7
9
  BACKUP_DIR="$NEXO_HOME/backups"
@@ -23,7 +25,35 @@ LOCAL_CONTEXT_MAX_BACKUP_BYTES="${NEXO_LOCAL_CONTEXT_MAX_BACKUP_BYTES:-214748364
23
25
 
24
26
  mkdir -p "$BACKUP_DIR" "$WEEKLY_DIR"
25
27
 
28
reconcile_memory_fabric_before_prune() {
  # Best-effort step: ask memory_fabric to reconcile backup diaries before the
  # pruner runs. All output is discarded and any failure (missing python3,
  # import error, exception inside the module) is ignored so the backup job
  # itself is never interrupted.
  python3 - "$BACKUP_DIR" "$CORE_DIR" <<'PY' >/dev/null 2>&1 || true
from __future__ import annotations

import sys
from pathlib import Path

backups_arg, core_arg = sys.argv[1:3]
backups_root = Path(backups_arg)
runtime_core = Path(core_arg)

# Make memory_fabric importable from either the core directory or a sibling
# "src" directory; each existing directory is pushed to the front of sys.path.
search_roots = (runtime_core, runtime_core.parent / "src")
for root in search_roots:
    if root.exists():
        sys.path.insert(0, str(root))

try:
    import memory_fabric

    memory_fabric.reconcile_backup_diaries(
        backups_root=backups_root,
        max_backup_files=80,
        limit=10000,
    )
except Exception:
    # Deliberately silent: reconciliation must not block backup cleanup.
    pass
PY
}
53
+
26
54
  cleanup_backups() {
55
+ reconcile_memory_fabric_before_prune
56
+
27
57
  PRUNER="$NEXO_HOME/core/scripts/prune_runtime_backups.py"
28
58
  if [ ! -f "$PRUNER" ]; then
29
59
  PRUNER="$(dirname "$0")/prune_runtime_backups.py"
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env python3
2
+ # nexo: name=memory-fabric
3
+ # nexo: description=Refresh transcript search, historical backup diaries, and graph links.
4
+ # nexo: runtime=python
5
+ # nexo: cron_id=memory-fabric
6
+ # nexo: schedule=02:35
7
+ # nexo: recovery_policy=catchup
8
+ # nexo: run_on_boot=true
9
+ # nexo: run_on_wake=true
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ import sys
15
+ from pathlib import Path
16
+
17
+
18
+ RUNTIME_ROOT = Path(__file__).resolve().parents[1]
19
+ if str(RUNTIME_ROOT) not in sys.path:
20
+ sys.path.insert(0, str(RUNTIME_ROOT))
21
+
22
+
23
def _int_env(name: str, default: int) -> int:
    """Read a positive integer from the environment variable *name*.

    Returns *default* unchanged when the variable is unset, blank, or not a
    valid integer. A valid integer below 1 is raised to 1.
    """
    text = os.environ.get(name, "").strip()
    try:
        parsed = int(text) if text else None
    except ValueError:
        parsed = None
    if parsed is None:
        return default
    return parsed if parsed > 1 else 1
31
+
32
+
33
def main() -> int:
    """Run the memory-fabric repair and print its result as one JSON line.

    The transcript and backup limits can be overridden through the
    ``NEXO_MEMORY_FABRIC_TRANSCRIPT_LIMIT`` and
    ``NEXO_MEMORY_FABRIC_BACKUP_LIMIT`` environment variables.

    Returns:
        The process exit code, always ``0``.
    """
    # Imported here rather than at module top so it runs only once main() is called.
    import memory_fabric

    limits = {
        "transcript_limit": _int_env("NEXO_MEMORY_FABRIC_TRANSCRIPT_LIMIT", 1000),
        "backup_limit": _int_env("NEXO_MEMORY_FABRIC_BACKUP_LIMIT", 10000),
    }
    report = memory_fabric.repair_memory_fabric(**limits)
    payload = json.dumps(report, ensure_ascii=False, sort_keys=True)
    print(payload)
    return 0
42
+
43
+
44
+ if __name__ == "__main__":
45
+ raise SystemExit(main())
@@ -8,26 +8,53 @@ from transcript_utils import (
8
8
  load_transcript,
9
9
  search_transcripts,
10
10
  )
11
+ from transcript_index import ensure_transcript_index, search_transcript_index
11
12
 
12
13
 
13
14
  def handle_transcript_search(query: str = "", hours: int = 24, client: str = "", limit: int = 10) -> str:
14
15
  """Search recent Claude Code / Codex transcripts as a fallback when memory is insufficient."""
15
16
  window = clamp_transcript_hours(hours)
16
- rows = search_transcripts(query or "", hours=window, client=(client or "").strip(), limit=limit)
17
+ clean_client = (client or "").strip()
18
+ ensure_transcript_index(
19
+ hours=window,
20
+ client=clean_client,
21
+ limit=max(200, min(2000, int(limit or 10) * 50)),
22
+ min_user_messages=1,
23
+ )
24
+ rows = search_transcript_index(query or "", hours=window, client=clean_client, limit=limit)
25
+ source = "index"
26
+ if not rows:
27
+ rows = search_transcripts(
28
+ query or "",
29
+ hours=window,
30
+ client=clean_client,
31
+ limit=limit,
32
+ min_user_messages=1,
33
+ )
34
+ source = "raw"
17
35
  if not rows:
18
36
  scope = f"query='{query}'" if query else "recent transcripts"
19
37
  return f"No transcript matches for {scope} in the last {window}h."
20
38
 
21
- lines = [f"TRANSCRIPTS ({len(rows)}) — last {window}h"]
39
+ lines = [f"TRANSCRIPTS ({len(rows)}) — last {window}h ({source})"]
22
40
  for item in rows:
41
+ session_file = item.get("session_file") or item.get("session_id") or item.get("display_name")
42
+ display_name = item.get("display_name") or item.get("path_ref") or item.get("session_path")
43
+ modified = item.get("modified") or item.get("modified_at")
23
44
  lines.append(
24
- f"- {item.get('session_file')}: [{item.get('client')}] {item.get('display_name')} "
25
- f"(modified={item.get('modified')}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
45
+ f"- {session_file}: [{item.get('client') or item.get('source_client')}] {display_name} "
46
+ f"(modified={modified}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
26
47
  )
27
48
  if item.get("cwd"):
28
49
  lines.append(f" cwd: {item['cwd']}")
29
50
  if item.get("session_uid"):
30
51
  lines.append(f" session_uid: {item['session_uid']}")
52
+ if item.get("conversation_id") and item.get("conversation_id") != item.get("session_id"):
53
+ lines.append(f" conversation_id: {item['conversation_id']}")
54
+ if item.get("path_ref"):
55
+ lines.append(f" path: {item['path_ref']}")
56
+ if item.get("sanitized_summary"):
57
+ lines.append(f" summary: {item['sanitized_summary']}")
31
58
  for snippet in item.get("matched_messages") or []:
32
59
  lines.append(
33
60
  f" [{snippet.get('role')}#{snippet.get('index')}] {snippet.get('snippet')}"
@@ -38,15 +65,29 @@ def handle_transcript_search(query: str = "", hours: int = 24, client: str = "",
38
65
  def handle_transcript_recent(hours: int = 24, client: str = "", limit: int = 10) -> str:
39
66
  """List recent transcripts without searching full text."""
40
67
  window = clamp_transcript_hours(hours)
41
- rows = list_recent_transcripts(hours=window, client=(client or "").strip(), limit=limit)
68
+ clean_client = (client or "").strip()
69
+ ensure_transcript_index(
70
+ hours=window,
71
+ client=clean_client,
72
+ limit=max(200, min(2000, int(limit or 10) * 50)),
73
+ min_user_messages=1,
74
+ )
75
+ rows = search_transcript_index("", hours=window, client=clean_client, limit=limit)
76
+ source = "index"
77
+ if not rows:
78
+ rows = list_recent_transcripts(hours=window, client=clean_client, limit=limit, min_user_messages=1)
79
+ source = "raw"
42
80
  if not rows:
43
81
  return f"No transcripts found in the last {window}h."
44
82
 
45
- lines = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h"]
83
+ lines = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h ({source})"]
46
84
  for item in rows:
85
+ session_file = item.get("session_file") or item.get("session_id") or item.get("display_name")
86
+ display_name = item.get("display_name") or item.get("path_ref") or item.get("session_path")
87
+ modified = item.get("modified") or item.get("modified_at")
47
88
  lines.append(
48
- f"- {item.get('session_file')}: [{item.get('client')}] {item.get('display_name')} "
49
- f"(modified={item.get('modified')}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
89
+ f"- {session_file}: [{item.get('client') or item.get('source_client')}] {display_name} "
90
+ f"(modified={modified}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
50
91
  )
51
92
  return "\n".join(lines)
52
93
 
@@ -62,6 +103,7 @@ def handle_transcript_read(
62
103
  session_ref=(session_ref or "").strip(),
63
104
  transcript_path=(transcript_path or "").strip(),
64
105
  client=(client or "").strip(),
106
+ min_user_messages=1,
65
107
  )
66
108
  if not transcript:
67
109
  target = session_ref or transcript_path or "(empty ref)"
@@ -15,9 +15,12 @@ from typing import Any
15
15
  from db import get_db
16
16
  from transcript_utils import (
17
17
  DEFAULT_TRANSCRIPT_HOURS,
18
+ MAX_TRANSCRIPT_HOURS,
18
19
  _score_text_match,
19
20
  _tokenize,
20
21
  _truncate,
22
+ find_claude_session_files,
23
+ find_codex_session_files,
21
24
  list_recent_transcripts,
22
25
  )
23
26
 
@@ -103,6 +106,29 @@ def _sanitized_summary(session: dict[str, Any], *, limit: int = 900) -> str:
103
106
  return _truncate(summary, limit)
104
107
 
105
108
 
109
def _row_ref_matches(query: str, row: dict[str, Any]) -> bool:
    """Return True when *query* is a prefix of one of the row's identifiers.

    The comparison is case-insensitive and covers ``session_id``,
    ``conversation_id``, ``display_name``, ``path_ref`` and the file name and
    stem of ``path_ref``. Each identifier is also tested on the part after its
    last ``:``. Queries shorter than six characters never match.
    """
    needle = str(query or "").strip().lower()
    if len(needle) < 6:
        return False

    ref_path = Path(str(row.get("path_ref") or ""))
    raw_refs = (
        row.get("session_id"),
        row.get("conversation_id"),
        row.get("display_name"),
        row.get("path_ref"),
        ref_path.name,
        ref_path.stem,
    )
    normalized = (str(ref or "").strip().lower() for ref in raw_refs)
    return any(
        text.startswith(needle) or text.rsplit(":", 1)[-1].startswith(needle)
        for text in normalized
        if text
    )
130
+
131
+
106
132
  def index_transcript_session(session: dict[str, Any]) -> dict[str, Any]:
107
133
  """Upsert a single transcript metadata row and return it."""
108
134
  _ensure_transcript_index_table()
@@ -186,6 +212,81 @@ def index_recent_transcripts(
186
212
  return indexed
187
213
 
188
214
 
215
def _latest_source_modified_ts(client: str = "") -> float:
    """Return the newest mtime among the raw transcript files for *client*.

    An empty *client* covers both Claude Code and Codex session files. Files
    that cannot be stat'ed are skipped. Returns ``0.0`` when no file is found.
    """
    every_client = not client
    candidates: list[Path] = []
    if every_client or client == "claude_code":
        candidates += find_claude_session_files()
    if every_client or client == "codex":
        candidates += find_codex_session_files()

    newest = 0.0
    for candidate in candidates:
        try:
            mtime = candidate.stat().st_mtime
        except OSError:
            # The file may have vanished or be unreadable; ignore it.
            continue
        if mtime > newest:
            newest = mtime
    return newest
228
+
229
+
230
def _parse_iso_ts(value: str) -> float:
    """Convert an ISO-8601 timestamp string to POSIX seconds.

    Args:
        value: ISO-8601 text. A naive value is interpreted in local time by
            ``datetime.timestamp()``.

    Returns:
        The timestamp as a float, or ``0.0`` when *value* is empty or cannot
        be parsed.
    """
    text = str(value or "").strip()
    if not text:
        return 0.0
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
    # Normalise it so UTC-suffixed values do not silently become 0.0, which
    # would make the index look permanently stale on older interpreters.
    if text.endswith(("Z", "z")):
        text = f"{text[:-1]}+00:00"
    try:
        return datetime.fromisoformat(text).timestamp()
    except (TypeError, ValueError, OverflowError, OSError):
        # Malformed text or a value outside the platform's timestamp range.
        return 0.0
237
+
238
+
239
def ensure_transcript_index(
    *,
    hours: int = MAX_TRANSCRIPT_HOURS,
    client: str = "",
    limit: int = 1000,
    min_user_messages: int = 1,
    force: bool = False,
) -> dict[str, Any]:
    """Refresh the compact transcript index when it is empty, stale or forced.

    The work is intentionally bounded. Raw JSONL stays the source of truth;
    this table exists so ordinary MCP searches can avoid slow file scans.

    Args:
        hours: Look-back window passed to ``index_recent_transcripts``.
        client: Optional ``source_client`` filter; empty means every client.
        limit: Maximum number of transcripts to index in one pass.
        min_user_messages: Minimum user messages a transcript must contain.
        force: Re-index even when the table looks current.

    Returns:
        A summary dict with the row counts before and after, the number of
        sessions indexed, and the ``forced``/``stale`` flags.
    """
    _ensure_transcript_index_table()
    db = get_db()

    clauses = ["1=1"]
    bind: list[Any] = []
    if client:
        clauses.append("source_client = ?")
        bind.append(client)
    predicate = " AND ".join(clauses)
    bound = tuple(bind)

    def _row_total() -> int:
        # Count index rows for the selected client filter.
        row = db.execute(
            f"SELECT COUNT(*) AS total FROM transcript_index WHERE {predicate}",
            bound,
        ).fetchone()
        return int(row["total"] or 0)

    before = _row_total()
    newest_row = db.execute(
        f"SELECT MAX(modified_at) AS latest FROM transcript_index WHERE {predicate}",
        bound,
    ).fetchone()
    newest_indexed_ts = _parse_iso_ts(str(newest_row["latest"] or ""))
    newest_source_ts = _latest_source_modified_ts(client)
    # One second of slack absorbs rounding between file mtimes and the stored
    # ISO timestamps.
    stale = bool(newest_source_ts and newest_source_ts > newest_indexed_ts + 1.0)

    indexed: list[dict[str, Any]] = []
    if force or before == 0 or stale:
        indexed = index_recent_transcripts(
            hours=hours,
            client=client,
            limit=limit,
            min_user_messages=min_user_messages,
        )

    return {
        "ok": True,
        "before": before,
        "after": _row_total(),
        "indexed": len(indexed),
        "forced": bool(force),
        "stale": stale,
        "hours": hours,
        "client": client,
    }
288
+
289
+
189
290
  def search_transcript_index(
190
291
  query: str = "",
191
292
  *,
@@ -201,7 +302,7 @@ def search_transcript_index(
201
302
  where += " AND source_client = ?"
202
303
  params.append(client)
203
304
  rows = [dict(row) for row in conn.execute(
204
- f"SELECT * FROM transcript_index WHERE {where} ORDER BY modified_at DESC LIMIT 500",
305
+ f"SELECT * FROM transcript_index WHERE {where} ORDER BY modified_at DESC LIMIT 5000",
205
306
  tuple(params),
206
307
  ).fetchall()]
207
308
 
@@ -222,9 +323,11 @@ def search_transcript_index(
222
323
  continue
223
324
  haystack = " ".join(
224
325
  str(row.get(field) or "")
225
- for field in ("sanitized_summary", "display_name", "session_id", "conversation_id", "metadata_json")
326
+ for field in ("sanitized_summary", "display_name", "session_id", "conversation_id", "path_ref", "metadata_json")
226
327
  )
227
328
  score = _score_text_match(query_tokens, haystack)
329
+ if _row_ref_matches(query, row):
330
+ score = max(score, 2.0)
228
331
  if score <= 0:
229
332
  continue
230
333
  row["_score"] = round(score, 4)
@@ -110,7 +110,10 @@ def find_codex_session_files() -> list[Path]:
110
110
  if not root.exists():
111
111
  continue
112
112
  for jsonl in sorted(root.rglob("*.jsonl")):
113
- key = jsonl.name
113
+ try:
114
+ key = str(jsonl.resolve())
115
+ except OSError:
116
+ key = str(jsonl)
114
117
  if key in seen:
115
118
  continue
116
119
  seen.add(key)
@@ -346,8 +349,20 @@ def list_recent_transcripts(
346
349
  return filtered[: max(1, int(limit or 10))]
347
350
 
348
351
 
349
- def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, client: str = "", limit: int = 10) -> list[dict]:
350
- rows = list_recent_transcripts(hours=hours, client=client, limit=200)
352
+ def search_transcripts(
353
+ query: str,
354
+ *,
355
+ hours: int = DEFAULT_TRANSCRIPT_HOURS,
356
+ client: str = "",
357
+ limit: int = 10,
358
+ min_user_messages: int = MIN_USER_MESSAGES,
359
+ ) -> list[dict]:
360
+ rows = list_recent_transcripts(
361
+ hours=hours,
362
+ client=client,
363
+ limit=200,
364
+ min_user_messages=min_user_messages,
365
+ )
351
366
  query_tokens = _tokenize(query)
352
367
  if not query_tokens:
353
368
  return rows[: max(1, int(limit or 10))]
@@ -398,7 +413,46 @@ def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, cli
398
413
  return matches[: max(1, int(limit or 10))]
399
414
 
400
415
 
401
- def load_transcript(session_ref: str = "", transcript_path: str = "", client: str = "") -> dict | None:
416
def _transcript_ref_matches(ref: str, session: dict, path: Path) -> bool:
    """Decide whether *ref* identifies the transcript described by *session*/*path*.

    An empty *ref* matches everything. Otherwise *ref* must equal one of the
    session's identifiers (``session_file``, ``display_name``, ``session_uid``,
    ``conversation_id``) or the path, its file name or its stem. Failing that,
    a *ref* of at least six characters matches as a case-insensitive prefix of
    any of them, or of the part after an identifier's last ``:``.
    """
    wanted = str(ref or "").strip()
    if not wanted:
        return True

    session_keys = ("session_file", "display_name", "session_uid", "conversation_id")
    known = {str(session.get(key, "")) for key in session_keys}
    known.update((str(path), path.name, path.stem))
    if wanted in known:
        return True

    # Operators often paste short prefixes of file names or session ids.
    # The minimum length keeps common words from matching arbitrary
    # historical transcripts.
    if len(wanted) < 6:
        return False

    prefix = wanted.lower()
    for raw in known:
        text = raw.strip().lower()
        if text and (
            text.startswith(prefix)
            or text.rsplit(":", 1)[-1].startswith(prefix)
        ):
            return True
    return False
447
+
448
+
449
+ def load_transcript(
450
+ session_ref: str = "",
451
+ transcript_path: str = "",
452
+ client: str = "",
453
+ *,
454
+ min_user_messages: int = 1,
455
+ ) -> dict | None:
402
456
  ref = str(session_ref or "").strip()
403
457
  path_ref = str(transcript_path or "").strip()
404
458
 
@@ -416,17 +470,15 @@ def load_transcript(session_ref: str = "", transcript_path: str = "", client: st
416
470
  continue
417
471
  except Exception:
418
472
  continue
419
- session = extract_codex_session(path) if detected_client == "codex" else extract_claude_session(path)
473
+ session = (
474
+ extract_codex_session(path, min_user_messages=min_user_messages)
475
+ if detected_client == "codex"
476
+ else extract_claude_session(path, min_user_messages=min_user_messages)
477
+ )
420
478
  if not session:
421
479
  continue
422
- if ref:
423
- if ref not in {
424
- str(session.get("session_file", "")),
425
- str(session.get("display_name", "")),
426
- str(session.get("session_uid", "")),
427
- str(path),
428
- }:
429
- continue
480
+ if ref and not _transcript_ref_matches(ref, session, path):
481
+ continue
430
482
  try:
431
483
  session["modified"] = datetime.fromtimestamp(path.stat().st_mtime).isoformat()
432
484
  except OSError: