npm - nexo-brain - Versions diffs - 7.24.0 → 7.25.0 - Mend

nexo-brain 7.24.0 → 7.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/.claude-plugin/plugin.json +1 -1
package/README.md +3 -1
package/package.json +1 -1
package/src/auto_update.py +30 -0
package/src/crons/manifest.json +13 -0
package/src/db/_fts.py +38 -8
package/src/db/_schema.py +46 -0
package/src/doctor/providers/runtime.py +69 -0
package/src/memory_fabric.py +536 -0
package/src/pre_answer_router.py +4 -3
package/src/scripts/nexo-backup.sh +30 -0
package/src/scripts/nexo-memory-fabric.py +45 -0
package/src/tools_transcripts.py +50 -8
package/src/transcript_index.py +105 -2
package/src/transcript_utils.py +65 -13

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.24.0",
+  "version": "7.25.0",
   "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
   "author": {
     "name": "NEXO Brain",

package/README.md CHANGED Viewed

@@ -18,7 +18,9 @@
 [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
-Version `7.24.0` is the current packaged-runtime line. Minor release over v7.23.13 - Home Agents, cognitive quality controls, English operational copy, and non-blocking task-open context are integrated into main.
+Version `7.25.0` is the current packaged-runtime line. Minor release over v7.24.0 - Memory Fabric links transcript lookup, historical backup diary recovery, unified search and knowledge graph evidence so memories are not available only inside expiring snapshots.
+Previously in `7.24.0`: minor release over v7.23.13 - Home Agents, cognitive quality controls, English operational copy, and non-blocking task-open context are integrated into main.
 Previously in `7.23.13`: patch over v7.23.12 - release guardrails now audit publish workflows for masked failures and add minimal-delta coverage for punctual UI edits.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.24.0",
+  "version": "7.25.0",
   "mcpName": "io.github.wazionapps/nexo",
   "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
   "homepage": "https://nexo-brain.com",

package/src/auto_update.py CHANGED Viewed

@@ -4715,6 +4715,11 @@ def _run_runtime_post_sync(dest: Path = NEXO_HOME, progress_fn=None) -> tuple[bo
                     "reconcile_scripts = getattr(script_registry, 'reconcile_personal_scripts', None); "
                     "result = reconcile_scripts(dry_run=False) if callable(reconcile_scripts) else {}; "
                     "result = result if isinstance(result, dict) else {}; "
+                    "exec(\"try:\\n"
+                    " import memory_fabric\\n"
+                    " result['memory_fabric'] = memory_fabric.repair_memory_fabric(transcript_limit=1000, backup_limit=5000)\\n"
+                    "except Exception as exc:\\n"
+                    " result['memory_fabric_error'] = repr(exc)\"); "
                     "result['retired_superseded_scripts'] = retired; "
                     "result['retired_superseded_skills'] = retired_skills; "
                     "print(json.dumps(result))"
@@ -4732,6 +4737,31 @@ def _run_runtime_post_sync(dest: Path = NEXO_HOME, progress_fn=None) -> tuple[bo
         reconcile_payload = _parse_runtime_init_payload(init_result.stdout or "")
         extra_actions, reconcile_message = _personal_schedule_reconcile_summary(reconcile_payload)
         actions.extend(extra_actions)
+        memory_fabric_result = reconcile_payload.get("memory_fabric")
+        if isinstance(memory_fabric_result, dict):
+            transcript_indexed = int((memory_fabric_result.get("transcripts") or {}).get("indexed") or 0)
+            historical_inserted = int((memory_fabric_result.get("backups") or {}).get("inserted") or 0)
+            health = memory_fabric_result.get("health") or {}
+            health_issues = health.get("issues") or []
+            historical_health = health.get("historical_diaries") or {}
+            unreconciled = int(historical_health.get("backup_rows_unreconciled") or 0)
+            if transcript_indexed or historical_inserted:
+                actions.append(f"memory-fabric-repaired:{transcript_indexed + historical_inserted}")
+                _emit_progress(
+                    progress_fn,
+                    f"Memory Fabric: indexed {transcript_indexed} transcript(s), reconciled {historical_inserted} historical diary row(s).",
+                )
+            else:
+                actions.append("memory-fabric-checked")
+            if unreconciled:
+                actions.append(f"memory-fabric-unreconciled:{unreconciled}")
+            if memory_fabric_result.get("ok") is False or any(
+                isinstance(issue, dict) and issue.get("code") == "backup_diaries_not_reconciled"
+                for issue in health_issues
+            ):
+                actions.append("memory-fabric-warning")
+        elif reconcile_payload.get("memory_fabric_error"):
+            actions.append("memory-fabric-warning")
         if reconcile_message:
             _emit_progress(progress_fn, reconcile_message)
     except Exception as e:

package/src/crons/manifest.json CHANGED Viewed

@@ -302,6 +302,19 @@
       "run_on_boot": true,
       "run_on_wake": true
     },
+    {
+      "id": "memory-fabric",
+      "script": "scripts/nexo-memory-fabric.py",
+      "schedule": {"hour": 2, "minute": 35},
+      "description": "Daily Memory Fabric maintenance — refresh transcript search, historical backup diaries, and graph links",
+      "core": true,
+      "recovery_policy": "catchup",
+      "idempotent": true,
+      "max_catchup_age": 172800,
+      "stuck_after_seconds": 3600,
+      "run_on_boot": true,
+      "run_on_wake": true
+    },
     {
       "id": "local-index",
       "script": "scripts/nexo-local-index.py",

package/src/db/_fts.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """NEXO DB — Fts module."""
-import os, pathlib, sqlite3, threading, datetime
+import os, pathlib, re, sqlite3, threading, datetime
 import paths
 from db._core import get_db, now_epoch, DB_PATH
@@ -328,22 +328,26 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
         limit: Max results (default 20)
     """
     conn = get_db()
-    words = query.strip().split()
+    raw_query = query.strip()
+    words = raw_query.split()
     if not words:
         return []
     # Expand with synonyms for cross-language matching
     all_words = _expand_synonyms(words)
-    # Build FTS5 query: each word as quoted term with OR for broad matching
+    # Build FTS5 query: each word as quoted term with OR for broad matching.
+    # Symbol-heavy identifiers (emails, paths, refs) need deterministic token
+    # boundaries so FTS5 never treats punctuation as query syntax.
     fts_terms = []
     for w in all_words:
         # Strip FTS5 special chars to avoid syntax errors
-        safe = w.replace('"', '').replace("'", '').replace('*', '').replace('^', '').replace('-', ' ').strip()
+        safe = w.replace('"', '').replace("'", '').replace('*', '').replace('^', '').strip()
+        safe = re.sub(r"[-@/\\:]+", " ", safe)
         if not safe:
             continue
-        # Split on dots (e.g., "capabilities.json" → "capabilities" + "json")
-        parts = [p.strip() for p in safe.split('.') if p.strip()]
+        # Split on dots and punctuation boundaries (e.g., emails, paths, files).
+        parts = [p.strip() for p in re.split(r"[.\s]+", safe) if p.strip()]
         for part in parts:
             fts_terms.append(f'"{part}"')
             # Add prefix search for camelCase/code identifiers (contains uppercase mid-word)
@@ -361,6 +365,24 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
     params.append(limit)
     try:
+        exact_rows = []
+        if re.search(r"[@/\\:.-]", raw_query):
+            exact_where = ""
+            exact_params = [f"%{raw_query}%", f"%{raw_query}%", f"%{raw_query}%"]
+            if source_filter:
+                exact_where = "AND source = ?"
+                exact_params.append(source_filter)
+            exact_params.append(limit)
+            exact_rows = conn.execute(f"""
+                SELECT source, source_id, title,
+                       substr(body, 1, 240) AS snippet,
+                       category, updated_at, -100.0 AS rank
+                FROM unified_search
+                WHERE (title LIKE ? OR body LIKE ? OR source_id LIKE ?) {exact_where}
+                ORDER BY updated_at DESC
+                LIMIT ?
+            """, exact_params).fetchall()
         rows = conn.execute(f"""
             SELECT source, source_id, title,
                    snippet(unified_search, 3, '»', '«', '...', 40) AS snippet,
@@ -370,7 +392,16 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
             ORDER BY rank
             LIMIT ?
         """, params).fetchall()
-        return [dict(r) for r in rows]
+        merged = []
+        seen = set()
+        for row in list(exact_rows) + list(rows):
+            item = dict(row)
+            key = (item.get("source"), item.get("source_id"))
+            if key in seen:
+                continue
+            seen.add(key)
+            merged.append(item)
+        return merged[:limit]
     except Exception:
         return []
@@ -403,4 +434,3 @@ def _migrate_add_index(conn, index_name: str, table: str, column: str):
     """Create index if it doesn't exist (idempotent)."""
     conn.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}({column})")
     conn.commit()

package/src/db/_schema.py CHANGED Viewed

@@ -2080,6 +2080,51 @@ def _m67_diary_quality_backfill_repair(conn):
     _migrate_add_index(conn, "idx_diary_archive_quality", "diary_archive", "quality_tier, quality_score, created_at")
+def _m68_memory_fabric_index(conn):
+    """Memory Fabric v1 index tables for historical backup memory."""
+    conn.executescript(
+        """
+        CREATE TABLE IF NOT EXISTS memory_fabric_sources (
+            source_id TEXT PRIMARY KEY,
+            source_type TEXT NOT NULL,
+            source_ref TEXT NOT NULL,
+            status TEXT NOT NULL DEFAULT 'active',
+            item_count INTEGER NOT NULL DEFAULT 0,
+            last_indexed_at TEXT DEFAULT '',
+            metadata_json TEXT NOT NULL DEFAULT '{}'
+        );
+        CREATE TABLE IF NOT EXISTS historical_diary_index (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            source_backup_path TEXT NOT NULL,
+            source_table TEXT NOT NULL DEFAULT 'session_diary',
+            source_row_id INTEGER NOT NULL,
+            session_id TEXT NOT NULL DEFAULT '',
+            created_at TEXT NOT NULL DEFAULT '',
+            domain TEXT NOT NULL DEFAULT '',
+            summary TEXT NOT NULL DEFAULT '',
+            decisions TEXT NOT NULL DEFAULT '',
+            pending TEXT NOT NULL DEFAULT '',
+            context_next TEXT NOT NULL DEFAULT '',
+            mental_state TEXT NOT NULL DEFAULT '',
+            self_critique TEXT NOT NULL DEFAULT '',
+            source TEXT NOT NULL DEFAULT '',
+            content_hash TEXT NOT NULL UNIQUE,
+            indexed_at TEXT DEFAULT (datetime('now')),
+            metadata_json TEXT NOT NULL DEFAULT '{}',
+            UNIQUE(source_backup_path, source_table, source_row_id)
+        );
+        CREATE INDEX IF NOT EXISTS idx_historical_diary_session
+            ON historical_diary_index(session_id);
+        CREATE INDEX IF NOT EXISTS idx_historical_diary_created
+            ON historical_diary_index(created_at);
+        CREATE INDEX IF NOT EXISTS idx_historical_diary_domain
+            ON historical_diary_index(domain);
+        """
+    )
 MIGRATIONS = [
     (1, "learnings_columns", _m1_learnings_columns),
     (2, "followups_reasoning", _m2_followups_reasoning),
@@ -2148,6 +2193,7 @@ MIGRATIONS = [
     (65, "diary_quality", _m65_diary_quality),
     (66, "transcript_index", _m66_transcript_index),
     (67, "diary_quality_backfill_repair", _m67_diary_quality_backfill_repair),
+    (68, "memory_fabric_index", _m68_memory_fabric_index),
 ]

package/src/doctor/providers/runtime.py CHANGED Viewed

@@ -3900,6 +3900,74 @@ def check_local_index_hygiene(fix: bool = False) -> DoctorCheck:
         )
+def check_memory_fabric_health(fix: bool = False) -> DoctorCheck:
+    try:
+        import memory_fabric
+        repair = None
+        if fix:
+            repair = memory_fabric.repair_memory_fabric(
+                transcript_hours=720,
+                transcript_limit=1000,
+                backup_limit=5000,
+            )
+        report = memory_fabric.memory_fabric_health(include_backup_scan=True)
+        issues = report.get("issues") or []
+        evidence = [
+            "transcripts=" + json.dumps(report.get("transcripts") or {}, sort_keys=True),
+            "historical_diaries=" + json.dumps(report.get("historical_diaries") or {}, sort_keys=True),
+            "local_context=" + json.dumps(report.get("local_context") or {}, sort_keys=True),
+            "knowledge_graph=" + json.dumps(report.get("knowledge_graph") or {}, sort_keys=True),
+        ]
+        evidence.extend(
+            f"issue={item.get('severity')}:{item.get('code')}:{item.get('message')}"
+            for item in issues[:6]
+            if isinstance(item, dict)
+        )
+        if repair:
+            evidence.append("repair=" + json.dumps({
+                "transcripts_indexed": (repair.get("transcripts") or {}).get("indexed"),
+                "historical_diaries_inserted": (repair.get("backups") or {}).get("inserted"),
+            }, sort_keys=True))
+        blocking = [
+            item for item in issues
+            if isinstance(item, dict) and item.get("code") in {"transcript_index_empty", "backup_diaries_not_reconciled"}
+        ]
+        if not blocking:
+            return DoctorCheck(
+                id="runtime.memory_fabric",
+                tier="runtime",
+                status="healthy",
+                severity="info",
+                summary="Memory Fabric coverage is queryable",
+                evidence=evidence,
+                repair_plan=[],
+                fixed=bool(repair),
+            )
+        return DoctorCheck(
+            id="runtime.memory_fabric",
+            tier="runtime",
+            status="degraded",
+            severity="warn",
+            summary="Memory Fabric coverage needs repair",
+            evidence=evidence,
+            repair_plan=["Run `nexo doctor --tier runtime --fix` or `nexo update` to warm transcript and historical backup indexes"],
+            escalation_prompt="Some memory sources exist outside the active query indexes, so exact historical lookup may fall back to slow raw scans.",
+            fixed=bool(repair),
+        )
+    except Exception as exc:
+        return DoctorCheck(
+            id="runtime.memory_fabric",
+            tier="runtime",
+            status="degraded",
+            severity="warn",
+            summary="Memory Fabric health could not be checked",
+            evidence=[str(exc)],
+            repair_plan=["Inspect memory_fabric.py and DB migrations"],
+            escalation_prompt="Support cannot verify unified memory coverage.",
+        )
 def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
     """Run all runtime-tier checks. Read-only by default."""
     return [
@@ -3922,6 +3990,7 @@ def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
         safe_check(check_automation_caller_coverage),
         safe_check(check_state_watchers),
         safe_check(check_local_index_hygiene, fix=fix),
+        safe_check(check_memory_fabric_health, fix=fix),
         safe_check(check_release_artifact_sync),
         safe_check(check_release_trace_hygiene),
         safe_check(check_launchagent_inventory),

package/src/memory_fabric.py ADDED Viewed

@@ -0,0 +1,536 @@
+from __future__ import annotations
+"""Memory Fabric release helpers.
+This module is the product-owned bridge between existing memory islands:
+transcript metadata, historical diary backups, local-context embeddings and the
+cognitive knowledge graph. It does not copy raw transcripts into the DB.
+"""
+import hashlib
+import json
+import re
+import sqlite3
+from pathlib import Path
+from typing import Any
+import paths
+from db import get_db
+from transcript_index import ensure_transcript_index
+from transcript_utils import (
+    MAX_TRANSCRIPT_HOURS,
+    find_claude_session_files,
+    find_codex_session_files,
+)
+HISTORICAL_DIARY_SOURCE = "historical_diary"
+HASH_EMBEDDING_MODEL = "nexo-local-hash-embedding"
+EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
+def ensure_memory_fabric_schema(conn: sqlite3.Connection | None = None) -> None:
+    db = conn or get_db()
+    db.executescript(
+        """
+        CREATE TABLE IF NOT EXISTS memory_fabric_sources (
+            source_id TEXT PRIMARY KEY,
+            source_type TEXT NOT NULL,
+            source_ref TEXT NOT NULL,
+            status TEXT NOT NULL DEFAULT 'active',
+            item_count INTEGER NOT NULL DEFAULT 0,
+            last_indexed_at TEXT DEFAULT '',
+            metadata_json TEXT NOT NULL DEFAULT '{}'
+        );
+        CREATE TABLE IF NOT EXISTS historical_diary_index (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            source_backup_path TEXT NOT NULL,
+            source_table TEXT NOT NULL DEFAULT 'session_diary',
+            source_row_id INTEGER NOT NULL,
+            session_id TEXT NOT NULL DEFAULT '',
+            created_at TEXT NOT NULL DEFAULT '',
+            domain TEXT NOT NULL DEFAULT '',
+            summary TEXT NOT NULL DEFAULT '',
+            decisions TEXT NOT NULL DEFAULT '',
+            pending TEXT NOT NULL DEFAULT '',
+            context_next TEXT NOT NULL DEFAULT '',
+            mental_state TEXT NOT NULL DEFAULT '',
+            self_critique TEXT NOT NULL DEFAULT '',
+            source TEXT NOT NULL DEFAULT '',
+            content_hash TEXT NOT NULL UNIQUE,
+            indexed_at TEXT DEFAULT (datetime('now')),
+            metadata_json TEXT NOT NULL DEFAULT '{}',
+            UNIQUE(source_backup_path, source_table, source_row_id)
+        );
+        CREATE INDEX IF NOT EXISTS idx_historical_diary_session
+            ON historical_diary_index(session_id);
+        CREATE INDEX IF NOT EXISTS idx_historical_diary_created
+            ON historical_diary_index(created_at);
+        CREATE INDEX IF NOT EXISTS idx_historical_diary_domain
+            ON historical_diary_index(domain);
+        """
+    )
+    if conn is None:
+        db.commit()
+def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
+    row = conn.execute(
+        "SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1",
+        (table,),
+    ).fetchone()
+    return bool(row)
+def _fts_upsert_with_conn(
+    conn: sqlite3.Connection,
+    source: str,
+    source_id: str,
+    title: str,
+    body: str,
+    category: str = "",
+) -> None:
+    conn.execute("DELETE FROM unified_search WHERE source = ? AND source_id = ?", (source, str(source_id)))
+    conn.execute(
+        """
+        INSERT INTO unified_search(source, source_id, title, body, category, updated_at)
+        VALUES (?, ?, ?, ?, ?, datetime('now'))
+        """,
+        (source, str(source_id), str(title)[:200], body or "", category or ""),
+    )
+def _row_value(row: sqlite3.Row | dict[str, Any], key: str, default: str = "") -> str:
+    try:
+        if isinstance(row, sqlite3.Row) and key not in row.keys():
+            return default
+        value = row[key]
+    except Exception:
+        return default
+    return "" if value is None else str(value)
+def _historical_diary_hash(backup_path: Path, row: sqlite3.Row | dict[str, Any]) -> str:
+    payload = {
+        "id": _row_value(row, "id"),
+        "session_id": _row_value(row, "session_id"),
+        "created_at": _row_value(row, "created_at"),
+        "summary": _row_value(row, "summary"),
+        "decisions": _row_value(row, "decisions"),
+        "pending": _row_value(row, "pending"),
+        "context_next": _row_value(row, "context_next"),
+    }
+    return hashlib.sha256(json.dumps(payload, sort_keys=True).encode("utf-8")).hexdigest()
+def _diary_body(row: sqlite3.Row | dict[str, Any]) -> str:
+    return " | ".join(
+        part
+        for part in [
+            _row_value(row, "summary"),
+            _row_value(row, "decisions"),
+            _row_value(row, "pending"),
+            _row_value(row, "context_next"),
+            _row_value(row, "mental_state"),
+            _row_value(row, "self_critique"),
+            _row_value(row, "user_signals"),
+        ]
+        if part
+    )
+def _link_historical_diary_to_kg(hist: sqlite3.Row, row: sqlite3.Row | dict[str, Any]) -> int:
+    try:
+        import knowledge_graph as kg
+        diary_ref = f"historical_diary:{hist['id']}"
+        session_id = _row_value(row, "session_id")
+        domain = _row_value(row, "domain") or "general"
+        body = _diary_body(row)
+        label = _row_value(row, "summary") or session_id or diary_ref
+        kg.upsert_node(
+            "diary",
+            diary_ref,
+            label,
+            {
+                "created_at": _row_value(row, "created_at"),
+                "session_id": session_id,
+                "source": "backup",
+                "backup_path": _row_value(hist, "source_backup_path"),
+            },
+        )
+        edges = 0
+        if session_id:
+            kg.upsert_node("session", f"session:{session_id}", session_id, {"source": "historical_diary"})
+            kg.upsert_edge(
+                "diary",
+                diary_ref,
+                "describes_session",
+                "session",
+                f"session:{session_id}",
+                confidence=0.95,
+                source_memory_id=diary_ref,
+            )
+            edges += 1
+        if domain:
+            kg.upsert_node("area", f"area:{domain}", domain, {"source": "historical_diary"})
+            kg.upsert_edge(
+                "diary",
+                diary_ref,
+                "belongs_to_area",
+                "area",
+                f"area:{domain}",
+                confidence=0.8,
+                source_memory_id=diary_ref,
+            )
+            edges += 1
+        for email in sorted(set(EMAIL_RE.findall(body)))[:12]:
+            kg.upsert_node("email", f"email:{email.lower()}", email.lower(), {"source": "historical_diary"})
+            kg.upsert_edge(
+                "diary",
+                diary_ref,
+                "mentions_email",
+                "email",
+                f"email:{email.lower()}",
+                confidence=0.75,
+                source_memory_id=diary_ref,
+            )
+            edges += 1
+        return edges
+    except Exception:
+        return 0
+def _backup_db_paths(backups_root: str | Path | None = None, *, max_files: int = 40) -> list[Path]:
+    root = Path(backups_root) if backups_root is not None else paths.backups_dir()
+    if not root.exists():
+        return []
+    candidates: list[Path] = []
+    for path in root.rglob("*.db"):
+        name = path.name.lower()
+        if name.endswith("-wal") or name.endswith("-shm"):
+            continue
+        candidates.append(path)
+    def sort_key(item: Path) -> tuple[int, float]:
+        try:
+            mtime = item.stat().st_mtime if item.exists() else 0.0
+        except OSError:
+            mtime = 0.0
+        weekly_priority = 1 if item.name.startswith("weekly-") or "weekly" in item.parts else 0
+        return (weekly_priority, mtime)
+    candidates.sort(key=sort_key, reverse=True)
+    return candidates[: max(1, int(max_files or 1))]
+def _connect_backup(path: Path) -> sqlite3.Connection | None:
+    try:
+        uri = f"file:{path.resolve().as_posix()}?mode=ro"
+        conn = sqlite3.connect(uri, uri=True, timeout=1.0)
+        conn.row_factory = sqlite3.Row
+        return conn
+    except Exception:
+        return None
+def _active_diary_keys(conn: sqlite3.Connection) -> set[tuple[str, str]]:
+    keys: set[tuple[str, str]] = set()
+    for table in ("session_diary", "diary_archive"):
+        if not _table_exists(conn, table):
+            continue
+        for row in conn.execute(f"SELECT session_id, created_at FROM {table}").fetchall():
+            keys.add((str(row["session_id"] or ""), str(row["created_at"] or "")))
+    return keys
+def reconcile_backup_diaries(
+    *,
+    backups_root: str | Path | None = None,
+    max_backup_files: int = 40,
+    limit: int = 5000,
+) -> dict[str, Any]:
+    """Index missing session diaries from technical backups into active search.
+    Rows are copied into a historical index, not into active `session_diary`.
+    That keeps provenance intact and avoids overwriting current memory.
+    """
+    conn = get_db()
+    ensure_memory_fabric_schema(conn)
+    active_keys = _active_diary_keys(conn)
+    scanned_backups = 0
+    scanned_rows = 0
+    skipped_active = 0
+    inserted = 0
+    fts_rows = 0
+    kg_edges = 0
+    for backup_path in _backup_db_paths(backups_root, max_files=max_backup_files):
+        if scanned_rows >= limit:
+            break
+        backup_conn = _connect_backup(backup_path)
+        if backup_conn is None:
+            continue
+        try:
+            if not _table_exists(backup_conn, "session_diary"):
+                continue
+            scanned_backups += 1
+            rows = backup_conn.execute(
+                "SELECT * FROM session_diary ORDER BY created_at DESC LIMIT ?",
+                (max(1, int(limit - scanned_rows)),),
+            ).fetchall()
+            for row in rows:
+                scanned_rows += 1
+                key = (_row_value(row, "session_id"), _row_value(row, "created_at"))
+                if key in active_keys:
+                    skipped_active += 1
+                    continue
+                content_hash = _historical_diary_hash(backup_path, row)
+                metadata = {
+                    "backup_name": backup_path.name,
+                    "quality_tier": _row_value(row, "quality_tier"),
+                    "quality_score": _row_value(row, "quality_score"),
+                }
+                before = conn.total_changes
+                conn.execute(
+                    """
+                    INSERT OR IGNORE INTO historical_diary_index (
+                        source_backup_path, source_table, source_row_id,
+                        session_id, created_at, domain, summary, decisions,
+                        pending, context_next, mental_state, self_critique,
+                        source, content_hash, metadata_json
+                    )
+                    VALUES (?, 'session_diary', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        str(backup_path),
+                        int(_row_value(row, "id", "0") or 0),
+                        _row_value(row, "session_id"),
+                        _row_value(row, "created_at"),
+                        _row_value(row, "domain"),
+                        _row_value(row, "summary"),
+                        _row_value(row, "decisions"),
+                        _row_value(row, "pending"),
+                        _row_value(row, "context_next"),
+                        _row_value(row, "mental_state"),
+                        _row_value(row, "self_critique"),
+                        _row_value(row, "source"),
+                        content_hash,
+                        json.dumps(metadata, ensure_ascii=False, sort_keys=True),
+                    ),
+                )
+                if conn.total_changes > before:
+                    inserted += 1
+                hist = conn.execute(
+                    "SELECT id, summary, domain FROM historical_diary_index WHERE content_hash=?",
+                    (content_hash,),
+                ).fetchone()
+                if hist:
+                    title = str(hist["summary"] or _row_value(row, "session_id") or "Historical diary")
+                    _fts_upsert_with_conn(
+                        conn,
+                        HISTORICAL_DIARY_SOURCE,
+                        str(hist["id"]),
+                        title,
+                        _diary_body(row),
+                        str(hist["domain"] or "backup"),
+                    )
+                    fts_rows += 1
+                    kg_edges += _link_historical_diary_to_kg(hist, row)
+        finally:
+            backup_conn.close()
+    conn.execute(
+        """
+        INSERT INTO memory_fabric_sources(source_id, source_type, source_ref, status, item_count, last_indexed_at, metadata_json)
+        VALUES ('historical_diary_backups', 'backup', ?, 'active', ?, datetime('now'), ?)
+        ON CONFLICT(source_id) DO UPDATE SET
+            source_ref=excluded.source_ref,
+            item_count=excluded.item_count,
+            last_indexed_at=excluded.last_indexed_at,
+            metadata_json=excluded.metadata_json
+        """,
+        (
+            str(Path(backups_root) if backups_root is not None else paths.backups_dir()),
+            int(conn.execute("SELECT COUNT(*) AS total FROM historical_diary_index").fetchone()["total"] or 0),
+            json.dumps({"scanned_backups": scanned_backups, "scanned_rows": scanned_rows}, sort_keys=True),
+        ),
+    )
+    conn.commit()
+    return {
+        "ok": True,
+        "scanned_backups": scanned_backups,
+        "scanned_rows": scanned_rows,
+        "skipped_active": skipped_active,
+        "inserted": inserted,
+        "fts_rows": fts_rows,
+        "kg_edges": kg_edges,
+    }
+def _count_transcript_files() -> dict[str, int]:
+    return {
+        "claude_code": len(find_claude_session_files()),
+        "codex": len(find_codex_session_files()),
+    }
+def _local_context_embedding_stats() -> dict[str, Any]:
+    """Summarise the embeddings stored in the local-context database.
+    Returns ``{"exists": False}`` when the database file is missing and
+    ``{"exists": False, "error": ...}`` when anything raises. When the
+    ``local_embeddings`` table is absent the result has zero embeddings and an
+    empty ``models`` mapping. Otherwise it holds the total row count, a
+    ``"model_id:dimension" -> count`` mapping, and ``hash_embeddings``: the sum
+    of counts whose key starts with ``HASH_EMBEDDING_MODEL + ":"``.
+    """
+    try:
+        from local_context.db import local_context_db_path
+        db_path = local_context_db_path()
+        if not db_path.is_file():
+            return {"exists": False}
+        # Path.as_uri() percent-encodes characters such as "?", "#" and "%"
+        # that SQLite would otherwise parse as URI syntax, and produces the
+        # "file:///C:/..." form required for Windows drive paths.
+        conn = sqlite3.connect(f"{db_path.resolve().as_uri()}?mode=ro", uri=True, timeout=1.0)
+        conn.row_factory = sqlite3.Row
+        try:
+            if not _table_exists(conn, "local_embeddings"):
+                return {"exists": True, "embeddings": 0, "models": {}}
+            rows = conn.execute(
+                "SELECT model_id, dimension, COUNT(*) AS total FROM local_embeddings GROUP BY model_id, dimension"
+            ).fetchall()
+            models = {
+                f"{row['model_id']}:{row['dimension']}": int(row["total"] or 0)
+                for row in rows
+            }
+            return {
+                "exists": True,
+                "embeddings": sum(models.values()),
+                "models": models,
+                "hash_embeddings": sum(
+                    total for key, total in models.items() if key.startswith(HASH_EMBEDDING_MODEL + ":")
+                ),
+            }
+        finally:
+            # Read-only handle; always release it, including on early return.
+            conn.close()
+    except Exception as exc:
+        # Diagnostics helper: report the failure instead of propagating it.
+        return {"exists": False, "error": str(exc)}
+def _cognitive_kg_stats() -> dict[str, Any]:
+    """Count knowledge-graph nodes and edges in the cognitive database.
+    Returns ``{"exists": False}`` when the database file is missing and
+    ``{"exists": False, "error": ...}`` when anything raises, which includes a
+    query against a missing ``kg_nodes`` or ``kg_edges`` table.
+    """
+    try:
+        from cognitive_paths import resolve_cognitive_db
+        db_path = resolve_cognitive_db(for_write=False)
+        if not db_path.is_file():
+            return {"exists": False}
+        # Path.as_uri() percent-encodes characters such as "?", "#" and "%"
+        # that SQLite would otherwise parse as URI syntax, and produces the
+        # "file:///C:/..." form required for Windows drive paths.
+        conn = sqlite3.connect(f"{db_path.resolve().as_uri()}?mode=ro", uri=True, timeout=1.0)
+        try:
+            nodes = conn.execute("SELECT COUNT(*) FROM kg_nodes").fetchone()[0]
+            edges = conn.execute("SELECT COUNT(*) FROM kg_edges").fetchone()[0]
+            return {"exists": True, "nodes": int(nodes or 0), "edges": int(edges or 0)}
+        finally:
+            # Read-only handle; always release it.
+            conn.close()
+    except Exception as exc:
+        # Diagnostics helper: report the failure instead of propagating it.
+        return {"exists": False, "error": str(exc)}
+def memory_fabric_health(
+    *,
+    include_backup_scan: bool = True,
+    backups_root: str | Path | None = None,
+) -> dict[str, Any]:
+    """Report the state of the transcript index, historical diaries, embeddings and knowledge graph.
+    Each finding is appended to ``issues`` as a dict with ``code``,
+    ``severity`` and ``message``. ``ok`` is False only when some issue has
+    severity ``"error"``; the checks in this function emit ``"warn"`` and
+    ``"info"`` only.
+    When ``include_backup_scan`` is true, the backups returned by
+    ``_backup_db_paths(backups_root, max_files=12)`` are opened and the 1000
+    most recent ``session_diary`` rows of each are compared with the active
+    diary keys and the hashes already stored in ``historical_diary_index``.
+    """
+    ensure_memory_fabric_schema()
+    conn = get_db()
+    transcript_files = _count_transcript_files()
+    transcript_index_count = int(conn.execute("SELECT COUNT(*) AS total FROM transcript_index").fetchone()["total"] or 0)
+    historical_count = int(conn.execute("SELECT COUNT(*) AS total FROM historical_diary_index").fetchone()["total"] or 0)
+    issues: list[dict[str, str]] = []
+    # Source transcripts exist, yet nothing has been indexed.
+    if sum(transcript_files.values()) > 0 and transcript_index_count == 0:
+        issues.append({
+            "code": "transcript_index_empty",
+            "severity": "warn",
+            "message": "Transcript files exist but compact transcript_index is empty.",
+        })
+    backup_rows = 0
+    backup_files = 0
+    backup_unreconciled = 0
+    if include_backup_scan:
+        active_keys = _active_diary_keys(conn)
+        historical_hashes = {
+            str(row["content_hash"] or "")
+            for row in conn.execute("SELECT content_hash FROM historical_diary_index").fetchall()
+        }
+        for backup_path in _backup_db_paths(backups_root, max_files=12):
+            backup_conn = _connect_backup(backup_path)
+            if backup_conn is None:
+                continue
+            try:
+                # Only backups that contain a session_diary table are counted as scanned.
+                if not _table_exists(backup_conn, "session_diary"):
+                    continue
+                backup_files += 1
+                rows = backup_conn.execute("SELECT * FROM session_diary ORDER BY created_at DESC LIMIT 1000").fetchall()
+                backup_rows += len(rows)
+                for row in rows:
+                    # A row is reconciled if its (session_id, created_at) key is in the
+                    # active keys or its hash is already in the historical index.
+                    key = (_row_value(row, "session_id"), _row_value(row, "created_at"))
+                    if key in active_keys:
+                        continue
+                    if _historical_diary_hash(backup_path, row) in historical_hashes:
+                        continue
+                    backup_unreconciled += 1
+            finally:
+                backup_conn.close()
+        if backup_unreconciled > 0:
+            issues.append({
+                "code": "backup_diaries_not_reconciled",
+                "severity": "warn",
+                "message": "Backup session diaries exist outside active memory and historical index.",
+            })
+    embeddings = _local_context_embedding_stats()
+    if int(embeddings.get("hash_embeddings") or 0) > 0:
+        issues.append({
+            "code": "hash_embeddings_present",
+            "severity": "info",
+            "message": "Local context still has deterministic fallback embeddings; re-embedding is recommended.",
+        })
+    kg = _cognitive_kg_stats()
+    if kg.get("exists") and int(kg.get("nodes") or 0) == 0:
+        issues.append({
+            "code": "kg_empty",
+            "severity": "info",
+            "message": "Knowledge graph tables exist but have no nodes.",
+        })
+    return {
+        "ok": not any(issue["severity"] == "error" for issue in issues),
+        "issues": issues,
+        "transcripts": {
+            "files": transcript_files,
+            "index_rows": transcript_index_count,
+        },
+        "historical_diaries": {
+            "index_rows": historical_count,
+            "backup_files_scanned": backup_files,
+            "backup_rows_seen": backup_rows,
+            "backup_rows_unreconciled": backup_unreconciled,
+        },
+        "local_context": embeddings,
+        "knowledge_graph": kg,
+    }
+def repair_memory_fabric(
+    *,
+    transcript_hours: int = MAX_TRANSCRIPT_HOURS,
+    transcript_limit: int = 1000,
+    backup_limit: int = 5000,
+) -> dict[str, Any]:
+    """Force a transcript re-index, reconcile backup diaries, then attach a fresh health report.
+    The three steps run in that order; their results are returned under
+    ``transcripts``, ``backups`` and ``health`` alongside ``ok: True``.
+    """
+    # Dict entries are evaluated top to bottom, which keeps the step order.
+    return {
+        "ok": True,
+        "transcripts": ensure_transcript_index(
+            hours=transcript_hours,
+            limit=transcript_limit,
+            min_user_messages=1,
+            force=True,
+        ),
+        "backups": reconcile_backup_diaries(limit=backup_limit),
+        "health": memory_fabric_health(include_backup_scan=True),
+    }

package/src/pre_answer_router.py CHANGED Viewed

@@ -1100,10 +1100,11 @@ def _source_diary(request: SourceRequest) -> SourceResult:
 def _source_transcripts(request: SourceRequest) -> SourceResult:
     try:
-        from transcript_index import index_recent_transcripts, search_transcript_index
+        from transcript_index import ensure_transcript_index, search_transcript_index
+        from transcript_utils import MAX_TRANSCRIPT_HOURS
-        index_recent_transcripts(hours=72, limit=120, min_user_messages=1)
-        indexed_rows = search_transcript_index(request.query, hours=72, limit=4)
+        ensure_transcript_index(hours=MAX_TRANSCRIPT_HOURS, limit=1000, min_user_messages=1)
+        indexed_rows = search_transcript_index(request.query, hours=MAX_TRANSCRIPT_HOURS, limit=4)
         if indexed_rows:
             indexed_result = _rows_result(
                 "transcript_index",

package/src/scripts/nexo-backup.sh CHANGED Viewed

@@ -2,6 +2,8 @@
 # NEXO DB hourly backup — crontab: 0 * * * * $NEXO_HOME/core/scripts/nexo-backup.sh
 NEXO_HOME="${NEXO_HOME:-$HOME/.nexo}"
 NEXO_DIR="$NEXO_HOME"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+CORE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
 BACKUP_DIR="$NEXO_HOME/runtime/backups"
 if [ ! -d "$BACKUP_DIR" ] && [ -d "$NEXO_HOME/backups" ]; then
     BACKUP_DIR="$NEXO_HOME/backups"
@@ -23,7 +25,35 @@ LOCAL_CONTEXT_MAX_BACKUP_BYTES="${NEXO_LOCAL_CONTEXT_MAX_BACKUP_BYTES:-214748364
 mkdir -p "$BACKUP_DIR" "$WEEKLY_DIR"
+reconcile_memory_fabric_before_prune() {
+    # Best-effort step: run memory_fabric.reconcile_backup_diaries over the
+    # backup directory. The embedded Python adds $CORE_DIR (and a sibling
+    # "src" directory, if present) to sys.path before importing the module.
+    # All output is discarded and any failure is ignored (`|| true`), so the
+    # caller continues regardless.
+    python3 - "$BACKUP_DIR" "$CORE_DIR" <<'PY' >/dev/null 2>&1 || true
+from __future__ import annotations
+import sys
+from pathlib import Path
+backup_dir = Path(sys.argv[1])
+core_dir = Path(sys.argv[2])
+for candidate in (core_dir, core_dir.parent / "src"):
+    if candidate.exists():
+        sys.path.insert(0, str(candidate))
+try:
+    import memory_fabric
+    memory_fabric.reconcile_backup_diaries(
+        backups_root=backup_dir,
+        max_backup_files=80,
+        limit=10000,
+    )
+except Exception:
+    pass
+PY
+}
 cleanup_backups() {
+    reconcile_memory_fabric_before_prune
     PRUNER="$NEXO_HOME/core/scripts/prune_runtime_backups.py"
     if [ ! -f "$PRUNER" ]; then
         PRUNER="$(dirname "$0")/prune_runtime_backups.py"

package/src/scripts/nexo-memory-fabric.py ADDED Viewed

@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# nexo: name=memory-fabric
+# nexo: description=Refresh transcript search, historical backup diaries, and graph links.
+# nexo: runtime=python
+# nexo: cron_id=memory-fabric
+# nexo: schedule=02:35
+# nexo: recovery_policy=catchup
+# nexo: run_on_boot=true
+# nexo: run_on_wake=true
+from __future__ import annotations
+import json
+import os
+import sys
+from pathlib import Path
+# Parent of this script's directory; put on sys.path so that the
+# `import memory_fabric` inside main() can resolve.
+RUNTIME_ROOT = Path(__file__).resolve().parents[1]
+if str(RUNTIME_ROOT) not in sys.path:
+    sys.path.insert(0, str(RUNTIME_ROOT))
+def _int_env(name: str, default: int) -> int:
+    """Read a positive integer from the environment variable *name*.
+    Unset, blank or non-integer values yield *default*; parsed values below 1
+    are raised to 1.
+    """
+    text = os.environ.get(name, "").strip()
+    if text:
+        try:
+            parsed = int(text)
+        except ValueError:
+            return default
+        return max(1, parsed)
+    return default
+def main() -> int:
+    """Run the memory-fabric repair, print its result as sorted JSON, and return 0."""
+    import memory_fabric
+    limits = {
+        "transcript_limit": _int_env("NEXO_MEMORY_FABRIC_TRANSCRIPT_LIMIT", 1000),
+        "backup_limit": _int_env("NEXO_MEMORY_FABRIC_BACKUP_LIMIT", 10000),
+    }
+    report = memory_fabric.repair_memory_fabric(**limits)
+    print(json.dumps(report, ensure_ascii=False, sort_keys=True))
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

package/src/tools_transcripts.py CHANGED Viewed

@@ -8,26 +8,53 @@ from transcript_utils import (
     load_transcript,
     search_transcripts,
 )
+from transcript_index import ensure_transcript_index, search_transcript_index
 def handle_transcript_search(query: str = "", hours: int = 24, client: str = "", limit: int = 10) -> str:
     """Search recent Claude Code / Codex transcripts as a fallback when memory is insufficient."""
     window = clamp_transcript_hours(hours)
-    rows = search_transcripts(query or "", hours=window, client=(client or "").strip(), limit=limit)
+    clean_client = (client or "").strip()
+    ensure_transcript_index(
+        hours=window,
+        client=clean_client,
+        limit=max(200, min(2000, int(limit or 10) * 50)),
+        min_user_messages=1,
+    )
+    rows = search_transcript_index(query or "", hours=window, client=clean_client, limit=limit)
+    source = "index"
+    if not rows:
+        rows = search_transcripts(
+            query or "",
+            hours=window,
+            client=clean_client,
+            limit=limit,
+            min_user_messages=1,
+        )
+        source = "raw"
     if not rows:
         scope = f"query='{query}'" if query else "recent transcripts"
         return f"No transcript matches for {scope} in the last {window}h."
-    lines = [f"TRANSCRIPTS ({len(rows)}) — last {window}h"]
+    lines = [f"TRANSCRIPTS ({len(rows)}) — last {window}h ({source})"]
     for item in rows:
+        session_file = item.get("session_file") or item.get("session_id") or item.get("display_name")
+        display_name = item.get("display_name") or item.get("path_ref") or item.get("session_path")
+        modified = item.get("modified") or item.get("modified_at")
         lines.append(
-            f"- {item.get('session_file')}: [{item.get('client')}] {item.get('display_name')} "
-            f"(modified={item.get('modified')}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
+            f"- {session_file}: [{item.get('client') or item.get('source_client')}] {display_name} "
+            f"(modified={modified}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
         )
         if item.get("cwd"):
             lines.append(f"  cwd: {item['cwd']}")
         if item.get("session_uid"):
             lines.append(f"  session_uid: {item['session_uid']}")
+        if item.get("conversation_id") and item.get("conversation_id") != item.get("session_id"):
+            lines.append(f"  conversation_id: {item['conversation_id']}")
+        if item.get("path_ref"):
+            lines.append(f"  path: {item['path_ref']}")
+        if item.get("sanitized_summary"):
+            lines.append(f"  summary: {item['sanitized_summary']}")
         for snippet in item.get("matched_messages") or []:
             lines.append(
                 f"  [{snippet.get('role')}#{snippet.get('index')}] {snippet.get('snippet')}"
@@ -38,15 +65,29 @@ def handle_transcript_search(query: str = "", hours: int = 24, client: str = "",
 def handle_transcript_recent(hours: int = 24, client: str = "", limit: int = 10) -> str:
     """List recent transcripts without searching full text."""
     window = clamp_transcript_hours(hours)
-    rows = list_recent_transcripts(hours=window, client=(client or "").strip(), limit=limit)
+    clean_client = (client or "").strip()
+    ensure_transcript_index(
+        hours=window,
+        client=clean_client,
+        limit=max(200, min(2000, int(limit or 10) * 50)),
+        min_user_messages=1,
+    )
+    rows = search_transcript_index("", hours=window, client=clean_client, limit=limit)
+    source = "index"
+    if not rows:
+        rows = list_recent_transcripts(hours=window, client=clean_client, limit=limit, min_user_messages=1)
+        source = "raw"
     if not rows:
         return f"No transcripts found in the last {window}h."
-    lines = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h"]
+    lines = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h ({source})"]
     for item in rows:
+        session_file = item.get("session_file") or item.get("session_id") or item.get("display_name")
+        display_name = item.get("display_name") or item.get("path_ref") or item.get("session_path")
+        modified = item.get("modified") or item.get("modified_at")
         lines.append(
-            f"- {item.get('session_file')}: [{item.get('client')}] {item.get('display_name')} "
-            f"(modified={item.get('modified')}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
+            f"- {session_file}: [{item.get('client') or item.get('source_client')}] {display_name} "
+            f"(modified={modified}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
         )
     return "\n".join(lines)
@@ -62,6 +103,7 @@ def handle_transcript_read(
         session_ref=(session_ref or "").strip(),
         transcript_path=(transcript_path or "").strip(),
         client=(client or "").strip(),
+        min_user_messages=1,
     )
     if not transcript:
         target = session_ref or transcript_path or "(empty ref)"

package/src/transcript_index.py CHANGED Viewed

@@ -15,9 +15,12 @@ from typing import Any
 from db import get_db
 from transcript_utils import (
     DEFAULT_TRANSCRIPT_HOURS,
+    MAX_TRANSCRIPT_HOURS,
     _score_text_match,
     _tokenize,
     _truncate,
+    find_claude_session_files,
+    find_codex_session_files,
     list_recent_transcripts,
 )
@@ -103,6 +106,29 @@ def _sanitized_summary(session: dict[str, Any], *, limit: int = 900) -> str:
     return _truncate(summary, limit)
+def _row_ref_matches(query: str, row: dict[str, Any]) -> bool:
+    """Tell whether *query* is a prefix of one of the identifying fields of *row*.
+    The comparison is case-insensitive and covers the session id, conversation
+    id, display name, ``path_ref`` and its file name / stem. For each value the
+    text after the last ``:`` is also tried. Queries shorter than six
+    characters never match.
+    """
+    needle = str(query or "").strip().lower()
+    if len(needle) < 6:
+        return False
+    ref_path = Path(str(row.get("path_ref") or ""))
+    fields = (
+        row.get("session_id"),
+        row.get("conversation_id"),
+        row.get("display_name"),
+        row.get("path_ref"),
+        ref_path.name,
+        ref_path.stem,
+    )
+    for field in fields:
+        text = str(field or "").strip().lower()
+        if text and (text.startswith(needle) or text.rsplit(":", 1)[-1].startswith(needle)):
+            return True
+    return False
 def index_transcript_session(session: dict[str, Any]) -> dict[str, Any]:
     """Upsert a single transcript metadata row and return it."""
     _ensure_transcript_index_table()
@@ -186,6 +212,81 @@ def index_recent_transcripts(
     return indexed
+def _latest_source_modified_ts(client: str = "") -> float:
+    """Return the newest mtime among the discovered session files, or 0.0 if none can be stat'ed.
+    An empty *client* covers both ``claude_code`` and ``codex``; otherwise only
+    the named client's files are considered.
+    """
+    candidates: list[Path] = []
+    finders = (
+        ("claude_code", find_claude_session_files),
+        ("codex", find_codex_session_files),
+    )
+    for name, finder in finders:
+        if not client or client == name:
+            candidates.extend(finder())
+    newest = 0.0
+    for candidate in candidates:
+        try:
+            mtime = candidate.stat().st_mtime
+        except OSError:
+            # Skip files that cannot be stat'ed.
+            continue
+        newest = max(newest, mtime)
+    return newest
+def _parse_iso_ts(value: str) -> float:
+    """Convert an ISO-format string to a POSIX timestamp; empty or unparsable input gives 0.0."""
+    if value:
+        try:
+            return datetime.fromisoformat(value).timestamp()
+        except Exception:
+            pass
+    return 0.0
+def ensure_transcript_index(
+    *,
+    hours: int = MAX_TRANSCRIPT_HOURS,
+    client: str = "",
+    limit: int = 1000,
+    min_user_messages: int = 1,
+    force: bool = False,
+) -> dict[str, Any]:
+    """Keep the compact transcript DB index warm enough for fast lookup.
+    This is intentionally bounded. Raw JSONL remains the source of truth, but
+    normal MCP searches should hit this table before falling back to slow file
+    scans.
+    Re-indexing via ``index_recent_transcripts`` happens when ``force`` is set,
+    when the index has no rows for the requested client, or when the index is
+    considered stale. Returns row counts before and after, the number of
+    sessions indexed, and the ``forced``/``stale`` flags.
+    """
+    _ensure_transcript_index_table()
+    conn = get_db()
+    params: list[Any] = []
+    # `where` is assembled from fixed fragments only; the client value is bound as a parameter.
+    where = "1=1"
+    if client:
+        where += " AND source_client = ?"
+        params.append(client)
+    before = int(conn.execute(f"SELECT COUNT(*) AS total FROM transcript_index WHERE {where}", tuple(params)).fetchone()["total"] or 0)
+    latest_indexed = str(conn.execute(
+        f"SELECT MAX(modified_at) AS latest FROM transcript_index WHERE {where}",
+        tuple(params),
+    ).fetchone()["latest"] or "")
+    latest_source_ts = _latest_source_modified_ts(client)
+    latest_indexed_ts = _parse_iso_ts(latest_indexed)
+    # Stale when the newest source file is more than one second newer than the
+    # newest indexed modified_at.
+    # NOTE(review): if the newest source file is never indexed (for example
+    # filtered out by min_user_messages or the hours window), this stays true
+    # and every call re-indexes -- confirm against index_recent_transcripts.
+    stale = bool(latest_source_ts and latest_source_ts > latest_indexed_ts + 1.0)
+    should_index = bool(force or before == 0 or stale)
+    indexed: list[dict[str, Any]] = []
+    if should_index:
+        indexed = index_recent_transcripts(
+            hours=hours,
+            client=client,
+            limit=limit,
+            min_user_messages=min_user_messages,
+        )
+    after = int(conn.execute(f"SELECT COUNT(*) AS total FROM transcript_index WHERE {where}", tuple(params)).fetchone()["total"] or 0)
+    return {
+        "ok": True,
+        "before": before,
+        "after": after,
+        "indexed": len(indexed),
+        "forced": bool(force),
+        "stale": stale,
+        "hours": hours,
+        "client": client,
+    }
 def search_transcript_index(
     query: str = "",
     *,
@@ -201,7 +302,7 @@ def search_transcript_index(
         where += " AND source_client = ?"
         params.append(client)
     rows = [dict(row) for row in conn.execute(
-        f"SELECT * FROM transcript_index WHERE {where} ORDER BY modified_at DESC LIMIT 500",
+        f"SELECT * FROM transcript_index WHERE {where} ORDER BY modified_at DESC LIMIT 5000",
         tuple(params),
     ).fetchall()]
@@ -222,9 +323,11 @@ def search_transcript_index(
             continue
         haystack = " ".join(
             str(row.get(field) or "")
-            for field in ("sanitized_summary", "display_name", "session_id", "conversation_id", "metadata_json")
+            for field in ("sanitized_summary", "display_name", "session_id", "conversation_id", "path_ref", "metadata_json")
         )
         score = _score_text_match(query_tokens, haystack)
+        if _row_ref_matches(query, row):
+            score = max(score, 2.0)
         if score <= 0:
             continue
         row["_score"] = round(score, 4)

package/src/transcript_utils.py CHANGED Viewed

@@ -110,7 +110,10 @@ def find_codex_session_files() -> list[Path]:
         if not root.exists():
             continue
         for jsonl in sorted(root.rglob("*.jsonl")):
-            key = jsonl.name
+            try:
+                key = str(jsonl.resolve())
+            except OSError:
+                key = str(jsonl)
             if key in seen:
                 continue
             seen.add(key)
@@ -346,8 +349,20 @@ def list_recent_transcripts(
     return filtered[: max(1, int(limit or 10))]
-def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, client: str = "", limit: int = 10) -> list[dict]:
-    rows = list_recent_transcripts(hours=hours, client=client, limit=200)
+def search_transcripts(
+    query: str,
+    *,
+    hours: int = DEFAULT_TRANSCRIPT_HOURS,
+    client: str = "",
+    limit: int = 10,
+    min_user_messages: int = MIN_USER_MESSAGES,
+) -> list[dict]:
+    rows = list_recent_transcripts(
+        hours=hours,
+        client=client,
+        limit=200,
+        min_user_messages=min_user_messages,
+    )
     query_tokens = _tokenize(query)
     if not query_tokens:
         return rows[: max(1, int(limit or 10))]
@@ -398,7 +413,46 @@ def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, cli
     return matches[: max(1, int(limit or 10))]
-def load_transcript(session_ref: str = "", transcript_path: str = "", client: str = "") -> dict | None:
+def _transcript_ref_matches(ref: str, session: dict, path: Path) -> bool:
+    """Decide whether *ref* identifies the transcript described by *session* and *path*.
+    An empty ref matches everything. Otherwise the ref must equal one of the
+    session's file / display name / uid / conversation id or the path, its
+    name or stem, or (when at least six characters long) be a
+    case-insensitive prefix of one of them or of the text after its last ``:``.
+    """
+    wanted = str(ref or "").strip()
+    if not wanted:
+        return True
+    known = {
+        str(session.get(field, ""))
+        for field in ("session_file", "display_name", "session_uid", "conversation_id")
+    }
+    known |= {str(path), path.name, path.stem}
+    if wanted in known:
+        return True
+    # Operator-facing refs are often short prefixes copied from filenames
+    # or session ids. Require a minimum length so common words do not match
+    # arbitrary historical transcripts.
+    if len(wanted) < 6:
+        return False
+    prefix = wanted.lower()
+    return any(
+        text.startswith(prefix) or text.rsplit(":", 1)[-1].startswith(prefix)
+        for text in (str(item or "").strip().lower() for item in known)
+        if text
+    )
+def load_transcript(
+    session_ref: str = "",
+    transcript_path: str = "",
+    client: str = "",
+    *,
+    min_user_messages: int = 1,
+) -> dict | None:
     ref = str(session_ref or "").strip()
     path_ref = str(transcript_path or "").strip()
@@ -416,17 +470,15 @@ def load_transcript(session_ref: str = "", transcript_path: str = "", client: st
                     continue
             except Exception:
                 continue
-        session = extract_codex_session(path) if detected_client == "codex" else extract_claude_session(path)
+        session = (
+            extract_codex_session(path, min_user_messages=min_user_messages)
+            if detected_client == "codex"
+            else extract_claude_session(path, min_user_messages=min_user_messages)
+        )
         if not session:
             continue
-        if ref:
-            if ref not in {
-                str(session.get("session_file", "")),
-                str(session.get("display_name", "")),
-                str(session.get("session_uid", "")),
-                str(path),
-            }:
-                continue
+        if ref and not _transcript_ref_matches(ref, session, path):
+            continue
         try:
             session["modified"] = datetime.fromtimestamp(path.stat().st_mtime).isoformat()
         except OSError: