npm - nexo-brain - Versions diffs - 7.35.0 → 7.36.0 - Mend

nexo-brain 7.35.0 → 7.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/.claude-plugin/plugin.json +1 -1
package/README.md +1 -1
package/package.json +1 -1
package/src/db/_schema.py +1 -0
package/src/deep_sleep_retention.py +8 -0
package/src/enforcement_engine.py +58 -0
package/src/hooks/post_tool_use.py +114 -0
package/src/local_context/api.py +157 -7
package/src/local_context/db.py +61 -1
package/src/scripts/nexo-daily-self-audit.py +57 -13

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.35.0",
+  "version": "7.36.0",
   "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
   "author": {
     "name": "NEXO Brain",

package/README.md CHANGED Viewed

@@ -18,7 +18,7 @@
 [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
-Version `7.35.0` is the current packaged-runtime line. Minor release - Cognitive OS Ola 4: selective forget lets you delete a leaked secret or a wrong memory and prove it is gone (zeroed across every live store, `secure_delete=ON`) or correct a fact reversibly, recurring failure archetypes are distilled into reusable diagnostic templates primed before a matching action (strong/weak marker tiers so benign success phrasing never triggers them, guidance-only), and closing a local-only followup-runner is no longer mis-flagged as an external real-world action. Builds on v7.34.0 (working memory + self-error learning + associative graph + deep-sleep rewrite + evals).
+Version `7.36.0` is the current packaged-runtime line. Minor release - local index disk reclaim: the local file/code index (`local-context.db`) no longer grows without bound. It now uses `auto_vacuum=INCREMENTAL` plus a one-time guarded `VACUUM` to convert existing databases, stores embeddings as compact float32 BLOBs instead of JSON text (~4-6x smaller, back-compatible dual-write/dual-read with a resumable backfill and kill switches), reclaims disk on purge/clear, and the daily self-audit now actively compacts at its size cap (`NEXO_LOCAL_INDEX_MAX_BYTES`) instead of only warning. An established index reclaims ~10-20GB immediately and grows several-fold slower; the backup subsystem was audited and is already bounded. Builds on v7.35.0 (selective forget + recurring-incident diagnostic templates).
 Previously in `7.31.9`: patch release over v7.31.8 - UI release closeout now has to prove the original reported symptom was reopened with observable evidence before claiming the release is ready.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.35.0",
+  "version": "7.36.0",
   "mcpName": "io.github.wazionapps/nexo",
   "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
   "homepage": "https://nexo-brain.com",

package/src/db/_schema.py CHANGED Viewed

@@ -2007,6 +2007,7 @@ def _m63_local_context_layer(conn):
             model_revision TEXT NOT NULL DEFAULT '',
             dimension INTEGER NOT NULL,
             vector_json TEXT NOT NULL,
+            vector_blob BLOB,
             created_at REAL NOT NULL
         );

package/src/deep_sleep_retention.py CHANGED Viewed

@@ -146,6 +146,14 @@ def _prune_db_backups(deep_sleep_dir: Path, report: dict, *, keep: int, apply: b
             _record_delete(report, backup, reason=f"old-db-backup:{family}", apply=apply)
             for sidecar in _sidecars(backup):
                 _record_delete(report, sidecar, reason=f"old-db-backup-sidecar:{family}", apply=apply)
+    # Orphan sweep: -wal/-shm sidecars whose base .db no longer exists (left by
+    # interrupted/legacy deep-sleep processes). The online-backup path produces
+    # sidecar-free snapshots, so any sidecar with a missing base is a true
+    # orphan. Scoped strictly to this deep-sleep backup dir; never the live DBs.
+    for sidecar in list(deep_sleep_dir.glob("*-backup-*.db-wal")) + list(deep_sleep_dir.glob("*-backup-*.db-shm")):
+        base = Path(str(sidecar)[: -len("-wal")]) if str(sidecar).endswith("-wal") else Path(str(sidecar)[: -len("-shm")])
+        if not base.exists():
+            _record_delete(report, sidecar, reason="orphan-db-sidecar", apply=apply)
 def _prune_contexts(deep_sleep_dir: Path, report: dict, *, keep: int, apply: bool) -> None:

package/src/enforcement_engine.py CHANGED Viewed

@@ -465,6 +465,7 @@ class HeadlessEnforcer:
         self.user_message_count = 0
         self.tool_timestamps: dict[str, float] = {}
         self.msg_since_tool: dict[str, int] = {}
+        self._tool_user_message_index: dict[str, int] = {}
         self.injection_queue: list[dict] = []
         self._started_at = time.time()
         self._injections_done = 0
@@ -551,6 +552,8 @@ class HeadlessEnforcer:
         # seen, periodic/conditional reminders stay suppressed so cron
         # runners can reach TURN_END instead of reopening the task loop.
         self._session_stopped: bool = False
+        self._first_visible_startup_gate_fired: bool = False
+        self._first_visible_text_allowed: bool = False
         try:
             self._post_close_cooldown_seconds = max(
                 0,
@@ -1036,6 +1039,52 @@ class HeadlessEnforcer:
         except Exception:
             pass
+    def should_block_first_visible_text(self) -> bool:
+        """Gate the first visible answer on startup, continuity and heartbeat calls.
+
+        Returns False (and latches ``_first_visible_text_allowed``) when no user
+        message has been counted yet or when every requirement is met. Otherwise
+        returns True; the first time that happens a single reminder listing the
+        missing items is enqueued, later calls block without enqueueing again.
+        """
+        if self._first_visible_text_allowed:
+            return False
+        if self.user_message_count <= 0:
+            self._first_visible_text_allowed = True
+            return False
+        turn_now = int(self.user_message_count or 0)
+        # Most recent user-message index at which a heartbeat-like tool ran.
+        last_beat_turn = max(
+            self._tool_user_message_index.get(tool, -1)
+            for tool in ("nexo_heartbeat", "nexo_task_open")
+        )
+        continuity_seen = not self.tools_called.isdisjoint(
+            {
+                "nexo_smart_startup",
+                "nexo_session_diary_read",
+                "nexo_reminders",
+                "nexo_checkpoint_read",
+            }
+        )
+        # (label reported to the model, requirement satisfied?) in report order.
+        requirements = (
+            ("nexo_startup", "nexo_startup" in self.tools_called),
+            ("continuidad minima", continuity_seen),
+            ("nexo_heartbeat", last_beat_turn >= turn_now),
+        )
+        missing = [label for label, satisfied in requirements if not satisfied]
+        if not missing:
+            self._first_visible_text_allowed = True
+            return False
+        if not self._first_visible_startup_gate_fired:
+            prompt = (
+                "Before any visible answer, register the session, load minimal continuity, "
+                "and associate the current user message with a heartbeat. Missing: "
+                f"{', '.join(missing)}. Execute the required NEXO tool calls now. "
+                "Do not produce visible text for this reminder."
+            )
+            self._enqueue(prompt, "first-visible-startup-heartbeat-gate", rule_id="R38_first_visible_startup_gate")
+            self._first_visible_startup_gate_fired = True
+        return True
     def _check_capability_denial_requires_reality(self, text: str):
         """Block unsupported capability denials until a live source was checked."""
         if not text or not _CAPABILITY_DENIAL_RE.search(text):
@@ -2537,6 +2586,7 @@ class HeadlessEnforcer:
         self.tools_called.add(name)
         self.tool_timestamps[name] = time.time()
         self.msg_since_tool[name] = 0
+        self._tool_user_message_index[name] = int(self.user_message_count or 0)
         # v7.6 conditional counter advance. Tools watched by a
         # conditional rule tick a counter on every non-matching call.
@@ -3346,6 +3396,14 @@ def run_with_enforcement(
             msg = event.get("message", {})
             for block in msg.get("content", []):
                 if block.get("type") == "text":
+                    try:
+                        if enforcer.should_block_first_visible_text():
+                            item = enforcer.flush()
+                            if item:
+                                _inject(item["prompt"])
+                            return False
+                    except Exception as _startup_gate_exc:  # noqa: BLE001
+                        _logger.warning("first visible startup gate failed: %s", _startup_gate_exc)
                     collected_text.append(block["text"])
                     # R16 — probe each assistant text block as it arrives
                     # so a declared-done line is caught on the same turn

package/src/hooks/post_tool_use.py CHANGED Viewed

@@ -374,6 +374,118 @@ def _write_json(path: Path, payload: dict) -> None:
     tmp.replace(path)
+def _pending_trace_path(sid: str) -> Path:
+    """Return the per-session post-change trace file inside the closeout dir.
+
+    Any character of ``sid`` that is not alphanumeric, ``-`` or ``_`` is
+    replaced by ``_``; an empty ``sid`` is treated as ``"unknown"``.
+    """
+    raw_sid = sid or "unknown"
+    sanitized = "".join(
+        char if (char.isalnum() or char in "-_") else "_"
+        for char in raw_sid
+    )
+    return _production_closeout_dir() / f"post-change-trace-{sanitized}.json"
+def _split_files(value: object) -> set[str]:
+    """Normalize a file-list argument into a set of non-empty, stripped names.
+
+    ``None`` yields an empty set. Lists, tuples and sets are stringified item
+    by item; anything else is stringified whole. The text is then split on
+    newlines, commas and semicolons.
+    """
+    if value is None:
+        return set()
+    if isinstance(value, (list, tuple, set)):
+        text = "\n".join(map(str, value))
+    else:
+        text = str(value)
+    stripped = (piece.strip() for piece in re.split(r"[\n,;]+", text))
+    return {piece for piece in stripped if piece}
+def _record_post_change_trace(payload: dict, sid: str) -> None:
+    """Merge this tool payload into the session's pending post-change trace.
+
+    Loads the trace JSON for ``sid`` (or starts a fresh one), adds whatever
+    touched files, guard-checked files and change-logged files this payload
+    contributes, and writes the trace back only when it holds at least one
+    such file or the production-mutation flag.
+    """
+    if not sid:
+        sid = "unknown"
+    path = _pending_trace_path(sid)
+    # Fresh skeleton when no readable trace exists yet for this session.
+    trace = _read_json(path) or {
+        "sid": sid,
+        "touched_files": [],
+        "guard_files": [],
+        "change_log_files": [],
+        "production_mutation": False,
+        "created_at": time.time(),
+    }
+    tool_name = _tool_name(payload)
+    tool_input = _tool_input(payload)
+    cmd = _extract_command(payload)
+    # Work on sets so repeated tool calls do not duplicate entries.
+    touched = set(trace.get("touched_files") or [])
+    guards = set(trace.get("guard_files") or [])
+    logged = set(trace.get("change_log_files") or [])
+    if _is_shared_mutation_payload(payload):
+        # Collect every file-like input field the payload may carry.
+        touched.update(_split_files(tool_input.get("file_path")))
+        touched.update(_split_files(tool_input.get("path")))
+        touched.update(_split_files(tool_input.get("files")))
+        touched.update(_split_files(tool_input.get("paths")))
+        if cmd:
+            # Keep only the first 500 characters of the command.
+            trace["last_mutation_command"] = cmd[:500]
+            if _is_production_mutation_command(cmd):
+                trace["production_mutation"] = True
+    if tool_name in {"nexo_guard_check", "mcp__nexo__nexo_guard_check"}:
+        guards.update(_split_files(tool_input.get("files")))
+    if _is_change_log_tool(tool_name):
+        logged.update(_split_files(tool_input.get("files")))
+        logged.update(_split_files(tool_input.get("files_changed")))
+        # A change-log call that names no files (and none were logged before)
+        # is credited with every file touched so far.
+        if not logged and touched:
+            logged.update(touched)
+    if _is_task_close_tool(tool_name):
+        # Files declared at close time also count as touched.
+        touched.update(_split_files(tool_input.get("files_changed")))
+    # Stored as sorted lists so the JSON has a stable ordering.
+    trace["touched_files"] = sorted(touched)
+    trace["guard_files"] = sorted(guards)
+    trace["change_log_files"] = sorted(logged)
+    trace["updated_at"] = time.time()
+    # Skip the write entirely when there is nothing to track.
+    if touched or guards or logged or trace.get("production_mutation"):
+        _write_json(path, trace)
+def _missing_trace_items(payload: dict, sid: str) -> list[str]:
+    """List what a task-close payload still lacks according to the pending trace.
+
+    Returns an empty list when the payload is not a task close, when no trace
+    is readable, or when the trace records neither touched files nor a
+    production mutation. Otherwise returns the labels of the missing items in
+    a fixed order: guards, change log, then the ``files_changed`` declaration.
+    """
+    if not _is_task_close_tool(_tool_name(payload)):
+        return []
+    pending = _read_json(_pending_trace_path(sid or "unknown"))
+    if not pending:
+        return []
+    tool_input = _tool_input(payload)
+    touched = set(pending.get("touched_files") or [])
+    production = pending.get("production_mutation")
+    if not (touched or production):
+        return []
+    guards = set(pending.get("guard_files") or [])
+    logged = set(pending.get("change_log_files") or [])
+    declared = _split_files(tool_input.get("files_changed"))
+    gaps: list[str] = []
+    if touched and not guards:
+        gaps.append("guardias ejecutados")
+    # The payload itself is only inspected when the trace has no logged files.
+    if production and not (logged or _task_close_payload_has_change_trace(payload)):
+        gaps.append("registro de cambios")
+    if touched:
+        if not declared:
+            gaps.append("files_changed")
+        elif not touched.issubset(declared):
+            gaps.append("files_changed completo")
+    return gaps
+def check_post_change_trace_closeout(payload: dict, sid: str) -> str | None:
+    """Record the payload in the session trace and block an incomplete close.
+
+    Returns None when nothing is missing (deleting the trace file if the
+    payload is a task close); otherwise returns the blocking message passed
+    through ``append_operator_language_contract``.
+    """
+    sid = sid or "unknown"
+    _record_post_change_trace(payload, sid)
+    gaps = _missing_trace_items(payload, sid)
+    if gaps:
+        pending = _read_json(_pending_trace_path(sid))
+        # Show at most six touched files in the message.
+        detected = ", ".join((pending.get("touched_files") or [])[:6]) or "cambio detectado"
+        return append_operator_language_contract(
+            "Cierre bloqueado: antes de marcar completado hay que cuadrar archivos tocados, "
+            f"guardias y registro de cambios. Falta: {', '.join(gaps)}. "
+            f"Archivos detectados: {detected}."
+        )
+    if _is_task_close_tool(_tool_name(payload)):
+        _pending_trace_path(sid).unlink(missing_ok=True)
+    return None
 def check_production_change_log_closeout(payload: dict, sid: str) -> str | None:
     if not sid:
         sid = "unknown"
@@ -551,6 +663,7 @@ def main() -> int:
         sid = _resolve_sid_from_payload(payload)
         reminder = check_inbox_and_emit_reminder(sid)
         change_log_message = check_production_change_log_closeout(payload, sid)
+        post_change_trace_message = check_post_change_trace_closeout(payload, sid)
         shared_scope_message = check_shared_scope_closeout(payload)
         g1_message: str | None = None
         try:
@@ -562,6 +675,7 @@ def main() -> int:
             protocol_message,
             reminder,
             change_log_message,
+            post_change_trace_message,
             shared_scope_message,
             g1_message,
         )

package/src/local_context/api.py CHANGED Viewed

@@ -7,6 +7,7 @@ import re
 import shutil
 import sqlite3
 import stat
+import struct
 import subprocess
 import sys
 import time
@@ -56,6 +57,16 @@ FTS_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_FTS_BACKFILL_BATCH", "500")
 FTS_MIGRATION_CURSOR_KEY = "fts_migration_cursor"
 FTS_MIGRATION_DONE_KEY = "fts_migration_done"
 FTS_BACKFILL_TOTAL_KEY = "fts_backfill_total"
+# Compact float32 BLOB embedding storage (replaces JSON-text vector_json, which
+# bloated the index ~4-6x). Dual-write both columns, read prefers the BLOB and
+# falls back to JSON, backfill converts old rows incrementally. Feature flags
+# are kill switches that revert to JSON-only with no redeploy.
+EMB_BLOB_WRITE_ENABLED = os.environ.get("NEXO_LOCAL_EMB_BLOB_WRITE", "1") != "0"
+EMB_BLOB_READ_ENABLED = os.environ.get("NEXO_LOCAL_EMB_BLOB_READ", "1") != "0"
+EMB_BLOB_BACKFILL_BATCH = int(os.environ.get("NEXO_LOCAL_EMB_BLOB_BACKFILL_BATCH", "500") or "500")
+EMB_BLOB_CURSOR_KEY = "emb_blob_backfill_cursor"
+EMB_BLOB_DONE_KEY = "emb_blob_backfill_done"
+EMB_BLOB_TOTAL_KEY = "emb_blob_backfill_total"
 EMBEDDING_REFRESH_JOB = "embedding_refresh"
 ENTITY_FACTS_JOB = "entity_facts"
 BACKGROUND_INDEX_JOB_TYPES = {ENTITY_FACTS_JOB}
@@ -2888,6 +2899,47 @@ def _latest_version_id(conn, asset_id: str) -> str:
     return row["version_id"] if row else stable_id("ver", asset_id)
+def _encode_embedding_blob(vector) -> bytes | None:
+    """Pack a vector of floats into a little-endian float32 BLOB.
+
+    Args:
+        vector: iterable of numbers, or None.
+
+    Returns:
+        ``dimension * 4`` bytes, or None when blob writes are disabled, the
+        vector is empty, or any component cannot be stored as float32
+        (non-numeric, or magnitude beyond float32 range). On None the caller
+        still writes vector_json, the source of truth during the transition.
+
+    float32 vs the legacy float64 JSON is a deliberate, negligible cosine
+    drift (vectors are L2-normalized / already 8-dp-rounded).
+    """
+    if not EMB_BLOB_WRITE_ENABLED:
+        return None
+    try:
+        floats = [float(v) for v in (vector or [])]
+        if not floats:
+            return None
+        return struct.pack(f"<{len(floats)}f", *floats)
+    except (TypeError, ValueError, OverflowError, struct.error):
+        # OverflowError is what struct raises for a finite value too large for
+        # the "f" format (and what float() raises for a huge int); without it
+        # one out-of-range component would abort the whole insert/backfill.
+        return None
+def _decode_embedding(row) -> list:
+    """Return a stored embedding as a plain Python list.
+
+    The float32 BLOB is used only when blob reads are enabled and its length
+    equals ``dimension * 4``; a missing column, an unusable dimension, a
+    wrong-sized blob or an unpack error all fall back to the legacy
+    ``vector_json`` text.
+    """
+    packed = None
+    if EMB_BLOB_READ_ENABLED:
+        try:
+            packed = row["vector_blob"]
+        except (KeyError, IndexError):
+            # Row came from a query that did not select the column.
+            packed = None
+    if packed:
+        try:
+            expected_dim = int(row["dimension"] or 0)
+        except (KeyError, IndexError, TypeError, ValueError):
+            expected_dim = 0
+        # 4 bytes per float32 component.
+        if expected_dim and len(packed) == expected_dim * 4:
+            try:
+                return list(struct.unpack(f"<{expected_dim}f", packed))
+            except struct.error:
+                pass  # fall through to JSON
+    return json_loads(row["vector_json"], [])
 def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
     record = embeddings.embed_record(text)
     model_id = str(record["model_id"])
@@ -2895,8 +2947,8 @@ def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> No
     dimension = int(record["dimension"])
     conn.execute(
         """
-        INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, vector_blob, created_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
         """,
         (
             stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}"),
@@ -2906,6 +2958,7 @@ def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> No
             model_revision,
             dimension,
             json_dumps(record["vector"]),
+            _encode_embedding_blob(record["vector"]),
             now(),
         ),
     )
@@ -3555,6 +3608,13 @@ def run_once(
             _backfill_fts_rows(conn, batch_limit=FTS_BACKFILL_BATCH)
         except Exception:
             pass
+    # Incremental embedding TEXT->BLOB backfill: same bounded one-batch-per-tick
+    # discipline. Best-effort; skips when disabled or already done.
+    if EMB_BLOB_BACKFILL_BATCH > 0:
+        try:
+            _backfill_embedding_blobs(conn, batch_limit=EMB_BLOB_BACKFILL_BATCH)
+        except Exception:
+            pass
     conn_after = _conn()
     initial_after = _initial_scan_status(conn_after, list_roots(readonly=False))
     blocking_active_after = _active_job_count(conn_after, blocking_only=True)
@@ -4603,6 +4663,76 @@ def _backfill_fts_rows(conn, *, batch_limit: int | None = None) -> dict:
     return _with_sqlite_busy_retry(_run)
+def _backfill_embedding_blobs(conn, *, batch_limit: int | None = None) -> dict:
+    """Incrementally convert legacy vector_json TEXT rows to float32 vector_blob.
+
+    Idempotent and resumable via a rowid cursor in local_index_state,
+    committing per batch. Converts the EXISTING JSON in place (never
+    re-embeds). New rows already get vector_blob from the dual-write, so this
+    only handles pre-existing rows.
+
+    Rows that cannot be converted are left JSON-only (dual-read falls back)
+    but still advance the cursor, so one bad row can never stall the backfill.
+    That covers a JSON length != dimension, JSON that is not a sequence, and a
+    component that does not fit in float32.
+
+    Args:
+        conn: SQLite connection to the local-context database.
+        batch_limit: maximum rows per call; defaults to
+            EMB_BLOB_BACKFILL_BATCH. A value <= 0 disables the backfill.
+
+    Returns:
+        A status dict with ``ok`` and ``done`` plus either ``skipped`` or the
+        ``processed`` / ``converted`` / ``cursor`` counters for this batch.
+    """
+    if batch_limit is None:
+        batch_limit = EMB_BLOB_BACKFILL_BATCH
+    batch_limit = int(batch_limit)
+    if batch_limit <= 0:
+        return {"ok": True, "skipped": "disabled", "done": _get_state_conn(conn, EMB_BLOB_DONE_KEY, "0") == "1"}
+    if not EMB_BLOB_WRITE_ENABLED:
+        return {"ok": True, "skipped": "blob_write_disabled", "done": False}
+    if _get_state_conn(conn, EMB_BLOB_DONE_KEY, "0") == "1":
+        return {"ok": True, "skipped": "already_done", "done": True}
+    def _run() -> dict:
+        try:
+            cursor = int(_get_state_conn(conn, EMB_BLOB_CURSOR_KEY, "0") or "0")
+        except Exception:
+            cursor = 0
+        # Record the initial backlog size once, for progress reporting.
+        if _get_state_conn(conn, EMB_BLOB_TOTAL_KEY, "") == "":
+            try:
+                total_row = conn.execute(
+                    "SELECT COUNT(*) AS total FROM local_embeddings WHERE vector_blob IS NULL"
+                ).fetchone()
+                _set_state_conn(conn, EMB_BLOB_TOTAL_KEY, str(int(total_row["total"] or 0)))
+            except Exception:
+                pass
+        rows = conn.execute(
+            """
+            SELECT rowid AS rid, dimension, vector_json
+            FROM local_embeddings
+            WHERE rowid > ? AND vector_blob IS NULL
+            ORDER BY rowid ASC
+            LIMIT ?
+            """,
+            (cursor, batch_limit),
+        ).fetchall()
+        if not rows:
+            _set_state_conn(conn, EMB_BLOB_DONE_KEY, "1")
+            conn.commit()
+            return {"ok": True, "done": True, "processed": 0, "cursor": cursor}
+        max_rid = cursor
+        converted = 0
+        for row in rows:
+            rid = int(row["rid"])
+            # Advance the cursor before any conversion attempt so a skipped
+            # row is never revisited.
+            if rid > max_rid:
+                max_rid = rid
+            try:
+                dim = int(row["dimension"] or 0)
+            except (TypeError, ValueError):
+                dim = 0
+            vec = json_loads(row["vector_json"], [])
+            # Valid JSON that is not an array (number, object, string) has no
+            # usable length; treat it like a dimension mismatch.
+            if not dim or not isinstance(vec, (list, tuple)) or len(vec) != dim:
+                continue
+            try:
+                blob = _encode_embedding_blob(vec)
+            except (OverflowError, TypeError, ValueError, struct.error):
+                # A component that cannot be packed as float32 must skip this
+                # row only, not abort the batch before the cursor is stored.
+                blob = None
+            if blob is not None and len(blob) == dim * 4:
+                conn.execute("UPDATE local_embeddings SET vector_blob=? WHERE rowid=?", (blob, rid))
+                converted += 1
+        _set_state_conn(conn, EMB_BLOB_CURSOR_KEY, str(max_rid))
+        conn.commit()
+        return {"ok": True, "done": False, "processed": len(rows), "converted": converted, "cursor": max_rid}
+    return _with_sqlite_busy_retry(_run)
 def _context_candidate_rows(
     conn,
     entity_asset_ids: list[str],
@@ -4625,7 +4755,7 @@ def _context_candidate_rows(
                     prefilter_rows = conn.execute(
                         """
                         SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
-                               e.vector_json, e.model_id, e.model_revision, e.dimension
+                               e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
                         FROM local_chunks_fts f
                         JOIN local_chunks c ON c.rowid = f.rowid
                         JOIN local_assets a ON a.asset_id = c.asset_id
@@ -4657,7 +4787,7 @@ def _context_candidate_rows(
             prefilter_rows = conn.execute(
                 f"""
                 SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
-                       e.vector_json, e.model_id, e.model_revision, e.dimension
+                       e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
                 FROM local_chunks c
                 JOIN local_assets a ON a.asset_id = c.asset_id
                 LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -4686,7 +4816,7 @@ def _context_candidate_rows(
     base_rows = conn.execute(
         """
         SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
-               e.vector_json, e.model_id, e.model_revision, e.dimension
+               e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
         FROM local_chunks c
         JOIN local_assets a ON a.asset_id = c.asset_id
         LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -4713,7 +4843,7 @@ def _context_candidate_rows(
     entity_rows = conn.execute(
         f"""
         SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
-               e.vector_json, e.model_id, e.model_revision, e.dimension
+               e.vector_json, e.vector_blob, e.model_id, e.model_revision, e.dimension
         FROM local_chunks c
         JOIN local_assets a ON a.asset_id = c.asset_id
         LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -5200,7 +5330,7 @@ def _context_query_conn(
     for row in rows:
         if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
             continue
-        vector = json_loads(row["vector_json"], [])
+        vector = _decode_embedding(row)
         text_score = _search_text_score(search_query, row["text"])
         path_score = _search_text_score(search_query, row["path"] or "")
         summary_score = _search_text_score(search_query, row["summary"] or "")
@@ -5756,6 +5886,14 @@ def purge_asset(asset_id: str) -> dict:
     conn = _conn()
     _purge_asset_ids(conn, [asset_id])
     conn.commit()
+    # Reclaim the just-freed pages. Cheap incremental_vacuum (not a full VACUUM
+    # — this is a frequent single-asset op; a 19GB rewrite per purge would be
+    # catastrophic). No-op unless auto_vacuum=INCREMENTAL is active. Best-effort.
+    try:
+        conn.execute("PRAGMA incremental_vacuum")
+        conn.commit()
+    except Exception:
+        pass
     log_event("info", "asset_purged", "Asset purged", asset_id=asset_id)
     return {"ok": True, "asset_id": asset_id}
@@ -5790,6 +5928,18 @@ def clear_index() -> dict:
     )
     _set_initial_index_complete(conn, False)
     conn.commit()
+    # The index is now near-empty, so a full VACUUM rewrites a tiny file and
+    # actually returns the freed disk to the OS (DELETE alone only moves pages
+    # to the free-list). Checkpoint the WAL first so its pages are folded in,
+    # VACUUM, then checkpoint again — in WAL mode VACUUM's rewrite lands in the
+    # WAL, so the main file is only truncated by the trailing checkpoint. Works
+    # regardless of auto_vacuum mode. Best-effort — never fail the clear.
+    try:
+        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
+        conn.execute("VACUUM")
+        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
+    except Exception:
+        pass
     log_event("warn", "index_cleared", "Local memory index cleared")
     return {"ok": True}

package/src/local_context/db.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import os
+import shutil
 import sqlite3
 import time
 from pathlib import Path
@@ -14,6 +15,11 @@ LOCAL_CONTEXT_DB_NAME = "local-context.db"
 MIGRATION_STATE_KEY = "local_context_db_migrated_from_main"
 MIGRATION_SKIPPED_KEY = "local_context_db_migration_skipped"
 MAIN_CLEANUP_STATE_KEY = "local_context_main_tables_drained"
+# One-time conversion flag: auto_vacuum=INCREMENTAL is a no-op on an already
+# populated DB until exactly one full VACUUM runs. We do that conversion once
+# per never-converted DB (guarded by free disk) and record it here so it never
+# re-runs the expensive rewrite. See ensure_local_context_db().
+AUTO_VACUUM_CONVERTED_KEY = "auto_vacuum_converted"
 LOCAL_CONTEXT_TABLES: tuple[str, ...] = (
     "local_index_roots",
@@ -77,6 +83,12 @@ def _connect(db_path: Path) -> sqlite3.Connection:
     conn = sqlite3.connect(str(db_path), timeout=max(_busy_timeout_ms() / 1000.0, 1.0), check_same_thread=False)
     conn.row_factory = sqlite3.Row
     conn.execute(f"PRAGMA busy_timeout={_busy_timeout_ms()}")
+    # auto_vacuum must be set BEFORE the first table is created to take effect on
+    # a brand-new DB (it is a no-op on an already-populated file — those are
+    # converted once via a guarded full VACUUM in ensure_local_context_db()).
+    # INCREMENTAL lets deletes (privacy purge, reconcile, purge_asset) reclaim
+    # pages via `PRAGMA incremental_vacuum` instead of growing the file forever.
+    conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
     conn.execute("PRAGMA journal_mode=WAL")
     conn.execute("PRAGMA synchronous=NORMAL")
     conn.execute("PRAGMA temp_store=MEMORY")
@@ -119,10 +131,20 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
     _ensure_entity_dossier_schema(conn)
     _ensure_local_context_v2_schema(conn)
     _m84_local_chunks_fts(conn)
-    conn.execute("PRAGMA user_version=84")
+    _m85_local_embeddings_blob(conn)
+    conn.execute("PRAGMA user_version=85")
     conn.commit()
+def _m85_local_embeddings_blob(conn: sqlite3.Connection) -> None:
+    """v85: add a compact float32 BLOB column next to the legacy vector_json TEXT.
+
+    The column is declared as plain ``BLOB`` (nullable, no DEFAULT), so rows
+    that existed before the migration read as NULL until they are backfilled.
+    The write path dual-writes both columns and the read path prefers the BLOB
+    and falls back to JSON, so adding the column is safe even before any
+    backfill runs.
+
+    NOTE(review): presumably ``_add_column_if_missing`` issues an
+    ``ALTER TABLE ... ADD COLUMN`` only when the column is absent — confirm
+    against the helper.
+    """
+    _add_column_if_missing(conn, "local_embeddings", "vector_blob", "BLOB")
 def _table_columns(conn: sqlite3.Connection, table: str) -> set[str]:
     rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
     return {str(row["name"] if isinstance(row, sqlite3.Row) else row[1]) for row in rows}
@@ -421,11 +443,49 @@ def ensure_local_context_db() -> None:
                 pass
         return
     _ensure_schema(_CONN)
+    _convert_auto_vacuum_once(_CONN, db_path)
     _LAST_MIGRATION_ATTEMPT = now
     migration = migrate_from_main_if_needed(_CONN)
     _READY = True
+def _convert_auto_vacuum_once(conn: sqlite3.Connection, db_path: Path) -> None:
+    """Flip an existing DB from auto_vacuum=NONE to INCREMENTAL, once.
+
+    Setting the PRAGMA only takes effect after one full VACUUM that writes the
+    pointer-map pages. This rewrites the whole file once, so we guard on free
+    disk (require more than 2x the DB size free) and only record the done-flag
+    once the mode is actually INCREMENTAL, so a machine that was too full
+    retries on a later boot.
+
+    In WAL mode the VACUUM rewrite lands in the WAL, so a trailing
+    ``wal_checkpoint(TRUNCATE)`` folds it into the main file and shrinks the
+    ``-wal`` sidecar; without it a roughly DB-sized WAL would stay on disk.
+
+    Best-effort: a failure here must never block index startup. Runs on the
+    writer connection only.
+
+    Args:
+        conn: open writer connection to the local-context database.
+        db_path: path of the database file, used for the size/free-disk guard.
+    """
+    try:
+        if _state(conn, AUTO_VACUUM_CONVERTED_KEY) == "1":
+            return
+        mode = int(conn.execute("PRAGMA auto_vacuum").fetchone()[0])
+        if mode == 2:  # already INCREMENTAL (e.g. freshly created DB)
+            _set_state(conn, AUTO_VACUUM_CONVERTED_KEY, "1")
+            conn.commit()
+            return
+        try:
+            db_size = db_path.stat().st_size
+            free = shutil.disk_usage(db_path.parent).free
+        except OSError:
+            return
+        if free <= db_size * 2:
+            # Not enough scratch room — leave NONE mode, retry on a later boot.
+            return
+        conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
+        conn.execute("VACUUM")
+        new_mode = int(conn.execute("PRAGMA auto_vacuum").fetchone()[0])
+        if new_mode == 2:
+            _set_state(conn, AUTO_VACUUM_CONVERTED_KEY, "1")
+            conn.commit()
+        # VACUUM ran either way, so reclaim the WAL it filled. Kept in its own
+        # try so a busy checkpoint cannot undo the recorded conversion flag.
+        try:
+            conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").fetchall()
+        except sqlite3.Error:
+            pass
+    except Exception:
+        # Conversion is an optimization; never break startup over it.
+        pass
 def get_local_context_db() -> sqlite3.Connection:
     ensure_local_context_db()
     assert _CONN is not None

package/src/scripts/nexo-daily-self-audit.py CHANGED Viewed

@@ -1043,19 +1043,63 @@ def check_db_size():
         local_ctx = paths_module.memory_dir() / "local-context.db"
         if local_ctx.exists():
-            size_gb = local_ctx.stat().st_size / (1024 ** 3)
-            if size_gb > 60:
-                finding(
-                    "ERROR",
-                    "database",
-                    f"local-context.db is {size_gb:.1f} GB — local index runaway; purge + VACUUM (see roots/exclusions)",
-                )
-            elif size_gb > 25:
-                finding(
-                    "WARN",
-                    "database",
-                    f"local-context.db is {size_gb:.1f} GB — local index growing; review indexed roots/exclusions",
-                )
+            def _index_bytes() -> int:
+                # Sum the main DB file together with its -wal/-shm sidecars, so
+                # a large leftover WAL is counted; files that cannot be
+                # stat()ed contribute nothing.
+                sizes = []
+                for suffix in ("", "-wal", "-shm"):
+                    candidate = local_ctx.with_name(local_ctx.name + suffix)
+                    try:
+                        sizes.append(candidate.stat().st_size)
+                    except OSError:
+                        continue
+                return sum(sizes)
+            # Distinct, stricter audit cap (NOT the 60 GiB runtime soft-pause
+            # NEXO_LOCAL_CONTEXT_MAX_DB_BYTES). Default 25 GiB.
+            try:
+                hard_cap = int(os.environ.get("NEXO_LOCAL_INDEX_MAX_BYTES", str(25 * 1024 ** 3)) or str(25 * 1024 ** 3))
+            except ValueError:
+                hard_cap = 25 * 1024 ** 3
+            size_gb = _index_bytes() / (1024 ** 3)
+            if size_gb > 25:
+                # ACT, don't just warn (learning #824: the 268 GB burst went
+                # unseen because this check was advisory-only). Reclaim freed
+                # pages cheaply: checkpoint the WAL + incremental_vacuum (no-op
+                # unless auto_vacuum=INCREMENTAL is active). Best-effort, short
+                # timeout so we never fight the live indexer's write lock.
+                reclaimed_gb = 0.0
+                try:
+                    import sqlite3 as _sqlite3
+                    conn = _sqlite3.connect(str(local_ctx), timeout=5.0)
+                    try:
+                        conn.execute("PRAGMA busy_timeout=5000")
+                        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
+                        conn.execute("PRAGMA incremental_vacuum")
+                        conn.commit()
+                    finally:
+                        conn.close()
+                    after_gb = _index_bytes() / (1024 ** 3)
+                    reclaimed_gb = max(0.0, size_gb - after_gb)
+                    size_gb = after_gb
+                except Exception:
+                    pass
+                reclaimed_note = f" (reclaimed {reclaimed_gb:.1f} GB)" if reclaimed_gb > 0.05 else ""
+                if (size_gb * 1024 ** 3) > hard_cap or size_gb > 60:
+                    finding(
+                        "ERROR",
+                        "database",
+                        f"local-context.db is {size_gb:.1f} GB{reclaimed_note} — over the local-index cap; "
+                        f"review indexed roots/exclusions or run clear_index (operator decision)",
+                    )
+                else:
+                    finding(
+                        "WARN",
+                        "database",
+                        f"local-context.db is {size_gb:.1f} GB{reclaimed_note} — local index growing; review roots/exclusions",
+                    )
     except Exception as exc:
         finding("WARN", "database", f"Could not check local-context.db size: {exc}")