npm - nexo-brain - Versions diffs - 7.20.24 → 7.21.0 - Mend

nexo-brain 7.20.24 → 7.21.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

package/.claude-plugin/plugin.json +1 -1
package/README.md +5 -1
package/bin/nexo-brain.js +21 -1
package/package.json +1 -1
package/src/auto_update.py +1 -1
package/src/local_context/api.py +289 -35
package/src/local_context/embeddings.py +116 -13
package/src/runtime_service.py +426 -0
package/src/runtime_versioning.py +11 -0
package/src/server.py +42 -2
package/tool-enforcement-map.json +15 -0

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.20.24",
+  "version": "7.21.0",
   "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
   "author": {
     "name": "NEXO Brain",

package/README.md CHANGED Viewed

@@ -18,7 +18,11 @@
 [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
-Version `7.20.24` is the current packaged-runtime line. Patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
+Version `7.21.0` is the current packaged-runtime line. Minor release over v7.20.25 - MCP now starts through a thin compatibility adapter backed by one resident local Runtime Service, reducing duplicate Brain processes and SQLite contention across Claude Code, Codex, Claude Desktop, and NEXO Desktop. The release also fingerprints Runtime Service state for safe update cutover, keeps document-first Local Memory scanning, and verifies bundled local LLM files before marking them installed.
+Previously in `7.20.25`: patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
+Previously in `7.20.24`: patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
 Previously in `7.20.23`: patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.

package/bin/nexo-brain.js CHANGED Viewed

@@ -3879,12 +3879,32 @@ async function runSetup() {
         const slug = (spec.name || "").trim().toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
         const targetDir = path.join(runtimeModelsDir, slug, spec.revision);
         fs.mkdirSync(targetDir, { recursive: true });
+        const missingFiles = [];
         for (const f of (spec.required_files || [])) {
           const src = path.join(sourceDir, f.path);
           const dst = path.join(targetDir, f.path);
-          if (fs.existsSync(src) && !fs.existsSync(dst)) {
+          if (!fs.existsSync(src)) {
+            missingFiles.push(f.path);
+            continue;
+          }
+          fs.mkdirSync(path.dirname(dst), { recursive: true });
+          if (!fs.existsSync(dst) || (f.size && fs.statSync(dst).size !== f.size)) {
             fs.copyFileSync(src, dst);
           }
+          if (f.size && fs.statSync(dst).size !== f.size) {
+            missingFiles.push(`${f.path}:size`);
+            continue;
+          }
+          if (f.sha256) {
+            const actual = crypto.createHash("sha256").update(fs.readFileSync(dst)).digest("hex");
+            if (actual !== f.sha256) {
+              missingFiles.push(`${f.path}:sha256`);
+            }
+          }
+        }
+        if (missingFiles.length) {
+          log(`  WARN: bundled LLM model ${spec.name} incomplete (${missingFiles.join(", ")})`);
+          continue;
         }
         // Write the lock file to match revision (avoids re-download).
         fs.writeFileSync(path.join(targetDir, ".nexo-model-lock.json"), JSON.stringify({

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.20.24",
+  "version": "7.21.0",
   "mcpName": "io.github.wazionapps/nexo",
   "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
   "homepage": "https://nexo-brain.com",

package/src/auto_update.py CHANGED Viewed

@@ -3988,7 +3988,7 @@ def _auto_update_check_locked() -> dict:
     # Backfill runtime CLI modules for existing installs
     try:
-        for fname in ("cli.py", "script_registry.py", "skills_runtime.py", "cron_recovery.py", "client_preferences.py", "claude_cli.py", "agent_runner.py", "bootstrap_docs.py", "mcp_required_tools.py"):
+        for fname in ("cli.py", "script_registry.py", "skills_runtime.py", "cron_recovery.py", "client_preferences.py", "claude_cli.py", "agent_runner.py", "bootstrap_docs.py", "mcp_required_tools.py", "runtime_service.py"):
             src_file = SRC_DIR / fname
             dest_file = NEXO_HOME / fname
             if src_file.is_file() and (not dest_file.exists() or src_file.stat().st_mtime > dest_file.stat().st_mtime):

package/src/local_context/api.py CHANGED Viewed

@@ -10,6 +10,7 @@ import hashlib
 import subprocess
 import sys
 import time
+from functools import lru_cache
 from pathlib import Path
 from typing import Any
@@ -41,6 +42,77 @@ INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
 PERFORMANCE_PROFILE_KEY = "performance_profile"
 DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
 VALID_CONTEXT_MODES = {"compact", "full"}
+EMBEDDING_REFRESH_JOB = "embedding_refresh"
+HIGH_VALUE_DOCUMENT_SUFFIXES = {
+    ".pdf",
+    ".doc",
+    ".docx",
+    ".xls",
+    ".xlsx",
+    ".ppt",
+    ".pptx",
+    ".pages",
+    ".numbers",
+    ".key",
+    ".rtf",
+    ".odt",
+    ".ods",
+    ".odp",
+}
+KNOWN_TEXT_SUFFIXES = {
+    ".md",
+    ".markdown",
+    ".txt",
+    ".csv",
+    ".tsv",
+}
+EMAIL_DOCUMENT_SUFFIXES = {
+    ".eml",
+    ".emlx",
+    ".msg",
+}
+HIGH_VALUE_DIRECTORY_NAMES = {
+    "users",
+    "home",
+    "desktop",
+    "documents",
+    "downloads",
+    "documentos",
+    "escritorio",
+    "descargas",
+    "icloud drive",
+    "onedrive",
+    "google drive",
+    "dropbox",
+    "creative cloud files",
+    "clientes",
+    "clients",
+    "facturas",
+    "invoices",
+    "contratos",
+    "contracts",
+    "projects",
+    "proyectos",
+    "work",
+    "trabajo",
+}
+LOW_VALUE_DIRECTORY_NAMES = {
+    "applications",
+    "library",
+    "system",
+    "private",
+    "usr",
+    "var",
+    "opt",
+    "windows",
+    "program files",
+    "program files (x86)",
+    "programdata",
+    "appdata",
+    ".cache",
+    "caches",
+}
+RERANKER_MODEL_SPEC = "cross-encoder-reranker"
 PERFORMANCE_PROFILES: dict[str, dict[str, Any]] = {
     "low": {
         "profile": "low",
@@ -1092,7 +1164,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
             (version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
         )
         if should_extract(normalized, depth):
-            enqueue_job(conn, asset_id, "light_extraction", priority=60)
+            enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path))
         enqueue_job(conn, asset_id, "graph", priority=40)
     return asset_id, changed, "ok"
@@ -1203,6 +1275,44 @@ def enqueue_job(conn, asset_id: str, job_type: str, *, priority: int = 50) -> st
     return job_id
+def _extraction_priority(path: Path) -> int:
+    suffix = path.suffix.lower()
+    if suffix in HIGH_VALUE_DOCUMENT_SUFFIXES:
+        return 90
+    if suffix in KNOWN_TEXT_SUFFIXES:
+        return 82
+    if suffix in EMAIL_DOCUMENT_SUFFIXES or is_local_email_tree(str(path)):
+        return 70
+    if suffix in {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".json", ".yaml", ".yml", ".toml", ".html", ".css"}:
+        return 55
+    return 45
+def _directory_scan_priority(path: Path) -> int:
+    name = path.name.strip().lower()
+    if name in {"users", "home"}:
+        return 0
+    if name in HIGH_VALUE_DIRECTORY_NAMES:
+        return 10
+    if "icloud" in name or "onedrive" in name or "google drive" in name:
+        return 10
+    if is_local_email_tree(str(path)):
+        return 65
+    if name in LOW_VALUE_DIRECTORY_NAMES:
+        return 90
+    return 40
+def _scan_entry_sort_key(item: Path) -> tuple[int, int, str]:
+    try:
+        is_file = item.is_file()
+    except Exception:
+        is_file = False
+    if is_file:
+        return (1, -_extraction_priority(item), str(item).lower())
+    return (0, _directory_scan_priority(item), str(item).lower())
 def _iter_files(
     conn,
     root_id: int,
@@ -1236,7 +1346,7 @@ def _iter_files(
         seen_dirs.add(key)
         _upsert_dir(conn, root_id, current, seen_at, st)
         try:
-            entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
+            entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
         except Exception as exc:
             _record_scan_error(conn, stats, str(current), "quick_index", exc)
             continue
@@ -1420,7 +1530,7 @@ def _scan_known_directory(
             st = current.stat()
             if not current.is_dir():
                 continue
-            entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
+            entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
         except Exception as exc:
             _record_scan_error(conn, stats, str(current), "live_reconcile", exc)
             continue
@@ -1661,6 +1771,29 @@ def _latest_version_id(conn, asset_id: str) -> str:
     return row["version_id"] if row else stable_id("ver", asset_id)
+def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
+    record = embeddings.embed_record(text)
+    model_id = str(record["model_id"])
+    model_revision = str(record["model_revision"])
+    dimension = int(record["dimension"])
+    conn.execute(
+        """
+        INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        """,
+        (
+            stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}"),
+            asset_id,
+            chunk_id,
+            model_id,
+            model_revision,
+            dimension,
+            json_dumps(record["vector"]),
+            now(),
+        ),
+    )
 def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
     conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
     conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
@@ -1673,23 +1806,63 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
             """,
             (chunk_id, asset_id, version_id, index, chunk, len(tokenize(chunk)), now()),
         )
-        vector = embeddings.embed_text(chunk)
-        conn.execute(
-            """
-            INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
-            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
-            """,
-            (
-                stable_id("emb", f"{chunk_id}:{embeddings.MODEL_ID}:{embeddings.MODEL_REVISION}"),
-                asset_id,
-                chunk_id,
-                embeddings.MODEL_ID,
-                embeddings.MODEL_REVISION,
-                embeddings.DIMENSION,
-                json_dumps(vector),
-                now(),
-            ),
-        )
+        _insert_chunk_embedding(conn, asset_id, chunk_id, chunk)
+def _refresh_asset_embeddings(conn, asset_id: str) -> int:
+    rows = conn.execute(
+        """
+        SELECT chunk_id, text
+        FROM local_chunks
+        WHERE asset_id=?
+        ORDER BY chunk_index ASC
+        """,
+        (asset_id,),
+    ).fetchall()
+    conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
+    for row in rows:
+        _insert_chunk_embedding(conn, asset_id, row["chunk_id"], row["text"])
+    if rows:
+        conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
+    return len(rows)
+def _embedding_matches_profile(row, profile: embeddings.EmbeddingProfile) -> bool:
+    if row is None:
+        return False
+    return (
+        str(row["model_id"] or "") == profile.model_id
+        and str(row["model_revision"] or "") == profile.model_revision
+        and int(row["dimension"] or 0) == int(profile.dimension)
+    )
+def _enqueue_stale_embedding_refresh_jobs(conn, *, limit: int) -> int:
+    profile = embeddings.active_profile()
+    if profile.kind == "deterministic_embedding":
+        return 0
+    rows = conn.execute(
+        """
+        SELECT DISTINCT c.asset_id
+        FROM local_chunks c
+        JOIN local_assets a ON a.asset_id=c.asset_id
+        LEFT JOIN local_embeddings e ON e.chunk_id=c.chunk_id
+        WHERE a.status='active'
+          AND a.privacy_class='normal'
+          AND (
+            e.embedding_id IS NULL
+            OR e.model_id != ?
+            OR e.model_revision != ?
+            OR e.dimension != ?
+          )
+        ORDER BY a.updated_at ASC
+        LIMIT ?
+        """,
+        (profile.model_id, profile.model_revision, int(profile.dimension), max(1, int(limit))),
+    ).fetchall()
+    for row in rows:
+        enqueue_job(conn, row["asset_id"], EMBEDDING_REFRESH_JOB, priority=58)
+    return len(rows)
 def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -> None:
@@ -1751,6 +1924,9 @@ def process_jobs(*, limit: int = 100) -> dict:
         log_event("info", "jobs_skipped_paused", "Local memory jobs skipped because indexing is paused")
         return {"ok": True, "paused": True, "processed": 0, "failed": 0}
     recovered = _requeue_due_jobs(conn)
+    refresh_queued = _enqueue_stale_embedding_refresh_jobs(conn, limit=max(1, min(int(limit or 1), 100)))
+    if refresh_queued:
+        conn.commit()
     rows = conn.execute(
         """
         SELECT j.*, a.path, a.depth, a.privacy_class, a.status AS asset_status
@@ -1804,6 +1980,8 @@ def process_jobs(*, limit: int = 100) -> dict:
                 _replace_chunks(conn, asset_id, version_id, text)
                 _replace_entities(conn, asset_id, version_id, entities(text))
                 conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
+            elif job_type == EMBEDDING_REFRESH_JOB:
+                _refresh_asset_embeddings(conn, asset_id)
             elif job_type == "graph":
                 conn.execute(
                     """
@@ -1843,8 +2021,8 @@ def process_jobs(*, limit: int = 100) -> dict:
             conn.commit()
     conn.commit()
     if processed or failed:
-        log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed)
-    return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered}
+        log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed, refresh_queued=refresh_queued)
+    return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered, "embedding_refresh_queued": refresh_queued}
 def run_once(
@@ -2433,27 +2611,36 @@ def diagnostics_tail(limit: int = 100) -> dict:
 def model_status() -> dict:
-    models = [{
-        "profile": "local_context_embedding_fallback",
-        "name": embeddings.MODEL_ID,
-        "kind": "deterministic_embedding",
-        "revision": embeddings.MODEL_REVISION,
-        "dimension": embeddings.DIMENSION,
-        "state": "available",
+    active_embedding = embeddings.active_profile()
+    active_entry = {
+        "profile": active_embedding.profile,
+        "name": active_embedding.model_id,
+        "kind": active_embedding.kind,
+        "revision": active_embedding.model_revision,
+        "dimension": active_embedding.dimension,
+        "state": active_embedding.state,
         "required": True,
-    }]
+        "active": True,
+        "problems": list(active_embedding.problems),
+    }
+    models = []
+    active_in_manifest = False
     try:
         import local_models
         for spec in local_models.list_local_model_specs():
             verification = local_models.verify_local_model_dir(spec)
+            state = "available" if verification["ok"] else ("optional_missing" if not spec.required else "not_warmed")
+            is_active = spec.model_id == active_embedding.model_id and spec.revision == active_embedding.model_revision
+            active_in_manifest = bool(active_in_manifest or is_active)
             models.append({
                 "profile": spec.name,
                 "name": spec.model_id,
                 "kind": spec.kind,
                 "revision": spec.revision,
                 "dimension": spec.dimension,
-                "state": "available" if verification["ok"] else "not_warmed",
+                "state": state,
                 "required": spec.required,
+                "active": is_active,
                 "path": verification["path"],
                 "problems": verification["problems"],
             })
@@ -2466,6 +2653,8 @@ def model_status() -> dict:
             "required": False,
             "problems": [str(exc)],
         })
+    if not active_in_manifest:
+        models.insert(0, active_entry)
     return {"ok": True, "models": models}
@@ -2608,7 +2797,8 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
 def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
     base_rows = conn.execute(
         """
-        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
+        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
+               e.vector_json, e.model_id, e.model_revision, e.dimension
         FROM local_chunks c
         JOIN local_assets a ON a.asset_id = c.asset_id
         LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2626,7 +2816,8 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
     placeholders = ",".join("?" for _ in entity_asset_ids)
     entity_rows = conn.execute(
         f"""
-        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
+        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
+               e.vector_json, e.model_id, e.model_revision, e.dimension
         FROM local_chunks c
         JOIN local_assets a ON a.asset_id = c.asset_id
         LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2658,6 +2849,54 @@ def _compact_text(value: str, *, max_chars: int) -> str:
     return text[: max(0, max_chars - 1)].rstrip() + "…"
+def _reranker_disabled() -> bool:
+    value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_RERANKER", "").strip().lower()
+    if value in {"1", "true", "yes", "on"}:
+        return True
+    if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS") != "1":
+        return True
+    return False
+@lru_cache(maxsize=1)
+def _context_reranker():
+    if _reranker_disabled():
+        return None
+    try:
+        import local_models
+        from fastembed.rerank.cross_encoder import TextCrossEncoder
+        spec = local_models.get_local_model_spec(RERANKER_MODEL_SPEC)
+        target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
+        return TextCrossEncoder(spec.model_id, specific_model_path=str(target_dir))
+    except Exception:  # pragma: no cover - host/cache dependent
+        return None
+def _rerank_scored_candidates(search_query: str, scored: list[tuple[float, Any]], *, limit: int) -> list[tuple[float, Any]]:
+    if len(scored) <= 1:
+        return scored
+    reranker = _context_reranker()
+    if not reranker:
+        return scored
+    head_count = min(len(scored), max(int(limit) * 4, 20), 60)
+    head = scored[:head_count]
+    tail = scored[head_count:]
+    docs = [_compact_text(row["text"], max_chars=1400) for _score, row in head]
+    try:
+        scores = [float(score) for score in reranker.rerank(search_query, docs)]
+    except Exception:  # pragma: no cover - runtime fallback only
+        return scored
+    if len(scores) != len(head):
+        return scored
+    reranked = sorted(
+        ((base_score, rerank_score, row) for (base_score, row), rerank_score in zip(head, scores)),
+        key=lambda item: item[1],
+        reverse=True,
+    )
+    return [(base_score, row) for base_score, _rerank_score, row in reranked] + tail
 def _payload_size(payload: dict) -> int:
     return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
@@ -2993,10 +3232,12 @@ def _context_query_conn(
     normalized_mode, mode_warnings = _normalize_context_mode(mode)
     context_tail = _compact_text(current_context or "", max_chars=1000)
     search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
-    qvec = embeddings.embed_text(search_query)
+    query_embedding = embeddings.embed_record(search_query)
+    qvec = query_embedding["vector"]
     entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
     rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
     scored = []
+    stale_embedding_seen = False
     for row in rows:
         if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
             continue
@@ -3005,7 +3246,15 @@ def _context_query_conn(
         path_score = _search_text_score(search_query, row["path"] or "")
         summary_score = _search_text_score(search_query, row["summary"] or "")
         entity_score = entity_boosts.get(row["asset_id"], 0.0)
-        vector_score = embeddings.cosine(qvec, vector)
+        vector_score = 0.0
+        if (
+            str(row["model_id"] or "") == str(query_embedding["model_id"])
+            and str(row["model_revision"] or "") == str(query_embedding["model_revision"])
+            and int(row["dimension"] or 0) == int(query_embedding["dimension"])
+        ):
+            vector_score = embeddings.cosine(qvec, vector)
+        elif vector:
+            stale_embedding_seen = True
         score = max(text_score, path_score, summary_score, vector_score)
         if entity_score > 0:
             direct_score = max(text_score, path_score, summary_score)
@@ -3019,6 +3268,7 @@ def _context_query_conn(
         if score > 0:
             scored.append((min(float(score), 1.6), row))
     scored.sort(key=lambda item: item[0], reverse=True)
+    scored = _rerank_scored_candidates(search_query, scored, limit=int(limit))
     assets = []
     chunks = []
     evidence_refs = []
@@ -3057,6 +3307,10 @@ def _context_query_conn(
         ).fetchall()
         relations_payload = [dict(row) for row in relation_rows]
     warnings = list(mode_warnings)
+    if query_embedding.get("kind") == "deterministic_embedding":
+        warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
+    elif stale_embedding_seen:
+        warnings.append("Some local chunks still use an older embedding profile and will be refreshed automatically.")
     if evidence_required and not evidence_refs:
         warnings.append("No local evidence found for this query.")
     summary = ""

package/src/local_context/embeddings.py CHANGED Viewed

@@ -2,32 +2,135 @@ from __future__ import annotations
 import hashlib
 import math
+import os
+import warnings
+from dataclasses import dataclass
+from functools import lru_cache
+from typing import Any
 from .util import tokenize
-MODEL_ID = "nexo-local-hash-embedding"
-MODEL_REVISION = "1"
-DIMENSION = 128
+FALLBACK_MODEL_ID = "nexo-local-hash-embedding"
+FALLBACK_MODEL_REVISION = "1"
+FALLBACK_DIMENSION = 128
+PRIMARY_MODEL_SPEC = "bge-base-embeddings"
+# Backward-compatible constants. Callers that persist vectors should use
+# embed_record(), because the active profile can switch from fallback to BGE.
+MODEL_ID = FALLBACK_MODEL_ID
+MODEL_REVISION = FALLBACK_MODEL_REVISION
+DIMENSION = FALLBACK_DIMENSION
-def embed_text(text: str) -> list[float]:
-    """Deterministic local embedding fallback.
-    This is intentionally local and dependency-free. It gives the resolver a
-    working semantic-ish retrieval substrate even on machines where the pinned
-    FastEmbed model has not warmed yet. The model id/revision make it safe to
-    supersede later with pinned model vectors.
-    """
-    vec = [0.0] * DIMENSION
+@dataclass(frozen=True)
+class EmbeddingProfile:
+    model_id: str
+    model_revision: str
+    dimension: int
+    kind: str
+    state: str
+    profile: str
+    problems: tuple[str, ...] = ()
+def _hash_embed_text(text: str) -> list[float]:
+    vec = [0.0] * FALLBACK_DIMENSION
     for token in tokenize(text):
         digest = hashlib.sha256(token.encode("utf-8", errors="ignore")).digest()
-        idx = int.from_bytes(digest[:2], "big") % DIMENSION
+        idx = int.from_bytes(digest[:2], "big") % FALLBACK_DIMENSION
         sign = -1.0 if digest[2] % 2 else 1.0
         vec[idx] += sign
     norm = math.sqrt(sum(value * value for value in vec)) or 1.0
     return [round(value / norm, 8) for value in vec]
+def _fallback_profile(*problems: str) -> EmbeddingProfile:
+    return EmbeddingProfile(
+        model_id=FALLBACK_MODEL_ID,
+        model_revision=FALLBACK_MODEL_REVISION,
+        dimension=FALLBACK_DIMENSION,
+        kind="deterministic_embedding",
+        state="available",
+        profile="local_context_embedding_fallback",
+        problems=tuple(item for item in problems if item),
+    )
+def _fastembed_disabled() -> bool:
+    value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED", "").strip().lower()
+    if value in {"1", "true", "yes", "on"}:
+        return True
+    # The unit suite uses temporary NEXO homes that intentionally do not carry
+    # model weights. Keep those tests dependency-free unless explicitly opted in.
+    if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS") != "1":
+        return True
+    return False
+@lru_cache(maxsize=1)
+def _fastembed_state() -> tuple[Any, EmbeddingProfile] | tuple[None, EmbeddingProfile]:
+    if _fastembed_disabled():
+        return None, _fallback_profile("fastembed disabled for this process")
+    try:
+        import local_models
+        from fastembed import TextEmbedding
+        spec = local_models.get_local_model_spec(PRIMARY_MODEL_SPEC)
+        target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", message=r"The model .* now uses mean pooling.*", category=UserWarning)
+            model = TextEmbedding(spec.model_id, specific_model_path=str(target_dir))
+        return model, EmbeddingProfile(
+            model_id=spec.model_id,
+            model_revision=spec.revision,
+            dimension=spec.dimension or 384,
+            kind=spec.kind,
+            state="available",
+            profile=spec.name,
+        )
+    except Exception as exc:  # pragma: no cover - host/cache dependent
+        return None, _fallback_profile(str(exc))
+def active_profile() -> EmbeddingProfile:
+    _model, profile = _fastembed_state()
+    return profile
+def reset_cache() -> None:
+    _fastembed_state.cache_clear()
+def embed_record(text: str) -> dict[str, Any]:
+    model, profile = _fastembed_state()
+    if model is not None and profile.kind == "fastembed_embedding":
+        try:
+            vector = list(next(iter(model.embed([text or ""]))))
+            return {
+                "vector": [float(value) for value in vector],
+                "model_id": profile.model_id,
+                "model_revision": profile.model_revision,
+                "dimension": profile.dimension,
+                "profile": profile.profile,
+                "kind": profile.kind,
+            }
+        except Exception:  # pragma: no cover - runtime fallback only
+            pass
+    fallback = _fallback_profile()
+    return {
+        "vector": _hash_embed_text(text),
+        "model_id": fallback.model_id,
+        "model_revision": fallback.model_revision,
+        "dimension": fallback.dimension,
+        "profile": fallback.profile,
+        "kind": fallback.kind,
+    }
+def embed_text(text: str) -> list[float]:
+    return embed_record(text)["vector"]
 def cosine(a: list[float], b: list[float]) -> float:
     if not a or not b or len(a) != len(b):
         return 0.0

package/src/runtime_service.py ADDED Viewed

@@ -0,0 +1,426 @@
+"""Resident runtime service and MCP proxy bootstrap.
+The public MCP entrypoint remains ``server.py`` for compatibility.  By
+default, that entrypoint becomes a thin stdio proxy and forwards calls to a
+single resident FastMCP service over loopback HTTP.  The resident process is
+the only MCP process that initializes Brain, opens SQLite, and runs tool
+handlers.
+"""
+# The docstring must be the first statement to populate ``__doc__``; a module
+# docstring is one of the few things allowed ahead of a ``__future__`` import.
+from __future__ import annotations
+import asyncio
+import json
+import os
+import signal
+import socket
+import subprocess
+import sys
+import time
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any
+import paths
+# Loopback address used when NEXO_RUNTIME_HOST is not set.
+DEFAULT_HOST = "127.0.0.1"
+# Port used when neither NEXO_RUNTIME_PORT nor the state file provides one.
+DEFAULT_PORT = 17872
+# Number of consecutive ports, starting at the preferred one, tried when spawning.
+PORT_SCAN_LIMIT = 30
+# Default HTTP path of the MCP endpoint (overridable via NEXO_RUNTIME_MCP_PATH).
+SERVICE_PATH = "/mcp"
+# Set to "1" in the environment of the spawned resident service process.
+SERVICE_ENV = "NEXO_RUNTIME_SERVICE"
+# When truthy, the stdio entrypoint skips the proxy adapter.
+DIRECT_ENV = "NEXO_MCP_DIRECT"
+# Adapter switch; treated as enabled when unset.
+ADAPTER_ENV = "NEXO_MCP_RUNTIME_ADAPTER"
+# File names created under the runtime state dir (state, lock) and logs dir (log).
+STATE_FILE = "runtime-service.json"
+LOCK_FILE = "runtime-service.lock"
+LOG_FILE = "runtime-service.log"
+def env_flag(name: str, *, default: bool = False) -> bool:
+    """Interpret environment variable *name* as a boolean switch.
+    Returns *default* when the variable is unset.  Otherwise the result is
+    True only for the case-insensitive values 1, true, yes, on, y or si.
+    """
+    raw = os.environ.get(name)
+    if raw is not None:
+        truthy = {"1", "true", "yes", "on", "y", "si"}
+        return str(raw).strip().lower() in truthy
+    return default
+def service_host() -> str:
+    """Return the stripped NEXO_RUNTIME_HOST value, or DEFAULT_HOST when unset or empty."""
+    configured = os.environ.get("NEXO_RUNTIME_HOST", DEFAULT_HOST)
+    return str(configured or DEFAULT_HOST).strip()
+def service_path() -> str:
+    """Return the MCP endpoint path, always beginning with ``/``.
+    Reads NEXO_RUNTIME_MCP_PATH and falls back to SERVICE_PATH when the
+    variable is unset or empty.
+    """
+    configured = os.environ.get("NEXO_RUNTIME_MCP_PATH", SERVICE_PATH)
+    candidate = str(configured or SERVICE_PATH).strip()
+    if candidate.startswith("/"):
+        return candidate
+    return f"/{candidate}"
+def service_url(host: str | None = None, port: int | None = None, path: str | None = None) -> str:
+    """Build the service's ``http://host:port/path`` URL.
+    Any argument left falsy is filled from ``service_host()``,
+    ``service_port()`` or ``service_path()`` respectively.
+    """
+    resolved_host = host or service_host()
+    resolved_port = int(port or service_port())
+    resolved_path = path or service_path()
+    return f"http://{resolved_host}:{resolved_port}{resolved_path}"
+def service_state_path() -> Path:
+    """Return the state-file path, creating the runtime state directory if needed."""
+    directory = paths.runtime_state_dir()
+    directory.mkdir(parents=True, exist_ok=True)
+    return directory.joinpath(STATE_FILE)
+def service_log_path() -> Path:
+    """Return the service log path, creating the logs directory if needed."""
+    directory = paths.logs_dir()
+    directory.mkdir(parents=True, exist_ok=True)
+    return directory.joinpath(LOG_FILE)
+def service_lock_path() -> Path:
+    """Return the start-lock file path, creating the runtime state directory if needed."""
+    directory = paths.runtime_state_dir()
+    directory.mkdir(parents=True, exist_ok=True)
+    return directory.joinpath(LOCK_FILE)
+@contextmanager
+def service_start_lock(*, timeout: float = 10.0):
+    """Hold an exclusive lock on the runtime-service lock file for the ``with`` body.
+    A non-blocking lock (``msvcrt.locking`` on Windows, ``fcntl.flock``
+    elsewhere) is retried every 0.1 s until it succeeds or the deadline passes.
+    Args:
+        timeout: Seconds to keep retrying; values below 0.5 are raised to 0.5.
+    Raises:
+        TimeoutError: If the lock is still unavailable at the deadline.
+    """
+    path = service_lock_path()
+    handle = path.open("a+")
+    deadline = time.monotonic() + max(timeout, 0.5)
+    locked = False
+    try:
+        while not locked:
+            try:
+                if os.name == "nt":
+                    import msvcrt
+                    # Write one byte into an empty file before locking byte 0;
+                    # presumably so the locked one-byte range exists - TODO confirm.
+                    handle.seek(0)
+                    if not handle.read(1):
+                        handle.write("0")
+                        handle.flush()
+                    handle.seek(0)
+                    msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
+                else:
+                    import fcntl
+                    fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+                locked = True
+            # NOTE(review): BlockingIOError is already an OSError subclass, so every
+            # OSError (not only "lock busy") is retried until the deadline.
+            except (BlockingIOError, OSError):
+                if time.monotonic() >= deadline:
+                    raise TimeoutError(f"Timed out waiting for NEXO runtime service lock: {path}")
+                time.sleep(0.1)
+        # Replace the file content with "<pid>:<timestamp>" of the current holder.
+        handle.seek(0)
+        handle.truncate()
+        handle.write(f"{os.getpid()}:{time.time()}\n")
+        handle.flush()
+        yield
+    finally:
+        # Unlock only when the lock was actually taken; unlock errors are ignored.
+        if locked:
+            try:
+                if os.name == "nt":
+                    import msvcrt
+                    handle.seek(0)
+                    msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
+                else:
+                    import fcntl
+                    fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+            except Exception:
+                pass
+        # The handle is closed on every exit path, including timeout.
+        try:
+            handle.close()
+        except Exception:
+            pass
+def read_service_state() -> dict[str, Any]:
+    """Load the persisted service state as a dict.
+    A missing file, unreadable or invalid JSON, a non-dict top-level value,
+    or any other error yields an empty dict.
+    """
+    try:
+        state_file = service_state_path()
+        if state_file.is_file():
+            parsed = json.loads(state_file.read_text(encoding="utf-8"))
+            if isinstance(parsed, dict):
+                return parsed
+    except Exception:
+        pass
+    return {}
+def write_service_state(state: dict[str, Any]) -> None:
+    """Atomically persist *state* to the service state file.
+    The written payload is a copy of *state* overlaid with
+    ``current_runtime_identity()`` (which therefore wins for
+    ``runtime_version``, ``runtime_fingerprint`` and ``server_path``) plus an
+    ``updated_at`` timestamp.
+    Args:
+        state: Mapping to persist; it is not mutated.
+    Raises:
+        OSError: If the temp file cannot be written or moved into place.
+    """
+    path = service_state_path()
+    # Both the launcher and the freshly spawned service write this file, so the
+    # temp name carries the pid: a shared temp path lets one writer replace it
+    # away underneath the other, making the second os.replace fail.
+    tmp = path.with_name(f"{path.name}.{os.getpid()}.tmp")
+    payload = dict(state)
+    payload.update(current_runtime_identity())
+    payload["updated_at"] = time.time()
+    try:
+        tmp.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+        os.replace(tmp, path)
+    finally:
+        # No-op after a successful replace; removes a partial temp file on failure.
+        try:
+            tmp.unlink(missing_ok=True)
+        except OSError:
+            pass
+def is_runtime_service_process() -> bool:
+    """Return True when the SERVICE_ENV environment flag is truthy in this process."""
+    return env_flag(SERVICE_ENV)
+def should_use_mcp_adapter() -> bool:
+    """Decide whether this process should run as the stdio proxy adapter.
+    Returns False for the resident service itself, when DIRECT_ENV is truthy,
+    or when ADAPTER_ENV is explicitly falsy.  Otherwise the adapter is used
+    only if NEXO_MCP_TRANSPORT is ``stdio`` (the value assumed when unset).
+    """
+    if is_runtime_service_process() or env_flag(DIRECT_ENV):
+        return False
+    if not env_flag(ADAPTER_ENV, default=True):
+        return False
+    configured = os.environ.get("NEXO_MCP_TRANSPORT", "stdio") or "stdio"
+    return str(configured).strip().lower() == "stdio"
+def service_port() -> int:
+    """Resolve the preferred service port.
+    Precedence: an integer NEXO_RUNTIME_PORT, then a positive ``port`` in the
+    persisted state, then DEFAULT_PORT.  Unparseable values are skipped.
+    """
+    env_value = os.environ.get("NEXO_RUNTIME_PORT")
+    if env_value:
+        try:
+            return int(env_value)
+        except Exception:
+            pass
+    state = read_service_state()
+    try:
+        recorded = int(state.get("port") or 0)
+    except Exception:
+        recorded = 0
+    return recorded if recorded > 0 else DEFAULT_PORT
+def _windows_pid_is_running(pid: int) -> bool:
+    """Windows-only liveness check that never signals the target process."""
+    try:
+        import ctypes
+        from ctypes import wintypes
+        kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
+        kernel32.OpenProcess.argtypes = (wintypes.DWORD, wintypes.BOOL, wintypes.DWORD)
+        kernel32.OpenProcess.restype = wintypes.HANDLE
+        kernel32.GetExitCodeProcess.argtypes = (wintypes.HANDLE, ctypes.POINTER(wintypes.DWORD))
+        kernel32.GetExitCodeProcess.restype = wintypes.BOOL
+        kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)
+        kernel32.CloseHandle.restype = wintypes.BOOL
+        # 0x1000 = PROCESS_QUERY_LIMITED_INFORMATION
+        handle = kernel32.OpenProcess(0x1000, False, pid)
+        if not handle:
+            # ERROR_ACCESS_DENIED (5): the process exists but belongs to someone
+            # else, mirroring the PermissionError case on POSIX.
+            return ctypes.get_last_error() == 5
+        try:
+            exit_code = wintypes.DWORD()
+            if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
+                return False
+            # 259 = STILL_ACTIVE
+            return exit_code.value == 259
+        finally:
+            kernel32.CloseHandle(handle)
+    except Exception:
+        return False
+def pid_is_running(pid: int) -> bool:
+    """Report whether a process with *pid* currently exists.
+    Args:
+        pid: Process id to check; non-positive values are never running.
+    Returns:
+        True if the process exists (including when it exists but may not be
+        signalled by this user), False otherwise or on any unexpected error.
+    """
+    if pid <= 0:
+        return False
+    if os.name == "nt":
+        # os.kill(pid, 0) is NOT a probe on Windows: any signal other than
+        # CTRL_C_EVENT/CTRL_BREAK_EVENT is delivered via TerminateProcess and
+        # would kill the very process being checked.
+        return _windows_pid_is_running(pid)
+    try:
+        os.kill(pid, 0)
+        return True
+    except ProcessLookupError:
+        return False
+    except PermissionError:
+        return True
+    except Exception:
+        return False
+def _port_is_free(host: str, port: int) -> bool:
+    """Return True when an IPv4 TCP socket can bind ``(host, port)`` right now."""
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        probe.settimeout(0.2)
+        try:
+            probe.bind((host, port))
+        except OSError:
+            return False
+        return True
+    finally:
+        probe.close()
+def choose_service_port(host: str | None = None) -> int:
+    """Pick the first bindable port at or after the preferred service port.
+    Args:
+        host: Address to test binds against; defaults to ``service_host()``.
+    Raises:
+        RuntimeError: If none of the PORT_SCAN_LIMIT consecutive ports is free.
+    """
+    bind_host = host or service_host()
+    first = service_port()
+    candidates = range(first, first + PORT_SCAN_LIMIT)
+    chosen = next((port for port in candidates if _port_is_free(bind_host, port)), None)
+    if chosen is None:
+        raise RuntimeError(f"No free NEXO runtime service port in range {first}-{first + PORT_SCAN_LIMIT - 1}")
+    return chosen
+async def _probe_service_async(url: str, *, timeout: float = 1.5) -> bool:
+    """Open a FastMCP client session against *url* and return the truthiness of its ping.
+    *timeout* is passed as both ``timeout`` and ``init_timeout`` of the client.
+    Any exception raised while connecting or pinging yields False.
+    """
+    # Imported outside the try block, so a missing fastmcp package propagates to the caller.
+    from fastmcp import Client
+    try:
+        client = Client(url, timeout=timeout, init_timeout=timeout)
+        async with client:
+            return bool(await client.ping())
+    except Exception:
+        return False
+def probe_service(url: str, *, timeout: float = 1.5) -> bool:
+    """Synchronously check whether the service at *url* is reachable.
+    Outside an event loop this performs a full MCP ping through
+    ``_probe_service_async``.  When a loop is already running in this thread
+    (or ``asyncio.run`` raises ``RuntimeError``), it falls back to a plain TCP
+    connect against the URL's ``host:port``.
+    Args:
+        url: Service URL of the form ``http://host:port/path``.
+        timeout: Seconds allowed for the ping or the TCP connect.
+    Returns:
+        True if the probe succeeded, False otherwise.
+    """
+    # Detect a running loop BEFORE creating the coroutine: asyncio.run() would
+    # reject it and leave a never-awaited coroutine behind (RuntimeWarning).
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        loop_running = False
+    else:
+        loop_running = True
+    if not loop_running:
+        try:
+            return bool(asyncio.run(_probe_service_async(url, timeout=timeout)))
+        except RuntimeError:
+            # Keep the historical behaviour: any RuntimeError falls through
+            # to the socket probe below.
+            pass
+    try:
+        host_port = url.split("//", 1)[1].split("/", 1)[0]
+        host, port_text = host_port.rsplit(":", 1)
+        with socket.create_connection((host, int(port_text)), timeout=timeout):
+            return True
+    except Exception:
+        return False
+def current_server_path() -> Path:
+    """Return the resolved path of ``server.py`` next to this module."""
+    module_dir = Path(__file__).resolve().parent
+    return module_dir / "server.py"
+def current_runtime_identity() -> dict[str, str]:
+    """Describe the runtime on disk as version, fingerprint and server path.
+    The version is read for the source directory, falling back to its parent.
+    If ``runtime_versioning`` cannot be imported or any step fails, the version
+    and fingerprint are returned as empty strings.
+    """
+    try:
+        from runtime_versioning import compute_mcp_runtime_fingerprint, read_version_for_path
+        src_dir = current_server_path().parent
+        version = read_version_for_path(src_dir) or read_version_for_path(src_dir.parent)
+        fingerprint = compute_mcp_runtime_fingerprint(src_dir, use_cache=True)
+        return {
+            "runtime_version": version,
+            "runtime_fingerprint": fingerprint,
+            "server_path": str(current_server_path()),
+        }
+    except Exception:
+        blank = dict.fromkeys(("runtime_version", "runtime_fingerprint"), "")
+        blank["server_path"] = str(current_server_path())
+        return blank
+def state_matches_current_runtime(state: dict[str, Any]) -> bool:
+    """Return True when *state* was written by the runtime currently on disk.
+    An empty state never matches.  Server path, fingerprint and version are
+    compared in that order; a field only counts as a mismatch when the
+    values involved are non-empty and differ.
+    """
+    if not state:
+        return False
+    current = current_runtime_identity()
+    recorded_server = str(state.get("server_path") or "").strip()
+    if recorded_server and recorded_server != current["server_path"]:
+        return False
+    for key in ("runtime_fingerprint", "runtime_version"):
+        ours = str(current.get(key) or "").strip()
+        theirs = str(state.get(key) or "").strip()
+        if ours and theirs and ours != theirs:
+            return False
+    return True
+def _terminate_pid(pid: int, *, timeout: float = 3.0) -> dict[str, Any]:
+    """Stop process *pid* and describe what happened.
+    Windows uses ``taskkill /T /F``.  Elsewhere SIGTERM is sent, the process
+    is polled every 0.1 s until *timeout* (at least 0.2 s) elapses, and then
+    SIGKILL is sent.
+    Args:
+        pid: Process id to stop.
+        timeout: Grace period in seconds (also the taskkill timeout, min 1 s).
+    Returns:
+        A dict with ``terminated`` plus ``pid``, ``signal``, ``reason`` or
+        ``error`` where applicable.  This function never raises.
+    """
+    if pid <= 0:
+        return {"terminated": False, "reason": "no_pid"}
+    if not pid_is_running(pid):
+        return {"terminated": False, "reason": "not_running"}
+    try:
+        if os.name == "nt":
+            completed = subprocess.run(
+                ["taskkill", "/PID", str(pid), "/T", "/F"],
+                capture_output=True,
+                text=True,
+                timeout=max(timeout, 1.0),
+            )
+            # Report success only when taskkill itself reports it.
+            if completed.returncode != 0:
+                detail = (completed.stderr or completed.stdout or "").strip()
+                message = detail or f"taskkill exit={completed.returncode}"
+                return {"terminated": False, "pid": pid, "error": message[:300]}
+            return {"terminated": True, "pid": pid}
+        try:
+            os.kill(pid, signal.SIGTERM)
+        except ProcessLookupError:
+            # The process exited between the liveness check and the signal.
+            return {"terminated": False, "reason": "not_running"}
+        deadline = time.monotonic() + max(timeout, 0.2)
+        while time.monotonic() < deadline:
+            if not pid_is_running(pid):
+                return {"terminated": True, "pid": pid, "signal": "SIGTERM"}
+            time.sleep(0.1)
+        if hasattr(signal, "SIGKILL"):
+            try:
+                os.kill(pid, signal.SIGKILL)
+            except ProcessLookupError:
+                # It honoured SIGTERM just after the last poll.
+                return {"terminated": True, "pid": pid, "signal": "SIGTERM"}
+            return {"terminated": True, "pid": pid, "signal": "SIGKILL"}
+        return {"terminated": True, "pid": pid}
+    except Exception as exc:
+        return {"terminated": False, "pid": pid, "error": str(exc)[:300]}
+def stop_runtime_service(*, reason: str = "stop", timeout: float = 3.0) -> dict[str, Any]:
+    """Terminate the recorded service process and delete the state file.
+    Args:
+        reason: Label stored under ``reason`` in the returned dict.
+        timeout: Grace period forwarded to ``_terminate_pid``.
+    Returns:
+        The ``_terminate_pid`` result extended with ``reason``, ``state_path``,
+        ``state_removed`` and, on unlink failure, ``state_error``.
+    """
+    state = read_service_state()
+    # Only an all-digit pid value is trusted; anything else becomes 0.
+    pid_text = str(state.get("pid") or "")
+    pid = int(pid_text) if pid_text.isdigit() else 0
+    outcome = _terminate_pid(pid, timeout=timeout)
+    outcome["reason"] = reason
+    outcome["state_path"] = str(service_state_path())
+    try:
+        service_state_path().unlink(missing_ok=True)
+    except Exception as exc:
+        outcome["state_removed"] = False
+        outcome["state_error"] = str(exc)[:300]
+    else:
+        outcome["state_removed"] = True
+    return outcome
+def _service_env(port: int, host: str) -> dict[str, str]:
+    """Build the environment for the spawned resident service process.
+    Starts from a copy of the current environment, marks the child as the
+    runtime service and pins its transport, host, port and path.
+    """
+    env = os.environ.copy()
+    env.update(
+        {
+            SERVICE_ENV: "1",
+            "NEXO_MCP_TRANSPORT": "streamable-http",
+            "NEXO_MCP_HOST": host,
+            "NEXO_MCP_PORT": str(port),
+            "NEXO_MCP_PATH": service_path(),
+        }
+    )
+    # A probe client may inherit a deliberately tiny plugin mode.  The service
+    # should use the normal runtime defaults unless explicitly overridden.
+    override = env.get("NEXO_RUNTIME_SERVICE_PLUGIN_MODE")
+    if override is not None:
+        env["NEXO_MCP_PLUGIN_MODE"] = override
+    return env
+def _spawn_service_process(port: int, host: str) -> subprocess.Popen:
+    """Launch ``server.py`` as the resident service and return its Popen handle.
+    The child runs from the server's directory with the ``_service_env``
+    environment, no stdin, and stdout/stderr appended to the service log.  It
+    gets its own session on POSIX or its own process group on Windows.
+    Args:
+        port: Port exported to the child as NEXO_MCP_PORT.
+        host: Host exported to the child as NEXO_MCP_HOST.
+    """
+    kwargs: dict[str, Any] = {
+        "cwd": str(current_server_path().parent),
+        "env": _service_env(port, host),
+        "stdin": subprocess.DEVNULL,
+    }
+    if os.name == "nt":
+        kwargs["creationflags"] = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
+    else:
+        kwargs["start_new_session"] = True
+    # The child receives its own duplicate of the log descriptor, so the
+    # launcher's copy is closed as soon as Popen returns instead of leaking.
+    with open(service_log_path(), "ab", buffering=0) as log_file:
+        return subprocess.Popen(
+            [sys.executable, str(current_server_path())],
+            stdout=log_file,
+            stderr=log_file,
+            **kwargs,
+        )
+def ensure_runtime_service(*, wait_seconds: float = 10.0) -> str:
+    """Return the URL of a ready runtime service, starting one if necessary.
+    Runs entirely under ``service_start_lock``.  A recorded service is reused
+    when its pid (if recorded) is alive, its state matches the current runtime
+    and it answers a probe.  Otherwise it is stopped (when a pid is known) and
+    a new process is spawned on a free port.
+    Args:
+        wait_seconds: Used both as the lock timeout and as the readiness
+            deadline (at least 0.5 s) for a newly spawned service.
+    Raises:
+        TimeoutError: If the start lock cannot be acquired in time.
+        RuntimeError: If no port is free or the new service never becomes ready.
+    """
+    with service_start_lock(timeout=wait_seconds):
+        host = service_host()
+        state = read_service_state()
+        state_url = str(state.get("url") or "")
+        # Only an all-digit pid value is trusted; anything else becomes 0.
+        state_pid = int(state.get("pid") or 0) if str(state.get("pid") or "").isdigit() else 0
+        if state_url and (state_pid <= 0 or pid_is_running(state_pid)):
+            if state_matches_current_runtime(state) and probe_service(state_url):
+                return state_url
+            # Recorded service is stale or unresponsive: stop it before replacing it.
+            if state_pid > 0:
+                stop_runtime_service(reason="stale_runtime")
+        port = choose_service_port(host)
+        url = service_url(host, port)
+        proc = _spawn_service_process(port, host)
+        # State is written right after spawning, before the service is known to be ready.
+        write_service_state(
+            {
+                "pid": proc.pid,
+                "port": port,
+                "host": host,
+                "path": service_path(),
+                "url": url,
+                "server_path": str(current_server_path()),
+                "started_at": time.time(),
+                "mode": "runtime-service",
+            }
+        )
+        deadline = time.monotonic() + max(wait_seconds, 0.5)
+        delay = 0.15
+        # Poll with a growing delay (x1.5, capped at 1 s) until ready, exited, or timed out.
+        while time.monotonic() < deadline:
+            if proc.poll() is not None:
+                break
+            if probe_service(url):
+                return url
+            time.sleep(delay)
+            delay = min(delay * 1.5, 1.0)
+        # NOTE(review): a still-running child is not terminated on this failure
+        # path and the state file keeps pointing at it - confirm this is intended.
+        code = proc.poll()
+        raise RuntimeError(
+            "NEXO runtime service did not become ready"
+            + (f" (exit={code})" if code is not None else "")
+            + f"; log={service_log_path()}"
+        )
+def runtime_service_status() -> dict[str, Any]:
+    """Collect a diagnostic snapshot of the resident runtime service.
+    ``ok`` reflects a live probe of the recorded URL.  ``stale`` is True when
+    a recorded state exists but does not match the runtime currently on disk.
+    """
+    state = read_service_state()
+    identity = current_runtime_identity()
+    recorded_url = str(state.get("url") or "")
+    # Only an all-digit pid value is trusted; anything else becomes 0.
+    pid_text = str(state.get("pid") or "")
+    recorded_pid = int(pid_text) if pid_text.isdigit() else 0
+    pid_alive = pid_is_running(recorded_pid)
+    reachable = bool(recorded_url and probe_service(recorded_url, timeout=0.8))
+    return {
+        "ok": reachable,
+        "mode": "service" if is_runtime_service_process() else "adapter",
+        "pid": recorded_pid,
+        "pid_alive": pid_alive,
+        "url": recorded_url,
+        "stale": bool(state and not state_matches_current_runtime(state)),
+        "runtime_version": identity.get("runtime_version", ""),
+        "runtime_fingerprint": identity.get("runtime_fingerprint", ""),
+        "state_runtime_version": str(state.get("runtime_version") or ""),
+        "state_runtime_fingerprint": str(state.get("runtime_fingerprint") or ""),
+        "state_path": str(service_state_path()),
+        "log_path": str(service_log_path()),
+        "server_path": str(current_server_path()),
+    }
+def run_mcp_proxy_adapter(*, name: str, instructions: str, run_kwargs: dict[str, Any]) -> None:
+    """Run this process as a proxy in front of the resident runtime service.
+    Ensures the service is up, builds a FastMCP proxy for its URL and runs
+    the proxy with *run_kwargs*.
+    """
+    from fastmcp.server import create_proxy
+    upstream_url = ensure_runtime_service()
+    create_proxy(upstream_url, name=name, instructions=instructions).run(**run_kwargs)

package/src/runtime_versioning.py CHANGED Viewed

@@ -732,6 +732,16 @@ def build_mcp_status(*, client: str = "") -> dict:
     marker = state["marker"]
     installed_fp = state.get("installed_fingerprint", "")
     process_fp = state.get("process_fingerprint", "")
+    try:
+        from runtime_service import runtime_service_status
+        service_status = runtime_service_status()
+    except Exception as exc:
+        service_status = {
+            "ok": False,
+            "error": "runtime_service_status_unavailable",
+            "message": str(exc)[:300],
+        }
     return {
         "ok": True,
         "schema_version": MCP_STATUS_SCHEMA_VERSION,
@@ -755,6 +765,7 @@ def build_mcp_status(*, client: str = "") -> dict:
         "marker_exists": bool(marker.get("exists")),
         "marker_corrupt": bool(marker.get("corrupt")),
         "continuity_api_level": CONTINUITY_API_LEVEL,
+        "runtime_service": service_status,
         "version_match": (
             bool(state["installed_version"])
             and bool(state["process_version"])

package/src/server.py CHANGED Viewed

@@ -117,6 +117,13 @@ from runtime_versioning import (
     prime_process_fingerprint,
     prime_process_version,
 )
+from runtime_service import (
+    is_runtime_service_process,
+    run_mcp_proxy_adapter,
+    runtime_service_status,
+    should_use_mcp_adapter,
+    write_service_state,
+)
 from local_context import api as local_context_api
 from local_context.db import close_local_context_db
@@ -766,6 +773,12 @@ def nexo_status(keyword: str = "") -> str:
     return handle_status(keyword if keyword else None)
+@mcp.tool
+def nexo_runtime_service_status() -> str:
+    """Return the resident NEXO Runtime Service status for diagnostics."""
+    # Serialized as pretty-printed JSON with non-ASCII characters left unescaped.
+    snapshot = runtime_service_status()
+    return json.dumps(snapshot, indent=2, ensure_ascii=False)
 @mcp.tool
 def nexo_local_index_status() -> str:
     """Return local memory index status for Desktop settings and support diagnostics."""
@@ -2300,5 +2313,32 @@ def nexo_create_app_token(
 if __name__ == "__main__":
-    _server_init()
-    mcp.run(**_run_kwargs_from_env())
+    # Adapter mode: hand this process over to the proxy in front of the
+    # resident runtime service; _server_init() is not called on this path.
+    if should_use_mcp_adapter():
+        run_mcp_proxy_adapter(
+            name="nexo",
+            instructions=render_core_prompt(
+                "server-mcp-instructions",
+                assistant_name=_get_ctx().assistant_name,
+            ),
+            run_kwargs=_run_kwargs_from_env(),
+        )
+    else:
+        # Direct or resident-service mode: run _server_init() and serve MCP here.
+        _server_init()
+        run_kwargs = _run_kwargs_from_env()
+        if is_runtime_service_process():
+            # Record this process's own pid and endpoint in the service state
+            # file before serving; run kwargs win over the NEXO_MCP_* variables.
+            host = str(run_kwargs.get("host") or os.environ.get("NEXO_MCP_HOST", "127.0.0.1"))
+            port = int(run_kwargs.get("port") or os.environ.get("NEXO_MCP_PORT", "0") or 0)
+            path = str(run_kwargs.get("path") or os.environ.get("NEXO_MCP_PATH", "/mcp"))
+            write_service_state(
+                {
+                    "pid": os.getpid(),
+                    "port": port,
+                    "host": host,
+                    "path": path,
+                    "url": f"http://{host}:{port}{path}",
+                    "server_path": str(os.path.abspath(__file__)),
+                    "started_at": time.time(),
+                    "mode": "runtime-service",
+                }
+            )
+        mcp.run(**run_kwargs)

package/tool-enforcement-map.json CHANGED Viewed

@@ -2383,6 +2383,21 @@
       },
       "triggers_after": []
     },
+    "nexo_runtime_service_status": {
+      "description": "Return resident Runtime Service health, PID, version, fingerprint and state paths",
+      "category": "system",
+      "source": "server",
+      "requires": [],
+      "provides": [
+        "runtime_service_status"
+      ],
+      "internal_calls": [],
+      "enforcement": {
+        "level": "none",
+        "rules": []
+      },
+      "triggers_after": []
+    },
     "nexo_media_memory_add": {
       "description": "Store non-text artifact metadata",
       "category": "media",