nexo-brain 7.20.24 → 7.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.24",
3
+ "version": "7.21.0",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,7 +18,11 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.20.24` is the current packaged-runtime line. Patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
21
+ Version `7.21.0` is the current packaged-runtime line. Minor release over v7.20.25 — MCP now starts through a thin compatibility adapter backed by one resident local Runtime Service, reducing duplicate Brain processes and SQLite contention across Claude Code, Codex, Claude Desktop, and NEXO Desktop. The release also fingerprints Runtime Service state for safe update cutover, keeps document-first Local Memory scanning, and verifies bundled local LLM files before marking them installed.
22
+
23
+ Previously in `7.20.25`: patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
24
+
25
+ Previously in `7.20.24`: patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
22
26
 
23
27
  Previously in `7.20.23`: patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
24
28
 
package/bin/nexo-brain.js CHANGED
@@ -3879,12 +3879,32 @@ async function runSetup() {
3879
3879
  const slug = (spec.name || "").trim().toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
3880
3880
  const targetDir = path.join(runtimeModelsDir, slug, spec.revision);
3881
3881
  fs.mkdirSync(targetDir, { recursive: true });
3882
+ const missingFiles = [];
3882
3883
  for (const f of (spec.required_files || [])) {
3883
3884
  const src = path.join(sourceDir, f.path);
3884
3885
  const dst = path.join(targetDir, f.path);
3885
- if (fs.existsSync(src) && !fs.existsSync(dst)) {
3886
+ if (!fs.existsSync(src)) {
3887
+ missingFiles.push(f.path);
3888
+ continue;
3889
+ }
3890
+ fs.mkdirSync(path.dirname(dst), { recursive: true });
3891
+ if (!fs.existsSync(dst) || (f.size && fs.statSync(dst).size !== f.size)) {
3886
3892
  fs.copyFileSync(src, dst);
3887
3893
  }
3894
+ if (f.size && fs.statSync(dst).size !== f.size) {
3895
+ missingFiles.push(`${f.path}:size`);
3896
+ continue;
3897
+ }
3898
+ if (f.sha256) {
3899
+ const actual = crypto.createHash("sha256").update(fs.readFileSync(dst)).digest("hex");
3900
+ if (actual !== f.sha256) {
3901
+ missingFiles.push(`${f.path}:sha256`);
3902
+ }
3903
+ }
3904
+ }
3905
+ if (missingFiles.length) {
3906
+ log(` WARN: bundled LLM model ${spec.name} incomplete (${missingFiles.join(", ")})`);
3907
+ continue;
3888
3908
  }
3889
3909
  // Write the lock file to match revision (avoids re-download).
3890
3910
  fs.writeFileSync(path.join(targetDir, ".nexo-model-lock.json"), JSON.stringify({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.24",
3
+ "version": "7.21.0",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -3988,7 +3988,7 @@ def _auto_update_check_locked() -> dict:
3988
3988
 
3989
3989
  # Backfill runtime CLI modules for existing installs
3990
3990
  try:
3991
- for fname in ("cli.py", "script_registry.py", "skills_runtime.py", "cron_recovery.py", "client_preferences.py", "claude_cli.py", "agent_runner.py", "bootstrap_docs.py", "mcp_required_tools.py"):
3991
+ for fname in ("cli.py", "script_registry.py", "skills_runtime.py", "cron_recovery.py", "client_preferences.py", "claude_cli.py", "agent_runner.py", "bootstrap_docs.py", "mcp_required_tools.py", "runtime_service.py"):
3992
3992
  src_file = SRC_DIR / fname
3993
3993
  dest_file = NEXO_HOME / fname
3994
3994
  if src_file.is_file() and (not dest_file.exists() or src_file.stat().st_mtime > dest_file.stat().st_mtime):
@@ -10,6 +10,7 @@ import hashlib
10
10
  import subprocess
11
11
  import sys
12
12
  import time
13
+ from functools import lru_cache
13
14
  from pathlib import Path
14
15
  from typing import Any
15
16
 
@@ -41,6 +42,77 @@ INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
41
42
  PERFORMANCE_PROFILE_KEY = "performance_profile"
42
43
  DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
43
44
  VALID_CONTEXT_MODES = {"compact", "full"}
45
# Job type that re-embeds an asset's existing chunks (dispatched by process_jobs).
EMBEDDING_REFRESH_JOB = "embedding_refresh"

# Office-style document formats; _extraction_priority ranks these highest.
HIGH_VALUE_DOCUMENT_SUFFIXES = {
    ".pdf", ".rtf",
    ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
    ".pages", ".numbers", ".key",
    ".odt", ".ods", ".odp",
}

# Plain-text and tabular formats, ranked just below the document formats.
KNOWN_TEXT_SUFFIXES = {".md", ".markdown", ".txt", ".csv", ".tsv"}

# Single-message e-mail file formats.
EMAIL_DOCUMENT_SUFFIXES = {".eml", ".emlx", ".msg"}

# Directory names (lower-cased) that the scanner visits early.
HIGH_VALUE_DIRECTORY_NAMES = {
    # user roots and standard folders (English / Spanish)
    "users", "home",
    "desktop", "documents", "downloads",
    "documentos", "escritorio", "descargas",
    # cloud-sync folders
    "icloud drive", "onedrive", "google drive", "dropbox", "creative cloud files",
    # business folders (English / Spanish)
    "clientes", "clients",
    "facturas", "invoices",
    "contratos", "contracts",
    "projects", "proyectos",
    "work", "trabajo",
}

# Directory names (lower-cased) that the scanner visits late.
LOW_VALUE_DIRECTORY_NAMES = {
    "applications", "library", "system", "private",
    "usr", "var", "opt",
    "windows", "program files", "program files (x86)", "programdata", "appdata",
    ".cache", "caches",
}

# Name of the local model spec loaded by _context_reranker().
RERANKER_MODEL_SPEC = "cross-encoder-reranker"
44
116
  PERFORMANCE_PROFILES: dict[str, dict[str, Any]] = {
45
117
  "low": {
46
118
  "profile": "low",
@@ -1092,7 +1164,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
1092
1164
  (version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
1093
1165
  )
1094
1166
  if should_extract(normalized, depth):
1095
- enqueue_job(conn, asset_id, "light_extraction", priority=60)
1167
+ enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path))
1096
1168
  enqueue_job(conn, asset_id, "graph", priority=40)
1097
1169
  return asset_id, changed, "ok"
1098
1170
 
@@ -1203,6 +1275,44 @@ def enqueue_job(conn, asset_id: str, job_type: str, *, priority: int = 50) -> st
1203
1275
  return job_id
1204
1276
 
1205
1277
 
1278
def _extraction_priority(path: Path) -> int:
    """Return the ``light_extraction`` job priority for *path*.

    The priority is derived from the lower-cased file suffix: document
    formats get 90, known text formats 82, e-mail files (or anything that
    ``is_local_email_tree`` recognises) 70, common source/config formats 55,
    and everything else 45.
    """
    ext = path.suffix.lower()
    if ext in HIGH_VALUE_DOCUMENT_SUFFIXES:
        return 90
    if ext in KNOWN_TEXT_SUFFIXES:
        return 82
    if ext in EMAIL_DOCUMENT_SUFFIXES or is_local_email_tree(str(path)):
        return 70
    source_like = {
        ".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql",
        ".json", ".yaml", ".yml", ".toml", ".html", ".css",
    }
    return 55 if ext in source_like else 45
1289
+
1290
+
1291
def _directory_scan_priority(path: Path) -> int:
    """Return the scan rank of a directory; lower ranks are visited first.

    The rank is decided by the stripped, lower-cased directory name:
    ``users``/``home`` get 0, high-value names and cloud-sync folders 10,
    local e-mail trees 65, low-value system names 90, anything else 40.
    """
    folder = path.name.strip().lower()
    if folder in {"users", "home"}:
        return 0
    cloud_markers = ("icloud", "onedrive", "google drive")
    if folder in HIGH_VALUE_DIRECTORY_NAMES or any(marker in folder for marker in cloud_markers):
        return 10
    if is_local_email_tree(str(path)):
        return 65
    return 90 if folder in LOW_VALUE_DIRECTORY_NAMES else 40
1304
+
1305
+
1306
def _scan_entry_sort_key(item: Path) -> tuple[int, int, str]:
    """Build the sort key used to order directory entries during a scan.

    Directories (leading 0) sort ahead of files (leading 1). Directories are
    ordered by ascending scan rank, files by descending extraction priority
    (hence the negation); ties fall back to the lower-cased path string.
    An entry whose ``is_file()`` check raises is treated as a non-file.
    """
    lowered = str(item).lower()
    try:
        treat_as_file = item.is_file()
    except Exception:
        treat_as_file = False
    if not treat_as_file:
        return (0, _directory_scan_priority(item), lowered)
    return (1, -_extraction_priority(item), lowered)
1314
+
1315
+
1206
1316
  def _iter_files(
1207
1317
  conn,
1208
1318
  root_id: int,
@@ -1236,7 +1346,7 @@ def _iter_files(
1236
1346
  seen_dirs.add(key)
1237
1347
  _upsert_dir(conn, root_id, current, seen_at, st)
1238
1348
  try:
1239
- entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
1349
+ entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
1240
1350
  except Exception as exc:
1241
1351
  _record_scan_error(conn, stats, str(current), "quick_index", exc)
1242
1352
  continue
@@ -1420,7 +1530,7 @@ def _scan_known_directory(
1420
1530
  st = current.stat()
1421
1531
  if not current.is_dir():
1422
1532
  continue
1423
- entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
1533
+ entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
1424
1534
  except Exception as exc:
1425
1535
  _record_scan_error(conn, stats, str(current), "live_reconcile", exc)
1426
1536
  continue
@@ -1661,6 +1771,29 @@ def _latest_version_id(conn, asset_id: str) -> str:
1661
1771
  return row["version_id"] if row else stable_id("ver", asset_id)
1662
1772
 
1663
1773
 
1774
def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
    """Embed *text* and insert one ``local_embeddings`` row for the chunk.

    The model id, revision and dimension come from the record returned by
    ``embeddings.embed_record`` and are also folded into the embedding id.
    """
    record = embeddings.embed_record(text)
    model_id, model_revision = str(record["model_id"]), str(record["model_revision"])
    dimension = int(record["dimension"])
    embedding_id = stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}")
    values = (
        embedding_id,
        asset_id,
        chunk_id,
        model_id,
        model_revision,
        dimension,
        json_dumps(record["vector"]),
        now(),
    )
    conn.execute(
        """
        INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
1795
+
1796
+
1664
1797
  def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
1665
1798
  conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
1666
1799
  conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
@@ -1673,23 +1806,63 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
1673
1806
  """,
1674
1807
  (chunk_id, asset_id, version_id, index, chunk, len(tokenize(chunk)), now()),
1675
1808
  )
1676
- vector = embeddings.embed_text(chunk)
1677
- conn.execute(
1678
- """
1679
- INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
1680
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
1681
- """,
1682
- (
1683
- stable_id("emb", f"{chunk_id}:{embeddings.MODEL_ID}:{embeddings.MODEL_REVISION}"),
1684
- asset_id,
1685
- chunk_id,
1686
- embeddings.MODEL_ID,
1687
- embeddings.MODEL_REVISION,
1688
- embeddings.DIMENSION,
1689
- json_dumps(vector),
1690
- now(),
1691
- ),
1692
- )
1809
+ _insert_chunk_embedding(conn, asset_id, chunk_id, chunk)
1810
+
1811
+
1812
def _refresh_asset_embeddings(conn, asset_id: str) -> int:
    """Re-embed every stored chunk of an asset and return the chunk count.

    Existing ``local_embeddings`` rows for the asset are deleted first; the
    chunk text itself is reused as stored. When at least one chunk was
    re-embedded the asset's phase is set to ``embeddings``.
    """
    chunk_rows = conn.execute(
        """
        SELECT chunk_id, text
        FROM local_chunks
        WHERE asset_id=?
        ORDER BY chunk_index ASC
        """,
        (asset_id,),
    ).fetchall()
    conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
    refreshed = 0
    for refreshed, chunk in enumerate(chunk_rows, start=1):
        _insert_chunk_embedding(conn, asset_id, chunk["chunk_id"], chunk["text"])
    if refreshed:
        conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
    return refreshed
1828
+
1829
+
1830
def _embedding_matches_profile(row, profile: embeddings.EmbeddingProfile) -> bool:
    """Tell whether a stored embedding row was produced by *profile*.

    A missing row never matches. Otherwise model id, model revision and
    dimension must all agree; empty or NULL row values are read as ``""``
    and ``0`` respectively.
    """
    if row is None:
        return False
    if str(row["model_id"] or "") != profile.model_id:
        return False
    if str(row["model_revision"] or "") != profile.model_revision:
        return False
    return int(row["dimension"] or 0) == int(profile.dimension)
1838
+
1839
+
1840
def _enqueue_stale_embedding_refresh_jobs(conn, *, limit: int) -> int:
    """Queue refresh jobs for assets whose embeddings do not match the active profile.

    Returns the number of assets queued. Nothing is queued while the active
    profile is the deterministic fallback. Only active assets with the
    ``normal`` privacy class are considered, oldest ``updated_at`` first,
    capped at ``max(1, limit)``.
    """
    profile = embeddings.active_profile()
    if profile.kind == "deterministic_embedding":
        # NOTE(review): presumably there is no better model to refresh to here.
        return 0
    query_params = (
        profile.model_id,
        profile.model_revision,
        int(profile.dimension),
        max(1, int(limit)),
    )
    stale_rows = conn.execute(
        """
        SELECT DISTINCT c.asset_id
        FROM local_chunks c
        JOIN local_assets a ON a.asset_id=c.asset_id
        LEFT JOIN local_embeddings e ON e.chunk_id=c.chunk_id
        WHERE a.status='active'
        AND a.privacy_class='normal'
        AND (
        e.embedding_id IS NULL
        OR e.model_id != ?
        OR e.model_revision != ?
        OR e.dimension != ?
        )
        ORDER BY a.updated_at ASC
        LIMIT ?
        """,
        query_params,
    ).fetchall()
    stale_asset_ids = [stale["asset_id"] for stale in stale_rows]
    for stale_asset_id in stale_asset_ids:
        enqueue_job(conn, stale_asset_id, EMBEDDING_REFRESH_JOB, priority=58)
    return len(stale_asset_ids)
1693
1866
 
1694
1867
 
1695
1868
  def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -> None:
@@ -1751,6 +1924,9 @@ def process_jobs(*, limit: int = 100) -> dict:
1751
1924
  log_event("info", "jobs_skipped_paused", "Local memory jobs skipped because indexing is paused")
1752
1925
  return {"ok": True, "paused": True, "processed": 0, "failed": 0}
1753
1926
  recovered = _requeue_due_jobs(conn)
1927
+ refresh_queued = _enqueue_stale_embedding_refresh_jobs(conn, limit=max(1, min(int(limit or 1), 100)))
1928
+ if refresh_queued:
1929
+ conn.commit()
1754
1930
  rows = conn.execute(
1755
1931
  """
1756
1932
  SELECT j.*, a.path, a.depth, a.privacy_class, a.status AS asset_status
@@ -1804,6 +1980,8 @@ def process_jobs(*, limit: int = 100) -> dict:
1804
1980
  _replace_chunks(conn, asset_id, version_id, text)
1805
1981
  _replace_entities(conn, asset_id, version_id, entities(text))
1806
1982
  conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
1983
+ elif job_type == EMBEDDING_REFRESH_JOB:
1984
+ _refresh_asset_embeddings(conn, asset_id)
1807
1985
  elif job_type == "graph":
1808
1986
  conn.execute(
1809
1987
  """
@@ -1843,8 +2021,8 @@ def process_jobs(*, limit: int = 100) -> dict:
1843
2021
  conn.commit()
1844
2022
  conn.commit()
1845
2023
  if processed or failed:
1846
- log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed)
1847
- return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered}
2024
+ log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed, refresh_queued=refresh_queued)
2025
+ return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered, "embedding_refresh_queued": refresh_queued}
1848
2026
 
1849
2027
 
1850
2028
  def run_once(
@@ -2433,27 +2611,36 @@ def diagnostics_tail(limit: int = 100) -> dict:
2433
2611
 
2434
2612
 
2435
2613
  def model_status() -> dict:
2436
- models = [{
2437
- "profile": "local_context_embedding_fallback",
2438
- "name": embeddings.MODEL_ID,
2439
- "kind": "deterministic_embedding",
2440
- "revision": embeddings.MODEL_REVISION,
2441
- "dimension": embeddings.DIMENSION,
2442
- "state": "available",
2614
+ active_embedding = embeddings.active_profile()
2615
+ active_entry = {
2616
+ "profile": active_embedding.profile,
2617
+ "name": active_embedding.model_id,
2618
+ "kind": active_embedding.kind,
2619
+ "revision": active_embedding.model_revision,
2620
+ "dimension": active_embedding.dimension,
2621
+ "state": active_embedding.state,
2443
2622
  "required": True,
2444
- }]
2623
+ "active": True,
2624
+ "problems": list(active_embedding.problems),
2625
+ }
2626
+ models = []
2627
+ active_in_manifest = False
2445
2628
  try:
2446
2629
  import local_models
2447
2630
  for spec in local_models.list_local_model_specs():
2448
2631
  verification = local_models.verify_local_model_dir(spec)
2632
+ state = "available" if verification["ok"] else ("optional_missing" if not spec.required else "not_warmed")
2633
+ is_active = spec.model_id == active_embedding.model_id and spec.revision == active_embedding.model_revision
2634
+ active_in_manifest = bool(active_in_manifest or is_active)
2449
2635
  models.append({
2450
2636
  "profile": spec.name,
2451
2637
  "name": spec.model_id,
2452
2638
  "kind": spec.kind,
2453
2639
  "revision": spec.revision,
2454
2640
  "dimension": spec.dimension,
2455
- "state": "available" if verification["ok"] else "not_warmed",
2641
+ "state": state,
2456
2642
  "required": spec.required,
2643
+ "active": is_active,
2457
2644
  "path": verification["path"],
2458
2645
  "problems": verification["problems"],
2459
2646
  })
@@ -2466,6 +2653,8 @@ def model_status() -> dict:
2466
2653
  "required": False,
2467
2654
  "problems": [str(exc)],
2468
2655
  })
2656
+ if not active_in_manifest:
2657
+ models.insert(0, active_entry)
2469
2658
  return {"ok": True, "models": models}
2470
2659
 
2471
2660
 
@@ -2608,7 +2797,8 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
2608
2797
  def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
2609
2798
  base_rows = conn.execute(
2610
2799
  """
2611
- SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
2800
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
2801
+ e.vector_json, e.model_id, e.model_revision, e.dimension
2612
2802
  FROM local_chunks c
2613
2803
  JOIN local_assets a ON a.asset_id = c.asset_id
2614
2804
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2626,7 +2816,8 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
2626
2816
  placeholders = ",".join("?" for _ in entity_asset_ids)
2627
2817
  entity_rows = conn.execute(
2628
2818
  f"""
2629
- SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
2819
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
2820
+ e.vector_json, e.model_id, e.model_revision, e.dimension
2630
2821
  FROM local_chunks c
2631
2822
  JOIN local_assets a ON a.asset_id = c.asset_id
2632
2823
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2658,6 +2849,54 @@ def _compact_text(value: str, *, max_chars: int) -> str:
2658
2849
  return text[: max(0, max_chars - 1)].rstrip() + "…"
2659
2850
 
2660
2851
 
2852
def _reranker_disabled() -> bool:
    """Report whether the context reranker must stay off for this process.

    It is off when ``NEXO_LOCAL_CONTEXT_DISABLE_RERANKER`` holds a truthy
    word, or when ``NEXO_TEST_DB`` is set and
    ``NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS`` is not exactly ``"1"``.
    """
    flag = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_RERANKER", "").strip().lower()
    explicitly_off = flag in {"1", "true", "yes", "on"}
    off_for_tests = (
        bool(os.environ.get("NEXO_TEST_DB"))
        and os.environ.get("NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS") != "1"
    )
    return explicitly_off or off_for_tests
2859
+
2860
+
2861
@lru_cache(maxsize=1)
def _context_reranker():
    """Load the local cross-encoder reranker, or return ``None``.

    ``None`` is returned when the reranker is disabled or when importing,
    locating or constructing the model fails for any reason. The result,
    including ``None``, is memoised by ``lru_cache``.
    """
    if _reranker_disabled():
        return None
    try:
        import local_models
        from fastembed.rerank.cross_encoder import TextCrossEncoder

        spec = local_models.get_local_model_spec(RERANKER_MODEL_SPEC)
        model_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
        encoder = TextCrossEncoder(spec.model_id, specific_model_path=str(model_dir))
    except Exception:  # pragma: no cover - host/cache dependent
        encoder = None
    return encoder
2874
+
2875
+
2876
def _rerank_scored_candidates(search_query: str, scored: list[tuple[float, Any]], *, limit: int) -> list[tuple[float, Any]]:
    """Reorder the head of *scored* by cross-encoder relevance.

    Only the first ``min(len(scored), max(limit * 4, 20), 60)`` candidates
    are reranked; the rest keep their position after them. Each candidate
    keeps its original base score, only the ordering changes. The input is
    returned untouched when there is at most one candidate, no reranker is
    available, reranking raises, or the score count does not match.
    """
    if len(scored) <= 1:
        return scored
    reranker = _context_reranker()
    if not reranker:
        return scored
    cutoff = min(len(scored), max(int(limit) * 4, 20), 60)
    head, tail = scored[:cutoff], scored[cutoff:]
    docs = [_compact_text(row["text"], max_chars=1400) for _score, row in head]
    try:
        rerank_scores = [float(value) for value in reranker.rerank(search_query, docs)]
    except Exception:  # pragma: no cover - runtime fallback only
        return scored
    if len(rerank_scores) != len(head):
        return scored
    # Stable descending sort on the rerank score; ties keep their base order.
    order = sorted(range(len(head)), key=lambda position: rerank_scores[position], reverse=True)
    return [(head[position][0], head[position][1]) for position in order] + tail
2898
+
2899
+
2661
2900
  def _payload_size(payload: dict) -> int:
2662
2901
  return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
2663
2902
 
@@ -2993,10 +3232,12 @@ def _context_query_conn(
2993
3232
  normalized_mode, mode_warnings = _normalize_context_mode(mode)
2994
3233
  context_tail = _compact_text(current_context or "", max_chars=1000)
2995
3234
  search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
2996
- qvec = embeddings.embed_text(search_query)
3235
+ query_embedding = embeddings.embed_record(search_query)
3236
+ qvec = query_embedding["vector"]
2997
3237
  entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
2998
3238
  rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
2999
3239
  scored = []
3240
+ stale_embedding_seen = False
3000
3241
  for row in rows:
3001
3242
  if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
3002
3243
  continue
@@ -3005,7 +3246,15 @@ def _context_query_conn(
3005
3246
  path_score = _search_text_score(search_query, row["path"] or "")
3006
3247
  summary_score = _search_text_score(search_query, row["summary"] or "")
3007
3248
  entity_score = entity_boosts.get(row["asset_id"], 0.0)
3008
- vector_score = embeddings.cosine(qvec, vector)
3249
+ vector_score = 0.0
3250
+ if (
3251
+ str(row["model_id"] or "") == str(query_embedding["model_id"])
3252
+ and str(row["model_revision"] or "") == str(query_embedding["model_revision"])
3253
+ and int(row["dimension"] or 0) == int(query_embedding["dimension"])
3254
+ ):
3255
+ vector_score = embeddings.cosine(qvec, vector)
3256
+ elif vector:
3257
+ stale_embedding_seen = True
3009
3258
  score = max(text_score, path_score, summary_score, vector_score)
3010
3259
  if entity_score > 0:
3011
3260
  direct_score = max(text_score, path_score, summary_score)
@@ -3019,6 +3268,7 @@ def _context_query_conn(
3019
3268
  if score > 0:
3020
3269
  scored.append((min(float(score), 1.6), row))
3021
3270
  scored.sort(key=lambda item: item[0], reverse=True)
3271
+ scored = _rerank_scored_candidates(search_query, scored, limit=int(limit))
3022
3272
  assets = []
3023
3273
  chunks = []
3024
3274
  evidence_refs = []
@@ -3057,6 +3307,10 @@ def _context_query_conn(
3057
3307
  ).fetchall()
3058
3308
  relations_payload = [dict(row) for row in relation_rows]
3059
3309
  warnings = list(mode_warnings)
3310
+ if query_embedding.get("kind") == "deterministic_embedding":
3311
+ warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
3312
+ elif stale_embedding_seen:
3313
+ warnings.append("Some local chunks still use an older embedding profile and will be refreshed automatically.")
3060
3314
  if evidence_required and not evidence_refs:
3061
3315
  warnings.append("No local evidence found for this query.")
3062
3316
  summary = ""
@@ -2,32 +2,135 @@ from __future__ import annotations
2
2
 
3
3
  import hashlib
4
4
  import math
5
+ import os
6
+ import warnings
7
+ from dataclasses import dataclass
8
+ from functools import lru_cache
9
+ from typing import Any
5
10
 
6
11
  from .util import tokenize
7
12
 
8
- MODEL_ID = "nexo-local-hash-embedding"
9
- MODEL_REVISION = "1"
10
- DIMENSION = 128
13
# Identity of the dependency-free hash embedding.
FALLBACK_MODEL_ID = "nexo-local-hash-embedding"
FALLBACK_MODEL_REVISION = "1"
FALLBACK_DIMENSION = 128
# Name of the local model spec that _fastembed_state() tries to load.
PRIMARY_MODEL_SPEC = "bge-base-embeddings"

# Backward-compatible aliases. Callers that persist vectors should use
# embed_record(), because the active profile can switch from fallback to BGE.
MODEL_ID, MODEL_REVISION, DIMENSION = (
    FALLBACK_MODEL_ID,
    FALLBACK_MODEL_REVISION,
    FALLBACK_DIMENSION,
)
12
23
 
13
- def embed_text(text: str) -> list[float]:
14
- """Deterministic local embedding fallback.
15
-
16
- This is intentionally local and dependency-free. It gives the resolver a
17
- working semantic-ish retrieval substrate even on machines where the pinned
18
- FastEmbed model has not warmed yet. The model id/revision make it safe to
19
- supersede later with pinned model vectors.
20
- """
21
- vec = [0.0] * DIMENSION
24
+
25
@dataclass(frozen=True)
class EmbeddingProfile:
    """Immutable description of the embedding model behind a vector."""

    # Identity stored alongside every persisted vector.
    model_id: str
    model_revision: str
    dimension: int
    # Model family, e.g. "deterministic_embedding" for the hash fallback.
    kind: str
    # Availability label surfaced by model_status().
    state: str
    # Name of the profile / model spec this entry came from.
    profile: str
    # Reasons the preferred model could not be used; empty when none.
    problems: tuple[str, ...] = ()
34
+
35
+
36
def _hash_embed_text(text: str) -> list[float]:
    """Return the deterministic hash embedding of *text*.

    Each token's SHA-256 digest selects one of ``FALLBACK_DIMENSION`` slots
    (first two bytes) and a sign (parity of the third byte). The vector is
    L2-normalised and rounded to 8 decimals; text without tokens yields zeros.
    """
    buckets = [0.0] * FALLBACK_DIMENSION
    for token in tokenize(text):
        digest = hashlib.sha256(token.encode("utf-8", errors="ignore")).digest()
        slot = int.from_bytes(digest[:2], "big") % FALLBACK_DIMENSION
        buckets[slot] += -1.0 if digest[2] % 2 else 1.0
    # "or 1.0" avoids dividing by zero for an all-zero vector.
    length = math.sqrt(sum(weight * weight for weight in buckets)) or 1.0
    return [round(weight / length, 8) for weight in buckets]
29
45
 
30
46
 
47
def _fallback_profile(*problems: str) -> EmbeddingProfile:
    """Build the profile describing the deterministic hash embedding.

    Empty *problems* entries are dropped; the rest are kept in order.
    """
    kept_problems = tuple(filter(None, problems))
    return EmbeddingProfile(
        FALLBACK_MODEL_ID,
        FALLBACK_MODEL_REVISION,
        FALLBACK_DIMENSION,
        kind="deterministic_embedding",
        state="available",
        profile="local_context_embedding_fallback",
        problems=kept_problems,
    )
57
+
58
+
59
def _fastembed_disabled() -> bool:
    """Report whether FastEmbed must not be loaded in this process.

    It is off when ``NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED`` holds a truthy
    word, or when ``NEXO_TEST_DB`` is set and
    ``NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS`` is not exactly ``"1"``.
    """
    flag = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED", "").strip().lower()
    explicitly_off = flag in {"1", "true", "yes", "on"}
    # The unit suite uses temporary NEXO homes that intentionally do not carry
    # model weights. Keep those tests dependency-free unless explicitly opted in.
    off_for_tests = (
        bool(os.environ.get("NEXO_TEST_DB"))
        and os.environ.get("NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS") != "1"
    )
    return explicitly_off or off_for_tests
68
+
69
+
70
@lru_cache(maxsize=1)
def _fastembed_state() -> tuple[Any, EmbeddingProfile] | tuple[None, EmbeddingProfile]:
    """Load the primary embedding model once and describe the active profile.

    Returns ``(model, profile)``. ``model`` is ``None`` and ``profile`` is the
    deterministic fallback (carrying the reason in ``problems``) when
    FastEmbed is disabled or loading fails. The result is memoised by
    ``lru_cache``; ``reset_cache()`` clears it.
    """
    if _fastembed_disabled():
        return None, _fallback_profile("fastembed disabled for this process")
    try:
        import local_models
        from fastembed import TextEmbedding

        spec = local_models.get_local_model_spec(PRIMARY_MODEL_SPEC)
        target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message=r"The model .* now uses mean pooling.*", category=UserWarning)
            model = TextEmbedding(spec.model_id, specific_model_path=str(target_dir))
        dimension = int(spec.dimension or 0)
        if dimension <= 0:
            # The manifest did not declare a dimension. Measure it from the
            # loaded model rather than guessing, so the stored dimension
            # always matches the vectors the model actually produces.
            dimension = len(next(iter(model.embed([""]))))
        return model, EmbeddingProfile(
            model_id=spec.model_id,
            model_revision=spec.revision,
            dimension=dimension,
            kind=spec.kind,
            state="available",
            profile=spec.name,
        )
    except Exception as exc:  # pragma: no cover - host/cache dependent
        return None, _fallback_profile(str(exc))
93
+
94
+
95
def active_profile() -> EmbeddingProfile:
    """Return the embedding profile currently in effect for this process."""
    return _fastembed_state()[1]
98
+
99
+
100
def reset_cache() -> None:
    """Drop the memoised model/profile so the next call loads it again."""
    _fastembed_state.cache_clear()
102
+
103
+
104
def embed_record(text: str) -> dict[str, Any]:
    """Embed *text* and return the vector with the identity of its model.

    The result has the keys ``vector``, ``model_id``, ``model_revision``,
    ``dimension``, ``profile`` and ``kind``. The loaded FastEmbed model is
    used when available; if it is missing or raises while embedding, the
    deterministic hash embedding is used and the record is labelled with
    the fallback profile. A ``None`` or empty *text* is embedded as ``""``
    on both paths.
    """
    text = text or ""
    model, profile = _fastembed_state()
    vector = None
    if model is not None and profile.kind == "fastembed_embedding":
        try:
            vector = [float(value) for value in next(iter(model.embed([text])))]
        except Exception:  # pragma: no cover - runtime fallback only
            # Best effort: a failing model must not break indexing or queries.
            vector = None
    if vector is None:
        profile = _fallback_profile()
        vector = _hash_embed_text(text)
    return {
        "vector": vector,
        "model_id": profile.model_id,
        "model_revision": profile.model_revision,
        "dimension": profile.dimension,
        "profile": profile.profile,
        "kind": profile.kind,
    }
128
+
129
+
130
def embed_text(text: str) -> list[float]:
    """Return only the vector part of ``embed_record(text)``."""
    record = embed_record(text)
    return record["vector"]
132
+
133
+
31
134
  def cosine(a: list[float], b: list[float]) -> float:
32
135
  if not a or not b or len(a) != len(b):
33
136
  return 0.0
@@ -0,0 +1,426 @@
1
"""Resident runtime service and MCP proxy bootstrap.

The public MCP entrypoint remains ``server.py`` for compatibility. By
default, that entrypoint becomes a thin stdio proxy and forwards calls to a
single resident FastMCP service over loopback HTTP. The resident process is
the only MCP process that initializes Brain, opens SQLite, and runs tool
handlers.
"""

# The docstring must be the first statement of the module to be picked up as
# ``__doc__``; a future statement is still allowed directly after it.
from __future__ import annotations
10
+
11
+ import asyncio
12
+ import json
13
+ import os
14
+ import signal
15
+ import socket
16
+ import subprocess
17
+ import sys
18
+ import time
19
+ from contextlib import contextmanager
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+ import paths
24
+
25
# Default loopback endpoint of the resident service.
DEFAULT_HOST, DEFAULT_PORT = "127.0.0.1", 17872
# Number of consecutive ports, starting at the preferred one, that
# choose_service_port() tries before giving up.
PORT_SCAN_LIMIT = 30
SERVICE_PATH = "/mcp"

# Names of the environment switches consulted through env_flag().
SERVICE_ENV = "NEXO_RUNTIME_SERVICE"
DIRECT_ENV = "NEXO_MCP_DIRECT"
ADAPTER_ENV = "NEXO_MCP_RUNTIME_ADAPTER"

# File names only; the directories come from the ``paths`` module.
STATE_FILE, LOCK_FILE, LOG_FILE = (
    "runtime-service.json",
    "runtime-service.lock",
    "runtime-service.log",
)
35
+
36
+
37
def env_flag(name: str, *, default: bool = False) -> bool:
    """Interpret the environment variable *name* as a boolean switch.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is not set at all.

    Returns:
        ``True`` when the trimmed, lower-cased value is one of ``1``,
        ``true``, ``yes``, ``on``, ``y`` or ``si``; ``False`` for any other
        value that is set; *default* when the variable is absent.
    """
    try:
        raw = os.environ[name]
    except KeyError:
        return default
    truthy = ("1", "true", "yes", "on", "y", "si")
    return str(raw).strip().lower() in truthy
42
+
43
+
44
def service_host() -> str:
    """Return the service host from ``NEXO_RUNTIME_HOST``, trimmed.

    ``DEFAULT_HOST`` is used when the variable is unset or empty.
    """
    configured = os.environ.get("NEXO_RUNTIME_HOST", DEFAULT_HOST)
    if not configured:
        configured = DEFAULT_HOST
    return str(configured).strip()
46
+
47
+
48
def service_path() -> str:
    """Return the MCP URL path from ``NEXO_RUNTIME_MCP_PATH``.

    ``SERVICE_PATH`` is used when the variable is unset or empty. The
    trimmed value always comes back with a leading ``/``.
    """
    configured = os.environ.get("NEXO_RUNTIME_MCP_PATH", SERVICE_PATH) or SERVICE_PATH
    route = str(configured).strip()
    if route.startswith("/"):
        return route
    return f"/{route}"
51
+
52
+
53
def service_url(host: str | None = None, port: int | None = None, path: str | None = None) -> str:
    """Build the ``http://host:port/path`` URL of the runtime service.

    Any argument that is falsy is filled in from ``service_host()``,
    ``service_port()`` or ``service_path()`` respectively.
    """
    resolved_host = host or service_host()
    resolved_port = int(port or service_port())
    resolved_path = path or service_path()
    return f"http://{resolved_host}:{resolved_port}{resolved_path}"
55
+
56
+
57
def service_state_path() -> Path:
    """Return the state file path, creating its directory if needed.

    The directory is ``paths.runtime_state_dir()``; the file name is
    ``STATE_FILE``.
    """
    state_dir = paths.runtime_state_dir()
    state_dir.mkdir(parents=True, exist_ok=True)
    return state_dir.joinpath(STATE_FILE)
61
+
62
+
63
def service_log_path() -> Path:
    """Return the service log path, creating its directory if needed.

    The directory is ``paths.logs_dir()``; the file name is ``LOG_FILE``.
    """
    log_dir = paths.logs_dir()
    log_dir.mkdir(parents=True, exist_ok=True)
    return log_dir.joinpath(LOG_FILE)
67
+
68
+
69
def service_lock_path() -> Path:
    """Return the start-lock file path, creating its directory if needed.

    The directory is ``paths.runtime_state_dir()``; the file name is
    ``LOCK_FILE``.
    """
    lock_dir = paths.runtime_state_dir()
    lock_dir.mkdir(parents=True, exist_ok=True)
    return lock_dir.joinpath(LOCK_FILE)
73
+
74
+
75
@contextmanager
def service_start_lock(*, timeout: float = 10.0):
    """Hold an exclusive, non-blocking file lock while the ``with`` body runs.

    The lock file is ``service_lock_path()``. Acquisition is retried every
    0.1 s until it succeeds or the deadline passes. Once held, the file
    content is replaced with ``"<pid>:<timestamp>"``.

    Args:
        timeout: Seconds to keep retrying; values below 0.5 are raised to 0.5.

    Raises:
        TimeoutError: The lock could not be taken before the deadline.
    """
    lock_path = service_lock_path()
    lock_file = lock_path.open("a+")
    give_up_at = time.monotonic() + max(timeout, 0.5)
    acquired = False
    try:
        while True:
            try:
                if os.name == "nt":
                    import msvcrt

                    # The one-byte range being locked must exist, so make
                    # sure the file holds at least one byte first.
                    lock_file.seek(0)
                    if not lock_file.read(1):
                        lock_file.write("0")
                        lock_file.flush()
                    lock_file.seek(0)
                    msvcrt.locking(lock_file.fileno(), msvcrt.LK_NBLCK, 1)
                else:
                    import fcntl

                    fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
                acquired = True
                break
            except (BlockingIOError, OSError):
                # Any OS-level failure is treated as "held by someone else".
                if time.monotonic() >= give_up_at:
                    raise TimeoutError(f"Timed out waiting for NEXO runtime service lock: {lock_path}")
                time.sleep(0.1)

        # Record who holds the lock and since when.
        lock_file.seek(0)
        lock_file.truncate()
        lock_file.write(f"{os.getpid()}:{time.time()}\n")
        lock_file.flush()
        yield
    finally:
        if acquired:
            try:
                if os.name == "nt":
                    import msvcrt

                    lock_file.seek(0)
                    msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
                else:
                    import fcntl

                    fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
            except Exception:
                # Unlock failures are ignored; the handle is closed below.
                pass
        try:
            lock_file.close()
        except Exception:
            pass
125
+
126
+
127
def read_service_state() -> dict[str, Any]:
    """Load the persisted service state as a dict.

    Returns an empty dict when the state file is missing, cannot be read or
    parsed, or does not contain a JSON object. Never raises ``Exception``.
    """
    try:
        state_file = service_state_path()
        if state_file.is_file():
            parsed = json.loads(state_file.read_text(encoding="utf-8"))
            if isinstance(parsed, dict):
                return parsed
    except Exception:
        pass
    return {}
136
+
137
+
138
def write_service_state(state: dict[str, Any]) -> None:
    """Persist *state* to the service state file.

    The written record is a copy of *state* overlaid with
    ``current_runtime_identity()`` and an ``updated_at`` timestamp. It is
    written to a sibling ``.tmp`` file first and then moved over the real
    file with ``os.replace``. *state* itself is not modified.
    """
    target = service_state_path()
    scratch = target.with_suffix(target.suffix + ".tmp")
    record = {**state, **current_runtime_identity(), "updated_at": time.time()}
    serialized = json.dumps(record, indent=2, sort_keys=True) + "\n"
    scratch.write_text(serialized, encoding="utf-8")
    os.replace(scratch, target)
146
+
147
+
148
def is_runtime_service_process() -> bool:
    """Return whether the ``SERVICE_ENV`` flag is switched on in this process."""
    service_flag = env_flag(SERVICE_ENV)
    return service_flag
150
+
151
+
152
def should_use_mcp_adapter() -> bool:
    """Decide whether this process should run as the MCP adapter.

    Returns ``False`` when this is the runtime service process, when the
    ``DIRECT_ENV`` flag is on, or when the ``ADAPTER_ENV`` flag (on by
    default) is off. Otherwise returns whether ``NEXO_MCP_TRANSPORT`` is
    ``stdio`` (the default when unset or empty).
    """
    opted_out = (
        is_runtime_service_process()
        or env_flag(DIRECT_ENV)
        or not env_flag(ADAPTER_ENV, default=True)
    )
    if opted_out:
        return False
    raw_transport = os.environ.get("NEXO_MCP_TRANSPORT", "stdio") or "stdio"
    return str(raw_transport).strip().lower() == "stdio"
161
+
162
+
163
def service_port() -> int:
    """Resolve the preferred service port.

    Order of precedence: an integer ``NEXO_RUNTIME_PORT`` value, then a
    positive ``port`` in the persisted service state, then ``DEFAULT_PORT``.
    An environment value that is not an integer is skipped.
    """
    override = os.environ.get("NEXO_RUNTIME_PORT")
    if override:
        try:
            return int(override)
        except Exception:
            pass

    state = read_service_state()
    try:
        recorded = int(state.get("port") or 0)
    except Exception:
        recorded = 0
    return recorded if recorded > 0 else DEFAULT_PORT
178
+
179
+
180
def _pid_is_running_windows(pid: int) -> bool:
    """Check liveness of *pid* on Windows without sending it any signal."""
    import ctypes
    from ctypes import wintypes

    PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
    ERROR_ACCESS_DENIED = 5
    STILL_ACTIVE = 259

    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    kernel32.OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD]
    kernel32.OpenProcess.restype = wintypes.HANDLE
    kernel32.GetExitCodeProcess.argtypes = [wintypes.HANDLE, ctypes.POINTER(wintypes.DWORD)]
    kernel32.GetExitCodeProcess.restype = wintypes.BOOL
    kernel32.CloseHandle.argtypes = [wintypes.HANDLE]
    kernel32.CloseHandle.restype = wintypes.BOOL

    handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
    if not handle:
        # Mirrors the POSIX PermissionError case: the process exists but
        # this user may not open it.
        return ctypes.get_last_error() == ERROR_ACCESS_DENIED
    try:
        exit_code = wintypes.DWORD()
        if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
            return False
        return exit_code.value == STILL_ACTIVE
    finally:
        kernel32.CloseHandle(handle)


def pid_is_running(pid: int) -> bool:
    """Return whether a process with id *pid* currently exists.

    On POSIX this probes with signal 0. On Windows ``os.kill(pid, 0)`` is
    not a probe: any signal other than the CTRL events terminates the
    target, so the Win32 process APIs are queried instead.

    Args:
        pid: Process id to check; values ``<= 0`` are never running.

    Returns:
        ``True`` when the process exists, including when it exists but is
        not accessible to this user. ``False`` otherwise, and on any
        unexpected error.
    """
    if pid <= 0:
        return False
    if os.name == "nt":
        try:
            return _pid_is_running_windows(pid)
        except Exception:
            return False
    try:
        os.kill(pid, 0)
        return True
    except ProcessLookupError:
        return False
    except PermissionError:
        # The process exists but belongs to someone else.
        return True
    except Exception:
        return False
192
+
193
+
194
def _port_is_free(host: str, port: int) -> bool:
    """Return whether a TCP socket can currently be bound to ``(host, port)``.

    The probing socket is always closed before returning, so a ``True``
    result only describes the moment of the check.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.settimeout(0.2)
        try:
            probe.bind((host, port))
        except OSError:
            return False
        return True
    finally:
        probe.close()
202
+
203
+
204
def choose_service_port(host: str | None = None) -> int:
    """Pick the first bindable port at or after the preferred one.

    Args:
        host: Interface to test; defaults to ``service_host()``.

    Returns:
        The first of ``PORT_SCAN_LIMIT`` consecutive ports, starting at
        ``service_port()``, that ``_port_is_free`` accepts.

    Raises:
        RuntimeError: None of the scanned ports is free.
    """
    bind_host = host or service_host()
    first = service_port()
    for candidate in range(first, first + PORT_SCAN_LIMIT):
        if _port_is_free(bind_host, candidate):
            return candidate
    last = first + PORT_SCAN_LIMIT - 1
    raise RuntimeError(f"No free NEXO runtime service port in range {first}-{last}")
212
+
213
+
214
async def _probe_service_async(url: str, *, timeout: float = 1.5) -> bool:
    """Ping the service at *url* through a FastMCP client.

    *timeout* is passed to the client as both ``timeout`` and
    ``init_timeout``. Any exception while connecting or pinging yields
    ``False``; the ``fastmcp`` import itself is not guarded.
    """
    from fastmcp import Client

    try:
        probe = Client(url, timeout=timeout, init_timeout=timeout)
        async with probe:
            answer = await probe.ping()
            return bool(answer)
    except Exception:
        return False
223
+
224
+
225
def probe_service(url: str, *, timeout: float = 1.5) -> bool:
    """Return whether the service at *url* responds.

    The check is an MCP ping run through ``asyncio.run``. When that raises
    ``RuntimeError`` (for example because an event loop is already active),
    the check degrades to a plain TCP connect against the host and port
    parsed out of *url*.
    """
    try:
        outcome = asyncio.run(_probe_service_async(url, timeout=timeout))
        return bool(outcome)
    except RuntimeError:
        pass

    # Fallback: tiny socket probe against "host:port" taken from the URL.
    try:
        authority = url.split("//", 1)[1].split("/", 1)[0]
        host, port_text = authority.rsplit(":", 1)
        with socket.create_connection((host, int(port_text)), timeout=timeout):
            return True
    except Exception:
        return False
237
+
238
+
239
def current_server_path() -> Path:
    """Return the absolute path of ``server.py`` next to this module."""
    module_file = Path(__file__).resolve()
    return module_file.with_name("server.py")
241
+
242
+
243
def current_runtime_identity() -> dict[str, str]:
    """Describe the runtime this module belongs to.

    Returns:
        A dict with ``runtime_version``, ``runtime_fingerprint`` and
        ``server_path``. The version is read for the directory holding
        ``server.py`` and, if that is falsy, for its parent. When anything
        in the lookup fails, version and fingerprint are empty strings.
    """
    try:
        from runtime_versioning import compute_mcp_runtime_fingerprint, read_version_for_path

        runtime_root = current_server_path().parent
        version = read_version_for_path(runtime_root) or read_version_for_path(runtime_root.parent)
        fingerprint = compute_mcp_runtime_fingerprint(runtime_root, use_cache=True)
        return {
            "runtime_version": version,
            "runtime_fingerprint": fingerprint,
            "server_path": str(current_server_path()),
        }
    except Exception:
        return {
            "runtime_version": "",
            "runtime_fingerprint": "",
            "server_path": str(current_server_path()),
        }
256
+
257
+
258
def state_matches_current_runtime(state: dict[str, Any]) -> bool:
    """Return whether a persisted *state* belongs to the current runtime.

    An empty state never matches. Otherwise the state matches unless its
    ``server_path``, ``runtime_fingerprint`` or ``runtime_version`` differs
    from the current one. Fingerprint and version are only compared when
    both sides are non-empty; the server path only when the state has one.
    """
    if not state:
        return False
    current = current_runtime_identity()

    def _clean(mapping: dict[str, Any], key: str) -> str:
        return str(mapping.get(key) or "").strip()

    recorded_server = _clean(state, "server_path")
    if recorded_server and recorded_server != current["server_path"]:
        return False

    for key in ("runtime_fingerprint", "runtime_version"):
        ours = _clean(current, key)
        theirs = _clean(state, key)
        if ours and theirs and ours != theirs:
            return False
    return True
276
+
277
+
278
def _terminate_pid(pid: int, *, timeout: float = 3.0) -> dict[str, Any]:
    """Stop the process *pid* and report what happened.

    On Windows ``taskkill /T /F`` is used. Elsewhere ``SIGTERM`` is sent,
    the process is polled every 0.1 s until *timeout* elapses, and
    ``SIGKILL`` is sent if it is still there.

    Args:
        pid: Process id to stop.
        timeout: Seconds to wait for exit (at least 0.2 s on POSIX; at
            least 1.0 s for the ``taskkill`` call).

    Returns:
        A dict with ``terminated`` plus, depending on the path taken,
        ``reason``, ``pid``, ``signal`` or ``error``. Never raises
        ``Exception``.
    """
    if pid <= 0:
        return {"terminated": False, "reason": "no_pid"}
    if not pid_is_running(pid):
        return {"terminated": False, "reason": "not_running"}
    try:
        if os.name == "nt":
            completed = subprocess.run(
                ["taskkill", "/PID", str(pid), "/T", "/F"],
                capture_output=True,
                text=True,
                timeout=max(timeout, 1.0),
            )
            # Only report success when taskkill worked or the process is
            # gone anyway.
            if completed.returncode != 0 and pid_is_running(pid):
                detail = (completed.stderr or completed.stdout or "").strip()
                message = detail or f"taskkill exit={completed.returncode}"
                return {"terminated": False, "pid": pid, "error": message[:300]}
            return {"terminated": True, "pid": pid}

        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # Exited between the liveness check and the signal.
            return {"terminated": False, "reason": "not_running"}

        deadline = time.monotonic() + max(timeout, 0.2)
        while time.monotonic() < deadline:
            if not pid_is_running(pid):
                return {"terminated": True, "pid": pid, "signal": "SIGTERM"}
            time.sleep(0.1)

        if hasattr(signal, "SIGKILL"):
            try:
                os.kill(pid, signal.SIGKILL)
            except ProcessLookupError:
                # Exited after the last poll: SIGTERM did the job.
                return {"terminated": True, "pid": pid, "signal": "SIGTERM"}
        return {"terminated": True, "pid": pid}
    except Exception as exc:
        return {"terminated": False, "pid": pid, "error": str(exc)[:300]}
303
+
304
+
305
def stop_runtime_service(*, reason: str = "stop", timeout: float = 3.0) -> dict[str, Any]:
    """Terminate the recorded service process and delete the state file.

    Args:
        reason: Stored in the result under ``reason``, replacing any reason
            set by ``_terminate_pid``.
        timeout: Forwarded to ``_terminate_pid``.

    Returns:
        The ``_terminate_pid`` result extended with ``reason``,
        ``state_path``, ``state_removed`` and, when removing the state file
        failed, ``state_error``.
    """
    state = read_service_state()
    raw_pid = state.get("pid")
    # A pid that is not a plain digit string is treated as "no pid".
    pid = int(raw_pid or 0) if str(raw_pid or "").isdigit() else 0

    outcome = _terminate_pid(pid, timeout=timeout)
    outcome["reason"] = reason
    outcome["state_path"] = str(service_state_path())
    try:
        service_state_path().unlink(missing_ok=True)
    except Exception as exc:
        outcome["state_removed"] = False
        outcome["state_error"] = str(exc)[:300]
    else:
        outcome["state_removed"] = True
    return outcome
318
+
319
+
320
def _service_env(port: int, host: str) -> dict[str, str]:
    """Build the environment for the resident service child process.

    Starts from a copy of this process's environment, turns on
    ``SERVICE_ENV`` and sets the streamable-http transport, host, port and
    path variables.
    """
    env = os.environ.copy()
    env.update(
        {
            SERVICE_ENV: "1",
            "NEXO_MCP_TRANSPORT": "streamable-http",
            "NEXO_MCP_HOST": host,
            "NEXO_MCP_PORT": str(port),
            "NEXO_MCP_PATH": service_path(),
        }
    )
    # A probe client may inherit a deliberately tiny plugin mode. The service
    # should use the normal runtime defaults unless explicitly overridden.
    # NOTE(review): an inherited NEXO_MCP_PLUGIN_MODE is only replaced when
    # NEXO_RUNTIME_SERVICE_PLUGIN_MODE is set; otherwise it passes through.
    plugin_mode = env.get("NEXO_RUNTIME_SERVICE_PLUGIN_MODE")
    if plugin_mode is not None:
        env["NEXO_MCP_PLUGIN_MODE"] = plugin_mode
    return env
332
+
333
+
334
def _spawn_service_process(port: int, host: str) -> subprocess.Popen:
    """Start ``server.py`` as a child process in runtime-service mode.

    The child runs with ``_service_env(port, host)``, has no stdin, and
    writes stdout and stderr to the service log file. It is started in a
    new process group on Windows and a new session elsewhere.

    Args:
        port: Port handed to the child through its environment.
        host: Host handed to the child through its environment.

    Returns:
        The ``Popen`` handle of the started child.
    """
    server_script = current_server_path()
    kwargs: dict[str, Any] = {
        "cwd": str(server_script.parent),
        "env": _service_env(port, host),
        "stdin": subprocess.DEVNULL,
    }
    if os.name == "nt":
        kwargs["creationflags"] = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
    else:
        kwargs["start_new_session"] = True

    # The child gets its own copy of the log handle, so the parent's copy is
    # closed as soon as Popen returns instead of leaking one per spawn.
    with open(service_log_path(), "ab", buffering=0) as log_file:
        return subprocess.Popen(
            [sys.executable, str(server_script)],
            stdout=log_file,
            stderr=log_file,
            **kwargs,
        )
349
+
350
+
351
def ensure_runtime_service(*, wait_seconds: float = 10.0) -> str:
    """Return the URL of a ready runtime service, starting one if needed.

    Runs entirely under ``service_start_lock``. A recorded service is
    reused when its state matches the current runtime and it answers a
    probe. A recorded service with a known pid that fails either check is
    stopped first. A new service is then spawned on a free port, its state
    is written, and it is probed with a growing delay (0.15 s up to 1.0 s)
    until it is ready, exits, or the deadline passes.

    Args:
        wait_seconds: Lock timeout and readiness deadline (at least 0.5 s
            for the readiness wait).

    Returns:
        The service URL.

    Raises:
        RuntimeError: The spawned service did not become ready in time.
    """
    with service_start_lock(timeout=wait_seconds):
        host = service_host()
        recorded = read_service_state()
        recorded_url = str(recorded.get("url") or "")
        raw_pid = recorded.get("pid")
        recorded_pid = int(raw_pid or 0) if str(raw_pid or "").isdigit() else 0

        # Only consider reuse when a URL is recorded and its pid is either
        # unknown or still alive.
        if recorded_url and (recorded_pid <= 0 or pid_is_running(recorded_pid)):
            reusable = state_matches_current_runtime(recorded) and probe_service(recorded_url)
            if reusable:
                return recorded_url
            if recorded_pid > 0:
                stop_runtime_service(reason="stale_runtime")

        port = choose_service_port(host)
        url = service_url(host, port)
        child = _spawn_service_process(port, host)
        write_service_state(
            {
                "pid": child.pid,
                "port": port,
                "host": host,
                "path": service_path(),
                "url": url,
                "server_path": str(current_server_path()),
                "started_at": time.time(),
                "mode": "runtime-service",
            }
        )

        give_up_at = time.monotonic() + max(wait_seconds, 0.5)
        pause = 0.15
        # Stop waiting as soon as the deadline passes or the child exits.
        while time.monotonic() < give_up_at and child.poll() is None:
            if probe_service(url):
                return url
            time.sleep(pause)
            pause = min(pause * 1.5, 1.0)

        exit_code = child.poll()
        exit_note = f" (exit={exit_code})" if exit_code is not None else ""
        raise RuntimeError(
            "NEXO runtime service did not become ready"
            + exit_note
            + f"; log={service_log_path()}"
        )
395
+
396
+
397
def runtime_service_status() -> dict[str, Any]:
    """Collect a diagnostic snapshot of the runtime service.

    Returns:
        A dict combining the persisted state (pid, url, recorded version
        and fingerprint), the current runtime identity, liveness of the
        recorded pid, a 0.8 s readiness probe (``ok``), whether the
        recorded state is stale, and the state, log and server paths.
    """
    state = read_service_state()
    current = current_runtime_identity()
    url = str(state.get("url") or "")
    raw_pid = state.get("pid")
    pid = int(raw_pid or 0) if str(raw_pid or "").isdigit() else 0
    alive = pid_is_running(pid)
    ready = bool(url and probe_service(url, timeout=0.8))

    mode = "service" if is_runtime_service_process() else "adapter"
    # A missing state is reported as not stale.
    stale = bool(state and not state_matches_current_runtime(state))

    snapshot: dict[str, Any] = {
        "ok": ready,
        "mode": mode,
        "pid": pid,
        "pid_alive": alive,
        "url": url,
        "stale": stale,
        "runtime_version": current.get("runtime_version", ""),
        "runtime_fingerprint": current.get("runtime_fingerprint", ""),
        "state_runtime_version": str(state.get("runtime_version") or ""),
        "state_runtime_fingerprint": str(state.get("runtime_fingerprint") or ""),
    }
    snapshot["state_path"] = str(service_state_path())
    snapshot["log_path"] = str(service_log_path())
    snapshot["server_path"] = str(current_server_path())
    return snapshot
419
+
420
+
421
def run_mcp_proxy_adapter(*, name: str, instructions: str, run_kwargs: dict[str, Any]) -> None:
    """Run a FastMCP proxy that forwards to the runtime service.

    Args:
        name: Name given to the proxy server.
        instructions: Instructions given to the proxy server.
        run_kwargs: Keyword arguments passed to the proxy's ``run``.
    """
    from fastmcp.server import create_proxy

    service_endpoint = ensure_runtime_service()
    create_proxy(service_endpoint, name=name, instructions=instructions).run(**run_kwargs)
@@ -732,6 +732,16 @@ def build_mcp_status(*, client: str = "") -> dict:
732
732
  marker = state["marker"]
733
733
  installed_fp = state.get("installed_fingerprint", "")
734
734
  process_fp = state.get("process_fingerprint", "")
735
+ try:
736
+ from runtime_service import runtime_service_status
737
+
738
+ service_status = runtime_service_status()
739
+ except Exception as exc:
740
+ service_status = {
741
+ "ok": False,
742
+ "error": "runtime_service_status_unavailable",
743
+ "message": str(exc)[:300],
744
+ }
735
745
  return {
736
746
  "ok": True,
737
747
  "schema_version": MCP_STATUS_SCHEMA_VERSION,
@@ -755,6 +765,7 @@ def build_mcp_status(*, client: str = "") -> dict:
755
765
  "marker_exists": bool(marker.get("exists")),
756
766
  "marker_corrupt": bool(marker.get("corrupt")),
757
767
  "continuity_api_level": CONTINUITY_API_LEVEL,
768
+ "runtime_service": service_status,
758
769
  "version_match": (
759
770
  bool(state["installed_version"])
760
771
  and bool(state["process_version"])
package/src/server.py CHANGED
@@ -117,6 +117,13 @@ from runtime_versioning import (
117
117
  prime_process_fingerprint,
118
118
  prime_process_version,
119
119
  )
120
+ from runtime_service import (
121
+ is_runtime_service_process,
122
+ run_mcp_proxy_adapter,
123
+ runtime_service_status,
124
+ should_use_mcp_adapter,
125
+ write_service_state,
126
+ )
120
127
  from local_context import api as local_context_api
121
128
  from local_context.db import close_local_context_db
122
129
 
@@ -766,6 +773,12 @@ def nexo_status(keyword: str = "") -> str:
766
773
  return handle_status(keyword if keyword else None)
767
774
 
768
775
 
776
@mcp.tool
def nexo_runtime_service_status() -> str:
    """Return the resident NEXO Runtime Service status for diagnostics."""
    # Serialized as indented JSON with non-ASCII characters left unescaped.
    snapshot = runtime_service_status()
    return json.dumps(snapshot, indent=2, ensure_ascii=False)
780
+
781
+
769
782
  @mcp.tool
770
783
  def nexo_local_index_status() -> str:
771
784
  """Return local memory index status for Desktop settings and support diagnostics."""
@@ -2300,5 +2313,32 @@ def nexo_create_app_token(
2300
2313
 
2301
2314
 
2302
2315
  if __name__ == "__main__":
2303
- _server_init()
2304
- mcp.run(**_run_kwargs_from_env())
2316
+ if should_use_mcp_adapter():
2317
+ run_mcp_proxy_adapter(
2318
+ name="nexo",
2319
+ instructions=render_core_prompt(
2320
+ "server-mcp-instructions",
2321
+ assistant_name=_get_ctx().assistant_name,
2322
+ ),
2323
+ run_kwargs=_run_kwargs_from_env(),
2324
+ )
2325
+ else:
2326
+ _server_init()
2327
+ run_kwargs = _run_kwargs_from_env()
2328
+ if is_runtime_service_process():
2329
+ host = str(run_kwargs.get("host") or os.environ.get("NEXO_MCP_HOST", "127.0.0.1"))
2330
+ port = int(run_kwargs.get("port") or os.environ.get("NEXO_MCP_PORT", "0") or 0)
2331
+ path = str(run_kwargs.get("path") or os.environ.get("NEXO_MCP_PATH", "/mcp"))
2332
+ write_service_state(
2333
+ {
2334
+ "pid": os.getpid(),
2335
+ "port": port,
2336
+ "host": host,
2337
+ "path": path,
2338
+ "url": f"http://{host}:{port}{path}",
2339
+ "server_path": str(os.path.abspath(__file__)),
2340
+ "started_at": time.time(),
2341
+ "mode": "runtime-service",
2342
+ }
2343
+ )
2344
+ mcp.run(**run_kwargs)
@@ -2383,6 +2383,21 @@
2383
2383
  },
2384
2384
  "triggers_after": []
2385
2385
  },
2386
+ "nexo_runtime_service_status": {
2387
+ "description": "Return resident Runtime Service health, PID, version, fingerprint and state paths",
2388
+ "category": "system",
2389
+ "source": "server",
2390
+ "requires": [],
2391
+ "provides": [
2392
+ "runtime_service_status"
2393
+ ],
2394
+ "internal_calls": [],
2395
+ "enforcement": {
2396
+ "level": "none",
2397
+ "rules": []
2398
+ },
2399
+ "triggers_after": []
2400
+ },
2386
2401
  "nexo_media_memory_add": {
2387
2402
  "description": "Store non-text artifact metadata",
2388
2403
  "category": "media",