nexo-brain 7.20.24 → 7.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +5 -1
- package/bin/nexo-brain.js +21 -1
- package/package.json +1 -1
- package/src/auto_update.py +1 -1
- package/src/local_context/api.py +289 -35
- package/src/local_context/embeddings.py +116 -13
- package/src/runtime_service.py +426 -0
- package/src/runtime_versioning.py +11 -0
- package/src/server.py +42 -2
- package/tool-enforcement-map.json +15 -0
package/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.24",
|
|
3
|
+
"version": "7.21.0",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,11 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
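Version `7.20.24` is the current packaged-runtime line. Patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.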
|
|
21
|
+
Version `7.21.0` is the current packaged-runtime line. Minor release over v7.20.25 - MCP now starts through a thin compatibility adapter backed by one resident local Runtime Service, reducing duplicate Brain processes and SQLite contention across Claude Code, Codex, Claude Desktop, and NEXO Desktop. The release also fingerprints Runtime Service state for safe update cutover, keeps document-first Local Memory scanning, and verifies bundled local LLM files before marking them installed.
|
|
22
|
+
|
|
23
|
+
Previously in `7.20.25`: patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
|
|
24
|
+
|
|
25
|
+
Previously in `7.20.24`: patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
|
|
22
26
|
|
|
23
27
|
Previously in `7.20.23`: patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
|
|
24
28
|
|
package/bin/nexo-brain.js
CHANGED
|
@@ -3879,12 +3879,32 @@ async function runSetup() {
|
|
|
3879
3879
|
const slug = (spec.name || "").trim().toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
|
|
3880
3880
|
const targetDir = path.join(runtimeModelsDir, slug, spec.revision);
|
|
3881
3881
|
fs.mkdirSync(targetDir, { recursive: true });
|
|
3882
|
+
const missingFiles = [];
|
|
3882
3883
|
for (const f of (spec.required_files || [])) {
|
|
3883
3884
|
const src = path.join(sourceDir, f.path);
|
|
3884
3885
|
const dst = path.join(targetDir, f.path);
|
|
3885
|
-
if (fs.existsSync(src)
|
|
3886
|
+
if (!fs.existsSync(src)) {
|
|
3887
|
+
missingFiles.push(f.path);
|
|
3888
|
+
continue;
|
|
3889
|
+
}
|
|
3890
|
+
fs.mkdirSync(path.dirname(dst), { recursive: true });
|
|
3891
|
+
if (!fs.existsSync(dst) || (f.size && fs.statSync(dst).size !== f.size)) {
|
|
3886
3892
|
fs.copyFileSync(src, dst);
|
|
3887
3893
|
}
|
|
3894
|
+
if (f.size && fs.statSync(dst).size !== f.size) {
|
|
3895
|
+
missingFiles.push(`${f.path}:size`);
|
|
3896
|
+
continue;
|
|
3897
|
+
}
|
|
3898
|
+
if (f.sha256) {
|
|
3899
|
+
const actual = crypto.createHash("sha256").update(fs.readFileSync(dst)).digest("hex");
|
|
3900
|
+
if (actual !== f.sha256) {
|
|
3901
|
+
missingFiles.push(`${f.path}:sha256`);
|
|
3902
|
+
}
|
|
3903
|
+
}
|
|
3904
|
+
}
|
|
3905
|
+
if (missingFiles.length) {
|
|
3906
|
+
log(` WARN: bundled LLM model ${spec.name} incomplete (${missingFiles.join(", ")})`);
|
|
3907
|
+
continue;
|
|
3888
3908
|
}
|
|
3889
3909
|
// Write the lock file to match revision (avoids re-download).
|
|
3890
3910
|
fs.writeFileSync(path.join(targetDir, ".nexo-model-lock.json"), JSON.stringify({
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.24",
|
|
3
|
+
"version": "7.21.0",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/auto_update.py
CHANGED
|
@@ -3988,7 +3988,7 @@ def _auto_update_check_locked() -> dict:
|
|
|
3988
3988
|
|
|
3989
3989
|
# Backfill runtime CLI modules for existing installs
|
|
3990
3990
|
try:
|
|
3991
|
-
for fname in ("cli.py", "script_registry.py", "skills_runtime.py", "cron_recovery.py", "client_preferences.py", "claude_cli.py", "agent_runner.py", "bootstrap_docs.py", "mcp_required_tools.py"):
|
|
3991
|
+
for fname in ("cli.py", "script_registry.py", "skills_runtime.py", "cron_recovery.py", "client_preferences.py", "claude_cli.py", "agent_runner.py", "bootstrap_docs.py", "mcp_required_tools.py", "runtime_service.py"):
|
|
3992
3992
|
src_file = SRC_DIR / fname
|
|
3993
3993
|
dest_file = NEXO_HOME / fname
|
|
3994
3994
|
if src_file.is_file() and (not dest_file.exists() or src_file.stat().st_mtime > dest_file.stat().st_mtime):
|
package/src/local_context/api.py
CHANGED
|
@@ -10,6 +10,7 @@ import hashlib
|
|
|
10
10
|
import subprocess
|
|
11
11
|
import sys
|
|
12
12
|
import time
|
|
13
|
+
from functools import lru_cache
|
|
13
14
|
from pathlib import Path
|
|
14
15
|
from typing import Any
|
|
15
16
|
|
|
@@ -41,6 +42,77 @@ INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
|
|
|
41
42
|
PERFORMANCE_PROFILE_KEY = "performance_profile"
|
|
42
43
|
DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
|
|
43
44
|
VALID_CONTEXT_MODES = {"compact", "full"}
|
|
45
|
+
EMBEDDING_REFRESH_JOB = "embedding_refresh"
|
|
46
|
+
HIGH_VALUE_DOCUMENT_SUFFIXES = {
|
|
47
|
+
".pdf",
|
|
48
|
+
".doc",
|
|
49
|
+
".docx",
|
|
50
|
+
".xls",
|
|
51
|
+
".xlsx",
|
|
52
|
+
".ppt",
|
|
53
|
+
".pptx",
|
|
54
|
+
".pages",
|
|
55
|
+
".numbers",
|
|
56
|
+
".key",
|
|
57
|
+
".rtf",
|
|
58
|
+
".odt",
|
|
59
|
+
".ods",
|
|
60
|
+
".odp",
|
|
61
|
+
}
|
|
62
|
+
KNOWN_TEXT_SUFFIXES = {
|
|
63
|
+
".md",
|
|
64
|
+
".markdown",
|
|
65
|
+
".txt",
|
|
66
|
+
".csv",
|
|
67
|
+
".tsv",
|
|
68
|
+
}
|
|
69
|
+
EMAIL_DOCUMENT_SUFFIXES = {
|
|
70
|
+
".eml",
|
|
71
|
+
".emlx",
|
|
72
|
+
".msg",
|
|
73
|
+
}
|
|
74
|
+
HIGH_VALUE_DIRECTORY_NAMES = {
|
|
75
|
+
"users",
|
|
76
|
+
"home",
|
|
77
|
+
"desktop",
|
|
78
|
+
"documents",
|
|
79
|
+
"downloads",
|
|
80
|
+
"documentos",
|
|
81
|
+
"escritorio",
|
|
82
|
+
"descargas",
|
|
83
|
+
"icloud drive",
|
|
84
|
+
"onedrive",
|
|
85
|
+
"google drive",
|
|
86
|
+
"dropbox",
|
|
87
|
+
"creative cloud files",
|
|
88
|
+
"clientes",
|
|
89
|
+
"clients",
|
|
90
|
+
"facturas",
|
|
91
|
+
"invoices",
|
|
92
|
+
"contratos",
|
|
93
|
+
"contracts",
|
|
94
|
+
"projects",
|
|
95
|
+
"proyectos",
|
|
96
|
+
"work",
|
|
97
|
+
"trabajo",
|
|
98
|
+
}
|
|
99
|
+
LOW_VALUE_DIRECTORY_NAMES = {
|
|
100
|
+
"applications",
|
|
101
|
+
"library",
|
|
102
|
+
"system",
|
|
103
|
+
"private",
|
|
104
|
+
"usr",
|
|
105
|
+
"var",
|
|
106
|
+
"opt",
|
|
107
|
+
"windows",
|
|
108
|
+
"program files",
|
|
109
|
+
"program files (x86)",
|
|
110
|
+
"programdata",
|
|
111
|
+
"appdata",
|
|
112
|
+
".cache",
|
|
113
|
+
"caches",
|
|
114
|
+
}
|
|
115
|
+
RERANKER_MODEL_SPEC = "cross-encoder-reranker"
|
|
44
116
|
PERFORMANCE_PROFILES: dict[str, dict[str, Any]] = {
|
|
45
117
|
"low": {
|
|
46
118
|
"profile": "low",
|
|
@@ -1092,7 +1164,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
|
|
|
1092
1164
|
(version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
|
|
1093
1165
|
)
|
|
1094
1166
|
if should_extract(normalized, depth):
|
|
1095
|
-
enqueue_job(conn, asset_id, "light_extraction", priority=
|
|
1167
|
+
enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path))
|
|
1096
1168
|
enqueue_job(conn, asset_id, "graph", priority=40)
|
|
1097
1169
|
return asset_id, changed, "ok"
|
|
1098
1170
|
|
|
@@ -1203,6 +1275,44 @@ def enqueue_job(conn, asset_id: str, job_type: str, *, priority: int = 50) -> st
|
|
|
1203
1275
|
return job_id
|
|
1204
1276
|
|
|
1205
1277
|
|
|
1278
|
+
def _extraction_priority(path: Path) -> int:
|
|
1279
|
+
suffix = path.suffix.lower()
|
|
1280
|
+
if suffix in HIGH_VALUE_DOCUMENT_SUFFIXES:
|
|
1281
|
+
return 90
|
|
1282
|
+
if suffix in KNOWN_TEXT_SUFFIXES:
|
|
1283
|
+
return 82
|
|
1284
|
+
if suffix in EMAIL_DOCUMENT_SUFFIXES or is_local_email_tree(str(path)):
|
|
1285
|
+
return 70
|
|
1286
|
+
if suffix in {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".json", ".yaml", ".yml", ".toml", ".html", ".css"}:
|
|
1287
|
+
return 55
|
|
1288
|
+
return 45
|
|
1289
|
+
|
|
1290
|
+
|
|
1291
|
+
def _directory_scan_priority(path: Path) -> int:
|
|
1292
|
+
name = path.name.strip().lower()
|
|
1293
|
+
if name in {"users", "home"}:
|
|
1294
|
+
return 0
|
|
1295
|
+
if name in HIGH_VALUE_DIRECTORY_NAMES:
|
|
1296
|
+
return 10
|
|
1297
|
+
if "icloud" in name or "onedrive" in name or "google drive" in name:
|
|
1298
|
+
return 10
|
|
1299
|
+
if is_local_email_tree(str(path)):
|
|
1300
|
+
return 65
|
|
1301
|
+
if name in LOW_VALUE_DIRECTORY_NAMES:
|
|
1302
|
+
return 90
|
|
1303
|
+
return 40
|
|
1304
|
+
|
|
1305
|
+
|
|
1306
|
+
def _scan_entry_sort_key(item: Path) -> tuple[int, int, str]:
|
|
1307
|
+
try:
|
|
1308
|
+
is_file = item.is_file()
|
|
1309
|
+
except Exception:
|
|
1310
|
+
is_file = False
|
|
1311
|
+
if is_file:
|
|
1312
|
+
return (1, -_extraction_priority(item), str(item).lower())
|
|
1313
|
+
return (0, _directory_scan_priority(item), str(item).lower())
|
|
1314
|
+
|
|
1315
|
+
|
|
1206
1316
|
def _iter_files(
|
|
1207
1317
|
conn,
|
|
1208
1318
|
root_id: int,
|
|
@@ -1236,7 +1346,7 @@ def _iter_files(
|
|
|
1236
1346
|
seen_dirs.add(key)
|
|
1237
1347
|
_upsert_dir(conn, root_id, current, seen_at, st)
|
|
1238
1348
|
try:
|
|
1239
|
-
entries = sorted(current.iterdir(), key=
|
|
1349
|
+
entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
|
|
1240
1350
|
except Exception as exc:
|
|
1241
1351
|
_record_scan_error(conn, stats, str(current), "quick_index", exc)
|
|
1242
1352
|
continue
|
|
@@ -1420,7 +1530,7 @@ def _scan_known_directory(
|
|
|
1420
1530
|
st = current.stat()
|
|
1421
1531
|
if not current.is_dir():
|
|
1422
1532
|
continue
|
|
1423
|
-
entries = sorted(current.iterdir(), key=
|
|
1533
|
+
entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
|
|
1424
1534
|
except Exception as exc:
|
|
1425
1535
|
_record_scan_error(conn, stats, str(current), "live_reconcile", exc)
|
|
1426
1536
|
continue
|
|
@@ -1661,6 +1771,29 @@ def _latest_version_id(conn, asset_id: str) -> str:
|
|
|
1661
1771
|
return row["version_id"] if row else stable_id("ver", asset_id)
|
|
1662
1772
|
|
|
1663
1773
|
|
|
1774
|
+
def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
|
|
1775
|
+
record = embeddings.embed_record(text)
|
|
1776
|
+
model_id = str(record["model_id"])
|
|
1777
|
+
model_revision = str(record["model_revision"])
|
|
1778
|
+
dimension = int(record["dimension"])
|
|
1779
|
+
conn.execute(
|
|
1780
|
+
"""
|
|
1781
|
+
INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
|
|
1782
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
1783
|
+
""",
|
|
1784
|
+
(
|
|
1785
|
+
stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}"),
|
|
1786
|
+
asset_id,
|
|
1787
|
+
chunk_id,
|
|
1788
|
+
model_id,
|
|
1789
|
+
model_revision,
|
|
1790
|
+
dimension,
|
|
1791
|
+
json_dumps(record["vector"]),
|
|
1792
|
+
now(),
|
|
1793
|
+
),
|
|
1794
|
+
)
|
|
1795
|
+
|
|
1796
|
+
|
|
1664
1797
|
def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
|
|
1665
1798
|
conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
|
|
1666
1799
|
conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
|
|
@@ -1673,23 +1806,63 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
|
|
|
1673
1806
|
""",
|
|
1674
1807
|
(chunk_id, asset_id, version_id, index, chunk, len(tokenize(chunk)), now()),
|
|
1675
1808
|
)
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1809
|
+
_insert_chunk_embedding(conn, asset_id, chunk_id, chunk)
|
|
1810
|
+
|
|
1811
|
+
|
|
1812
|
+
def _refresh_asset_embeddings(conn, asset_id: str) -> int:
|
|
1813
|
+
rows = conn.execute(
|
|
1814
|
+
"""
|
|
1815
|
+
SELECT chunk_id, text
|
|
1816
|
+
FROM local_chunks
|
|
1817
|
+
WHERE asset_id=?
|
|
1818
|
+
ORDER BY chunk_index ASC
|
|
1819
|
+
""",
|
|
1820
|
+
(asset_id,),
|
|
1821
|
+
).fetchall()
|
|
1822
|
+
conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
|
|
1823
|
+
for row in rows:
|
|
1824
|
+
_insert_chunk_embedding(conn, asset_id, row["chunk_id"], row["text"])
|
|
1825
|
+
if rows:
|
|
1826
|
+
conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
|
|
1827
|
+
return len(rows)
|
|
1828
|
+
|
|
1829
|
+
|
|
1830
|
+
def _embedding_matches_profile(row, profile: embeddings.EmbeddingProfile) -> bool:
|
|
1831
|
+
if row is None:
|
|
1832
|
+
return False
|
|
1833
|
+
return (
|
|
1834
|
+
str(row["model_id"] or "") == profile.model_id
|
|
1835
|
+
and str(row["model_revision"] or "") == profile.model_revision
|
|
1836
|
+
and int(row["dimension"] or 0) == int(profile.dimension)
|
|
1837
|
+
)
|
|
1838
|
+
|
|
1839
|
+
|
|
1840
|
+
def _enqueue_stale_embedding_refresh_jobs(conn, *, limit: int) -> int:
|
|
1841
|
+
profile = embeddings.active_profile()
|
|
1842
|
+
if profile.kind == "deterministic_embedding":
|
|
1843
|
+
return 0
|
|
1844
|
+
rows = conn.execute(
|
|
1845
|
+
"""
|
|
1846
|
+
SELECT DISTINCT c.asset_id
|
|
1847
|
+
FROM local_chunks c
|
|
1848
|
+
JOIN local_assets a ON a.asset_id=c.asset_id
|
|
1849
|
+
LEFT JOIN local_embeddings e ON e.chunk_id=c.chunk_id
|
|
1850
|
+
WHERE a.status='active'
|
|
1851
|
+
AND a.privacy_class='normal'
|
|
1852
|
+
AND (
|
|
1853
|
+
e.embedding_id IS NULL
|
|
1854
|
+
OR e.model_id != ?
|
|
1855
|
+
OR e.model_revision != ?
|
|
1856
|
+
OR e.dimension != ?
|
|
1857
|
+
)
|
|
1858
|
+
ORDER BY a.updated_at ASC
|
|
1859
|
+
LIMIT ?
|
|
1860
|
+
""",
|
|
1861
|
+
(profile.model_id, profile.model_revision, int(profile.dimension), max(1, int(limit))),
|
|
1862
|
+
).fetchall()
|
|
1863
|
+
for row in rows:
|
|
1864
|
+
enqueue_job(conn, row["asset_id"], EMBEDDING_REFRESH_JOB, priority=58)
|
|
1865
|
+
return len(rows)
|
|
1693
1866
|
|
|
1694
1867
|
|
|
1695
1868
|
def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -> None:
|
|
@@ -1751,6 +1924,9 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1751
1924
|
log_event("info", "jobs_skipped_paused", "Local memory jobs skipped because indexing is paused")
|
|
1752
1925
|
return {"ok": True, "paused": True, "processed": 0, "failed": 0}
|
|
1753
1926
|
recovered = _requeue_due_jobs(conn)
|
|
1927
|
+
refresh_queued = _enqueue_stale_embedding_refresh_jobs(conn, limit=max(1, min(int(limit or 1), 100)))
|
|
1928
|
+
if refresh_queued:
|
|
1929
|
+
conn.commit()
|
|
1754
1930
|
rows = conn.execute(
|
|
1755
1931
|
"""
|
|
1756
1932
|
SELECT j.*, a.path, a.depth, a.privacy_class, a.status AS asset_status
|
|
@@ -1804,6 +1980,8 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1804
1980
|
_replace_chunks(conn, asset_id, version_id, text)
|
|
1805
1981
|
_replace_entities(conn, asset_id, version_id, entities(text))
|
|
1806
1982
|
conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
|
|
1983
|
+
elif job_type == EMBEDDING_REFRESH_JOB:
|
|
1984
|
+
_refresh_asset_embeddings(conn, asset_id)
|
|
1807
1985
|
elif job_type == "graph":
|
|
1808
1986
|
conn.execute(
|
|
1809
1987
|
"""
|
|
@@ -1843,8 +2021,8 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1843
2021
|
conn.commit()
|
|
1844
2022
|
conn.commit()
|
|
1845
2023
|
if processed or failed:
|
|
1846
|
-
log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed)
|
|
1847
|
-
return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered}
|
|
2024
|
+
log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed, refresh_queued=refresh_queued)
|
|
2025
|
+
return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered, "embedding_refresh_queued": refresh_queued}
|
|
1848
2026
|
|
|
1849
2027
|
|
|
1850
2028
|
def run_once(
|
|
@@ -2433,27 +2611,36 @@ def diagnostics_tail(limit: int = 100) -> dict:
|
|
|
2433
2611
|
|
|
2434
2612
|
|
|
2435
2613
|
def model_status() -> dict:
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
"
|
|
2439
|
-
"
|
|
2440
|
-
"
|
|
2441
|
-
"
|
|
2442
|
-
"
|
|
2614
|
+
active_embedding = embeddings.active_profile()
|
|
2615
|
+
active_entry = {
|
|
2616
|
+
"profile": active_embedding.profile,
|
|
2617
|
+
"name": active_embedding.model_id,
|
|
2618
|
+
"kind": active_embedding.kind,
|
|
2619
|
+
"revision": active_embedding.model_revision,
|
|
2620
|
+
"dimension": active_embedding.dimension,
|
|
2621
|
+
"state": active_embedding.state,
|
|
2443
2622
|
"required": True,
|
|
2444
|
-
|
|
2623
|
+
"active": True,
|
|
2624
|
+
"problems": list(active_embedding.problems),
|
|
2625
|
+
}
|
|
2626
|
+
models = []
|
|
2627
|
+
active_in_manifest = False
|
|
2445
2628
|
try:
|
|
2446
2629
|
import local_models
|
|
2447
2630
|
for spec in local_models.list_local_model_specs():
|
|
2448
2631
|
verification = local_models.verify_local_model_dir(spec)
|
|
2632
|
+
state = "available" if verification["ok"] else ("optional_missing" if not spec.required else "not_warmed")
|
|
2633
|
+
is_active = spec.model_id == active_embedding.model_id and spec.revision == active_embedding.model_revision
|
|
2634
|
+
active_in_manifest = bool(active_in_manifest or is_active)
|
|
2449
2635
|
models.append({
|
|
2450
2636
|
"profile": spec.name,
|
|
2451
2637
|
"name": spec.model_id,
|
|
2452
2638
|
"kind": spec.kind,
|
|
2453
2639
|
"revision": spec.revision,
|
|
2454
2640
|
"dimension": spec.dimension,
|
|
2455
|
-
"state":
|
|
2641
|
+
"state": state,
|
|
2456
2642
|
"required": spec.required,
|
|
2643
|
+
"active": is_active,
|
|
2457
2644
|
"path": verification["path"],
|
|
2458
2645
|
"problems": verification["problems"],
|
|
2459
2646
|
})
|
|
@@ -2466,6 +2653,8 @@ def model_status() -> dict:
|
|
|
2466
2653
|
"required": False,
|
|
2467
2654
|
"problems": [str(exc)],
|
|
2468
2655
|
})
|
|
2656
|
+
if not active_in_manifest:
|
|
2657
|
+
models.insert(0, active_entry)
|
|
2469
2658
|
return {"ok": True, "models": models}
|
|
2470
2659
|
|
|
2471
2660
|
|
|
@@ -2608,7 +2797,8 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
|
|
|
2608
2797
|
def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
|
|
2609
2798
|
base_rows = conn.execute(
|
|
2610
2799
|
"""
|
|
2611
|
-
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2800
|
+
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2801
|
+
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
2612
2802
|
FROM local_chunks c
|
|
2613
2803
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
2614
2804
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -2626,7 +2816,8 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
|
|
|
2626
2816
|
placeholders = ",".join("?" for _ in entity_asset_ids)
|
|
2627
2817
|
entity_rows = conn.execute(
|
|
2628
2818
|
f"""
|
|
2629
|
-
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2819
|
+
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2820
|
+
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
2630
2821
|
FROM local_chunks c
|
|
2631
2822
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
2632
2823
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -2658,6 +2849,54 @@ def _compact_text(value: str, *, max_chars: int) -> str:
|
|
|
2658
2849
|
return text[: max(0, max_chars - 1)].rstrip() + "…"
|
|
2659
2850
|
|
|
2660
2851
|
|
|
2852
|
+
def _reranker_disabled() -> bool:
|
|
2853
|
+
value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_RERANKER", "").strip().lower()
|
|
2854
|
+
if value in {"1", "true", "yes", "on"}:
|
|
2855
|
+
return True
|
|
2856
|
+
if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS") != "1":
|
|
2857
|
+
return True
|
|
2858
|
+
return False
|
|
2859
|
+
|
|
2860
|
+
|
|
2861
|
+
@lru_cache(maxsize=1)
|
|
2862
|
+
def _context_reranker():
|
|
2863
|
+
if _reranker_disabled():
|
|
2864
|
+
return None
|
|
2865
|
+
try:
|
|
2866
|
+
import local_models
|
|
2867
|
+
from fastembed.rerank.cross_encoder import TextCrossEncoder
|
|
2868
|
+
|
|
2869
|
+
spec = local_models.get_local_model_spec(RERANKER_MODEL_SPEC)
|
|
2870
|
+
target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
|
|
2871
|
+
return TextCrossEncoder(spec.model_id, specific_model_path=str(target_dir))
|
|
2872
|
+
except Exception: # pragma: no cover - host/cache dependent
|
|
2873
|
+
return None
|
|
2874
|
+
|
|
2875
|
+
|
|
2876
|
+
def _rerank_scored_candidates(search_query: str, scored: list[tuple[float, Any]], *, limit: int) -> list[tuple[float, Any]]:
|
|
2877
|
+
if len(scored) <= 1:
|
|
2878
|
+
return scored
|
|
2879
|
+
reranker = _context_reranker()
|
|
2880
|
+
if not reranker:
|
|
2881
|
+
return scored
|
|
2882
|
+
head_count = min(len(scored), max(int(limit) * 4, 20), 60)
|
|
2883
|
+
head = scored[:head_count]
|
|
2884
|
+
tail = scored[head_count:]
|
|
2885
|
+
docs = [_compact_text(row["text"], max_chars=1400) for _score, row in head]
|
|
2886
|
+
try:
|
|
2887
|
+
scores = [float(score) for score in reranker.rerank(search_query, docs)]
|
|
2888
|
+
except Exception: # pragma: no cover - runtime fallback only
|
|
2889
|
+
return scored
|
|
2890
|
+
if len(scores) != len(head):
|
|
2891
|
+
return scored
|
|
2892
|
+
reranked = sorted(
|
|
2893
|
+
((base_score, rerank_score, row) for (base_score, row), rerank_score in zip(head, scores)),
|
|
2894
|
+
key=lambda item: item[1],
|
|
2895
|
+
reverse=True,
|
|
2896
|
+
)
|
|
2897
|
+
return [(base_score, row) for base_score, _rerank_score, row in reranked] + tail
|
|
2898
|
+
|
|
2899
|
+
|
|
2661
2900
|
def _payload_size(payload: dict) -> int:
|
|
2662
2901
|
return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
|
|
2663
2902
|
|
|
@@ -2993,10 +3232,12 @@ def _context_query_conn(
|
|
|
2993
3232
|
normalized_mode, mode_warnings = _normalize_context_mode(mode)
|
|
2994
3233
|
context_tail = _compact_text(current_context or "", max_chars=1000)
|
|
2995
3234
|
search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
|
|
2996
|
-
|
|
3235
|
+
query_embedding = embeddings.embed_record(search_query)
|
|
3236
|
+
qvec = query_embedding["vector"]
|
|
2997
3237
|
entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
|
|
2998
3238
|
rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
|
|
2999
3239
|
scored = []
|
|
3240
|
+
stale_embedding_seen = False
|
|
3000
3241
|
for row in rows:
|
|
3001
3242
|
if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
|
|
3002
3243
|
continue
|
|
@@ -3005,7 +3246,15 @@ def _context_query_conn(
|
|
|
3005
3246
|
path_score = _search_text_score(search_query, row["path"] or "")
|
|
3006
3247
|
summary_score = _search_text_score(search_query, row["summary"] or "")
|
|
3007
3248
|
entity_score = entity_boosts.get(row["asset_id"], 0.0)
|
|
3008
|
-
vector_score =
|
|
3249
|
+
vector_score = 0.0
|
|
3250
|
+
if (
|
|
3251
|
+
str(row["model_id"] or "") == str(query_embedding["model_id"])
|
|
3252
|
+
and str(row["model_revision"] or "") == str(query_embedding["model_revision"])
|
|
3253
|
+
and int(row["dimension"] or 0) == int(query_embedding["dimension"])
|
|
3254
|
+
):
|
|
3255
|
+
vector_score = embeddings.cosine(qvec, vector)
|
|
3256
|
+
elif vector:
|
|
3257
|
+
stale_embedding_seen = True
|
|
3009
3258
|
score = max(text_score, path_score, summary_score, vector_score)
|
|
3010
3259
|
if entity_score > 0:
|
|
3011
3260
|
direct_score = max(text_score, path_score, summary_score)
|
|
@@ -3019,6 +3268,7 @@ def _context_query_conn(
|
|
|
3019
3268
|
if score > 0:
|
|
3020
3269
|
scored.append((min(float(score), 1.6), row))
|
|
3021
3270
|
scored.sort(key=lambda item: item[0], reverse=True)
|
|
3271
|
+
scored = _rerank_scored_candidates(search_query, scored, limit=int(limit))
|
|
3022
3272
|
assets = []
|
|
3023
3273
|
chunks = []
|
|
3024
3274
|
evidence_refs = []
|
|
@@ -3057,6 +3307,10 @@ def _context_query_conn(
|
|
|
3057
3307
|
).fetchall()
|
|
3058
3308
|
relations_payload = [dict(row) for row in relation_rows]
|
|
3059
3309
|
warnings = list(mode_warnings)
|
|
3310
|
+
if query_embedding.get("kind") == "deterministic_embedding":
|
|
3311
|
+
warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
|
|
3312
|
+
elif stale_embedding_seen:
|
|
3313
|
+
warnings.append("Some local chunks still use an older embedding profile and will be refreshed automatically.")
|
|
3060
3314
|
if evidence_required and not evidence_refs:
|
|
3061
3315
|
warnings.append("No local evidence found for this query.")
|
|
3062
3316
|
summary = ""
|
package/src/local_context/embeddings.py
CHANGED
|
@@ -2,32 +2,135 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import math
|
|
5
|
+
import os
|
|
6
|
+
import warnings
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from functools import lru_cache
|
|
9
|
+
from typing import Any
|
|
5
10
|
|
|
6
11
|
from .util import tokenize
|
|
7
12
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
13
|
+
FALLBACK_MODEL_ID = "nexo-local-hash-embedding"
|
|
14
|
+
FALLBACK_MODEL_REVISION = "1"
|
|
15
|
+
FALLBACK_DIMENSION = 128
|
|
16
|
+
PRIMARY_MODEL_SPEC = "bge-base-embeddings"
|
|
11
17
|
|
|
18
|
+
# Backward-compatible constants. Callers that persist vectors should use
|
|
19
|
+
# embed_record(), because the active profile can switch from fallback to BGE.
|
|
20
|
+
MODEL_ID = FALLBACK_MODEL_ID
|
|
21
|
+
MODEL_REVISION = FALLBACK_MODEL_REVISION
|
|
22
|
+
DIMENSION = FALLBACK_DIMENSION
|
|
12
23
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
|
|
26
|
+
class EmbeddingProfile:
|
|
27
|
+
model_id: str
|
|
28
|
+
model_revision: str
|
|
29
|
+
dimension: int
|
|
30
|
+
kind: str
|
|
31
|
+
state: str
|
|
32
|
+
profile: str
|
|
33
|
+
problems: tuple[str, ...] = ()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _hash_embed_text(text: str) -> list[float]:
|
|
37
|
+
vec = [0.0] * FALLBACK_DIMENSION
|
|
22
38
|
for token in tokenize(text):
|
|
23
39
|
digest = hashlib.sha256(token.encode("utf-8", errors="ignore")).digest()
|
|
24
|
-
idx = int.from_bytes(digest[:2], "big") %
|
|
40
|
+
idx = int.from_bytes(digest[:2], "big") % FALLBACK_DIMENSION
|
|
25
41
|
sign = -1.0 if digest[2] % 2 else 1.0
|
|
26
42
|
vec[idx] += sign
|
|
27
43
|
norm = math.sqrt(sum(value * value for value in vec)) or 1.0
|
|
28
44
|
return [round(value / norm, 8) for value in vec]
|
|
29
45
|
|
|
30
46
|
|
|
47
|
+
def _fallback_profile(*problems: str) -> EmbeddingProfile:
|
|
48
|
+
return EmbeddingProfile(
|
|
49
|
+
model_id=FALLBACK_MODEL_ID,
|
|
50
|
+
model_revision=FALLBACK_MODEL_REVISION,
|
|
51
|
+
dimension=FALLBACK_DIMENSION,
|
|
52
|
+
kind="deterministic_embedding",
|
|
53
|
+
state="available",
|
|
54
|
+
profile="local_context_embedding_fallback",
|
|
55
|
+
problems=tuple(item for item in problems if item),
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _fastembed_disabled() -> bool:
|
|
60
|
+
value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED", "").strip().lower()
|
|
61
|
+
if value in {"1", "true", "yes", "on"}:
|
|
62
|
+
return True
|
|
63
|
+
# The unit suite uses temporary NEXO homes that intentionally do not carry
|
|
64
|
+
# model weights. Keep those tests dependency-free unless explicitly opted in.
|
|
65
|
+
if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS") != "1":
|
|
66
|
+
return True
|
|
67
|
+
return False
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@lru_cache(maxsize=1)
|
|
71
|
+
def _fastembed_state() -> tuple[Any, EmbeddingProfile] | tuple[None, EmbeddingProfile]:
|
|
72
|
+
if _fastembed_disabled():
|
|
73
|
+
return None, _fallback_profile("fastembed disabled for this process")
|
|
74
|
+
try:
|
|
75
|
+
import local_models
|
|
76
|
+
from fastembed import TextEmbedding
|
|
77
|
+
|
|
78
|
+
spec = local_models.get_local_model_spec(PRIMARY_MODEL_SPEC)
|
|
79
|
+
target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
|
|
80
|
+
with warnings.catch_warnings():
|
|
81
|
+
warnings.filterwarnings("ignore", message=r"The model .* now uses mean pooling.*", category=UserWarning)
|
|
82
|
+
model = TextEmbedding(spec.model_id, specific_model_path=str(target_dir))
|
|
83
|
+
return model, EmbeddingProfile(
|
|
84
|
+
model_id=spec.model_id,
|
|
85
|
+
model_revision=spec.revision,
|
|
86
|
+
dimension=spec.dimension or 384,
|
|
87
|
+
kind=spec.kind,
|
|
88
|
+
state="available",
|
|
89
|
+
profile=spec.name,
|
|
90
|
+
)
|
|
91
|
+
except Exception as exc: # pragma: no cover - host/cache dependent
|
|
92
|
+
return None, _fallback_profile(str(exc))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def active_profile() -> EmbeddingProfile:
|
|
96
|
+
_model, profile = _fastembed_state()
|
|
97
|
+
return profile
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def reset_cache() -> None:
|
|
101
|
+
_fastembed_state.cache_clear()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def embed_record(text: str) -> dict[str, Any]:
|
|
105
|
+
model, profile = _fastembed_state()
|
|
106
|
+
if model is not None and profile.kind == "fastembed_embedding":
|
|
107
|
+
try:
|
|
108
|
+
vector = list(next(iter(model.embed([text or ""]))))
|
|
109
|
+
return {
|
|
110
|
+
"vector": [float(value) for value in vector],
|
|
111
|
+
"model_id": profile.model_id,
|
|
112
|
+
"model_revision": profile.model_revision,
|
|
113
|
+
"dimension": profile.dimension,
|
|
114
|
+
"profile": profile.profile,
|
|
115
|
+
"kind": profile.kind,
|
|
116
|
+
}
|
|
117
|
+
except Exception: # pragma: no cover - runtime fallback only
|
|
118
|
+
pass
|
|
119
|
+
fallback = _fallback_profile()
|
|
120
|
+
return {
|
|
121
|
+
"vector": _hash_embed_text(text),
|
|
122
|
+
"model_id": fallback.model_id,
|
|
123
|
+
"model_revision": fallback.model_revision,
|
|
124
|
+
"dimension": fallback.dimension,
|
|
125
|
+
"profile": fallback.profile,
|
|
126
|
+
"kind": fallback.kind,
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def embed_text(text: str) -> list[float]:
    """Return only the ``"vector"`` entry of ``embed_record(text)``."""
    record = embed_record(text)
    return record["vector"]
|
|
132
|
+
|
|
133
|
+
|
|
31
134
|
def cosine(a: list[float], b: list[float]) -> float:
|
|
32
135
|
if not a or not b or len(a) != len(b):
|
|
33
136
|
return 0.0
|
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
"""Resident runtime service and MCP proxy bootstrap.

The public MCP entrypoint remains ``server.py`` for compatibility. By
default, that entrypoint becomes a thin stdio proxy and forwards calls to a
single resident FastMCP service over loopback HTTP. The resident process is
the only MCP process that initializes Brain, opens SQLite, and runs tool
handlers.
"""

# The docstring must be the first statement to become the module ``__doc__``;
# ``from __future__`` imports are still permitted directly after it.
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import signal
|
|
15
|
+
import socket
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
import time
|
|
19
|
+
from contextlib import contextmanager
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
import paths
|
|
24
|
+
|
|
25
|
+
DEFAULT_HOST = "127.0.0.1"
|
|
26
|
+
DEFAULT_PORT = 17872
|
|
27
|
+
PORT_SCAN_LIMIT = 30
|
|
28
|
+
SERVICE_PATH = "/mcp"
|
|
29
|
+
SERVICE_ENV = "NEXO_RUNTIME_SERVICE"
|
|
30
|
+
DIRECT_ENV = "NEXO_MCP_DIRECT"
|
|
31
|
+
ADAPTER_ENV = "NEXO_MCP_RUNTIME_ADAPTER"
|
|
32
|
+
STATE_FILE = "runtime-service.json"
|
|
33
|
+
LOCK_FILE = "runtime-service.lock"
|
|
34
|
+
LOG_FILE = "runtime-service.log"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def env_flag(name: str, *, default: bool = False) -> bool:
    """Interpret environment variable ``name`` as a boolean switch.

    Returns ``default`` when the variable is unset. Otherwise the value is
    stripped and lower-cased and counts as true only if it is one of
    ``1``, ``true``, ``yes``, ``on``, ``y`` or ``si``.
    """
    raw = os.environ.get(name)
    if raw is not None:
        normalized = str(raw).strip().lower()
        return normalized in {"1", "true", "yes", "on", "y", "si"}
    return default
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def service_host() -> str:
    """Return the host the runtime service binds to.

    Reads ``NEXO_RUNTIME_HOST`` and strips surrounding whitespace. Falls back
    to ``DEFAULT_HOST`` when the variable is unset, empty, or whitespace-only;
    previously a whitespace-only value produced an empty host string.
    """
    configured = str(os.environ.get("NEXO_RUNTIME_HOST", DEFAULT_HOST) or DEFAULT_HOST).strip()
    return configured or DEFAULT_HOST
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def service_path() -> str:
    """Return the HTTP path of the MCP endpoint, always starting with ``/``.

    The value comes from ``NEXO_RUNTIME_MCP_PATH`` (stripped), defaulting to
    ``SERVICE_PATH`` when unset or empty.
    """
    configured = os.environ.get("NEXO_RUNTIME_MCP_PATH", SERVICE_PATH) or SERVICE_PATH
    candidate = str(configured).strip()
    if candidate.startswith("/"):
        return candidate
    return f"/{candidate}"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def service_url(host: str | None = None, port: int | None = None, path: str | None = None) -> str:
    """Build the ``http://host:port/path`` URL of the runtime service.

    Any falsy argument is replaced by ``service_host()``, ``service_port()``
    or ``service_path()`` respectively.
    """
    resolved_host = host or service_host()
    resolved_port = int(port or service_port())
    resolved_path = path or service_path()
    return f"http://{resolved_host}:{resolved_port}{resolved_path}"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def service_state_path() -> Path:
    """Return the state file path under ``paths.runtime_state_dir()``, creating the directory if needed."""
    state_dir = paths.runtime_state_dir()
    state_dir.mkdir(parents=True, exist_ok=True)
    return state_dir.joinpath(STATE_FILE)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def service_log_path() -> Path:
    """Return the service log file path under ``paths.logs_dir()``, creating the directory if needed."""
    log_dir = paths.logs_dir()
    log_dir.mkdir(parents=True, exist_ok=True)
    return log_dir.joinpath(LOG_FILE)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def service_lock_path() -> Path:
    """Return the start-lock file path under ``paths.runtime_state_dir()``, creating the directory if needed."""
    lock_dir = paths.runtime_state_dir()
    lock_dir.mkdir(parents=True, exist_ok=True)
    return lock_dir.joinpath(LOCK_FILE)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@contextmanager
def service_start_lock(*, timeout: float = 10.0):
    """Context manager holding an exclusive lock on the service lock file.

    The lock is attempted without blocking every 0.1 s until it succeeds or
    ``max(timeout, 0.5)`` seconds have elapsed, in which case ``TimeoutError``
    is raised. While held, the file contains ``"<pid>:<timestamp>"``. On exit
    the lock is released (if it was taken) and the file handle is closed;
    errors during release or close are ignored.
    """
    lock_path = service_lock_path()
    lock_file = lock_path.open("a+")
    give_up_at = time.monotonic() + max(timeout, 0.5)
    acquired = False

    def _try_acquire() -> None:
        if os.name == "nt":
            import msvcrt

            # msvcrt locks a byte range, so make sure byte 0 exists first.
            lock_file.seek(0)
            if not lock_file.read(1):
                lock_file.write("0")
                lock_file.flush()
            lock_file.seek(0)
            msvcrt.locking(lock_file.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            import fcntl

            fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)

    def _release() -> None:
        if os.name == "nt":
            import msvcrt

            lock_file.seek(0)
            msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
        else:
            import fcntl

            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)

    try:
        while True:
            try:
                _try_acquire()
            except (BlockingIOError, OSError):
                if time.monotonic() >= give_up_at:
                    raise TimeoutError(f"Timed out waiting for NEXO runtime service lock: {lock_path}")
                time.sleep(0.1)
                continue
            acquired = True
            break
        # Record the current holder in the lock file.
        lock_file.seek(0)
        lock_file.truncate()
        lock_file.write(f"{os.getpid()}:{time.time()}\n")
        lock_file.flush()
        yield
    finally:
        if acquired:
            try:
                _release()
            except Exception:
                pass
        try:
            lock_file.close()
        except Exception:
            pass
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def read_service_state() -> dict[str, Any]:
    """Load the runtime-service state file as a dict.

    Returns ``{}`` when the file is missing, is not valid JSON, does not hold
    a JSON object, or any other error occurs while reading it.
    """
    try:
        state_file = service_state_path()
        if state_file.is_file():
            parsed = json.loads(state_file.read_text(encoding="utf-8"))
            if isinstance(parsed, dict):
                return parsed
    except Exception:
        pass
    return {}
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def write_service_state(state: dict[str, Any]) -> None:
    """Atomically persist ``state`` to the runtime-service state file.

    The written payload is a copy of ``state`` overlaid with
    ``current_runtime_identity()`` and an ``updated_at`` timestamp, serialized
    as indented, key-sorted JSON.

    The temporary file name includes the writer's PID. Both the adapter
    (``ensure_runtime_service``) and the service process write this state, so
    a shared temp name would let one writer replace or remove the other's
    half-written file. The temp file is removed if writing or replacing fails;
    the original exception is re-raised.
    """
    path = service_state_path()
    tmp = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    payload = dict(state)
    payload.update(current_runtime_identity())
    payload["updated_at"] = time.time()
    try:
        tmp.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a stray temp file behind on failure.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def is_runtime_service_process() -> bool:
    """Return whether the ``SERVICE_ENV`` flag is set to a truthy value in this process."""
    marked_as_service = env_flag(SERVICE_ENV)
    return marked_as_service
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def should_use_mcp_adapter() -> bool:
    """Decide whether this process should run as the stdio proxy adapter.

    Returns ``False`` when the process is the runtime service itself, when
    ``DIRECT_ENV`` is truthy, or when ``ADAPTER_ENV`` is set to a non-truthy
    value (it defaults to on). Otherwise returns ``True`` only if
    ``NEXO_MCP_TRANSPORT`` (default ``stdio``) resolves to ``stdio``.
    """
    if is_runtime_service_process() or env_flag(DIRECT_ENV):
        return False
    adapter_enabled = env_flag(ADAPTER_ENV, default=True)
    if not adapter_enabled:
        return False
    configured = os.environ.get("NEXO_MCP_TRANSPORT", "stdio") or "stdio"
    return str(configured).strip().lower() == "stdio"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def service_port() -> int:
    """Return the preferred TCP port for the runtime service.

    Resolution order:
      1. ``NEXO_RUNTIME_PORT`` from the environment,
      2. the ``port`` recorded in the service state file,
      3. ``DEFAULT_PORT``.

    A candidate is accepted only if it parses as an integer in ``1..65535``.
    Previously the environment override was returned unchecked, so ``0``,
    negative or oversized values produced an unusable service URL, while the
    state-file value was already required to be positive.
    """

    def _usable_port(candidate) -> int:
        # Returns 0 for anything that is not a valid, non-zero TCP port.
        try:
            number = int(candidate)
        except (TypeError, ValueError, OverflowError):
            return 0
        return number if 0 < number <= 65535 else 0

    from_env = _usable_port(os.environ.get("NEXO_RUNTIME_PORT") or 0)
    if from_env:
        return from_env
    from_state = _usable_port(read_service_state().get("port") or 0)
    if from_state:
        return from_state
    return DEFAULT_PORT
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def pid_is_running(pid: int) -> bool:
    """Report whether a process with ``pid`` currently exists.

    Non-positive PIDs are never considered running.

    On POSIX this sends signal 0: ``ProcessLookupError`` means the process is
    gone, ``PermissionError`` means it exists but belongs to someone else.

    On Windows ``os.kill(pid, 0)`` must not be used: any signal other than
    the CTRL_* events terminates the target via ``TerminateProcess``, so the
    liveness probe would kill the service. The process is instead opened with
    query-only rights and its exit code is checked.
    """
    if pid <= 0:
        return False
    if os.name == "nt":
        try:
            import ctypes
            from ctypes import wintypes

            PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
            ERROR_ACCESS_DENIED = 5
            STILL_ACTIVE = 259

            kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
            kernel32.OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD]
            kernel32.OpenProcess.restype = wintypes.HANDLE
            kernel32.GetExitCodeProcess.argtypes = [wintypes.HANDLE, ctypes.POINTER(wintypes.DWORD)]
            kernel32.GetExitCodeProcess.restype = wintypes.BOOL
            kernel32.CloseHandle.argtypes = [wintypes.HANDLE]
            kernel32.CloseHandle.restype = wintypes.BOOL

            handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, int(pid))
            if not handle:
                # Access denied still proves the process exists.
                return ctypes.get_last_error() == ERROR_ACCESS_DENIED
            try:
                exit_code = wintypes.DWORD()
                if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
                    return False
                return exit_code.value == STILL_ACTIVE
            finally:
                kernel32.CloseHandle(handle)
        except Exception:
            return False
    try:
        os.kill(pid, 0)
        return True
    except ProcessLookupError:
        return False
    except PermissionError:
        return True
    except Exception:
        return False
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _port_is_free(host: str, port: int) -> bool:
|
|
195
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
|
|
196
|
+
sock.settimeout(0.2)
|
|
197
|
+
try:
|
|
198
|
+
sock.bind((host, port))
|
|
199
|
+
return True
|
|
200
|
+
except OSError:
|
|
201
|
+
return False
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def choose_service_port(host: str | None = None) -> int:
    """Pick the first bindable port starting at ``service_port()``.

    Scans ``PORT_SCAN_LIMIT`` consecutive ports on ``host`` (or
    ``service_host()`` when ``host`` is falsy) and returns the first one that
    ``_port_is_free`` accepts.

    Raises:
        RuntimeError: if none of the scanned ports is free.
    """
    bind_host = host or service_host()
    first = service_port()
    for candidate in range(first, first + PORT_SCAN_LIMIT):
        if _port_is_free(bind_host, candidate):
            return candidate
    raise RuntimeError(f"No free NEXO runtime service port in range {first}-{first + PORT_SCAN_LIMIT - 1}")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
async def _probe_service_async(url: str, *, timeout: float = 1.5) -> bool:
    """Ping the MCP service at ``url`` with a FastMCP client.

    ``timeout`` is used for both the client timeout and its init timeout.
    Returns the truthiness of ``client.ping()``; any exception while
    connecting or pinging yields ``False``.
    """
    from fastmcp import Client

    try:
        probe = Client(url, timeout=timeout, init_timeout=timeout)
        async with probe:
            answer = await probe.ping()
        return bool(answer)
    except Exception:
        return False
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def probe_service(url: str, *, timeout: float = 1.5) -> bool:
    """Return ``True`` if the runtime service at ``url`` appears reachable.

    Without a running event loop, this performs a real MCP ping via
    ``asyncio.run``. When called from inside a running loop (where
    ``asyncio.run`` is not allowed) it falls back to a plain TCP connect to
    the URL's ``host:port``.

    The running loop is detected up front, so the probe coroutine is never
    created only to be discarded. Previously it was built before
    ``asyncio.run`` raised, leaving an un-awaited coroutine behind.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_active = False
    else:
        loop_active = True

    if not loop_active:
        try:
            return bool(asyncio.run(_probe_service_async(url, timeout=timeout)))
        except RuntimeError:
            # Keep the historical behaviour: loop-related failures degrade
            # to the socket probe below.
            pass

    # If an event loop is already active, fall back to a tiny socket probe.
    try:
        host_port = url.split("//", 1)[1].split("/", 1)[0]
        host, port_text = host_port.rsplit(":", 1)
        with socket.create_connection((host, int(port_text)), timeout=timeout):
            return True
    except Exception:
        return False
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def current_server_path() -> Path:
    """Return the absolute path of ``server.py`` located next to this module."""
    module_dir = Path(__file__).resolve().parent
    return module_dir / "server.py"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def current_runtime_identity() -> dict[str, str]:
    """Describe the runtime installed next to this module.

    Returns a dict with ``runtime_version``, ``runtime_fingerprint`` and
    ``server_path``. The version is read for the directory containing
    ``server.py`` and, if that is falsy, for its parent. If importing
    ``runtime_versioning`` or computing either value fails, version and
    fingerprint are empty strings.
    """
    try:
        from runtime_versioning import compute_mcp_runtime_fingerprint, read_version_for_path

        server_file = current_server_path()
        runtime_root = server_file.parent
        version = read_version_for_path(runtime_root) or read_version_for_path(runtime_root.parent)
        fingerprint = compute_mcp_runtime_fingerprint(runtime_root, use_cache=True)
        return {
            "runtime_version": version,
            "runtime_fingerprint": fingerprint,
            "server_path": str(server_file),
        }
    except Exception:
        return {"runtime_version": "", "runtime_fingerprint": "", "server_path": str(current_server_path())}
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def state_matches_current_runtime(state: dict[str, Any]) -> bool:
    """Check whether a recorded service state belongs to the current runtime.

    An empty state never matches. Otherwise the state is rejected when its
    ``server_path`` is set and differs from the current one, or when the
    fingerprint or version is known on both sides and differs. Values missing
    on either side are not treated as a mismatch.
    """
    if not state:
        return False

    def _text(value: Any) -> str:
        return str(value or "").strip()

    current = current_runtime_identity()
    recorded_server = _text(state.get("server_path"))
    if recorded_server and recorded_server != current["server_path"]:
        return False

    for key in ("runtime_fingerprint", "runtime_version"):
        ours = _text(current.get(key))
        theirs = _text(state.get(key))
        if ours and theirs and ours != theirs:
            return False
    return True
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _terminate_pid(pid: int, *, timeout: float = 3.0) -> dict[str, Any]:
|
|
279
|
+
if pid <= 0:
|
|
280
|
+
return {"terminated": False, "reason": "no_pid"}
|
|
281
|
+
if not pid_is_running(pid):
|
|
282
|
+
return {"terminated": False, "reason": "not_running"}
|
|
283
|
+
try:
|
|
284
|
+
if os.name == "nt":
|
|
285
|
+
subprocess.run(
|
|
286
|
+
["taskkill", "/PID", str(pid), "/T", "/F"],
|
|
287
|
+
capture_output=True,
|
|
288
|
+
text=True,
|
|
289
|
+
timeout=max(timeout, 1.0),
|
|
290
|
+
)
|
|
291
|
+
else:
|
|
292
|
+
os.kill(pid, signal.SIGTERM)
|
|
293
|
+
deadline = time.monotonic() + max(timeout, 0.2)
|
|
294
|
+
while time.monotonic() < deadline:
|
|
295
|
+
if not pid_is_running(pid):
|
|
296
|
+
return {"terminated": True, "pid": pid, "signal": "SIGTERM"}
|
|
297
|
+
time.sleep(0.1)
|
|
298
|
+
if hasattr(signal, "SIGKILL"):
|
|
299
|
+
os.kill(pid, signal.SIGKILL)
|
|
300
|
+
return {"terminated": True, "pid": pid}
|
|
301
|
+
except Exception as exc:
|
|
302
|
+
return {"terminated": False, "pid": pid, "error": str(exc)[:300]}
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def stop_runtime_service(*, reason: str = "stop", timeout: float = 3.0) -> dict[str, Any]:
    """Terminate the recorded runtime service and remove its state file.

    The PID is taken from the state file (0 when missing or not numeric) and
    handed to ``_terminate_pid``. The returned dict is that function's result
    extended with:

    - ``reason``: the caller-supplied reason,
    - ``terminate_reason``: the reason reported by ``_terminate_pid`` (for
      example ``no_pid`` or ``not_running``) when it provided one. It was
      previously overwritten by ``reason`` and lost,
    - ``state_path``, ``state_removed`` and, on failure, ``state_error``.
    """
    state = read_service_state()
    pid_text = str(state.get("pid") or "")
    pid = int(state.get("pid") or 0) if pid_text.isdigit() else 0
    result = _terminate_pid(pid, timeout=timeout)
    if "reason" in result:
        # Keep the termination outcome before storing the caller's reason.
        result["terminate_reason"] = result["reason"]
    result["reason"] = reason
    result["state_path"] = str(service_state_path())
    try:
        service_state_path().unlink(missing_ok=True)
        result["state_removed"] = True
    except Exception as exc:
        result["state_removed"] = False
        result["state_error"] = str(exc)[:300]
    return result
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _service_env(port: int, host: str) -> dict[str, str]:
    """Build the environment for the spawned runtime service process.

    Starts from a copy of the current environment, marks the process as the
    runtime service and selects the ``streamable-http`` transport on
    ``host``/``port`` with the configured service path.
    """
    child_env = dict(os.environ)
    child_env.update(
        {
            SERVICE_ENV: "1",
            "NEXO_MCP_TRANSPORT": "streamable-http",
            "NEXO_MCP_HOST": host,
            "NEXO_MCP_PORT": str(port),
            "NEXO_MCP_PATH": service_path(),
        }
    )
    # A probe client may inherit a deliberately tiny plugin mode. The service
    # should use the normal runtime defaults unless explicitly overridden.
    override = child_env.get("NEXO_RUNTIME_SERVICE_PLUGIN_MODE")
    if override is not None:
        child_env["NEXO_MCP_PLUGIN_MODE"] = override
    return child_env
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _spawn_service_process(port: int, host: str) -> subprocess.Popen:
    """Start ``server.py`` as a detached runtime service process.

    The child runs with the environment from ``_service_env``, no stdin, and
    stdout/stderr appended to the service log file. It is placed in its own
    process group (Windows) or session (POSIX).

    The log file is opened in a ``with`` block so the parent's handle is
    closed as soon as the child has been created. The child keeps its own
    inherited copy, so logging is unaffected. Previously the parent's handle
    was never closed.
    """
    server_file = current_server_path()
    kwargs: dict[str, Any] = {
        "cwd": str(server_file.parent),
        "env": _service_env(port, host),
        "stdin": subprocess.DEVNULL,
    }
    if os.name == "nt":
        kwargs["creationflags"] = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
    else:
        kwargs["start_new_session"] = True
    with open(service_log_path(), "ab", buffering=0) as log_file:
        kwargs["stdout"] = log_file
        kwargs["stderr"] = log_file
        return subprocess.Popen([sys.executable, str(server_file)], **kwargs)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def ensure_runtime_service(*, wait_seconds: float = 10.0) -> str:
    """Return the URL of a ready runtime service, starting one if necessary.

    Runs entirely under ``service_start_lock``. A recorded service is reused
    when its state matches the current runtime and it answers a probe. A
    recorded service with a known PID that fails those checks is stopped.
    Otherwise a new service is spawned on a free port, its state is written,
    and it is probed with growing delays for up to ``max(wait_seconds, 0.5)``
    seconds.

    Raises:
        RuntimeError: if the new service exits or does not answer in time.
    """
    with service_start_lock(timeout=wait_seconds):
        bind_host = service_host()
        recorded = read_service_state()
        recorded_url = str(recorded.get("url") or "")
        pid_text = str(recorded.get("pid") or "")
        recorded_pid = int(recorded.get("pid") or 0) if pid_text.isdigit() else 0

        if recorded_url:
            if recorded_pid <= 0 or pid_is_running(recorded_pid):
                reusable = state_matches_current_runtime(recorded) and probe_service(recorded_url)
                if reusable:
                    return recorded_url
                if recorded_pid > 0:
                    stop_runtime_service(reason="stale_runtime")

        chosen_port = choose_service_port(bind_host)
        endpoint = service_url(bind_host, chosen_port)
        child = _spawn_service_process(chosen_port, bind_host)
        write_service_state(
            {
                "pid": child.pid,
                "port": chosen_port,
                "host": bind_host,
                "path": service_path(),
                "url": endpoint,
                "server_path": str(current_server_path()),
                "started_at": time.time(),
                "mode": "runtime-service",
            }
        )

        give_up_at = time.monotonic() + max(wait_seconds, 0.5)
        pause = 0.15
        # Stop waiting as soon as the child has exited or the deadline passes.
        while time.monotonic() < give_up_at and child.poll() is None:
            if probe_service(endpoint):
                return endpoint
            time.sleep(pause)
            pause = min(pause * 1.5, 1.0)

        exit_code = child.poll()
        exit_note = f" (exit={exit_code})" if exit_code is not None else ""
        raise RuntimeError(
            "NEXO runtime service did not become ready"
            + exit_note
            + f"; log={service_log_path()}"
        )
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def runtime_service_status() -> dict[str, Any]:
    """Collect a diagnostic snapshot of the runtime service.

    Combines the recorded state (PID, URL, version, fingerprint), the current
    runtime identity, a PID liveness check and a 0.8 s probe of the recorded
    URL. ``ok`` reflects only the probe result. ``stale`` is true when a state
    exists but does not match the current runtime.
    """
    recorded = read_service_state()
    identity = current_runtime_identity()
    recorded_url = str(recorded.get("url") or "")
    pid_text = str(recorded.get("pid") or "")
    recorded_pid = int(recorded.get("pid") or 0) if pid_text.isdigit() else 0
    pid_alive = pid_is_running(recorded_pid)
    reachable = bool(recorded_url and probe_service(recorded_url, timeout=0.8))
    return {
        "ok": reachable,
        "mode": "service" if is_runtime_service_process() else "adapter",
        "pid": recorded_pid,
        "pid_alive": pid_alive,
        "url": recorded_url,
        "stale": bool(recorded and not state_matches_current_runtime(recorded)),
        "runtime_version": identity.get("runtime_version", ""),
        "runtime_fingerprint": identity.get("runtime_fingerprint", ""),
        "state_runtime_version": str(recorded.get("runtime_version") or ""),
        "state_runtime_fingerprint": str(recorded.get("runtime_fingerprint") or ""),
        "state_path": str(service_state_path()),
        "log_path": str(service_log_path()),
        "server_path": str(current_server_path()),
    }
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def run_mcp_proxy_adapter(*, name: str, instructions: str, run_kwargs: dict[str, Any]) -> None:
    """Run this process as a FastMCP proxy in front of the runtime service.

    Ensures the service is available, creates a proxy for its URL with the
    given ``name`` and ``instructions``, and runs it with ``run_kwargs``.
    """
    from fastmcp.server import create_proxy

    service_endpoint = ensure_runtime_service()
    adapter = create_proxy(service_endpoint, name=name, instructions=instructions)
    adapter.run(**run_kwargs)
|
|
@@ -732,6 +732,16 @@ def build_mcp_status(*, client: str = "") -> dict:
|
|
|
732
732
|
marker = state["marker"]
|
|
733
733
|
installed_fp = state.get("installed_fingerprint", "")
|
|
734
734
|
process_fp = state.get("process_fingerprint", "")
|
|
735
|
+
try:
|
|
736
|
+
from runtime_service import runtime_service_status
|
|
737
|
+
|
|
738
|
+
service_status = runtime_service_status()
|
|
739
|
+
except Exception as exc:
|
|
740
|
+
service_status = {
|
|
741
|
+
"ok": False,
|
|
742
|
+
"error": "runtime_service_status_unavailable",
|
|
743
|
+
"message": str(exc)[:300],
|
|
744
|
+
}
|
|
735
745
|
return {
|
|
736
746
|
"ok": True,
|
|
737
747
|
"schema_version": MCP_STATUS_SCHEMA_VERSION,
|
|
@@ -755,6 +765,7 @@ def build_mcp_status(*, client: str = "") -> dict:
|
|
|
755
765
|
"marker_exists": bool(marker.get("exists")),
|
|
756
766
|
"marker_corrupt": bool(marker.get("corrupt")),
|
|
757
767
|
"continuity_api_level": CONTINUITY_API_LEVEL,
|
|
768
|
+
"runtime_service": service_status,
|
|
758
769
|
"version_match": (
|
|
759
770
|
bool(state["installed_version"])
|
|
760
771
|
and bool(state["process_version"])
|
package/src/server.py
CHANGED
|
@@ -117,6 +117,13 @@ from runtime_versioning import (
|
|
|
117
117
|
prime_process_fingerprint,
|
|
118
118
|
prime_process_version,
|
|
119
119
|
)
|
|
120
|
+
from runtime_service import (
|
|
121
|
+
is_runtime_service_process,
|
|
122
|
+
run_mcp_proxy_adapter,
|
|
123
|
+
runtime_service_status,
|
|
124
|
+
should_use_mcp_adapter,
|
|
125
|
+
write_service_state,
|
|
126
|
+
)
|
|
120
127
|
from local_context import api as local_context_api
|
|
121
128
|
from local_context.db import close_local_context_db
|
|
122
129
|
|
|
@@ -766,6 +773,12 @@ def nexo_status(keyword: str = "") -> str:
|
|
|
766
773
|
return handle_status(keyword if keyword else None)
|
|
767
774
|
|
|
768
775
|
|
|
776
|
+
@mcp.tool
def nexo_runtime_service_status() -> str:
    """Return the resident NEXO Runtime Service status for diagnostics."""
    # Serialize the status dict as indented JSON, keeping non-ASCII text as-is.
    status = runtime_service_status()
    return json.dumps(status, ensure_ascii=False, indent=2)
|
|
780
|
+
|
|
781
|
+
|
|
769
782
|
@mcp.tool
|
|
770
783
|
def nexo_local_index_status() -> str:
|
|
771
784
|
"""Return local memory index status for Desktop settings and support diagnostics."""
|
|
@@ -2300,5 +2313,32 @@ def nexo_create_app_token(
|
|
|
2300
2313
|
|
|
2301
2314
|
|
|
2302
2315
|
if __name__ == "__main__":
|
|
2303
|
-
|
|
2304
|
-
|
|
2316
|
+
if not should_use_mcp_adapter():
    # Direct mode: this process initializes the server and serves MCP itself.
    _server_init()
    run_kwargs = _run_kwargs_from_env()
    if is_runtime_service_process():
        # Record where the resident service is listening so adapters can find it.
        host = str(run_kwargs.get("host") or os.environ.get("NEXO_MCP_HOST", "127.0.0.1"))
        port = int(run_kwargs.get("port") or os.environ.get("NEXO_MCP_PORT", "0") or 0)
        path = str(run_kwargs.get("path") or os.environ.get("NEXO_MCP_PATH", "/mcp"))
        write_service_state(
            dict(
                pid=os.getpid(),
                port=port,
                host=host,
                path=path,
                url=f"http://{host}:{port}{path}",
                server_path=str(os.path.abspath(__file__)),
                started_at=time.time(),
                mode="runtime-service",
            )
        )
    mcp.run(**run_kwargs)
else:
    # Adapter mode: act as a thin proxy in front of the resident runtime service.
    run_mcp_proxy_adapter(
        name="nexo",
        instructions=render_core_prompt(
            "server-mcp-instructions",
            assistant_name=_get_ctx().assistant_name,
        ),
        run_kwargs=_run_kwargs_from_env(),
    )
|
|
@@ -2383,6 +2383,21 @@
|
|
|
2383
2383
|
},
|
|
2384
2384
|
"triggers_after": []
|
|
2385
2385
|
},
|
|
2386
|
+
"nexo_runtime_service_status": {
|
|
2387
|
+
"description": "Return resident Runtime Service health, PID, version, fingerprint and state paths",
|
|
2388
|
+
"category": "system",
|
|
2389
|
+
"source": "server",
|
|
2390
|
+
"requires": [],
|
|
2391
|
+
"provides": [
|
|
2392
|
+
"runtime_service_status"
|
|
2393
|
+
],
|
|
2394
|
+
"internal_calls": [],
|
|
2395
|
+
"enforcement": {
|
|
2396
|
+
"level": "none",
|
|
2397
|
+
"rules": []
|
|
2398
|
+
},
|
|
2399
|
+
"triggers_after": []
|
|
2400
|
+
},
|
|
2386
2401
|
"nexo_media_memory_add": {
|
|
2387
2402
|
"description": "Store non-text artifact metadata",
|
|
2388
2403
|
"category": "media",
|