nexo-brain 7.20.23 → 7.20.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +5 -1
- package/package.json +1 -1
- package/src/local_context/api.py +267 -39
- package/src/local_context/embeddings.py +116 -13
|
package/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.23",
|
|
3
|
+
"version": "7.20.25",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,11 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
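-
Version `7.20.23` is the current packaged-runtime line. Patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.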
|
|
21
|
+
Version `7.20.25` is the current packaged-runtime line. Patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
|
|
22
|
+
|
|
23
|
+
Previously in `7.20.24`: patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
|
|
24
|
+
|
|
25
|
+
Previously in `7.20.23`: patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
|
|
22
26
|
|
|
23
27
|
Previously in `7.20.22`: patch release over v7.20.19 — Local Memory moved out of the main Brain database, MCP readiness verifies required tools, and split-aware Desktop backups validate the main DB and Local Memory sidecar separately.
|
|
24
28
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.23",
|
|
3
|
+
"version": "7.20.25",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/local_context/api.py
CHANGED
|
@@ -9,11 +9,13 @@ import stat
|
|
|
9
9
|
import hashlib
|
|
10
10
|
import subprocess
|
|
11
11
|
import sys
|
|
12
|
+
import time
|
|
13
|
+
from functools import lru_cache
|
|
12
14
|
from pathlib import Path
|
|
13
15
|
from typing import Any
|
|
14
16
|
|
|
15
17
|
from . import embeddings
|
|
16
|
-
from .db import LOCAL_CONTEXT_TABLES, connect_local_context_db_readonly, ensure_local_context_db, get_local_context_db
|
|
18
|
+
from .db import LOCAL_CONTEXT_TABLES, close_local_context_db, connect_local_context_db_readonly, ensure_local_context_db, get_local_context_db
|
|
17
19
|
from .extractors import chunk_text, contains_secret, entities, extract_text, summarize
|
|
18
20
|
from .logging import log_event, tail
|
|
19
21
|
from .privacy import classify_path, is_local_email_tree, is_queryable_path, should_extract, should_skip_file, should_skip_tree
|
|
@@ -33,11 +35,43 @@ DEFAULT_SYSTEM_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_SYSTEM_ROOT_DEP
|
|
|
33
35
|
DEFAULT_CONTEXT_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_MAX_CHARS", "20000") or "20000")
|
|
34
36
|
DEFAULT_ROUTER_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_ROUTER_MAX_CHARS", "6000") or "6000")
|
|
35
37
|
DEFAULT_MAX_JOB_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_INDEX_MAX_JOB_ATTEMPTS", "3") or "3")
|
|
38
|
+
DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_ATTEMPTS", "5") or "5")
|
|
39
|
+
DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS = float(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_DELAY_SECONDS", "0.35") or "0.35")
|
|
36
40
|
INITIAL_INDEX_COMPLETE_KEY = "initial_index_complete"
|
|
37
41
|
INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
|
|
38
42
|
PERFORMANCE_PROFILE_KEY = "performance_profile"
|
|
39
43
|
DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
|
|
40
44
|
VALID_CONTEXT_MODES = {"compact", "full"}
|
|
45
|
+
EMBEDDING_REFRESH_JOB = "embedding_refresh"
|
|
46
|
+
HIGH_VALUE_DOCUMENT_SUFFIXES = {
|
|
47
|
+
".pdf",
|
|
48
|
+
".doc",
|
|
49
|
+
".docx",
|
|
50
|
+
".xls",
|
|
51
|
+
".xlsx",
|
|
52
|
+
".ppt",
|
|
53
|
+
".pptx",
|
|
54
|
+
".pages",
|
|
55
|
+
".numbers",
|
|
56
|
+
".key",
|
|
57
|
+
".rtf",
|
|
58
|
+
".odt",
|
|
59
|
+
".ods",
|
|
60
|
+
".odp",
|
|
61
|
+
}
|
|
62
|
+
KNOWN_TEXT_SUFFIXES = {
|
|
63
|
+
".md",
|
|
64
|
+
".markdown",
|
|
65
|
+
".txt",
|
|
66
|
+
".csv",
|
|
67
|
+
".tsv",
|
|
68
|
+
}
|
|
69
|
+
EMAIL_DOCUMENT_SUFFIXES = {
|
|
70
|
+
".eml",
|
|
71
|
+
".emlx",
|
|
72
|
+
".msg",
|
|
73
|
+
}
|
|
74
|
+
RERANKER_MODEL_SPEC = "cross-encoder-reranker"
|
|
41
75
|
PERFORMANCE_PROFILES: dict[str, dict[str, Any]] = {
|
|
42
76
|
"low": {
|
|
43
77
|
"profile": "low",
|
|
@@ -108,6 +142,27 @@ def _close_read_conn(conn) -> None:
|
|
|
108
142
|
pass
|
|
109
143
|
|
|
110
144
|
|
|
145
|
+
def _sqlite_is_busy(exc: BaseException) -> bool:
|
|
146
|
+
return isinstance(exc, sqlite3.OperationalError) and "locked" in str(exc).lower()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _with_sqlite_busy_retry(callback, *, attempts: int | None = None):
|
|
150
|
+
max_attempts = max(1, int(attempts or DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS))
|
|
151
|
+
last_exc = None
|
|
152
|
+
for attempt in range(max_attempts):
|
|
153
|
+
try:
|
|
154
|
+
return callback()
|
|
155
|
+
except sqlite3.OperationalError as exc:
|
|
156
|
+
if not _sqlite_is_busy(exc) or attempt >= max_attempts - 1:
|
|
157
|
+
raise
|
|
158
|
+
last_exc = exc
|
|
159
|
+
close_local_context_db()
|
|
160
|
+
time.sleep(DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS * (attempt + 1))
|
|
161
|
+
if last_exc:
|
|
162
|
+
raise last_exc
|
|
163
|
+
return None
|
|
164
|
+
|
|
165
|
+
|
|
111
166
|
def add_root(path: str, *, mode: str = "normal", depth: int | None = None) -> dict:
|
|
112
167
|
conn = _conn()
|
|
113
168
|
root_path = norm_path(path)
|
|
@@ -609,9 +664,12 @@ def _set_state_conn(conn, key: str, value: str) -> None:
|
|
|
609
664
|
|
|
610
665
|
|
|
611
666
|
def _set_state(key: str, value: str) -> None:
    """Persist one state key/value pair and commit, retrying on lock errors.

    The write is wrapped in a closure so that every retry obtains its
    connection again through ``_conn()``; the retry helper closes the local
    context database between attempts.
    """

    def write_state() -> None:
        conn = _conn()
        _set_state_conn(conn, key, value)
        conn.commit()

    _with_sqlite_busy_retry(write_state)
|
|
615
673
|
|
|
616
674
|
|
|
617
675
|
def _get_state_conn(conn, key: str, default: str = "") -> str:
|
|
@@ -1065,7 +1123,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
|
|
|
1065
1123
|
(version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
|
|
1066
1124
|
)
|
|
1067
1125
|
if should_extract(normalized, depth):
|
|
1068
|
-
enqueue_job(conn, asset_id, "light_extraction", priority=
|
|
1126
|
+
enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path))
|
|
1069
1127
|
enqueue_job(conn, asset_id, "graph", priority=40)
|
|
1070
1128
|
return asset_id, changed, "ok"
|
|
1071
1129
|
|
|
@@ -1176,6 +1234,27 @@ def enqueue_job(conn, asset_id: str, job_type: str, *, priority: int = 50) -> st
|
|
|
1176
1234
|
return job_id
|
|
1177
1235
|
|
|
1178
1236
|
|
|
1237
|
+
def _extraction_priority(path: Path) -> int:
    """Return the job priority used when queueing extraction for *path*.

    Priorities by lower-cased file suffix (first match wins):
        90 - suffix in ``HIGH_VALUE_DOCUMENT_SUFFIXES``
        82 - suffix in ``KNOWN_TEXT_SUFFIXES``
        70 - suffix in ``EMAIL_DOCUMENT_SUFFIXES`` or path inside a local email tree
        55 - common source/config/markup suffixes
        45 - everything else
    """
    suffix = path.suffix.lower()
    if suffix in HIGH_VALUE_DOCUMENT_SUFFIXES:
        return 90
    if suffix in KNOWN_TEXT_SUFFIXES:
        return 82
    if suffix in EMAIL_DOCUMENT_SUFFIXES or is_local_email_tree(str(path)):
        return 70
    if suffix in {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".json", ".yaml", ".yml", ".toml", ".html", ".css"}:
        return 55
    return 45
|
|
1248
|
+
|
|
1249
|
+
|
|
1250
|
+
def _scan_entry_sort_key(item: Path) -> tuple[int, int, str]:
|
|
1251
|
+
try:
|
|
1252
|
+
is_file = item.is_file()
|
|
1253
|
+
except Exception:
|
|
1254
|
+
is_file = False
|
|
1255
|
+
return (0 if not is_file else 1, -_extraction_priority(item) if is_file else 0, str(item).lower())
|
|
1256
|
+
|
|
1257
|
+
|
|
1179
1258
|
def _iter_files(
|
|
1180
1259
|
conn,
|
|
1181
1260
|
root_id: int,
|
|
@@ -1209,7 +1288,7 @@ def _iter_files(
|
|
|
1209
1288
|
seen_dirs.add(key)
|
|
1210
1289
|
_upsert_dir(conn, root_id, current, seen_at, st)
|
|
1211
1290
|
try:
|
|
1212
|
-
entries = sorted(current.iterdir(), key=
|
|
1291
|
+
entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
|
|
1213
1292
|
except Exception as exc:
|
|
1214
1293
|
_record_scan_error(conn, stats, str(current), "quick_index", exc)
|
|
1215
1294
|
continue
|
|
@@ -1393,7 +1472,7 @@ def _scan_known_directory(
|
|
|
1393
1472
|
st = current.stat()
|
|
1394
1473
|
if not current.is_dir():
|
|
1395
1474
|
continue
|
|
1396
|
-
entries = sorted(current.iterdir(), key=
|
|
1475
|
+
entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
|
|
1397
1476
|
except Exception as exc:
|
|
1398
1477
|
_record_scan_error(conn, stats, str(current), "live_reconcile", exc)
|
|
1399
1478
|
continue
|
|
@@ -1634,6 +1713,29 @@ def _latest_version_id(conn, asset_id: str) -> str:
|
|
|
1634
1713
|
return row["version_id"] if row else stable_id("ver", asset_id)
|
|
1635
1714
|
|
|
1636
1715
|
|
|
1716
|
+
def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
    """Embed *text* and insert one row into ``local_embeddings``.

    The stored model id, revision and dimension come from the record returned
    by ``embeddings.embed_record``. The embedding id is derived from the chunk
    id together with those three values. The caller is responsible for
    committing.
    """
    record = embeddings.embed_record(text)
    model_id = str(record["model_id"])
    model_revision = str(record["model_revision"])
    dimension = int(record["dimension"])
    embedding_id = stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}")
    conn.execute(
        """
        INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            embedding_id,
            asset_id,
            chunk_id,
            model_id,
            model_revision,
            dimension,
            json_dumps(record["vector"]),
            now(),
        ),
    )
|
|
1737
|
+
|
|
1738
|
+
|
|
1637
1739
|
def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
|
|
1638
1740
|
conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
|
|
1639
1741
|
conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
|
|
@@ -1646,23 +1748,63 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
|
|
|
1646
1748
|
""",
|
|
1647
1749
|
(chunk_id, asset_id, version_id, index, chunk, len(tokenize(chunk)), now()),
|
|
1648
1750
|
)
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1751
|
+
_insert_chunk_embedding(conn, asset_id, chunk_id, chunk)
|
|
1752
|
+
|
|
1753
|
+
|
|
1754
|
+
def _refresh_asset_embeddings(conn, asset_id: str) -> int:
|
|
1755
|
+
rows = conn.execute(
|
|
1756
|
+
"""
|
|
1757
|
+
SELECT chunk_id, text
|
|
1758
|
+
FROM local_chunks
|
|
1759
|
+
WHERE asset_id=?
|
|
1760
|
+
ORDER BY chunk_index ASC
|
|
1761
|
+
""",
|
|
1762
|
+
(asset_id,),
|
|
1763
|
+
).fetchall()
|
|
1764
|
+
conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
|
|
1765
|
+
for row in rows:
|
|
1766
|
+
_insert_chunk_embedding(conn, asset_id, row["chunk_id"], row["text"])
|
|
1767
|
+
if rows:
|
|
1768
|
+
conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
|
|
1769
|
+
return len(rows)
|
|
1770
|
+
|
|
1771
|
+
|
|
1772
|
+
def _embedding_matches_profile(row, profile: embeddings.EmbeddingProfile) -> bool:
|
|
1773
|
+
if row is None:
|
|
1774
|
+
return False
|
|
1775
|
+
return (
|
|
1776
|
+
str(row["model_id"] or "") == profile.model_id
|
|
1777
|
+
and str(row["model_revision"] or "") == profile.model_revision
|
|
1778
|
+
and int(row["dimension"] or 0) == int(profile.dimension)
|
|
1779
|
+
)
|
|
1780
|
+
|
|
1781
|
+
|
|
1782
|
+
def _enqueue_stale_embedding_refresh_jobs(conn, *, limit: int) -> int:
    """Queue embedding-refresh jobs for assets whose embeddings are outdated.

    An asset qualifies when it is active, has privacy class ``'normal'``, and
    at least one of its chunks has either no embedding row or one whose model
    id, revision or dimension differs from the active profile. Nothing is
    queued while the active profile is the deterministic fallback.

    Args:
        conn: Open local-context database connection.
        limit: Maximum number of assets to queue (at least 1 is used).

    Returns:
        The number of assets selected for a refresh job.
    """
    profile = embeddings.active_profile()
    if profile.kind == "deterministic_embedding":
        return 0
    # NOTE(review): if embedding generation falls back to a different profile
    # at refresh time, the same assets would be selected again on the next
    # call — confirm whether enqueue_job de-duplicates or this needs a guard.
    stale_rows = conn.execute(
        """
        SELECT DISTINCT c.asset_id
        FROM local_chunks c
        JOIN local_assets a ON a.asset_id=c.asset_id
        LEFT JOIN local_embeddings e ON e.chunk_id=c.chunk_id
        WHERE a.status='active'
        AND a.privacy_class='normal'
        AND (
        e.embedding_id IS NULL
        OR e.model_id != ?
        OR e.model_revision != ?
        OR e.dimension != ?
        )
        ORDER BY a.updated_at ASC
        LIMIT ?
        """,
        (profile.model_id, profile.model_revision, int(profile.dimension), max(1, int(limit))),
    ).fetchall()
    for stale_row in stale_rows:
        enqueue_job(conn, stale_row["asset_id"], EMBEDDING_REFRESH_JOB, priority=58)
    return len(stale_rows)
|
|
1666
1808
|
|
|
1667
1809
|
|
|
1668
1810
|
def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -> None:
|
|
@@ -1724,6 +1866,9 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1724
1866
|
log_event("info", "jobs_skipped_paused", "Local memory jobs skipped because indexing is paused")
|
|
1725
1867
|
return {"ok": True, "paused": True, "processed": 0, "failed": 0}
|
|
1726
1868
|
recovered = _requeue_due_jobs(conn)
|
|
1869
|
+
refresh_queued = _enqueue_stale_embedding_refresh_jobs(conn, limit=max(1, min(int(limit or 1), 100)))
|
|
1870
|
+
if refresh_queued:
|
|
1871
|
+
conn.commit()
|
|
1727
1872
|
rows = conn.execute(
|
|
1728
1873
|
"""
|
|
1729
1874
|
SELECT j.*, a.path, a.depth, a.privacy_class, a.status AS asset_status
|
|
@@ -1745,6 +1890,7 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1745
1890
|
"UPDATE local_index_jobs SET status='running', claimed_by='local-process', lease_expires_at=?, updated_at=? WHERE job_id=?",
|
|
1746
1891
|
(now() + 300, now(), job_id),
|
|
1747
1892
|
)
|
|
1893
|
+
conn.commit()
|
|
1748
1894
|
try:
|
|
1749
1895
|
if row["asset_status"] != "active":
|
|
1750
1896
|
raise FileNotFoundError(row["path"])
|
|
@@ -1754,6 +1900,7 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1754
1900
|
(now(), job_id),
|
|
1755
1901
|
)
|
|
1756
1902
|
processed += 1
|
|
1903
|
+
conn.commit()
|
|
1757
1904
|
continue
|
|
1758
1905
|
if job_type == "light_extraction":
|
|
1759
1906
|
text, metadata = extract_text(Path(row["path"]))
|
|
@@ -1765,6 +1912,7 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1765
1912
|
(now(), job_id),
|
|
1766
1913
|
)
|
|
1767
1914
|
processed += 1
|
|
1915
|
+
conn.commit()
|
|
1768
1916
|
continue
|
|
1769
1917
|
summary = summarize(text)
|
|
1770
1918
|
conn.execute(
|
|
@@ -1774,6 +1922,8 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1774
1922
|
_replace_chunks(conn, asset_id, version_id, text)
|
|
1775
1923
|
_replace_entities(conn, asset_id, version_id, entities(text))
|
|
1776
1924
|
conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
|
|
1925
|
+
elif job_type == EMBEDDING_REFRESH_JOB:
|
|
1926
|
+
_refresh_asset_embeddings(conn, asset_id)
|
|
1777
1927
|
elif job_type == "graph":
|
|
1778
1928
|
conn.execute(
|
|
1779
1929
|
"""
|
|
@@ -1787,6 +1937,7 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1787
1937
|
(now(), job_id),
|
|
1788
1938
|
)
|
|
1789
1939
|
processed += 1
|
|
1940
|
+
conn.commit()
|
|
1790
1941
|
except Exception as exc:
|
|
1791
1942
|
failed += 1
|
|
1792
1943
|
attempts = int(row["attempt_count"] or 0) + 1
|
|
@@ -1809,10 +1960,11 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1809
1960
|
technical_detail=str(exc),
|
|
1810
1961
|
retryable=not terminal,
|
|
1811
1962
|
)
|
|
1963
|
+
conn.commit()
|
|
1812
1964
|
conn.commit()
|
|
1813
1965
|
if processed or failed:
|
|
1814
|
-
log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed)
|
|
1815
|
-
return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered}
|
|
1966
|
+
log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed, refresh_queued=refresh_queued)
|
|
1967
|
+
return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered, "embedding_refresh_queued": refresh_queued}
|
|
1816
1968
|
|
|
1817
1969
|
|
|
1818
1970
|
def run_once(
|
|
@@ -2401,27 +2553,36 @@ def diagnostics_tail(limit: int = 100) -> dict:
|
|
|
2401
2553
|
|
|
2402
2554
|
|
|
2403
2555
|
def model_status() -> dict:
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
"
|
|
2407
|
-
"
|
|
2408
|
-
"
|
|
2409
|
-
"
|
|
2410
|
-
"
|
|
2556
|
+
active_embedding = embeddings.active_profile()
|
|
2557
|
+
active_entry = {
|
|
2558
|
+
"profile": active_embedding.profile,
|
|
2559
|
+
"name": active_embedding.model_id,
|
|
2560
|
+
"kind": active_embedding.kind,
|
|
2561
|
+
"revision": active_embedding.model_revision,
|
|
2562
|
+
"dimension": active_embedding.dimension,
|
|
2563
|
+
"state": active_embedding.state,
|
|
2411
2564
|
"required": True,
|
|
2412
|
-
|
|
2565
|
+
"active": True,
|
|
2566
|
+
"problems": list(active_embedding.problems),
|
|
2567
|
+
}
|
|
2568
|
+
models = []
|
|
2569
|
+
active_in_manifest = False
|
|
2413
2570
|
try:
|
|
2414
2571
|
import local_models
|
|
2415
2572
|
for spec in local_models.list_local_model_specs():
|
|
2416
2573
|
verification = local_models.verify_local_model_dir(spec)
|
|
2574
|
+
state = "available" if verification["ok"] else ("optional_missing" if not spec.required else "not_warmed")
|
|
2575
|
+
is_active = spec.model_id == active_embedding.model_id and spec.revision == active_embedding.model_revision
|
|
2576
|
+
active_in_manifest = bool(active_in_manifest or is_active)
|
|
2417
2577
|
models.append({
|
|
2418
2578
|
"profile": spec.name,
|
|
2419
2579
|
"name": spec.model_id,
|
|
2420
2580
|
"kind": spec.kind,
|
|
2421
2581
|
"revision": spec.revision,
|
|
2422
2582
|
"dimension": spec.dimension,
|
|
2423
|
-
"state":
|
|
2583
|
+
"state": state,
|
|
2424
2584
|
"required": spec.required,
|
|
2585
|
+
"active": is_active,
|
|
2425
2586
|
"path": verification["path"],
|
|
2426
2587
|
"problems": verification["problems"],
|
|
2427
2588
|
})
|
|
@@ -2434,6 +2595,8 @@ def model_status() -> dict:
|
|
|
2434
2595
|
"required": False,
|
|
2435
2596
|
"problems": [str(exc)],
|
|
2436
2597
|
})
|
|
2598
|
+
if not active_in_manifest:
|
|
2599
|
+
models.insert(0, active_entry)
|
|
2437
2600
|
return {"ok": True, "models": models}
|
|
2438
2601
|
|
|
2439
2602
|
|
|
@@ -2576,7 +2739,8 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
|
|
|
2576
2739
|
def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
|
|
2577
2740
|
base_rows = conn.execute(
|
|
2578
2741
|
"""
|
|
2579
|
-
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2742
|
+
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2743
|
+
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
2580
2744
|
FROM local_chunks c
|
|
2581
2745
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
2582
2746
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -2594,7 +2758,8 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
|
|
|
2594
2758
|
placeholders = ",".join("?" for _ in entity_asset_ids)
|
|
2595
2759
|
entity_rows = conn.execute(
|
|
2596
2760
|
f"""
|
|
2597
|
-
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2761
|
+
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2762
|
+
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
2598
2763
|
FROM local_chunks c
|
|
2599
2764
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
2600
2765
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -2626,6 +2791,54 @@ def _compact_text(value: str, *, max_chars: int) -> str:
|
|
|
2626
2791
|
return text[: max(0, max_chars - 1)].rstrip() + "…"
|
|
2627
2792
|
|
|
2628
2793
|
|
|
2794
|
+
def _reranker_disabled() -> bool:
|
|
2795
|
+
value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_RERANKER", "").strip().lower()
|
|
2796
|
+
if value in {"1", "true", "yes", "on"}:
|
|
2797
|
+
return True
|
|
2798
|
+
if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS") != "1":
|
|
2799
|
+
return True
|
|
2800
|
+
return False
|
|
2801
|
+
|
|
2802
|
+
|
|
2803
|
+
@lru_cache(maxsize=1)
def _context_reranker():
    """Return the local cross-encoder reranker, or None when unavailable.

    The result is memoized by ``lru_cache``, including a ``None`` result, so a
    failed or disabled load is not retried on later calls. The model is
    resolved with ``local_files_only=True``.
    """
    if _reranker_disabled():
        return None
    try:
        # Imported lazily so a missing optional dependency only disables reranking.
        import local_models
        from fastembed.rerank.cross_encoder import TextCrossEncoder

        spec = local_models.get_local_model_spec(RERANKER_MODEL_SPEC)
        target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
        return TextCrossEncoder(spec.model_id, specific_model_path=str(target_dir))
    except Exception:  # pragma: no cover - host/cache dependent
        return None
|
|
2816
|
+
|
|
2817
|
+
|
|
2818
|
+
def _rerank_scored_candidates(search_query: str, scored: list[tuple[float, Any]], *, limit: int) -> list[tuple[float, Any]]:
|
|
2819
|
+
if len(scored) <= 1:
|
|
2820
|
+
return scored
|
|
2821
|
+
reranker = _context_reranker()
|
|
2822
|
+
if not reranker:
|
|
2823
|
+
return scored
|
|
2824
|
+
head_count = min(len(scored), max(int(limit) * 4, 20), 60)
|
|
2825
|
+
head = scored[:head_count]
|
|
2826
|
+
tail = scored[head_count:]
|
|
2827
|
+
docs = [_compact_text(row["text"], max_chars=1400) for _score, row in head]
|
|
2828
|
+
try:
|
|
2829
|
+
scores = [float(score) for score in reranker.rerank(search_query, docs)]
|
|
2830
|
+
except Exception: # pragma: no cover - runtime fallback only
|
|
2831
|
+
return scored
|
|
2832
|
+
if len(scores) != len(head):
|
|
2833
|
+
return scored
|
|
2834
|
+
reranked = sorted(
|
|
2835
|
+
((base_score, rerank_score, row) for (base_score, row), rerank_score in zip(head, scores)),
|
|
2836
|
+
key=lambda item: item[1],
|
|
2837
|
+
reverse=True,
|
|
2838
|
+
)
|
|
2839
|
+
return [(base_score, row) for base_score, _rerank_score, row in reranked] + tail
|
|
2840
|
+
|
|
2841
|
+
|
|
2629
2842
|
def _payload_size(payload: dict) -> int:
|
|
2630
2843
|
return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
|
|
2631
2844
|
|
|
@@ -2961,10 +3174,12 @@ def _context_query_conn(
|
|
|
2961
3174
|
normalized_mode, mode_warnings = _normalize_context_mode(mode)
|
|
2962
3175
|
context_tail = _compact_text(current_context or "", max_chars=1000)
|
|
2963
3176
|
search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
|
|
2964
|
-
|
|
3177
|
+
query_embedding = embeddings.embed_record(search_query)
|
|
3178
|
+
qvec = query_embedding["vector"]
|
|
2965
3179
|
entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
|
|
2966
3180
|
rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
|
|
2967
3181
|
scored = []
|
|
3182
|
+
stale_embedding_seen = False
|
|
2968
3183
|
for row in rows:
|
|
2969
3184
|
if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
|
|
2970
3185
|
continue
|
|
@@ -2973,7 +3188,15 @@ def _context_query_conn(
|
|
|
2973
3188
|
path_score = _search_text_score(search_query, row["path"] or "")
|
|
2974
3189
|
summary_score = _search_text_score(search_query, row["summary"] or "")
|
|
2975
3190
|
entity_score = entity_boosts.get(row["asset_id"], 0.0)
|
|
2976
|
-
vector_score =
|
|
3191
|
+
vector_score = 0.0
|
|
3192
|
+
if (
|
|
3193
|
+
str(row["model_id"] or "") == str(query_embedding["model_id"])
|
|
3194
|
+
and str(row["model_revision"] or "") == str(query_embedding["model_revision"])
|
|
3195
|
+
and int(row["dimension"] or 0) == int(query_embedding["dimension"])
|
|
3196
|
+
):
|
|
3197
|
+
vector_score = embeddings.cosine(qvec, vector)
|
|
3198
|
+
elif vector:
|
|
3199
|
+
stale_embedding_seen = True
|
|
2977
3200
|
score = max(text_score, path_score, summary_score, vector_score)
|
|
2978
3201
|
if entity_score > 0:
|
|
2979
3202
|
direct_score = max(text_score, path_score, summary_score)
|
|
@@ -2987,6 +3210,7 @@ def _context_query_conn(
|
|
|
2987
3210
|
if score > 0:
|
|
2988
3211
|
scored.append((min(float(score), 1.6), row))
|
|
2989
3212
|
scored.sort(key=lambda item: item[0], reverse=True)
|
|
3213
|
+
scored = _rerank_scored_candidates(search_query, scored, limit=int(limit))
|
|
2990
3214
|
assets = []
|
|
2991
3215
|
chunks = []
|
|
2992
3216
|
evidence_refs = []
|
|
@@ -3025,6 +3249,10 @@ def _context_query_conn(
|
|
|
3025
3249
|
).fetchall()
|
|
3026
3250
|
relations_payload = [dict(row) for row in relation_rows]
|
|
3027
3251
|
warnings = list(mode_warnings)
|
|
3252
|
+
if query_embedding.get("kind") == "deterministic_embedding":
|
|
3253
|
+
warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
|
|
3254
|
+
elif stale_embedding_seen:
|
|
3255
|
+
warnings.append("Some local chunks still use an older embedding profile and will be refreshed automatically.")
|
|
3028
3256
|
if evidence_required and not evidence_refs:
|
|
3029
3257
|
warnings.append("No local evidence found for this query.")
|
|
3030
3258
|
summary = ""
|
|
package/src/local_context/embeddings.py
CHANGED
|
@@ -2,32 +2,135 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import math
|
|
5
|
+
import os
|
|
6
|
+
import warnings
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from functools import lru_cache
|
|
9
|
+
from typing import Any
|
|
5
10
|
|
|
6
11
|
from .util import tokenize
|
|
7
12
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
13
|
+
FALLBACK_MODEL_ID = "nexo-local-hash-embedding"
|
|
14
|
+
FALLBACK_MODEL_REVISION = "1"
|
|
15
|
+
FALLBACK_DIMENSION = 128
|
|
16
|
+
PRIMARY_MODEL_SPEC = "bge-base-embeddings"
|
|
11
17
|
|
|
18
|
+
# Backward-compatible constants. Callers that persist vectors should use
|
|
19
|
+
# embed_record(), because the active profile can switch from fallback to BGE.
|
|
20
|
+
MODEL_ID = FALLBACK_MODEL_ID
|
|
21
|
+
MODEL_REVISION = FALLBACK_MODEL_REVISION
|
|
22
|
+
DIMENSION = FALLBACK_DIMENSION
|
|
12
23
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class EmbeddingProfile:
    """Immutable description of the embedding model that produces vectors."""

    # Identifier stored alongside every persisted vector.
    model_id: str
    # Revision string of the model.
    model_revision: str
    # Vector dimension recorded for this profile.
    dimension: int
    # Profile kind, e.g. "deterministic_embedding" for the hash fallback.
    kind: str
    # Availability state reported for the profile.
    state: str
    # Profile name.
    profile: str
    # Messages explaining why a preferred model could not be used, if any.
    problems: tuple[str, ...] = ()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _hash_embed_text(text: str) -> list[float]:
    """Return the deterministic hash-based fallback embedding of *text*.

    Every token is hashed with SHA-256. The first two digest bytes select a
    bucket (modulo ``FALLBACK_DIMENSION``) and the parity of the third byte
    selects the sign added to that bucket. The vector is then L2-normalized
    and rounded to 8 decimals; an all-zero vector is returned as zeros.
    """
    vec = [0.0] * FALLBACK_DIMENSION
    for token in tokenize(text):
        digest = hashlib.sha256(token.encode("utf-8", errors="ignore")).digest()
        idx = int.from_bytes(digest[:2], "big") % FALLBACK_DIMENSION
        sign = -1.0 if digest[2] % 2 else 1.0
        vec[idx] += sign
    # "or 1.0" avoids dividing by zero when no token contributed.
    norm = math.sqrt(sum(value * value for value in vec)) or 1.0
    return [round(value / norm, 8) for value in vec]
|
|
29
45
|
|
|
30
46
|
|
|
47
|
+
def _fallback_profile(*problems: str) -> EmbeddingProfile:
    """Build the profile describing the deterministic hash embedding.

    Args:
        *problems: Optional messages explaining why the fallback is in use;
            empty strings are dropped.
    """
    reported = tuple(item for item in problems if item)
    return EmbeddingProfile(
        model_id=FALLBACK_MODEL_ID,
        model_revision=FALLBACK_MODEL_REVISION,
        dimension=FALLBACK_DIMENSION,
        kind="deterministic_embedding",
        state="available",
        profile="local_context_embedding_fallback",
        problems=reported,
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _fastembed_disabled() -> bool:
|
|
60
|
+
value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED", "").strip().lower()
|
|
61
|
+
if value in {"1", "true", "yes", "on"}:
|
|
62
|
+
return True
|
|
63
|
+
# The unit suite uses temporary NEXO homes that intentionally do not carry
|
|
64
|
+
# model weights. Keep those tests dependency-free unless explicitly opted in.
|
|
65
|
+
if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS") != "1":
|
|
66
|
+
return True
|
|
67
|
+
return False
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@lru_cache(maxsize=1)
def _fastembed_state() -> tuple[Any | None, EmbeddingProfile]:
    """Load the primary embedding model once and describe the result.

    Returns:
        ``(model, profile)`` when the pinned local model loads, otherwise
        ``(None, fallback_profile)`` whose ``problems`` carries the reason.
        The result is memoized by ``lru_cache``; ``_fastembed_state.cache_clear()``
        forces a reload.
    """
    if _fastembed_disabled():
        return None, _fallback_profile("fastembed disabled for this process")
    try:
        # Imported lazily so a missing optional dependency selects the fallback.
        import local_models
        from fastembed import TextEmbedding

        spec = local_models.get_local_model_spec(PRIMARY_MODEL_SPEC)
        target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message=r"The model .* now uses mean pooling.*", category=UserWarning)
            model = TextEmbedding(spec.model_id, specific_model_path=str(target_dir))
        return model, EmbeddingProfile(
            model_id=spec.model_id,
            model_revision=spec.revision,
            # NOTE(review): 384 is used when the spec declares no dimension;
            # confirm it matches the real vector size of the pinned model.
            dimension=spec.dimension or 384,
            kind=spec.kind,
            state="available",
            profile=spec.name,
        )
    except Exception as exc:  # pragma: no cover - host/cache dependent
        return None, _fallback_profile(str(exc))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def active_profile() -> EmbeddingProfile:
    """Return the profile half of the cached ``_fastembed_state()`` result."""
    return _fastembed_state()[1]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def reset_cache() -> None:
    """Clear the memoized model/profile so the next call re-evaluates it."""
    _fastembed_state.cache_clear()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def embed_record(text: str) -> dict[str, Any]:
    """Embed *text* and return the vector together with its provenance.

    The loaded model is used when one is available and the profile kind is
    ``"fastembed_embedding"``. Otherwise, or when the model raises, the
    deterministic hash embedding is used and the record carries the fallback
    profile's identifiers.

    Returns:
        A dict with keys ``vector``, ``model_id``, ``model_revision``,
        ``dimension``, ``profile`` and ``kind``.
    """
    model, profile = _fastembed_state()
    vector = None
    if model is not None and profile.kind == "fastembed_embedding":
        try:
            raw_vector = list(next(iter(model.embed([text or ""]))))
            vector = [float(value) for value in raw_vector]
        except Exception:  # pragma: no cover - runtime fallback only
            # Deliberate best effort: a model failure degrades to the hash embedding.
            vector = None
    if vector is None:
        profile = _fallback_profile()
        vector = _hash_embed_text(text)
    return {
        "vector": vector,
        "model_id": profile.model_id,
        "model_revision": profile.model_revision,
        "dimension": profile.dimension,
        "profile": profile.profile,
        "kind": profile.kind,
    }
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def embed_text(text: str) -> list[float]:
    """Return only the vector of ``embed_record(text)``.

    Callers that persist vectors should use ``embed_record`` so the model
    identifiers are stored with the vector.
    """
    record = embed_record(text)
    return record["vector"]
|
|
132
|
+
|
|
133
|
+
|
|
31
134
|
def cosine(a: list[float], b: list[float]) -> float:
|
|
32
135
|
if not a or not b or len(a) != len(b):
|
|
33
136
|
return 0.0
|