nexo-brain 7.20.23 → 7.20.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.23",
3
+ "version": "7.20.25",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,7 +18,11 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.20.23` is the current packaged-runtime line. Patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
21
+ Version `7.20.25` is the current packaged-runtime line. Patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
22
+
23
+ Previously in `7.20.24`: patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
24
+
25
+ Previously in `7.20.23`: patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
22
26
 
23
27
  Previously in `7.20.22`: patch release over v7.20.19 — Local Memory moved out of the main Brain database, MCP readiness verifies required tools, and split-aware Desktop backups validate the main DB and Local Memory sidecar separately.
24
28
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.23",
3
+ "version": "7.20.25",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -9,11 +9,13 @@ import stat
9
9
  import hashlib
10
10
  import subprocess
11
11
  import sys
12
+ import time
13
+ from functools import lru_cache
12
14
  from pathlib import Path
13
15
  from typing import Any
14
16
 
15
17
  from . import embeddings
16
- from .db import LOCAL_CONTEXT_TABLES, connect_local_context_db_readonly, ensure_local_context_db, get_local_context_db
18
+ from .db import LOCAL_CONTEXT_TABLES, close_local_context_db, connect_local_context_db_readonly, ensure_local_context_db, get_local_context_db
17
19
  from .extractors import chunk_text, contains_secret, entities, extract_text, summarize
18
20
  from .logging import log_event, tail
19
21
  from .privacy import classify_path, is_local_email_tree, is_queryable_path, should_extract, should_skip_file, should_skip_tree
@@ -33,11 +35,43 @@ DEFAULT_SYSTEM_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_SYSTEM_ROOT_DEP
33
35
  DEFAULT_CONTEXT_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_MAX_CHARS", "20000") or "20000")
34
36
  DEFAULT_ROUTER_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_ROUTER_MAX_CHARS", "6000") or "6000")
35
37
  DEFAULT_MAX_JOB_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_INDEX_MAX_JOB_ATTEMPTS", "3") or "3")
38
+ DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_ATTEMPTS", "5") or "5")
39
+ DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS = float(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_DELAY_SECONDS", "0.35") or "0.35")
36
40
  INITIAL_INDEX_COMPLETE_KEY = "initial_index_complete"
37
41
  INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
38
42
  PERFORMANCE_PROFILE_KEY = "performance_profile"
39
43
  DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
40
44
  VALID_CONTEXT_MODES = {"compact", "full"}
45
+ EMBEDDING_REFRESH_JOB = "embedding_refresh"
46
+ HIGH_VALUE_DOCUMENT_SUFFIXES = {
47
+ ".pdf",
48
+ ".doc",
49
+ ".docx",
50
+ ".xls",
51
+ ".xlsx",
52
+ ".ppt",
53
+ ".pptx",
54
+ ".pages",
55
+ ".numbers",
56
+ ".key",
57
+ ".rtf",
58
+ ".odt",
59
+ ".ods",
60
+ ".odp",
61
+ }
62
+ KNOWN_TEXT_SUFFIXES = {
63
+ ".md",
64
+ ".markdown",
65
+ ".txt",
66
+ ".csv",
67
+ ".tsv",
68
+ }
69
+ EMAIL_DOCUMENT_SUFFIXES = {
70
+ ".eml",
71
+ ".emlx",
72
+ ".msg",
73
+ }
74
+ RERANKER_MODEL_SPEC = "cross-encoder-reranker"
41
75
  PERFORMANCE_PROFILES: dict[str, dict[str, Any]] = {
42
76
  "low": {
43
77
  "profile": "low",
@@ -108,6 +142,27 @@ def _close_read_conn(conn) -> None:
108
142
  pass
109
143
 
110
144
 
145
def _sqlite_is_busy(exc: BaseException) -> bool:
    """Return True when *exc* is a SQLite "locked" operational error.

    Only ``sqlite3.OperationalError`` instances qualify, and only when the
    lower-cased message contains the word "locked".
    """
    if not isinstance(exc, sqlite3.OperationalError):
        return False
    message = str(exc).lower()
    return "locked" in message
147
+
148
+
149
def _with_sqlite_busy_retry(callback, *, attempts: int | None = None):
    """Run *callback*, retrying while SQLite reports the database as locked.

    Args:
        callback: Zero-argument callable that performs the database work.
        attempts: Maximum number of calls. A falsy value (``None`` or ``0``)
            falls back to ``DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS``; at least one
            call is always made.

    Returns:
        Whatever *callback* returns on its first successful call.

    Raises:
        sqlite3.OperationalError: Re-raised unchanged when the error is not a
            "locked" error, or when the last allowed attempt also fails.
    """
    max_attempts = max(1, int(attempts or DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS))
    for attempt_number in range(1, max_attempts + 1):
        try:
            return callback()
        except sqlite3.OperationalError as exc:
            if not _sqlite_is_busy(exc) or attempt_number >= max_attempts:
                raise
            # Close the local-context connection before retrying.
            # NOTE(review): presumably this makes the next callback() open a
            # fresh connection — confirm against the .db module.
            close_local_context_db()
            # Linear backoff: 1x, 2x, 3x ... the configured delay.
            time.sleep(DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS * attempt_number)
    # Not reachable: max_attempts >= 1 and every iteration either returns or
    # re-raises on the final attempt.
164
+
165
+
111
166
  def add_root(path: str, *, mode: str = "normal", depth: int | None = None) -> dict:
112
167
  conn = _conn()
113
168
  root_path = norm_path(path)
@@ -609,9 +664,12 @@ def _set_state_conn(conn, key: str, value: str) -> None:
609
664
 
610
665
 
611
666
def _set_state(key: str, value: str) -> None:
    """Persist one state key/value pair and commit it.

    The write and commit run through ``_with_sqlite_busy_retry``, so each
    attempt asks ``_conn()`` for a connection again.
    """

    def _write_and_commit() -> None:
        db = _conn()
        _set_state_conn(db, key, value)
        db.commit()

    _with_sqlite_busy_retry(_write_and_commit)
615
673
 
616
674
 
617
675
  def _get_state_conn(conn, key: str, default: str = "") -> str:
@@ -1065,7 +1123,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
1065
1123
  (version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
1066
1124
  )
1067
1125
  if should_extract(normalized, depth):
1068
- enqueue_job(conn, asset_id, "light_extraction", priority=60)
1126
+ enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path))
1069
1127
  enqueue_job(conn, asset_id, "graph", priority=40)
1070
1128
  return asset_id, changed, "ok"
1071
1129
 
@@ -1176,6 +1234,27 @@ def enqueue_job(conn, asset_id: str, job_type: str, *, priority: int = 50) -> st
1176
1234
  return job_id
1177
1235
 
1178
1236
 
1237
def _extraction_priority(path: Path) -> int:
    """Return the ``light_extraction`` job priority for *path*.

    The lower-cased suffix is checked in this order, first match wins:
    high-value documents (90), known text files (82), e-mail files or paths
    inside a local e-mail tree (70), common source/config/web files (55),
    and everything else (45).
    """
    ext = path.suffix.lower()
    if ext in HIGH_VALUE_DOCUMENT_SUFFIXES:
        priority = 90
    elif ext in KNOWN_TEXT_SUFFIXES:
        priority = 82
    elif ext in EMAIL_DOCUMENT_SUFFIXES or is_local_email_tree(str(path)):
        priority = 70
    elif ext in {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".json", ".yaml", ".yml", ".toml", ".html", ".css"}:
        priority = 55
    else:
        priority = 45
    return priority
1248
+
1249
+
1250
def _scan_entry_sort_key(item: Path) -> tuple[int, int, str]:
    """Build the sort key for one directory entry during a scan.

    Entries that are not regular files sort first with key
    ``(0, 0, lowercase path)``. Regular files follow, ordered by descending
    extraction priority and then by lowercase path. An entry whose
    ``is_file()`` check raises is treated as not being a file.
    """
    try:
        regular_file = item.is_file()
    except Exception:
        regular_file = False
    if regular_file:
        return (1, -_extraction_priority(item), str(item).lower())
    return (0, 0, str(item).lower())
1256
+
1257
+
1179
1258
  def _iter_files(
1180
1259
  conn,
1181
1260
  root_id: int,
@@ -1209,7 +1288,7 @@ def _iter_files(
1209
1288
  seen_dirs.add(key)
1210
1289
  _upsert_dir(conn, root_id, current, seen_at, st)
1211
1290
  try:
1212
- entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
1291
+ entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
1213
1292
  except Exception as exc:
1214
1293
  _record_scan_error(conn, stats, str(current), "quick_index", exc)
1215
1294
  continue
@@ -1393,7 +1472,7 @@ def _scan_known_directory(
1393
1472
  st = current.stat()
1394
1473
  if not current.is_dir():
1395
1474
  continue
1396
- entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
1475
+ entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
1397
1476
  except Exception as exc:
1398
1477
  _record_scan_error(conn, stats, str(current), "live_reconcile", exc)
1399
1478
  continue
@@ -1634,6 +1713,29 @@ def _latest_version_id(conn, asset_id: str) -> str:
1634
1713
  return row["version_id"] if row else stable_id("ver", asset_id)
1635
1714
 
1636
1715
 
1716
def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
    """Embed *text* and insert the result as one ``local_embeddings`` row.

    The model id, revision and dimension come from the record returned by
    ``embeddings.embed_record`` and are also folded into the embedding id.
    The function does not commit.
    """
    embedded = embeddings.embed_record(text)
    model_id = str(embedded["model_id"])
    revision = str(embedded["model_revision"])
    dims = int(embedded["dimension"])
    embedding_id = stable_id("emb", f"{chunk_id}:{model_id}:{revision}:{dims}")
    params = (
        embedding_id,
        asset_id,
        chunk_id,
        model_id,
        revision,
        dims,
        json_dumps(embedded["vector"]),
        now(),
    )
    conn.execute(
        """
        INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        params,
    )
1737
+
1738
+
1637
1739
  def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
1638
1740
  conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
1639
1741
  conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
@@ -1646,23 +1748,63 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
1646
1748
  """,
1647
1749
  (chunk_id, asset_id, version_id, index, chunk, len(tokenize(chunk)), now()),
1648
1750
  )
1649
- vector = embeddings.embed_text(chunk)
1650
- conn.execute(
1651
- """
1652
- INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
1653
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
1654
- """,
1655
- (
1656
- stable_id("emb", f"{chunk_id}:{embeddings.MODEL_ID}:{embeddings.MODEL_REVISION}"),
1657
- asset_id,
1658
- chunk_id,
1659
- embeddings.MODEL_ID,
1660
- embeddings.MODEL_REVISION,
1661
- embeddings.DIMENSION,
1662
- json_dumps(vector),
1663
- now(),
1664
- ),
1665
- )
1751
+ _insert_chunk_embedding(conn, asset_id, chunk_id, chunk)
1752
+
1753
+
1754
def _refresh_asset_embeddings(conn, asset_id: str) -> int:
    """Re-embed every stored chunk of one asset.

    Reads the asset's chunks in ``chunk_index`` order, deletes all of its
    existing embedding rows, and inserts a fresh embedding per chunk. When
    at least one chunk exists the asset's phase is set to ``'embeddings'``.
    The function does not commit.

    Returns:
        The number of chunks that were re-embedded.
    """
    chunk_rows = conn.execute(
        """
        SELECT chunk_id, text
        FROM local_chunks
        WHERE asset_id=?
        ORDER BY chunk_index ASC
        """,
        (asset_id,),
    ).fetchall()
    conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
    for chunk_row in chunk_rows:
        _insert_chunk_embedding(conn, asset_id, chunk_row["chunk_id"], chunk_row["text"])
    refreshed = len(chunk_rows)
    if refreshed:
        conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
    return refreshed
1770
+
1771
+
1772
def _embedding_matches_profile(row, profile: embeddings.EmbeddingProfile) -> bool:
    """Tell whether a stored embedding row was produced by *profile*.

    A ``None`` row never matches. Otherwise the row's ``model_id``,
    ``model_revision`` and ``dimension`` must all equal the profile's values;
    missing row values are read as ``""`` / ``0``.
    """
    if row is None:
        return False
    if str(row["model_id"] or "") != profile.model_id:
        return False
    if str(row["model_revision"] or "") != profile.model_revision:
        return False
    return int(row["dimension"] or 0) == int(profile.dimension)
1780
+
1781
+
1782
def _enqueue_stale_embedding_refresh_jobs(conn, *, limit: int) -> int:
    """Queue embedding-refresh jobs for assets whose embeddings are outdated.

    Nothing is queued while the active embedding profile is the
    deterministic fallback. Otherwise, active assets with privacy class
    ``'normal'`` are selected when any of their chunks has no embedding row,
    or one whose model id, revision or dimension differs from the active
    profile. The least recently updated assets come first, capped at
    ``max(1, limit)``. Each selected asset gets an ``EMBEDDING_REFRESH_JOB``
    at priority 58. The function does not commit.

    Returns:
        The number of assets selected for refresh.
    """
    profile = embeddings.active_profile()
    if profile.kind == "deterministic_embedding":
        return 0
    params = (profile.model_id, profile.model_revision, int(profile.dimension), max(1, int(limit)))
    stale_assets = conn.execute(
        """
        SELECT DISTINCT c.asset_id
        FROM local_chunks c
        JOIN local_assets a ON a.asset_id=c.asset_id
        LEFT JOIN local_embeddings e ON e.chunk_id=c.chunk_id
        WHERE a.status='active'
          AND a.privacy_class='normal'
          AND (
            e.embedding_id IS NULL
            OR e.model_id != ?
            OR e.model_revision != ?
            OR e.dimension != ?
          )
        ORDER BY a.updated_at ASC
        LIMIT ?
        """,
        params,
    ).fetchall()
    queued = 0
    for stale in stale_assets:
        enqueue_job(conn, stale["asset_id"], EMBEDDING_REFRESH_JOB, priority=58)
        queued += 1
    return queued
1666
1808
 
1667
1809
 
1668
1810
  def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -> None:
@@ -1724,6 +1866,9 @@ def process_jobs(*, limit: int = 100) -> dict:
1724
1866
  log_event("info", "jobs_skipped_paused", "Local memory jobs skipped because indexing is paused")
1725
1867
  return {"ok": True, "paused": True, "processed": 0, "failed": 0}
1726
1868
  recovered = _requeue_due_jobs(conn)
1869
+ refresh_queued = _enqueue_stale_embedding_refresh_jobs(conn, limit=max(1, min(int(limit or 1), 100)))
1870
+ if refresh_queued:
1871
+ conn.commit()
1727
1872
  rows = conn.execute(
1728
1873
  """
1729
1874
  SELECT j.*, a.path, a.depth, a.privacy_class, a.status AS asset_status
@@ -1745,6 +1890,7 @@ def process_jobs(*, limit: int = 100) -> dict:
1745
1890
  "UPDATE local_index_jobs SET status='running', claimed_by='local-process', lease_expires_at=?, updated_at=? WHERE job_id=?",
1746
1891
  (now() + 300, now(), job_id),
1747
1892
  )
1893
+ conn.commit()
1748
1894
  try:
1749
1895
  if row["asset_status"] != "active":
1750
1896
  raise FileNotFoundError(row["path"])
@@ -1754,6 +1900,7 @@ def process_jobs(*, limit: int = 100) -> dict:
1754
1900
  (now(), job_id),
1755
1901
  )
1756
1902
  processed += 1
1903
+ conn.commit()
1757
1904
  continue
1758
1905
  if job_type == "light_extraction":
1759
1906
  text, metadata = extract_text(Path(row["path"]))
@@ -1765,6 +1912,7 @@ def process_jobs(*, limit: int = 100) -> dict:
1765
1912
  (now(), job_id),
1766
1913
  )
1767
1914
  processed += 1
1915
+ conn.commit()
1768
1916
  continue
1769
1917
  summary = summarize(text)
1770
1918
  conn.execute(
@@ -1774,6 +1922,8 @@ def process_jobs(*, limit: int = 100) -> dict:
1774
1922
  _replace_chunks(conn, asset_id, version_id, text)
1775
1923
  _replace_entities(conn, asset_id, version_id, entities(text))
1776
1924
  conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
1925
+ elif job_type == EMBEDDING_REFRESH_JOB:
1926
+ _refresh_asset_embeddings(conn, asset_id)
1777
1927
  elif job_type == "graph":
1778
1928
  conn.execute(
1779
1929
  """
@@ -1787,6 +1937,7 @@ def process_jobs(*, limit: int = 100) -> dict:
1787
1937
  (now(), job_id),
1788
1938
  )
1789
1939
  processed += 1
1940
+ conn.commit()
1790
1941
  except Exception as exc:
1791
1942
  failed += 1
1792
1943
  attempts = int(row["attempt_count"] or 0) + 1
@@ -1809,10 +1960,11 @@ def process_jobs(*, limit: int = 100) -> dict:
1809
1960
  technical_detail=str(exc),
1810
1961
  retryable=not terminal,
1811
1962
  )
1963
+ conn.commit()
1812
1964
  conn.commit()
1813
1965
  if processed or failed:
1814
- log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed)
1815
- return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered}
1966
+ log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed, refresh_queued=refresh_queued)
1967
+ return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered, "embedding_refresh_queued": refresh_queued}
1816
1968
 
1817
1969
 
1818
1970
  def run_once(
@@ -2401,27 +2553,36 @@ def diagnostics_tail(limit: int = 100) -> dict:
2401
2553
 
2402
2554
 
2403
2555
  def model_status() -> dict:
2404
- models = [{
2405
- "profile": "local_context_embedding_fallback",
2406
- "name": embeddings.MODEL_ID,
2407
- "kind": "deterministic_embedding",
2408
- "revision": embeddings.MODEL_REVISION,
2409
- "dimension": embeddings.DIMENSION,
2410
- "state": "available",
2556
+ active_embedding = embeddings.active_profile()
2557
+ active_entry = {
2558
+ "profile": active_embedding.profile,
2559
+ "name": active_embedding.model_id,
2560
+ "kind": active_embedding.kind,
2561
+ "revision": active_embedding.model_revision,
2562
+ "dimension": active_embedding.dimension,
2563
+ "state": active_embedding.state,
2411
2564
  "required": True,
2412
- }]
2565
+ "active": True,
2566
+ "problems": list(active_embedding.problems),
2567
+ }
2568
+ models = []
2569
+ active_in_manifest = False
2413
2570
  try:
2414
2571
  import local_models
2415
2572
  for spec in local_models.list_local_model_specs():
2416
2573
  verification = local_models.verify_local_model_dir(spec)
2574
+ state = "available" if verification["ok"] else ("optional_missing" if not spec.required else "not_warmed")
2575
+ is_active = spec.model_id == active_embedding.model_id and spec.revision == active_embedding.model_revision
2576
+ active_in_manifest = bool(active_in_manifest or is_active)
2417
2577
  models.append({
2418
2578
  "profile": spec.name,
2419
2579
  "name": spec.model_id,
2420
2580
  "kind": spec.kind,
2421
2581
  "revision": spec.revision,
2422
2582
  "dimension": spec.dimension,
2423
- "state": "available" if verification["ok"] else "not_warmed",
2583
+ "state": state,
2424
2584
  "required": spec.required,
2585
+ "active": is_active,
2425
2586
  "path": verification["path"],
2426
2587
  "problems": verification["problems"],
2427
2588
  })
@@ -2434,6 +2595,8 @@ def model_status() -> dict:
2434
2595
  "required": False,
2435
2596
  "problems": [str(exc)],
2436
2597
  })
2598
+ if not active_in_manifest:
2599
+ models.insert(0, active_entry)
2437
2600
  return {"ok": True, "models": models}
2438
2601
 
2439
2602
 
@@ -2576,7 +2739,8 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
2576
2739
  def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
2577
2740
  base_rows = conn.execute(
2578
2741
  """
2579
- SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
2742
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
2743
+ e.vector_json, e.model_id, e.model_revision, e.dimension
2580
2744
  FROM local_chunks c
2581
2745
  JOIN local_assets a ON a.asset_id = c.asset_id
2582
2746
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2594,7 +2758,8 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
2594
2758
  placeholders = ",".join("?" for _ in entity_asset_ids)
2595
2759
  entity_rows = conn.execute(
2596
2760
  f"""
2597
- SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
2761
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
2762
+ e.vector_json, e.model_id, e.model_revision, e.dimension
2598
2763
  FROM local_chunks c
2599
2764
  JOIN local_assets a ON a.asset_id = c.asset_id
2600
2765
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2626,6 +2791,54 @@ def _compact_text(value: str, *, max_chars: int) -> str:
2626
2791
  return text[: max(0, max_chars - 1)].rstrip() + "…"
2627
2792
 
2628
2793
 
2794
def _reranker_disabled() -> bool:
    """Report whether the cross-encoder reranker must stay off.

    It is off when ``NEXO_LOCAL_CONTEXT_DISABLE_RERANKER`` is one of
    ``1/true/yes/on`` (case-insensitive, surrounding whitespace ignored), or
    when ``NEXO_TEST_DB`` is set and
    ``NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS`` is not exactly ``"1"``.
    """
    flag = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_RERANKER", "").strip().lower()
    explicitly_off = flag in {"1", "true", "yes", "on"}
    test_run_without_opt_in = (
        bool(os.environ.get("NEXO_TEST_DB"))
        and os.environ.get("NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS") != "1"
    )
    return explicitly_off or test_run_without_opt_in
2801
+
2802
+
2803
@lru_cache(maxsize=1)
def _context_reranker():
    """Return the local cross-encoder reranker, or ``None`` when unavailable.

    ``None`` is returned when the reranker is disabled, or when importing
    the dependencies, resolving the local model files, or constructing the
    encoder raises. The result, including ``None``, is memoized by
    ``lru_cache``, so the lookup runs once until the cache is cleared.
    """
    if _reranker_disabled():
        return None
    try:
        import local_models
        from fastembed.rerank.cross_encoder import TextCrossEncoder

        reranker_spec = local_models.get_local_model_spec(RERANKER_MODEL_SPEC)
        model_dir = local_models.ensure_local_model(reranker_spec.name, local_files_only=True)
        encoder = TextCrossEncoder(reranker_spec.model_id, specific_model_path=str(model_dir))
    except Exception:  # pragma: no cover - host/cache dependent
        return None
    return encoder
2816
+
2817
+
2818
def _rerank_scored_candidates(search_query: str, scored: list[tuple[float, Any]], *, limit: int) -> list[tuple[float, Any]]:
    """Reorder the top of *scored* using the cross-encoder reranker.

    *scored* is a list of ``(base_score, row)`` pairs. The leading
    ``min(len(scored), max(limit * 4, 20), 60)`` entries are reranked by
    descending reranker score, and the remaining entries keep their order
    after them. Base scores are carried through unchanged.

    The input list is returned untouched when it has at most one entry, no
    reranker is available, the reranker raises, or it returns a different
    number of scores than documents.
    """
    if len(scored) <= 1:
        return scored
    reranker = _context_reranker()
    if not reranker:
        return scored
    window = min(len(scored), max(int(limit) * 4, 20), 60)
    head, tail = scored[:window], scored[window:]
    docs = [_compact_text(entry[1]["text"], max_chars=1400) for entry in head]
    try:
        rerank_scores = [float(value) for value in reranker.rerank(search_query, docs)]
    except Exception:  # pragma: no cover - runtime fallback only
        return scored
    if len(rerank_scores) != len(head):
        return scored
    # Stable descending sort over positions keeps ties in their original order.
    order = sorted(range(len(head)), key=lambda position: rerank_scores[position], reverse=True)
    return [(head[position][0], head[position][1]) for position in order] + tail
2840
+
2841
+
2629
2842
  def _payload_size(payload: dict) -> int:
2630
2843
  return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
2631
2844
 
@@ -2961,10 +3174,12 @@ def _context_query_conn(
2961
3174
  normalized_mode, mode_warnings = _normalize_context_mode(mode)
2962
3175
  context_tail = _compact_text(current_context or "", max_chars=1000)
2963
3176
  search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
2964
- qvec = embeddings.embed_text(search_query)
3177
+ query_embedding = embeddings.embed_record(search_query)
3178
+ qvec = query_embedding["vector"]
2965
3179
  entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
2966
3180
  rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
2967
3181
  scored = []
3182
+ stale_embedding_seen = False
2968
3183
  for row in rows:
2969
3184
  if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
2970
3185
  continue
@@ -2973,7 +3188,15 @@ def _context_query_conn(
2973
3188
  path_score = _search_text_score(search_query, row["path"] or "")
2974
3189
  summary_score = _search_text_score(search_query, row["summary"] or "")
2975
3190
  entity_score = entity_boosts.get(row["asset_id"], 0.0)
2976
- vector_score = embeddings.cosine(qvec, vector)
3191
+ vector_score = 0.0
3192
+ if (
3193
+ str(row["model_id"] or "") == str(query_embedding["model_id"])
3194
+ and str(row["model_revision"] or "") == str(query_embedding["model_revision"])
3195
+ and int(row["dimension"] or 0) == int(query_embedding["dimension"])
3196
+ ):
3197
+ vector_score = embeddings.cosine(qvec, vector)
3198
+ elif vector:
3199
+ stale_embedding_seen = True
2977
3200
  score = max(text_score, path_score, summary_score, vector_score)
2978
3201
  if entity_score > 0:
2979
3202
  direct_score = max(text_score, path_score, summary_score)
@@ -2987,6 +3210,7 @@ def _context_query_conn(
2987
3210
  if score > 0:
2988
3211
  scored.append((min(float(score), 1.6), row))
2989
3212
  scored.sort(key=lambda item: item[0], reverse=True)
3213
+ scored = _rerank_scored_candidates(search_query, scored, limit=int(limit))
2990
3214
  assets = []
2991
3215
  chunks = []
2992
3216
  evidence_refs = []
@@ -3025,6 +3249,10 @@ def _context_query_conn(
3025
3249
  ).fetchall()
3026
3250
  relations_payload = [dict(row) for row in relation_rows]
3027
3251
  warnings = list(mode_warnings)
3252
+ if query_embedding.get("kind") == "deterministic_embedding":
3253
+ warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
3254
+ elif stale_embedding_seen:
3255
+ warnings.append("Some local chunks still use an older embedding profile and will be refreshed automatically.")
3028
3256
  if evidence_required and not evidence_refs:
3029
3257
  warnings.append("No local evidence found for this query.")
3030
3258
  summary = ""
@@ -2,32 +2,135 @@ from __future__ import annotations
2
2
 
3
3
  import hashlib
4
4
  import math
5
+ import os
6
+ import warnings
7
+ from dataclasses import dataclass
8
+ from functools import lru_cache
9
+ from typing import Any
5
10
 
6
11
  from .util import tokenize
7
12
 
8
- MODEL_ID = "nexo-local-hash-embedding"
9
- MODEL_REVISION = "1"
10
- DIMENSION = 128
13
+ FALLBACK_MODEL_ID = "nexo-local-hash-embedding"
14
+ FALLBACK_MODEL_REVISION = "1"
15
+ FALLBACK_DIMENSION = 128
16
+ PRIMARY_MODEL_SPEC = "bge-base-embeddings"
11
17
 
18
+ # Backward-compatible constants. Callers that persist vectors should use
19
+ # embed_record(), because the active profile can switch from fallback to BGE.
20
+ MODEL_ID = FALLBACK_MODEL_ID
21
+ MODEL_REVISION = FALLBACK_MODEL_REVISION
22
+ DIMENSION = FALLBACK_DIMENSION
12
23
 
13
- def embed_text(text: str) -> list[float]:
14
- """Deterministic local embedding fallback.
15
-
16
- This is intentionally local and dependency-free. It gives the resolver a
17
- working semantic-ish retrieval substrate even on machines where the pinned
18
- FastEmbed model has not warmed yet. The model id/revision make it safe to
19
- supersede later with pinned model vectors.
20
- """
21
- vec = [0.0] * DIMENSION
24
+
25
@dataclass(frozen=True)
class EmbeddingProfile:
    """Immutable description of the embedding model behind a vector."""

    # Model identifier stored next to each persisted vector.
    model_id: str
    # Revision of the model.
    model_revision: str
    # Vector length recorded for this profile.
    dimension: int
    # Model kind, e.g. "deterministic_embedding" for the hash fallback.
    kind: str
    # Availability state reported by model status.
    state: str
    # Profile/spec name.
    profile: str
    # Messages explaining why this profile was selected; empty by default.
    problems: tuple[str, ...] = ()
34
+
35
+
36
def _hash_embed_text(text: str) -> list[float]:
    """Compute the deterministic hash embedding of *text*.

    Each token is hashed with SHA-256. The first two digest bytes pick one
    of ``FALLBACK_DIMENSION`` buckets and the parity of the third byte picks
    the sign of the +/-1 contribution. The vector is L2-normalized (an
    all-zero vector is left as zeros) and rounded to 8 decimals.
    """
    buckets = [0.0] * FALLBACK_DIMENSION
    for token in tokenize(text):
        digest = hashlib.sha256(token.encode("utf-8", errors="ignore")).digest()
        slot = int.from_bytes(digest[:2], "big") % FALLBACK_DIMENSION
        buckets[slot] += 1.0 if digest[2] % 2 == 0 else -1.0
    length = math.sqrt(sum(component * component for component in buckets)) or 1.0
    return [round(component / length, 8) for component in buckets]
29
45
 
30
46
 
47
def _fallback_profile(*problems: str) -> EmbeddingProfile:
    """Build the profile describing the deterministic hash fallback.

    Args:
        *problems: Optional messages explaining why the fallback is in use;
            falsy entries are dropped.
    """
    recorded = tuple(filter(None, problems))
    return EmbeddingProfile(
        model_id=FALLBACK_MODEL_ID,
        model_revision=FALLBACK_MODEL_REVISION,
        dimension=FALLBACK_DIMENSION,
        kind="deterministic_embedding",
        state="available",
        profile="local_context_embedding_fallback",
        problems=recorded,
    )
57
+
58
+
59
def _fastembed_disabled() -> bool:
    """Report whether the FastEmbed model must not be loaded in this process.

    It is disabled when ``NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED`` is one of
    ``1/true/yes/on`` (case-insensitive, surrounding whitespace ignored), or
    when ``NEXO_TEST_DB`` is set and
    ``NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS`` is not exactly ``"1"``.
    """
    flag = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED", "").strip().lower()
    explicitly_off = flag in {"1", "true", "yes", "on"}
    # The unit suite uses temporary NEXO homes that intentionally do not carry
    # model weights. Keep those tests dependency-free unless explicitly opted in.
    test_run_without_opt_in = (
        bool(os.environ.get("NEXO_TEST_DB"))
        and os.environ.get("NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS") != "1"
    )
    return explicitly_off or test_run_without_opt_in
68
+
69
+
70
@lru_cache(maxsize=1)
def _fastembed_state() -> tuple[Any, EmbeddingProfile] | tuple[None, EmbeddingProfile]:
    """Load the primary embedding model once and describe what is active.

    Returns:
        ``(model, profile)`` when the pinned local model loads, otherwise
        ``(None, fallback_profile)``. The fallback profile carries the reason:
        either the fixed "disabled" message or the text of the exception
        raised while importing or loading. The result is memoized by
        ``lru_cache``, so a failure stays in effect until the cache is
        cleared.
    """
    if _fastembed_disabled():
        return None, _fallback_profile("fastembed disabled for this process")
    try:
        import local_models
        from fastembed import TextEmbedding

        primary = local_models.get_local_model_spec(PRIMARY_MODEL_SPEC)
        weights_dir = local_models.ensure_local_model(primary.name, local_files_only=True)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message=r"The model .* now uses mean pooling.*", category=UserWarning)
            encoder = TextEmbedding(primary.model_id, specific_model_path=str(weights_dir))
        loaded_profile = EmbeddingProfile(
            model_id=primary.model_id,
            model_revision=primary.revision,
            # NOTE(review): 384 is used when the spec declares no dimension;
            # confirm this matches the vectors the pinned model really emits.
            dimension=primary.dimension or 384,
            kind=primary.kind,
            state="available",
            profile=primary.name,
        )
    except Exception as exc:  # pragma: no cover - host/cache dependent
        return None, _fallback_profile(str(exc))
    return encoder, loaded_profile
93
+
94
+
95
def active_profile() -> EmbeddingProfile:
    """Return the profile half of the memoized ``_fastembed_state()`` result."""
    return _fastembed_state()[1]
98
+
99
+
100
def reset_cache() -> None:
    """Clear the memoized model state so the next lookup loads it again."""
    _fastembed_state.cache_clear()
102
+
103
+
104
def embed_record(text: str) -> dict[str, Any]:
    """Embed *text* and return the vector with the metadata of its producer.

    The loaded model is used only when one is present and the active profile
    kind is ``"fastembed_embedding"``. If no such model is available, or
    embedding raises, the deterministic hash embedding is used and the
    record carries the fallback profile's metadata.

    Returns:
        A dict with keys ``vector``, ``model_id``, ``model_revision``,
        ``dimension``, ``profile`` and ``kind``.
    """
    model, profile = _fastembed_state()
    vector = None
    if model is not None and profile.kind == "fastembed_embedding":
        try:
            raw = next(iter(model.embed([text or ""])))
            vector = [float(value) for value in list(raw)]
        except Exception:  # pragma: no cover - runtime fallback only
            # Best-effort: fall through to the hash embedding below.
            vector = None
    if vector is None:
        profile = _fallback_profile()
        vector = _hash_embed_text(text)
    return {
        "vector": vector,
        "model_id": profile.model_id,
        "model_revision": profile.model_revision,
        "dimension": profile.dimension,
        "profile": profile.profile,
        "kind": profile.kind,
    }
128
+
129
+
130
def embed_text(text: str) -> list[float]:
    """Return only the vector from ``embed_record(text)``.

    The model metadata is discarded, so callers that persist vectors should
    call ``embed_record`` instead.
    """
    record = embed_record(text)
    return record["vector"]
132
+
133
+
31
134
  def cosine(a: list[float], b: list[float]) -> float:
32
135
  if not a or not b or len(a) != len(b):
33
136
  return 0.0