nexo-brain 7.20.24 → 7.20.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.24",
3
+ "version": "7.20.25",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,7 +18,9 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.20.24` is the current packaged-runtime line. Patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
21
+ Version `7.20.25` is the current packaged-runtime line. Patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
22
+
23
+ Previously in `7.20.24`: patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
22
24
 
23
25
  Previously in `7.20.23`: patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
24
26
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.24",
3
+ "version": "7.20.25",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -10,6 +10,7 @@ import hashlib
10
10
  import subprocess
11
11
  import sys
12
12
  import time
13
+ from functools import lru_cache
13
14
  from pathlib import Path
14
15
  from typing import Any
15
16
 
@@ -41,6 +42,36 @@ INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
41
42
  PERFORMANCE_PROFILE_KEY = "performance_profile"
42
43
  DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
43
44
  VALID_CONTEXT_MODES = {"compact", "full"}
45
+ EMBEDDING_REFRESH_JOB = "embedding_refresh"
46
+ HIGH_VALUE_DOCUMENT_SUFFIXES = {
47
+ ".pdf",
48
+ ".doc",
49
+ ".docx",
50
+ ".xls",
51
+ ".xlsx",
52
+ ".ppt",
53
+ ".pptx",
54
+ ".pages",
55
+ ".numbers",
56
+ ".key",
57
+ ".rtf",
58
+ ".odt",
59
+ ".ods",
60
+ ".odp",
61
+ }
62
+ KNOWN_TEXT_SUFFIXES = {
63
+ ".md",
64
+ ".markdown",
65
+ ".txt",
66
+ ".csv",
67
+ ".tsv",
68
+ }
69
+ EMAIL_DOCUMENT_SUFFIXES = {
70
+ ".eml",
71
+ ".emlx",
72
+ ".msg",
73
+ }
74
+ RERANKER_MODEL_SPEC = "cross-encoder-reranker"
44
75
  PERFORMANCE_PROFILES: dict[str, dict[str, Any]] = {
45
76
  "low": {
46
77
  "profile": "low",
@@ -1092,7 +1123,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
1092
1123
  (version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
1093
1124
  )
1094
1125
  if should_extract(normalized, depth):
1095
- enqueue_job(conn, asset_id, "light_extraction", priority=60)
1126
+ enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path))
1096
1127
  enqueue_job(conn, asset_id, "graph", priority=40)
1097
1128
  return asset_id, changed, "ok"
1098
1129
 
@@ -1203,6 +1234,27 @@ def enqueue_job(conn, asset_id: str, job_type: str, *, priority: int = 50) -> st
1203
1234
  return job_id
1204
1235
 
1205
1236
 
1237
def _extraction_priority(path: Path) -> int:
    """Return the light-extraction job priority for *path*.

    The rank is chosen from the lower-cased file suffix, with one extra
    check for files that live inside a local e-mail tree. The result is
    passed as ``priority`` when the extraction job is enqueued and is negated
    by the directory-scan sort key so higher-ranked files sort first.

    Ranks: office/PDF-style documents 90, plain-text notes and tables 82,
    e-mail files (or anything under a local e-mail tree) 70, source/config
    files 55, everything else 45.
    """
    ext = path.suffix.lower()
    code_like_suffixes = {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".json", ".yaml", ".yml", ".toml", ".html", ".css"}
    if ext in HIGH_VALUE_DOCUMENT_SUFFIXES:
        priority = 90
    elif ext in KNOWN_TEXT_SUFFIXES:
        priority = 82
    elif ext in EMAIL_DOCUMENT_SUFFIXES or is_local_email_tree(str(path)):
        # The tree check only runs when the suffix alone did not decide.
        priority = 70
    elif ext in code_like_suffixes:
        priority = 55
    else:
        priority = 45
    return priority
1248
+
1249
+
1250
+ def _scan_entry_sort_key(item: Path) -> tuple[int, int, str]:
1251
+ try:
1252
+ is_file = item.is_file()
1253
+ except Exception:
1254
+ is_file = False
1255
+ return (0 if not is_file else 1, -_extraction_priority(item) if is_file else 0, str(item).lower())
1256
+
1257
+
1206
1258
  def _iter_files(
1207
1259
  conn,
1208
1260
  root_id: int,
@@ -1236,7 +1288,7 @@ def _iter_files(
1236
1288
  seen_dirs.add(key)
1237
1289
  _upsert_dir(conn, root_id, current, seen_at, st)
1238
1290
  try:
1239
- entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
1291
+ entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
1240
1292
  except Exception as exc:
1241
1293
  _record_scan_error(conn, stats, str(current), "quick_index", exc)
1242
1294
  continue
@@ -1420,7 +1472,7 @@ def _scan_known_directory(
1420
1472
  st = current.stat()
1421
1473
  if not current.is_dir():
1422
1474
  continue
1423
- entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
1475
+ entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
1424
1476
  except Exception as exc:
1425
1477
  _record_scan_error(conn, stats, str(current), "live_reconcile", exc)
1426
1478
  continue
@@ -1661,6 +1713,29 @@ def _latest_version_id(conn, asset_id: str) -> str:
1661
1713
  return row["version_id"] if row else stable_id("ver", asset_id)
1662
1714
 
1663
1715
 
1716
def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
    """Embed *text* and insert the result as one ``local_embeddings`` row.

    The model id, revision and dimension are taken from the record returned
    by ``embeddings.embed_record`` rather than from module constants, so the
    row describes the model that actually produced the vector. The embedding
    id is derived from the chunk id plus those three values.
    """
    embedded = embeddings.embed_record(text)
    model_id = str(embedded["model_id"])
    model_revision = str(embedded["model_revision"])
    dimension = int(embedded["dimension"])
    embedding_id = stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}")
    vector_json = json_dumps(embedded["vector"])
    params = (
        embedding_id,
        asset_id,
        chunk_id,
        model_id,
        model_revision,
        dimension,
        vector_json,
        now(),
    )
    conn.execute(
        """
        INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        params,
    )
1737
+
1738
+
1664
1739
  def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
1665
1740
  conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
1666
1741
  conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
@@ -1673,23 +1748,63 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
1673
1748
  """,
1674
1749
  (chunk_id, asset_id, version_id, index, chunk, len(tokenize(chunk)), now()),
1675
1750
  )
1676
- vector = embeddings.embed_text(chunk)
1677
- conn.execute(
1678
- """
1679
- INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
1680
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
1681
- """,
1682
- (
1683
- stable_id("emb", f"{chunk_id}:{embeddings.MODEL_ID}:{embeddings.MODEL_REVISION}"),
1684
- asset_id,
1685
- chunk_id,
1686
- embeddings.MODEL_ID,
1687
- embeddings.MODEL_REVISION,
1688
- embeddings.DIMENSION,
1689
- json_dumps(vector),
1690
- now(),
1691
- ),
1692
- )
1751
+ _insert_chunk_embedding(conn, asset_id, chunk_id, chunk)
1752
+
1753
+
1754
+ def _refresh_asset_embeddings(conn, asset_id: str) -> int:
1755
+ rows = conn.execute(
1756
+ """
1757
+ SELECT chunk_id, text
1758
+ FROM local_chunks
1759
+ WHERE asset_id=?
1760
+ ORDER BY chunk_index ASC
1761
+ """,
1762
+ (asset_id,),
1763
+ ).fetchall()
1764
+ conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
1765
+ for row in rows:
1766
+ _insert_chunk_embedding(conn, asset_id, row["chunk_id"], row["text"])
1767
+ if rows:
1768
+ conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
1769
+ return len(rows)
1770
+
1771
+
1772
+ def _embedding_matches_profile(row, profile: embeddings.EmbeddingProfile) -> bool:
1773
+ if row is None:
1774
+ return False
1775
+ return (
1776
+ str(row["model_id"] or "") == profile.model_id
1777
+ and str(row["model_revision"] or "") == profile.model_revision
1778
+ and int(row["dimension"] or 0) == int(profile.dimension)
1779
+ )
1780
+
1781
+
1782
def _enqueue_stale_embedding_refresh_jobs(conn, *, limit: int) -> int:
    """Queue embedding-refresh jobs for assets whose vectors are out of date.

    Nothing is queued while the active embedding profile is the
    deterministic fallback. Otherwise, active assets with privacy class
    ``'normal'`` are selected when any of their chunks has no embedding or
    an embedding whose model id, revision or dimension differs from the
    active profile. Least recently updated assets come first and at most
    ``max(1, limit)`` are taken.

    Returns the number of assets selected for refresh.
    """
    profile = embeddings.active_profile()
    if profile.kind == "deterministic_embedding":
        return 0
    params = (
        profile.model_id,
        profile.model_revision,
        int(profile.dimension),
        max(1, int(limit)),
    )
    stale_assets = conn.execute(
        """
        SELECT DISTINCT c.asset_id
        FROM local_chunks c
        JOIN local_assets a ON a.asset_id=c.asset_id
        LEFT JOIN local_embeddings e ON e.chunk_id=c.chunk_id
        WHERE a.status='active'
          AND a.privacy_class='normal'
          AND (
            e.embedding_id IS NULL
            OR e.model_id != ?
            OR e.model_revision != ?
            OR e.dimension != ?
          )
        ORDER BY a.updated_at ASC
        LIMIT ?
        """,
        params,
    ).fetchall()
    queued = 0
    for stale in stale_assets:
        enqueue_job(conn, stale["asset_id"], EMBEDDING_REFRESH_JOB, priority=58)
        queued += 1
    return queued
1693
1808
 
1694
1809
 
1695
1810
  def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -> None:
@@ -1751,6 +1866,9 @@ def process_jobs(*, limit: int = 100) -> dict:
1751
1866
  log_event("info", "jobs_skipped_paused", "Local memory jobs skipped because indexing is paused")
1752
1867
  return {"ok": True, "paused": True, "processed": 0, "failed": 0}
1753
1868
  recovered = _requeue_due_jobs(conn)
1869
+ refresh_queued = _enqueue_stale_embedding_refresh_jobs(conn, limit=max(1, min(int(limit or 1), 100)))
1870
+ if refresh_queued:
1871
+ conn.commit()
1754
1872
  rows = conn.execute(
1755
1873
  """
1756
1874
  SELECT j.*, a.path, a.depth, a.privacy_class, a.status AS asset_status
@@ -1804,6 +1922,8 @@ def process_jobs(*, limit: int = 100) -> dict:
1804
1922
  _replace_chunks(conn, asset_id, version_id, text)
1805
1923
  _replace_entities(conn, asset_id, version_id, entities(text))
1806
1924
  conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
1925
+ elif job_type == EMBEDDING_REFRESH_JOB:
1926
+ _refresh_asset_embeddings(conn, asset_id)
1807
1927
  elif job_type == "graph":
1808
1928
  conn.execute(
1809
1929
  """
@@ -1843,8 +1963,8 @@ def process_jobs(*, limit: int = 100) -> dict:
1843
1963
  conn.commit()
1844
1964
  conn.commit()
1845
1965
  if processed or failed:
1846
- log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed)
1847
- return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered}
1966
+ log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed, refresh_queued=refresh_queued)
1967
+ return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered, "embedding_refresh_queued": refresh_queued}
1848
1968
 
1849
1969
 
1850
1970
  def run_once(
@@ -2433,27 +2553,36 @@ def diagnostics_tail(limit: int = 100) -> dict:
2433
2553
 
2434
2554
 
2435
2555
  def model_status() -> dict:
2436
- models = [{
2437
- "profile": "local_context_embedding_fallback",
2438
- "name": embeddings.MODEL_ID,
2439
- "kind": "deterministic_embedding",
2440
- "revision": embeddings.MODEL_REVISION,
2441
- "dimension": embeddings.DIMENSION,
2442
- "state": "available",
2556
+ active_embedding = embeddings.active_profile()
2557
+ active_entry = {
2558
+ "profile": active_embedding.profile,
2559
+ "name": active_embedding.model_id,
2560
+ "kind": active_embedding.kind,
2561
+ "revision": active_embedding.model_revision,
2562
+ "dimension": active_embedding.dimension,
2563
+ "state": active_embedding.state,
2443
2564
  "required": True,
2444
- }]
2565
+ "active": True,
2566
+ "problems": list(active_embedding.problems),
2567
+ }
2568
+ models = []
2569
+ active_in_manifest = False
2445
2570
  try:
2446
2571
  import local_models
2447
2572
  for spec in local_models.list_local_model_specs():
2448
2573
  verification = local_models.verify_local_model_dir(spec)
2574
+ state = "available" if verification["ok"] else ("optional_missing" if not spec.required else "not_warmed")
2575
+ is_active = spec.model_id == active_embedding.model_id and spec.revision == active_embedding.model_revision
2576
+ active_in_manifest = bool(active_in_manifest or is_active)
2449
2577
  models.append({
2450
2578
  "profile": spec.name,
2451
2579
  "name": spec.model_id,
2452
2580
  "kind": spec.kind,
2453
2581
  "revision": spec.revision,
2454
2582
  "dimension": spec.dimension,
2455
- "state": "available" if verification["ok"] else "not_warmed",
2583
+ "state": state,
2456
2584
  "required": spec.required,
2585
+ "active": is_active,
2457
2586
  "path": verification["path"],
2458
2587
  "problems": verification["problems"],
2459
2588
  })
@@ -2466,6 +2595,8 @@ def model_status() -> dict:
2466
2595
  "required": False,
2467
2596
  "problems": [str(exc)],
2468
2597
  })
2598
+ if not active_in_manifest:
2599
+ models.insert(0, active_entry)
2469
2600
  return {"ok": True, "models": models}
2470
2601
 
2471
2602
 
@@ -2608,7 +2739,8 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
2608
2739
  def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
2609
2740
  base_rows = conn.execute(
2610
2741
  """
2611
- SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
2742
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
2743
+ e.vector_json, e.model_id, e.model_revision, e.dimension
2612
2744
  FROM local_chunks c
2613
2745
  JOIN local_assets a ON a.asset_id = c.asset_id
2614
2746
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2626,7 +2758,8 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
2626
2758
  placeholders = ",".join("?" for _ in entity_asset_ids)
2627
2759
  entity_rows = conn.execute(
2628
2760
  f"""
2629
- SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
2761
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
2762
+ e.vector_json, e.model_id, e.model_revision, e.dimension
2630
2763
  FROM local_chunks c
2631
2764
  JOIN local_assets a ON a.asset_id = c.asset_id
2632
2765
  LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2658,6 +2791,54 @@ def _compact_text(value: str, *, max_chars: int) -> str:
2658
2791
  return text[: max(0, max_chars - 1)].rstrip() + "…"
2659
2792
 
2660
2793
 
2794
+ def _reranker_disabled() -> bool:
2795
+ value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_RERANKER", "").strip().lower()
2796
+ if value in {"1", "true", "yes", "on"}:
2797
+ return True
2798
+ if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS") != "1":
2799
+ return True
2800
+ return False
2801
+
2802
+
2803
@lru_cache(maxsize=1)
def _context_reranker():
    """Return the local cross-encoder reranker, or ``None`` when unavailable.

    The outcome (including ``None``) is memoized by ``lru_cache``, so the
    load is attempted once per process. The model is resolved from the
    ``RERANKER_MODEL_SPEC`` entry with ``local_files_only=True``; any import
    or load failure yields ``None`` so callers can keep the base ranking.
    """
    if _reranker_disabled():
        return None
    try:
        import local_models
        from fastembed.rerank.cross_encoder import TextCrossEncoder

        spec = local_models.get_local_model_spec(RERANKER_MODEL_SPEC)
        model_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
        reranker = TextCrossEncoder(spec.model_id, specific_model_path=str(model_dir))
    except Exception:  # pragma: no cover - host/cache dependent
        return None
    else:
        return reranker
2816
+
2817
+
2818
+ def _rerank_scored_candidates(search_query: str, scored: list[tuple[float, Any]], *, limit: int) -> list[tuple[float, Any]]:
2819
+ if len(scored) <= 1:
2820
+ return scored
2821
+ reranker = _context_reranker()
2822
+ if not reranker:
2823
+ return scored
2824
+ head_count = min(len(scored), max(int(limit) * 4, 20), 60)
2825
+ head = scored[:head_count]
2826
+ tail = scored[head_count:]
2827
+ docs = [_compact_text(row["text"], max_chars=1400) for _score, row in head]
2828
+ try:
2829
+ scores = [float(score) for score in reranker.rerank(search_query, docs)]
2830
+ except Exception: # pragma: no cover - runtime fallback only
2831
+ return scored
2832
+ if len(scores) != len(head):
2833
+ return scored
2834
+ reranked = sorted(
2835
+ ((base_score, rerank_score, row) for (base_score, row), rerank_score in zip(head, scores)),
2836
+ key=lambda item: item[1],
2837
+ reverse=True,
2838
+ )
2839
+ return [(base_score, row) for base_score, _rerank_score, row in reranked] + tail
2840
+
2841
+
2661
2842
  def _payload_size(payload: dict) -> int:
2662
2843
  return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
2663
2844
 
@@ -2993,10 +3174,12 @@ def _context_query_conn(
2993
3174
  normalized_mode, mode_warnings = _normalize_context_mode(mode)
2994
3175
  context_tail = _compact_text(current_context or "", max_chars=1000)
2995
3176
  search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
2996
- qvec = embeddings.embed_text(search_query)
3177
+ query_embedding = embeddings.embed_record(search_query)
3178
+ qvec = query_embedding["vector"]
2997
3179
  entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
2998
3180
  rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
2999
3181
  scored = []
3182
+ stale_embedding_seen = False
3000
3183
  for row in rows:
3001
3184
  if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
3002
3185
  continue
@@ -3005,7 +3188,15 @@ def _context_query_conn(
3005
3188
  path_score = _search_text_score(search_query, row["path"] or "")
3006
3189
  summary_score = _search_text_score(search_query, row["summary"] or "")
3007
3190
  entity_score = entity_boosts.get(row["asset_id"], 0.0)
3008
- vector_score = embeddings.cosine(qvec, vector)
3191
+ vector_score = 0.0
3192
+ if (
3193
+ str(row["model_id"] or "") == str(query_embedding["model_id"])
3194
+ and str(row["model_revision"] or "") == str(query_embedding["model_revision"])
3195
+ and int(row["dimension"] or 0) == int(query_embedding["dimension"])
3196
+ ):
3197
+ vector_score = embeddings.cosine(qvec, vector)
3198
+ elif vector:
3199
+ stale_embedding_seen = True
3009
3200
  score = max(text_score, path_score, summary_score, vector_score)
3010
3201
  if entity_score > 0:
3011
3202
  direct_score = max(text_score, path_score, summary_score)
@@ -3019,6 +3210,7 @@ def _context_query_conn(
3019
3210
  if score > 0:
3020
3211
  scored.append((min(float(score), 1.6), row))
3021
3212
  scored.sort(key=lambda item: item[0], reverse=True)
3213
+ scored = _rerank_scored_candidates(search_query, scored, limit=int(limit))
3022
3214
  assets = []
3023
3215
  chunks = []
3024
3216
  evidence_refs = []
@@ -3057,6 +3249,10 @@ def _context_query_conn(
3057
3249
  ).fetchall()
3058
3250
  relations_payload = [dict(row) for row in relation_rows]
3059
3251
  warnings = list(mode_warnings)
3252
+ if query_embedding.get("kind") == "deterministic_embedding":
3253
+ warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
3254
+ elif stale_embedding_seen:
3255
+ warnings.append("Some local chunks still use an older embedding profile and will be refreshed automatically.")
3060
3256
  if evidence_required and not evidence_refs:
3061
3257
  warnings.append("No local evidence found for this query.")
3062
3258
  summary = ""
@@ -2,32 +2,135 @@ from __future__ import annotations
2
2
 
3
3
  import hashlib
4
4
  import math
5
+ import os
6
+ import warnings
7
+ from dataclasses import dataclass
8
+ from functools import lru_cache
9
+ from typing import Any
5
10
 
6
11
  from .util import tokenize
7
12
 
8
- MODEL_ID = "nexo-local-hash-embedding"
9
- MODEL_REVISION = "1"
10
- DIMENSION = 128
13
+ FALLBACK_MODEL_ID = "nexo-local-hash-embedding"
14
+ FALLBACK_MODEL_REVISION = "1"
15
+ FALLBACK_DIMENSION = 128
16
+ PRIMARY_MODEL_SPEC = "bge-base-embeddings"
11
17
 
18
+ # Backward-compatible constants. Callers that persist vectors should use
19
+ # embed_record(), because the active profile can switch from fallback to BGE.
20
+ MODEL_ID = FALLBACK_MODEL_ID
21
+ MODEL_REVISION = FALLBACK_MODEL_REVISION
22
+ DIMENSION = FALLBACK_DIMENSION
12
23
 
13
- def embed_text(text: str) -> list[float]:
14
- """Deterministic local embedding fallback.
15
-
16
- This is intentionally local and dependency-free. It gives the resolver a
17
- working semantic-ish retrieval substrate even on machines where the pinned
18
- FastEmbed model has not warmed yet. The model id/revision make it safe to
19
- supersede later with pinned model vectors.
20
- """
21
- vec = [0.0] * DIMENSION
24
+
25
@dataclass(frozen=True)
class EmbeddingProfile:
    """Immutable description of the embedding backend in use.

    Instances identify which model produced (or would produce) a vector so
    stored embeddings can be compared against the currently active backend.
    """

    # Identifier of the embedding model.
    model_id: str
    # Revision string of that model.
    model_revision: str
    # Length of the vectors the model is recorded as producing.
    dimension: int
    # Backend kind, e.g. "deterministic_embedding" for the hash fallback.
    kind: str
    # Availability state reported for the backend.
    state: str
    # Name of the profile / model spec this entry was built from.
    profile: str
    # Reasons the preferred backend could not be used; empty when none.
    problems: tuple[str, ...] = ()
34
+
35
+
36
def _hash_embed_text(text: str) -> list[float]:
    """Compute the dependency-free hash embedding of *text*.

    Each token is hashed with SHA-256; the first two digest bytes select one
    of ``FALLBACK_DIMENSION`` buckets and the parity of the third byte picks
    the sign of the contribution. The resulting vector is L2-normalized
    (left as all zeros when there are no tokens) and rounded to 8 decimals.
    """
    buckets = [0.0] * FALLBACK_DIMENSION
    for token in tokenize(text):
        digest = hashlib.sha256(token.encode("utf-8", errors="ignore")).digest()
        slot = int.from_bytes(digest[:2], "big") % FALLBACK_DIMENSION
        buckets[slot] += -1.0 if digest[2] % 2 else 1.0
    # "or 1.0" avoids dividing by zero for an empty token stream.
    length = math.sqrt(sum(component * component for component in buckets)) or 1.0
    return [round(component / length, 8) for component in buckets]
29
45
 
30
46
 
47
def _fallback_profile(*problems: str) -> EmbeddingProfile:
    """Build the profile describing the deterministic hash fallback.

    Any non-empty *problems* strings are kept on the profile, in order, to
    explain why the fallback is in use; empty values are dropped.
    """
    reported = tuple(filter(None, problems))
    return EmbeddingProfile(
        model_id=FALLBACK_MODEL_ID,
        model_revision=FALLBACK_MODEL_REVISION,
        dimension=FALLBACK_DIMENSION,
        kind="deterministic_embedding",
        state="available",
        profile="local_context_embedding_fallback",
        problems=reported,
    )
57
+
58
+
59
+ def _fastembed_disabled() -> bool:
60
+ value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED", "").strip().lower()
61
+ if value in {"1", "true", "yes", "on"}:
62
+ return True
63
+ # The unit suite uses temporary NEXO homes that intentionally do not carry
64
+ # model weights. Keep those tests dependency-free unless explicitly opted in.
65
+ if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS") != "1":
66
+ return True
67
+ return False
68
+
69
+
70
@lru_cache(maxsize=1)
def _fastembed_state() -> tuple[Any, EmbeddingProfile] | tuple[None, EmbeddingProfile]:
    """Load the pinned FastEmbed model once and describe the active backend.

    Returns ``(model, profile)`` when the model named by
    ``PRIMARY_MODEL_SPEC`` loads from local files, otherwise
    ``(None, fallback_profile)`` where the fallback profile records why.
    The result is memoized by ``lru_cache`` until ``_fastembed_state.cache_clear()``
    is called.
    """
    if _fastembed_disabled():
        return None, _fallback_profile("fastembed disabled for this process")
    try:
        import local_models
        from fastembed import TextEmbedding

        spec = local_models.get_local_model_spec(PRIMARY_MODEL_SPEC)
        model_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
        with warnings.catch_warnings():
            # Silence FastEmbed's informational notice about mean pooling.
            warnings.filterwarnings("ignore", message=r"The model .* now uses mean pooling.*", category=UserWarning)
            embedder = TextEmbedding(spec.model_id, specific_model_path=str(model_dir))
        # NOTE(review): 384 is used when the spec carries no dimension —
        # confirm it matches the pinned model's real vector size.
        loaded_profile = EmbeddingProfile(
            model_id=spec.model_id,
            model_revision=spec.revision,
            dimension=spec.dimension or 384,
            kind=spec.kind,
            state="available",
            profile=spec.name,
        )
        return embedder, loaded_profile
    except Exception as exc:  # pragma: no cover - host/cache dependent
        return None, _fallback_profile(str(exc))
93
+
94
+
95
def active_profile() -> EmbeddingProfile:
    """Return the profile of the embedding backend currently in effect."""
    return _fastembed_state()[1]
98
+
99
+
100
def reset_cache() -> None:
    """Drop the memoized backend so the next lookup re-probes the model."""
    _fastembed_state.cache_clear()
    return None
102
+
103
+
104
def embed_record(text: str) -> dict[str, Any]:
    """Embed *text* and return the vector together with its model identity.

    The returned dict has the keys ``vector``, ``model_id``,
    ``model_revision``, ``dimension``, ``profile`` and ``kind``. Callers that
    persist vectors should store the identity fields from this record, since
    the backend can differ between calls.

    The FastEmbed model is used when it is loaded and its profile kind is
    ``"fastembed_embedding"``. If it is unavailable, or embedding fails at
    runtime, the deterministic hash embedding is returned instead, labelled
    with the fallback model identity.

    ``None`` or empty *text* is embedded as the empty string on both paths.
    """
    # Normalize once so the FastEmbed path and the hash fallback always see
    # the same input (previously only the FastEmbed path guarded against None).
    content = text or ""
    model, profile = _fastembed_state()
    if model is not None and profile.kind == "fastembed_embedding":
        try:
            vector = list(next(iter(model.embed([content]))))
            return {
                "vector": [float(value) for value in vector],
                "model_id": profile.model_id,
                "model_revision": profile.model_revision,
                "dimension": profile.dimension,
                "profile": profile.profile,
                "kind": profile.kind,
            }
        except Exception:  # pragma: no cover - runtime fallback only
            # Deliberate best effort: a failing model must not break indexing
            # or queries, so fall through to the hash embedding below.
            pass
    fallback = _fallback_profile()
    return {
        "vector": _hash_embed_text(content),
        "model_id": fallback.model_id,
        "model_revision": fallback.model_revision,
        "dimension": fallback.dimension,
        "profile": fallback.profile,
        "kind": fallback.kind,
    }
128
+
129
+
130
def embed_text(text: str) -> list[float]:
    """Return only the embedding vector for *text*.

    Thin wrapper over ``embed_record`` for callers that do not need the
    model identity that accompanies the vector.
    """
    record = embed_record(text)
    return record["vector"]
132
+
133
+
31
134
  def cosine(a: list[float], b: list[float]) -> float:
32
135
  if not a or not b or len(a) != len(b):
33
136
  return 0.0