nexo-brain 7.20.24 → 7.20.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +3 -1
- package/package.json +1 -1
- package/src/local_context/api.py +231 -35
- package/src/local_context/embeddings.py +116 -13
|
package/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.24",
|
|
3
|
+
"version": "7.20.25",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,9 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
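Version `7.20.24` is the current packaged-runtime line. Patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.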
|
|
21
|
+
Version `7.20.25` is the current packaged-runtime line. Patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
|
|
22
|
+
|
|
23
|
+
Previously in `7.20.24`: patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
|
|
22
24
|
|
|
23
25
|
Previously in `7.20.23`: patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
|
|
24
26
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.24",
|
|
3
|
+
"version": "7.20.25",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/local_context/api.py
CHANGED
|
@@ -10,6 +10,7 @@ import hashlib
|
|
|
10
10
|
import subprocess
|
|
11
11
|
import sys
|
|
12
12
|
import time
|
|
13
|
+
from functools import lru_cache
|
|
13
14
|
from pathlib import Path
|
|
14
15
|
from typing import Any
|
|
15
16
|
|
|
@@ -41,6 +42,36 @@ INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
|
|
|
41
42
|
PERFORMANCE_PROFILE_KEY = "performance_profile"
|
|
42
43
|
DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
|
|
43
44
|
VALID_CONTEXT_MODES = {"compact", "full"}
|
|
45
|
+
EMBEDDING_REFRESH_JOB = "embedding_refresh"
|
|
46
|
+
HIGH_VALUE_DOCUMENT_SUFFIXES = {
|
|
47
|
+
".pdf",
|
|
48
|
+
".doc",
|
|
49
|
+
".docx",
|
|
50
|
+
".xls",
|
|
51
|
+
".xlsx",
|
|
52
|
+
".ppt",
|
|
53
|
+
".pptx",
|
|
54
|
+
".pages",
|
|
55
|
+
".numbers",
|
|
56
|
+
".key",
|
|
57
|
+
".rtf",
|
|
58
|
+
".odt",
|
|
59
|
+
".ods",
|
|
60
|
+
".odp",
|
|
61
|
+
}
|
|
62
|
+
KNOWN_TEXT_SUFFIXES = {
|
|
63
|
+
".md",
|
|
64
|
+
".markdown",
|
|
65
|
+
".txt",
|
|
66
|
+
".csv",
|
|
67
|
+
".tsv",
|
|
68
|
+
}
|
|
69
|
+
EMAIL_DOCUMENT_SUFFIXES = {
|
|
70
|
+
".eml",
|
|
71
|
+
".emlx",
|
|
72
|
+
".msg",
|
|
73
|
+
}
|
|
74
|
+
RERANKER_MODEL_SPEC = "cross-encoder-reranker"
|
|
44
75
|
PERFORMANCE_PROFILES: dict[str, dict[str, Any]] = {
|
|
45
76
|
"low": {
|
|
46
77
|
"profile": "low",
|
|
@@ -1092,7 +1123,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
|
|
|
1092
1123
|
(version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
|
|
1093
1124
|
)
|
|
1094
1125
|
if should_extract(normalized, depth):
|
|
1095
|
-
enqueue_job(conn, asset_id, "light_extraction", priority=
|
|
1126
|
+
enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path))
|
|
1096
1127
|
enqueue_job(conn, asset_id, "graph", priority=40)
|
|
1097
1128
|
return asset_id, changed, "ok"
|
|
1098
1129
|
|
|
@@ -1203,6 +1234,27 @@ def enqueue_job(conn, asset_id: str, job_type: str, *, priority: int = 50) -> st
|
|
|
1203
1234
|
return job_id
|
|
1204
1235
|
|
|
1205
1236
|
|
|
1237
|
+
def _extraction_priority(path: Path) -> int:
|
|
1238
|
+
suffix = path.suffix.lower()
|
|
1239
|
+
if suffix in HIGH_VALUE_DOCUMENT_SUFFIXES:
|
|
1240
|
+
return 90
|
|
1241
|
+
if suffix in KNOWN_TEXT_SUFFIXES:
|
|
1242
|
+
return 82
|
|
1243
|
+
if suffix in EMAIL_DOCUMENT_SUFFIXES or is_local_email_tree(str(path)):
|
|
1244
|
+
return 70
|
|
1245
|
+
if suffix in {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".json", ".yaml", ".yml", ".toml", ".html", ".css"}:
|
|
1246
|
+
return 55
|
|
1247
|
+
return 45
|
|
1248
|
+
|
|
1249
|
+
|
|
1250
|
+
def _scan_entry_sort_key(item: Path) -> tuple[int, int, str]:
|
|
1251
|
+
try:
|
|
1252
|
+
is_file = item.is_file()
|
|
1253
|
+
except Exception:
|
|
1254
|
+
is_file = False
|
|
1255
|
+
return (0 if not is_file else 1, -_extraction_priority(item) if is_file else 0, str(item).lower())
|
|
1256
|
+
|
|
1257
|
+
|
|
1206
1258
|
def _iter_files(
|
|
1207
1259
|
conn,
|
|
1208
1260
|
root_id: int,
|
|
@@ -1236,7 +1288,7 @@ def _iter_files(
|
|
|
1236
1288
|
seen_dirs.add(key)
|
|
1237
1289
|
_upsert_dir(conn, root_id, current, seen_at, st)
|
|
1238
1290
|
try:
|
|
1239
|
-
entries = sorted(current.iterdir(), key=
|
|
1291
|
+
entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
|
|
1240
1292
|
except Exception as exc:
|
|
1241
1293
|
_record_scan_error(conn, stats, str(current), "quick_index", exc)
|
|
1242
1294
|
continue
|
|
@@ -1420,7 +1472,7 @@ def _scan_known_directory(
|
|
|
1420
1472
|
st = current.stat()
|
|
1421
1473
|
if not current.is_dir():
|
|
1422
1474
|
continue
|
|
1423
|
-
entries = sorted(current.iterdir(), key=
|
|
1475
|
+
entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
|
|
1424
1476
|
except Exception as exc:
|
|
1425
1477
|
_record_scan_error(conn, stats, str(current), "live_reconcile", exc)
|
|
1426
1478
|
continue
|
|
@@ -1661,6 +1713,29 @@ def _latest_version_id(conn, asset_id: str) -> str:
|
|
|
1661
1713
|
return row["version_id"] if row else stable_id("ver", asset_id)
|
|
1662
1714
|
|
|
1663
1715
|
|
|
1716
|
+
def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
|
|
1717
|
+
record = embeddings.embed_record(text)
|
|
1718
|
+
model_id = str(record["model_id"])
|
|
1719
|
+
model_revision = str(record["model_revision"])
|
|
1720
|
+
dimension = int(record["dimension"])
|
|
1721
|
+
conn.execute(
|
|
1722
|
+
"""
|
|
1723
|
+
INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
|
|
1724
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
1725
|
+
""",
|
|
1726
|
+
(
|
|
1727
|
+
stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}"),
|
|
1728
|
+
asset_id,
|
|
1729
|
+
chunk_id,
|
|
1730
|
+
model_id,
|
|
1731
|
+
model_revision,
|
|
1732
|
+
dimension,
|
|
1733
|
+
json_dumps(record["vector"]),
|
|
1734
|
+
now(),
|
|
1735
|
+
),
|
|
1736
|
+
)
|
|
1737
|
+
|
|
1738
|
+
|
|
1664
1739
|
def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
|
|
1665
1740
|
conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
|
|
1666
1741
|
conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
|
|
@@ -1673,23 +1748,63 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
|
|
|
1673
1748
|
""",
|
|
1674
1749
|
(chunk_id, asset_id, version_id, index, chunk, len(tokenize(chunk)), now()),
|
|
1675
1750
|
)
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1751
|
+
_insert_chunk_embedding(conn, asset_id, chunk_id, chunk)
|
|
1752
|
+
|
|
1753
|
+
|
|
1754
|
+
def _refresh_asset_embeddings(conn, asset_id: str) -> int:
|
|
1755
|
+
rows = conn.execute(
|
|
1756
|
+
"""
|
|
1757
|
+
SELECT chunk_id, text
|
|
1758
|
+
FROM local_chunks
|
|
1759
|
+
WHERE asset_id=?
|
|
1760
|
+
ORDER BY chunk_index ASC
|
|
1761
|
+
""",
|
|
1762
|
+
(asset_id,),
|
|
1763
|
+
).fetchall()
|
|
1764
|
+
conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
|
|
1765
|
+
for row in rows:
|
|
1766
|
+
_insert_chunk_embedding(conn, asset_id, row["chunk_id"], row["text"])
|
|
1767
|
+
if rows:
|
|
1768
|
+
conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
|
|
1769
|
+
return len(rows)
|
|
1770
|
+
|
|
1771
|
+
|
|
1772
|
+
def _embedding_matches_profile(row, profile: embeddings.EmbeddingProfile) -> bool:
|
|
1773
|
+
if row is None:
|
|
1774
|
+
return False
|
|
1775
|
+
return (
|
|
1776
|
+
str(row["model_id"] or "") == profile.model_id
|
|
1777
|
+
and str(row["model_revision"] or "") == profile.model_revision
|
|
1778
|
+
and int(row["dimension"] or 0) == int(profile.dimension)
|
|
1779
|
+
)
|
|
1780
|
+
|
|
1781
|
+
|
|
1782
|
+
def _enqueue_stale_embedding_refresh_jobs(conn, *, limit: int) -> int:
|
|
1783
|
+
profile = embeddings.active_profile()
|
|
1784
|
+
if profile.kind == "deterministic_embedding":
|
|
1785
|
+
return 0
|
|
1786
|
+
rows = conn.execute(
|
|
1787
|
+
"""
|
|
1788
|
+
SELECT DISTINCT c.asset_id
|
|
1789
|
+
FROM local_chunks c
|
|
1790
|
+
JOIN local_assets a ON a.asset_id=c.asset_id
|
|
1791
|
+
LEFT JOIN local_embeddings e ON e.chunk_id=c.chunk_id
|
|
1792
|
+
WHERE a.status='active'
|
|
1793
|
+
AND a.privacy_class='normal'
|
|
1794
|
+
AND (
|
|
1795
|
+
e.embedding_id IS NULL
|
|
1796
|
+
OR e.model_id != ?
|
|
1797
|
+
OR e.model_revision != ?
|
|
1798
|
+
OR e.dimension != ?
|
|
1799
|
+
)
|
|
1800
|
+
ORDER BY a.updated_at ASC
|
|
1801
|
+
LIMIT ?
|
|
1802
|
+
""",
|
|
1803
|
+
(profile.model_id, profile.model_revision, int(profile.dimension), max(1, int(limit))),
|
|
1804
|
+
).fetchall()
|
|
1805
|
+
for row in rows:
|
|
1806
|
+
enqueue_job(conn, row["asset_id"], EMBEDDING_REFRESH_JOB, priority=58)
|
|
1807
|
+
return len(rows)
|
|
1693
1808
|
|
|
1694
1809
|
|
|
1695
1810
|
def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -> None:
|
|
@@ -1751,6 +1866,9 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1751
1866
|
log_event("info", "jobs_skipped_paused", "Local memory jobs skipped because indexing is paused")
|
|
1752
1867
|
return {"ok": True, "paused": True, "processed": 0, "failed": 0}
|
|
1753
1868
|
recovered = _requeue_due_jobs(conn)
|
|
1869
|
+
refresh_queued = _enqueue_stale_embedding_refresh_jobs(conn, limit=max(1, min(int(limit or 1), 100)))
|
|
1870
|
+
if refresh_queued:
|
|
1871
|
+
conn.commit()
|
|
1754
1872
|
rows = conn.execute(
|
|
1755
1873
|
"""
|
|
1756
1874
|
SELECT j.*, a.path, a.depth, a.privacy_class, a.status AS asset_status
|
|
@@ -1804,6 +1922,8 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1804
1922
|
_replace_chunks(conn, asset_id, version_id, text)
|
|
1805
1923
|
_replace_entities(conn, asset_id, version_id, entities(text))
|
|
1806
1924
|
conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
|
|
1925
|
+
elif job_type == EMBEDDING_REFRESH_JOB:
|
|
1926
|
+
_refresh_asset_embeddings(conn, asset_id)
|
|
1807
1927
|
elif job_type == "graph":
|
|
1808
1928
|
conn.execute(
|
|
1809
1929
|
"""
|
|
@@ -1843,8 +1963,8 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1843
1963
|
conn.commit()
|
|
1844
1964
|
conn.commit()
|
|
1845
1965
|
if processed or failed:
|
|
1846
|
-
log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed)
|
|
1847
|
-
return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered}
|
|
1966
|
+
log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed, refresh_queued=refresh_queued)
|
|
1967
|
+
return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered, "embedding_refresh_queued": refresh_queued}
|
|
1848
1968
|
|
|
1849
1969
|
|
|
1850
1970
|
def run_once(
|
|
@@ -2433,27 +2553,36 @@ def diagnostics_tail(limit: int = 100) -> dict:
|
|
|
2433
2553
|
|
|
2434
2554
|
|
|
2435
2555
|
def model_status() -> dict:
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
"
|
|
2439
|
-
"
|
|
2440
|
-
"
|
|
2441
|
-
"
|
|
2442
|
-
"
|
|
2556
|
+
active_embedding = embeddings.active_profile()
|
|
2557
|
+
active_entry = {
|
|
2558
|
+
"profile": active_embedding.profile,
|
|
2559
|
+
"name": active_embedding.model_id,
|
|
2560
|
+
"kind": active_embedding.kind,
|
|
2561
|
+
"revision": active_embedding.model_revision,
|
|
2562
|
+
"dimension": active_embedding.dimension,
|
|
2563
|
+
"state": active_embedding.state,
|
|
2443
2564
|
"required": True,
|
|
2444
|
-
|
|
2565
|
+
"active": True,
|
|
2566
|
+
"problems": list(active_embedding.problems),
|
|
2567
|
+
}
|
|
2568
|
+
models = []
|
|
2569
|
+
active_in_manifest = False
|
|
2445
2570
|
try:
|
|
2446
2571
|
import local_models
|
|
2447
2572
|
for spec in local_models.list_local_model_specs():
|
|
2448
2573
|
verification = local_models.verify_local_model_dir(spec)
|
|
2574
|
+
state = "available" if verification["ok"] else ("optional_missing" if not spec.required else "not_warmed")
|
|
2575
|
+
is_active = spec.model_id == active_embedding.model_id and spec.revision == active_embedding.model_revision
|
|
2576
|
+
active_in_manifest = bool(active_in_manifest or is_active)
|
|
2449
2577
|
models.append({
|
|
2450
2578
|
"profile": spec.name,
|
|
2451
2579
|
"name": spec.model_id,
|
|
2452
2580
|
"kind": spec.kind,
|
|
2453
2581
|
"revision": spec.revision,
|
|
2454
2582
|
"dimension": spec.dimension,
|
|
2455
|
-
"state":
|
|
2583
|
+
"state": state,
|
|
2456
2584
|
"required": spec.required,
|
|
2585
|
+
"active": is_active,
|
|
2457
2586
|
"path": verification["path"],
|
|
2458
2587
|
"problems": verification["problems"],
|
|
2459
2588
|
})
|
|
@@ -2466,6 +2595,8 @@ def model_status() -> dict:
|
|
|
2466
2595
|
"required": False,
|
|
2467
2596
|
"problems": [str(exc)],
|
|
2468
2597
|
})
|
|
2598
|
+
if not active_in_manifest:
|
|
2599
|
+
models.insert(0, active_entry)
|
|
2469
2600
|
return {"ok": True, "models": models}
|
|
2470
2601
|
|
|
2471
2602
|
|
|
@@ -2608,7 +2739,8 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
|
|
|
2608
2739
|
def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
|
|
2609
2740
|
base_rows = conn.execute(
|
|
2610
2741
|
"""
|
|
2611
|
-
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2742
|
+
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2743
|
+
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
2612
2744
|
FROM local_chunks c
|
|
2613
2745
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
2614
2746
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -2626,7 +2758,8 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
|
|
|
2626
2758
|
placeholders = ",".join("?" for _ in entity_asset_ids)
|
|
2627
2759
|
entity_rows = conn.execute(
|
|
2628
2760
|
f"""
|
|
2629
|
-
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2761
|
+
SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
|
|
2762
|
+
e.vector_json, e.model_id, e.model_revision, e.dimension
|
|
2630
2763
|
FROM local_chunks c
|
|
2631
2764
|
JOIN local_assets a ON a.asset_id = c.asset_id
|
|
2632
2765
|
LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
|
|
@@ -2658,6 +2791,54 @@ def _compact_text(value: str, *, max_chars: int) -> str:
|
|
|
2658
2791
|
return text[: max(0, max_chars - 1)].rstrip() + "…"
|
|
2659
2792
|
|
|
2660
2793
|
|
|
2794
|
+
def _reranker_disabled() -> bool:
|
|
2795
|
+
value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_RERANKER", "").strip().lower()
|
|
2796
|
+
if value in {"1", "true", "yes", "on"}:
|
|
2797
|
+
return True
|
|
2798
|
+
if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS") != "1":
|
|
2799
|
+
return True
|
|
2800
|
+
return False
|
|
2801
|
+
|
|
2802
|
+
|
|
2803
|
+
@lru_cache(maxsize=1)
|
|
2804
|
+
def _context_reranker():
|
|
2805
|
+
if _reranker_disabled():
|
|
2806
|
+
return None
|
|
2807
|
+
try:
|
|
2808
|
+
import local_models
|
|
2809
|
+
from fastembed.rerank.cross_encoder import TextCrossEncoder
|
|
2810
|
+
|
|
2811
|
+
spec = local_models.get_local_model_spec(RERANKER_MODEL_SPEC)
|
|
2812
|
+
target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
|
|
2813
|
+
return TextCrossEncoder(spec.model_id, specific_model_path=str(target_dir))
|
|
2814
|
+
except Exception: # pragma: no cover - host/cache dependent
|
|
2815
|
+
return None
|
|
2816
|
+
|
|
2817
|
+
|
|
2818
|
+
def _rerank_scored_candidates(search_query: str, scored: list[tuple[float, Any]], *, limit: int) -> list[tuple[float, Any]]:
|
|
2819
|
+
if len(scored) <= 1:
|
|
2820
|
+
return scored
|
|
2821
|
+
reranker = _context_reranker()
|
|
2822
|
+
if not reranker:
|
|
2823
|
+
return scored
|
|
2824
|
+
head_count = min(len(scored), max(int(limit) * 4, 20), 60)
|
|
2825
|
+
head = scored[:head_count]
|
|
2826
|
+
tail = scored[head_count:]
|
|
2827
|
+
docs = [_compact_text(row["text"], max_chars=1400) for _score, row in head]
|
|
2828
|
+
try:
|
|
2829
|
+
scores = [float(score) for score in reranker.rerank(search_query, docs)]
|
|
2830
|
+
except Exception: # pragma: no cover - runtime fallback only
|
|
2831
|
+
return scored
|
|
2832
|
+
if len(scores) != len(head):
|
|
2833
|
+
return scored
|
|
2834
|
+
reranked = sorted(
|
|
2835
|
+
((base_score, rerank_score, row) for (base_score, row), rerank_score in zip(head, scores)),
|
|
2836
|
+
key=lambda item: item[1],
|
|
2837
|
+
reverse=True,
|
|
2838
|
+
)
|
|
2839
|
+
return [(base_score, row) for base_score, _rerank_score, row in reranked] + tail
|
|
2840
|
+
|
|
2841
|
+
|
|
2661
2842
|
def _payload_size(payload: dict) -> int:
|
|
2662
2843
|
return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
|
|
2663
2844
|
|
|
@@ -2993,10 +3174,12 @@ def _context_query_conn(
|
|
|
2993
3174
|
normalized_mode, mode_warnings = _normalize_context_mode(mode)
|
|
2994
3175
|
context_tail = _compact_text(current_context or "", max_chars=1000)
|
|
2995
3176
|
search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
|
|
2996
|
-
|
|
3177
|
+
query_embedding = embeddings.embed_record(search_query)
|
|
3178
|
+
qvec = query_embedding["vector"]
|
|
2997
3179
|
entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
|
|
2998
3180
|
rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
|
|
2999
3181
|
scored = []
|
|
3182
|
+
stale_embedding_seen = False
|
|
3000
3183
|
for row in rows:
|
|
3001
3184
|
if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
|
|
3002
3185
|
continue
|
|
@@ -3005,7 +3188,15 @@ def _context_query_conn(
|
|
|
3005
3188
|
path_score = _search_text_score(search_query, row["path"] or "")
|
|
3006
3189
|
summary_score = _search_text_score(search_query, row["summary"] or "")
|
|
3007
3190
|
entity_score = entity_boosts.get(row["asset_id"], 0.0)
|
|
3008
|
-
vector_score =
|
|
3191
|
+
vector_score = 0.0
|
|
3192
|
+
if (
|
|
3193
|
+
str(row["model_id"] or "") == str(query_embedding["model_id"])
|
|
3194
|
+
and str(row["model_revision"] or "") == str(query_embedding["model_revision"])
|
|
3195
|
+
and int(row["dimension"] or 0) == int(query_embedding["dimension"])
|
|
3196
|
+
):
|
|
3197
|
+
vector_score = embeddings.cosine(qvec, vector)
|
|
3198
|
+
elif vector:
|
|
3199
|
+
stale_embedding_seen = True
|
|
3009
3200
|
score = max(text_score, path_score, summary_score, vector_score)
|
|
3010
3201
|
if entity_score > 0:
|
|
3011
3202
|
direct_score = max(text_score, path_score, summary_score)
|
|
@@ -3019,6 +3210,7 @@ def _context_query_conn(
|
|
|
3019
3210
|
if score > 0:
|
|
3020
3211
|
scored.append((min(float(score), 1.6), row))
|
|
3021
3212
|
scored.sort(key=lambda item: item[0], reverse=True)
|
|
3213
|
+
scored = _rerank_scored_candidates(search_query, scored, limit=int(limit))
|
|
3022
3214
|
assets = []
|
|
3023
3215
|
chunks = []
|
|
3024
3216
|
evidence_refs = []
|
|
@@ -3057,6 +3249,10 @@ def _context_query_conn(
|
|
|
3057
3249
|
).fetchall()
|
|
3058
3250
|
relations_payload = [dict(row) for row in relation_rows]
|
|
3059
3251
|
warnings = list(mode_warnings)
|
|
3252
|
+
if query_embedding.get("kind") == "deterministic_embedding":
|
|
3253
|
+
warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
|
|
3254
|
+
elif stale_embedding_seen:
|
|
3255
|
+
warnings.append("Some local chunks still use an older embedding profile and will be refreshed automatically.")
|
|
3060
3256
|
if evidence_required and not evidence_refs:
|
|
3061
3257
|
warnings.append("No local evidence found for this query.")
|
|
3062
3258
|
summary = ""
|
|
package/src/local_context/embeddings.py
CHANGED
|
@@ -2,32 +2,135 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import math
|
|
5
|
+
import os
|
|
6
|
+
import warnings
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from functools import lru_cache
|
|
9
|
+
from typing import Any
|
|
5
10
|
|
|
6
11
|
from .util import tokenize
|
|
7
12
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
13
|
+
FALLBACK_MODEL_ID = "nexo-local-hash-embedding"
|
|
14
|
+
FALLBACK_MODEL_REVISION = "1"
|
|
15
|
+
FALLBACK_DIMENSION = 128
|
|
16
|
+
PRIMARY_MODEL_SPEC = "bge-base-embeddings"
|
|
11
17
|
|
|
18
|
+
# Backward-compatible constants. Callers that persist vectors should use
|
|
19
|
+
# embed_record(), because the active profile can switch from fallback to BGE.
|
|
20
|
+
MODEL_ID = FALLBACK_MODEL_ID
|
|
21
|
+
MODEL_REVISION = FALLBACK_MODEL_REVISION
|
|
22
|
+
DIMENSION = FALLBACK_DIMENSION
|
|
12
23
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
|
|
26
|
+
class EmbeddingProfile:
|
|
27
|
+
model_id: str
|
|
28
|
+
model_revision: str
|
|
29
|
+
dimension: int
|
|
30
|
+
kind: str
|
|
31
|
+
state: str
|
|
32
|
+
profile: str
|
|
33
|
+
problems: tuple[str, ...] = ()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _hash_embed_text(text: str) -> list[float]:
|
|
37
|
+
vec = [0.0] * FALLBACK_DIMENSION
|
|
22
38
|
for token in tokenize(text):
|
|
23
39
|
digest = hashlib.sha256(token.encode("utf-8", errors="ignore")).digest()
|
|
24
|
-
idx = int.from_bytes(digest[:2], "big") %
|
|
40
|
+
idx = int.from_bytes(digest[:2], "big") % FALLBACK_DIMENSION
|
|
25
41
|
sign = -1.0 if digest[2] % 2 else 1.0
|
|
26
42
|
vec[idx] += sign
|
|
27
43
|
norm = math.sqrt(sum(value * value for value in vec)) or 1.0
|
|
28
44
|
return [round(value / norm, 8) for value in vec]
|
|
29
45
|
|
|
30
46
|
|
|
47
|
+
def _fallback_profile(*problems: str) -> EmbeddingProfile:
|
|
48
|
+
return EmbeddingProfile(
|
|
49
|
+
model_id=FALLBACK_MODEL_ID,
|
|
50
|
+
model_revision=FALLBACK_MODEL_REVISION,
|
|
51
|
+
dimension=FALLBACK_DIMENSION,
|
|
52
|
+
kind="deterministic_embedding",
|
|
53
|
+
state="available",
|
|
54
|
+
profile="local_context_embedding_fallback",
|
|
55
|
+
problems=tuple(item for item in problems if item),
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _fastembed_disabled() -> bool:
|
|
60
|
+
value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED", "").strip().lower()
|
|
61
|
+
if value in {"1", "true", "yes", "on"}:
|
|
62
|
+
return True
|
|
63
|
+
# The unit suite uses temporary NEXO homes that intentionally do not carry
|
|
64
|
+
# model weights. Keep those tests dependency-free unless explicitly opted in.
|
|
65
|
+
if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS") != "1":
|
|
66
|
+
return True
|
|
67
|
+
return False
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@lru_cache(maxsize=1)
|
|
71
|
+
def _fastembed_state() -> tuple[Any, EmbeddingProfile] | tuple[None, EmbeddingProfile]:
|
|
72
|
+
if _fastembed_disabled():
|
|
73
|
+
return None, _fallback_profile("fastembed disabled for this process")
|
|
74
|
+
try:
|
|
75
|
+
import local_models
|
|
76
|
+
from fastembed import TextEmbedding
|
|
77
|
+
|
|
78
|
+
spec = local_models.get_local_model_spec(PRIMARY_MODEL_SPEC)
|
|
79
|
+
target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
|
|
80
|
+
with warnings.catch_warnings():
|
|
81
|
+
warnings.filterwarnings("ignore", message=r"The model .* now uses mean pooling.*", category=UserWarning)
|
|
82
|
+
model = TextEmbedding(spec.model_id, specific_model_path=str(target_dir))
|
|
83
|
+
return model, EmbeddingProfile(
|
|
84
|
+
model_id=spec.model_id,
|
|
85
|
+
model_revision=spec.revision,
|
|
86
|
+
dimension=spec.dimension or 384,
|
|
87
|
+
kind=spec.kind,
|
|
88
|
+
state="available",
|
|
89
|
+
profile=spec.name,
|
|
90
|
+
)
|
|
91
|
+
except Exception as exc: # pragma: no cover - host/cache dependent
|
|
92
|
+
return None, _fallback_profile(str(exc))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def active_profile() -> EmbeddingProfile:
|
|
96
|
+
_model, profile = _fastembed_state()
|
|
97
|
+
return profile
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def reset_cache() -> None:
|
|
101
|
+
_fastembed_state.cache_clear()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def embed_record(text: str) -> dict[str, Any]:
|
|
105
|
+
model, profile = _fastembed_state()
|
|
106
|
+
if model is not None and profile.kind == "fastembed_embedding":
|
|
107
|
+
try:
|
|
108
|
+
vector = list(next(iter(model.embed([text or ""]))))
|
|
109
|
+
return {
|
|
110
|
+
"vector": [float(value) for value in vector],
|
|
111
|
+
"model_id": profile.model_id,
|
|
112
|
+
"model_revision": profile.model_revision,
|
|
113
|
+
"dimension": profile.dimension,
|
|
114
|
+
"profile": profile.profile,
|
|
115
|
+
"kind": profile.kind,
|
|
116
|
+
}
|
|
117
|
+
except Exception: # pragma: no cover - runtime fallback only
|
|
118
|
+
pass
|
|
119
|
+
fallback = _fallback_profile()
|
|
120
|
+
return {
|
|
121
|
+
"vector": _hash_embed_text(text),
|
|
122
|
+
"model_id": fallback.model_id,
|
|
123
|
+
"model_revision": fallback.model_revision,
|
|
124
|
+
"dimension": fallback.dimension,
|
|
125
|
+
"profile": fallback.profile,
|
|
126
|
+
"kind": fallback.kind,
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def embed_text(text: str) -> list[float]:
|
|
131
|
+
return embed_record(text)["vector"]
|
|
132
|
+
|
|
133
|
+
|
|
31
134
|
def cosine(a: list[float], b: list[float]) -> float:
|
|
32
135
|
if not a or not b or len(a) != len(b):
|
|
33
136
|
return 0.0
|