npm - nexo-brain - Versions diffs - 7.20.23 → 7.20.25 - Mend

nexo-brain 7.20.23 → 7.20.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/.claude-plugin/plugin.json +1 -1
package/README.md +5 -1
package/package.json +1 -1
package/src/local_context/api.py +267 -39
package/src/local_context/embeddings.py +116 -13

package/.claude-plugin/plugin.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.20.23",
+  "version": "7.20.25",
   "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
   "author": {
     "name": "NEXO Brain",

package/README.md CHANGED

@@ -18,7 +18,11 @@
 [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
-Version `7.20.23` is the current packaged-runtime line. Patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
+Version `7.20.25` is the current packaged-runtime line. Patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
+Previously in `7.20.24`: patch release over v7.20.23 — Local Memory performance profile writes now tolerate active indexing, retry transient SQLite busy states, and shorten indexer write locks between processed files.
+Previously in `7.20.23`: patch release over v7.20.22 — Local Memory status reads the real split sidecar database read-only, reports retryable keyed failures without false zeroes, and keeps Desktop Spanish/English copy localized.
 Previously in `7.20.22`: patch release over v7.20.19 — Local Memory moved out of the main Brain database, MCP readiness verifies required tools, and split-aware Desktop backups validate the main DB and Local Memory sidecar separately.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.20.23",
+  "version": "7.20.25",
   "mcpName": "io.github.wazionapps/nexo",
   "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
   "homepage": "https://nexo-brain.com",

package/src/local_context/api.py CHANGED

@@ -9,11 +9,13 @@ import stat
 import hashlib
 import subprocess
 import sys
+import time
+from functools import lru_cache
 from pathlib import Path
 from typing import Any
 from . import embeddings
-from .db import LOCAL_CONTEXT_TABLES, connect_local_context_db_readonly, ensure_local_context_db, get_local_context_db
+from .db import LOCAL_CONTEXT_TABLES, close_local_context_db, connect_local_context_db_readonly, ensure_local_context_db, get_local_context_db
 from .extractors import chunk_text, contains_secret, entities, extract_text, summarize
 from .logging import log_event, tail
 from .privacy import classify_path, is_local_email_tree, is_queryable_path, should_extract, should_skip_file, should_skip_tree
@@ -33,11 +35,43 @@ DEFAULT_SYSTEM_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_SYSTEM_ROOT_DEP
 DEFAULT_CONTEXT_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_MAX_CHARS", "20000") or "20000")
 DEFAULT_ROUTER_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_ROUTER_MAX_CHARS", "6000") or "6000")
 DEFAULT_MAX_JOB_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_INDEX_MAX_JOB_ATTEMPTS", "3") or "3")
+DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_ATTEMPTS", "5") or "5")
+DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS = float(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_DELAY_SECONDS", "0.35") or "0.35")
 INITIAL_INDEX_COMPLETE_KEY = "initial_index_complete"
 INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
 PERFORMANCE_PROFILE_KEY = "performance_profile"
 DEFAULT_PERFORMANCE_PROFILE = os.environ.get("NEXO_LOCAL_INDEX_PERFORMANCE_PROFILE", "medium").strip().lower() or "medium"
 VALID_CONTEXT_MODES = {"compact", "full"}
+EMBEDDING_REFRESH_JOB = "embedding_refresh"
+HIGH_VALUE_DOCUMENT_SUFFIXES = {
+    ".pdf",
+    ".doc",
+    ".docx",
+    ".xls",
+    ".xlsx",
+    ".ppt",
+    ".pptx",
+    ".pages",
+    ".numbers",
+    ".key",
+    ".rtf",
+    ".odt",
+    ".ods",
+    ".odp",
+}
+KNOWN_TEXT_SUFFIXES = {
+    ".md",
+    ".markdown",
+    ".txt",
+    ".csv",
+    ".tsv",
+}
+EMAIL_DOCUMENT_SUFFIXES = {
+    ".eml",
+    ".emlx",
+    ".msg",
+}
+RERANKER_MODEL_SPEC = "cross-encoder-reranker"
 PERFORMANCE_PROFILES: dict[str, dict[str, Any]] = {
     "low": {
         "profile": "low",
@@ -108,6 +142,27 @@ def _close_read_conn(conn) -> None:
         pass
+def _sqlite_is_busy(exc: BaseException) -> bool:
+    return isinstance(exc, sqlite3.OperationalError) and "locked" in str(exc).lower()
+def _with_sqlite_busy_retry(callback, *, attempts: int | None = None):
+    max_attempts = max(1, int(attempts or DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS))
+    last_exc = None
+    for attempt in range(max_attempts):
+        try:
+            return callback()
+        except sqlite3.OperationalError as exc:
+            if not _sqlite_is_busy(exc) or attempt >= max_attempts - 1:
+                raise
+            last_exc = exc
+            close_local_context_db()
+            time.sleep(DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS * (attempt + 1))
+    if last_exc:
+        raise last_exc
+    return None
 def add_root(path: str, *, mode: str = "normal", depth: int | None = None) -> dict:
     conn = _conn()
     root_path = norm_path(path)
@@ -609,9 +664,12 @@ def _set_state_conn(conn, key: str, value: str) -> None:
 def _set_state(key: str, value: str) -> None:
-    conn = _conn()
-    _set_state_conn(conn, key, value)
-    conn.commit()
+    def write_state() -> None:
+        conn = _conn()
+        _set_state_conn(conn, key, value)
+        conn.commit()
+    _with_sqlite_busy_retry(write_state)
 def _get_state_conn(conn, key: str, default: str = "") -> str:
@@ -1065,7 +1123,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
             (version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
         )
         if should_extract(normalized, depth):
-            enqueue_job(conn, asset_id, "light_extraction", priority=60)
+            enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path))
         enqueue_job(conn, asset_id, "graph", priority=40)
     return asset_id, changed, "ok"
@@ -1176,6 +1234,27 @@ def enqueue_job(conn, asset_id: str, job_type: str, *, priority: int = 50) -> st
     return job_id
+def _extraction_priority(path: Path) -> int:
+    suffix = path.suffix.lower()
+    if suffix in HIGH_VALUE_DOCUMENT_SUFFIXES:
+        return 90
+    if suffix in KNOWN_TEXT_SUFFIXES:
+        return 82
+    if suffix in EMAIL_DOCUMENT_SUFFIXES or is_local_email_tree(str(path)):
+        return 70
+    if suffix in {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".json", ".yaml", ".yml", ".toml", ".html", ".css"}:
+        return 55
+    return 45
+def _scan_entry_sort_key(item: Path) -> tuple[int, int, str]:
+    try:
+        is_file = item.is_file()
+    except Exception:
+        is_file = False
+    return (0 if not is_file else 1, -_extraction_priority(item) if is_file else 0, str(item).lower())
 def _iter_files(
     conn,
     root_id: int,
@@ -1209,7 +1288,7 @@ def _iter_files(
         seen_dirs.add(key)
         _upsert_dir(conn, root_id, current, seen_at, st)
         try:
-            entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
+            entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
         except Exception as exc:
             _record_scan_error(conn, stats, str(current), "quick_index", exc)
             continue
@@ -1393,7 +1472,7 @@ def _scan_known_directory(
             st = current.stat()
             if not current.is_dir():
                 continue
-            entries = sorted(current.iterdir(), key=lambda item: str(item).lower())
+            entries = sorted(current.iterdir(), key=_scan_entry_sort_key)
         except Exception as exc:
             _record_scan_error(conn, stats, str(current), "live_reconcile", exc)
             continue
@@ -1634,6 +1713,29 @@ def _latest_version_id(conn, asset_id: str) -> str:
     return row["version_id"] if row else stable_id("ver", asset_id)
+def _insert_chunk_embedding(conn, asset_id: str, chunk_id: str, text: str) -> None:
+    record = embeddings.embed_record(text)
+    model_id = str(record["model_id"])
+    model_revision = str(record["model_revision"])
+    dimension = int(record["dimension"])
+    conn.execute(
+        """
+        INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        """,
+        (
+            stable_id("emb", f"{chunk_id}:{model_id}:{model_revision}:{dimension}"),
+            asset_id,
+            chunk_id,
+            model_id,
+            model_revision,
+            dimension,
+            json_dumps(record["vector"]),
+            now(),
+        ),
+    )
 def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
     conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
     conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
@@ -1646,23 +1748,63 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
             """,
             (chunk_id, asset_id, version_id, index, chunk, len(tokenize(chunk)), now()),
         )
-        vector = embeddings.embed_text(chunk)
-        conn.execute(
-            """
-            INSERT INTO local_embeddings(embedding_id, asset_id, chunk_id, model_id, model_revision, dimension, vector_json, created_at)
-            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
-            """,
-            (
-                stable_id("emb", f"{chunk_id}:{embeddings.MODEL_ID}:{embeddings.MODEL_REVISION}"),
-                asset_id,
-                chunk_id,
-                embeddings.MODEL_ID,
-                embeddings.MODEL_REVISION,
-                embeddings.DIMENSION,
-                json_dumps(vector),
-                now(),
-            ),
-        )
+        _insert_chunk_embedding(conn, asset_id, chunk_id, chunk)
+def _refresh_asset_embeddings(conn, asset_id: str) -> int:
+    rows = conn.execute(
+        """
+        SELECT chunk_id, text
+        FROM local_chunks
+        WHERE asset_id=?
+        ORDER BY chunk_index ASC
+        """,
+        (asset_id,),
+    ).fetchall()
+    conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
+    for row in rows:
+        _insert_chunk_embedding(conn, asset_id, row["chunk_id"], row["text"])
+    if rows:
+        conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
+    return len(rows)
+def _embedding_matches_profile(row, profile: embeddings.EmbeddingProfile) -> bool:
+    if row is None:
+        return False
+    return (
+        str(row["model_id"] or "") == profile.model_id
+        and str(row["model_revision"] or "") == profile.model_revision
+        and int(row["dimension"] or 0) == int(profile.dimension)
+    )
+def _enqueue_stale_embedding_refresh_jobs(conn, *, limit: int) -> int:
+    profile = embeddings.active_profile()
+    if profile.kind == "deterministic_embedding":
+        return 0
+    rows = conn.execute(
+        """
+        SELECT DISTINCT c.asset_id
+        FROM local_chunks c
+        JOIN local_assets a ON a.asset_id=c.asset_id
+        LEFT JOIN local_embeddings e ON e.chunk_id=c.chunk_id
+        WHERE a.status='active'
+          AND a.privacy_class='normal'
+          AND (
+            e.embedding_id IS NULL
+            OR e.model_id != ?
+            OR e.model_revision != ?
+            OR e.dimension != ?
+          )
+        ORDER BY a.updated_at ASC
+        LIMIT ?
+        """,
+        (profile.model_id, profile.model_revision, int(profile.dimension), max(1, int(limit))),
+    ).fetchall()
+    for row in rows:
+        enqueue_job(conn, row["asset_id"], EMBEDDING_REFRESH_JOB, priority=58)
+    return len(rows)
 def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -> None:
@@ -1724,6 +1866,9 @@ def process_jobs(*, limit: int = 100) -> dict:
         log_event("info", "jobs_skipped_paused", "Local memory jobs skipped because indexing is paused")
         return {"ok": True, "paused": True, "processed": 0, "failed": 0}
     recovered = _requeue_due_jobs(conn)
+    refresh_queued = _enqueue_stale_embedding_refresh_jobs(conn, limit=max(1, min(int(limit or 1), 100)))
+    if refresh_queued:
+        conn.commit()
     rows = conn.execute(
         """
         SELECT j.*, a.path, a.depth, a.privacy_class, a.status AS asset_status
@@ -1745,6 +1890,7 @@ def process_jobs(*, limit: int = 100) -> dict:
             "UPDATE local_index_jobs SET status='running', claimed_by='local-process', lease_expires_at=?, updated_at=? WHERE job_id=?",
             (now() + 300, now(), job_id),
         )
+        conn.commit()
         try:
             if row["asset_status"] != "active":
                 raise FileNotFoundError(row["path"])
@@ -1754,6 +1900,7 @@ def process_jobs(*, limit: int = 100) -> dict:
                     (now(), job_id),
                 )
                 processed += 1
+                conn.commit()
                 continue
             if job_type == "light_extraction":
                 text, metadata = extract_text(Path(row["path"]))
@@ -1765,6 +1912,7 @@ def process_jobs(*, limit: int = 100) -> dict:
                         (now(), job_id),
                     )
                     processed += 1
+                    conn.commit()
                     continue
                 summary = summarize(text)
                 conn.execute(
@@ -1774,6 +1922,8 @@ def process_jobs(*, limit: int = 100) -> dict:
                 _replace_chunks(conn, asset_id, version_id, text)
                 _replace_entities(conn, asset_id, version_id, entities(text))
                 conn.execute("UPDATE local_assets SET phase='embeddings', updated_at=? WHERE asset_id=?", (now(), asset_id))
+            elif job_type == EMBEDDING_REFRESH_JOB:
+                _refresh_asset_embeddings(conn, asset_id)
             elif job_type == "graph":
                 conn.execute(
                     """
@@ -1787,6 +1937,7 @@ def process_jobs(*, limit: int = 100) -> dict:
                 (now(), job_id),
             )
             processed += 1
+            conn.commit()
         except Exception as exc:
             failed += 1
             attempts = int(row["attempt_count"] or 0) + 1
@@ -1809,10 +1960,11 @@ def process_jobs(*, limit: int = 100) -> dict:
                 technical_detail=str(exc),
                 retryable=not terminal,
             )
+            conn.commit()
     conn.commit()
     if processed or failed:
-        log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed)
-    return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered}
+        log_event("info", "jobs_processed", "Local memory jobs processed", processed=processed, failed=failed, refresh_queued=refresh_queued)
+    return {"ok": True, "processed": processed, "failed": failed, "recovered": recovered, "embedding_refresh_queued": refresh_queued}
 def run_once(
@@ -2401,27 +2553,36 @@ def diagnostics_tail(limit: int = 100) -> dict:
 def model_status() -> dict:
-    models = [{
-        "profile": "local_context_embedding_fallback",
-        "name": embeddings.MODEL_ID,
-        "kind": "deterministic_embedding",
-        "revision": embeddings.MODEL_REVISION,
-        "dimension": embeddings.DIMENSION,
-        "state": "available",
+    active_embedding = embeddings.active_profile()
+    active_entry = {
+        "profile": active_embedding.profile,
+        "name": active_embedding.model_id,
+        "kind": active_embedding.kind,
+        "revision": active_embedding.model_revision,
+        "dimension": active_embedding.dimension,
+        "state": active_embedding.state,
         "required": True,
-    }]
+        "active": True,
+        "problems": list(active_embedding.problems),
+    }
+    models = []
+    active_in_manifest = False
     try:
         import local_models
         for spec in local_models.list_local_model_specs():
             verification = local_models.verify_local_model_dir(spec)
+            state = "available" if verification["ok"] else ("optional_missing" if not spec.required else "not_warmed")
+            is_active = spec.model_id == active_embedding.model_id and spec.revision == active_embedding.model_revision
+            active_in_manifest = bool(active_in_manifest or is_active)
             models.append({
                 "profile": spec.name,
                 "name": spec.model_id,
                 "kind": spec.kind,
                 "revision": spec.revision,
                 "dimension": spec.dimension,
-                "state": "available" if verification["ok"] else "not_warmed",
+                "state": state,
                 "required": spec.required,
+                "active": is_active,
                 "path": verification["path"],
                 "problems": verification["problems"],
             })
@@ -2434,6 +2595,8 @@ def model_status() -> dict:
             "required": False,
             "problems": [str(exc)],
         })
+    if not active_in_manifest:
+        models.insert(0, active_entry)
     return {"ok": True, "models": models}
@@ -2576,7 +2739,8 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
 def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
     base_rows = conn.execute(
         """
-        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
+        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
+               e.vector_json, e.model_id, e.model_revision, e.dimension
         FROM local_chunks c
         JOIN local_assets a ON a.asset_id = c.asset_id
         LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2594,7 +2758,8 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
     placeholders = ",".join("?" for _ in entity_asset_ids)
     entity_rows = conn.execute(
         f"""
-        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
+        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
+               e.vector_json, e.model_id, e.model_revision, e.dimension
         FROM local_chunks c
         JOIN local_assets a ON a.asset_id = c.asset_id
         LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
@@ -2626,6 +2791,54 @@ def _compact_text(value: str, *, max_chars: int) -> str:
     return text[: max(0, max_chars - 1)].rstrip() + "…"
+def _reranker_disabled() -> bool:
+    value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_RERANKER", "").strip().lower()
+    if value in {"1", "true", "yes", "on"}:
+        return True
+    if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_RERANKER_IN_TESTS") != "1":
+        return True
+    return False
+@lru_cache(maxsize=1)
+def _context_reranker():
+    if _reranker_disabled():
+        return None
+    try:
+        import local_models
+        from fastembed.rerank.cross_encoder import TextCrossEncoder
+        spec = local_models.get_local_model_spec(RERANKER_MODEL_SPEC)
+        target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
+        return TextCrossEncoder(spec.model_id, specific_model_path=str(target_dir))
+    except Exception:  # pragma: no cover - host/cache dependent
+        return None
+def _rerank_scored_candidates(search_query: str, scored: list[tuple[float, Any]], *, limit: int) -> list[tuple[float, Any]]:
+    if len(scored) <= 1:
+        return scored
+    reranker = _context_reranker()
+    if not reranker:
+        return scored
+    head_count = min(len(scored), max(int(limit) * 4, 20), 60)
+    head = scored[:head_count]
+    tail = scored[head_count:]
+    docs = [_compact_text(row["text"], max_chars=1400) for _score, row in head]
+    try:
+        scores = [float(score) for score in reranker.rerank(search_query, docs)]
+    except Exception:  # pragma: no cover - runtime fallback only
+        return scored
+    if len(scores) != len(head):
+        return scored
+    reranked = sorted(
+        ((base_score, rerank_score, row) for (base_score, row), rerank_score in zip(head, scores)),
+        key=lambda item: item[1],
+        reverse=True,
+    )
+    return [(base_score, row) for base_score, _rerank_score, row in reranked] + tail
 def _payload_size(payload: dict) -> int:
     return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
@@ -2961,10 +3174,12 @@ def _context_query_conn(
     normalized_mode, mode_warnings = _normalize_context_mode(mode)
     context_tail = _compact_text(current_context or "", max_chars=1000)
     search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
-    qvec = embeddings.embed_text(search_query)
+    query_embedding = embeddings.embed_record(search_query)
+    qvec = query_embedding["vector"]
     entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
     rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
     scored = []
+    stale_embedding_seen = False
     for row in rows:
         if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
             continue
@@ -2973,7 +3188,15 @@ def _context_query_conn(
         path_score = _search_text_score(search_query, row["path"] or "")
         summary_score = _search_text_score(search_query, row["summary"] or "")
         entity_score = entity_boosts.get(row["asset_id"], 0.0)
-        vector_score = embeddings.cosine(qvec, vector)
+        vector_score = 0.0
+        if (
+            str(row["model_id"] or "") == str(query_embedding["model_id"])
+            and str(row["model_revision"] or "") == str(query_embedding["model_revision"])
+            and int(row["dimension"] or 0) == int(query_embedding["dimension"])
+        ):
+            vector_score = embeddings.cosine(qvec, vector)
+        elif vector:
+            stale_embedding_seen = True
         score = max(text_score, path_score, summary_score, vector_score)
         if entity_score > 0:
             direct_score = max(text_score, path_score, summary_score)
@@ -2987,6 +3210,7 @@ def _context_query_conn(
         if score > 0:
             scored.append((min(float(score), 1.6), row))
     scored.sort(key=lambda item: item[0], reverse=True)
+    scored = _rerank_scored_candidates(search_query, scored, limit=int(limit))
     assets = []
     chunks = []
     evidence_refs = []
@@ -3025,6 +3249,10 @@ def _context_query_conn(
         ).fetchall()
         relations_payload = [dict(row) for row in relation_rows]
     warnings = list(mode_warnings)
+    if query_embedding.get("kind") == "deterministic_embedding":
+        warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
+    elif stale_embedding_seen:
+        warnings.append("Some local chunks still use an older embedding profile and will be refreshed automatically.")
     if evidence_required and not evidence_refs:
         warnings.append("No local evidence found for this query.")
     summary = ""

package/src/local_context/embeddings.py CHANGED

@@ -2,32 +2,135 @@ from __future__ import annotations
 import hashlib
 import math
+import os
+import warnings
+from dataclasses import dataclass
+from functools import lru_cache
+from typing import Any
 from .util import tokenize
-MODEL_ID = "nexo-local-hash-embedding"
-MODEL_REVISION = "1"
-DIMENSION = 128
+FALLBACK_MODEL_ID = "nexo-local-hash-embedding"
+FALLBACK_MODEL_REVISION = "1"
+FALLBACK_DIMENSION = 128
+PRIMARY_MODEL_SPEC = "bge-base-embeddings"
+# Backward-compatible constants. Callers that persist vectors should use
+# embed_record(), because the active profile can switch from fallback to BGE.
+MODEL_ID = FALLBACK_MODEL_ID
+MODEL_REVISION = FALLBACK_MODEL_REVISION
+DIMENSION = FALLBACK_DIMENSION
-def embed_text(text: str) -> list[float]:
-    """Deterministic local embedding fallback.
-    This is intentionally local and dependency-free. It gives the resolver a
-    working semantic-ish retrieval substrate even on machines where the pinned
-    FastEmbed model has not warmed yet. The model id/revision make it safe to
-    supersede later with pinned model vectors.
-    """
-    vec = [0.0] * DIMENSION
+@dataclass(frozen=True)
+class EmbeddingProfile:
+    model_id: str
+    model_revision: str
+    dimension: int
+    kind: str
+    state: str
+    profile: str
+    problems: tuple[str, ...] = ()
+def _hash_embed_text(text: str) -> list[float]:
+    vec = [0.0] * FALLBACK_DIMENSION
     for token in tokenize(text):
         digest = hashlib.sha256(token.encode("utf-8", errors="ignore")).digest()
-        idx = int.from_bytes(digest[:2], "big") % DIMENSION
+        idx = int.from_bytes(digest[:2], "big") % FALLBACK_DIMENSION
         sign = -1.0 if digest[2] % 2 else 1.0
         vec[idx] += sign
     norm = math.sqrt(sum(value * value for value in vec)) or 1.0
     return [round(value / norm, 8) for value in vec]
+def _fallback_profile(*problems: str) -> EmbeddingProfile:
+    return EmbeddingProfile(
+        model_id=FALLBACK_MODEL_ID,
+        model_revision=FALLBACK_MODEL_REVISION,
+        dimension=FALLBACK_DIMENSION,
+        kind="deterministic_embedding",
+        state="available",
+        profile="local_context_embedding_fallback",
+        problems=tuple(item for item in problems if item),
+    )
+def _fastembed_disabled() -> bool:
+    value = os.environ.get("NEXO_LOCAL_CONTEXT_DISABLE_FASTEMBED", "").strip().lower()
+    if value in {"1", "true", "yes", "on"}:
+        return True
+    # The unit suite uses temporary NEXO homes that intentionally do not carry
+    # model weights. Keep those tests dependency-free unless explicitly opted in.
+    if os.environ.get("NEXO_TEST_DB") and os.environ.get("NEXO_LOCAL_CONTEXT_FASTEMBED_IN_TESTS") != "1":
+        return True
+    return False
+@lru_cache(maxsize=1)
+def _fastembed_state() -> tuple[Any, EmbeddingProfile] | tuple[None, EmbeddingProfile]:
+    if _fastembed_disabled():
+        return None, _fallback_profile("fastembed disabled for this process")
+    try:
+        import local_models
+        from fastembed import TextEmbedding
+        spec = local_models.get_local_model_spec(PRIMARY_MODEL_SPEC)
+        target_dir = local_models.ensure_local_model(spec.name, local_files_only=True)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", message=r"The model .* now uses mean pooling.*", category=UserWarning)
+            model = TextEmbedding(spec.model_id, specific_model_path=str(target_dir))
+        return model, EmbeddingProfile(
+            model_id=spec.model_id,
+            model_revision=spec.revision,
+            dimension=spec.dimension or 384,
+            kind=spec.kind,
+            state="available",
+            profile=spec.name,
+        )
+    except Exception as exc:  # pragma: no cover - host/cache dependent
+        return None, _fallback_profile(str(exc))
+def active_profile() -> EmbeddingProfile:
+    _model, profile = _fastembed_state()
+    return profile
+def reset_cache() -> None:
+    _fastembed_state.cache_clear()
+def embed_record(text: str) -> dict[str, Any]:
+    model, profile = _fastembed_state()
+    if model is not None and profile.kind == "fastembed_embedding":
+        try:
+            vector = list(next(iter(model.embed([text or ""]))))
+            return {
+                "vector": [float(value) for value in vector],
+                "model_id": profile.model_id,
+                "model_revision": profile.model_revision,
+                "dimension": profile.dimension,
+                "profile": profile.profile,
+                "kind": profile.kind,
+            }
+        except Exception:  # pragma: no cover - runtime fallback only
+            pass
+    fallback = _fallback_profile()
+    return {
+        "vector": _hash_embed_text(text),
+        "model_id": fallback.model_id,
+        "model_revision": fallback.model_revision,
+        "dimension": fallback.dimension,
+        "profile": fallback.profile,
+        "kind": fallback.kind,
+    }
+def embed_text(text: str) -> list[float]:
+    return embed_record(text)["vector"]
 def cosine(a: list[float], b: list[float]) -> float:
     if not a or not b or len(a) != len(b):
         return 0.0