npm - nexo-brain - Versions diffs - 7.20.4 → 7.20.9 - Mend

nexo-brain 7.20.4 → 7.20.9

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9)

package/.claude-plugin/plugin.json +1 -1
package/README.md +11 -1
package/bin/windows-wsl-bridge.js +20 -0
package/package.json +1 -1
package/src/crons/sync.py +125 -7
package/src/local_context/api.py +272 -27
package/src/local_context/extractors.py +109 -2
package/src/local_context/privacy.py +111 -0
package/src/tools_hot_context.py +9 -0

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.20.4",
+  "version": "7.20.9",
   "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
   "author": {
     "name": "NEXO Brain",

package/README.md CHANGED Viewed

@@ -18,7 +18,17 @@
 [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
-Version `7.20.4` is the current packaged-runtime line. Patch release over v7.20.3 — Local Context now blocks private dotfiles, hidden project folders and secret-bearing content before chunks, embeddings, graph relations or agent context are created.
+Version `7.20.9` is the current packaged-runtime line. Patch release over v7.20.8 — Local Context scans automatic roots at full operational depth, falls back to crontab when Linux/WSL systemd user timers fail, passes Windows AppData email roots into WSL, and blocks Google API keys before HTML cleaning.
+Previously in `7.20.8`: patch release over v7.20.7 — Local Context recognises Windows Mail package roots and Outlook Mac profile roots as bounded local-email sources instead of rejecting them as generic AppData / Group Containers.
+Previously in `7.20.7`: patch release over v7.20.6 — Local Context email-root bootstrap is deterministic across CI, WSL and migrated profiles while preserving macOS Mail.app, Windows Outlook, Thunderbird and NEXO email coverage.
+Previously in `7.20.6`: patch release over v7.20.5 — Local Context ranks entity matches at chunk level, keeps old entity-matched assets eligible, adds safe local email roots for macOS/Windows/Linux, extracts `.eml`, `.emlx`, `.msg` and NEXO email DB continuity, and exposes local graph relations in pre-action context.
+Previously in `7.20.5`: patch release over v7.20.4 — Local Context status reports elapsed indexing time and a defensive ETA while background jobs remain pending.
+Previously in `7.20.4`: patch release over v7.20.3 — Local Context now blocks private dotfiles, hidden project folders and secret-bearing content before chunks, embeddings, graph relations or agent context are created.
 Previously in `7.20.3`: patch release over v7.20.2 — installer DMG volumes are no longer added as local-memory roots, removed roots purge stale payloads, and doctor can repair removed-root residue.

package/bin/windows-wsl-bridge.js CHANGED Viewed

@@ -93,6 +93,20 @@ function resolveLinuxEnv(env = process.env) {
   return linuxEnv;
 }
+function resolveWindowsHostPathEnv(env = process.env) {
+  const result = {};
+  for (const key of ["LOCALAPPDATA", "APPDATA"]) {
+    const value = String(env[key] || "").trim();
+    if (!value) continue;
+    if (isWindowsStylePath(value)) {
+      result[key] = toWslPath(value);
+    } else if (value.startsWith("/")) {
+      result[key] = value;
+    }
+  }
+  return result;
+}
 function uniqueValues(values = []) {
   const seen = new Set();
   return values.filter((value) => {
@@ -242,6 +256,10 @@ function buildWslExecSpec({
   for (const [key, value] of Object.entries(linuxEnv)) {
     wslArgs.push(`${key}=${value}`);
   }
+  const windowsHostPathEnv = resolveWindowsHostPathEnv(env);
+  for (const [key, value] of Object.entries(windowsHostPathEnv)) {
+    wslArgs.push(`${key}=${value}`);
+  }
   // Build the staging shell script. Stages the bundle from /mnt/c (DrvFs/9P)
   // to /tmp (native ext4) BEFORE invoking node. Without staging, node hangs
@@ -296,6 +314,7 @@ function buildWslExecSpec({
     command: "wsl.exe",
     args: wslArgs,
     linuxEnv,
+    windowsHostPathEnv,
     managedLinuxPath,
     translatedScriptPath,
   };
@@ -338,6 +357,7 @@ module.exports = {
   probeWslUserHome,
   resolveLinuxEnv,
   resolveLinuxUserHome,
+  resolveWindowsHostPathEnv,
   runViaWsl,
   toWslPath,
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "7.20.4",
+  "version": "7.20.9",
   "mcpName": "io.github.wazionapps/nexo",
   "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
   "homepage": "https://nexo-brain.com",

package/src/crons/sync.py CHANGED Viewed

@@ -20,6 +20,7 @@ import json
 import os
 import platform
 import plistlib
+import shlex
 import shutil
 import subprocess
 import sys
@@ -133,6 +134,8 @@ SCHEDULE_FILE = paths.config_dir() / "schedule.json"
 CORE_CRON_MANAGED_ENV = "NEXO_MANAGED_CORE_CRON"
 PERSONAL_CRON_MANAGED_ENV = "NEXO_MANAGED_PERSONAL_CRON"
 PERSONAL_CRON_ID_ENV = "NEXO_PERSONAL_CRON_ID"
+CRONTAB_BEGIN = "# >>> NEXO managed core crons >>>"
+CRONTAB_END = "# <<< NEXO managed core crons <<<"
 RETIRED_CORE_FILES = (
     Path("core") / "scripts" / "nexo-day-orchestrator.sh",
     Path("scripts") / "nexo-day-orchestrator.sh",
@@ -457,6 +460,106 @@ def build_plist(cron: dict) -> dict:
     return plist
+def _shell_join(args: list[str | Path]) -> str:
+    return " ".join(shlex.quote(str(arg)) for arg in args)
+def _cron_schedule(cron: dict) -> str | None:
+    if cron.get("keep_alive"):
+        return None
+    if "interval_seconds" in cron:
+        try:
+            seconds = int(cron["interval_seconds"])
+        except Exception:
+            return None
+        if seconds <= 0 or seconds % 60 != 0:
+            return None
+        minutes = max(1, seconds // 60)
+        return "* * * * *" if minutes == 1 else f"*/{minutes} * * * *"
+    if "schedule" in cron:
+        s = resolve_declared_schedule(cron)
+        hour, minute = int(s.get("hour", 0)), int(s.get("minute", 0))
+        weekday = "*"
+        if "weekday" in s:
+            raw_weekday = int(s["weekday"])
+            weekday = "0" if raw_weekday == 7 else str(raw_weekday)
+        return f"{minute} {hour} * * {weekday}"
+    return None
+def _linux_crontab_entry(cron: dict, exec_cmd: str, stdout_log: Path, stderr_log: Path) -> str | None:
+    schedule = _cron_schedule(cron)
+    if not schedule:
+        return None
+    env_prefix = " ".join(
+        f"{key}={shlex.quote(str(value))}"
+        for key, value in {
+            "HOME": Path.home(),
+            "NEXO_HOME": NEXO_HOME,
+            "NEXO_CODE": _runtime_code_dir(),
+            "PYTHONUNBUFFERED": "1",
+        }.items()
+    )
+    return f"{schedule} {env_prefix} {exec_cmd} >> {shlex.quote(str(stdout_log))} 2>> {shlex.quote(str(stderr_log))}"
+def _strip_managed_crontab_block(body: str) -> str:
+    lines = body.splitlines()
+    kept: list[str] = []
+    skipping = False
+    for line in lines:
+        if line.strip() == CRONTAB_BEGIN:
+            skipping = True
+            continue
+        if line.strip() == CRONTAB_END:
+            skipping = False
+            continue
+        if not skipping:
+            kept.append(line)
+    return "\n".join(kept).rstrip()
+def _install_linux_crontab_fallback(entries: list[str]) -> dict:
+    if not entries:
+        return {"ok": False, "error": "no_crontab_entries"}
+    if not shutil.which("crontab"):
+        return {"ok": False, "error": "crontab_missing"}
+    existing = subprocess.run(["crontab", "-l"], capture_output=True, text=True)
+    current_body = existing.stdout if existing.returncode == 0 else ""
+    unmanaged_body = _strip_managed_crontab_block(current_body)
+    managed_body = "\n".join([CRONTAB_BEGIN, *entries, CRONTAB_END])
+    next_body = f"{unmanaged_body}\n\n{managed_body}\n" if unmanaged_body else f"{managed_body}\n"
+    tmp_path = None
+    try:
+        with tempfile.NamedTemporaryFile("w", encoding="utf-8", delete=False) as fh:
+            tmp_path = fh.name
+            fh.write(next_body)
+        proc = subprocess.run(["crontab", tmp_path], capture_output=True, text=True)
+    finally:
+        if tmp_path:
+            try:
+                Path(tmp_path).unlink(missing_ok=True)
+            except Exception:
+                pass
+    if proc.returncode != 0:
+        return {"ok": False, "error": proc.stderr or proc.stdout or "crontab_install_failed"}
+    return {"ok": True, "entries": len(entries)}
+def _enable_systemd_user_units(units: list[str]) -> dict:
+    errors: list[str] = []
+    daemon = subprocess.run(["systemctl", "--user", "daemon-reload"], capture_output=True, text=True)
+    if daemon.returncode != 0:
+        errors.append(daemon.stderr or daemon.stdout or "systemctl daemon-reload failed")
+    for unit in units:
+        proc = subprocess.run(["systemctl", "--user", "enable", "--now", unit], capture_output=True, text=True)
+        if proc.returncode != 0:
+            errors.append(f"{unit}: {proc.stderr or proc.stdout or 'enable failed'}")
+    return {"ok": not errors, "errors": errors}
 def get_installed_nexo_crons() -> dict[str, Path]:
     """Return dict of cron_id → plist_path for installed NEXO crons."""
     installed = {}
@@ -670,6 +773,9 @@ def sync_linux(dry_run: bool = False):
             python_bin = p
             break
+    enable_units: list[str] = []
+    crontab_entries: list[str] = []
     for cron in manifest_crons:
         cron_id = cron["id"]
         script_src = _resolve_source_artifact(cron["script"])
@@ -683,9 +789,9 @@ def sync_linux(dry_run: bool = False):
             _copy_into_runtime(subdir_src)
         if script_type == "shell":
-            exec_cmd = f"/bin/bash {wrapper_dest} {cron_id} /bin/bash {script_dest}"
+            exec_cmd = _shell_join(["/bin/bash", wrapper_dest, cron_id, "/bin/bash", script_dest])
         else:
-            exec_cmd = f"/bin/bash {wrapper_dest} {cron_id} {python_bin} {script_dest}"
+            exec_cmd = _shell_join(["/bin/bash", wrapper_dest, cron_id, python_bin, script_dest])
         service_path = unit_dir / f"nexo-{cron_id}.service"
         timer_path = unit_dir / f"nexo-{cron_id}.timer"
@@ -734,6 +840,7 @@ StandardError=append:{stderr_log}
         service_path.write_text(service_content)
         if cron.get("keep_alive"):
+            enable_units.append(f"nexo-{cron_id}.service")
             log(f"  Installed keep_alive service: {cron_id}")
             continue
@@ -748,14 +855,25 @@ Persistent=true
 WantedBy=timers.target
 """
         timer_path.write_text(timer_content)
+        enable_units.append(f"nexo-{cron_id}.timer")
+        crontab_entry = _linux_crontab_entry(cron, exec_cmd, stdout_log, stderr_log)
+        if crontab_entry:
+            crontab_entries.append(crontab_entry)
         log(f"  Installed: {cron_id}")
     if not dry_run:
-        subprocess.run(["systemctl", "--user", "daemon-reload"], capture_output=True)
-        for cron in manifest_crons:
-            unit = f"nexo-{cron['id']}.service" if cron.get("keep_alive") else f"nexo-{cron['id']}.timer"
-            subprocess.run(["systemctl", "--user", "enable", "--now", unit], capture_output=True)
-        log("systemd units enabled.")
+        systemd_result = _enable_systemd_user_units(enable_units)
+        if systemd_result.get("ok"):
+            log("systemd units enabled.")
+        else:
+            log(f"WARNING: systemd user timers failed; installing crontab fallback: {systemd_result.get('errors')}")
+            fallback = _install_linux_crontab_fallback(crontab_entries)
+            if not fallback.get("ok"):
+                raise RuntimeError(
+                    "Linux cron activation failed: "
+                    f"systemd={systemd_result.get('errors')} crontab={fallback.get('error')}"
+                )
+            log(f"crontab fallback installed ({fallback.get('entries')} entries).")
     log("Sync complete.")

package/src/local_context/api.py CHANGED Viewed

@@ -26,6 +26,9 @@ LOCAL_INDEX_LINUX_UNIT = "nexo-local-index.service"
 DEFAULT_LIVE_ASSET_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_ASSET_LIMIT", "2000") or "2000")
 DEFAULT_LIVE_DIR_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_DIR_LIMIT", "300") or "300")
 DEFAULT_LIVE_FILE_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_FILE_LIMIT", "1000") or "1000")
+DEFAULT_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_DEFAULT_DEPTH", "24") or "24")
+DEFAULT_EMAIL_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_EMAIL_ROOT_DEPTH", "24") or "24")
+DEFAULT_MOUNTED_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_MOUNTED_ROOT_DEPTH", "24") or "24")
 def ensure_ready() -> None:
@@ -91,6 +94,21 @@ def _dedupe_roots(roots: list[str]) -> list[str]:
     return result
+def _dedupe_root_specs(specs: list[tuple[str, int]]) -> list[tuple[str, int]]:
+    ordered: list[str] = []
+    depths: dict[str, int] = {}
+    for root, depth in specs:
+        normalized = norm_path(root)
+        if not normalized:
+            continue
+        if normalized not in depths:
+            ordered.append(normalized)
+            depths[normalized] = int(depth)
+        else:
+            depths[normalized] = max(depths[normalized], int(depth))
+    return [(root, depths[root]) for root in ordered]
 def _mounted_volume_roots() -> list[str]:
     candidates: list[Path] = []
     if sys.platform == "darwin":
@@ -123,24 +141,78 @@ def _mounted_volume_roots() -> list[str]:
     return roots
+def _local_email_roots() -> list[str]:
+    home = Path.home()
+    roots: list[Path] = [home / ".nexo" / "runtime" / "nexo-email"]
+    mac_roots = [
+        home / "Library" / "Mail",
+        home / "Library" / "Group Containers" / "UBF8T346G9.Office" / "Outlook" / "Outlook 15 Profiles",
+    ]
+    local_app_data = Path(os.environ.get("LOCALAPPDATA") or home / "AppData" / "Local")
+    roaming_app_data = Path(os.environ.get("APPDATA") or home / "AppData" / "Roaming")
+    windows_roots = [
+        home / "Documents" / "Outlook Files",
+        local_app_data / "Microsoft" / "Outlook",
+        roaming_app_data / "Microsoft" / "Outlook",
+        local_app_data / "Packages" / "microsoft.windowscommunicationsapps_8wekyb3d8bbwe" / "LocalState",
+    ]
+    linux_roots = [home / ".thunderbird", home / ".mozilla-thunderbird"]
+    if sys.platform == "darwin":
+        roots.extend(mac_roots)
+    elif sys.platform.startswith("win"):
+        roots.extend(windows_roots)
+    else:
+        roots.extend(linux_roots)
+    # CI and migrated profiles can expose platform-specific mail stores while
+    # running on another OS. Include only the stores that actually exist.
+    for optional_root in [*mac_roots, *windows_roots, *linux_roots]:
+        if optional_root.exists() and optional_root not in roots:
+            roots.append(optional_root)
+    return [str(root) for root in roots]
 def default_roots() -> list[str]:
+    return [root for root, _depth in default_root_specs()]
+def default_root_specs() -> list[tuple[str, int]]:
     home = Path.home()
     configured = os.environ.get("NEXO_LOCAL_INDEX_DEFAULT_ROOTS", "").strip()
     if configured:
-        return _dedupe_roots([item for item in configured.split(os.pathsep) if item.strip()])
-    return _dedupe_roots([str(home), *_mounted_volume_roots()])
+        return _dedupe_root_specs(
+            [(item, DEFAULT_ROOT_DEPTH) for item in configured.split(os.pathsep) if item.strip()]
+        )
+    return _dedupe_root_specs(
+        [(str(home), DEFAULT_ROOT_DEPTH)]
+        + [(root, DEFAULT_EMAIL_ROOT_DEPTH) for root in _local_email_roots()]
+        + [(root, DEFAULT_MOUNTED_ROOT_DEPTH) for root in _mounted_volume_roots()]
+    )
 def ensure_default_roots() -> dict:
-    existing_paths = {row["root_path"] for row in list_roots()}
+    existing = {row["root_path"]: row for row in list_roots()}
     created = []
-    for root in default_roots():
-        if root in existing_paths:
-            continue
+    updated = []
+    for root, depth in default_root_specs():
         candidate = Path(root).expanduser()
-        if candidate.exists() and candidate.is_dir():
-            created.append(add_root(str(candidate), mode="normal", depth=2))
-    return {"ok": True, "created": len(created), "roots": list_roots()}
+        if not candidate.exists() or not candidate.is_dir():
+            continue
+        existing_row = existing.get(norm_path(str(candidate)))
+        if existing_row:
+            current_depth = int(existing_row.get("depth") or 0)
+            if current_depth < depth:
+                conn = _conn()
+                conn.execute(
+                    "UPDATE local_index_roots SET depth=?, updated_at=? WHERE root_path=?",
+                    (depth, now(), existing_row["root_path"]),
+                )
+                conn.commit()
+                updated.append({"root_path": existing_row["root_path"], "depth": depth})
+            continue
+        created.append(add_root(str(candidate), mode="normal", depth=depth))
+    return {"ok": True, "created": len(created), "updated": len(updated), "roots": list_roots()}
 def _should_skip_mounted_root(candidate: Path) -> bool:
@@ -471,7 +543,7 @@ def _file_type(path: Path) -> str:
         return "photo"
     if suffix in {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".css", ".html"}:
         return "code"
-    if suffix in {".eml"}:
+    if suffix in {".eml", ".emlx", ".msg", ".pst", ".ost"}:
         return "email"
     if suffix in {".pdf", ".docx", ".pptx", ".xlsx", ".md", ".txt", ".csv", ".tsv"}:
         return "document"
@@ -1316,7 +1388,7 @@ def process_jobs(*, limit: int = 100) -> dict:
             if job_type == "light_extraction":
                 text, metadata = extract_text(Path(row["path"]))
                 version_id = _latest_version_id(conn, asset_id)
-                if contains_secret(text):
+                if metadata.get("content_secret_detected") or contains_secret(text):
                     _mark_content_secret_assets(conn, [asset_id])
                     conn.execute(
                         "UPDATE local_index_jobs SET status='done', updated_at=?, last_error_code='content_secret_blocked' WHERE job_id=?",
@@ -1652,6 +1724,29 @@ def _service_cycle_observation(conn) -> dict:
     return observation
+def _index_timing(conn, *, done: int, active_jobs: int, percent: int) -> dict:
+    first_seen = conn.execute(
+        """
+        SELECT MIN(created_at) AS created_at
+        FROM local_index_logs
+        WHERE event IN ('root_added', 'scan_started', 'scan_finished', 'jobs_processed', 'service_cycle_finished')
+        """
+    ).fetchone()["created_at"] or 0
+    if not first_seen:
+        first_seen = conn.execute(
+            """
+            SELECT MIN(first_seen_at) AS first_seen_at
+            FROM local_assets
+            WHERE status!='deleted'
+            """
+        ).fetchone()["first_seen_at"] or 0
+    elapsed_seconds = max(0, int(now() - float(first_seen))) if first_seen else 0
+    eta_seconds = None
+    if elapsed_seconds > 0 and done > 0 and active_jobs > 0 and 0 < percent < 100:
+        eta_seconds = max(0, int((elapsed_seconds / max(done, 1)) * active_jobs))
+    return {"elapsed_seconds": elapsed_seconds, "eta_seconds": eta_seconds}
 def _service_scheduler_has_error(service: dict) -> bool:
     if service.get("manager") == "launchagent":
         code = str(service.get("last_exit_code") or "").strip()
@@ -1725,6 +1820,7 @@ def status() -> dict:
     active_jobs = pending + running_jobs + failed_jobs
     total_jobs = active_jobs + done
     percent = 100 if total_jobs == 0 else int((done / max(total_jobs, 1)) * 100)
+    timing = _index_timing(conn, done=done, active_jobs=active_jobs, percent=percent)
     roots = list_roots()
     volumes = []
     by_volume = conn.execute(
@@ -1770,8 +1866,8 @@ def status() -> dict:
             "jobs_pending": pending,
             "jobs_running": running_jobs,
             "jobs_failed": failed_jobs,
-            "elapsed_seconds": 0,
-            "eta_seconds": None,
+            "elapsed_seconds": timing["elapsed_seconds"],
+            "eta_seconds": timing["eta_seconds"],
         },
         "volumes": volumes,
         "roots": roots,
@@ -1856,10 +1952,112 @@ def _search_text_score(query: str, text: str) -> float:
     return len(q & tokens) / max(len(q), 1)
-def context_query(query: str, *, intent: str = "answer", limit: int = 12, evidence_required: bool = True, current_context: str = "") -> dict:
-    conn = _conn()
-    qvec = embeddings.embed_text(query)
+_QUERY_STOPWORDS = {
+    "about",
+    "archivos",
+    "con",
+    "context",
+    "contexto",
+    "cuanto",
+    "dame",
+    "del",
+    "desde",
+    "documentos",
+    "donde",
+    "esta",
+    "está",
+    "file",
+    "files",
+    "hay",
+    "los",
+    "para",
+    "que",
+    "qué",
+    "related",
+    "relacionado",
+    "sabes",
+    "sobre",
+    "todo",
+    "what",
+    "where",
+}
+def _query_terms(query: str) -> list[str]:
+    terms = []
+    for token in tokenize(query):
+        if len(token) < 3 or token in _QUERY_STOPWORDS:
+            continue
+        if token not in terms:
+            terms.append(token)
+    return terms[:10]
+def _entity_match_score(query_lower: str, terms: list[str], name: str) -> float:
+    entity = (name or "").strip().lower()
+    if not entity:
+        return 0.0
+    entity_terms = set(tokenize(entity))
+    if entity and entity in query_lower:
+        return 1.0
+    if not terms:
+        return 0.0
+    term_set = set(terms)
+    overlap = term_set & entity_terms
+    if overlap:
+        return min(0.95, 0.45 + (len(overlap) / max(len(entity_terms), 1)) * 0.5)
+    if any(term in entity for term in terms):
+        return 0.6
+    return 0.0
+def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dict], dict[str, float]]:
+    query_lower = (query or "").strip().lower()
+    terms = _query_terms(query)
+    if not query_lower or not terms:
+        return [], {}
+    clauses = " OR ".join("lower(e.name) LIKE ?" for _ in terms)
+    params = [f"%{term}%" for term in terms]
     rows = conn.execute(
+        f"""
+        SELECT DISTINCT e.name, e.entity_type, e.asset_id, a.path, a.privacy_class
+        FROM local_entities e
+        JOIN local_assets a ON a.asset_id = e.asset_id
+        WHERE a.status='active'
+          AND a.privacy_class='normal'
+          AND ({clauses})
+        LIMIT ?
+        """,
+        [*params, max(int(limit) * 20, 40)],
+    ).fetchall()
+    matches = []
+    boosts: dict[str, float] = {}
+    seen = set()
+    for row in rows:
+        if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
+            continue
+        score = _entity_match_score(query_lower, terms, str(row["name"] or ""))
+        if score <= 0:
+            continue
+        key = (row["name"], row["entity_type"], row["asset_id"])
+        if key not in seen:
+            matches.append({
+                "name": row["name"],
+                "entity_type": row["entity_type"],
+                "asset_id": row["asset_id"],
+                "score": round(float(score), 4),
+            })
+            seen.add(key)
+        boosts[row["asset_id"]] = max(boosts.get(row["asset_id"], 0.0), float(score))
+    matches.sort(key=lambda item: item.get("score", 0), reverse=True)
+    return matches[: int(limit)], boosts
+def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
+    base_rows = conn.execute(
         """
         SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
         FROM local_chunks c
@@ -1869,17 +2067,68 @@ def context_query(query: str, *, intent: str = "answer", limit: int = 12, eviden
         WHERE a.status='active'
           AND a.privacy_class='normal'
         ORDER BY c.created_at DESC
-        LIMIT 5000
-        """
+        LIMIT ?
+        """,
+        (int(base_limit),),
+    ).fetchall()
+    if not entity_asset_ids:
+        return base_rows
+    placeholders = ",".join("?" for _ in entity_asset_ids)
+    entity_rows = conn.execute(
+        f"""
+        SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
+        FROM local_chunks c
+        JOIN local_assets a ON a.asset_id = c.asset_id
+        LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
+        LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
+        WHERE a.status='active'
+          AND a.privacy_class='normal'
+          AND c.asset_id IN ({placeholders})
+        ORDER BY c.chunk_index ASC
+        LIMIT ?
+        """,
+        [*entity_asset_ids, max(1000, len(entity_asset_ids) * 80)],
     ).fetchall()
+    rows = []
+    seen_chunks = set()
+    for row in [*entity_rows, *base_rows]:
+        chunk_id = row["chunk_id"]
+        if chunk_id in seen_chunks:
+            continue
+        seen_chunks.add(chunk_id)
+        rows.append(row)
+    return rows
+def context_query(query: str, *, intent: str = "answer", limit: int = 12, evidence_required: bool = True, current_context: str = "") -> dict:
+    conn = _conn()
+    qvec = embeddings.embed_text(query)
+    entities_payload, entity_boosts = _entity_matches_for_query(conn, query, limit=max(int(limit), 1))
+    rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
     scored = []
     for row in rows:
         if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
             continue
         vector = json_loads(row["vector_json"], [])
-        score = max(_search_text_score(query, row["text"]), embeddings.cosine(qvec, vector))
+        text_score = _search_text_score(query, row["text"])
+        path_score = _search_text_score(query, row["path"] or "")
+        summary_score = _search_text_score(query, row["summary"] or "")
+        entity_score = entity_boosts.get(row["asset_id"], 0.0)
+        vector_score = embeddings.cosine(qvec, vector)
+        score = max(text_score, path_score, summary_score, vector_score)
+        if entity_score > 0:
+            direct_score = max(text_score, path_score, summary_score)
+            if direct_score > 0:
+                entity_rank = 0.82 + (0.42 * text_score) + (0.18 * path_score) + (0.12 * summary_score)
+                score = max(score, entity_rank + min(0.2, entity_score * 0.2))
+            else:
+                # Entity-level matches keep older assets eligible, but do not let
+                # unrelated chunks from a long document outrank direct evidence.
+                score = max(score, min(0.48, 0.28 + entity_score * 0.2))
         if score > 0:
-            scored.append((score, row))
+            scored.append((min(float(score), 1.6), row))
     scored.sort(key=lambda item: item[0], reverse=True)
     assets = []
     chunks = []
@@ -1902,14 +2151,10 @@ def context_query(query: str, *, intent: str = "answer", limit: int = 12, eviden
             "score": round(float(score), 4),
         })
         evidence_refs.append(f"local_asset:{row['asset_id']}#chunk:{row['chunk_id']}")
-    entity_rows = conn.execute(
-        "SELECT DISTINCT name, entity_type, asset_id FROM local_entities WHERE lower(name) LIKE ? LIMIT ?",
-        (f"%{query.lower()}%", int(limit)),
-    ).fetchall()
-    entities_payload = [dict(row) for row in entity_rows]
     relations_payload: list[dict] = []
-    if seen_assets:
-        asset_ids = list(seen_assets)[: int(limit)]
+    relation_asset_ids = list(dict.fromkeys([*seen_assets, *entity_boosts.keys()]))[: int(limit)]
+    if relation_asset_ids:
+        asset_ids = relation_asset_ids
         placeholders = ",".join("?" for _ in asset_ids)
         relation_rows = conn.execute(
             f"""

package/src/local_context/extractors.py CHANGED Viewed

@@ -4,12 +4,15 @@ import csv
 import html
 import json
 import re
+import sqlite3
 import zipfile
 from email import policy
 from email.parser import BytesParser
 from pathlib import Path
 from xml.etree import ElementTree
+from .privacy import is_local_email_db
 MAX_TEXT_BYTES = 512 * 1024
 MAX_CHARS = 120_000
@@ -38,6 +41,7 @@ SECRET_PATTERNS: tuple[re.Pattern, ...] = (
     re.compile(r"\bpk-(?:[a-z]+-)?[A-Za-z0-9_\-]{20,}\b"),
     re.compile(r"\b(ghp|gho|ghu|ghs|ghr|github_pat|glpat|xoxb|xoxp|shpat)_[A-Za-z0-9_]{16,}\b", re.I),
     re.compile(r"\b(AKIA|ASIA)[A-Z0-9]{16,}\b"),
+    re.compile(r"\bAIza[0-9A-Za-z_-]{30,}\b"),
     re.compile(r"\bey[A-Za-z0-9_-]{10,}\.ey[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b"),
     re.compile(r"-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----", re.I),
     re.compile(r"\b([A-Z][A-Z0-9_]*(?:TOKEN|SECRET|KEY|PASSWORD|PASS)\s*[:=]\s*)['\"]?[A-Za-z0-9._/+=\-]{12,}", re.I),
@@ -73,8 +77,8 @@ def _extract_csv(path: Path) -> str:
     return "\n".join(rows)[:MAX_CHARS]
-def _extract_eml(path: Path) -> tuple[str, dict]:
-    msg = BytesParser(policy=policy.default).parsebytes(path.read_bytes()[:MAX_TEXT_BYTES])
+def _extract_email_bytes(data: bytes) -> tuple[str, dict]:
+    msg = BytesParser(policy=policy.default).parsebytes(data[:MAX_TEXT_BYTES])
     meta = {
         "subject": str(msg.get("subject") or ""),
         "from": str(msg.get("from") or ""),
@@ -92,6 +96,99 @@ def _extract_eml(path: Path) -> tuple[str, dict]:
     return "\n".join([meta["subject"], meta["from"], meta["to"], text])[:MAX_CHARS], meta
+def _extract_eml(path: Path) -> tuple[str, dict]:
+    """Read an ``.eml`` file and parse its first ``MAX_TEXT_BYTES`` bytes with ``_extract_email_bytes``."""
+    raw = path.read_bytes()
+    head = raw[:MAX_TEXT_BYTES]
+    return _extract_email_bytes(head)
+def _extract_emlx(path: Path) -> tuple[str, dict]:
+    """Parse an ``.emlx`` file: strip the leading byte-count line and any trailing XML block, then parse the message.
+    The returned metadata is whatever ``_extract_email_bytes`` produced plus ``apple_mail_message = True``.
+    """
+    raw = path.read_bytes()[:MAX_TEXT_BYTES]
+    header, newline, body = raw.partition(b"\n")
+    count_field = header.strip()
+    # Default to the whole buffer when the first line is not a plain decimal count.
+    message = raw
+    if newline and count_field.isdigit():
+        byte_count = int(count_field)
+        message = body[:byte_count] if byte_count > 0 else body
+    # Anything from the first line that starts with "<?xml" onward is dropped before parsing.
+    xml_marker = b"\n<?xml"
+    if xml_marker in message:
+        message, _, _ = message.partition(xml_marker)
+    text, meta = _extract_email_bytes(message)
+    meta["apple_mail_message"] = True
+    return text, meta
+def _printable_binary_text(path: Path) -> str:
+    """Salvage printable text runs from the first ``MAX_TEXT_BYTES`` bytes of a binary file.
+    Runs of four or more allowed characters are stripped, joined with newlines and capped at ``MAX_CHARS``.
+    """
+    data = path.read_bytes()[:MAX_TEXT_BYTES]
+    # A NUL byte in the first 2000 bytes is treated as a hint of UTF-16 text; otherwise latin-1 maps bytes 1:1.
+    decoded = data.decode("utf-16", errors="ignore") if b"\x00" in data[:2000] else data.decode("latin-1", errors="ignore")
+    # Brackets are escaped once. The earlier ``\\[\\]`` form ended the character class at the first ``]``,
+    # turning the rest into a literal "{}" plus repeated "]", so ordinary text never matched.
+    pieces = re.findall(r"[\wÀ-ÿ@./:=+\- ,;()\[\]{}]{4,}", decoded)
+    return "\n".join(piece.strip() for piece in pieces if piece.strip())[:MAX_CHARS]
+def _extract_msg(path: Path) -> tuple[str, dict]:
+    """Extract subject, sender, recipients and body from a ``.msg`` file.
+    Uses the optional ``extract_msg`` package when it can be imported. On any failure (package missing,
+    unreadable file, error while closing) it falls back to ``_printable_binary_text`` and reports
+    ``{"extractor": "msg_fallback"}``.
+    """
+    try:
+        import extract_msg  # type: ignore
+        message = extract_msg.Message(str(path))
+        try:
+            meta = {
+                "subject": str(getattr(message, "subject", "") or ""),
+                "from": str(getattr(message, "sender", "") or ""),
+                "to": str(getattr(message, "to", "") or ""),
+                "date": str(getattr(message, "date", "") or ""),
+                "extractor": "msg",
+            }
+            body = str(getattr(message, "body", "") or "")
+        finally:
+            # Close even when reading an attribute raises, so the opened message is not leaked.
+            close = getattr(message, "close", None)
+            if callable(close):
+                close()
+        return "\n".join([meta["subject"], meta["from"], meta["to"], body])[:MAX_CHARS], meta
+    except Exception:
+        return _printable_binary_text(path), {"extractor": "msg_fallback"}
+def _table_names(conn: sqlite3.Connection) -> set[str]:
+    """Return the names of every table recorded in ``sqlite_master`` for *conn*."""
+    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
+    names: set[str] = set()
+    for record in cursor.fetchall():
+        names.add(str(record[0]))
+    return names
+def _select_existing_columns(conn: sqlite3.Connection, table: str, columns: list[str]) -> list[str]:
+    """Return the entries of *columns* that exist in *table*, keeping the order of *columns*.
+    The table name is interpolated into the PRAGMA statement as-is, so it must be a trusted identifier.
+    """
+    info_rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
+    present = set()
+    for info in info_rows:
+        # Index 1 of each table_info row is the column name.
+        present.add(str(info[1]))
+    return [name for name in columns if name in present]
+def _extract_nexo_email_db(path: Path) -> tuple[str, dict]:
+    """Dump recent rows from an email runtime SQLite database opened read-only.
+    Paths rejected by ``is_local_email_db`` return ``("", {"extractor": "sqlite_blocked"})``. Otherwise up to
+    1000 rows are read from each of the ``emails`` and ``sent_email_events`` tables (newest first when a
+    timestamp column exists), every value is cut to 4000 characters, and the joined text is capped at
+    ``MAX_CHARS``. A database that cannot be opened or queried yields empty text with
+    ``state = "locked_or_unavailable"``.
+    """
+    from urllib.parse import quote
+    if not is_local_email_db(str(path)):
+        return "", {"extractor": "sqlite_blocked"}
+    unavailable = {"extractor": "nexo_email_db", "state": "locked_or_unavailable"}
+    # Percent-encode the path so "?", "#" or "%" inside it cannot be parsed as URI syntax by SQLite.
+    uri = f"file:{quote(str(path))}?mode=ro"
+    parts: list[str] = []
+    try:
+        conn = sqlite3.connect(uri, uri=True, timeout=1)
+    except Exception:
+        return "", unavailable
+    try:
+        tables = _table_names(conn)
+        if "emails" in tables:
+            cols = _select_existing_columns(
+                conn,
+                "emails",
+                ["from_addr", "from_name", "subject", "received_at", "status", "body", "response"],
+            )
+            if not cols:
+                return "", {"extractor": "nexo_email_db", "tables": sorted(tables)}
+            order = "received_at" if "received_at" in cols else "rowid"
+            for row in conn.execute(f"SELECT {', '.join(cols)} FROM emails ORDER BY {order} DESC LIMIT 1000").fetchall():
+                parts.append(" | ".join(str(value or "")[:4000] for value in row))
+        if "sent_email_events" in tables:
+            cols = _select_existing_columns(
+                conn,
+                "sent_email_events",
+                ["sender", "to_addrs", "cc_addrs", "subject", "sent_at", "status", "body_text"],
+            )
+            if cols:
+                order = "sent_at" if "sent_at" in cols else "rowid"
+                for row in conn.execute(f"SELECT {', '.join(cols)} FROM sent_email_events ORDER BY {order} DESC LIMIT 1000").fetchall():
+                    parts.append(" | ".join(str(value or "")[:4000] for value in row))
+    except sqlite3.Error:
+        # Busy locks and corrupt files are reported by the first query, not by connect().
+        return "", unavailable
+    finally:
+        conn.close()
+    return "\n".join(parts)[:MAX_CHARS], {"extractor": "nexo_email_db", "tables": sorted(tables)}
 def _zip_xml_text(path: Path, members: list[str]) -> str:
     pieces: list[str] = []
     with zipfile.ZipFile(path) as zf:
@@ -176,6 +273,14 @@ def extract_text(path: Path) -> tuple[str, dict]:
     elif suffix == ".eml":
         text, metadata = _extract_eml(path)
         metadata["extractor"] = "eml"
+    elif suffix == ".emlx":
+        text, metadata = _extract_emlx(path)
+        metadata["extractor"] = "emlx"
+    elif suffix == ".msg":
+        text, metadata = _extract_msg(path)
+        metadata["extractor"] = metadata.get("extractor") or "msg"
+    elif suffix == ".db" and is_local_email_db(str(path)):
+        text, metadata = _extract_nexo_email_db(path)
     elif suffix == ".pdf":
         text = _extract_pdf(path)
     elif suffix == ".docx":
@@ -186,6 +291,8 @@ def extract_text(path: Path) -> tuple[str, dict]:
         text = _extract_xlsx(path)
     else:
         text = ""
+    if contains_secret(text):
+        metadata["content_secret_detected"] = True
     return clean_text(text), metadata

package/src/local_context/privacy.py CHANGED Viewed

@@ -67,6 +67,36 @@ SENSITIVE_PARTS = {
     "browser profile",
 }
+# Lower-case file names of SQLite databases that is_local_email_db accepts inside a local email tree.
+EMAIL_RUNTIME_DB_NAMES = {
+    "email.db",
+    "email-tracker.db",
+    "emails.db",
+    "monitor.db",
+    "nexo-email.db",
+}
+# File suffixes accepted for files under ".nexo/runtime/nexo-email/attachments".
+EMAIL_ATTACHMENT_SUFFIXES = {
+    ".csv",
+    ".docx",
+    ".eml",
+    ".emlx",
+    ".html",
+    ".md",
+    ".pdf",
+    ".pptx",
+    ".txt",
+    ".xlsx",
+}
+# Suffixes of single-message email files; presumably the set with dedicated text extractors — confirm against callers.
+EMAIL_EXTRACTABLE_SUFFIXES = {".eml", ".emlx", ".msg"}
+# Outlook for Mac item suffixes allowed (with .eml/.msg/.pst/.ost) inside Outlook and Windows Mail trees.
+# NOTE(review): the name suggests these are inventoried only, not text-extracted — confirm.
+OUTLOOK_MAC_INVENTORY_SUFFIXES = {
+    ".olk15message",
+    ".olk15msgsource",
+    ".olk15msgattach",
+    ".olk15event",
+    ".olk15contact",
+}
 NOISY_PARTS = {
     "node_modules",
     "vendor",
@@ -173,6 +203,77 @@ def _contains_path_marker(lowered: str, markers: set[str]) -> bool:
     return any(marker in lowered for marker in markers)
+def _is_under_marker(lowered: str, marker: str) -> bool:
+    """Tell whether *lowered* ends with, or passes through, the directory sequence *marker*.
+    The marker is lower-cased and stripped of surrounding slashes; an empty marker never matches.
+    """
+    needle = marker.strip("/").lower()
+    if needle == "":
+        return False
+    if f"/{needle}/" in lowered:
+        return True
+    return lowered.endswith(f"/{needle}")
+def _is_inside_windows_mail_package(lowered: str) -> bool:
+    """Tell whether *lowered* contains the Windows Mail app package path (any package-name suffix is accepted)."""
+    package_root = "/appdata/local/packages/microsoft.windowscommunicationsapps"
+    return package_root in lowered
+def _is_inside_outlook_mac_profile(lowered: str) -> bool:
+    """Tell whether *lowered* contains the Outlook for Mac group-container path."""
+    profile_root = "/library/group containers/ubf8t346g9.office/outlook"
+    return profile_root in lowered
+def is_local_email_tree(path: str) -> bool:
+    """Tell whether *path* lies in a known local mail-client or NEXO email runtime directory."""
+    lowered = _normalized(path)
+    if _is_inside_windows_mail_package(lowered):
+        return True
+    if _is_inside_outlook_mac_profile(lowered):
+        return True
+    email_roots = (
+        "library/mail",
+        ".nexo/runtime/nexo-email",
+        "documents/outlook files",
+        "appdata/local/microsoft/outlook",
+        "appdata/roaming/microsoft/outlook",
+        "appdata/local/packages/microsoft.windowscommunicationsapps",
+        ".thunderbird",
+        ".mozilla-thunderbird",
+    )
+    for root in email_roots:
+        if _is_under_marker(lowered, root):
+            return True
+    return False
+def is_local_email_db(path: str) -> bool:
+    """Tell whether *path* is inside a local email tree and its lower-cased file name is in ``EMAIL_RUNTIME_DB_NAMES``."""
+    candidate = Path(path)
+    if not is_local_email_tree(path):
+        return False
+    filename = candidate.name.lower()
+    return filename in EMAIL_RUNTIME_DB_NAMES
+def is_allowed_local_email_file(path: str) -> bool:
+    """Tell whether a file inside a local email tree is of a type accepted for that tree.
+    Returns False for paths outside any email tree and for paths flagged by ``is_sensitive_path``.
+    Otherwise the first matching tree decides, based on the lower-cased file suffix.
+    """
+    if not is_local_email_tree(path):
+        return False
+    p = Path(path)
+    lowered = _normalized(path)
+    suffix = p.suffix.lower()
+    # The sensitive-path check runs before any tree-specific allow-list.
+    if is_sensitive_path(path):
+        return False
+    # NEXO email runtime: known databases, attachment types under attachments/, otherwise raw messages only.
+    if _is_under_marker(lowered, ".nexo/runtime/nexo-email"):
+        if is_local_email_db(path):
+            return True
+        if _is_under_marker(lowered, ".nexo/runtime/nexo-email/attachments"):
+            return suffix in EMAIL_ATTACHMENT_SUFFIXES
+        return suffix in {".eml", ".emlx"}
+    # "library/mail" tree: message files only.
+    if _is_under_marker(lowered, "library/mail"):
+        return suffix in {".eml", ".emlx"}
+    # Outlook (Mac and Windows) and Windows Mail trees share one allow-list.
+    if any(
+        _is_under_marker(lowered, marker)
+        for marker in (
+            "library/group containers/ubf8t346g9.office/outlook",
+            "documents/outlook files",
+            "appdata/local/microsoft/outlook",
+            "appdata/roaming/microsoft/outlook",
+            "appdata/local/packages/microsoft.windowscommunicationsapps",
+        )
+    ) or _is_inside_windows_mail_package(lowered) or _is_inside_outlook_mac_profile(lowered):
+        return suffix in {".eml", ".msg", ".pst", ".ost"} | OUTLOOK_MAC_INVENTORY_SUFFIXES
+    # Thunderbird profiles: the empty suffix is allowed too, presumably for extension-less mbox files — confirm.
+    if _is_under_marker(lowered, ".thunderbird") or _is_under_marker(lowered, ".mozilla-thunderbird"):
+        return suffix in {".eml", ".mbox", ""}
+    return False
 def _has_transient_project_part(path: str) -> bool:
     parts = list(_normalized(path).replace(":", "/").split("/"))
     for index, part in enumerate(parts):
@@ -239,6 +340,8 @@ def classify_path(path: str) -> tuple[int, str, str]:
     lowered = _normalized(path)
     parts = _parts(path)
+    if is_local_email_tree(path) and (Path(path).suffix == "" or is_allowed_local_email_file(path)):
+        return 2, "normal", "local_email_path"
     if is_sensitive_path(path):
         return 1, "sensitive_inventory_only", "sensitive_path"
     if is_private_profile_path(path):
@@ -253,6 +356,8 @@ def classify_path(path: str) -> tuple[int, str, str]:
 def should_skip_tree(path: str) -> bool:
     lowered = _normalized(path)
     parts = _parts(path)
+    if is_local_email_tree(path):
+        return False
     if any(item in lowered for item in SYSTEM_PARTS):
         return True
     if is_sensitive_path(path) or is_private_profile_path(path):
@@ -263,6 +368,8 @@ def should_skip_tree(path: str) -> bool:
 def should_skip_file(path: str) -> bool:
     lowered = _normalized(path)
     parts = _parts(path)
+    if is_local_email_tree(path):
+        return not is_allowed_local_email_file(path)
     if any(item in lowered for item in SYSTEM_PARTS):
         return True
     if is_sensitive_path(path) or is_private_profile_path(path):
@@ -282,6 +389,8 @@ def should_extract(path: str, depth: int) -> bool:
     if should_skip_file(path):
         return False
     suffix = Path(path).suffix.lower()
+    if is_local_email_db(path):
+        return True
     if suffix in {
         ".txt",
         ".md",
@@ -302,6 +411,8 @@ def should_extract(path: str, depth: int) -> bool:
         ".csv",
         ".tsv",
         ".eml",
+        ".emlx",
+        ".msg",
         ".pdf",
         ".docx",
         ".pptx",

package/src/tools_hot_context.py CHANGED Viewed

@@ -43,6 +43,15 @@ def _format_local_context_evidence(query: str, *, limit: int = 4) -> str:
     refs = result.get("evidence_refs") or []
     if refs:
         lines.append(f"Evidence refs: {', '.join(str(ref) for ref in refs[:limit])}")
+    relations = result.get("relations") or []
+    if relations:
+        lines.append("Local relations:")
+        for relation in relations[:limit]:
+            relation_type = str(relation.get("relation_type") or "related")
+            target = str(relation.get("target_ref") or relation.get("target_asset_id") or "").strip()
+            evidence = str(relation.get("evidence") or "").strip()
+            suffix = f" — {evidence[:120]}" if evidence else ""
+            lines.append(f"- {relation_type}: {target}{suffix}")
     return "\n".join(lines)