nexo-brain 7.20.4 → 7.20.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.4",
3
+ "version": "7.20.9",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,7 +18,17 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.20.4` is the current packaged-runtime line. Patch release over v7.20.3 — Local Context now blocks private dotfiles, hidden project folders and secret-bearing content before chunks, embeddings, graph relations or agent context are created.
21
+ Version `7.20.9` is the current packaged-runtime line. Patch release over v7.20.8 — Local Context scans automatic roots at full operational depth, falls back to crontab when Linux/WSL systemd user timers fail, passes Windows AppData email roots into WSL, and blocks Google API keys before HTML cleaning.
22
+
23
+ Previously in `7.20.8`: patch release over v7.20.7 — Local Context recognises Windows Mail package roots and Outlook Mac profile roots as bounded local-email sources instead of rejecting them as generic AppData / Group Containers.
24
+
25
+ Previously in `7.20.7`: patch release over v7.20.6 — Local Context email-root bootstrap is deterministic across CI, WSL and migrated profiles while preserving macOS Mail.app, Windows Outlook, Thunderbird and NEXO email coverage.
26
+
27
+ Previously in `7.20.6`: patch release over v7.20.5 — Local Context ranks entity matches at chunk level, keeps old entity-matched assets eligible, adds safe local email roots for macOS/Windows/Linux, extracts `.eml`, `.emlx`, `.msg` and NEXO email DB continuity, and exposes local graph relations in pre-action context.
28
+
29
+ Previously in `7.20.5`: patch release over v7.20.4 — Local Context status reports elapsed indexing time and a defensive ETA while background jobs remain pending.
30
+
31
+ Previously in `7.20.4`: patch release over v7.20.3 — Local Context now blocks private dotfiles, hidden project folders and secret-bearing content before chunks, embeddings, graph relations or agent context are created.
22
32
 
23
33
  Previously in `7.20.3`: patch release over v7.20.2 — installer DMG volumes are no longer added as local-memory roots, removed roots purge stale payloads, and doctor can repair removed-root residue.
24
34
 
@@ -93,6 +93,20 @@ function resolveLinuxEnv(env = process.env) {
93
93
  return linuxEnv;
94
94
  }
95
95
 
96
/**
 * Collect the Windows AppData variables that are forwarded to the WSL command.
 *
 * Reads LOCALAPPDATA and APPDATA from `env`. A value recognised by
 * `isWindowsStylePath` is converted with `toWslPath`; a value that already
 * starts with "/" is passed through unchanged; blank or unrecognised values
 * are left out of the result.
 *
 * @param {Object} [env=process.env] Environment to read from.
 * @returns {Object} Map of variable name to the forwarded path.
 */
function resolveWindowsHostPathEnv(env = process.env) {
  return ["LOCALAPPDATA", "APPDATA"].reduce((forwarded, name) => {
    const raw = String(env[name] || "").trim();
    if (raw && isWindowsStylePath(raw)) {
      forwarded[name] = toWslPath(raw);
    } else if (raw.startsWith("/")) {
      forwarded[name] = raw;
    }
    return forwarded;
  }, {});
}
109
+
96
110
  function uniqueValues(values = []) {
97
111
  const seen = new Set();
98
112
  return values.filter((value) => {
@@ -242,6 +256,10 @@ function buildWslExecSpec({
242
256
  for (const [key, value] of Object.entries(linuxEnv)) {
243
257
  wslArgs.push(`${key}=${value}`);
244
258
  }
259
+ const windowsHostPathEnv = resolveWindowsHostPathEnv(env);
260
+ for (const [key, value] of Object.entries(windowsHostPathEnv)) {
261
+ wslArgs.push(`${key}=${value}`);
262
+ }
245
263
 
246
264
  // Build the staging shell script. Stages the bundle from /mnt/c (DrvFs/9P)
247
265
  // to /tmp (native ext4) BEFORE invoking node. Without staging, node hangs
@@ -296,6 +314,7 @@ function buildWslExecSpec({
296
314
  command: "wsl.exe",
297
315
  args: wslArgs,
298
316
  linuxEnv,
317
+ windowsHostPathEnv,
299
318
  managedLinuxPath,
300
319
  translatedScriptPath,
301
320
  };
@@ -338,6 +357,7 @@ module.exports = {
338
357
  probeWslUserHome,
339
358
  resolveLinuxEnv,
340
359
  resolveLinuxUserHome,
360
+ resolveWindowsHostPathEnv,
341
361
  runViaWsl,
342
362
  toWslPath,
343
363
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.4",
3
+ "version": "7.20.9",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
package/src/crons/sync.py CHANGED
@@ -20,6 +20,7 @@ import json
20
20
  import os
21
21
  import platform
22
22
  import plistlib
23
+ import shlex
23
24
  import shutil
24
25
  import subprocess
25
26
  import sys
@@ -133,6 +134,8 @@ SCHEDULE_FILE = paths.config_dir() / "schedule.json"
133
134
  CORE_CRON_MANAGED_ENV = "NEXO_MANAGED_CORE_CRON"
134
135
  PERSONAL_CRON_MANAGED_ENV = "NEXO_MANAGED_PERSONAL_CRON"
135
136
  PERSONAL_CRON_ID_ENV = "NEXO_PERSONAL_CRON_ID"
137
+ CRONTAB_BEGIN = "# >>> NEXO managed core crons >>>"
138
+ CRONTAB_END = "# <<< NEXO managed core crons <<<"
136
139
  RETIRED_CORE_FILES = (
137
140
  Path("core") / "scripts" / "nexo-day-orchestrator.sh",
138
141
  Path("scripts") / "nexo-day-orchestrator.sh",
@@ -457,6 +460,106 @@ def build_plist(cron: dict) -> dict:
457
460
  return plist
458
461
 
459
462
 
463
+ def _shell_join(args: list[str | Path]) -> str:
464
+ return " ".join(shlex.quote(str(arg)) for arg in args)
465
+
466
+
467
+ def _cron_schedule(cron: dict) -> str | None:
468
+ if cron.get("keep_alive"):
469
+ return None
470
+ if "interval_seconds" in cron:
471
+ try:
472
+ seconds = int(cron["interval_seconds"])
473
+ except Exception:
474
+ return None
475
+ if seconds <= 0 or seconds % 60 != 0:
476
+ return None
477
+ minutes = max(1, seconds // 60)
478
+ return "* * * * *" if minutes == 1 else f"*/{minutes} * * * *"
479
+ if "schedule" in cron:
480
+ s = resolve_declared_schedule(cron)
481
+ hour, minute = int(s.get("hour", 0)), int(s.get("minute", 0))
482
+ weekday = "*"
483
+ if "weekday" in s:
484
+ raw_weekday = int(s["weekday"])
485
+ weekday = "0" if raw_weekday == 7 else str(raw_weekday)
486
+ return f"{minute} {hour} * * {weekday}"
487
+ return None
488
+
489
+
490
def _linux_crontab_entry(cron: dict, exec_cmd: str, stdout_log: Path, stderr_log: Path) -> str | None:
    """Build one crontab line for *cron*, or ``None`` if it has no cron schedule.

    The line consists of the schedule from ``_cron_schedule``, inline
    environment assignments (HOME, NEXO_HOME, NEXO_CODE, PYTHONUNBUFFERED),
    the command, and appending redirections of stdout and stderr to the given
    log files. Environment values and log paths are quoted with
    ``shlex.quote``; *exec_cmd* is inserted as given.
    """
    when = _cron_schedule(cron)
    if not when:
        return None
    environment = {
        "HOME": Path.home(),
        "NEXO_HOME": NEXO_HOME,
        "NEXO_CODE": _runtime_code_dir(),
        "PYTHONUNBUFFERED": "1",
    }
    assignments = [f"{name}={shlex.quote(str(setting))}" for name, setting in environment.items()]
    env_part = " ".join(assignments)
    out_target = shlex.quote(str(stdout_log))
    err_target = shlex.quote(str(stderr_log))
    return f"{when} {env_part} {exec_cmd} >> {out_target} 2>> {err_target}"
504
+
505
+
506
def _strip_managed_crontab_block(body: str) -> str:
    """Return *body* without the NEXO-managed crontab section.

    Lines from a ``CRONTAB_BEGIN`` marker up to and including the next
    ``CRONTAB_END`` marker are dropped; markers are compared after stripping
    surrounding whitespace. If no end marker follows a begin marker, every
    later line is dropped as well. The surviving lines are re-joined with
    newlines and trailing whitespace is removed.
    """
    survivors: list[str] = []
    inside_block = False
    for raw_line in body.splitlines():
        marker = raw_line.strip()
        if marker == CRONTAB_BEGIN:
            inside_block = True
        elif marker == CRONTAB_END:
            inside_block = False
        elif not inside_block:
            survivors.append(raw_line)
    return "\n".join(survivors).rstrip()
520
+
521
+
522
def _install_linux_crontab_fallback(entries: list[str]) -> dict:
    """Install *entries* as the NEXO-managed section of the user's crontab.

    The current crontab is read with ``crontab -l`` (a non-zero exit is treated
    as an empty crontab), any previous managed section is removed, and the new
    section is appended between the begin/end markers. The result is written to
    a temporary file and loaded with ``crontab <file>``.

    Returns:
        ``{"ok": True, "entries": <count>}`` on success, otherwise
        ``{"ok": False, "error": <reason>}``.
    """
    if not entries:
        return {"ok": False, "error": "no_crontab_entries"}
    if not shutil.which("crontab"):
        return {"ok": False, "error": "crontab_missing"}

    listing = subprocess.run(["crontab", "-l"], capture_output=True, text=True)
    previous = listing.stdout if listing.returncode == 0 else ""
    preserved = _strip_managed_crontab_block(previous)
    managed = "\n".join([CRONTAB_BEGIN, *entries, CRONTAB_END])
    if preserved:
        payload = f"{preserved}\n\n{managed}\n"
    else:
        payload = f"{managed}\n"

    staged = None
    try:
        with tempfile.NamedTemporaryFile("w", encoding="utf-8", delete=False) as handle:
            staged = handle.name
            handle.write(payload)
        outcome = subprocess.run(["crontab", staged], capture_output=True, text=True)
    finally:
        # Best-effort cleanup of the staging file; failures here are ignored.
        if staged:
            try:
                Path(staged).unlink(missing_ok=True)
            except Exception:
                pass
    if outcome.returncode != 0:
        return {"ok": False, "error": outcome.stderr or outcome.stdout or "crontab_install_failed"}
    return {"ok": True, "entries": len(entries)}
549
+
550
+
551
def _enable_systemd_user_units(units: list[str]) -> dict:
    """Reload the systemd user manager and enable/start each unit.

    Args:
        units: Unit names passed to ``systemctl --user enable --now``.

    Returns:
        ``{"ok": bool, "errors": list[str]}``. ``ok`` is true only when the
        daemon reload and every enable call exit with status 0.

    A ``systemctl`` binary that is missing or cannot be executed is reported
    as an error entry instead of raising, so the caller can still fall back to
    another scheduler.
    """
    errors: list[str] = []
    try:
        daemon = subprocess.run(["systemctl", "--user", "daemon-reload"], capture_output=True, text=True)
    except OSError as exc:
        # Without a runnable systemctl none of the enable calls can succeed.
        return {"ok": False, "errors": [f"systemctl unavailable: {exc}"]}
    if daemon.returncode != 0:
        errors.append(daemon.stderr or daemon.stdout or "systemctl daemon-reload failed")
    for unit in units:
        try:
            proc = subprocess.run(["systemctl", "--user", "enable", "--now", unit], capture_output=True, text=True)
        except OSError as exc:
            errors.append(f"{unit}: {exc}")
            continue
        if proc.returncode != 0:
            errors.append(f"{unit}: {proc.stderr or proc.stdout or 'enable failed'}")
    return {"ok": not errors, "errors": errors}
561
+
562
+
460
563
  def get_installed_nexo_crons() -> dict[str, Path]:
461
564
  """Return dict of cron_id → plist_path for installed NEXO crons."""
462
565
  installed = {}
@@ -670,6 +773,9 @@ def sync_linux(dry_run: bool = False):
670
773
  python_bin = p
671
774
  break
672
775
 
776
+ enable_units: list[str] = []
777
+ crontab_entries: list[str] = []
778
+
673
779
  for cron in manifest_crons:
674
780
  cron_id = cron["id"]
675
781
  script_src = _resolve_source_artifact(cron["script"])
@@ -683,9 +789,9 @@ def sync_linux(dry_run: bool = False):
683
789
  _copy_into_runtime(subdir_src)
684
790
 
685
791
  if script_type == "shell":
686
- exec_cmd = f"/bin/bash {wrapper_dest} {cron_id} /bin/bash {script_dest}"
792
+ exec_cmd = _shell_join(["/bin/bash", wrapper_dest, cron_id, "/bin/bash", script_dest])
687
793
  else:
688
- exec_cmd = f"/bin/bash {wrapper_dest} {cron_id} {python_bin} {script_dest}"
794
+ exec_cmd = _shell_join(["/bin/bash", wrapper_dest, cron_id, python_bin, script_dest])
689
795
 
690
796
  service_path = unit_dir / f"nexo-{cron_id}.service"
691
797
  timer_path = unit_dir / f"nexo-{cron_id}.timer"
@@ -734,6 +840,7 @@ StandardError=append:{stderr_log}
734
840
 
735
841
  service_path.write_text(service_content)
736
842
  if cron.get("keep_alive"):
843
+ enable_units.append(f"nexo-{cron_id}.service")
737
844
  log(f" Installed keep_alive service: {cron_id}")
738
845
  continue
739
846
 
@@ -748,14 +855,25 @@ Persistent=true
748
855
  WantedBy=timers.target
749
856
  """
750
857
  timer_path.write_text(timer_content)
858
+ enable_units.append(f"nexo-{cron_id}.timer")
859
+ crontab_entry = _linux_crontab_entry(cron, exec_cmd, stdout_log, stderr_log)
860
+ if crontab_entry:
861
+ crontab_entries.append(crontab_entry)
751
862
  log(f" Installed: {cron_id}")
752
863
 
753
864
  if not dry_run:
754
- subprocess.run(["systemctl", "--user", "daemon-reload"], capture_output=True)
755
- for cron in manifest_crons:
756
- unit = f"nexo-{cron['id']}.service" if cron.get("keep_alive") else f"nexo-{cron['id']}.timer"
757
- subprocess.run(["systemctl", "--user", "enable", "--now", unit], capture_output=True)
758
- log("systemd units enabled.")
865
+ systemd_result = _enable_systemd_user_units(enable_units)
866
+ if systemd_result.get("ok"):
867
+ log("systemd units enabled.")
868
+ else:
869
+ log(f"WARNING: systemd user timers failed; installing crontab fallback: {systemd_result.get('errors')}")
870
+ fallback = _install_linux_crontab_fallback(crontab_entries)
871
+ if not fallback.get("ok"):
872
+ raise RuntimeError(
873
+ "Linux cron activation failed: "
874
+ f"systemd={systemd_result.get('errors')} crontab={fallback.get('error')}"
875
+ )
876
+ log(f"crontab fallback installed ({fallback.get('entries')} entries).")
759
877
 
760
878
  log("Sync complete.")
761
879
 
@@ -26,6 +26,9 @@ LOCAL_INDEX_LINUX_UNIT = "nexo-local-index.service"
26
26
  DEFAULT_LIVE_ASSET_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_ASSET_LIMIT", "2000") or "2000")
27
27
  DEFAULT_LIVE_DIR_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_DIR_LIMIT", "300") or "300")
28
28
  DEFAULT_LIVE_FILE_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_FILE_LIMIT", "1000") or "1000")
29
+ DEFAULT_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_DEFAULT_DEPTH", "24") or "24")
30
+ DEFAULT_EMAIL_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_EMAIL_ROOT_DEPTH", "24") or "24")
31
+ DEFAULT_MOUNTED_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_MOUNTED_ROOT_DEPTH", "24") or "24")
29
32
 
30
33
 
31
34
  def ensure_ready() -> None:
@@ -91,6 +94,21 @@ def _dedupe_roots(roots: list[str]) -> list[str]:
91
94
  return result
92
95
 
93
96
 
97
def _dedupe_root_specs(specs: list[tuple[str, int]]) -> list[tuple[str, int]]:
    """Collapse duplicate roots, keeping first-seen order and the largest depth.

    Each root is passed through ``norm_path``; entries whose normalized form is
    falsy are discarded. When a root appears more than once, the greatest depth
    wins.
    """
    deepest: dict[str, int] = {}
    for raw_root, raw_depth in specs:
        key = norm_path(raw_root)
        if not key:
            continue
        level = int(raw_depth)
        deepest[key] = level if key not in deepest else max(deepest[key], level)
    # dicts keep insertion order, so items() yields roots in first-seen order.
    return list(deepest.items())
110
+
111
+
94
112
  def _mounted_volume_roots() -> list[str]:
95
113
  candidates: list[Path] = []
96
114
  if sys.platform == "darwin":
@@ -123,24 +141,78 @@ def _mounted_volume_roots() -> list[str]:
123
141
  return roots
124
142
 
125
143
 
144
def _local_email_roots() -> list[str]:
    """List candidate local e-mail directories as strings.

    The NEXO e-mail runtime directory always comes first, followed by the
    mail-store locations for the current platform (whether or not they exist).
    Mail stores belonging to the other platforms are appended only when they
    exist on disk, which covers CI runs and migrated profiles.
    """
    home = Path.home()
    local_app_data = Path(os.environ.get("LOCALAPPDATA") or home / "AppData" / "Local")
    roaming_app_data = Path(os.environ.get("APPDATA") or home / "AppData" / "Roaming")
    stores_by_platform = {
        "mac": [
            home / "Library" / "Mail",
            home / "Library" / "Group Containers" / "UBF8T346G9.Office" / "Outlook" / "Outlook 15 Profiles",
        ],
        "windows": [
            home / "Documents" / "Outlook Files",
            local_app_data / "Microsoft" / "Outlook",
            roaming_app_data / "Microsoft" / "Outlook",
            local_app_data / "Packages" / "microsoft.windowscommunicationsapps_8wekyb3d8bbwe" / "LocalState",
        ],
        "linux": [home / ".thunderbird", home / ".mozilla-thunderbird"],
    }

    if sys.platform == "darwin":
        native = "mac"
    elif sys.platform.startswith("win"):
        native = "windows"
    else:
        native = "linux"

    found: list[Path] = [home / ".nexo" / "runtime" / "nexo-email", *stores_by_platform[native]]
    for stores in stores_by_platform.values():
        for store in stores:
            if store.exists() and store not in found:
                found.append(store)
    return list(map(str, found))
174
+
175
+
126
176
  def default_roots() -> list[str]:
177
+ return [root for root, _depth in default_root_specs()]
178
+
179
+
180
+ def default_root_specs() -> list[tuple[str, int]]:
127
181
  home = Path.home()
128
182
  configured = os.environ.get("NEXO_LOCAL_INDEX_DEFAULT_ROOTS", "").strip()
129
183
  if configured:
130
- return _dedupe_roots([item for item in configured.split(os.pathsep) if item.strip()])
131
- return _dedupe_roots([str(home), *_mounted_volume_roots()])
184
+ return _dedupe_root_specs(
185
+ [(item, DEFAULT_ROOT_DEPTH) for item in configured.split(os.pathsep) if item.strip()]
186
+ )
187
+ return _dedupe_root_specs(
188
+ [(str(home), DEFAULT_ROOT_DEPTH)]
189
+ + [(root, DEFAULT_EMAIL_ROOT_DEPTH) for root in _local_email_roots()]
190
+ + [(root, DEFAULT_MOUNTED_ROOT_DEPTH) for root in _mounted_volume_roots()]
191
+ )
132
192
 
133
193
 
134
194
  def ensure_default_roots() -> dict:
135
- existing_paths = {row["root_path"] for row in list_roots()}
195
+ existing = {row["root_path"]: row for row in list_roots()}
136
196
  created = []
137
- for root in default_roots():
138
- if root in existing_paths:
139
- continue
197
+ updated = []
198
+ for root, depth in default_root_specs():
140
199
  candidate = Path(root).expanduser()
141
- if candidate.exists() and candidate.is_dir():
142
- created.append(add_root(str(candidate), mode="normal", depth=2))
143
- return {"ok": True, "created": len(created), "roots": list_roots()}
200
+ if not candidate.exists() or not candidate.is_dir():
201
+ continue
202
+ existing_row = existing.get(norm_path(str(candidate)))
203
+ if existing_row:
204
+ current_depth = int(existing_row.get("depth") or 0)
205
+ if current_depth < depth:
206
+ conn = _conn()
207
+ conn.execute(
208
+ "UPDATE local_index_roots SET depth=?, updated_at=? WHERE root_path=?",
209
+ (depth, now(), existing_row["root_path"]),
210
+ )
211
+ conn.commit()
212
+ updated.append({"root_path": existing_row["root_path"], "depth": depth})
213
+ continue
214
+ created.append(add_root(str(candidate), mode="normal", depth=depth))
215
+ return {"ok": True, "created": len(created), "updated": len(updated), "roots": list_roots()}
144
216
 
145
217
 
146
218
  def _should_skip_mounted_root(candidate: Path) -> bool:
@@ -471,7 +543,7 @@ def _file_type(path: Path) -> str:
471
543
  return "photo"
472
544
  if suffix in {".py", ".js", ".ts", ".tsx", ".jsx", ".php", ".sql", ".css", ".html"}:
473
545
  return "code"
474
- if suffix in {".eml"}:
546
+ if suffix in {".eml", ".emlx", ".msg", ".pst", ".ost"}:
475
547
  return "email"
476
548
  if suffix in {".pdf", ".docx", ".pptx", ".xlsx", ".md", ".txt", ".csv", ".tsv"}:
477
549
  return "document"
@@ -1316,7 +1388,7 @@ def process_jobs(*, limit: int = 100) -> dict:
1316
1388
  if job_type == "light_extraction":
1317
1389
  text, metadata = extract_text(Path(row["path"]))
1318
1390
  version_id = _latest_version_id(conn, asset_id)
1319
- if contains_secret(text):
1391
+ if metadata.get("content_secret_detected") or contains_secret(text):
1320
1392
  _mark_content_secret_assets(conn, [asset_id])
1321
1393
  conn.execute(
1322
1394
  "UPDATE local_index_jobs SET status='done', updated_at=?, last_error_code='content_secret_blocked' WHERE job_id=?",
@@ -1652,6 +1724,29 @@ def _service_cycle_observation(conn) -> dict:
1652
1724
  return observation
1653
1725
 
1654
1726
 
1727
def _index_timing(conn, *, done: int, active_jobs: int, percent: int) -> dict:
    """Report elapsed indexing time and a rough remaining-time estimate.

    The start time is the earliest matching ``local_index_logs`` entry, or,
    when none exists, the earliest ``first_seen_at`` among non-deleted assets.
    The ETA scales the average time per finished job by the number of active
    jobs and is only produced while work is partially complete.

    Returns:
        ``{"elapsed_seconds": int, "eta_seconds": int | None}``.
    """
    log_row = conn.execute(
        """
        SELECT MIN(created_at) AS created_at
        FROM local_index_logs
        WHERE event IN ('root_added', 'scan_started', 'scan_finished', 'jobs_processed', 'service_cycle_finished')
        """
    ).fetchone()
    started_at = log_row["created_at"] or 0
    if not started_at:
        asset_row = conn.execute(
            """
            SELECT MIN(first_seen_at) AS first_seen_at
            FROM local_assets
            WHERE status!='deleted'
            """
        ).fetchone()
        started_at = asset_row["first_seen_at"] or 0

    if started_at:
        elapsed = max(0, int(now() - float(started_at)))
    else:
        elapsed = 0

    can_estimate = elapsed > 0 and done > 0 and active_jobs > 0 and 0 < percent < 100
    remaining = max(0, int((elapsed / max(done, 1)) * active_jobs)) if can_estimate else None
    return {"elapsed_seconds": elapsed, "eta_seconds": remaining}
1748
+
1749
+
1655
1750
  def _service_scheduler_has_error(service: dict) -> bool:
1656
1751
  if service.get("manager") == "launchagent":
1657
1752
  code = str(service.get("last_exit_code") or "").strip()
@@ -1725,6 +1820,7 @@ def status() -> dict:
1725
1820
  active_jobs = pending + running_jobs + failed_jobs
1726
1821
  total_jobs = active_jobs + done
1727
1822
  percent = 100 if total_jobs == 0 else int((done / max(total_jobs, 1)) * 100)
1823
+ timing = _index_timing(conn, done=done, active_jobs=active_jobs, percent=percent)
1728
1824
  roots = list_roots()
1729
1825
  volumes = []
1730
1826
  by_volume = conn.execute(
@@ -1770,8 +1866,8 @@ def status() -> dict:
1770
1866
  "jobs_pending": pending,
1771
1867
  "jobs_running": running_jobs,
1772
1868
  "jobs_failed": failed_jobs,
1773
- "elapsed_seconds": 0,
1774
- "eta_seconds": None,
1869
+ "elapsed_seconds": timing["elapsed_seconds"],
1870
+ "eta_seconds": timing["eta_seconds"],
1775
1871
  },
1776
1872
  "volumes": volumes,
1777
1873
  "roots": roots,
@@ -1856,10 +1952,112 @@ def _search_text_score(query: str, text: str) -> float:
1856
1952
  return len(q & tokens) / max(len(q), 1)
1857
1953
 
1858
1954
 
1859
- def context_query(query: str, *, intent: str = "answer", limit: int = 12, evidence_required: bool = True, current_context: str = "") -> dict:
1860
- conn = _conn()
1861
- qvec = embeddings.embed_text(query)
1955
# English and Spanish filler words that are ignored when a query is reduced to
# its search terms.
_QUERY_STOPWORDS = {
    "about", "archivos", "con", "context", "contexto", "cuanto", "dame",
    "del", "desde", "documentos", "donde", "esta", "está", "file", "files",
    "hay", "los", "para", "que", "qué", "related", "relacionado", "sabes",
    "sobre", "todo", "what", "where",
}
1984
+
1985
+
1986
def _query_terms(query: str) -> list[str]:
    """Return up to ten distinct search terms from *query*, in order of appearance.

    Tokens produced by ``tokenize`` are kept when they have at least three
    characters and are not listed in ``_QUERY_STOPWORDS``.
    """
    picked: list[str] = []
    for word in tokenize(query):
        usable = len(word) >= 3 and word not in _QUERY_STOPWORDS
        if usable and word not in picked:
            picked.append(word)
    return picked[:10]
1994
+
1995
+
1996
def _entity_match_score(query_lower: str, terms: list[str], name: str) -> float:
    """Score how well an entity name matches the query, from 0.0 to 1.0.

    Scoring, first match wins:
      * 1.0 when the lower-cased name occurs verbatim in *query_lower*;
      * 0.45-0.95 when query terms coincide with tokens of the name, scaled by
        the share of name tokens covered;
      * 0.6 when any query term is a substring of the name;
      * 0.0 otherwise, or when the name is blank.
    """
    entity = (name or "").strip().lower()
    if not entity:
        return 0.0
    entity_terms = set(tokenize(entity))
    if entity in query_lower:
        return 1.0
    if not terms:
        return 0.0
    shared = entity_terms.intersection(terms)
    if shared:
        coverage = len(shared) / max(len(entity_terms), 1)
        return min(0.95, 0.45 + coverage * 0.5)
    for term in terms:
        if term in entity:
            return 0.6
    return 0.0
2012
+
2013
+
2014
def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dict], dict[str, float]]:
    """Find entities whose names match the query terms.

    Entities are fetched from active, normal-privacy assets whose name contains
    any query term, then filtered with ``is_queryable_path`` and scored with
    ``_entity_match_score``.

    Returns:
        A pair ``(matches, boosts)``: *matches* holds at most *limit* entity
        dicts sorted by descending score; *boosts* maps every matched asset id
        to its best entity score.
    """
    query_lower = (query or "").strip().lower()
    terms = _query_terms(query)
    if not (query_lower and terms):
        return [], {}

    like_clause = " OR ".join(["lower(e.name) LIKE ?"] * len(terms))
    like_params = [f"%{term}%" for term in terms]
    row_cap = max(int(limit) * 20, 40)
    rows = conn.execute(
        f"""
        SELECT DISTINCT e.name, e.entity_type, e.asset_id, a.path, a.privacy_class
        FROM local_entities e
        JOIN local_assets a ON a.asset_id = e.asset_id
        WHERE a.status='active'
        AND a.privacy_class='normal'
        AND ({like_clause})
        LIMIT ?
        """,
        [*like_params, row_cap],
    ).fetchall()

    found: list[dict] = []
    boosts: dict[str, float] = {}
    recorded = set()
    for row in rows:
        asset_path = str(row["path"] or "")
        privacy = str(row["privacy_class"] or "")
        if not is_queryable_path(asset_path, privacy):
            continue
        score = _entity_match_score(query_lower, terms, str(row["name"] or ""))
        if score <= 0:
            continue
        asset_id = row["asset_id"]
        identity = (row["name"], row["entity_type"], asset_id)
        if identity not in recorded:
            found.append({
                "name": row["name"],
                "entity_type": row["entity_type"],
                "asset_id": asset_id,
                "score": round(float(score), 4),
            })
            recorded.add(identity)
        # Keep the strongest entity score seen for each asset.
        boosts[asset_id] = max(boosts.get(asset_id, 0.0), float(score))

    ranked = sorted(found, key=lambda item: item.get("score", 0), reverse=True)
    return ranked[: int(limit)], boosts
2057
+
2058
+
2059
+ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
2060
+ base_rows = conn.execute(
1863
2061
  """
1864
2062
  SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
1865
2063
  FROM local_chunks c
@@ -1869,17 +2067,68 @@ def context_query(query: str, *, intent: str = "answer", limit: int = 12, eviden
1869
2067
  WHERE a.status='active'
1870
2068
  AND a.privacy_class='normal'
1871
2069
  ORDER BY c.created_at DESC
1872
- LIMIT 5000
1873
- """
2070
+ LIMIT ?
2071
+ """,
2072
+ (int(base_limit),),
2073
+ ).fetchall()
2074
+ if not entity_asset_ids:
2075
+ return base_rows
2076
+
2077
+ placeholders = ",".join("?" for _ in entity_asset_ids)
2078
+ entity_rows = conn.execute(
2079
+ f"""
2080
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary, e.vector_json
2081
+ FROM local_chunks c
2082
+ JOIN local_assets a ON a.asset_id = c.asset_id
2083
+ LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
2084
+ LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
2085
+ WHERE a.status='active'
2086
+ AND a.privacy_class='normal'
2087
+ AND c.asset_id IN ({placeholders})
2088
+ ORDER BY c.chunk_index ASC
2089
+ LIMIT ?
2090
+ """,
2091
+ [*entity_asset_ids, max(1000, len(entity_asset_ids) * 80)],
1874
2092
  ).fetchall()
2093
+
2094
+ rows = []
2095
+ seen_chunks = set()
2096
+ for row in [*entity_rows, *base_rows]:
2097
+ chunk_id = row["chunk_id"]
2098
+ if chunk_id in seen_chunks:
2099
+ continue
2100
+ seen_chunks.add(chunk_id)
2101
+ rows.append(row)
2102
+ return rows
2103
+
2104
+
2105
+ def context_query(query: str, *, intent: str = "answer", limit: int = 12, evidence_required: bool = True, current_context: str = "") -> dict:
2106
+ conn = _conn()
2107
+ qvec = embeddings.embed_text(query)
2108
+ entities_payload, entity_boosts = _entity_matches_for_query(conn, query, limit=max(int(limit), 1))
2109
+ rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
1875
2110
  scored = []
1876
2111
  for row in rows:
1877
2112
  if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
1878
2113
  continue
1879
2114
  vector = json_loads(row["vector_json"], [])
1880
- score = max(_search_text_score(query, row["text"]), embeddings.cosine(qvec, vector))
2115
+ text_score = _search_text_score(query, row["text"])
2116
+ path_score = _search_text_score(query, row["path"] or "")
2117
+ summary_score = _search_text_score(query, row["summary"] or "")
2118
+ entity_score = entity_boosts.get(row["asset_id"], 0.0)
2119
+ vector_score = embeddings.cosine(qvec, vector)
2120
+ score = max(text_score, path_score, summary_score, vector_score)
2121
+ if entity_score > 0:
2122
+ direct_score = max(text_score, path_score, summary_score)
2123
+ if direct_score > 0:
2124
+ entity_rank = 0.82 + (0.42 * text_score) + (0.18 * path_score) + (0.12 * summary_score)
2125
+ score = max(score, entity_rank + min(0.2, entity_score * 0.2))
2126
+ else:
2127
+ # Entity-level matches keep older assets eligible, but do not let
2128
+ # unrelated chunks from a long document outrank direct evidence.
2129
+ score = max(score, min(0.48, 0.28 + entity_score * 0.2))
1881
2130
  if score > 0:
1882
- scored.append((score, row))
2131
+ scored.append((min(float(score), 1.6), row))
1883
2132
  scored.sort(key=lambda item: item[0], reverse=True)
1884
2133
  assets = []
1885
2134
  chunks = []
@@ -1902,14 +2151,10 @@ def context_query(query: str, *, intent: str = "answer", limit: int = 12, eviden
1902
2151
  "score": round(float(score), 4),
1903
2152
  })
1904
2153
  evidence_refs.append(f"local_asset:{row['asset_id']}#chunk:{row['chunk_id']}")
1905
- entity_rows = conn.execute(
1906
- "SELECT DISTINCT name, entity_type, asset_id FROM local_entities WHERE lower(name) LIKE ? LIMIT ?",
1907
- (f"%{query.lower()}%", int(limit)),
1908
- ).fetchall()
1909
- entities_payload = [dict(row) for row in entity_rows]
1910
2154
  relations_payload: list[dict] = []
1911
- if seen_assets:
1912
- asset_ids = list(seen_assets)[: int(limit)]
2155
+ relation_asset_ids = list(dict.fromkeys([*seen_assets, *entity_boosts.keys()]))[: int(limit)]
2156
+ if relation_asset_ids:
2157
+ asset_ids = relation_asset_ids
1913
2158
  placeholders = ",".join("?" for _ in asset_ids)
1914
2159
  relation_rows = conn.execute(
1915
2160
  f"""
@@ -4,12 +4,15 @@ import csv
4
4
  import html
5
5
  import json
6
6
  import re
7
+ import sqlite3
7
8
  import zipfile
8
9
  from email import policy
9
10
  from email.parser import BytesParser
10
11
  from pathlib import Path
11
12
  from xml.etree import ElementTree
12
13
 
14
+ from .privacy import is_local_email_db
15
+
13
16
  MAX_TEXT_BYTES = 512 * 1024
14
17
  MAX_CHARS = 120_000
15
18
 
@@ -38,6 +41,7 @@ SECRET_PATTERNS: tuple[re.Pattern, ...] = (
38
41
  re.compile(r"\bpk-(?:[a-z]+-)?[A-Za-z0-9_\-]{20,}\b"),
39
42
  re.compile(r"\b(ghp|gho|ghu|ghs|ghr|github_pat|glpat|xoxb|xoxp|shpat)_[A-Za-z0-9_]{16,}\b", re.I),
40
43
  re.compile(r"\b(AKIA|ASIA)[A-Z0-9]{16,}\b"),
44
+ re.compile(r"\bAIza[0-9A-Za-z_-]{30,}\b"),
41
45
  re.compile(r"\bey[A-Za-z0-9_-]{10,}\.ey[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b"),
42
46
  re.compile(r"-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----", re.I),
43
47
  re.compile(r"\b([A-Z][A-Z0-9_]*(?:TOKEN|SECRET|KEY|PASSWORD|PASS)\s*[:=]\s*)['\"]?[A-Za-z0-9._/+=\-]{12,}", re.I),
@@ -73,8 +77,8 @@ def _extract_csv(path: Path) -> str:
73
77
  return "\n".join(rows)[:MAX_CHARS]
74
78
 
75
79
 
76
- def _extract_eml(path: Path) -> tuple[str, dict]:
77
- msg = BytesParser(policy=policy.default).parsebytes(path.read_bytes()[:MAX_TEXT_BYTES])
80
+ def _extract_email_bytes(data: bytes) -> tuple[str, dict]:
81
+ msg = BytesParser(policy=policy.default).parsebytes(data[:MAX_TEXT_BYTES])
78
82
  meta = {
79
83
  "subject": str(msg.get("subject") or ""),
80
84
  "from": str(msg.get("from") or ""),
@@ -92,6 +96,99 @@ def _extract_eml(path: Path) -> tuple[str, dict]:
92
96
  return "\n".join([meta["subject"], meta["from"], meta["to"], text])[:MAX_CHARS], meta
93
97
 
94
98
 
99
def _extract_eml(path: Path) -> tuple[str, dict]:
    """Read an ``.eml`` file and hand its leading bytes to the email parser.

    Only the first ``MAX_TEXT_BYTES`` bytes of the file are forwarded to
    ``_extract_email_bytes``, whose ``(text, metadata)`` result is returned
    unchanged.
    """
    raw = path.read_bytes()
    return _extract_email_bytes(raw[:MAX_TEXT_BYTES])
101
+
102
+
103
def _extract_emlx(path: Path) -> tuple[str, dict]:
    """Extract text and metadata from an ``.emlx`` message file.

    When the first line consists only of digits it is read as a byte count
    and the message is limited to that many bytes after the first newline
    (a count of zero keeps everything after the newline). Anything from a
    line starting with ``<?xml`` onwards is discarded before the remaining
    bytes are parsed by ``_extract_email_bytes``.

    Returns:
        The ``(text, metadata)`` pair from ``_extract_email_bytes`` with
        ``metadata["apple_mail_message"]`` set to ``True``.
    """
    blob = path.read_bytes()[:MAX_TEXT_BYTES]
    header, newline, remainder = blob.partition(b"\n")
    length_field = header.strip()

    message = blob
    if newline and length_field.isdigit():
        byte_count = int(length_field)
        message = remainder[:byte_count] if byte_count > 0 else remainder

    # Drop the trailing XML section, if any survived the length cut.
    message = message.partition(b"\n<?xml")[0]

    text, meta = _extract_email_bytes(message)
    meta["apple_mail_message"] = True
    return text, meta
116
+
117
+
118
def _printable_binary_text(path: Path) -> str:
    """Return the printable text runs found in a binary file, one per line.

    At most ``MAX_TEXT_BYTES`` bytes are read. If a NUL byte occurs within
    the first 2000 bytes the data is decoded as UTF-16, otherwise as
    Latin-1; undecodable bytes are ignored. Runs of at least four
    characters drawn from word characters, ``À``-``ÿ``, spaces and the
    punctuation ``@ . / : = + - , ; ( ) [ ] { } \\`` are kept.

    Args:
        path: File to scan.

    Returns:
        The stripped, non-empty runs joined by newlines and truncated to
        ``MAX_CHARS`` characters.
    """
    data = path.read_bytes()[:MAX_TEXT_BYTES]
    if b"\x00" in data[:2000]:
        decoded = data.decode("utf-16", errors="ignore")
    else:
        decoded = data.decode("latin-1", errors="ignore")
    # The brackets must be escaped with a single backslash inside the raw
    # string. The previous ``\\[\\]`` spelling ended the character class at
    # the first ``]``, so the pattern only matched ``<char>{}]]]]`` and the
    # function returned nothing for ordinary text.
    pieces = re.findall(r"[\wÀ-ÿ@./:=+\- ,;()\[\]{}\\]{4,}", decoded)
    return "\n".join(piece.strip() for piece in pieces if piece.strip())[:MAX_CHARS]
123
+
124
+
125
def _extract_msg(path: Path) -> tuple[str, dict]:
    """Extract text and metadata from a ``.msg`` file.

    Uses the optional ``extract_msg`` package when it can be imported. Any
    failure (missing package, unreadable file, error while reading a field
    or closing the message) falls back to ``_printable_binary_text``.

    Returns:
        ``(text, metadata)``. On the primary path the text is subject,
        sender, recipients and body joined by newlines, truncated to
        ``MAX_CHARS``, and ``metadata["extractor"]`` is ``"msg"``. On the
        fallback path the metadata is ``{"extractor": "msg_fallback"}``.
    """
    try:
        import extract_msg  # type: ignore

        message = extract_msg.Message(str(path))
        try:
            meta = {
                "subject": str(getattr(message, "subject", "") or ""),
                "from": str(getattr(message, "sender", "") or ""),
                "to": str(getattr(message, "to", "") or ""),
                "date": str(getattr(message, "date", "") or ""),
                "extractor": "msg",
            }
            body = str(getattr(message, "body", "") or "")
        finally:
            # Close the message even when reading a field raised, so the
            # underlying file handle is not leaked on the fallback path.
            close = getattr(message, "close", None)
            if callable(close):
                close()
        return "\n".join([meta["subject"], meta["from"], meta["to"], body])[:MAX_CHARS], meta
    except Exception:
        # Deliberately broad: extraction is best-effort and must not abort
        # the caller when the optional dependency is absent or the file is
        # malformed.
        return _printable_binary_text(path), {"extractor": "msg_fallback"}
143
+
144
+
145
def _table_names(conn: sqlite3.Connection) -> set[str]:
    """Return the name of every table listed in ``sqlite_master``."""
    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
    names: set[str] = set()
    for record in cursor.fetchall():
        names.add(str(record[0]))
    return names
148
+
149
+
150
def _select_existing_columns(conn: sqlite3.Connection, table: str, columns: list[str]) -> list[str]:
    """Filter ``columns`` down to those that exist in ``table``.

    The table's columns are read with ``PRAGMA table_info``; the order of
    ``columns`` is preserved in the result.
    """
    info_rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
    present = {str(info[1]) for info in info_rows}
    kept: list[str] = []
    for name in columns:
        if name in present:
            kept.append(name)
    return kept
153
+
154
+
155
def _email_table_lines(
    conn: sqlite3.Connection, table: str, cols: list[str], order_column: str
) -> list[str]:
    """Render up to 1000 rows of ``table`` as ``" | "``-joined lines, newest first."""
    order = order_column if order_column in cols else "rowid"
    query = f"SELECT {', '.join(cols)} FROM {table} ORDER BY {order} DESC LIMIT 1000"
    return [
        " | ".join(str(value or "")[:4000] for value in row)
        for row in conn.execute(query).fetchall()
    ]


def _extract_nexo_email_db(path: Path) -> tuple[str, dict]:
    """Extract recent email text from an allowed local email SQLite database.

    The database is opened read-only. Rows are read from the ``emails`` and
    ``sent_email_events`` tables when present, restricted to the known
    columns that actually exist, limited to 1000 rows per table and 4000
    characters per value.

    Args:
        path: Database file; it must satisfy ``is_local_email_db``.

    Returns:
        ``(text, metadata)``. ``text`` is truncated to ``MAX_CHARS``.
        ``metadata["extractor"]`` is ``"sqlite_blocked"`` when the path is
        not an allowed email database, otherwise ``"nexo_email_db"``, with
        either ``"tables"`` (sorted table names) or
        ``"state": "locked_or_unavailable"`` when the database cannot be
        opened or queried.
    """
    if not is_local_email_db(str(path)):
        return "", {"extractor": "sqlite_blocked"}
    # as_uri() percent-encodes characters such as "?", "#" and "%" and emits
    # the file:///C:/... form for Windows drive paths, both of which a raw
    # f"file:{path}" URI gets wrong.
    uri = f"{path.absolute().as_uri()}?mode=ro"
    try:
        conn = sqlite3.connect(uri, uri=True, timeout=1)
    except (sqlite3.Error, OSError, ValueError):
        return "", {"extractor": "nexo_email_db", "state": "locked_or_unavailable"}

    parts: list[str] = []
    tables: set[str] = set()
    try:
        tables = _table_names(conn)
        if "emails" in tables:
            cols = _select_existing_columns(
                conn,
                "emails",
                ["from_addr", "from_name", "subject", "received_at", "status", "body", "response"],
            )
            if not cols:
                return "", {"extractor": "nexo_email_db", "tables": sorted(tables)}
            parts.extend(_email_table_lines(conn, "emails", cols, "received_at"))
        if "sent_email_events" in tables:
            cols = _select_existing_columns(
                conn,
                "sent_email_events",
                ["sender", "to_addrs", "cc_addrs", "subject", "sent_at", "status", "body_text"],
            )
            if cols:
                parts.extend(_email_table_lines(conn, "sent_email_events", cols, "sent_at"))
    except sqlite3.Error:
        # sqlite3.connect() opens lazily, so a locked, corrupt or non-SQLite
        # file only fails on the first query; report it instead of raising.
        return "", {"extractor": "nexo_email_db", "state": "locked_or_unavailable"}
    finally:
        conn.close()
    return "\n".join(parts)[:MAX_CHARS], {"extractor": "nexo_email_db", "tables": sorted(tables)}
190
+
191
+
95
192
  def _zip_xml_text(path: Path, members: list[str]) -> str:
96
193
  pieces: list[str] = []
97
194
  with zipfile.ZipFile(path) as zf:
@@ -176,6 +273,14 @@ def extract_text(path: Path) -> tuple[str, dict]:
176
273
  elif suffix == ".eml":
177
274
  text, metadata = _extract_eml(path)
178
275
  metadata["extractor"] = "eml"
276
+ elif suffix == ".emlx":
277
+ text, metadata = _extract_emlx(path)
278
+ metadata["extractor"] = "emlx"
279
+ elif suffix == ".msg":
280
+ text, metadata = _extract_msg(path)
281
+ metadata["extractor"] = metadata.get("extractor") or "msg"
282
+ elif suffix == ".db" and is_local_email_db(str(path)):
283
+ text, metadata = _extract_nexo_email_db(path)
179
284
  elif suffix == ".pdf":
180
285
  text = _extract_pdf(path)
181
286
  elif suffix == ".docx":
@@ -186,6 +291,8 @@ def extract_text(path: Path) -> tuple[str, dict]:
186
291
  text = _extract_xlsx(path)
187
292
  else:
188
293
  text = ""
294
+ if contains_secret(text):
295
+ metadata["content_secret_detected"] = True
189
296
  return clean_text(text), metadata
190
297
 
191
298
 
@@ -67,6 +67,36 @@ SENSITIVE_PARTS = {
67
67
  "browser profile",
68
68
  }
69
69
 
70
# Lower-case database file names that is_local_email_db() accepts inside a
# local email tree.
EMAIL_RUNTIME_DB_NAMES = {
    "nexo-email.db",
    "monitor.db",
    "emails.db",
    "email-tracker.db",
    "email.db",
}

# File suffixes allowed under ".nexo/runtime/nexo-email/attachments".
EMAIL_ATTACHMENT_SUFFIXES = {
    ".eml", ".emlx",
    ".txt", ".md", ".html", ".csv",
    ".pdf", ".docx", ".pptx", ".xlsx",
}

# Message-file suffixes. NOTE(review): not referenced in the visible part of
# this module; presumably the formats with a dedicated extractor. Confirm
# against callers.
EMAIL_EXTRACTABLE_SUFFIXES = {".msg", ".emlx", ".eml"}

# Extra suffixes accepted for files under the Outlook and Windows Mail roots
# in is_allowed_local_email_file().
OUTLOOK_MAC_INVENTORY_SUFFIXES = {
    ".olk15contact",
    ".olk15event",
    ".olk15msgattach",
    ".olk15msgsource",
    ".olk15message",
}
99
+
70
100
  NOISY_PARTS = {
71
101
  "node_modules",
72
102
  "vendor",
@@ -173,6 +203,77 @@ def _contains_path_marker(lowered: str, markers: set[str]) -> bool:
173
203
  return any(marker in lowered for marker in markers)
174
204
 
175
205
 
206
def _is_under_marker(lowered: str, marker: str) -> bool:
    """Tell whether ``lowered`` ends at, or passes through, the ``marker`` directory.

    ``marker`` is stripped of surrounding slashes and lower-cased; an empty
    marker never matches. The marker must be preceded by ``/`` and be either
    the end of ``lowered`` or followed by ``/``.
    """
    needle = marker.strip("/").lower()
    if needle == "":
        return False
    anchored = "/" + needle
    if lowered.endswith(anchored):
        return True
    return f"/{needle}/" in lowered
211
+
212
+
213
def _is_inside_windows_mail_package(lowered: str) -> bool:
    """Report whether ``lowered`` contains the Windows Mail package path prefix.

    This is a plain substring test, so package directories that carry a
    suffix after ``microsoft.windowscommunicationsapps`` also match.
    """
    package_prefix = "/appdata/local/packages/microsoft.windowscommunicationsapps"
    return lowered.find(package_prefix) != -1
215
+
216
+
217
def _is_inside_outlook_mac_profile(lowered: str) -> bool:
    """Report whether ``lowered`` contains the Outlook group-container path (substring test)."""
    profile_prefix = "/library/group containers/ubf8t346g9.office/outlook"
    return lowered.find(profile_prefix) != -1
219
+
220
+
221
def is_local_email_tree(path: str) -> bool:
    """Return True when ``path`` lies in one of the recognised local email roots.

    The normalised path matches if it contains the Windows Mail package or
    Outlook group-container prefix, or if it ends at or passes through any
    of the marker directories listed below.
    """
    lowered = _normalized(path)
    if _is_inside_windows_mail_package(lowered):
        return True
    if _is_inside_outlook_mac_profile(lowered):
        return True
    email_roots = (
        "library/mail",
        ".nexo/runtime/nexo-email",
        "documents/outlook files",
        "appdata/local/microsoft/outlook",
        "appdata/roaming/microsoft/outlook",
        "appdata/local/packages/microsoft.windowscommunicationsapps",
        ".thunderbird",
        ".mozilla-thunderbird",
    )
    for root in email_roots:
        if _is_under_marker(lowered, root):
            return True
    return False
238
+
239
+
240
def is_local_email_db(path: str) -> bool:
    """Return True for a known email database file name inside a local email tree.

    The file name is compared case-insensitively against
    ``EMAIL_RUNTIME_DB_NAMES``.
    """
    candidate = Path(path)
    if not is_local_email_tree(path):
        return False
    return candidate.name.lower() in EMAIL_RUNTIME_DB_NAMES
243
+
244
+
245
def is_allowed_local_email_file(path: str) -> bool:
    """Decide whether a file inside a local email tree may be processed.

    Files outside a local email tree, and files that ``is_sensitive_path``
    flags, are rejected. Otherwise the allowed suffixes depend on the root
    the file lives under:

    * ``.nexo/runtime/nexo-email``: known email databases; attachment
      suffixes under ``attachments``; ``.eml``/``.emlx`` elsewhere.
    * ``library/mail``: ``.eml``/``.emlx``.
    * Outlook and Windows Mail roots: ``.eml``, ``.msg``, ``.pst``,
      ``.ost`` and the ``OUTLOOK_MAC_INVENTORY_SUFFIXES``.
    * Thunderbird roots: ``.eml``, ``.mbox`` and files with no suffix.
    """
    if not is_local_email_tree(path):
        return False
    file_path = Path(path)
    lowered = _normalized(path)
    ext = file_path.suffix.lower()
    if is_sensitive_path(path):
        return False

    if _is_under_marker(lowered, ".nexo/runtime/nexo-email"):
        if is_local_email_db(path):
            return True
        if _is_under_marker(lowered, ".nexo/runtime/nexo-email/attachments"):
            return ext in EMAIL_ATTACHMENT_SUFFIXES
        return ext in {".eml", ".emlx"}

    if _is_under_marker(lowered, "library/mail"):
        return ext in {".eml", ".emlx"}

    outlook_markers = (
        "library/group containers/ubf8t346g9.office/outlook",
        "documents/outlook files",
        "appdata/local/microsoft/outlook",
        "appdata/roaming/microsoft/outlook",
        "appdata/local/packages/microsoft.windowscommunicationsapps",
    )
    in_outlook_root = False
    for marker in outlook_markers:
        if _is_under_marker(lowered, marker):
            in_outlook_root = True
            break
    if (
        in_outlook_root
        or _is_inside_windows_mail_package(lowered)
        or _is_inside_outlook_mac_profile(lowered)
    ):
        return ext in {".eml", ".msg", ".pst", ".ost"} | OUTLOOK_MAC_INVENTORY_SUFFIXES

    for marker in (".thunderbird", ".mozilla-thunderbird"):
        if _is_under_marker(lowered, marker):
            return ext in {".eml", ".mbox", ""}
    return False
275
+
276
+
176
277
  def _has_transient_project_part(path: str) -> bool:
177
278
  parts = list(_normalized(path).replace(":", "/").split("/"))
178
279
  for index, part in enumerate(parts):
@@ -239,6 +340,8 @@ def classify_path(path: str) -> tuple[int, str, str]:
239
340
  lowered = _normalized(path)
240
341
  parts = _parts(path)
241
342
 
343
+ if is_local_email_tree(path) and (Path(path).suffix == "" or is_allowed_local_email_file(path)):
344
+ return 2, "normal", "local_email_path"
242
345
  if is_sensitive_path(path):
243
346
  return 1, "sensitive_inventory_only", "sensitive_path"
244
347
  if is_private_profile_path(path):
@@ -253,6 +356,8 @@ def classify_path(path: str) -> tuple[int, str, str]:
253
356
  def should_skip_tree(path: str) -> bool:
254
357
  lowered = _normalized(path)
255
358
  parts = _parts(path)
359
+ if is_local_email_tree(path):
360
+ return False
256
361
  if any(item in lowered for item in SYSTEM_PARTS):
257
362
  return True
258
363
  if is_sensitive_path(path) or is_private_profile_path(path):
@@ -263,6 +368,8 @@ def should_skip_tree(path: str) -> bool:
263
368
  def should_skip_file(path: str) -> bool:
264
369
  lowered = _normalized(path)
265
370
  parts = _parts(path)
371
+ if is_local_email_tree(path):
372
+ return not is_allowed_local_email_file(path)
266
373
  if any(item in lowered for item in SYSTEM_PARTS):
267
374
  return True
268
375
  if is_sensitive_path(path) or is_private_profile_path(path):
@@ -282,6 +389,8 @@ def should_extract(path: str, depth: int) -> bool:
282
389
  if should_skip_file(path):
283
390
  return False
284
391
  suffix = Path(path).suffix.lower()
392
+ if is_local_email_db(path):
393
+ return True
285
394
  if suffix in {
286
395
  ".txt",
287
396
  ".md",
@@ -302,6 +411,8 @@ def should_extract(path: str, depth: int) -> bool:
302
411
  ".csv",
303
412
  ".tsv",
304
413
  ".eml",
414
+ ".emlx",
415
+ ".msg",
305
416
  ".pdf",
306
417
  ".docx",
307
418
  ".pptx",
@@ -43,6 +43,15 @@ def _format_local_context_evidence(query: str, *, limit: int = 4) -> str:
43
43
  refs = result.get("evidence_refs") or []
44
44
  if refs:
45
45
  lines.append(f"Evidence refs: {', '.join(str(ref) for ref in refs[:limit])}")
46
+ relations = result.get("relations") or []
47
+ if relations:
48
+ lines.append("Local relations:")
49
+ for relation in relations[:limit]:
50
+ relation_type = str(relation.get("relation_type") or "related")
51
+ target = str(relation.get("target_ref") or relation.get("target_asset_id") or "").strip()
52
+ evidence = str(relation.get("evidence") or "").strip()
53
+ suffix = f" — {evidence[:120]}" if evidence else ""
54
+ lines.append(f"- {relation_type}: {target}{suffix}")
46
55
  return "\n".join(lines)
47
56
 
48
57