nexo-brain 7.31.11 → 7.31.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.31.11",
3
+ "version": "7.31.12",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,7 +18,7 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.31.11` is the current packaged-runtime line. Patch release over v7.31.10 - MCP lifecycle robustness + guardrail precision. Version `7.31.10` was a patch release over v7.31.9 - Local Memory search now downranks boilerplate emails when stronger documents match the same query.
21
+ Version `7.31.12` is the current packaged-runtime line. Patch release over v7.31.11 - Local Memory core hardening (Release A: defensive cosine, stable chunk ids, iCloud dataless handling, performance PRAGMAs) plus an offline-first dependency installer. Version `7.31.11` was a patch release over v7.31.10 - MCP lifecycle robustness + guardrail precision.
22
22
 
23
23
  Previously in `7.31.9`: patch release over v7.31.8 - UI release closeout now has to prove the original reported symptom was reopened with observable evidence before claiming the release is ready.
24
24
 
package/bin/nexo-brain.js CHANGED
@@ -4103,6 +4103,19 @@ async function runSetup() {
4103
4103
  log("Try manually: python3 -m venv ~/.nexo/.venv && ~/.nexo/.venv/bin/pip install -r src/requirements.txt");
4104
4104
  process.exit(1);
4105
4105
  }
4106
+ // Mirror the bundled wheels into NEXO_HOME so the Python runtime (startup
4107
+ // self-heal, update, cron) can reinstall any missing dep OFFLINE later, with no
4108
+ // access to the Desktop bundle path. auto_update._bundled_wheels_dir() looks in
4109
+ // <NEXO_HOME>/runtime/python-wheels. Works for WSL (linux) and macOS.
4110
+ try {
4111
+ if (fs.existsSync(bundledWheelsDir)) {
4112
+ const runtimeWheels = path.join(NEXO_HOME, "runtime", "python-wheels");
4113
+ fs.mkdirSync(runtimeWheels, { recursive: true });
4114
+ fs.cpSync(bundledWheelsDir, runtimeWheels, { recursive: true });
4115
+ }
4116
+ } catch (e) {
4117
+ log(" (note) could not mirror bundled wheels into runtime: " + (e && e.message));
4118
+ }
4106
4119
  // Update python reference to use venv python for the rest of setup
4107
4120
  if (fs.existsSync(venvPython)) {
4108
4121
  python = venvPython;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.31.11",
3
+ "version": "7.31.12",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -1058,8 +1058,46 @@ def _ensure_runtime_venv(runtime_root: Path = NEXO_HOME) -> Path | None:
1058
1058
  return None
1059
1059
 
1060
1060
 
1061
+ def _bundled_wheels_dir() -> "Path | None":
1062
+ """Locate the bundled Python wheels for offline install.
1063
+
1064
+ Priority: explicit NEXO_BUNDLED_WHEELS_DIR (set by the Desktop bundle) →
1065
+ a canonical runtime copy under NEXO_HOME. Returns the dir only if it holds
1066
+ at least one .whl, else None (caller falls back to PyPI).
1067
+ """
1068
+ candidates = []
1069
+ env_dir = os.environ.get("NEXO_BUNDLED_WHEELS_DIR", "").strip()
1070
+ if env_dir:
1071
+ candidates.append(Path(env_dir).expanduser())
1072
+ candidates.append(NEXO_HOME / "runtime" / "python-wheels")
1073
+ for directory in candidates:
1074
+ try:
1075
+ if directory.is_dir() and any(directory.glob("*.whl")):
1076
+ return directory
1077
+ except Exception:
1078
+ continue
1079
+ return None
1080
+
1081
+
1082
+ def _pip_install_argv(pip_bin, req_file, *, wheels_dir=None, use_python_m=False, break_system=False) -> list:
1083
+ """Build a pip install argv. Offline (--no-index --find-links) when wheels_dir is set."""
1084
+ argv = [str(pip_bin)]
1085
+ if use_python_m:
1086
+ argv += ["-m", "pip"]
1087
+ argv += ["install", "--quiet", "-r", str(req_file)]
1088
+ if wheels_dir is not None:
1089
+ argv += ["--no-index", "--find-links", str(wheels_dir)]
1090
+ if break_system:
1091
+ argv.append("--break-system-packages")
1092
+ return argv
1093
+
1094
+
1061
1095
  def _reinstall_pip_deps() -> bool:
1062
- """Reinstall Python deps from requirements.txt. Returns True on success."""
1096
+ """Reinstall Python deps from requirements.txt. Returns True on success.
1097
+
1098
+ Prefers the bundled wheels (offline) so a user with no internet still gets a
1099
+ self-repairing runtime; falls back to PyPI if the bundle can't satisfy it.
1100
+ """
1063
1101
  req_file = SRC_DIR / "requirements.txt"
1064
1102
  if not req_file.exists():
1065
1103
  return True
@@ -1069,24 +1107,32 @@ def _reinstall_pip_deps() -> bool:
1069
1107
  alt_pip = NEXO_HOME / ".venv" / "bin" / "pip3"
1070
1108
  if alt_pip.exists():
1071
1109
  venv_pip = alt_pip
1110
+ wheels_dir = _bundled_wheels_dir()
1111
+ # Large wheel sets / slow links need more than the old 120s.
1112
+ timeout_s = 600
1113
+ use_python_m = not venv_pip.exists()
1114
+ if use_python_m and desktop_product_requested():
1115
+ _log(f"managed venv unavailable for Desktop dependency repair: {venv_python}")
1116
+ return False
1117
+ pip_bin = venv_pip if venv_pip.exists() else sys.executable
1072
1118
  try:
1073
- if venv_pip.exists():
1074
- result = subprocess.run(
1075
- [str(venv_pip), "install", "--quiet", "-r", str(req_file)],
1076
- capture_output=True, text=True, timeout=120,
1077
- )
1078
- elif not desktop_product_requested():
1079
- result = subprocess.run(
1080
- [sys.executable, "-m", "pip", "install", "--quiet", "-r", str(req_file), "--break-system-packages"],
1081
- capture_output=True, text=True, timeout=120,
1119
+ argv = _pip_install_argv(
1120
+ pip_bin, req_file, wheels_dir=wheels_dir,
1121
+ use_python_m=use_python_m, break_system=use_python_m,
1122
+ )
1123
+ result = subprocess.run(argv, capture_output=True, text=True, timeout=timeout_s)
1124
+ if result.returncode != 0 and wheels_dir is not None:
1125
+ # Offline set couldn't satisfy it (missing/incompatible wheel) → retry online.
1126
+ _log(f"offline pip install failed, retrying via PyPI: {result.stderr or result.stdout}")
1127
+ argv_online = _pip_install_argv(
1128
+ pip_bin, req_file, wheels_dir=None,
1129
+ use_python_m=use_python_m, break_system=use_python_m,
1082
1130
  )
1083
- else:
1084
- _log(f"managed venv unavailable for Desktop dependency repair: {venv_python}")
1085
- return False
1131
+ result = subprocess.run(argv_online, capture_output=True, text=True, timeout=timeout_s)
1086
1132
  if result.returncode != 0:
1087
1133
  _log(f"pip install failed (exit {result.returncode}): {result.stderr or result.stdout}")
1088
1134
  return False
1089
- _log("Reinstalled Python dependencies after update")
1135
+ _log("Reinstalled Python dependencies" + (" (offline)" if wheels_dir is not None else ""))
1090
1136
  return True
1091
1137
  except Exception as e:
1092
1138
  _log(f"pip reinstall failed: {e}")
@@ -6107,6 +6153,18 @@ def startup_preflight(*, entrypoint: str, interactive: bool = False) -> dict:
6107
6153
  result["actions"].extend(extra_actions)
6108
6154
  if reconcile_message:
6109
6155
  _log(reconcile_message)
6156
+ # Self-heal: if the managed venv lost a critical importable module
6157
+ # (e.g. pypdf -> PDFs indexed empty), reinstall it automatically. No
6158
+ # user action, no prompt — the runtime repairs itself on startup.
6159
+ try:
6160
+ from doctor.providers.boot import check_managed_venv_modules
6161
+
6162
+ dep_check = check_managed_venv_modules(fix=True)
6163
+ if getattr(dep_check, "fixed", False):
6164
+ result["actions"].append("venv-deps-repaired")
6165
+ _log(f"Managed venv dependencies repaired on startup: {dep_check.summary}")
6166
+ except Exception as dep_exc:
6167
+ _log(f"managed venv module check skipped: {dep_exc}")
6110
6168
  except Exception as e:
6111
6169
  result["error"] = str(e)
6112
6170
  _write_update_summary(result)
@@ -654,6 +654,99 @@ def check_managed_venv_python(fix: bool = False) -> DoctorCheck:
654
654
  )
655
655
 
656
656
 
657
+ # Critical importable modules the managed venv must always have. A missing one
658
+ # fails silently (e.g. pypdf absent -> every PDF/XLSX/MSG indexed as empty text).
659
+ # Verified by importing INSIDE the managed venv, not the current interpreter.
660
+ MANAGED_VENV_REQUIRED_MODULES = (
661
+ "fastmcp",
662
+ "numpy",
663
+ "anthropic",
664
+ "openai",
665
+ "fastembed",
666
+ "pypdf",
667
+ "openpyxl",
668
+ "extract_msg",
669
+ )
670
+
671
+
672
+ def _missing_venv_modules(venv_python: Path | str, modules) -> list[str]:
673
+ """Return the subset of ``modules`` that ``venv_python`` cannot import."""
674
+ mods = [str(m) for m in modules if str(m).strip()]
675
+ if not mods:
676
+ return []
677
+ probe = (
678
+ "import importlib.util as u, sys\n"
679
+ "print('\\n'.join(m for m in sys.argv[1:] if u.find_spec(m) is None))"
680
+ )
681
+ try:
682
+ result = subprocess.run(
683
+ [str(venv_python), "-c", probe, *mods],
684
+ capture_output=True,
685
+ text=True,
686
+ timeout=30,
687
+ )
688
+ except Exception:
689
+ return []
690
+ if result.returncode != 0:
691
+ return []
692
+ return [line.strip() for line in (result.stdout or "").splitlines() if line.strip()]
693
+
694
+
695
+ def _repair_managed_venv_deps() -> bool:
696
+ try:
697
+ import auto_update
698
+
699
+ return bool(auto_update._reinstall_pip_deps())
700
+ except Exception:
701
+ return False
702
+
703
+
704
def check_managed_venv_modules(fix: bool = False) -> DoctorCheck:
    """Ensure the managed venv has every critical importable module.

    A missing optional parser (pypdf/openpyxl/extract_msg) makes the local index
    read PDF/XLSX/MSG as empty, silently. With ``fix=True`` (run automatically on
    startup preflight) this reinstalls them, so the runtime repairs itself with no
    user action.

    Args:
        fix: When true and modules are missing, attempt a dependency reinstall
            and report the state observed afterwards.

    Returns:
        A ``boot.managed_venv_modules`` check: ``healthy`` when the venv is
        absent or nothing is missing (with ``fixed=True`` after a successful
        repair), otherwise ``degraded`` listing the missing modules.
    """
    venv_python = _managed_venv_python_path()
    if not venv_python.exists():
        # The python-version check already reports a missing venv; stay quiet here.
        return DoctorCheck(
            id="boot.managed_venv_modules",
            tier="boot",
            status="healthy",
            severity="info",
            summary="Managed Python venv not present yet",
            evidence=[str(venv_python)],
        )
    missing = _missing_venv_modules(venv_python, MANAGED_VENV_REQUIRED_MODULES)
    if not missing:
        return DoctorCheck(
            id="boot.managed_venv_modules",
            tier="boot",
            status="healthy",
            severity="info",
            summary=f"All {len(MANAGED_VENV_REQUIRED_MODULES)} critical venv modules present",
            evidence=[str(venv_python)],
        )
    if fix and _repair_managed_venv_deps():
        # Re-probe once; fix=False prevents a second repair attempt.
        post = check_managed_venv_modules(fix=False)
        if post.status == "healthy":
            post.fixed = True
            post.summary += " (repaired missing modules)"
        # Always return the re-check: after a partial repair it lists only the
        # modules that are STILL missing, not the stale pre-repair set.
        return post
    return DoctorCheck(
        id="boot.managed_venv_modules",
        tier="boot",
        status="degraded",
        severity="warn",
        summary=f"{len(missing)} critical venv module(s) missing: {', '.join(missing)}",
        evidence=[str(venv_python), *missing],
        repair_plan=["Run nexo doctor --tier boot --fix or nexo update to reinstall managed dependencies"],
    )
748
+
749
+
657
750
  CRITICAL_CONFIG_FILES = (
658
751
  ("schedule.json", ("config", "schedule.json")),
659
752
  ("optionals.json", ("config", "optionals.json")),
@@ -909,6 +1002,7 @@ def run_boot_checks(fix: bool = False, plane: str = "") -> list[DoctorCheck]:
909
1002
  safe_check(check_wrapper_scripts),
910
1003
  safe_check(check_python_runtime),
911
1004
  safe_check(check_managed_venv_python, fix=fix),
1005
+ safe_check(check_managed_venv_modules, fix=fix),
912
1006
  safe_check(check_config_parse),
913
1007
  safe_check(check_core_dev_packaged_install),
914
1008
  safe_check(check_dashboard_desktop_contract),
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import errno
3
4
  import json
4
5
  import os
5
6
  import re
@@ -54,7 +55,7 @@ ENTITY_FACTS_JOB = "entity_facts"
54
55
  BACKGROUND_INDEX_JOB_TYPES = {ENTITY_FACTS_JOB}
55
56
  ENTITY_DOSSIER_MAX_ASSETS = int(os.environ.get("NEXO_ENTITY_DOSSIER_MAX_ASSETS", "500") or "500")
56
57
  ENTITY_DOSSIER_MAX_CHUNKS = int(os.environ.get("NEXO_ENTITY_DOSSIER_MAX_CHUNKS", "1200") or "1200")
57
- ENTITY_DOSSIER_MAX_FACTS = int(os.environ.get("NEXO_ENTITY_DOSSIER_MAX_FACTS", "3000") or "3000")
58
+ ENTITY_DOSSIER_MAX_FACTS = int(os.environ.get("NEXO_ENTITY_DOSSIER_MAX_FACTS", "120") or "120")
58
59
  ENTITY_FACT_MIN_CONFIDENCE = float(os.environ.get("NEXO_ENTITY_FACT_MIN_CONFIDENCE", "0.45") or "0.45")
59
60
  # Hard ceilings to stop the entity_facts cartesian blow-up (chunks × entities × candidates).
60
61
  # Without these a single document could emit thousands of facts; 258k assets produced 337M rows / 255 GB.
@@ -247,7 +248,7 @@ def _conn():
247
248
 
248
249
 
249
250
def _read_conn():
    """Open a read-only local-context connection and validate its schema.

    The connection is opened with the connector's default busy timeout.

    Returns:
        The validated connection; the caller is responsible for closing it.

    Raises:
        Whatever ``connect_local_context_db_readonly`` or
        ``_validate_status_schema`` raises. On a validation failure the
        connection is closed before the exception propagates.
    """
    conn = connect_local_context_db_readonly()
    try:
        _validate_status_schema(conn)
    except Exception:
        # The caller never receives the handle on failure, so release it here.
        conn.close()
        raise
    return conn
253
254
 
@@ -273,7 +274,9 @@ def _with_sqlite_busy_retry(callback, *, attempts: int | None = None):
273
274
  if not _sqlite_is_busy(exc) or attempt >= max_attempts - 1:
274
275
  raise
275
276
  last_exc = exc
276
- close_local_context_db()
277
+ # Do NOT close the cached writer handle here: it is shared, and closing
278
+ # it invalidates the connection for every other caller. The busy_timeout
279
+ # (now in parity for readers and writer) already absorbs transient locks.
277
280
  time.sleep(DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS * (attempt + 1))
278
281
  if last_exc:
279
282
  raise last_exc
@@ -2288,13 +2291,49 @@ def _record_index_error(
2288
2291
  )
2289
2292
 
2290
2293
 
2294
+ # macOS sets SF_DATALESS on iCloud files whose data has been evicted to the
2295
+ # cloud ("Optimize Mac Storage"). stat() still works on them, but reading their
2296
+ # bytes faults in a download; from the headless index daemon that materialization
2297
+ # fails with EDEADLK. Detect the flag with pure stdlib (no pyobjc) so such files
2298
+ # can be indexed metadata-only until the user materializes them.
2299
+ SF_DATALESS = getattr(stat, "SF_DATALESS", 0x40000000)
2300
+
2301
+
2302
+ def _is_dataless(st) -> bool:
2303
+ """True when a stat result carries the macOS SF_DATALESS (cloud-evicted) flag."""
2304
+ return bool(getattr(st, "st_flags", 0) & SF_DATALESS)
2305
+
2306
+
2307
+ def _is_offloaded_error(exc: BaseException) -> bool:
2308
+ """True for the EDEADLK raised when faulting in a cloud-offloaded file fails.
2309
+
2310
+ errno values are platform-specific (Darwin EDEADLK=11, Linux=35), so this
2311
+ compares the symbol, never a literal.
2312
+ """
2313
+ return isinstance(exc, OSError) and exc.errno == errno.EDEADLK
2314
+
2315
+
2291
2316
  def _record_scan_error(conn, stats: dict | None, path: str, phase: str, exc: Exception) -> None:
2317
+ if _is_offloaded_error(exc):
2318
+ # Cloud-offloaded file, NOT a reliability failure: count it separately and
2319
+ # never let it inflate the error metric or (previously) consume the budget.
2320
+ if stats is not None:
2321
+ stats["offloaded"] = int(stats.get("offloaded", 0) or 0) + 1
2322
+ _record_index_error(
2323
+ conn,
2324
+ path=path,
2325
+ phase=phase,
2326
+ error_code="offloaded",
2327
+ user_message="File is stored in iCloud and not downloaded locally",
2328
+ technical_detail=str(exc),
2329
+ retryable=True,
2330
+ )
2331
+ return
2292
2332
  if stats is not None:
2293
2333
  stats["errors"] = int(stats.get("errors", 0) or 0) + 1
2294
- logged = int(stats.get("_errors_logged", 0) or 0)
2295
- if logged >= 20:
2296
- return
2297
- stats["_errors_logged"] = logged + 1
2334
+ # No per-run cap: the old "log at most 20" silently hid the true failure scope
2335
+ # (a whole unreadable tree reported as just "20 errors"). The per-cycle scan
2336
+ # limit already bounds how many rows a single run can write.
2298
2337
  _record_index_error(
2299
2338
  conn,
2300
2339
  path=path,
@@ -2870,7 +2909,12 @@ def _replace_chunks(conn, asset_id: str, version_id: str, text: str) -> None:
2870
2909
  conn.execute("DELETE FROM local_chunks WHERE asset_id=?", (asset_id,))
2871
2910
  conn.execute("DELETE FROM local_embeddings WHERE asset_id=?", (asset_id,))
2872
2911
  for index, chunk in enumerate(chunk_text(text)):
2873
- chunk_id = stable_id("chunk", f"{version_id}:{index}:{chunk[:80]}")
2912
+ # Position-stable id: key on (version_id, index) ONLY, never chunk text.
2913
+ # chunk_index already guarantees uniqueness within the batch; hashing the
2914
+ # text used to make the id churn on every edit, which broke the
2915
+ # entity_facts / alias dedup (UNIQUE on source_chunk_id) across re-indexing
2916
+ # and the embedding-refresh join on chunk_id. Stable ids → idempotent reindex.
2917
+ chunk_id = stable_id("chunk", f"{version_id}:{index}")
2874
2918
  conn.execute(
2875
2919
  """
2876
2920
  INSERT INTO local_chunks(chunk_id, asset_id, version_id, chunk_index, text, token_count, created_at)
@@ -3322,6 +3366,26 @@ def process_jobs(*, limit: int = 100) -> dict:
3322
3366
  conn.commit()
3323
3367
  continue
3324
3368
  if job_type == "light_extraction":
3369
+ try:
3370
+ offloaded = _is_dataless(Path(row["path"]).stat())
3371
+ except OSError:
3372
+ offloaded = False
3373
+ if offloaded:
3374
+ # Cloud-offloaded file: index metadata-only. The asset already
3375
+ # carries its path/size/dates from the scan; reading the bytes
3376
+ # would trigger a download / EDEADLK. Reconcile re-enqueues
3377
+ # extraction once the fingerprint changes (user materializes it).
3378
+ conn.execute(
3379
+ "UPDATE local_assets SET phase='metadata_only', updated_at=? WHERE asset_id=?",
3380
+ (now(), asset_id),
3381
+ )
3382
+ conn.execute(
3383
+ "UPDATE local_index_jobs SET status='done', updated_at=?, last_error_code='offloaded' WHERE job_id=?",
3384
+ (now(), job_id),
3385
+ )
3386
+ processed += 1
3387
+ conn.commit()
3388
+ continue
3325
3389
  text, metadata = extract_text(Path(row["path"]))
3326
3390
  version_id = _latest_version_id(conn, asset_id)
3327
3391
  if metadata.get("content_secret_detected") or contains_secret(text):
@@ -3363,6 +3427,21 @@ def process_jobs(*, limit: int = 100) -> dict:
3363
3427
  processed += 1
3364
3428
  conn.commit()
3365
3429
  except Exception as exc:
3430
+ if _is_offloaded_error(exc):
3431
+ # A file became offloaded between scan and read (or the flag was
3432
+ # absent but the fault-in still EDEADLK'd): index it metadata-only
3433
+ # and finish cleanly instead of retry-storming on the download.
3434
+ conn.execute(
3435
+ "UPDATE local_assets SET phase='metadata_only', updated_at=? WHERE asset_id=?",
3436
+ (now(), asset_id),
3437
+ )
3438
+ conn.execute(
3439
+ "UPDATE local_index_jobs SET status='done', claimed_by='', lease_expires_at=NULL, last_error_code='offloaded', updated_at=? WHERE job_id=?",
3440
+ (now(), job_id),
3441
+ )
3442
+ processed += 1
3443
+ conn.commit()
3444
+ continue
3366
3445
  failed += 1
3367
3446
  attempts = int(row["attempt_count"] or 0) + 1
3368
3447
  terminal = attempts >= DEFAULT_MAX_JOB_ATTEMPTS
@@ -3748,6 +3827,33 @@ def _index_timing(conn, *, done: int, active_jobs: int, percent: int, readonly:
3748
3827
  return {"started_at": first_seen, "elapsed_seconds": elapsed_seconds, "eta_seconds": eta_seconds}
3749
3828
 
3750
3829
 
3830
+ BACKLOG_DRAIN_WINDOW_SECONDS = 300
3831
+
3832
+
3833
def _backlog_drain_rate(conn, *, pending: int, window_seconds: int = BACKLOG_DRAIN_WINDOW_SECONDS) -> dict:
    """Measure recent job throughput and project when the backlog will clear.

    Counts ``local_index_jobs`` rows with ``status='done'`` whose ``updated_at``
    falls inside the last ``window_seconds`` (clamped to at least 1), and turns
    that into a rate.

    Args:
        conn: Database connection whose rows support access by column name.
        pending: Number of jobs still waiting.
        window_seconds: Length of the look-back window.

    Returns:
        A dict with ``window_seconds``, ``completed_in_window``, ``per_minute``
        (rounded to 3 decimals) and ``eta_seconds``. ``eta_seconds`` is ``None``
        when nothing completed in the window or nothing is pending.
    """
    span = max(1, int(window_seconds))
    threshold = now() - float(span)
    hit = conn.execute(
        "SELECT COUNT(*) AS done FROM local_index_jobs WHERE status='done' AND updated_at >= ?",
        (threshold,),
    ).fetchone()
    finished = int((hit["done"] if hit else 0) or 0)
    rate = finished / float(span)
    if rate > 0 and pending > 0:
        eta = int(pending / rate)
    else:
        # A zero rate (stalled indexer) or an empty backlog has no meaningful ETA.
        eta = None
    return {
        "window_seconds": span,
        "completed_in_window": finished,
        "per_minute": round(rate * 60.0, 3),
        "eta_seconds": eta,
    }
3855
+
3856
+
3751
3857
  def _service_scheduler_has_error(service: dict) -> bool:
3752
3858
  if service.get("manager") == "launchagent":
3753
3859
  code = str(service.get("last_exit_code") or "").strip()
@@ -3983,6 +4089,7 @@ def _status_from_conn(conn, *, readonly: bool = False) -> dict:
3983
4089
  "jobs_failed": failed_jobs,
3984
4090
  "elapsed_seconds": timing["elapsed_seconds"],
3985
4091
  "eta_seconds": timing["eta_seconds"],
4092
+ "backlog_drain_rate": _backlog_drain_rate(conn, pending=pending),
3986
4093
  "index_started_at": index_started_at,
3987
4094
  "initial_scan_complete": bool(initial_index_complete),
3988
4095
  "initial_discovery_complete": bool(initial_scan["complete"]),
@@ -4559,7 +4666,70 @@ def _sync_context_payload_refs(payload: dict) -> None:
4559
4666
  payload["evidence_refs"] = []
4560
4667
 
4561
4668
 
4669
+ def _truncate_dossier_payload(payload: dict, *, max_chars: int) -> dict:
4670
+ """Shape-aware truncation for entity_dossier payloads.
4671
+
4672
+ Unlike context_query payloads (assets/chunks only), a dossier also carries
4673
+ ``facts`` and ``aggregates`` — the gold of the answer (importes, fechas). The
4674
+ generic truncator never trimmed those, so a heavy entity overflowed max_chars
4675
+ and fell back to an EMPTY minimal payload (learning #1234). Here we trim the
4676
+ cheap/low-value parts first (extra chunks, low-confidence facts, extra assets)
4677
+ while keeping ``aggregates``, ``entity``, ``recall`` and the highest-confidence
4678
+ facts. We never empty the dossier.
4679
+ """
4680
+ if not max_chars or max_chars <= 0 or _payload_size(payload) <= max_chars:
4681
+ return payload
4682
+ warnings = list(payload.get("warnings") or [])
4683
+ warnings.append(
4684
+ "Entity dossier truncated to fit max_chars: lower-confidence facts and extra "
4685
+ "chunks were trimmed. Aggregates and top facts are preserved — raise max_chars "
4686
+ "or refine the entity for the full set."
4687
+ )
4688
+ payload["warnings"] = warnings
4689
+ payload["truncated"] = True
4690
+ payload["query"] = _compact_text(payload.get("query") or "", max_chars=240)
4691
+ if isinstance(payload.get("candidates"), list) and len(payload["candidates"]) > 3:
4692
+ payload["candidates"] = payload["candidates"][:3]
4693
+ for chunk in payload.get("chunks") or []:
4694
+ chunk["text"] = _compact_text(chunk.get("text") or "", max_chars=240)
4695
+ facts = payload.get("facts") or []
4696
+ if facts:
4697
+ facts.sort(key=lambda fact: float(fact.get("confidence") or 0.0), reverse=True)
4698
+ payload["facts"] = facts
4699
+ # Trim cheapest-first: extra chunks -> low-confidence facts -> extra assets.
4700
+ while _payload_size(payload) > max_chars and len(payload.get("chunks") or []) > 1:
4701
+ payload["chunks"].pop()
4702
+ while _payload_size(payload) > max_chars and len(payload.get("facts") or []) > 1:
4703
+ payload["facts"].pop()
4704
+ while _payload_size(payload) > max_chars and len(payload.get("assets") or []) > 1:
4705
+ payload["assets"].pop()
4706
+ if _payload_size(payload) > max_chars:
4707
+ payload["chunks"] = (payload.get("chunks") or [])[:1]
4708
+ aggregates = payload.get("aggregates")
4709
+ if isinstance(aggregates, dict):
4710
+ # Keep the gold (documents_total, numeric_by_predicate, date_range);
4711
+ # shed only the secondary, repeatable lists.
4712
+ aggregates["frequent_predicates"] = (aggregates.get("frequent_predicates") or [])[:5]
4713
+ aggregates["atypical_documents"] = []
4714
+ _sync_dossier_evidence_refs(payload)
4715
+ payload["usage_hint"] = _context_usage_hint(payload)
4716
+ return payload
4717
+
4718
+
4719
+ def _sync_dossier_evidence_refs(payload: dict) -> None:
4720
+ refs: list[str] = []
4721
+ for fact in payload.get("facts") or []:
4722
+ if fact.get("source_chunk_id"):
4723
+ refs.append(f"local_asset:{fact.get('source_asset_id')}#chunk:{fact.get('source_chunk_id')}")
4724
+ for chunk in payload.get("chunks") or []:
4725
+ if chunk.get("chunk_id"):
4726
+ refs.append(f"local_asset:{chunk.get('asset_id')}#chunk:{chunk.get('chunk_id')}")
4727
+ payload["evidence_refs"] = list(dict.fromkeys(refs))
4728
+
4729
+
4562
4730
  def _truncate_context_payload(payload: dict, *, max_chars: int) -> dict:
4731
+ if payload.get("mode") == "entity_dossier":
4732
+ return _truncate_dossier_payload(payload, max_chars=max_chars)
4563
4733
  if not max_chars or max_chars <= 0 or _payload_size(payload) <= max_chars:
4564
4734
  return payload
4565
4735
  warnings = list(payload.get("warnings") or [])
@@ -4859,6 +5029,8 @@ def _context_query_conn(
4859
5029
  evidence_refs = []
4860
5030
  seen_assets = set()
4861
5031
  for score, row in scored[: int(limit)]:
5032
+ if contains_secret(str(row["text"] or "")):
5033
+ continue # defense-in-depth: never egress a chunk carrying a secret
4862
5034
  if row["asset_id"] not in seen_assets:
4863
5035
  assets.append({
4864
5036
  "asset_id": row["asset_id"],
@@ -4890,7 +5062,7 @@ def _context_query_conn(
4890
5062
  """,
4891
5063
  [*asset_ids, int(limit) * 3],
4892
5064
  ).fetchall()
4893
- relations_payload = [dict(row) for row in relation_rows]
5065
+ relations_payload = _egress_safe_relations(relation_rows)
4894
5066
  warnings = list(mode_warnings)
4895
5067
  if query_embedding.get("kind") == "deterministic_embedding":
4896
5068
  warnings.append("Local semantic model unavailable; using deterministic fallback until models are installed.")
@@ -5347,6 +5519,17 @@ def get_asset(asset_id: str, *, readonly: bool = True) -> dict:
5347
5519
  _close_read_conn(conn)
5348
5520
 
5349
5521
 
5522
+ def _egress_safe_relations(rows) -> list[dict]:
5523
+ """Drop relations whose evidence text carries a secret (defense-in-depth)."""
5524
+ safe: list[dict] = []
5525
+ for row in rows:
5526
+ record = dict(row)
5527
+ if contains_secret(str(record.get("evidence") or "")):
5528
+ continue
5529
+ safe.append(record)
5530
+ return safe
5531
+
5532
+
5350
5533
  def get_neighbors(asset_id: str, *, limit: int = 30, readonly: bool = True) -> dict:
5351
5534
  conn = _read_conn() if readonly else _conn()
5352
5535
  try:
@@ -5359,7 +5542,7 @@ def get_neighbors(asset_id: str, *, limit: int = 30, readonly: bool = True) -> d
5359
5542
  """,
5360
5543
  (asset_id, int(limit)),
5361
5544
  ).fetchall()
5362
- return {"ok": True, "relations": [dict(row) for row in rows]}
5545
+ return {"ok": True, "relations": _egress_safe_relations(rows)}
5363
5546
  finally:
5364
5547
  if readonly:
5365
5548
  _close_read_conn(conn)
@@ -80,10 +80,23 @@ def _connect(db_path: Path) -> sqlite3.Connection:
80
80
  conn.execute("PRAGMA journal_mode=WAL")
81
81
  conn.execute("PRAGMA synchronous=NORMAL")
82
82
  conn.execute("PRAGMA temp_store=MEMORY")
83
+ # Performance PRAGMAs for the index DB: it sees bursty writes from the 60s
84
+ # cron indexer while read-only retrieval queries run concurrently.
85
+ # - wal_autocheckpoint above the 1000-page default → fewer checkpoints during
86
+ # indexing bursts, WAL still bounded (~8 MB @ 4 KB pages).
87
+ # - mmap_size 256 MB → memory-mapped reads for the read-heavy workload.
88
+ # - cache_size -16000 → 16 MB page cache (negative = KiB), up from 2 MB.
89
+ conn.execute("PRAGMA wal_autocheckpoint=2000")
90
+ conn.execute("PRAGMA mmap_size=268435456")
91
+ conn.execute("PRAGMA cache_size=-16000")
83
92
  return conn
84
93
 
85
94
 
86
- def connect_local_context_db_readonly(*, timeout_ms: int = 1200) -> sqlite3.Connection:
95
+ def connect_local_context_db_readonly(*, timeout_ms: int | None = None) -> sqlite3.Connection:
96
+ # Parity with the writer: readers must wait as long as the writer can hold the
97
+ # lock, otherwise they raise 'database is locked' prematurely under load.
98
+ if timeout_ms is None:
99
+ timeout_ms = _busy_timeout_ms()
87
100
  db_path = local_context_db_path()
88
101
  if not db_path.is_file():
89
102
  raise FileNotFoundError(str(db_path))
@@ -132,6 +132,23 @@ def embed_text(text: str) -> list[float]:
132
132
 
133
133
 
134
134
def cosine(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of ``a`` and ``b``, bounded to [-1, 1].

    The vectors are normalized at comparison time, so the result is correct
    for embeddings that are not already unit-length.

    Returns:
        ``0.0`` when either vector is empty, the lengths differ, either norm
        is zero, or the result is not finite; otherwise the similarity.
    """
    if not a or not b or len(a) != len(b):
        return 0.0
    dot = 0.0
    norm_a = 0.0
    norm_b = 0.0
    for x, y in zip(a, b):
        dot += x * y
        norm_a += x * x
        norm_b += y * y
    if norm_a <= 0.0 or norm_b <= 0.0:
        return 0.0
    # Take each square root separately: the product norm_a * norm_b can
    # underflow to 0.0 (ZeroDivisionError) or overflow for extreme magnitudes.
    denom = math.sqrt(norm_a) * math.sqrt(norm_b)
    if denom <= 0.0:
        return 0.0
    score = dot / denom
    if not math.isfinite(score):
        return 0.0
    # Floating-point rounding can land a hair outside the mathematical range;
    # clamp so callers can rely on the [-1, 1] bound.
    return float(max(-1.0, min(1.0, score)))
@@ -272,7 +272,12 @@ def _extract_pdf(path: Path) -> str:
272
272
 
273
273
 
274
274
def clean_text(text: str) -> str:
    """Reduce HTML-ish input to whitespace-normalized plain text.

    Steps, in order: remove ``<style>``/``<script>``/``<head>`` blocks together
    with their content, decode HTML entities, replace remaining tags with a
    space, collapse whitespace runs, and truncate to ``MAX_CHARS``.

    Args:
        text: Raw text; ``None`` or empty is treated as ``""``.

    Returns:
        The cleaned text, at most ``MAX_CHARS`` characters long.
    """
    text = text or ""
    # Drop the CONTENT of style/script/head blocks (not just their tags) BEFORE
    # stripping tags, or CSS/JS boilerplate survives as text and poisons chunks,
    # embeddings, NER and facts (e.g. 'mso-table-lspace', 'font-family').
    # The closing tag may carry whitespace before '>' (e.g. '</style >').
    text = re.sub(r"(?is)<(style|script|head)\b[^>]*>.*?</\1\s*>", " ", text)
    text = html.unescape(text)
    text = re.sub(r"<[^>]+>", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text[:MAX_CHARS]
@@ -371,6 +371,24 @@ def _is_home_hidden_path(path: str) -> bool:
371
371
  return bool(rel.parts) and rel.parts[0].startswith(".")
372
372
 
373
373
 
374
+ def _name_has_sensitive_marker(name: str, stem: str) -> bool:
375
+ """Token match (not substring) so 'secret' does not flag 'secretaria'/'secreto'."""
376
+ import re
377
+
378
+ norm = re.sub(r"[^a-z0-9]+", "_", f"{name}_{stem}".lower()).strip("_")
379
+ if not norm:
380
+ return False
381
+ tokens = set(norm.split("_"))
382
+ padded = f"_{norm}_"
383
+ for marker in SENSITIVE_NAME_MARKERS:
384
+ if "_" in marker:
385
+ if f"_{marker}_" in padded:
386
+ return True
387
+ elif marker in tokens:
388
+ return True
389
+ return False
390
+
391
+
374
392
  def is_sensitive_path(path: str) -> bool:
375
393
  p = Path(path)
376
394
  lowered = _normalized(path)
@@ -389,7 +407,7 @@ def is_sensitive_path(path: str) -> bool:
389
407
  return True
390
408
  if parts & SENSITIVE_PARTS:
391
409
  return True
392
- if any(marker in name or marker in stem for marker in SENSITIVE_NAME_MARKERS):
410
+ if _name_has_sensitive_marker(name, stem):
393
411
  return True
394
412
  return _contains_path_marker(lowered, SENSITIVE_PARTS)
395
413
 
@@ -913,25 +913,29 @@ def _reinstall_pip_deps() -> str | None:
913
913
  alt_pip = NEXO_HOME / ".venv" / "bin" / "pip3"
914
914
  if alt_pip.exists():
915
915
  venv_pip = alt_pip
916
+ # Offline-first: prefer the bundled wheels, fall back to PyPI. Shared with
917
+ # auto_update so install / update / self-heal all behave identically.
918
+ from auto_update import _bundled_wheels_dir, _pip_install_argv
919
+
920
+ wheels_dir = _bundled_wheels_dir()
916
921
  if not venv_pip.exists():
917
922
  if desktop_product_requested():
918
923
  return "managed Desktop venv pip is unavailable after repair"
919
- # No venv, try system pip with --break-system-packages
920
- try:
921
- result = subprocess.run(
922
- [sys.executable, "-m", "pip", "install", "--quiet", "-r", str(req_file), "--break-system-packages"],
923
- capture_output=True, text=True, timeout=120,
924
- )
925
- if result.returncode != 0:
926
- return f"pip install failed: {result.stderr or result.stdout}"
927
- except Exception as e:
928
- return f"pip install error: {e}"
929
- return None
924
+ pip_bin, use_python_m, break_system = sys.executable, True, True
925
+ else:
926
+ pip_bin, use_python_m, break_system = venv_pip, False, False
930
927
  try:
931
- result = subprocess.run(
932
- [str(venv_pip), "install", "--quiet", "-r", str(req_file)],
933
- capture_output=True, text=True, timeout=120,
928
+ argv = _pip_install_argv(
929
+ pip_bin, req_file, wheels_dir=wheels_dir,
930
+ use_python_m=use_python_m, break_system=break_system,
934
931
  )
932
+ result = subprocess.run(argv, capture_output=True, text=True, timeout=600)
933
+ if result.returncode != 0 and wheels_dir is not None:
934
+ argv_online = _pip_install_argv(
935
+ pip_bin, req_file, wheels_dir=None,
936
+ use_python_m=use_python_m, break_system=break_system,
937
+ )
938
+ result = subprocess.run(argv_online, capture_output=True, text=True, timeout=600)
935
939
  if result.returncode != 0:
936
940
  return f"pip install failed: {result.stderr or result.stdout}"
937
941
  except Exception as e:
@@ -17,10 +17,23 @@ anthropic>=0.80.0
17
17
  openai>=2.20.0
18
18
 
19
19
  # Embedding model (optional but recommended for cognitive features).
20
- # Pin >=0.8.0: older releases require Python <3.12 and pip iterates each
21
- # obsolete version for ~10 min on Ubuntu 24.04 (Python 3.12) before finding
22
- # a compatible one. Verified empirically during Win11 clean install bootstrap.
23
- fastembed>=0.8.0
20
+ # Hard-pinned (==) so the offline wheel bundle (fetch-python-wheels.sh) is
21
+ # reproducible across Win+Mac. onnxruntime is fastembed's transitive native
22
+ # wheel — the fragile, platform-specific one — so it is pinned explicitly too;
23
+ # floating it lets pip resolve an unbundled build and break offline installs.
24
+ # >=0.8.0 was the floor (older releases need Python <3.12 and pip iterates each
25
+ # obsolete version for ~10 min on Ubuntu 24.04 before finding a compatible one,
26
+ # verified during Win11 clean install bootstrap); 0.8.0 is the bundled version.
27
+ fastembed==0.8.0
28
+ onnxruntime==1.26.0
29
+
30
+ # Local Context Layer — document parsers (REQUIRED for the local memory index).
31
+ # extractors.py imports these lazily; without them a clean bundle silently indexes
32
+ # every PDF / XLSX / MSG as EMPTY text (the try/except returns ''). Real bug: on a
33
+ # clean venv `import pypdf` raised ModuleNotFoundError and all invoices read blank.
34
+ pypdf>=4.0
35
+ openpyxl>=3.1
36
+ extract-msg>=0.48
24
37
 
25
38
  # Dashboard (optional, only needed for `python -m dashboard.app`)
26
39
  fastapi
@@ -342,20 +342,31 @@ def check_launch_agents():
342
342
  }]
343
343
  results = []
344
344
 
345
- # Get list of loaded agents
345
+ # Get list of loaded agents and their last exit status.
346
346
  rc, stdout, _ = run_cmd("launchctl list")
347
347
  loaded_labels = set()
348
+ launch_statuses = {}
348
349
  if rc == 0:
349
350
  for line in stdout.splitlines():
350
351
  parts = line.split("\t")
351
352
  if len(parts) >= 3:
352
- loaded_labels.add(parts[2])
353
+ pid, last_status, label = parts[0], parts[1], parts[2]
354
+ loaded_labels.add(label)
355
+ if label.startswith("com.nexo."):
356
+ launch_statuses[label] = {
357
+ "pid": pid,
358
+ "last_status": last_status,
359
+ }
353
360
 
354
361
  for agent in EXPECTED_AGENTS:
355
362
  result = {"name": agent, "status": "OK", "detail": "", "repaired": False}
356
363
 
357
364
  if agent in loaded_labels:
358
365
  result["detail"] = "Loaded"
366
+ last_status = launch_statuses.get(agent, {}).get("last_status", "0")
367
+ if last_status not in ("0", ""):
368
+ result["status"] = "WARN"
369
+ result["detail"] = f"Loaded, last exit status={last_status}"
359
370
  else:
360
371
  # Try auto-repair
361
372
  plist = LAUNCH_AGENTS_DIR / f"{agent}.plist"
@@ -374,6 +385,20 @@ def check_launch_agents():
374
385
 
375
386
  results.append(result)
376
387
 
388
+ expected_set = set(EXPECTED_AGENTS)
389
+ for label, status_info in sorted(launch_statuses.items()):
390
+ if label in expected_set:
391
+ continue
392
+ last_status = status_info.get("last_status", "0")
393
+ if last_status in ("0", ""):
394
+ continue
395
+ results.append({
396
+ "name": label,
397
+ "status": "WARN",
398
+ "detail": f"Loaded, last exit status={last_status}",
399
+ "repaired": False,
400
+ })
401
+
377
402
  return results
378
403
 
379
404
 
package/src/server.py CHANGED
@@ -1437,7 +1437,7 @@ def nexo_entity_dossier(
1437
1437
  query: str,
1438
1438
  max_assets: int = 500,
1439
1439
  max_chunks: int = 1200,
1440
- max_facts: int = 3000,
1440
+ max_facts: int = 120,
1441
1441
  max_chars: int = 20000,
1442
1442
  ) -> str:
1443
1443
  """Build a full local dossier for one entity with aggregates and evidence."""