nexo-brain 7.20.12 → 7.20.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.12",
3
+ "version": "7.20.13",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,7 +18,9 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.20.12` is the current packaged-runtime line. Patch release over v7.20.11 — Local Context now keeps the first index pass separate from live change tracking, persists the current indexing start time, caps compact context payloads for agents, and installs the Windows host scheduler needed to keep WSL indexing alive after reboots.
21
+ Version `7.20.13` is the current packaged-runtime line. Patch release over v7.20.12 — Brain recovery now pauses all known DB writers before restoring `nexo.db`, and Doctor can repair the zero-byte/locked database state that made Desktop Local Memory show zero files.
22
+
23
+ Previously in `7.20.12`: patch release over v7.20.11 — Local Context now keeps the first index pass separate from live change tracking, persists the current indexing start time, caps compact context payloads for agents, and installs the Windows host scheduler needed to keep WSL indexing alive after reboots.
22
24
 
23
25
  Previously in `7.20.11`: patch release over v7.20.10 — Local Context now starts from real system volume roots plus mounted/removable/network volumes, filters system/cache/app/product artifacts, and injects relevant local evidence automatically into heartbeat, task-open and pre-action context.
24
26
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.20.12",
3
+ "version": "7.20.13",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -1412,7 +1412,8 @@ def _self_heal_if_wiped() -> dict | None:
1412
1412
  db_looks_wiped,
1413
1413
  db_row_counts,
1414
1414
  find_latest_hourly_backup,
1415
- kill_nexo_mcp_servers,
1415
+ quiesce_nexo_db_writers,
1416
+ resume_nexo_launchagents,
1416
1417
  safe_sqlite_backup,
1417
1418
  validate_backup_matches_source,
1418
1419
  )
@@ -1467,11 +1468,29 @@ def _self_heal_if_wiped() -> dict | None:
1467
1468
  f"(reference={reference.name}, {ref_total} critical rows). Restoring..."
1468
1469
  )
1469
1470
 
1470
- # Kill any live MCP servers so they cannot overwrite the restored DB.
1471
- kill_report = kill_nexo_mcp_servers(dry_run=False)
1472
- if kill_report.get("terminated"):
1473
- _log(f"self-heal: terminated {kill_report['terminated']} live MCP server(s).")
1474
- time.sleep(0.5)
1471
+ # Pause any live DB writers so they cannot overwrite the restored DB or
1472
+ # keep stale handles open. Desktop installs have more writers than the MCP
1473
+ # server: local-index, email-monitor, followup-runner, watchdog and catchup.
1474
+ quiesce_report = quiesce_nexo_db_writers(dry_run=False)
1475
+ stopped_launchagents = list((quiesce_report.get("launchagents") or {}).get("stopped") or [])
1476
+ if quiesce_report.get("terminated") or stopped_launchagents:
1477
+ _log(
1478
+ "self-heal: quiesced DB writers "
1479
+ f"(terminated={quiesce_report.get('terminated', 0)}, "
1480
+ f"launchagents={len(stopped_launchagents)})."
1481
+ )
1482
+ if quiesce_report.get("errors"):
1483
+ _log(f"self-heal: DB writer quiesce warnings: {quiesce_report.get('errors')}")
1484
+
1485
+ def _resume_quiesced() -> dict | None:
1486
+ if not stopped_launchagents:
1487
+ return None
1488
+ report = resume_nexo_launchagents(stopped_launchagents)
1489
+ if report.get("started"):
1490
+ _log(f"self-heal: resumed {len(report['started'])} launchagent(s).")
1491
+ if report.get("errors"):
1492
+ _log(f"self-heal: launchagent resume warnings: {report.get('errors')}")
1493
+ return report
1475
1494
 
1476
1495
  # Snapshot the current (wiped) state so the heal is reversible.
1477
1496
  pre_heal_dir = paths.backups_dir() / f"pre-heal-{time.strftime('%Y-%m-%d-%H%M%S')}"
@@ -1497,27 +1516,34 @@ def _self_heal_if_wiped() -> dict | None:
1497
1516
  ok, err = safe_sqlite_backup(reference, primary)
1498
1517
  if not ok:
1499
1518
  _log(f"self-heal: restore copy failed: {err}")
1519
+ resume_report = _resume_quiesced()
1500
1520
  return {
1501
1521
  "action": "failed",
1502
1522
  "reason": "restore_copy_failed",
1503
1523
  "error": err,
1504
1524
  "reference": str(reference),
1505
1525
  "pre_heal_dir": str(pre_heal_dir),
1526
+ "quiesce": quiesce_report,
1527
+ "resume": resume_report,
1506
1528
  }
1507
1529
  valid, valid_err = validate_backup_matches_source(reference, primary, CRITICAL_TABLES)
1508
1530
  if not valid:
1509
1531
  _log(f"self-heal: post-restore validation failed: {valid_err}")
1532
+ resume_report = _resume_quiesced()
1510
1533
  return {
1511
1534
  "action": "failed",
1512
1535
  "reason": "validation_failed",
1513
1536
  "error": valid_err,
1514
1537
  "reference": str(reference),
1515
1538
  "pre_heal_dir": str(pre_heal_dir),
1539
+ "quiesce": quiesce_report,
1540
+ "resume": resume_report,
1516
1541
  }
1517
1542
 
1518
1543
  final_counts = db_row_counts(primary, CRITICAL_TABLES)
1519
1544
  final_total = sum(v for v in final_counts.values() if isinstance(v, int))
1520
1545
  _log(f"self-heal: restored {final_total} critical rows from {reference.name}.")
1546
+ resume_report = _resume_quiesced()
1521
1547
  try:
1522
1548
  SELF_HEAL_STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
1523
1549
  SELF_HEAL_STATE_FILE.write_text(json.dumps({
@@ -1525,6 +1551,7 @@ def _self_heal_if_wiped() -> dict | None:
1525
1551
  "reference": str(reference),
1526
1552
  "critical_rows_restored": final_total,
1527
1553
  "pre_heal_dir": str(pre_heal_dir),
1554
+ "quiesced_launchagents": stopped_launchagents,
1528
1555
  }))
1529
1556
  except Exception as e:
1530
1557
  _log(f"self-heal: state write warning: {e}")
@@ -1535,7 +1562,9 @@ def _self_heal_if_wiped() -> dict | None:
1535
1562
  "reference_rows": ref_total,
1536
1563
  "restored_rows": final_total,
1537
1564
  "pre_heal_dir": str(pre_heal_dir),
1538
- "terminated_servers": kill_report.get("terminated", 0),
1565
+ "terminated_servers": int((quiesce_report.get("mcp") or {}).get("terminated") or 0),
1566
+ "quiesce": quiesce_report,
1567
+ "resume": resume_report,
1539
1568
  }
1540
1569
 
1541
1570
 
package/src/db_guard.py CHANGED
@@ -27,6 +27,8 @@ auto_update.py):
27
27
  safe_sqlite_backup(source, dest) -> tuple[bool, str | None]
28
28
  validate_backup_matches_source(source, dest, tables) -> tuple[bool, str | None]
29
29
  kill_nexo_mcp_servers(dry_run) -> dict
30
+ quiesce_nexo_db_writers(dry_run) -> dict
31
+ resume_nexo_launchagents(labels, dry_run) -> dict
30
32
  """
31
33
 
32
34
  from __future__ import annotations
@@ -80,6 +82,27 @@ HOURLY_BACKUP_GLOB = "nexo-*.db"
80
82
  # as an automatic self-heal source. 48h matches nexo-backup.sh retention.
81
83
  HOURLY_BACKUP_MAX_AGE = 48 * 3600
82
84
 
85
# Labels of the long-lived, product-owned NEXO services that may hold
# ``nexo.db`` open while recovery is replacing it. Each label is also the stem
# of the ``<label>.plist`` file looked up under ``~/Library/LaunchAgents``.
# Keep the list conservative: only add services that are safe to stop and
# start again.
NEXO_DB_WRITER_LAUNCHAGENTS: tuple[str, ...] = (
    "com.nexo.local-index",
    "com.nexo.email-monitor",
    "com.nexo.followup-runner",
    "com.nexo.watchdog",
    "com.nexo.catchup",
    "com.nexo.immune",
)

# Script file names that identify a DB writer in a process listing. They are
# compared as substrings against the lowercased command line, so every entry
# must itself be lowercase.
NEXO_DB_WRITER_MARKERS: tuple[str, ...] = (
    "nexo-local-index.py",
    "nexo-email-monitor.py",
    "nexo-followup-runner.py",
    "nexo-catchup.py",
    "nexo-watchdog.sh",
    "nexo-immune.py",
)
105
+
83
106
 
84
107
  # ── Types ───────────────────────────────────────────────────────────────
85
108
 
@@ -447,3 +470,252 @@ def _looks_like_nexo_mcp(cmd: str) -> bool:
447
470
  if "nexo_sdk" in lowered or "nexo-mcp" in lowered:
448
471
  return True
449
472
  return False
473
+
474
+
475
+ # ── DB writer quiescence ────────────────────────────────────────────────
476
+
477
+ def quiesce_nexo_db_writers(
478
+ dry_run: bool = False,
479
+ *,
480
+ stop_launchagents: bool = True,
481
+ settle_seconds: float = 0.75,
482
+ ) -> dict:
483
+ """Stop known NEXO background writers before replacing ``nexo.db``.
484
+
485
+ ``kill_nexo_mcp_servers`` is not enough for Desktop installs: local-index,
486
+ email monitor, followup-runner and catchup can keep a stale DB handle open
487
+ even after the MCP server exits. This helper is intentionally narrow and
488
+ only targets product-owned long-lived writers.
489
+ """
490
+ result: dict = {
491
+ "dry_run": dry_run,
492
+ "mcp": {},
493
+ "launchagents": {"stopped": [], "errors": [], "unsupported": False},
494
+ "processes": {"scanned": 0, "terminated": 0, "pids": [], "errors": []},
495
+ "terminated": 0,
496
+ "errors": [],
497
+ }
498
+
499
+ mcp_report = kill_nexo_mcp_servers(dry_run=dry_run)
500
+ result["mcp"] = mcp_report
501
+ result["terminated"] += int(mcp_report.get("terminated") or 0)
502
+ result["errors"].extend(mcp_report.get("errors") or [])
503
+
504
+ if stop_launchagents:
505
+ la_report = _stop_nexo_launchagents(dry_run=dry_run)
506
+ result["launchagents"] = la_report
507
+ result["errors"].extend(la_report.get("errors") or [])
508
+
509
+ process_report = _terminate_nexo_db_writer_processes(dry_run=dry_run)
510
+ result["processes"] = process_report
511
+ result["terminated"] += int(process_report.get("terminated") or 0)
512
+ result["errors"].extend(process_report.get("errors") or [])
513
+
514
+ if not dry_run and (result["terminated"] or result["launchagents"].get("stopped")):
515
+ time.sleep(max(settle_seconds, 0.0))
516
+ return result
517
+
518
+
519
+ def resume_nexo_launchagents(labels: list[str] | tuple[str, ...] | None = None, dry_run: bool = False) -> dict:
520
+ """Best-effort restart of LaunchAgents stopped by DB recovery."""
521
+ result: dict = {"dry_run": dry_run, "started": [], "errors": [], "unsupported": False}
522
+ if os.name != "posix" or sys_platform() != "darwin":
523
+ result["unsupported"] = True
524
+ return result
525
+ uid = os.getuid()
526
+ launch_agents_dir = Path.home() / "Library" / "LaunchAgents"
527
+ chosen = tuple(labels or NEXO_DB_WRITER_LAUNCHAGENTS)
528
+ for label in chosen:
529
+ plist = launch_agents_dir / f"{label}.plist"
530
+ if not plist.is_file():
531
+ continue
532
+ target = f"gui/{uid}/{label}"
533
+ if dry_run:
534
+ result["started"].append(label)
535
+ continue
536
+ try:
537
+ subprocess.run(
538
+ ["launchctl", "bootstrap", f"gui/{uid}", str(plist)],
539
+ capture_output=True,
540
+ text=True,
541
+ timeout=5,
542
+ )
543
+ subprocess.run(
544
+ ["launchctl", "kickstart", "-k", target],
545
+ capture_output=True,
546
+ text=True,
547
+ timeout=5,
548
+ )
549
+ result["started"].append(label)
550
+ except Exception as exc:
551
+ result["errors"].append(f"{label}: {exc}")
552
+ return result
553
+
554
+
555
+ def sys_platform() -> str:
556
+ # Small indirection makes tests easy to monkeypatch without importing sys at
557
+ # module import time in older runtimes.
558
+ import sys
559
+ return sys.platform
560
+
561
+
562
+ def _stop_nexo_launchagents(dry_run: bool = False) -> dict:
563
+ result: dict = {"stopped": [], "errors": [], "unsupported": False}
564
+ if os.name != "posix" or sys_platform() != "darwin":
565
+ result["unsupported"] = True
566
+ return result
567
+ uid = os.getuid()
568
+ launch_agents_dir = Path.home() / "Library" / "LaunchAgents"
569
+ for label in NEXO_DB_WRITER_LAUNCHAGENTS:
570
+ plist = launch_agents_dir / f"{label}.plist"
571
+ if not plist.is_file():
572
+ continue
573
+ if dry_run:
574
+ result["stopped"].append(label)
575
+ continue
576
+ try:
577
+ proc = subprocess.run(
578
+ ["launchctl", "bootout", f"gui/{uid}", str(plist)],
579
+ capture_output=True,
580
+ text=True,
581
+ timeout=5,
582
+ )
583
+ except Exception as exc:
584
+ result["errors"].append(f"{label}: {exc}")
585
+ continue
586
+ if proc.returncode == 0:
587
+ result["stopped"].append(label)
588
+ else:
589
+ stderr = (proc.stderr or "").strip()
590
+ # launchctl returns non-zero when an agent is already unloaded. That
591
+ # is not a recovery blocker, so keep it as quiet evidence.
592
+ if stderr and "No such process" not in stderr and "not found" not in stderr:
593
+ result["errors"].append(f"{label}: {stderr[:200]}")
594
+ return result
595
+
596
+
597
+ def _terminate_nexo_db_writer_processes(dry_run: bool = False) -> dict:
598
+ result: dict = {"scanned": 0, "terminated": 0, "pids": [], "errors": [], "dry_run": dry_run}
599
+ if os.name == "posix":
600
+ return _terminate_posix_db_writer_processes(dry_run=dry_run)
601
+ if os.name == "nt":
602
+ return _terminate_windows_db_writer_processes(dry_run=dry_run)
603
+ result["errors"].append("unsupported platform")
604
+ return result
605
+
606
+
607
+ def _terminate_posix_db_writer_processes(dry_run: bool = False) -> dict:
608
+ result: dict = {"scanned": 0, "terminated": 0, "pids": [], "errors": [], "dry_run": dry_run}
609
+ try:
610
+ proc = subprocess.run(
611
+ ["ps", "-axo", "pid=,command="],
612
+ capture_output=True,
613
+ text=True,
614
+ timeout=5,
615
+ )
616
+ except Exception as exc:
617
+ result["errors"].append(f"ps failed: {exc}")
618
+ return result
619
+ if proc.returncode != 0:
620
+ result["errors"].append(f"ps exit {proc.returncode}: {proc.stderr.strip()[:200]}")
621
+ return result
622
+
623
+ my_pid = os.getpid()
624
+ for raw in proc.stdout.splitlines():
625
+ line = raw.strip()
626
+ if not line:
627
+ continue
628
+ head, _, rest = line.partition(" ")
629
+ if not head.isdigit():
630
+ continue
631
+ pid = int(head)
632
+ if pid == my_pid:
633
+ continue
634
+ cmd = rest.strip()
635
+ if not _looks_like_nexo_db_writer(cmd):
636
+ continue
637
+ result["scanned"] += 1
638
+ result["pids"].append({"pid": pid, "command": cmd[:180]})
639
+ if dry_run:
640
+ continue
641
+ try:
642
+ os.kill(pid, signal.SIGTERM)
643
+ result["terminated"] += 1
644
+ except ProcessLookupError:
645
+ pass
646
+ except Exception as exc:
647
+ result["errors"].append(f"kill {pid} failed: {exc}")
648
+ return result
649
+
650
+
651
+ def _terminate_windows_db_writer_processes(dry_run: bool = False) -> dict:
652
+ result: dict = {"scanned": 0, "terminated": 0, "pids": [], "errors": [], "dry_run": dry_run}
653
+ ps_script = (
654
+ "Get-CimInstance Win32_Process | "
655
+ "Select-Object ProcessId,CommandLine | ConvertTo-Json -Compress"
656
+ )
657
+ try:
658
+ proc = subprocess.run(
659
+ ["powershell", "-NoProfile", "-Command", ps_script],
660
+ capture_output=True,
661
+ text=True,
662
+ timeout=10,
663
+ )
664
+ except Exception as exc:
665
+ result["errors"].append(f"powershell process scan failed: {exc}")
666
+ return result
667
+ if proc.returncode != 0:
668
+ result["errors"].append(f"powershell exit {proc.returncode}: {proc.stderr.strip()[:200]}")
669
+ return result
670
+ try:
671
+ import json
672
+ rows = json.loads(proc.stdout or "[]")
673
+ except Exception as exc:
674
+ result["errors"].append(f"process json parse failed: {exc}")
675
+ return result
676
+ if isinstance(rows, dict):
677
+ rows = [rows]
678
+ my_pid = os.getpid()
679
+ for row in rows if isinstance(rows, list) else []:
680
+ try:
681
+ pid = int(row.get("ProcessId"))
682
+ except Exception:
683
+ continue
684
+ if pid == my_pid:
685
+ continue
686
+ cmd = str(row.get("CommandLine") or "")
687
+ if not _looks_like_nexo_db_writer(cmd):
688
+ continue
689
+ result["scanned"] += 1
690
+ result["pids"].append({"pid": pid, "command": cmd[:180]})
691
+ if dry_run:
692
+ continue
693
+ try:
694
+ subprocess.run(
695
+ ["taskkill", "/PID", str(pid), "/T", "/F"],
696
+ capture_output=True,
697
+ text=True,
698
+ timeout=5,
699
+ )
700
+ result["terminated"] += 1
701
+ except Exception as exc:
702
+ result["errors"].append(f"taskkill {pid} failed: {exc}")
703
+ return result
704
+
705
+
706
+ def _looks_like_nexo_db_writer(cmd: str) -> bool:
707
+ if not cmd:
708
+ return False
709
+ lowered = cmd.lower()
710
+ if _looks_like_nexo_mcp(cmd):
711
+ return True
712
+ if "nexo-cron-wrapper.sh" in lowered and any(label in lowered for label in (
713
+ "local-index",
714
+ "email-monitor",
715
+ "followup-runner",
716
+ "watchdog",
717
+ "catchup",
718
+ "immune",
719
+ )):
720
+ return True
721
+ return any(marker in lowered for marker in NEXO_DB_WRITER_MARKERS)
@@ -37,6 +37,153 @@ def check_db_exists() -> DoctorCheck:
37
37
  )
38
38
 
39
39
 
40
+ def check_db_integrity(fix: bool = False) -> DoctorCheck:
41
+ """Detect and optionally repair a wiped/corrupt local Brain database."""
42
+ import sqlite3
43
+ import paths
44
+ from db_guard import (
45
+ CRITICAL_TABLES,
46
+ EMPTY_DB_SIZE_BYTES,
47
+ MIN_REFERENCE_ROWS,
48
+ db_looks_wiped,
49
+ db_row_counts,
50
+ find_latest_hourly_backup,
51
+ )
52
+
53
+ db_path = paths.db_path()
54
+ if not db_path.is_file():
55
+ return DoctorCheck(
56
+ id="boot.db_integrity",
57
+ tier="boot",
58
+ status="critical",
59
+ severity="error",
60
+ summary="Database file not found",
61
+ evidence=[str(db_path)],
62
+ repair_plan=["Run nexo-brain to initialize the database"],
63
+ )
64
+
65
+ try:
66
+ size_bytes = db_path.stat().st_size
67
+ except OSError:
68
+ size_bytes = -1
69
+
70
+ quick_ok = False
71
+ quick_error = ""
72
+ try:
73
+ conn = sqlite3.connect(str(db_path), timeout=2)
74
+ try:
75
+ row = conn.execute("PRAGMA quick_check").fetchone()
76
+ quick_ok = bool(row and str(row[0]).lower() == "ok")
77
+ if not quick_ok:
78
+ quick_error = str(row[0] if row else "quick_check returned no row")
79
+ finally:
80
+ conn.close()
81
+ except Exception as exc:
82
+ quick_error = f"{type(exc).__name__}: {exc}"
83
+
84
+ looks_wiped = db_looks_wiped(db_path, CRITICAL_TABLES)
85
+ reference = find_latest_hourly_backup(
86
+ paths.backups_dir(),
87
+ min_critical_rows=MIN_REFERENCE_ROWS,
88
+ )
89
+ reference_counts = db_row_counts(reference, CRITICAL_TABLES) if reference else {}
90
+ reference_rows = sum(v for v in reference_counts.values() if isinstance(v, int))
91
+ lower_error = quick_error.lower()
92
+ corrupt_error = any(token in lower_error for token in (
93
+ "database disk image is malformed",
94
+ "file is not a database",
95
+ "malformed",
96
+ "not a database",
97
+ ))
98
+ recoverable_wipe = bool(
99
+ reference
100
+ and looks_wiped
101
+ and (
102
+ size_bytes <= EMPTY_DB_SIZE_BYTES
103
+ or corrupt_error
104
+ or not quick_ok
105
+ )
106
+ )
107
+
108
+ if quick_ok and not recoverable_wipe:
109
+ if looks_wiped and not reference:
110
+ return DoctorCheck(
111
+ id="boot.db_integrity",
112
+ tier="boot",
113
+ status="healthy",
114
+ severity="info",
115
+ summary="Database is readable and looks like a fresh install",
116
+ evidence=[f"Size: {size_bytes} bytes", "No usable backup with user data found"],
117
+ )
118
+ return DoctorCheck(
119
+ id="boot.db_integrity",
120
+ tier="boot",
121
+ status="healthy",
122
+ severity="info",
123
+ summary="Database integrity OK",
124
+ evidence=[f"Size: {size_bytes} bytes"],
125
+ )
126
+
127
+ evidence = [
128
+ f"DB: {db_path}",
129
+ f"Size: {size_bytes} bytes",
130
+ f"quick_check: {'ok' if quick_ok else quick_error or 'not ok'}",
131
+ f"looks_wiped: {looks_wiped}",
132
+ ]
133
+ if reference:
134
+ evidence.append(f"Reference backup: {reference} ({reference_rows} critical rows)")
135
+
136
+ if fix and recoverable_wipe:
137
+ from plugins.recover import recover
138
+
139
+ report = recover(source=str(reference), force=True)
140
+ if report.get("ok"):
141
+ final_counts = report.get("final_row_counts") or {}
142
+ restored_rows = sum(v for v in final_counts.values() if isinstance(v, int))
143
+ return DoctorCheck(
144
+ id="boot.db_integrity",
145
+ tier="boot",
146
+ status="healthy",
147
+ severity="info",
148
+ summary=f"Database restored from backup ({restored_rows} critical rows)",
149
+ evidence=evidence + [f"Pre-recover snapshot: {report.get('pre_recover_dir', '')}"],
150
+ fixed=True,
151
+ )
152
+ return DoctorCheck(
153
+ id="boot.db_integrity",
154
+ tier="boot",
155
+ status="critical",
156
+ severity="error",
157
+ summary="Database repair failed",
158
+ evidence=evidence + [f"Recover errors: {report.get('errors') or []}"],
159
+ repair_plan=["Run nexo recover --force --yes, then restart Desktop"],
160
+ )
161
+
162
+ if recoverable_wipe:
163
+ return DoctorCheck(
164
+ id="boot.db_integrity",
165
+ tier="boot",
166
+ status="critical",
167
+ severity="error",
168
+ summary="Database appears wiped or corrupt but a valid backup exists",
169
+ evidence=evidence,
170
+ repair_plan=["Run nexo doctor --tier boot --plane database_real --fix"],
171
+ escalation_prompt="NEXO database needs automatic recovery from backup.",
172
+ )
173
+
174
+ status = "critical" if corrupt_error else "degraded"
175
+ severity = "error" if status == "critical" else "warn"
176
+ return DoctorCheck(
177
+ id="boot.db_integrity",
178
+ tier="boot",
179
+ status=status,
180
+ severity=severity,
181
+ summary="Database is not fully readable" if quick_error else "Database integrity is uncertain",
182
+ evidence=evidence,
183
+ repair_plan=["Close NEXO Desktop and run nexo doctor --tier boot --plane database_real --fix"],
184
+ )
185
+
186
+
40
187
  def check_required_dirs() -> DoctorCheck:
41
188
  """Check that required NEXO_HOME directories exist (post-F0.6 layout
42
189
  or pre-F0.6 fallback)."""
@@ -411,6 +558,7 @@ def run_boot_checks(fix: bool = False) -> list[DoctorCheck]:
411
558
  """Run all boot-tier checks."""
412
559
  checks = [
413
560
  safe_check(check_db_exists),
561
+ safe_check(check_db_integrity, fix=fix),
414
562
  safe_check(check_required_dirs),
415
563
  safe_check(check_disk_space),
416
564
  safe_check(check_wrapper_scripts),
@@ -41,7 +41,8 @@ from db_guard import (
41
41
  db_row_counts,
42
42
  diff_row_counts,
43
43
  find_latest_hourly_backup,
44
- kill_nexo_mcp_servers,
44
+ quiesce_nexo_db_writers,
45
+ resume_nexo_launchagents,
45
46
  safe_sqlite_backup,
46
47
  validate_backup_matches_source,
47
48
  )
@@ -257,15 +258,20 @@ def recover(
257
258
  result["steps"].append("dry-run: stopping before any write")
258
259
  return result
259
260
 
260
- # Step 3: kill live MCP servers
261
+ stopped_launchagents: list[str] = []
262
+
263
+ # Step 3: quiesce live DB writers
261
264
  if not skip_kill:
262
- kill_report = kill_nexo_mcp_servers(dry_run=False)
263
- result["steps"].append(f"kill_mcp: terminated={kill_report['terminated']} scanned={kill_report['scanned']}")
264
- if kill_report.get("errors"):
265
- result["warnings"].extend(kill_report["errors"])
266
- # Tiny settle so the ex-server releases file locks.
267
- if kill_report["terminated"]:
268
- time.sleep(0.5)
265
+ quiesce_report = quiesce_nexo_db_writers(dry_run=False)
266
+ result["quiesce"] = quiesce_report
267
+ stopped_launchagents = list((quiesce_report.get("launchagents") or {}).get("stopped") or [])
268
+ result["steps"].append(
269
+ "quiesce_db_writers: "
270
+ f"terminated={quiesce_report.get('terminated', 0)} "
271
+ f"launchagents={len(stopped_launchagents)}"
272
+ )
273
+ if quiesce_report.get("errors"):
274
+ result["warnings"].extend(quiesce_report["errors"])
269
275
 
270
276
  # Step 4: snapshot current state to pre-recover/
271
277
  pre_recover_dir = _backup_base() / f"pre-recover-{time.strftime('%Y-%m-%d-%H%M%S')}"
@@ -297,17 +303,24 @@ def recover(
297
303
  ok, copy_err = safe_sqlite_backup(chosen, target_path)
298
304
  if not ok:
299
305
  result["errors"].append(f"restore copy failed: {copy_err}")
306
+ if stopped_launchagents:
307
+ result["resume"] = resume_nexo_launchagents(stopped_launchagents)
300
308
  return result
301
309
  result["steps"].append(f"restored {chosen.name} -> {target_path}")
302
310
 
303
311
  valid, valid_err = validate_backup_matches_source(chosen, target_path)
304
312
  if not valid:
305
313
  result["errors"].append(f"post-restore validation failed: {valid_err}")
314
+ if stopped_launchagents:
315
+ result["resume"] = resume_nexo_launchagents(stopped_launchagents)
306
316
  return result
307
317
  result["steps"].append("validated post-restore row counts")
308
318
 
309
319
  final_counts = db_row_counts(target_path)
310
320
  result["final_row_counts"] = {k: v for k, v in final_counts.items() if v is not None}
321
+ if stopped_launchagents:
322
+ result["resume"] = resume_nexo_launchagents(stopped_launchagents)
323
+ result["steps"].append(f"resumed {len((result['resume'] or {}).get('started') or [])} launchagent(s)")
311
324
  result["ok"] = True
312
325
  return result
313
326