nexo-brain 7.20.12 → 7.20.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +3 -1
- package/package.json +1 -1
- package/src/auto_update.py +36 -7
- package/src/db_guard.py +272 -0
- package/src/doctor/providers/boot.py +148 -0
- package/src/plugins/recover.py +22 -9
package/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.12",
|
|
3
|
+
"version": "7.20.13",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,9 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
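Version `7.20.12` is the current packaged-runtime line. Patch release over v7.20.11 — Local Context now keeps the first index pass separate from live change tracking, persists the current indexing start time, caps compact context payloads for agents, and installs the Windows host scheduler needed to keep WSL indexing alive after reboots.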
|
|
21
|
+
Version `7.20.13` is the current packaged-runtime line. Patch release over v7.20.12 — Brain recovery now pauses all known DB writers before restoring `nexo.db`, and Doctor can repair the zero-byte/locked database state that made Desktop Local Memory show zero files.
|
|
22
|
+
|
|
23
|
+
Previously in `7.20.12`: patch release over v7.20.11 — Local Context now keeps the first index pass separate from live change tracking, persists the current indexing start time, caps compact context payloads for agents, and installs the Windows host scheduler needed to keep WSL indexing alive after reboots.
|
|
22
24
|
|
|
23
25
|
Previously in `7.20.11`: patch release over v7.20.10 — Local Context now starts from real system volume roots plus mounted/removable/network volumes, filters system/cache/app/product artifacts, and injects relevant local evidence automatically into heartbeat, task-open and pre-action context.
|
|
24
26
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.12",
|
|
3
|
+
"version": "7.20.13",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/auto_update.py
CHANGED
|
@@ -1412,7 +1412,8 @@ def _self_heal_if_wiped() -> dict | None:
|
|
|
1412
1412
|
db_looks_wiped,
|
|
1413
1413
|
db_row_counts,
|
|
1414
1414
|
find_latest_hourly_backup,
|
|
1415
|
-
kill_nexo_mcp_servers,
|
|
1415
|
+
quiesce_nexo_db_writers,
|
|
1416
|
+
resume_nexo_launchagents,
|
|
1416
1417
|
safe_sqlite_backup,
|
|
1417
1418
|
validate_backup_matches_source,
|
|
1418
1419
|
)
|
|
@@ -1467,11 +1468,29 @@ def _self_heal_if_wiped() -> dict | None:
|
|
|
1467
1468
|
f"(reference={reference.name}, {ref_total} critical rows). Restoring..."
|
|
1468
1469
|
)
|
|
1469
1470
|
|
|
1470
|
-
#
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1471
|
+
# Pause any live DB writers so they cannot overwrite the restored DB or
|
|
1472
|
+
# keep stale handles open. Desktop installs have more writers than the MCP
|
|
1473
|
+
# server: local-index, email-monitor, followup-runner, watchdog and catchup.
|
|
1474
|
+
quiesce_report = quiesce_nexo_db_writers(dry_run=False)
|
|
1475
|
+
stopped_launchagents = list((quiesce_report.get("launchagents") or {}).get("stopped") or [])
|
|
1476
|
+
if quiesce_report.get("terminated") or stopped_launchagents:
|
|
1477
|
+
_log(
|
|
1478
|
+
"self-heal: quiesced DB writers "
|
|
1479
|
+
f"(terminated={quiesce_report.get('terminated', 0)}, "
|
|
1480
|
+
f"launchagents={len(stopped_launchagents)})."
|
|
1481
|
+
)
|
|
1482
|
+
if quiesce_report.get("errors"):
|
|
1483
|
+
_log(f"self-heal: DB writer quiesce warnings: {quiesce_report.get('errors')}")
|
|
1484
|
+
|
|
1485
|
+
def _resume_quiesced() -> dict | None:
|
|
1486
|
+
if not stopped_launchagents:
|
|
1487
|
+
return None
|
|
1488
|
+
report = resume_nexo_launchagents(stopped_launchagents)
|
|
1489
|
+
if report.get("started"):
|
|
1490
|
+
_log(f"self-heal: resumed {len(report['started'])} launchagent(s).")
|
|
1491
|
+
if report.get("errors"):
|
|
1492
|
+
_log(f"self-heal: launchagent resume warnings: {report.get('errors')}")
|
|
1493
|
+
return report
|
|
1475
1494
|
|
|
1476
1495
|
# Snapshot the current (wiped) state so the heal is reversible.
|
|
1477
1496
|
pre_heal_dir = paths.backups_dir() / f"pre-heal-{time.strftime('%Y-%m-%d-%H%M%S')}"
|
|
@@ -1497,27 +1516,34 @@ def _self_heal_if_wiped() -> dict | None:
|
|
|
1497
1516
|
ok, err = safe_sqlite_backup(reference, primary)
|
|
1498
1517
|
if not ok:
|
|
1499
1518
|
_log(f"self-heal: restore copy failed: {err}")
|
|
1519
|
+
resume_report = _resume_quiesced()
|
|
1500
1520
|
return {
|
|
1501
1521
|
"action": "failed",
|
|
1502
1522
|
"reason": "restore_copy_failed",
|
|
1503
1523
|
"error": err,
|
|
1504
1524
|
"reference": str(reference),
|
|
1505
1525
|
"pre_heal_dir": str(pre_heal_dir),
|
|
1526
|
+
"quiesce": quiesce_report,
|
|
1527
|
+
"resume": resume_report,
|
|
1506
1528
|
}
|
|
1507
1529
|
valid, valid_err = validate_backup_matches_source(reference, primary, CRITICAL_TABLES)
|
|
1508
1530
|
if not valid:
|
|
1509
1531
|
_log(f"self-heal: post-restore validation failed: {valid_err}")
|
|
1532
|
+
resume_report = _resume_quiesced()
|
|
1510
1533
|
return {
|
|
1511
1534
|
"action": "failed",
|
|
1512
1535
|
"reason": "validation_failed",
|
|
1513
1536
|
"error": valid_err,
|
|
1514
1537
|
"reference": str(reference),
|
|
1515
1538
|
"pre_heal_dir": str(pre_heal_dir),
|
|
1539
|
+
"quiesce": quiesce_report,
|
|
1540
|
+
"resume": resume_report,
|
|
1516
1541
|
}
|
|
1517
1542
|
|
|
1518
1543
|
final_counts = db_row_counts(primary, CRITICAL_TABLES)
|
|
1519
1544
|
final_total = sum(v for v in final_counts.values() if isinstance(v, int))
|
|
1520
1545
|
_log(f"self-heal: restored {final_total} critical rows from {reference.name}.")
|
|
1546
|
+
resume_report = _resume_quiesced()
|
|
1521
1547
|
try:
|
|
1522
1548
|
SELF_HEAL_STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
|
1523
1549
|
SELF_HEAL_STATE_FILE.write_text(json.dumps({
|
|
@@ -1525,6 +1551,7 @@ def _self_heal_if_wiped() -> dict | None:
|
|
|
1525
1551
|
"reference": str(reference),
|
|
1526
1552
|
"critical_rows_restored": final_total,
|
|
1527
1553
|
"pre_heal_dir": str(pre_heal_dir),
|
|
1554
|
+
"quiesced_launchagents": stopped_launchagents,
|
|
1528
1555
|
}))
|
|
1529
1556
|
except Exception as e:
|
|
1530
1557
|
_log(f"self-heal: state write warning: {e}")
|
|
@@ -1535,7 +1562,9 @@ def _self_heal_if_wiped() -> dict | None:
|
|
|
1535
1562
|
"reference_rows": ref_total,
|
|
1536
1563
|
"restored_rows": final_total,
|
|
1537
1564
|
"pre_heal_dir": str(pre_heal_dir),
|
|
1538
|
-
"terminated_servers":
|
|
1565
|
+
"terminated_servers": int((quiesce_report.get("mcp") or {}).get("terminated") or 0),
|
|
1566
|
+
"quiesce": quiesce_report,
|
|
1567
|
+
"resume": resume_report,
|
|
1539
1568
|
}
|
|
1540
1569
|
|
|
1541
1570
|
|
package/src/db_guard.py
CHANGED
|
@@ -27,6 +27,8 @@ auto_update.py):
|
|
|
27
27
|
safe_sqlite_backup(source, dest) -> tuple[bool, str | None]
|
|
28
28
|
validate_backup_matches_source(source, dest, tables) -> tuple[bool, str | None]
|
|
29
29
|
kill_nexo_mcp_servers(dry_run) -> dict
|
|
30
|
+
quiesce_nexo_db_writers(dry_run) -> dict
|
|
31
|
+
resume_nexo_launchagents(labels, dry_run) -> dict
|
|
30
32
|
"""
|
|
31
33
|
|
|
32
34
|
from __future__ import annotations
|
|
@@ -80,6 +82,27 @@ HOURLY_BACKUP_GLOB = "nexo-*.db"
|
|
|
80
82
|
# as an automatic self-heal source. 48h matches nexo-backup.sh retention.
|
|
81
83
|
HOURLY_BACKUP_MAX_AGE = 48 * 3600
|
|
82
84
|
|
|
85
|
+
# Long-lived NEXO services that can keep ``nexo.db`` open while recovery tries
|
|
86
|
+
# to replace it. Keep this list conservative: only product-owned background
|
|
87
|
+
# processes that are safe to stop and restart.
|
|
88
|
+
NEXO_DB_WRITER_LAUNCHAGENTS: tuple[str, ...] = (
|
|
89
|
+
"com.nexo.local-index",
|
|
90
|
+
"com.nexo.email-monitor",
|
|
91
|
+
"com.nexo.followup-runner",
|
|
92
|
+
"com.nexo.watchdog",
|
|
93
|
+
"com.nexo.catchup",
|
|
94
|
+
"com.nexo.immune",
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
NEXO_DB_WRITER_MARKERS: tuple[str, ...] = (
|
|
98
|
+
"nexo-local-index.py",
|
|
99
|
+
"nexo-email-monitor.py",
|
|
100
|
+
"nexo-followup-runner.py",
|
|
101
|
+
"nexo-catchup.py",
|
|
102
|
+
"nexo-watchdog.sh",
|
|
103
|
+
"nexo-immune.py",
|
|
104
|
+
)
|
|
105
|
+
|
|
83
106
|
|
|
84
107
|
# ── Types ───────────────────────────────────────────────────────────────
|
|
85
108
|
|
|
@@ -447,3 +470,252 @@ def _looks_like_nexo_mcp(cmd: str) -> bool:
|
|
|
447
470
|
if "nexo_sdk" in lowered or "nexo-mcp" in lowered:
|
|
448
471
|
return True
|
|
449
472
|
return False
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
# ── DB writer quiescence ────────────────────────────────────────────────
|
|
476
|
+
|
|
477
|
+
def quiesce_nexo_db_writers(
|
|
478
|
+
dry_run: bool = False,
|
|
479
|
+
*,
|
|
480
|
+
stop_launchagents: bool = True,
|
|
481
|
+
settle_seconds: float = 0.75,
|
|
482
|
+
) -> dict:
|
|
483
|
+
"""Stop known NEXO background writers before replacing ``nexo.db``.
|
|
484
|
+
|
|
485
|
+
``kill_nexo_mcp_servers`` is not enough for Desktop installs: local-index,
|
|
486
|
+
email monitor, followup-runner and catchup can keep a stale DB handle open
|
|
487
|
+
even after the MCP server exits. This helper is intentionally narrow and
|
|
488
|
+
only targets product-owned long-lived writers.
|
|
489
|
+
"""
|
|
490
|
+
result: dict = {
|
|
491
|
+
"dry_run": dry_run,
|
|
492
|
+
"mcp": {},
|
|
493
|
+
"launchagents": {"stopped": [], "errors": [], "unsupported": False},
|
|
494
|
+
"processes": {"scanned": 0, "terminated": 0, "pids": [], "errors": []},
|
|
495
|
+
"terminated": 0,
|
|
496
|
+
"errors": [],
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
mcp_report = kill_nexo_mcp_servers(dry_run=dry_run)
|
|
500
|
+
result["mcp"] = mcp_report
|
|
501
|
+
result["terminated"] += int(mcp_report.get("terminated") or 0)
|
|
502
|
+
result["errors"].extend(mcp_report.get("errors") or [])
|
|
503
|
+
|
|
504
|
+
if stop_launchagents:
|
|
505
|
+
la_report = _stop_nexo_launchagents(dry_run=dry_run)
|
|
506
|
+
result["launchagents"] = la_report
|
|
507
|
+
result["errors"].extend(la_report.get("errors") or [])
|
|
508
|
+
|
|
509
|
+
process_report = _terminate_nexo_db_writer_processes(dry_run=dry_run)
|
|
510
|
+
result["processes"] = process_report
|
|
511
|
+
result["terminated"] += int(process_report.get("terminated") or 0)
|
|
512
|
+
result["errors"].extend(process_report.get("errors") or [])
|
|
513
|
+
|
|
514
|
+
if not dry_run and (result["terminated"] or result["launchagents"].get("stopped")):
|
|
515
|
+
time.sleep(max(settle_seconds, 0.0))
|
|
516
|
+
return result
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def resume_nexo_launchagents(labels: list[str] | tuple[str, ...] | None = None, dry_run: bool = False) -> dict:
|
|
520
|
+
"""Best-effort restart of LaunchAgents stopped by DB recovery."""
|
|
521
|
+
result: dict = {"dry_run": dry_run, "started": [], "errors": [], "unsupported": False}
|
|
522
|
+
if os.name != "posix" or sys_platform() != "darwin":
|
|
523
|
+
result["unsupported"] = True
|
|
524
|
+
return result
|
|
525
|
+
uid = os.getuid()
|
|
526
|
+
launch_agents_dir = Path.home() / "Library" / "LaunchAgents"
|
|
527
|
+
chosen = tuple(labels or NEXO_DB_WRITER_LAUNCHAGENTS)
|
|
528
|
+
for label in chosen:
|
|
529
|
+
plist = launch_agents_dir / f"{label}.plist"
|
|
530
|
+
if not plist.is_file():
|
|
531
|
+
continue
|
|
532
|
+
target = f"gui/{uid}/{label}"
|
|
533
|
+
if dry_run:
|
|
534
|
+
result["started"].append(label)
|
|
535
|
+
continue
|
|
536
|
+
try:
|
|
537
|
+
subprocess.run(
|
|
538
|
+
["launchctl", "bootstrap", f"gui/{uid}", str(plist)],
|
|
539
|
+
capture_output=True,
|
|
540
|
+
text=True,
|
|
541
|
+
timeout=5,
|
|
542
|
+
)
|
|
543
|
+
subprocess.run(
|
|
544
|
+
["launchctl", "kickstart", "-k", target],
|
|
545
|
+
capture_output=True,
|
|
546
|
+
text=True,
|
|
547
|
+
timeout=5,
|
|
548
|
+
)
|
|
549
|
+
result["started"].append(label)
|
|
550
|
+
except Exception as exc:
|
|
551
|
+
result["errors"].append(f"{label}: {exc}")
|
|
552
|
+
return result
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def sys_platform() -> str:
|
|
556
|
+
# Small indirection makes tests easy to monkeypatch without importing sys at
|
|
557
|
+
# module import time in older runtimes.
|
|
558
|
+
import sys
|
|
559
|
+
return sys.platform
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def _stop_nexo_launchagents(dry_run: bool = False) -> dict:
|
|
563
|
+
result: dict = {"stopped": [], "errors": [], "unsupported": False}
|
|
564
|
+
if os.name != "posix" or sys_platform() != "darwin":
|
|
565
|
+
result["unsupported"] = True
|
|
566
|
+
return result
|
|
567
|
+
uid = os.getuid()
|
|
568
|
+
launch_agents_dir = Path.home() / "Library" / "LaunchAgents"
|
|
569
|
+
for label in NEXO_DB_WRITER_LAUNCHAGENTS:
|
|
570
|
+
plist = launch_agents_dir / f"{label}.plist"
|
|
571
|
+
if not plist.is_file():
|
|
572
|
+
continue
|
|
573
|
+
if dry_run:
|
|
574
|
+
result["stopped"].append(label)
|
|
575
|
+
continue
|
|
576
|
+
try:
|
|
577
|
+
proc = subprocess.run(
|
|
578
|
+
["launchctl", "bootout", f"gui/{uid}", str(plist)],
|
|
579
|
+
capture_output=True,
|
|
580
|
+
text=True,
|
|
581
|
+
timeout=5,
|
|
582
|
+
)
|
|
583
|
+
except Exception as exc:
|
|
584
|
+
result["errors"].append(f"{label}: {exc}")
|
|
585
|
+
continue
|
|
586
|
+
if proc.returncode == 0:
|
|
587
|
+
result["stopped"].append(label)
|
|
588
|
+
else:
|
|
589
|
+
stderr = (proc.stderr or "").strip()
|
|
590
|
+
# launchctl returns non-zero when an agent is already unloaded. That
|
|
591
|
+
# is not a recovery blocker, so keep it as quiet evidence.
|
|
592
|
+
if stderr and "No such process" not in stderr and "not found" not in stderr:
|
|
593
|
+
result["errors"].append(f"{label}: {stderr[:200]}")
|
|
594
|
+
return result
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _terminate_nexo_db_writer_processes(dry_run: bool = False) -> dict:
|
|
598
|
+
result: dict = {"scanned": 0, "terminated": 0, "pids": [], "errors": [], "dry_run": dry_run}
|
|
599
|
+
if os.name == "posix":
|
|
600
|
+
return _terminate_posix_db_writer_processes(dry_run=dry_run)
|
|
601
|
+
if os.name == "nt":
|
|
602
|
+
return _terminate_windows_db_writer_processes(dry_run=dry_run)
|
|
603
|
+
result["errors"].append("unsupported platform")
|
|
604
|
+
return result
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def _terminate_posix_db_writer_processes(dry_run: bool = False) -> dict:
|
|
608
|
+
result: dict = {"scanned": 0, "terminated": 0, "pids": [], "errors": [], "dry_run": dry_run}
|
|
609
|
+
try:
|
|
610
|
+
proc = subprocess.run(
|
|
611
|
+
["ps", "-axo", "pid=,command="],
|
|
612
|
+
capture_output=True,
|
|
613
|
+
text=True,
|
|
614
|
+
timeout=5,
|
|
615
|
+
)
|
|
616
|
+
except Exception as exc:
|
|
617
|
+
result["errors"].append(f"ps failed: {exc}")
|
|
618
|
+
return result
|
|
619
|
+
if proc.returncode != 0:
|
|
620
|
+
result["errors"].append(f"ps exit {proc.returncode}: {proc.stderr.strip()[:200]}")
|
|
621
|
+
return result
|
|
622
|
+
|
|
623
|
+
my_pid = os.getpid()
|
|
624
|
+
for raw in proc.stdout.splitlines():
|
|
625
|
+
line = raw.strip()
|
|
626
|
+
if not line:
|
|
627
|
+
continue
|
|
628
|
+
head, _, rest = line.partition(" ")
|
|
629
|
+
if not head.isdigit():
|
|
630
|
+
continue
|
|
631
|
+
pid = int(head)
|
|
632
|
+
if pid == my_pid:
|
|
633
|
+
continue
|
|
634
|
+
cmd = rest.strip()
|
|
635
|
+
if not _looks_like_nexo_db_writer(cmd):
|
|
636
|
+
continue
|
|
637
|
+
result["scanned"] += 1
|
|
638
|
+
result["pids"].append({"pid": pid, "command": cmd[:180]})
|
|
639
|
+
if dry_run:
|
|
640
|
+
continue
|
|
641
|
+
try:
|
|
642
|
+
os.kill(pid, signal.SIGTERM)
|
|
643
|
+
result["terminated"] += 1
|
|
644
|
+
except ProcessLookupError:
|
|
645
|
+
pass
|
|
646
|
+
except Exception as exc:
|
|
647
|
+
result["errors"].append(f"kill {pid} failed: {exc}")
|
|
648
|
+
return result
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def _terminate_windows_db_writer_processes(dry_run: bool = False) -> dict:
|
|
652
|
+
result: dict = {"scanned": 0, "terminated": 0, "pids": [], "errors": [], "dry_run": dry_run}
|
|
653
|
+
ps_script = (
|
|
654
|
+
"Get-CimInstance Win32_Process | "
|
|
655
|
+
"Select-Object ProcessId,CommandLine | ConvertTo-Json -Compress"
|
|
656
|
+
)
|
|
657
|
+
try:
|
|
658
|
+
proc = subprocess.run(
|
|
659
|
+
["powershell", "-NoProfile", "-Command", ps_script],
|
|
660
|
+
capture_output=True,
|
|
661
|
+
text=True,
|
|
662
|
+
timeout=10,
|
|
663
|
+
)
|
|
664
|
+
except Exception as exc:
|
|
665
|
+
result["errors"].append(f"powershell process scan failed: {exc}")
|
|
666
|
+
return result
|
|
667
|
+
if proc.returncode != 0:
|
|
668
|
+
result["errors"].append(f"powershell exit {proc.returncode}: {proc.stderr.strip()[:200]}")
|
|
669
|
+
return result
|
|
670
|
+
try:
|
|
671
|
+
import json
|
|
672
|
+
rows = json.loads(proc.stdout or "[]")
|
|
673
|
+
except Exception as exc:
|
|
674
|
+
result["errors"].append(f"process json parse failed: {exc}")
|
|
675
|
+
return result
|
|
676
|
+
if isinstance(rows, dict):
|
|
677
|
+
rows = [rows]
|
|
678
|
+
my_pid = os.getpid()
|
|
679
|
+
for row in rows if isinstance(rows, list) else []:
|
|
680
|
+
try:
|
|
681
|
+
pid = int(row.get("ProcessId"))
|
|
682
|
+
except Exception:
|
|
683
|
+
continue
|
|
684
|
+
if pid == my_pid:
|
|
685
|
+
continue
|
|
686
|
+
cmd = str(row.get("CommandLine") or "")
|
|
687
|
+
if not _looks_like_nexo_db_writer(cmd):
|
|
688
|
+
continue
|
|
689
|
+
result["scanned"] += 1
|
|
690
|
+
result["pids"].append({"pid": pid, "command": cmd[:180]})
|
|
691
|
+
if dry_run:
|
|
692
|
+
continue
|
|
693
|
+
try:
|
|
694
|
+
subprocess.run(
|
|
695
|
+
["taskkill", "/PID", str(pid), "/T", "/F"],
|
|
696
|
+
capture_output=True,
|
|
697
|
+
text=True,
|
|
698
|
+
timeout=5,
|
|
699
|
+
)
|
|
700
|
+
result["terminated"] += 1
|
|
701
|
+
except Exception as exc:
|
|
702
|
+
result["errors"].append(f"taskkill {pid} failed: {exc}")
|
|
703
|
+
return result
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
def _looks_like_nexo_db_writer(cmd: str) -> bool:
|
|
707
|
+
if not cmd:
|
|
708
|
+
return False
|
|
709
|
+
lowered = cmd.lower()
|
|
710
|
+
if _looks_like_nexo_mcp(cmd):
|
|
711
|
+
return True
|
|
712
|
+
if "nexo-cron-wrapper.sh" in lowered and any(label in lowered for label in (
|
|
713
|
+
"local-index",
|
|
714
|
+
"email-monitor",
|
|
715
|
+
"followup-runner",
|
|
716
|
+
"watchdog",
|
|
717
|
+
"catchup",
|
|
718
|
+
"immune",
|
|
719
|
+
)):
|
|
720
|
+
return True
|
|
721
|
+
return any(marker in lowered for marker in NEXO_DB_WRITER_MARKERS)
|
package/src/doctor/providers/boot.py
CHANGED
|
@@ -37,6 +37,153 @@ def check_db_exists() -> DoctorCheck:
|
|
|
37
37
|
)
|
|
38
38
|
|
|
39
39
|
|
|
40
|
+
def check_db_integrity(fix: bool = False) -> DoctorCheck:
|
|
41
|
+
"""Detect and optionally repair a wiped/corrupt local Brain database."""
|
|
42
|
+
import sqlite3
|
|
43
|
+
import paths
|
|
44
|
+
from db_guard import (
|
|
45
|
+
CRITICAL_TABLES,
|
|
46
|
+
EMPTY_DB_SIZE_BYTES,
|
|
47
|
+
MIN_REFERENCE_ROWS,
|
|
48
|
+
db_looks_wiped,
|
|
49
|
+
db_row_counts,
|
|
50
|
+
find_latest_hourly_backup,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
db_path = paths.db_path()
|
|
54
|
+
if not db_path.is_file():
|
|
55
|
+
return DoctorCheck(
|
|
56
|
+
id="boot.db_integrity",
|
|
57
|
+
tier="boot",
|
|
58
|
+
status="critical",
|
|
59
|
+
severity="error",
|
|
60
|
+
summary="Database file not found",
|
|
61
|
+
evidence=[str(db_path)],
|
|
62
|
+
repair_plan=["Run nexo-brain to initialize the database"],
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
size_bytes = db_path.stat().st_size
|
|
67
|
+
except OSError:
|
|
68
|
+
size_bytes = -1
|
|
69
|
+
|
|
70
|
+
quick_ok = False
|
|
71
|
+
quick_error = ""
|
|
72
|
+
try:
|
|
73
|
+
conn = sqlite3.connect(str(db_path), timeout=2)
|
|
74
|
+
try:
|
|
75
|
+
row = conn.execute("PRAGMA quick_check").fetchone()
|
|
76
|
+
quick_ok = bool(row and str(row[0]).lower() == "ok")
|
|
77
|
+
if not quick_ok:
|
|
78
|
+
quick_error = str(row[0] if row else "quick_check returned no row")
|
|
79
|
+
finally:
|
|
80
|
+
conn.close()
|
|
81
|
+
except Exception as exc:
|
|
82
|
+
quick_error = f"{type(exc).__name__}: {exc}"
|
|
83
|
+
|
|
84
|
+
looks_wiped = db_looks_wiped(db_path, CRITICAL_TABLES)
|
|
85
|
+
reference = find_latest_hourly_backup(
|
|
86
|
+
paths.backups_dir(),
|
|
87
|
+
min_critical_rows=MIN_REFERENCE_ROWS,
|
|
88
|
+
)
|
|
89
|
+
reference_counts = db_row_counts(reference, CRITICAL_TABLES) if reference else {}
|
|
90
|
+
reference_rows = sum(v for v in reference_counts.values() if isinstance(v, int))
|
|
91
|
+
lower_error = quick_error.lower()
|
|
92
|
+
corrupt_error = any(token in lower_error for token in (
|
|
93
|
+
"database disk image is malformed",
|
|
94
|
+
"file is not a database",
|
|
95
|
+
"malformed",
|
|
96
|
+
"not a database",
|
|
97
|
+
))
|
|
98
|
+
recoverable_wipe = bool(
|
|
99
|
+
reference
|
|
100
|
+
and looks_wiped
|
|
101
|
+
and (
|
|
102
|
+
size_bytes <= EMPTY_DB_SIZE_BYTES
|
|
103
|
+
or corrupt_error
|
|
104
|
+
or not quick_ok
|
|
105
|
+
)
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
if quick_ok and not recoverable_wipe:
|
|
109
|
+
if looks_wiped and not reference:
|
|
110
|
+
return DoctorCheck(
|
|
111
|
+
id="boot.db_integrity",
|
|
112
|
+
tier="boot",
|
|
113
|
+
status="healthy",
|
|
114
|
+
severity="info",
|
|
115
|
+
summary="Database is readable and looks like a fresh install",
|
|
116
|
+
evidence=[f"Size: {size_bytes} bytes", "No usable backup with user data found"],
|
|
117
|
+
)
|
|
118
|
+
return DoctorCheck(
|
|
119
|
+
id="boot.db_integrity",
|
|
120
|
+
tier="boot",
|
|
121
|
+
status="healthy",
|
|
122
|
+
severity="info",
|
|
123
|
+
summary="Database integrity OK",
|
|
124
|
+
evidence=[f"Size: {size_bytes} bytes"],
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
evidence = [
|
|
128
|
+
f"DB: {db_path}",
|
|
129
|
+
f"Size: {size_bytes} bytes",
|
|
130
|
+
f"quick_check: {'ok' if quick_ok else quick_error or 'not ok'}",
|
|
131
|
+
f"looks_wiped: {looks_wiped}",
|
|
132
|
+
]
|
|
133
|
+
if reference:
|
|
134
|
+
evidence.append(f"Reference backup: {reference} ({reference_rows} critical rows)")
|
|
135
|
+
|
|
136
|
+
if fix and recoverable_wipe:
|
|
137
|
+
from plugins.recover import recover
|
|
138
|
+
|
|
139
|
+
report = recover(source=str(reference), force=True)
|
|
140
|
+
if report.get("ok"):
|
|
141
|
+
final_counts = report.get("final_row_counts") or {}
|
|
142
|
+
restored_rows = sum(v for v in final_counts.values() if isinstance(v, int))
|
|
143
|
+
return DoctorCheck(
|
|
144
|
+
id="boot.db_integrity",
|
|
145
|
+
tier="boot",
|
|
146
|
+
status="healthy",
|
|
147
|
+
severity="info",
|
|
148
|
+
summary=f"Database restored from backup ({restored_rows} critical rows)",
|
|
149
|
+
evidence=evidence + [f"Pre-recover snapshot: {report.get('pre_recover_dir', '')}"],
|
|
150
|
+
fixed=True,
|
|
151
|
+
)
|
|
152
|
+
return DoctorCheck(
|
|
153
|
+
id="boot.db_integrity",
|
|
154
|
+
tier="boot",
|
|
155
|
+
status="critical",
|
|
156
|
+
severity="error",
|
|
157
|
+
summary="Database repair failed",
|
|
158
|
+
evidence=evidence + [f"Recover errors: {report.get('errors') or []}"],
|
|
159
|
+
repair_plan=["Run nexo recover --force --yes, then restart Desktop"],
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
if recoverable_wipe:
|
|
163
|
+
return DoctorCheck(
|
|
164
|
+
id="boot.db_integrity",
|
|
165
|
+
tier="boot",
|
|
166
|
+
status="critical",
|
|
167
|
+
severity="error",
|
|
168
|
+
summary="Database appears wiped or corrupt but a valid backup exists",
|
|
169
|
+
evidence=evidence,
|
|
170
|
+
repair_plan=["Run nexo doctor --tier boot --plane database_real --fix"],
|
|
171
|
+
escalation_prompt="NEXO database needs automatic recovery from backup.",
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
status = "critical" if corrupt_error else "degraded"
|
|
175
|
+
severity = "error" if status == "critical" else "warn"
|
|
176
|
+
return DoctorCheck(
|
|
177
|
+
id="boot.db_integrity",
|
|
178
|
+
tier="boot",
|
|
179
|
+
status=status,
|
|
180
|
+
severity=severity,
|
|
181
|
+
summary="Database is not fully readable" if quick_error else "Database integrity is uncertain",
|
|
182
|
+
evidence=evidence,
|
|
183
|
+
repair_plan=["Close NEXO Desktop and run nexo doctor --tier boot --plane database_real --fix"],
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
|
|
40
187
|
def check_required_dirs() -> DoctorCheck:
|
|
41
188
|
"""Check that required NEXO_HOME directories exist (post-F0.6 layout
|
|
42
189
|
or pre-F0.6 fallback)."""
|
|
@@ -411,6 +558,7 @@ def run_boot_checks(fix: bool = False) -> list[DoctorCheck]:
|
|
|
411
558
|
"""Run all boot-tier checks."""
|
|
412
559
|
checks = [
|
|
413
560
|
safe_check(check_db_exists),
|
|
561
|
+
safe_check(check_db_integrity, fix=fix),
|
|
414
562
|
safe_check(check_required_dirs),
|
|
415
563
|
safe_check(check_disk_space),
|
|
416
564
|
safe_check(check_wrapper_scripts),
|
package/src/plugins/recover.py
CHANGED
|
@@ -41,7 +41,8 @@ from db_guard import (
|
|
|
41
41
|
db_row_counts,
|
|
42
42
|
diff_row_counts,
|
|
43
43
|
find_latest_hourly_backup,
|
|
44
|
-
kill_nexo_mcp_servers,
|
|
44
|
+
quiesce_nexo_db_writers,
|
|
45
|
+
resume_nexo_launchagents,
|
|
45
46
|
safe_sqlite_backup,
|
|
46
47
|
validate_backup_matches_source,
|
|
47
48
|
)
|
|
@@ -257,15 +258,20 @@ def recover(
|
|
|
257
258
|
result["steps"].append("dry-run: stopping before any write")
|
|
258
259
|
return result
|
|
259
260
|
|
|
260
|
-
|
|
261
|
+
stopped_launchagents: list[str] = []
|
|
262
|
+
|
|
263
|
+
# Step 3: quiesce live DB writers
|
|
261
264
|
if not skip_kill:
|
|
262
|
-
|
|
263
|
-
result["
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
265
|
+
quiesce_report = quiesce_nexo_db_writers(dry_run=False)
|
|
266
|
+
result["quiesce"] = quiesce_report
|
|
267
|
+
stopped_launchagents = list((quiesce_report.get("launchagents") or {}).get("stopped") or [])
|
|
268
|
+
result["steps"].append(
|
|
269
|
+
"quiesce_db_writers: "
|
|
270
|
+
f"terminated={quiesce_report.get('terminated', 0)} "
|
|
271
|
+
f"launchagents={len(stopped_launchagents)}"
|
|
272
|
+
)
|
|
273
|
+
if quiesce_report.get("errors"):
|
|
274
|
+
result["warnings"].extend(quiesce_report["errors"])
|
|
269
275
|
|
|
270
276
|
# Step 4: snapshot current state to pre-recover/
|
|
271
277
|
pre_recover_dir = _backup_base() / f"pre-recover-{time.strftime('%Y-%m-%d-%H%M%S')}"
|
|
@@ -297,17 +303,24 @@ def recover(
|
|
|
297
303
|
ok, copy_err = safe_sqlite_backup(chosen, target_path)
|
|
298
304
|
if not ok:
|
|
299
305
|
result["errors"].append(f"restore copy failed: {copy_err}")
|
|
306
|
+
if stopped_launchagents:
|
|
307
|
+
result["resume"] = resume_nexo_launchagents(stopped_launchagents)
|
|
300
308
|
return result
|
|
301
309
|
result["steps"].append(f"restored {chosen.name} -> {target_path}")
|
|
302
310
|
|
|
303
311
|
valid, valid_err = validate_backup_matches_source(chosen, target_path)
|
|
304
312
|
if not valid:
|
|
305
313
|
result["errors"].append(f"post-restore validation failed: {valid_err}")
|
|
314
|
+
if stopped_launchagents:
|
|
315
|
+
result["resume"] = resume_nexo_launchagents(stopped_launchagents)
|
|
306
316
|
return result
|
|
307
317
|
result["steps"].append("validated post-restore row counts")
|
|
308
318
|
|
|
309
319
|
final_counts = db_row_counts(target_path)
|
|
310
320
|
result["final_row_counts"] = {k: v for k, v in final_counts.items() if v is not None}
|
|
321
|
+
if stopped_launchagents:
|
|
322
|
+
result["resume"] = resume_nexo_launchagents(stopped_launchagents)
|
|
323
|
+
result["steps"].append(f"resumed {len((result['resume'] or {}).get('started') or [])} launchagent(s)")
|
|
311
324
|
result["ok"] = True
|
|
312
325
|
return result
|
|
313
326
|
|