nexo-brain 3.0.0 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +21 -1
- package/package.json +1 -1
- package/src/agent_runner.py +5 -1
- package/src/auto_update.py +23 -6
- package/src/client_preferences.py +5 -1
- package/src/client_sync.py +5 -1
- package/src/cognitive/_memory.py +14 -7
- package/src/cognitive/_search.py +12 -5
- package/src/crons/sync.py +2 -1
- package/src/dashboard/app.py +1 -1
- package/src/db/_workflow.py +2 -2
- package/src/doctor/models.py +25 -0
- package/src/doctor/orchestrator.py +32 -2
- package/src/doctor/providers/boot.py +52 -26
- package/src/doctor/providers/deep.py +24 -21
- package/src/doctor/providers/runtime.py +151 -135
- package/src/evolution_cycle.py +48 -46
- package/src/kg_populate.py +21 -19
- package/src/maintenance.py +3 -3
- package/src/migrate_embeddings.py +36 -34
- package/src/plugins/backup.py +24 -12
- package/src/plugins/schedule.py +13 -1
- package/src/plugins/update.py +18 -4
- package/src/public_contribution.py +10 -14
- package/src/requirements.txt +1 -0
- package/src/scripts/nexo-catchup.py +15 -15
- package/src/scripts/nexo-daily-self-audit.py +12 -1
- package/src/scripts/nexo-evolution-run.py +9 -3
- package/src/state_watchers_runtime.py +48 -41
- package/src/tools_sessions.py +2 -2
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.2",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -50,7 +50,7 @@ That means NEXO now manages not only the shared runtime and MCP wiring, but also
|
|
|
50
50
|
|
|
51
51
|
Versions `2.6.14` through `2.7.0` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first measured engineering loop.
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
Versions `3.0.0` and `3.0.1` close the next execution gap:
|
|
54
54
|
|
|
55
55
|
- protocol discipline is now a runtime contract, not just instructions:
|
|
56
56
|
- `nexo_task_open`
|
|
@@ -982,6 +982,26 @@ If NEXO Brain is useful to you, consider:
|
|
|
982
982
|
|
|
983
983
|
## Changelog
|
|
984
984
|
|
|
985
|
+
### v3.0.1 — Python 3.10 Compatibility Patch (2026-04-06)
|
|
986
|
+
- Restored Python 3.10 compatibility by replacing Python 3.11-only `datetime.UTC` with `timezone.utc`.
|
|
987
|
+
- Added `tomllib` → `tomli` fallback plus declared runtime dependency for Python < 3.11.
|
|
988
|
+
- Boot doctor now validates all critical JSON config artifacts: `schedule.json`, `optionals.json`, `crons/manifest.json`.
|
|
989
|
+
|
|
990
|
+
### v3.0.0 — Protocol Discipline, Durable Execution, Measured Runtime (2026-04-06)
|
|
991
|
+
- **Protocol discipline runtime**: Enforceable `nexo_task_open`/`nexo_task_close`, persistent `protocol_debt`, `Cortex` gates with durable `check_id`, conditioned-file guardrails across Claude hooks and Codex transcript audits.
|
|
992
|
+
- **Durable workflow runtime**: `nexo_workflow_open`/`update`/`resume`/`replay`/`list` with persistent runs, steps, checkpoints, replay history, retry bookkeeping, and idempotent open keys.
|
|
993
|
+
- **Durable goals**: `nexo_goal_open`/`update`/`get`/`list` for long-running work that stays active/blocked/abandoned/completed.
|
|
994
|
+
- **Operational truth**: Deep Sleep survives schema drift, `keep_alive` reports alive/degraded/duplicated honestly, warning storms no longer count as healthy.
|
|
995
|
+
- **Measured product surface**: 5-minute quickstart, Python SDK, reference verticals, measured compare scorecard with LoCoMo baselines and `cost_per_solved_task`.
|
|
996
|
+
- **Skill lifecycle**: Testing, promotion, retirement, and composition flows. Evolution public-core peer-review for opt-in PRs.
|
|
997
|
+
|
|
998
|
+
### v2.7.0 — Shared Brain Baseline (2026-04-06)
|
|
999
|
+
- Managed Claude Code + Codex bootstrap with explicit `CORE`/`USER` contract.
|
|
1000
|
+
- Codex config sync and transcript-aware Deep Sleep across both clients.
|
|
1001
|
+
- 60-day long-horizon analysis, weekly/monthly summary artifacts.
|
|
1002
|
+
- Retrieval auto-mode and first measured engineering loop.
|
|
1003
|
+
- `nexo chat` opens the configured client instead of assuming Claude Code.
|
|
1004
|
+
|
|
985
1005
|
### v2.6.9 — Integration Sync, CI/CD Pipeline (2026-04-04)
|
|
986
1006
|
- **Release artifact sync**: Automated version synchronization across Claude Code plugin, OpenClaw package, and ClawHub skill before every publish.
|
|
987
1007
|
- **CI/CD pipeline**: Full GitHub Actions workflow for publish + verification of all integration channels.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.2",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/agent_runner.py
CHANGED
|
@@ -9,9 +9,13 @@ import shutil
|
|
|
9
9
|
import subprocess
|
|
10
10
|
import tempfile
|
|
11
11
|
import time
|
|
12
|
-
import tomllib
|
|
13
12
|
from pathlib import Path
|
|
14
13
|
|
|
14
|
+
try:
|
|
15
|
+
import tomllib
|
|
16
|
+
except ModuleNotFoundError: # Python < 3.11
|
|
17
|
+
import tomli as tomllib
|
|
18
|
+
|
|
15
19
|
from client_preferences import (
|
|
16
20
|
BACKEND_NONE,
|
|
17
21
|
CLIENT_CLAUDE_CODE,
|
package/src/auto_update.py
CHANGED
|
@@ -311,14 +311,21 @@ def _backup_dbs() -> str | None:
|
|
|
311
311
|
|
|
312
312
|
backup_dir.mkdir(parents=True, exist_ok=True)
|
|
313
313
|
for db_file in db_files:
|
|
314
|
+
src_conn = None
|
|
315
|
+
dst_conn = None
|
|
314
316
|
try:
|
|
315
317
|
src_conn = sqlite3.connect(str(db_file))
|
|
316
318
|
dst_conn = sqlite3.connect(str(backup_dir / db_file.name))
|
|
317
319
|
src_conn.backup(dst_conn)
|
|
318
|
-
dst_conn.close()
|
|
319
|
-
src_conn.close()
|
|
320
320
|
except Exception as e:
|
|
321
321
|
_log(f"DB backup warning ({db_file.name}): {e}")
|
|
322
|
+
finally:
|
|
323
|
+
for conn in (dst_conn, src_conn):
|
|
324
|
+
if conn is not None:
|
|
325
|
+
try:
|
|
326
|
+
conn.close()
|
|
327
|
+
except Exception:
|
|
328
|
+
pass
|
|
322
329
|
return str(backup_dir)
|
|
323
330
|
|
|
324
331
|
|
|
@@ -331,15 +338,22 @@ def _restore_dbs(backup_dir: str):
|
|
|
331
338
|
for db_backup in bdir.glob("*.db"):
|
|
332
339
|
for candidate in [DATA_DIR / db_backup.name, NEXO_HOME / db_backup.name, SRC_DIR / db_backup.name]:
|
|
333
340
|
if candidate.is_file():
|
|
341
|
+
src_conn = None
|
|
342
|
+
dst_conn = None
|
|
334
343
|
try:
|
|
335
344
|
src_conn = sqlite3.connect(str(db_backup))
|
|
336
345
|
dst_conn = sqlite3.connect(str(candidate))
|
|
337
346
|
src_conn.backup(dst_conn)
|
|
338
|
-
dst_conn.close()
|
|
339
|
-
src_conn.close()
|
|
340
347
|
_log(f"Restored DB: {db_backup.name}")
|
|
341
348
|
except Exception as e:
|
|
342
349
|
_log(f"DB restore warning ({db_backup.name}): {e}")
|
|
350
|
+
finally:
|
|
351
|
+
for conn in (dst_conn, src_conn):
|
|
352
|
+
if conn is not None:
|
|
353
|
+
try:
|
|
354
|
+
conn.close()
|
|
355
|
+
except Exception:
|
|
356
|
+
pass
|
|
343
357
|
break
|
|
344
358
|
|
|
345
359
|
|
|
@@ -626,6 +640,10 @@ def _run_file_migration(path: Path) -> tuple[bool, str]:
|
|
|
626
640
|
def run_file_migrations() -> list[dict]:
|
|
627
641
|
"""Run any pending file-based migrations from the migrations/ directory.
|
|
628
642
|
|
|
643
|
+
Migrations are ordered and sequential: if migration N fails, all subsequent
|
|
644
|
+
migrations are skipped so that N is retried on the next startup and no
|
|
645
|
+
migration is permanently skipped by a version-pointer gap.
|
|
646
|
+
|
|
629
647
|
Returns list of results: [{"version": N, "file": "...", "status": "ok"|"failed", "message": "..."}]
|
|
630
648
|
"""
|
|
631
649
|
current_version = _get_applied_migration_version()
|
|
@@ -655,8 +673,7 @@ def run_file_migrations() -> list[dict]:
|
|
|
655
673
|
"message": message,
|
|
656
674
|
})
|
|
657
675
|
_log(f"Migration {path.name}: FAILED — {message}")
|
|
658
|
-
#
|
|
659
|
-
# so independent migrations still run. Version stays at last success.
|
|
676
|
+
break # Stop on first failure so it retries next startup
|
|
660
677
|
|
|
661
678
|
return results
|
|
662
679
|
|
|
@@ -5,9 +5,13 @@ from __future__ import annotations
|
|
|
5
5
|
import os
|
|
6
6
|
import shutil
|
|
7
7
|
import sys
|
|
8
|
-
import tomllib
|
|
9
8
|
from pathlib import Path
|
|
10
9
|
|
|
10
|
+
try:
|
|
11
|
+
import tomllib
|
|
12
|
+
except ModuleNotFoundError: # Python < 3.11
|
|
13
|
+
import tomli as tomllib
|
|
14
|
+
|
|
11
15
|
from runtime_power import load_schedule_config, save_schedule_config
|
|
12
16
|
|
|
13
17
|
|
package/src/client_sync.py
CHANGED
|
@@ -10,9 +10,13 @@ import shlex
|
|
|
10
10
|
import shutil
|
|
11
11
|
import subprocess
|
|
12
12
|
import sys
|
|
13
|
-
import tomllib
|
|
14
13
|
from pathlib import Path
|
|
15
14
|
|
|
15
|
+
try:
|
|
16
|
+
import tomllib
|
|
17
|
+
except ModuleNotFoundError: # Python < 3.11
|
|
18
|
+
import tomli as tomllib
|
|
19
|
+
|
|
16
20
|
from bootstrap_docs import sync_client_bootstrap
|
|
17
21
|
|
|
18
22
|
try:
|
package/src/cognitive/_memory.py
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
"""NEXO Cognitive — Memory operations: format, stats, consolidation, somatic."""
|
|
2
2
|
import json, math, re
|
|
3
3
|
import numpy as np
|
|
4
|
-
from datetime import datetime, timedelta
|
|
4
|
+
from datetime import datetime, timedelta, timezone
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _utcnow_naive() -> datetime:
|
|
8
|
+
"""Timezone-aware UTC clock returned as a naive datetime to preserve
|
|
9
|
+
the legacy ``datetime.utcnow()`` string format on disk.
|
|
10
|
+
"""
|
|
11
|
+
return datetime.now(timezone.utc).replace(tzinfo=None)
|
|
5
12
|
from cognitive._core import _get_db, embed, cosine_similarity, _blob_to_array, _array_to_blob, EMBEDDING_DIM, DISCRIMINATING_ENTITIES
|
|
6
13
|
from cognitive._ingest import _sanitize_memory_content
|
|
7
14
|
|
|
@@ -74,7 +81,7 @@ def get_metrics(days: int = 7) -> dict:
|
|
|
74
81
|
score_distribution: histogram buckets [<0.5, 0.5-0.6, 0.6-0.7, 0.7-0.8, >0.8]
|
|
75
82
|
"""
|
|
76
83
|
db = _get_db()
|
|
77
|
-
cutoff = (datetime.utcnow() - timedelta(days=days)).isoformat()
|
|
84
|
+
cutoff = (_utcnow_naive() - timedelta(days=days)).isoformat()
|
|
78
85
|
|
|
79
86
|
rows = db.execute(
|
|
80
87
|
"SELECT top_score FROM retrieval_log WHERE created_at >= ?", (cutoff,)
|
|
@@ -130,7 +137,7 @@ def check_repeat_errors() -> dict:
|
|
|
130
137
|
Returns count of new learnings that are semantically duplicate (cosine > 0.8).
|
|
131
138
|
"""
|
|
132
139
|
db = _get_db()
|
|
133
|
-
cutoff_7d = (datetime.utcnow() - timedelta(days=7)).isoformat()
|
|
140
|
+
cutoff_7d = (_utcnow_naive() - timedelta(days=7)).isoformat()
|
|
134
141
|
|
|
135
142
|
# Recent learning STM entries
|
|
136
143
|
new_learnings = db.execute(
|
|
@@ -192,7 +199,7 @@ def rehearse_by_content(content_keywords: str, source_type: str = ""):
|
|
|
192
199
|
if np.linalg.norm(query_vec) == 0:
|
|
193
200
|
return
|
|
194
201
|
|
|
195
|
-
now = datetime.utcnow().isoformat()
|
|
202
|
+
now = _utcnow_naive().isoformat()
|
|
196
203
|
|
|
197
204
|
# Search both stores for matches >= 0.7
|
|
198
205
|
for table in ("stm_memories", "ltm_memories"):
|
|
@@ -803,7 +810,7 @@ def security_scan(content: str) -> dict:
|
|
|
803
810
|
def somatic_accumulate(target: str, target_type: str, delta: float):
|
|
804
811
|
"""Increase risk_score for a target (file or area). Capped at 1.0."""
|
|
805
812
|
db = _get_db()
|
|
806
|
-
now = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
|
|
813
|
+
now = _utcnow_naive().strftime("%Y-%m-%dT%H:%M:%S")
|
|
807
814
|
existing = db.execute(
|
|
808
815
|
"SELECT id, risk_score, incident_count FROM somatic_markers WHERE target = ? AND target_type = ?",
|
|
809
816
|
(target, target_type)
|
|
@@ -827,8 +834,8 @@ def somatic_accumulate(target: str, target_type: str, delta: float):
|
|
|
827
834
|
def somatic_guard_decay(target: str, target_type: str):
|
|
828
835
|
"""Validated recovery: multiplicative x0.7 on successful guard check. Max once/day/target."""
|
|
829
836
|
db = _get_db()
|
|
830
|
-
today = datetime.utcnow().strftime("%Y-%m-%d")
|
|
831
|
-
now = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
|
|
837
|
+
today = _utcnow_naive().strftime("%Y-%m-%d")
|
|
838
|
+
now = _utcnow_naive().strftime("%Y-%m-%dT%H:%M:%S")
|
|
832
839
|
row = db.execute(
|
|
833
840
|
"SELECT id, risk_score, last_guard_decay_date FROM somatic_markers WHERE target = ? AND target_type = ?",
|
|
834
841
|
(target, target_type)
|
package/src/cognitive/_search.py
CHANGED
|
@@ -3,7 +3,14 @@ import math
|
|
|
3
3
|
import re
|
|
4
4
|
import sqlite3
|
|
5
5
|
import numpy as np
|
|
6
|
-
from datetime import datetime
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _utcnow_naive() -> datetime:
|
|
10
|
+
"""Timezone-aware UTC clock returned as a naive datetime to preserve
|
|
11
|
+
the legacy ``datetime.utcnow()`` string format on disk.
|
|
12
|
+
"""
|
|
13
|
+
return datetime.now(timezone.utc).replace(tzinfo=None)
|
|
7
14
|
from cognitive._core import (
|
|
8
15
|
_get_db, embed, cosine_similarity, _blob_to_array, _array_to_blob,
|
|
9
16
|
_get_model, _get_reranker, rerank_results, EMBEDDING_DIM,
|
|
@@ -461,7 +468,7 @@ def record_co_activation(memory_ids: list[tuple[str, int]]):
|
|
|
461
468
|
return
|
|
462
469
|
|
|
463
470
|
db = _get_db()
|
|
464
|
-
now = datetime.utcnow().isoformat()
|
|
471
|
+
now = _utcnow_naive().isoformat()
|
|
465
472
|
|
|
466
473
|
hashes = [_canonical_co_id(store, mid) for store, mid in memory_ids]
|
|
467
474
|
|
|
@@ -571,7 +578,7 @@ def _match_triggers(
|
|
|
571
578
|
text_vec = embed(text)
|
|
572
579
|
|
|
573
580
|
matched_triggers = []
|
|
574
|
-
now = datetime.utcnow().isoformat()
|
|
581
|
+
now = _utcnow_naive().isoformat()
|
|
575
582
|
|
|
576
583
|
for trigger in armed:
|
|
577
584
|
pattern = trigger["trigger_pattern"].lower()
|
|
@@ -667,7 +674,7 @@ def rearm_trigger(trigger_id: int) -> str:
|
|
|
667
674
|
|
|
668
675
|
def _auto_restore_snoozed(db: sqlite3.Connection):
|
|
669
676
|
"""Restore snoozed memories whose snooze_until date has passed."""
|
|
670
|
-
now = datetime.utcnow().isoformat()
|
|
677
|
+
now = _utcnow_naive().isoformat()
|
|
671
678
|
for table in ("stm_memories", "ltm_memories"):
|
|
672
679
|
db.execute(
|
|
673
680
|
f"UPDATE {table} SET lifecycle_state = 'active', snooze_until = NULL "
|
|
@@ -682,7 +689,7 @@ def _rehearse_results(results: list[dict], skip_ids: set = None):
|
|
|
682
689
|
if not results:
|
|
683
690
|
return
|
|
684
691
|
db = _get_db()
|
|
685
|
-
now = datetime.utcnow().isoformat()
|
|
692
|
+
now = _utcnow_naive().isoformat()
|
|
686
693
|
skip = skip_ids or set()
|
|
687
694
|
for r in results:
|
|
688
695
|
if (r["store"], r["id"]) in skip:
|
package/src/crons/sync.py
CHANGED
|
@@ -446,7 +446,8 @@ StandardError=append:{stderr_log}
|
|
|
446
446
|
s = cron["schedule"]
|
|
447
447
|
h, m = s.get("hour", 0), s.get("minute", 0)
|
|
448
448
|
if "weekday" in s:
|
|
449
|
-
|
|
449
|
+
# Manifest weekday uses launchd convention: 0=Sunday … 6=Saturday (7=Sunday alias)
|
|
450
|
+
days = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
|
|
450
451
|
timer_spec = f"OnCalendar={days[s['weekday']]} *-*-* {h:02d}:{m:02d}:00"
|
|
451
452
|
else:
|
|
452
453
|
timer_spec = f"OnCalendar=*-*-* {h:02d}:{m:02d}:00"
|
package/src/dashboard/app.py
CHANGED
|
@@ -30,7 +30,7 @@ if _PARENT not in sys.path:
|
|
|
30
30
|
|
|
31
31
|
from agent_runner import AgentRunnerError, build_followup_terminal_shell_command
|
|
32
32
|
|
|
33
|
-
app = FastAPI(title="NEXO Brain Dashboard", version="3.0.0")
|
|
33
|
+
app = FastAPI(title="NEXO Brain Dashboard", version="3.0.1")
|
|
34
34
|
|
|
35
35
|
TEMPLATES_DIR = Path(__file__).resolve().parent / "templates"
|
|
36
36
|
STATIC_DIR = Path(__file__).resolve().parent / "static"
|
package/src/db/_workflow.py
CHANGED
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
import json
|
|
5
5
|
import secrets
|
|
6
6
|
import time
|
|
7
|
-
from datetime import UTC, datetime
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
8
|
|
|
9
9
|
from db._core import get_db
|
|
10
10
|
|
|
@@ -48,7 +48,7 @@ def _workflow_goal_id() -> str:
|
|
|
48
48
|
|
|
49
49
|
|
|
50
50
|
def _now_sql() -> str:
|
|
51
|
-
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S")
|
|
51
|
+
return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
def _as_json(value, default):
|
package/src/doctor/models.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""Doctor data models — check results and report structure."""
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
+
import traceback
|
|
4
5
|
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Callable
|
|
5
7
|
|
|
6
8
|
|
|
7
9
|
@dataclass
|
|
@@ -42,3 +44,26 @@ class DoctorReport:
|
|
|
42
44
|
"critical": statuses.count("critical"),
|
|
43
45
|
"total": len(statuses),
|
|
44
46
|
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def safe_check(fn: Callable[..., DoctorCheck], *args, **kwargs) -> DoctorCheck:
|
|
50
|
+
"""Run a single check function, returning a crash DoctorCheck on exception.
|
|
51
|
+
|
|
52
|
+
This isolates individual checks so one failure doesn't take down
|
|
53
|
+
all sibling checks within a tier.
|
|
54
|
+
"""
|
|
55
|
+
try:
|
|
56
|
+
return fn(*args, **kwargs)
|
|
57
|
+
except Exception as exc:
|
|
58
|
+
tb = traceback.format_exception(type(exc), exc, exc.__traceback__)
|
|
59
|
+
last_frame = tb[-1].strip() if tb else str(exc)
|
|
60
|
+
check_name = getattr(fn, "__name__", "unknown")
|
|
61
|
+
return DoctorCheck(
|
|
62
|
+
id=f"check.{check_name}_crashed",
|
|
63
|
+
tier="unknown",
|
|
64
|
+
status="critical",
|
|
65
|
+
severity="error",
|
|
66
|
+
summary=f"Check {check_name} crashed: {type(exc).__name__}: {exc}",
|
|
67
|
+
evidence=[last_frame],
|
|
68
|
+
repair_plan=[f"Investigate {check_name} — exception during check execution"],
|
|
69
|
+
)
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
"""Doctor orchestrator — runs providers by tier, aggregates results."""
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
+
import sys
|
|
4
5
|
import time
|
|
6
|
+
import traceback
|
|
5
7
|
|
|
6
|
-
from doctor.models import DoctorReport
|
|
8
|
+
from doctor.models import DoctorCheck, DoctorReport
|
|
7
9
|
from doctor.providers.boot import run_boot_checks
|
|
8
10
|
from doctor.providers.runtime import run_runtime_checks
|
|
9
11
|
from doctor.providers.deep import run_deep_checks
|
|
@@ -17,6 +19,8 @@ _TIER_RUNNERS = {
|
|
|
17
19
|
|
|
18
20
|
_TIER_ORDER = ["boot", "runtime", "deep"]
|
|
19
21
|
|
|
22
|
+
VALID_TIERS = frozenset(_TIER_ORDER) | {"all"}
|
|
23
|
+
|
|
20
24
|
|
|
21
25
|
def run_doctor(tier: str = "boot", fix: bool = False) -> DoctorReport:
|
|
22
26
|
"""Run diagnostic checks for the specified tier(s).
|
|
@@ -28,14 +32,40 @@ def run_doctor(tier: str = "boot", fix: bool = False) -> DoctorReport:
|
|
|
28
32
|
report = DoctorReport(overall_status="healthy")
|
|
29
33
|
start = time.monotonic()
|
|
30
34
|
|
|
35
|
+
if tier not in VALID_TIERS:
|
|
36
|
+
report.add(DoctorCheck(
|
|
37
|
+
id="orchestrator.invalid_tier",
|
|
38
|
+
tier="orchestrator",
|
|
39
|
+
status="critical",
|
|
40
|
+
severity="error",
|
|
41
|
+
summary=f"Unknown tier '{tier}' — valid options: {', '.join(sorted(VALID_TIERS))}",
|
|
42
|
+
))
|
|
43
|
+
report.compute_status()
|
|
44
|
+
report.duration_ms = int((time.monotonic() - start) * 1000)
|
|
45
|
+
return report
|
|
46
|
+
|
|
31
47
|
tiers = _TIER_ORDER if tier == "all" else [tier]
|
|
32
48
|
|
|
33
49
|
for t in tiers:
|
|
34
50
|
runner = _TIER_RUNNERS.get(t)
|
|
35
|
-
if runner:
|
|
51
|
+
if not runner:
|
|
52
|
+
continue
|
|
53
|
+
try:
|
|
36
54
|
checks = runner(fix=fix)
|
|
37
55
|
for check in checks:
|
|
38
56
|
report.add(check)
|
|
57
|
+
except Exception as exc:
|
|
58
|
+
tb = traceback.format_exception(type(exc), exc, exc.__traceback__)
|
|
59
|
+
last_frame = tb[-1].strip() if tb else str(exc)
|
|
60
|
+
report.add(DoctorCheck(
|
|
61
|
+
id=f"orchestrator.{t}_crashed",
|
|
62
|
+
tier=t,
|
|
63
|
+
status="critical",
|
|
64
|
+
severity="error",
|
|
65
|
+
summary=f"{t} tier checks crashed: {type(exc).__name__}: {exc}",
|
|
66
|
+
evidence=[last_frame],
|
|
67
|
+
repair_plan=[f"Investigate {t} provider — exception during check execution"],
|
|
68
|
+
))
|
|
39
69
|
|
|
40
70
|
report.compute_status()
|
|
41
71
|
report.duration_ms = int((time.monotonic() - start) * 1000)
|
|
@@ -6,7 +6,7 @@ import shutil
|
|
|
6
6
|
import sys
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
|
|
9
|
-
from doctor.models import DoctorCheck
|
|
9
|
+
from doctor.models import DoctorCheck, safe_check
|
|
10
10
|
|
|
11
11
|
NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
|
|
12
12
|
|
|
@@ -148,47 +148,73 @@ def check_python_runtime() -> DoctorCheck:
|
|
|
148
148
|
)
|
|
149
149
|
|
|
150
150
|
|
|
151
|
+
CRITICAL_CONFIG_FILES = (
|
|
152
|
+
("schedule.json", ("config", "schedule.json")),
|
|
153
|
+
("optionals.json", ("config", "optionals.json")),
|
|
154
|
+
("crons/manifest.json", ("crons", "manifest.json")),
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
|
|
151
158
|
def check_config_parse() -> DoctorCheck:
|
|
152
|
-
"""
|
|
153
|
-
|
|
154
|
-
|
|
159
|
+
"""Validate that critical JSON config files parse correctly."""
|
|
160
|
+
import json
|
|
161
|
+
|
|
162
|
+
errors: list[str] = []
|
|
163
|
+
checked: list[str] = []
|
|
164
|
+
|
|
165
|
+
for label, relative in CRITICAL_CONFIG_FILES:
|
|
166
|
+
path = NEXO_HOME.joinpath(*relative)
|
|
167
|
+
if not path.exists():
|
|
168
|
+
continue
|
|
169
|
+
try:
|
|
170
|
+
data = json.loads(path.read_text())
|
|
171
|
+
except Exception as exc:
|
|
172
|
+
errors.append(f"{label}: {exc}")
|
|
173
|
+
continue
|
|
174
|
+
if not isinstance(data, dict):
|
|
175
|
+
errors.append(f"{label}: expected JSON object, got {type(data).__name__}")
|
|
176
|
+
continue
|
|
177
|
+
checked.append(label)
|
|
178
|
+
|
|
179
|
+
if errors:
|
|
155
180
|
return DoctorCheck(
|
|
156
181
|
id="boot.config_parse",
|
|
157
182
|
tier="boot",
|
|
158
|
-
status="
|
|
159
|
-
severity="
|
|
160
|
-
summary="
|
|
183
|
+
status="degraded",
|
|
184
|
+
severity="warn",
|
|
185
|
+
summary=f"{len(errors)} config file parse error" + ("s" if len(errors) != 1 else ""),
|
|
186
|
+
evidence=errors,
|
|
187
|
+
repair_plan=["Fix JSON syntax in the listed config files, or delete them to fall back to defaults"],
|
|
161
188
|
)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
json.loads(schedule_file.read_text())
|
|
189
|
+
|
|
190
|
+
if not checked:
|
|
165
191
|
return DoctorCheck(
|
|
166
192
|
id="boot.config_parse",
|
|
167
193
|
tier="boot",
|
|
168
194
|
status="healthy",
|
|
169
195
|
severity="info",
|
|
170
|
-
summary="
|
|
171
|
-
)
|
|
172
|
-
except Exception as e:
|
|
173
|
-
return DoctorCheck(
|
|
174
|
-
id="boot.config_parse",
|
|
175
|
-
tier="boot",
|
|
176
|
-
status="degraded",
|
|
177
|
-
severity="warn",
|
|
178
|
-
summary=f"schedule.json parse error: {e}",
|
|
179
|
-
repair_plan=["Fix JSON syntax in schedule.json or delete to use defaults"],
|
|
196
|
+
summary="No config files present (using defaults)",
|
|
180
197
|
)
|
|
181
198
|
|
|
199
|
+
return DoctorCheck(
|
|
200
|
+
id="boot.config_parse",
|
|
201
|
+
tier="boot",
|
|
202
|
+
status="healthy",
|
|
203
|
+
severity="info",
|
|
204
|
+
summary=f"{len(checked)} config file" + ("s" if len(checked) != 1 else "") + " parse OK",
|
|
205
|
+
evidence=checked,
|
|
206
|
+
)
|
|
207
|
+
|
|
182
208
|
|
|
183
209
|
def run_boot_checks(fix: bool = False) -> list[DoctorCheck]:
|
|
184
210
|
"""Run all boot-tier checks."""
|
|
185
211
|
checks = [
|
|
186
|
-
check_db_exists
|
|
187
|
-
check_required_dirs
|
|
188
|
-
check_disk_space
|
|
189
|
-
check_wrapper_scripts
|
|
190
|
-
check_python_runtime
|
|
191
|
-
check_config_parse
|
|
212
|
+
safe_check(check_db_exists),
|
|
213
|
+
safe_check(check_required_dirs),
|
|
214
|
+
safe_check(check_disk_space),
|
|
215
|
+
safe_check(check_wrapper_scripts),
|
|
216
|
+
safe_check(check_python_runtime),
|
|
217
|
+
safe_check(check_config_parse),
|
|
192
218
|
]
|
|
193
219
|
|
|
194
220
|
if fix:
|
|
@@ -6,7 +6,7 @@ import os
|
|
|
6
6
|
import time
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
|
|
9
|
-
from doctor.models import DoctorCheck
|
|
9
|
+
from doctor.models import DoctorCheck, safe_check
|
|
10
10
|
|
|
11
11
|
NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
|
|
12
12
|
|
|
@@ -106,8 +106,10 @@ def check_schema_version() -> DoctorCheck:
|
|
|
106
106
|
summary="No database to check schema",
|
|
107
107
|
)
|
|
108
108
|
conn = sqlite3.connect(str(db_path), timeout=2)
|
|
109
|
-
|
|
110
|
-
|
|
109
|
+
try:
|
|
110
|
+
version = conn.execute("PRAGMA user_version").fetchone()[0]
|
|
111
|
+
finally:
|
|
112
|
+
conn.close()
|
|
111
113
|
return DoctorCheck(
|
|
112
114
|
id="deep.schema_version",
|
|
113
115
|
tier="deep",
|
|
@@ -250,20 +252,21 @@ def check_learning_count() -> DoctorCheck:
|
|
|
250
252
|
summary="No DB to check learnings",
|
|
251
253
|
)
|
|
252
254
|
conn = sqlite3.connect(str(db_path), timeout=2)
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
255
|
+
try:
|
|
256
|
+
tables = conn.execute(
|
|
257
|
+
"SELECT name FROM sqlite_master WHERE type='table' AND name='learnings'"
|
|
258
|
+
).fetchone()
|
|
259
|
+
if not tables:
|
|
260
|
+
return DoctorCheck(
|
|
261
|
+
id="deep.learning_count",
|
|
262
|
+
tier="deep",
|
|
263
|
+
status="healthy",
|
|
264
|
+
severity="info",
|
|
265
|
+
summary="No learnings table yet",
|
|
266
|
+
)
|
|
267
|
+
count = conn.execute("SELECT COUNT(*) FROM learnings WHERE archived=0").fetchone()[0]
|
|
268
|
+
finally:
|
|
257
269
|
conn.close()
|
|
258
|
-
return DoctorCheck(
|
|
259
|
-
id="deep.learning_count",
|
|
260
|
-
tier="deep",
|
|
261
|
-
status="healthy",
|
|
262
|
-
severity="info",
|
|
263
|
-
summary="No learnings table yet",
|
|
264
|
-
)
|
|
265
|
-
count = conn.execute("SELECT COUNT(*) FROM learnings WHERE archived=0").fetchone()[0]
|
|
266
|
-
conn.close()
|
|
267
270
|
return DoctorCheck(
|
|
268
271
|
id="deep.learning_count",
|
|
269
272
|
tier="deep",
|
|
@@ -284,9 +287,9 @@ def check_learning_count() -> DoctorCheck:
|
|
|
284
287
|
def run_deep_checks(fix: bool = False) -> list[DoctorCheck]:
|
|
285
288
|
"""Run all deep-tier checks. Read-only."""
|
|
286
289
|
return [
|
|
287
|
-
check_self_audit_summary
|
|
288
|
-
check_schema_version
|
|
289
|
-
check_preflight_summary
|
|
290
|
-
check_watchdog_smoke
|
|
291
|
-
check_learning_count
|
|
290
|
+
safe_check(check_self_audit_summary),
|
|
291
|
+
safe_check(check_schema_version),
|
|
292
|
+
safe_check(check_preflight_summary),
|
|
293
|
+
safe_check(check_watchdog_smoke),
|
|
294
|
+
safe_check(check_learning_count),
|
|
292
295
|
]
|