nexo-brain 7.11.3 → 7.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +5 -1
- package/bin/nexo-brain.js +74 -30
- package/package.json +1 -1
- package/src/auto_update.py +8 -1
- package/src/crons/manifest.json +14 -0
- package/src/dashboard/app.py +44 -0
- package/src/doctor/providers/runtime.py +92 -0
- package/src/hook_guardrails.py +76 -0
- package/src/product_mode.py +2 -2
- package/src/scripts/nexo-watchdog.sh +48 -8
- package/src/scripts/runner-health-check.py +20 -13
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.11.3",
|
|
3
|
+
"version": "7.11.5",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,11 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
Version `7.11.
|
|
21
|
+
Version `7.11.5` is the current packaged-runtime line. Patch release — Desktop-managed installs now block the standalone dashboard at the same product-mode layer as evolution, so `installation_live`, cron sync, and watchdog no longer disagree about whether `com.nexo.dashboard` should exist. Validation: `125` targeted tests across product-mode, cron sync, and doctor, plus a full pre-release wrapper (`2321 passed, 2 skipped, 1 xfailed, 4 xpassed`).
|
|
22
|
+
|
|
23
|
+
Previously in `7.11.4`: patch release — packaged runtimes now receive root JSON contracts such as `local_model_manifest.json`, install/update paths sync core crons from `src/crons/manifest.json` instead of depending on a stale JS list, `runner-health-check` is wired into cron/doctor/dashboard instead of writing an unread file, and the watchdog retries failed crons immediately while treating `run_once_on_wake` as catchup-style recovery. Validation: `117` targeted tests across packaged update, cron sync/recovery, dashboard, local models, and runtime update contracts.
|
|
24
|
+
|
|
25
|
+
Previously in `7.11.3`: patch release — root-cause fix for the `mcp_restart_required` lockup that v7.11.2 only masked at the enforcer layer. `_FINGERPRINT_EXCLUDE_DIRS` in `src/runtime_versioning.py` was missing `"versions"`, so `compute_mcp_runtime_fingerprint()` walked into `core/versions/<old>/**.py` whenever it was called against the live runtime root. `installed_runtime_fingerprint()` (which resolves through `active_runtime_root()` → `core/versions/<active>/`) returned a clean per-snapshot hash, while `prime_process_fingerprint()` (which starts from `Path(__file__).resolve().parent` → live `core/`) accumulated every retained snapshot. The two never matched after the second-ever `nexo update` on a host. Every update wrote `mcp-restart-required.json` and the marker could never be cleared by `_ack_current_client_if_restarted()` because the `installed_fp != process_fp` test always returned `True`. Every non-allowlisted MCP tool (`nexo_reminders`, `nexo_smart_startup`, `nexo_guard_check`, `nexo_task_open`, …) returned `{"error": "mcp_restart_required", "reason": "fingerprint_mismatch"}` indefinitely, even after the operator restarted the client. Adding `"versions"` to `_FINGERPRINT_EXCLUDE_DIRS` restores parity; 21 runtime-fingerprint tests stayed green.
|
|
22
26
|
|
|
23
27
|
Previously in `7.11.2`: patch release — two reliability fixes in the same family ("components ignoring signals they should respect"): (1) `STUCK CRON REAPER` added to `nexo-watchdog.sh` and (2) the Guardian/Enforcer now honors the `mcp-restart-required` marker. The watchdog reaper closes the v5.8.1 in-flight gap: truly hung wrappers (e.g. headless `claude --bare` blocked on an MCP that flagged `mcp_restart_required`) used to hold their slot for days. The reaper sweeps `cron_runs` rows with `ended_at IS NULL` past `stuck_after_seconds` (per-cron from `manifest.json`, fallback 12h global), SIGTERMs the wrapper (trap closes row at `exit 143`), grace 10s, SIGKILL on survivors. Generous defaults (deep-sleep 8h, sleep/evolution 4h) prevent any v5.8.1 regression. The enforcer gate skips `nexo_*`-mentioning reminders when the marker file is present (cached per-instance, 30s TTL); reminders that don't reference `nexo_*` still fire. 12 new tests; 3 existing watchdog tests + 52 existing enforcer tests stay green.
|
|
24
28
|
|
package/bin/nexo-brain.js
CHANGED
|
@@ -847,11 +847,12 @@ function getCoreRuntimeFlatFiles(srcDir = path.join(__dirname, "..", "src")) {
|
|
|
847
847
|
if (isDuplicateArtifactName(name, srcDir)) return false;
|
|
848
848
|
const stat = fs.statSync(path.join(srcDir, name));
|
|
849
849
|
if (!stat.isFile()) return false;
|
|
850
|
-
// Include Python modules and
|
|
851
|
-
// reads
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
850
|
+
// Include Python modules and flat JSON contracts that the runtime
|
|
851
|
+
// reads directly from the installed core tree.
|
|
852
|
+
return (
|
|
853
|
+
name.endsWith(".py")
|
|
854
|
+
|| /(?:_defaults|_manifest|_tiers)\.json$/.test(name)
|
|
855
|
+
);
|
|
855
856
|
})
|
|
856
857
|
: [];
|
|
857
858
|
return [...new Set([...staticFiles, ...discoveredRootModules])];
|
|
@@ -2351,6 +2352,42 @@ WantedBy=timers.target
|
|
|
2351
2352
|
}
|
|
2352
2353
|
}
|
|
2353
2354
|
|
|
2355
|
+
/**
 * Run the cron sync script (`sync.py`) so installed core crons follow the manifest.
 *
 * Candidate scripts are tried in order: the copy inside the runtime crons
 * directory for `nexoHome`, then (when `sourceRoot` is given) the copy inside
 * `sourceRoot`. Candidates that do not exist on disk are skipped. The first
 * candidate that exits with status 0 wins.
 *
 * @param {string} pythonPath Python interpreter to use; falls back to "python3" when empty.
 * @param {string} nexoHome   NEXO home directory, exported to the child as NEXO_HOME.
 * @param {string} [sourceRoot=""] Optional directory holding a fallback `sync.py`.
 * @returns {{ok: true, syncPath: string} | {ok: false, error: string}}
 *   On failure `error` describes the last attempt, or "cron sync script not found"
 *   when no candidate existed.
 */
function syncCoreProcessesFromManifest(pythonPath, nexoHome, sourceRoot = "") {
  const candidateSyncPaths = [
    path.join(resolveRuntimeCronsDir(nexoHome), "sync.py"),
    sourceRoot ? path.join(sourceRoot, "sync.py") : "",
  ].filter(Boolean);
  const runtimeCode = runtimeCodeDir(nexoHome);
  let lastError = "";

  for (const syncPath of candidateSyncPaths) {
    if (!fs.existsSync(syncPath)) continue;

    const syncResult = spawnSync(pythonPath || "python3", [syncPath], {
      env: {
        ...process.env,
        HOME: require("os").homedir(),
        NEXO_HOME: nexoHome,
        NEXO_CODE: runtimeCode,
      },
      stdio: "pipe",
      encoding: "utf8",
    });
    if (syncResult.status === 0) {
      return { ok: true, syncPath };
    }

    // Prefer what the script printed. When the process never started
    // (e.g. interpreter missing) or was killed by a signal, `status` is null
    // and there is no output, so report the spawn error or signal instead of
    // the meaningless "exit null".
    const output = (syncResult.stderr || syncResult.stdout || "").trim();
    if (output) {
      lastError = output;
    } else if (syncResult.error) {
      lastError = syncResult.error.message || String(syncResult.error);
    } else if (syncResult.signal) {
      lastError = `killed by ${syncResult.signal}`;
    } else {
      lastError = `exit ${syncResult.status}`;
    }
  }

  return {
    ok: false,
    error: lastError || "cron sync script not found",
  };
}
|
|
2390
|
+
|
|
2354
2391
|
async function runSetup() {
|
|
2355
2392
|
// Non-interactive mode: --defaults, --yes, --skip, or -y all skip prompts
|
|
2356
2393
|
// and apply the recommended defaults end-to-end (v6.0.0 adds --skip).
|
|
@@ -2587,8 +2624,14 @@ async function runSetup() {
|
|
|
2587
2624
|
const optFile = path.join(resolveRuntimeConfigDir(NEXO_HOME), "optionals.json");
|
|
2588
2625
|
if (fs.existsSync(optFile)) migOptionals = JSON.parse(fs.readFileSync(optFile, "utf8"));
|
|
2589
2626
|
} catch {}
|
|
2590
|
-
|
|
2591
|
-
|
|
2627
|
+
const migCronSync = syncCoreProcessesFromManifest(migPython, NEXO_HOME, cronsMigSrc);
|
|
2628
|
+
if (migCronSync.ok) {
|
|
2629
|
+
log(" Core crons reconciled with manifest.");
|
|
2630
|
+
} else {
|
|
2631
|
+
log(` Cron sync warning: ${migCronSync.error}. Falling back to legacy installer.`);
|
|
2632
|
+
installAllProcesses(platform, migPython, NEXO_HOME, migSchedule, LAUNCH_AGENTS, migOptionals);
|
|
2633
|
+
log(" Automated processes updated via legacy installer fallback.");
|
|
2634
|
+
}
|
|
2592
2635
|
|
|
2593
2636
|
// Update version file
|
|
2594
2637
|
fs.writeFileSync(versionFile, JSON.stringify({
|
|
@@ -2701,19 +2744,11 @@ async function runSetup() {
|
|
|
2701
2744
|
copyDirRec2(cronsSrc, cronsDest);
|
|
2702
2745
|
log("Refreshed crons/ directory.");
|
|
2703
2746
|
|
|
2704
|
-
const
|
|
2705
|
-
if (
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
encoding: "utf8",
|
|
2710
|
-
});
|
|
2711
|
-
if (syncResult.status === 0) {
|
|
2712
|
-
log("Core crons reconciled with manifest.");
|
|
2713
|
-
} else {
|
|
2714
|
-
const syncErr = (syncResult.stderr || syncResult.stdout || "").trim();
|
|
2715
|
-
log(`Cron sync warning: ${syncErr || `exit ${syncResult.status}`}`);
|
|
2716
|
-
}
|
|
2747
|
+
const syncStatus = syncCoreProcessesFromManifest(syncPython, NEXO_HOME, cronsSrc);
|
|
2748
|
+
if (syncStatus.ok) {
|
|
2749
|
+
log("Core crons reconciled with manifest.");
|
|
2750
|
+
} else {
|
|
2751
|
+
log(`Cron sync warning: ${syncStatus.error}`);
|
|
2717
2752
|
}
|
|
2718
2753
|
}
|
|
2719
2754
|
|
|
@@ -4227,15 +4262,9 @@ ${doScan ? `- Stack: ${Object.keys(profileData.code.languages || {}).slice(0, 5)
|
|
|
4227
4262
|
schedule = await maybeConfigurePublicContribution(schedule, useDefaults);
|
|
4228
4263
|
schedule = await maybeConfigureFullDiskAccess(schedule, useDefaults, python);
|
|
4229
4264
|
const enabledOptionals = { dashboard: doDashboard, automation: schedule.automation_enabled !== false };
|
|
4230
|
-
if (smokeTestMode) {
|
|
4231
|
-
log("Smoke test mode detected — skipping LaunchAgents installation.");
|
|
4232
|
-
} else if (isEphemeralInstall(NEXO_HOME)) {
|
|
4233
|
-
log("Ephemeral HOME/NEXO_HOME detected — skipping LaunchAgents installation.");
|
|
4234
|
-
} else {
|
|
4235
|
-
installAllProcesses(platform, python, NEXO_HOME, schedule, LAUNCH_AGENTS, enabledOptionals);
|
|
4236
|
-
}
|
|
4237
4265
|
|
|
4238
|
-
// Persist optional process preferences
|
|
4266
|
+
// Persist optional process preferences before cron sync so the manifest
|
|
4267
|
+
// installer reads the same automation/dashboard state we just computed.
|
|
4239
4268
|
try {
|
|
4240
4269
|
const configDir = resolveRuntimeConfigDir(NEXO_HOME);
|
|
4241
4270
|
fs.mkdirSync(configDir, { recursive: true });
|
|
@@ -4243,8 +4272,23 @@ ${doScan ? `- Stack: ${Object.keys(profileData.code.languages || {}).slice(0, 5)
|
|
|
4243
4272
|
fs.writeFileSync(optFile, JSON.stringify(enabledOptionals, null, 2));
|
|
4244
4273
|
} catch {}
|
|
4245
4274
|
|
|
4246
|
-
|
|
4247
|
-
|
|
4275
|
+
if (smokeTestMode) {
|
|
4276
|
+
log("Smoke test mode detected — skipping LaunchAgents installation.");
|
|
4277
|
+
} else if (isEphemeralInstall(NEXO_HOME)) {
|
|
4278
|
+
log("Ephemeral HOME/NEXO_HOME detected — skipping LaunchAgents installation.");
|
|
4279
|
+
} else {
|
|
4280
|
+
const cronSync = syncCoreProcessesFromManifest(python, NEXO_HOME, path.join(__dirname, "..", "src", "crons"));
|
|
4281
|
+
if (cronSync.ok) {
|
|
4282
|
+
log("Core crons reconciled with manifest.");
|
|
4283
|
+
} else {
|
|
4284
|
+
log(`Cron sync warning: ${cronSync.error}. Falling back to legacy installer.`);
|
|
4285
|
+
installAllProcesses(platform, python, NEXO_HOME, schedule, LAUNCH_AGENTS, enabledOptionals);
|
|
4286
|
+
log("Automated processes configured via legacy installer fallback.");
|
|
4287
|
+
}
|
|
4288
|
+
}
|
|
4289
|
+
|
|
4290
|
+
// Manifest-driven cron sync now owns the steady-state install path.
|
|
4291
|
+
// The legacy installer remains only as a bootstrap fallback.
|
|
4248
4292
|
|
|
4249
4293
|
// Step 7b: macOS Keychain setup for headless automation
|
|
4250
4294
|
await setupKeychainPassFile(NEXO_HOME);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.11.3",
|
|
3
|
+
"version": "7.11.5",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/auto_update.py
CHANGED
|
@@ -4135,7 +4135,14 @@ def _discover_runtime_root_python_modules(base_dir: Path) -> list[str]:
|
|
|
4135
4135
|
def _runtime_flat_files(base_dir: Path) -> list[str]:
|
|
4136
4136
|
ordered: list[str] = []
|
|
4137
4137
|
seen: set[str] = set()
|
|
4138
|
-
|
|
4138
|
+
json_contracts = [
|
|
4139
|
+
item.name
|
|
4140
|
+
for item in sorted(base_dir.iterdir(), key=lambda p: p.name)
|
|
4141
|
+
if item.is_file()
|
|
4142
|
+
and re.search(r"(?:_defaults|_manifest|_tiers)\.json$", item.name)
|
|
4143
|
+
and not is_duplicate_artifact_name(item.name)
|
|
4144
|
+
]
|
|
4145
|
+
for name in _discover_runtime_root_python_modules(base_dir) + json_contracts + ["requirements.txt", "package.json", "version.json"]:
|
|
4139
4146
|
if name in seen:
|
|
4140
4147
|
continue
|
|
4141
4148
|
seen.add(name)
|
package/src/crons/manifest.json
CHANGED
|
@@ -92,6 +92,20 @@
|
|
|
92
92
|
"run_on_boot": true,
|
|
93
93
|
"run_on_wake": true
|
|
94
94
|
},
|
|
95
|
+
{
|
|
96
|
+
"id": "runner-health-check",
|
|
97
|
+
"script": "scripts/runner-health-check.py",
|
|
98
|
+
"interval_seconds": 21600,
|
|
99
|
+
"description": "Watchdog check — verifies that automation runners produce recent useful output",
|
|
100
|
+
"core": true,
|
|
101
|
+
"optional": "automation",
|
|
102
|
+
"recovery_policy": "catchup",
|
|
103
|
+
"idempotent": true,
|
|
104
|
+
"max_catchup_age": 43200,
|
|
105
|
+
"stuck_after_seconds": 600,
|
|
106
|
+
"run_on_boot": true,
|
|
107
|
+
"run_on_wake": true
|
|
108
|
+
},
|
|
95
109
|
{
|
|
96
110
|
"id": "backup",
|
|
97
111
|
"script": "scripts/nexo-backup.sh",
|
package/src/dashboard/app.py
CHANGED
|
@@ -1257,6 +1257,38 @@ async def api_watchdog():
|
|
|
1257
1257
|
return JSONResponse({"error": f"Invalid JSON: {e}"}, status_code=500)
|
|
1258
1258
|
|
|
1259
1259
|
|
|
1260
|
+
@app.get("/api/runner-health")
async def api_runner_health():
    """Serve the runner health report stored in the operations directory.

    Returns the parsed JSON document when the report exists, a 404 JSON
    error (including the expected path) when it is missing, and a 500 JSON
    error when the file content is not valid JSON.
    """
    report_path = paths.operations_dir() / "runner-health-report.json"
    if report_path.exists():
        try:
            raw_report = report_path.read_text(encoding="utf-8")
            return json.loads(raw_report)
        except json.JSONDecodeError as e:
            return JSONResponse({"error": f"Invalid JSON: {e}"}, status_code=500)
    missing = {"error": "runner-health-report.json not found", "path": str(report_path)}
    return JSONResponse(missing, status_code=404)
|
|
1274
|
+
|
|
1275
|
+
|
|
1276
|
+
@app.get("/api/morning-briefing")
async def api_morning_briefing():
    """Serve the most recent morning briefing markdown artifact.

    Responds with the file path, its modification time as an ISO-8601
    string, and the markdown content. A 404 JSON error (including the
    expected path) is returned when the artifact does not exist.
    """
    briefing_path = paths.operations_dir() / "morning-briefing-latest.md"
    if briefing_path.exists():
        path_text = str(briefing_path)
        modified = datetime.datetime.fromtimestamp(briefing_path.stat().st_mtime)
        markdown = briefing_path.read_text(encoding="utf-8")
        return {
            "path": path_text,
            "updated_at": modified.isoformat(),
            "content": markdown,
        }
    return JSONResponse(
        {"error": "morning-briefing-latest.md not found", "path": str(briefing_path)},
        status_code=404,
    )
|
|
1290
|
+
|
|
1291
|
+
|
|
1260
1292
|
# ===========================================================================
|
|
1261
1293
|
# NEW API ENDPOINTS — Dashboard v3.0 modules
|
|
1262
1294
|
# ===========================================================================
|
|
@@ -1377,6 +1409,18 @@ async def api_chat(body: ChatMessage):
|
|
|
1377
1409
|
return {"answer": "Watchdog status:", "data": json.loads(wp.read_text()), "query_type": "watchdog"}
|
|
1378
1410
|
return {"answer": "Watchdog not available.", "data": [], "query_type": "watchdog"}
|
|
1379
1411
|
|
|
1412
|
+
elif any(w in msg for w in ["runner health", "morning agent", "followup runner"]):
|
|
1413
|
+
rp = paths.operations_dir() / "runner-health-report.json"
|
|
1414
|
+
if rp.exists():
|
|
1415
|
+
return {"answer": "Runner health:", "data": json.loads(rp.read_text()), "query_type": "runner_health"}
|
|
1416
|
+
return {"answer": "Runner health report not available.", "data": [], "query_type": "runner_health"}
|
|
1417
|
+
|
|
1418
|
+
elif any(w in msg for w in ["morning briefing", "briefing", "resumen matinal"]):
|
|
1419
|
+
bp = paths.operations_dir() / "morning-briefing-latest.md"
|
|
1420
|
+
if bp.exists():
|
|
1421
|
+
return {"answer": "Latest morning briefing:", "data": {"content": bp.read_text(encoding="utf-8")}, "query_type": "morning_briefing"}
|
|
1422
|
+
return {"answer": "Morning briefing not available.", "data": [], "query_type": "morning_briefing"}
|
|
1423
|
+
|
|
1380
1424
|
elif any(w in msg for w in ["skill", "habilidad"]):
|
|
1381
1425
|
rows = conn.execute(
|
|
1382
1426
|
"SELECT id, name, level, trust_score, use_count FROM skills ORDER BY trust_score DESC LIMIT 20"
|
|
@@ -47,6 +47,7 @@ PROTECTED_MACOS_ROOTS = (
|
|
|
47
47
|
# Freshness thresholds in seconds
|
|
48
48
|
IMMUNE_FRESHNESS = 3600 # 1 hour (runs every 30 min)
|
|
49
49
|
WATCHDOG_FRESHNESS = 3600 # 1 hour (runs every 30 min)
|
|
50
|
+
RUNNER_HEALTH_FRESHNESS = 43200 # 12 hours (runner-health-check runs every 6h)
|
|
50
51
|
DEFAULT_CRON_THRESHOLD = 7200 # Fallback when manifest data is unavailable
|
|
51
52
|
LIVE_PROTOCOL_SESSION_FRESHNESS = 1800 # 30 minutes
|
|
52
53
|
SPECIAL_ENV_NORMALIZE_IDS = {"prevent-sleep", "tcc-approve"}
|
|
@@ -1314,6 +1315,96 @@ def check_watchdog_status() -> DoctorCheck:
|
|
|
1314
1315
|
)
|
|
1315
1316
|
|
|
1316
1317
|
|
|
1318
|
+
def check_runner_health_status() -> DoctorCheck:
    """Check runner-health-report.json freshness and overall status.

    Outcomes, in the order they are evaluated:

    * automation disabled in the schedule preferences -> healthy/info
      (no report is expected);
    * report file missing -> degraded/warn;
    * report older than ``RUNNER_HEALTH_FRESHNESS`` seconds -> degraded/warn;
    * report readable -> critical/error when ``overall`` is FAIL or any
      runner is FAIL, degraded/warn when ``overall`` is WARN or any runner
      is WARN, otherwise healthy/info. Non-passing runners are listed as
      evidence so the operator can see which runner failed and why;
    * report unreadable or malformed -> degraded/warn with the exception
      text as evidence.
    """
    check_id = "runtime.runner_health"

    schedule = {}
    try:
        if SCHEDULE_FILE.is_file():
            schedule = _load_json(SCHEDULE_FILE)
    except Exception:
        # An unreadable schedule must not break the doctor run; fall back to
        # the default preferences.
        schedule = {}

    prefs = normalize_client_preferences(schedule)
    if not prefs.get("automation_enabled", True):
        return DoctorCheck(
            id=check_id,
            tier="runtime",
            status="healthy",
            severity="info",
            summary="Automation disabled; runner health check not expected",
        )

    report_file = paths.operations_dir() / "runner-health-report.json"
    age = _file_age_seconds(report_file)
    if age is None:
        return DoctorCheck(
            id=check_id,
            tier="runtime",
            status="degraded",
            severity="warn",
            summary="Runner health report not found",
            evidence=[f"Expected: {report_file}"],
            repair_plan=[
                "Check that runner-health-check exists in crons/manifest.json and was synced into LaunchAgents/systemd",
                "Verify followup-runner and morning-agent are enabled under automation",
            ],
            escalation_prompt="Automation runners are enabled but runner-health-report.json was never produced.",
        )

    age_hours = age / 3600
    if age > RUNNER_HEALTH_FRESHNESS:
        return DoctorCheck(
            id=check_id,
            tier="runtime",
            status="degraded",
            severity="warn",
            summary=f"Runner health report stale ({age_hours:.1f}h old)",
            evidence=[
                f"{report_file} last modified {age_hours:.1f} hours ago",
                f"Expected freshness threshold: {RUNNER_HEALTH_FRESHNESS / 3600:.0f} hours",
            ],
            repair_plan=[
                "Inspect runner-health-check cron installation and recent stderr",
                "Verify automation crons are installed from the manifest, not a stale legacy process list",
            ],
        )

    try:
        data = _load_json(report_file)
        runners = data.get("runners") or []
        overall = str(data.get("overall", "UNKNOWN")).upper()
        fail_count = sum(1 for runner in runners if str(runner.get("status", "")).upper() == "FAIL")
        warn_count = sum(1 for runner in runners if str(runner.get("status", "")).upper() == "WARN")
        if overall == "FAIL" or fail_count > 0:
            status = "critical"
            severity = "error"
        elif overall == "WARN" or warn_count > 0:
            status = "degraded"
            severity = "warn"
        else:
            status = "healthy"
            severity = "info"

        # Surface which runners are unhealthy. The entries are presumably the
        # per-runner dicts written by runner-health-check (cron_id / status /
        # issues) -- every key is read defensively in case the shape differs.
        evidence = []
        for runner in runners:
            runner_status = str(runner.get("status", "")).upper()
            if runner_status not in ("FAIL", "WARN"):
                continue
            issues = runner.get("issues")
            if isinstance(issues, (list, tuple)) and issues:
                detail = "; ".join(str(issue) for issue in issues)
            else:
                detail = "no issue details recorded"
            evidence.append(f"{runner.get('cron_id', 'unknown')}: {runner_status} - {detail}")

        # Only pass evidence when there is some, so passing reports build the
        # DoctorCheck exactly as before.
        extra = {"evidence": evidence} if evidence else {}
        return DoctorCheck(
            id=check_id,
            tier="runtime",
            status=status,
            severity=severity,
            summary=(
                f"Runner health: {overall} ({len(runners)} runner(s), {warn_count} warn, {fail_count} fail, "
                f"{age_hours:.1f}h ago)"
            ),
            **extra,
        )
    except Exception as e:
        return DoctorCheck(
            id=check_id,
            tier="runtime",
            status="degraded",
            severity="warn",
            summary=f"Runner health report unreadable ({age_hours:.1f}h ago)",
            evidence=[str(e)],
        )
|
|
1406
|
+
|
|
1407
|
+
|
|
1317
1408
|
def check_stale_sessions() -> DoctorCheck:
|
|
1318
1409
|
"""Check for stale sessions from DB."""
|
|
1319
1410
|
try:
|
|
@@ -3252,6 +3343,7 @@ def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
|
|
|
3252
3343
|
return [
|
|
3253
3344
|
safe_check(check_immune_status),
|
|
3254
3345
|
safe_check(check_watchdog_status),
|
|
3346
|
+
safe_check(check_runner_health_status),
|
|
3255
3347
|
safe_check(check_stale_sessions),
|
|
3256
3348
|
safe_check(check_cron_freshness),
|
|
3257
3349
|
safe_check(check_client_backend_preferences, fix=fix),
|
package/src/hook_guardrails.py
CHANGED
|
@@ -252,6 +252,78 @@ def _normalize_file_path(path: str) -> str:
|
|
|
252
252
|
return _normalize_path_token(str(Path(path)))
|
|
253
253
|
|
|
254
254
|
|
|
255
|
+
# Tokens that look like absolute paths but never refer to real files. They
|
|
256
|
+
# typically come from shell heredocs, JSON keys (``/DTEND``), regex/glob
|
|
257
|
+
# fragments, or numeric/dictionary substrings the bash extractor lifted out
|
|
258
|
+
# of a quoted argument. Without this filter the hook keeps emitting
|
|
259
|
+
# unack-eable g4_guard_check_required entries (self-audit 2026-04-24 C2).
|
|
260
|
+
_PATH_ARTIFACT_RE = re.compile(
|
|
261
|
+
r"""
|
|
262
|
+
[\$\`] # unresolved shell substitution / backtick boundary
|
|
263
|
+
| [\*\?] # glob metacharacter
|
|
264
|
+
| [\[\]\{\}] # bracket/range/heredoc markers
|
|
265
|
+
| \s # embedded whitespace (most likely truncation)
|
|
266
|
+
""",
|
|
267
|
+
re.VERBOSE,
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
# Single-segment ``/word`` candidates that match a small dictionary block-list
|
|
271
|
+
# of confirmed false positives observed in the live debt log.
|
|
272
|
+
_PATH_DICTIONARY_BLOCKLIST = frozenset(
|
|
273
|
+
{
|
|
274
|
+
"/diary",
|
|
275
|
+
"/stdout",
|
|
276
|
+
"/stderr",
|
|
277
|
+
"/estancada",
|
|
278
|
+
"/confirmacion",
|
|
279
|
+
"/confirmación",
|
|
280
|
+
"/window",
|
|
281
|
+
"/restaurar",
|
|
282
|
+
"/dtend",
|
|
283
|
+
"/dtstart",
|
|
284
|
+
"/summary",
|
|
285
|
+
}
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _looks_like_real_path(path: str) -> bool:
|
|
290
|
+
"""Return True only when ``path`` plausibly refers to a real file.
|
|
291
|
+
|
|
292
|
+
The protocol-pretool guardrail uses this filter to suppress noise
|
|
293
|
+
coming from shell heredocs, glob fragments, and dictionary words that
|
|
294
|
+
the bash extractor sometimes mistakes for absolute paths. Without it
|
|
295
|
+
every false positive becomes a permanent ``g4_guard_check_required``
|
|
296
|
+
debt row that nobody can ack.
|
|
297
|
+
"""
|
|
298
|
+
|
|
299
|
+
raw = str(path or "").strip()
|
|
300
|
+
if not raw:
|
|
301
|
+
return False
|
|
302
|
+
if not raw.startswith("/"):
|
|
303
|
+
return False
|
|
304
|
+
if _PATH_ARTIFACT_RE.search(raw):
|
|
305
|
+
return False
|
|
306
|
+
# Pure numeric segments (``/166``, ``/487``, ``/1000``) are almost
|
|
307
|
+
# always status codes or counters lifted out of a log line.
|
|
308
|
+
stripped = raw.lstrip("/")
|
|
309
|
+
if stripped and re.fullmatch(r"\d+", stripped):
|
|
310
|
+
return False
|
|
311
|
+
if raw.lower() in _PATH_DICTIONARY_BLOCKLIST:
|
|
312
|
+
return False
|
|
313
|
+
# Reject single-segment ``/word`` candidates that do not exist on the
|
|
314
|
+
# filesystem and have no extension. Real edits target nested paths or
|
|
315
|
+
# well-known top-level files (``/etc/hosts`` etc.) that already pass
|
|
316
|
+
# the dictionary check above. Globs hitting ``/etc`` etc. are rare
|
|
317
|
+
# and acceptable to over-filter compared with the noise we suppress.
|
|
318
|
+
if "/" not in stripped and "." not in stripped:
|
|
319
|
+
try:
|
|
320
|
+
if not Path(raw).exists():
|
|
321
|
+
return False
|
|
322
|
+
except OSError:
|
|
323
|
+
return False
|
|
324
|
+
return True
|
|
325
|
+
|
|
326
|
+
|
|
255
327
|
def _resolve_runtime_path(path: str) -> Path:
|
|
256
328
|
candidate = Path(str(path or "")).expanduser()
|
|
257
329
|
if not candidate.is_absolute():
|
|
@@ -328,6 +400,8 @@ def _extract_touched_files(tool_input) -> list[str]:
|
|
|
328
400
|
unique: list[str] = []
|
|
329
401
|
seen = set()
|
|
330
402
|
for item in files:
|
|
403
|
+
if not _looks_like_real_path(item):
|
|
404
|
+
continue
|
|
331
405
|
normalized = _normalize_file_path(item)
|
|
332
406
|
if normalized and normalized not in seen:
|
|
333
407
|
seen.add(normalized)
|
|
@@ -414,6 +488,8 @@ def _extract_bash_touched_files(tool_input) -> list[str]:
|
|
|
414
488
|
|
|
415
489
|
def add(candidate: str) -> None:
|
|
416
490
|
resolved = _resolve_shell_candidate_path(candidate, cwd)
|
|
491
|
+
if not resolved or not _looks_like_real_path(resolved):
|
|
492
|
+
return
|
|
417
493
|
normalized = _normalize_file_path(resolved) if resolved else ""
|
|
418
494
|
if resolved and normalized and normalized not in seen:
|
|
419
495
|
seen.add(normalized)
|
package/src/product_mode.py
CHANGED
|
@@ -15,7 +15,7 @@ DESKTOP_PRODUCT_ENV = "NEXO_DESKTOP_MANAGED"
|
|
|
15
15
|
ALLOW_CORE_WRITES_ENV = "NEXO_ALLOW_CORE_WRITES"
|
|
16
16
|
PRODUCT_MODE_FILENAME = "product-mode.json"
|
|
17
17
|
DESKTOP_PRODUCT_MODE = "desktop_closed_product"
|
|
18
|
-
DESKTOP_DISABLED_FEATURES = ("evolution",)
|
|
18
|
+
DESKTOP_DISABLED_FEATURES = ("evolution", "dashboard")
|
|
19
19
|
DESKTOP_EVOLUTION_DISABLED_REASON = "Disabled by NEXO Desktop product contract"
|
|
20
20
|
|
|
21
21
|
|
|
@@ -199,7 +199,7 @@ def enforce_desktop_product_contract(*, source: str = "desktop") -> dict[str, An
|
|
|
199
199
|
|
|
200
200
|
def is_cron_blocked(cron_id: str | None) -> bool:
    """Tell whether ``cron_id`` names a feature disabled for Desktop-managed installs.

    The id is compared case-insensitively, ignoring surrounding whitespace,
    against ``DESKTOP_DISABLED_FEATURES``. A listed id counts as blocked only
    when ``desktop_product_requested()`` reports the desktop product mode.
    """
    normalized = str(cron_id or "").strip().lower()
    if normalized not in DESKTOP_DISABLED_FEATURES:
        return False
    return desktop_product_requested()
|
|
203
203
|
|
|
204
204
|
|
|
205
205
|
def filter_blocked_crons(crons: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
@@ -350,6 +350,17 @@ try_request_catchup() {
|
|
|
350
350
|
return 1
|
|
351
351
|
}
|
|
352
352
|
|
|
353
|
+
recovery_uses_catchup() {
    # Succeed (exit 0) when recovery policy $1 is handled by requesting a
    # catchup run: either "catchup" itself or "run_once_on_wake".
    # Any other policy yields exit 1.
    if [ "$1" = "catchup" ] || [ "$1" = "run_once_on_wake" ]; then
        return 0
    fi
    return 1
}
|
|
363
|
+
|
|
353
364
|
try_verify_repair() {
|
|
354
365
|
# After Level 2 repair, wait and verify the service is healthy
|
|
355
366
|
local plist_id="$1"
|
|
@@ -604,12 +615,19 @@ lookup_stuck_threshold() {
|
|
|
604
615
|
fi
|
|
605
616
|
}
|
|
606
617
|
|
|
618
|
+
escape_extended_regex() {
    # Print $1 with every extended-regex metacharacter backslash-escaped,
    # so the result can be embedded in a pattern and match literally.
    local literal="$1"
    sed 's/[][(){}.^$*+?|\\]/\\&/g' <<< "$literal"
}
|
|
621
|
+
|
|
607
622
|
find_wrapper_pids() {
    # Print the PIDs of cron wrapper processes running cron id $1.
    # The match is anchored on this runtime's wrapper path (under NEXO_CODE)
    # followed by the cron id and a space, so another install's wrapper that
    # happens to share the same cron_id is not picked up.
    local cron_id="$1"
    local wrapper_re id_re
    wrapper_re=$(escape_extended_regex "${NEXO_CODE}/scripts/nexo-cron-wrapper.sh")
    id_re=$(escape_extended_regex "$cron_id")
    pgrep -f "${wrapper_re} ${id_re} " 2>/dev/null
}
|
|
614
632
|
|
|
615
633
|
reap_stuck_cron_pids() {
|
|
@@ -735,6 +753,7 @@ for monitor in "${MONITORS[@]}"; do
|
|
|
735
753
|
cron_id=$(cron_id_from_service "$plist_id")
|
|
736
754
|
latest_run_has_record=false
|
|
737
755
|
latest_run_failed=false
|
|
756
|
+
recovered_failed_run=false
|
|
738
757
|
|
|
739
758
|
# Check 1: Service loaded? (launchd on macOS, systemd on Linux)
|
|
740
759
|
if is_loaded "$plist_id"; then
|
|
@@ -808,7 +827,7 @@ for monitor in "${MONITORS[@]}"; do
|
|
|
808
827
|
if [ "$age" -gt $(( max_stale * 3 )) ] && [ -n "$proc_grep" ] && ! process_running "$proc_grep"; then
|
|
809
828
|
status="FAIL"
|
|
810
829
|
details="${details}In-flight for ${stale_age} but process '$proc_grep' dead — stale row. "
|
|
811
|
-
if
|
|
830
|
+
if recovery_uses_catchup "$recovery_policy"; then
|
|
812
831
|
if try_request_catchup; then
|
|
813
832
|
status="HEALED"
|
|
814
833
|
details="${details}Self-healed: requested catchup for crashed in-flight run. "
|
|
@@ -836,10 +855,31 @@ for monitor in "${MONITORS[@]}"; do
|
|
|
836
855
|
status="FAIL"
|
|
837
856
|
details="${details}Last run exited ${last_exit}. "
|
|
838
857
|
[ -n "$last_error" ] && details="${details}Error: ${last_error}. "
|
|
858
|
+
if recovery_uses_catchup "$recovery_policy"; then
|
|
859
|
+
if try_request_catchup; then
|
|
860
|
+
status="HEALED"
|
|
861
|
+
recovered_failed_run=true
|
|
862
|
+
details="${details}Self-healed: requested catchup after failed run. "
|
|
863
|
+
TOTAL_HEALED=$((TOTAL_HEALED + 1))
|
|
864
|
+
else
|
|
865
|
+
details="${details}Catchup request after failed run failed. "
|
|
866
|
+
fi
|
|
867
|
+
else
|
|
868
|
+
if try_reexecute_missed_cron "$plist_id"; then
|
|
869
|
+
status="HEALED"
|
|
870
|
+
recovered_failed_run=true
|
|
871
|
+
details="${details}Self-healed: re-executed failed run immediately. "
|
|
872
|
+
TOTAL_HEALED=$((TOTAL_HEALED + 1))
|
|
873
|
+
else
|
|
874
|
+
details="${details}Immediate re-execute after failed run failed. "
|
|
875
|
+
fi
|
|
876
|
+
fi
|
|
839
877
|
fi
|
|
840
878
|
fi
|
|
841
|
-
if
|
|
842
|
-
|
|
879
|
+
if $recovered_failed_run; then
|
|
880
|
+
:
|
|
881
|
+
elif [ "$age" -gt $(( max_stale * 3 )) ]; then
|
|
882
|
+
if recovery_uses_catchup "$recovery_policy"; then
|
|
843
883
|
if try_request_catchup; then
|
|
844
884
|
status="HEALED"
|
|
845
885
|
details="${details}Self-healed: requested catchup for missed window (last run: $stale_age). "
|
|
@@ -867,7 +907,7 @@ for monitor in "${MONITORS[@]}"; do
|
|
|
867
907
|
fi
|
|
868
908
|
else
|
|
869
909
|
stale_age="no cron_runs entry"
|
|
870
|
-
if
|
|
910
|
+
if recovery_uses_catchup "$recovery_policy"; then
|
|
871
911
|
if try_request_catchup; then
|
|
872
912
|
status="HEALED"
|
|
873
913
|
details="${details}Self-healed: requested catchup for missing cron_runs entry. "
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# nexo: name=runner-health-check
|
|
3
|
-
# nexo: description=Watchdog check:
|
|
3
|
+
# nexo: description=Watchdog check: verify that automation runners produce real work. Alert if they go 48h without runs or useful output.
|
|
4
4
|
# nexo: category=watchdog
|
|
5
5
|
# nexo: runtime=python
|
|
6
6
|
# nexo: timeout=60
|
|
@@ -14,15 +14,15 @@
|
|
|
14
14
|
# nexo: doctor_allow_db=true
|
|
15
15
|
|
|
16
16
|
"""
|
|
17
|
-
Runner Health Check —
|
|
17
|
+
Runner Health Check — verify that NEXO runners produce real work.
|
|
18
18
|
|
|
19
19
|
Checks:
|
|
20
|
-
1. followup-runner:
|
|
21
|
-
2. morning-agent:
|
|
22
|
-
3.
|
|
23
|
-
4. Minimum execution count:
|
|
20
|
+
1. followup-runner: has it run in the last 48h? has any followup changed state?
|
|
21
|
+
2. morning-agent: has it run successfully in the last 48h?
|
|
22
|
+
3. Output evidence: are the logs non-empty and recent?
|
|
23
|
+
4. Minimum execution count: at least N successful runs in the last week?
|
|
24
24
|
|
|
25
|
-
Output: JSON report +
|
|
25
|
+
Output: JSON report + .watchdog-alert entry if there are failures.
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
28
|
import json
|
|
@@ -142,6 +142,7 @@ def check_runner(conn: sqlite3.Connection, runner: dict) -> dict:
|
|
|
142
142
|
now = datetime.now(timezone.utc)
|
|
143
143
|
cutoff_48h = (now - timedelta(hours=MAX_HOURS_NO_RUN)).strftime("%Y-%m-%d %H:%M:%S")
|
|
144
144
|
cutoff_7d = (now - timedelta(days=7)).strftime("%Y-%m-%d %H:%M:%S")
|
|
145
|
+
cutoff_7d_ts = (now - timedelta(days=7)).timestamp()
|
|
145
146
|
|
|
146
147
|
result = {
|
|
147
148
|
"cron_id": cron_id,
|
|
@@ -237,18 +238,24 @@ def check_runner(conn: sqlite3.Connection, runner: dict) -> dict:
|
|
|
237
238
|
detail = "; ".join(log_issues[:2]) if log_issues else "no recent log evidence"
|
|
238
239
|
result["issues"].append(f"no recent log evidence ({detail})")
|
|
239
240
|
|
|
240
|
-
# Check 6: For followup-runner specifically —
|
|
241
|
+
# Check 6: For followup-runner specifically — look for actual followup activity
|
|
241
242
|
if cron_id == "followup-runner":
|
|
242
|
-
|
|
243
|
+
transitioned = conn.execute(
|
|
243
244
|
"SELECT COUNT(*) FROM followups WHERE status != 'PENDING' AND updated_at > ?",
|
|
244
|
-
(
|
|
245
|
+
(cutoff_7d_ts,),
|
|
245
246
|
).fetchone()
|
|
246
|
-
# updated_at is epoch float
|
|
247
247
|
recent_updated = conn.execute(
|
|
248
248
|
"SELECT COUNT(*) FROM followups WHERE updated_at > ?",
|
|
249
|
-
(
|
|
249
|
+
(cutoff_7d_ts,),
|
|
250
250
|
).fetchone()
|
|
251
|
-
|
|
251
|
+
transitioned_count = transitioned[0] if transitioned else 0
|
|
252
|
+
updated_count = recent_updated[0] if recent_updated else 0
|
|
253
|
+
result["followups_non_pending_last_7d"] = transitioned_count
|
|
254
|
+
result["followups_updated_last_7d"] = updated_count
|
|
255
|
+
if success_7d > 0 and transitioned_count == 0 and updated_count == 0:
|
|
256
|
+
if result["status"] == "PASS":
|
|
257
|
+
result["status"] = "WARN"
|
|
258
|
+
result["issues"].append("No followup updates or state transitions in last 7d")
|
|
252
259
|
|
|
253
260
|
return result
|
|
254
261
|
|