nexo-brain 7.11.5 → 7.11.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.11.5",
|
|
3
|
+
"version": "7.11.7",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,11 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
Version `7.11.
|
|
21
|
+
Version `7.11.7` is the current packaged-runtime line. Patch release — runtime doctor now stops treating resolved protocol drift, disabled evolution, successful zero-usage headless runs, and recent in-flight crons as live runtime failures, while `runner-health-check` treats supervisor `SIGTERM 143` reloads as benign and supports both tuple and row-shaped SQLite reads. Result: the runtime doctor/runtime tier goes back to green on the live install. Validation so far: `104` targeted tests across doctor and runner-health.
|
|
22
|
+
|
|
23
|
+
Previously in `7.11.6`: patch release — Guardian G4 now filters more false-positive slash fragments before they become debt, `strict_protocol_write_without_task` downgrades to `warn` when the session has a fresh heartbeat, and Deep Sleep extraction validates the real prompt contract instead of accepting any syntactically valid JSON. Validation so far: `50` targeted tests across hook guardrails and Deep Sleep extraction.
|
|
24
|
+
|
|
25
|
+
Previously in `7.11.5`: patch release — Desktop-managed installs now block the standalone dashboard at the same product-mode layer as evolution, so `installation_live`, cron sync, and watchdog no longer disagree about whether `com.nexo.dashboard` should exist. Validation: `125` targeted tests across product-mode, cron sync, and doctor, plus a full pre-release wrapper (`2321 passed, 2 skipped, 1 xfailed, 4 xpassed`).
|
|
22
26
|
|
|
23
27
|
Previously in `7.11.4`: patch release — packaged runtimes now receive root JSON contracts such as `local_model_manifest.json`, install/update paths sync core crons from `src/crons/manifest.json` instead of depending on a stale JS list, `runner-health-check` is wired into cron/doctor/dashboard instead of writing an unread file, and the watchdog retries failed crons immediately while treating `run_once_on_wake` as catchup-style recovery. Validation: `117` targeted tests across packaged update, cron sync/recovery, dashboard, local models, and runtime update contracts.
|
|
24
28
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.11.5",
|
|
3
|
+
"version": "7.11.7",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
|
@@ -57,6 +57,28 @@ PACKAGE_JSON = NEXO_CODE / "package.json"
|
|
|
57
57
|
CHANGELOG_FILE = NEXO_CODE / "CHANGELOG.md"
|
|
58
58
|
|
|
59
59
|
|
|
60
|
+
def _evolution_objective_payload() -> dict:
    """Return the first usable evolution-objective JSON object, or ``{}``.

    Candidates are checked in order: ``NEXO_HOME/brain/evolution-objective.json``
    and then ``NEXO_HOME/cortex/evolution-objective.json``. A candidate is
    skipped when it is not a regular file, cannot be read or parsed, or does
    not decode to a JSON object (``dict``).
    """
    candidates = [
        NEXO_HOME / "brain" / "evolution-objective.json",
        NEXO_HOME / "cortex" / "evolution-objective.json",
    ]
    for path in candidates:
        if not path.is_file():
            continue
        try:
            # JSON is UTF-8 by specification; decoding with the locale's
            # default encoding can fail or corrupt text on non-UTF-8 systems,
            # which would silently make the payload look absent.
            payload = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError, RecursionError):
            # OSError: unreadable file. ValueError: invalid JSON or invalid
            # UTF-8 (JSONDecodeError / UnicodeDecodeError). RecursionError:
            # pathologically nested document. Best-effort: try the next one.
            continue
        if isinstance(payload, dict):
            return payload
    return {}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _is_evolution_disabled() -> bool:
    """Report whether the evolution objective explicitly disables evolution.

    Only a literal ``false`` for ``evolution_enabled`` counts as disabled; a
    missing key, ``null``, or any other value leaves evolution enabled.
    """
    flag = _evolution_objective_payload().get("evolution_enabled")
    return flag is False
|
|
80
|
+
|
|
81
|
+
|
|
60
82
|
def _expected_runtime_code_dir() -> Path:
|
|
61
83
|
packaged = NEXO_HOME / "core"
|
|
62
84
|
if packaged.exists() or not (NEXO_HOME / "server.py").is_file():
|
|
@@ -887,6 +909,8 @@ def _cron_expectations() -> dict[str, dict]:
|
|
|
887
909
|
cron_id = cron.get("id")
|
|
888
910
|
if not cron_id or cron.get("keep_alive"):
|
|
889
911
|
continue
|
|
912
|
+
if cron_id == "evolution" and _is_evolution_disabled():
|
|
913
|
+
continue
|
|
890
914
|
if cron.get("run_at_load") and not cron.get("interval_seconds") and not cron.get("schedule"):
|
|
891
915
|
continue
|
|
892
916
|
|
|
@@ -1485,7 +1509,17 @@ def check_cron_freshness() -> DoctorCheck:
|
|
|
1485
1509
|
)
|
|
1486
1510
|
# Latest run per cron
|
|
1487
1511
|
rows = conn.execute(
|
|
1488
|
-
"
|
|
1512
|
+
"""
|
|
1513
|
+
SELECT cr.cron_id, cr.started_at, cr.ended_at, cr.exit_code
|
|
1514
|
+
FROM cron_runs cr
|
|
1515
|
+
INNER JOIN (
|
|
1516
|
+
SELECT cron_id, MAX(started_at) AS last_run
|
|
1517
|
+
FROM cron_runs
|
|
1518
|
+
GROUP BY cron_id
|
|
1519
|
+
) latest
|
|
1520
|
+
ON latest.cron_id = cr.cron_id
|
|
1521
|
+
AND latest.last_run = cr.started_at
|
|
1522
|
+
"""
|
|
1489
1523
|
).fetchall()
|
|
1490
1524
|
finally:
|
|
1491
1525
|
conn.close()
|
|
@@ -1510,8 +1544,16 @@ def check_cron_freshness() -> DoctorCheck:
|
|
|
1510
1544
|
|
|
1511
1545
|
age = now - parsed.timestamp()
|
|
1512
1546
|
expected = expectations.get(cron_id, {"threshold": DEFAULT_CRON_THRESHOLD, "label": "runtime default"})
|
|
1547
|
+
in_flight = row[2] is None and row[3] is None
|
|
1548
|
+
if in_flight and age <= max(expected["threshold"] * 4, 3600):
|
|
1549
|
+
continue
|
|
1513
1550
|
if age > expected["threshold"]:
|
|
1514
|
-
|
|
1551
|
+
if in_flight:
|
|
1552
|
+
stale.append(
|
|
1553
|
+
f"{cron_id}: in-flight for {int(age / 60)}m (expected {expected['label']})"
|
|
1554
|
+
)
|
|
1555
|
+
else:
|
|
1556
|
+
stale.append(f"{cron_id}: {int(age / 3600)}h ago (expected {expected['label']})")
|
|
1515
1557
|
|
|
1516
1558
|
if stale:
|
|
1517
1559
|
return DoctorCheck(
|
|
@@ -2318,6 +2360,11 @@ def check_codex_conditioned_file_discipline() -> DoctorCheck:
|
|
|
2318
2360
|
repair_plan.append("Keep using managed Codex bootstrap so conditioned-file discipline remains visible in transcripts")
|
|
2319
2361
|
|
|
2320
2362
|
no_open_conditioned_debt = debt_summary["available"] and debt_summary["open_total"] == 0
|
|
2363
|
+
historical_no_open_debt_drift = (
|
|
2364
|
+
no_open_conditioned_debt
|
|
2365
|
+
and audit.get("latest_violation_age_seconds") is not None
|
|
2366
|
+
and float(audit["latest_violation_age_seconds"]) >= LIVE_PROTOCOL_SESSION_FRESHNESS
|
|
2367
|
+
)
|
|
2321
2368
|
historical_read_only = (
|
|
2322
2369
|
no_open_conditioned_debt
|
|
2323
2370
|
and audit["read_without_protocol"] > 0
|
|
@@ -2334,13 +2381,16 @@ def check_codex_conditioned_file_discipline() -> DoctorCheck:
|
|
|
2334
2381
|
)
|
|
2335
2382
|
|
|
2336
2383
|
if audit["write_without_protocol"] or audit["write_without_guard_ack"]:
|
|
2337
|
-
if
|
|
2384
|
+
if historical_no_open_debt_drift:
|
|
2385
|
+
status = "healthy"
|
|
2386
|
+
severity = "info"
|
|
2387
|
+
elif tracked_mutation_without_open_debt:
|
|
2338
2388
|
status = "healthy"
|
|
2339
2389
|
severity = "info"
|
|
2340
2390
|
else:
|
|
2341
2391
|
status = "critical"
|
|
2342
2392
|
severity = "error"
|
|
2343
|
-
elif historical_read_only:
|
|
2393
|
+
elif historical_no_open_debt_drift or historical_read_only:
|
|
2344
2394
|
status = "healthy"
|
|
2345
2395
|
severity = "info"
|
|
2346
2396
|
elif audit["read_without_protocol"]:
|
|
@@ -2357,7 +2407,7 @@ def check_codex_conditioned_file_discipline() -> DoctorCheck:
|
|
|
2357
2407
|
severity=severity,
|
|
2358
2408
|
summary=(
|
|
2359
2409
|
"Historical Codex conditioned-file drift has no open protocol debt"
|
|
2360
|
-
if historical_read_only
|
|
2410
|
+
if historical_no_open_debt_drift or historical_read_only
|
|
2361
2411
|
else "Tracked Codex conditioned-file mutation drift has no open protocol debt"
|
|
2362
2412
|
if tracked_mutation_without_open_debt
|
|
2363
2413
|
else "Recent Codex sessions respect conditioned-file discipline"
|
|
@@ -2685,7 +2735,10 @@ def check_protocol_compliance() -> DoctorCheck:
|
|
|
2685
2735
|
closed_tasks = [row for row in tasks if row["status"] != "open"]
|
|
2686
2736
|
verify_required = [row for row in closed_tasks if row["must_verify"] and row["status"] == "done"]
|
|
2687
2737
|
verify_ok = [row for row in verify_required if (row["close_evidence"] or "").strip()]
|
|
2688
|
-
change_required = [
|
|
2738
|
+
change_required = [
|
|
2739
|
+
row for row in closed_tasks
|
|
2740
|
+
if row["must_change_log"] and row["status"] in {"done", "partial", "failed"}
|
|
2741
|
+
]
|
|
2689
2742
|
change_ok = [row for row in change_required if row["change_log_id"]]
|
|
2690
2743
|
learning_required = [row for row in closed_tasks if row["correction_happened"]]
|
|
2691
2744
|
learning_ok = [row for row in learning_required if row["learning_id"]]
|
|
@@ -3216,16 +3269,23 @@ def check_automation_telemetry(days: int = 7) -> DoctorCheck:
|
|
|
3216
3269
|
interactive_expr = "0"
|
|
3217
3270
|
if "session_type" in columns:
|
|
3218
3271
|
interactive_expr = "COALESCE(session_type, '') LIKE 'interactive%'"
|
|
3272
|
+
headless_unmetered_expr = (
|
|
3273
|
+
f"status = 'ok' AND NOT ({interactive_expr}) "
|
|
3274
|
+
"AND (input_tokens + cached_input_tokens + output_tokens) = 0 "
|
|
3275
|
+
"AND COALESCE(total_cost_usd, 0) <= 0 "
|
|
3276
|
+
"AND COALESCE(cost_source, '') IN ('', 'backend', 'missing')"
|
|
3277
|
+
)
|
|
3219
3278
|
row = conn.execute(
|
|
3220
3279
|
f"""
|
|
3221
3280
|
SELECT
|
|
3222
3281
|
COUNT(*) AS runs,
|
|
3223
3282
|
SUM(CASE WHEN status = 'ok' THEN 1 ELSE 0 END) AS successful_runs,
|
|
3224
3283
|
SUM(CASE WHEN status != 'ok' THEN 1 ELSE 0 END) AS failed_runs,
|
|
3225
|
-
SUM(CASE WHEN status = 'ok' AND NOT ({interactive_expr}) THEN 1 ELSE 0 END) AS scored_successful_runs,
|
|
3284
|
+
SUM(CASE WHEN status = 'ok' AND NOT ({interactive_expr}) AND NOT ({headless_unmetered_expr}) THEN 1 ELSE 0 END) AS scored_successful_runs,
|
|
3226
3285
|
SUM(CASE WHEN status = 'ok' AND NOT ({interactive_expr}) AND (input_tokens + cached_input_tokens + output_tokens) > 0 THEN 1 ELSE 0 END) AS usage_runs,
|
|
3227
3286
|
SUM(CASE WHEN status = 'ok' AND NOT ({interactive_expr}) AND total_cost_usd IS NOT NULL THEN 1 ELSE 0 END) AS cost_runs,
|
|
3228
3287
|
SUM(CASE WHEN status = 'ok' AND NOT ({interactive_expr}) AND cost_source = 'pricing_unavailable' THEN 1 ELSE 0 END) AS pricing_gaps,
|
|
3288
|
+
SUM(CASE WHEN {headless_unmetered_expr} THEN 1 ELSE 0 END) AS headless_unmetered_runs,
|
|
3229
3289
|
SUM(CASE WHEN status = 'ok' AND ({interactive_expr}) AND ((input_tokens + cached_input_tokens + output_tokens) = 0 OR total_cost_usd IS NULL) THEN 1 ELSE 0 END) AS interactive_unmetered_runs,
|
|
3230
3290
|
GROUP_CONCAT(DISTINCT backend) AS backends
|
|
3231
3291
|
FROM automation_runs
|
|
@@ -3284,9 +3344,10 @@ def check_automation_telemetry(days: int = 7) -> DoctorCheck:
|
|
|
3284
3344
|
usage_runs = int((row["usage_runs"] if row else 0) or 0)
|
|
3285
3345
|
cost_runs = int((row["cost_runs"] if row else 0) or 0)
|
|
3286
3346
|
pricing_gaps = int((row["pricing_gaps"] if row else 0) or 0)
|
|
3347
|
+
headless_unmetered_runs = int((row["headless_unmetered_runs"] if row and "headless_unmetered_runs" in row.keys() else 0) or 0)
|
|
3287
3348
|
interactive_unmetered_runs = int((row["interactive_unmetered_runs"] if row and "interactive_unmetered_runs" in row.keys() else 0) or 0)
|
|
3288
|
-
usage_denominator = scored_successful_runs or (successful_runs if not interactive_unmetered_runs else 0)
|
|
3289
|
-
cost_denominator = scored_successful_runs or (successful_runs if not interactive_unmetered_runs else 0)
|
|
3349
|
+
usage_denominator = scored_successful_runs or (successful_runs if not interactive_unmetered_runs and not headless_unmetered_runs else 0)
|
|
3350
|
+
cost_denominator = scored_successful_runs or (successful_runs if not interactive_unmetered_runs and not headless_unmetered_runs else 0)
|
|
3290
3351
|
missing_usage_runs = max(0, usage_denominator - usage_runs) if usage_denominator else 0
|
|
3291
3352
|
usage_coverage = round((usage_runs / usage_denominator) * 100, 1) if usage_denominator else 100.0
|
|
3292
3353
|
cost_coverage = round((cost_runs / cost_denominator) * 100, 1) if cost_denominator else 100.0
|
|
@@ -3302,6 +3363,8 @@ def check_automation_telemetry(days: int = 7) -> DoctorCheck:
|
|
|
3302
3363
|
]
|
|
3303
3364
|
if missing_usage_runs:
|
|
3304
3365
|
evidence.append(f"missing_usage_runs={missing_usage_runs}")
|
|
3366
|
+
if headless_unmetered_runs:
|
|
3367
|
+
evidence.append(f"headless_unmetered_runs_excluded={headless_unmetered_runs}")
|
|
3305
3368
|
if interactive_unmetered_runs:
|
|
3306
3369
|
evidence.append(f"interactive_unmetered_runs_excluded={interactive_unmetered_runs}")
|
|
3307
3370
|
backends = str((row["backends"] if row else "") or "").strip()
|
package/src/hook_guardrails.py
CHANGED
|
@@ -8,11 +8,12 @@ import os
|
|
|
8
8
|
import re
|
|
9
9
|
import shlex
|
|
10
10
|
import sys
|
|
11
|
+
import time
|
|
11
12
|
from pathlib import Path
|
|
12
13
|
import paths
|
|
13
14
|
|
|
14
15
|
from core_prompts import render_core_prompt
|
|
15
|
-
from db import create_protocol_debt, get_db
|
|
16
|
+
from db import create_protocol_debt, get_db, get_last_heartbeat_ts
|
|
16
17
|
from operator_language import append_operator_language_contract
|
|
17
18
|
from plugins.guard import _load_conditioned_learnings, _normalize_path_token
|
|
18
19
|
from protocol_settings import get_protocol_strictness
|
|
@@ -262,10 +263,13 @@ _PATH_ARTIFACT_RE = re.compile(
|
|
|
262
263
|
[\$\`] # unresolved shell substitution / backtick boundary
|
|
263
264
|
| [\*\?] # glob metacharacter
|
|
264
265
|
| [\[\]\{\}] # bracket/range/heredoc markers
|
|
266
|
+
| [\|\=\;] # regex fragments / shell assignment / command separators
|
|
265
267
|
| \s # embedded whitespace (most likely truncation)
|
|
266
268
|
""",
|
|
267
269
|
re.VERBOSE,
|
|
268
270
|
)
|
|
271
|
+
_DATE_LIKE_PATH_RE = re.compile(r"^/\d{1,4}/\d{1,4}(?:/\d{1,4})?$")
|
|
272
|
+
_STRICT_WRITE_HEARTBEAT_WINDOW_SECONDS = 300
|
|
269
273
|
|
|
270
274
|
# Single-segment ``/word`` candidates that match a small dictionary block-list
|
|
271
275
|
# of confirmed false positives observed in the live debt log.
|
|
@@ -303,6 +307,8 @@ def _looks_like_real_path(path: str) -> bool:
|
|
|
303
307
|
return False
|
|
304
308
|
if _PATH_ARTIFACT_RE.search(raw):
|
|
305
309
|
return False
|
|
310
|
+
if _DATE_LIKE_PATH_RE.fullmatch(raw):
|
|
311
|
+
return False
|
|
306
312
|
# Pure numeric segments (``/166``, ``/487``, ``/1000``) are almost
|
|
307
313
|
# always status codes or counters lifted out of a log line.
|
|
308
314
|
stripped = raw.lstrip("/")
|
|
@@ -321,9 +327,38 @@ def _looks_like_real_path(path: str) -> bool:
|
|
|
321
327
|
return False
|
|
322
328
|
except OSError:
|
|
323
329
|
return False
|
|
330
|
+
parts = [segment for segment in stripped.split("/") if segment]
|
|
331
|
+
if len(parts) > 1 and "." not in parts[-1]:
|
|
332
|
+
try:
|
|
333
|
+
if not Path(raw).exists():
|
|
334
|
+
return False
|
|
335
|
+
except OSError:
|
|
336
|
+
return False
|
|
324
337
|
return True
|
|
325
338
|
|
|
326
339
|
|
|
340
|
+
def _strict_write_without_task_severity(session_id: str) -> str:
|
|
341
|
+
"""Downgrade missing-task debt when the session is clearly alive.
|
|
342
|
+
|
|
343
|
+
A recent heartbeat shows the session is connected to a real ongoing
|
|
344
|
+
conversation even if the operator skipped `nexo_task_open`. We still
|
|
345
|
+
block strict writes, but store the debt as warn so dashboards separate
|
|
346
|
+
protocol drift from completely untracked edits.
|
|
347
|
+
"""
|
|
348
|
+
|
|
349
|
+
if not session_id:
|
|
350
|
+
return "error"
|
|
351
|
+
try:
|
|
352
|
+
last_hb = get_last_heartbeat_ts(session_id)
|
|
353
|
+
except Exception:
|
|
354
|
+
return "error"
|
|
355
|
+
if last_hb is None:
|
|
356
|
+
return "error"
|
|
357
|
+
if time.time() - float(last_hb) <= _STRICT_WRITE_HEARTBEAT_WINDOW_SECONDS:
|
|
358
|
+
return "warn"
|
|
359
|
+
return "error"
|
|
360
|
+
|
|
361
|
+
|
|
327
362
|
def _resolve_runtime_path(path: str) -> Path:
|
|
328
363
|
candidate = Path(str(path or "")).expanduser()
|
|
329
364
|
if not candidate.is_absolute():
|
|
@@ -1394,12 +1429,13 @@ def process_pre_tool_event(payload: dict) -> dict:
|
|
|
1394
1429
|
if not files:
|
|
1395
1430
|
task = _find_any_open_task(conn, sid)
|
|
1396
1431
|
if not task:
|
|
1432
|
+
severity = _strict_write_without_task_severity(sid)
|
|
1397
1433
|
debt = _ensure_protocol_debt(
|
|
1398
1434
|
conn,
|
|
1399
1435
|
session_id=sid,
|
|
1400
1436
|
task_id="",
|
|
1401
1437
|
debt_type="strict_protocol_write_without_task",
|
|
1402
|
-
severity=
|
|
1438
|
+
severity=severity,
|
|
1403
1439
|
evidence=f"{tool_name} attempted without a detectable file path and without an open protocol task.",
|
|
1404
1440
|
file_token="unknown-target",
|
|
1405
1441
|
)
|
|
@@ -1425,12 +1461,13 @@ def process_pre_tool_event(payload: dict) -> dict:
|
|
|
1425
1461
|
for filepath in files:
|
|
1426
1462
|
task = _find_open_task_for_file(conn, sid, filepath)
|
|
1427
1463
|
if not task:
|
|
1464
|
+
severity = _strict_write_without_task_severity(sid)
|
|
1428
1465
|
debt = _ensure_protocol_debt(
|
|
1429
1466
|
conn,
|
|
1430
1467
|
session_id=sid,
|
|
1431
1468
|
task_id="",
|
|
1432
1469
|
debt_type="strict_protocol_write_without_task",
|
|
1433
|
-
severity=
|
|
1470
|
+
severity=severity,
|
|
1434
1471
|
evidence=f"{tool_name} attempted on {filepath} without an open protocol task for that file.",
|
|
1435
1472
|
file_token=filepath,
|
|
1436
1473
|
)
|
|
@@ -68,6 +68,7 @@ TRANSIENT_ERROR_KINDS = {
|
|
|
68
68
|
"timeout",
|
|
69
69
|
"signal",
|
|
70
70
|
}
|
|
71
|
+
REQUIRED_PROTOCOL_SUMMARY_KEYS = ("guard_check", "heartbeat", "change_log")
|
|
71
72
|
|
|
72
73
|
|
|
73
74
|
def _classify_cli_result(result) -> tuple[str, str]:
|
|
@@ -133,6 +134,53 @@ def extract_json_from_response(text: str) -> dict | None:
|
|
|
133
134
|
return None
|
|
134
135
|
|
|
135
136
|
|
|
137
|
+
def _is_valid_extraction(
|
|
138
|
+
parsed: dict,
|
|
139
|
+
*,
|
|
140
|
+
expected_session_id: str | None = None,
|
|
141
|
+
) -> bool:
|
|
142
|
+
"""Validate the minimum Deep Sleep extraction contract.
|
|
143
|
+
|
|
144
|
+
The extractor prompt's real top-level shape is
|
|
145
|
+
``session_id/findings/protocol_summary`` plus optional richer sections.
|
|
146
|
+
We intentionally validate the live prompt contract rather than an older
|
|
147
|
+
proposal so a syntactically valid but structurally degraded JSON payload
|
|
148
|
+
does not silently count as success.
|
|
149
|
+
"""
|
|
150
|
+
|
|
151
|
+
if not isinstance(parsed, dict):
|
|
152
|
+
return False
|
|
153
|
+
session_id = parsed.get("session_id")
|
|
154
|
+
if not isinstance(session_id, str) or not session_id.strip():
|
|
155
|
+
return False
|
|
156
|
+
if expected_session_id and session_id != expected_session_id:
|
|
157
|
+
return False
|
|
158
|
+
findings = parsed.get("findings")
|
|
159
|
+
if not isinstance(findings, list):
|
|
160
|
+
return False
|
|
161
|
+
if any(not isinstance(item, dict) for item in findings):
|
|
162
|
+
return False
|
|
163
|
+
protocol_summary = parsed.get("protocol_summary")
|
|
164
|
+
if not isinstance(protocol_summary, dict):
|
|
165
|
+
return False
|
|
166
|
+
for key in REQUIRED_PROTOCOL_SUMMARY_KEYS:
|
|
167
|
+
if not isinstance(protocol_summary.get(key), dict):
|
|
168
|
+
return False
|
|
169
|
+
for key in ("emotional_timeline", "abandoned_projects", "skill_candidates"):
|
|
170
|
+
if key in parsed and not isinstance(parsed.get(key), list):
|
|
171
|
+
return False
|
|
172
|
+
if "productivity_score" in parsed and not isinstance(parsed.get("productivity_score"), dict):
|
|
173
|
+
return False
|
|
174
|
+
return True
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _write_debug_extract(session_id: str, kind: str, raw_output: str) -> Path:
    """Save a truncated copy of a failed extraction for later inspection.

    Writes at most the first 5000 characters of ``raw_output`` (empty when it
    is falsy) to ``debug-extract-<first 20 chars of session_id>-<kind>.txt``
    inside the Deep Sleep directory, creating the directory when needed.

    Returns the path of the file that was written.
    """
    debug_file = _deep_sleep_dir() / f"debug-extract-{session_id[:20]}-{kind}.txt"
    debug_file.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: the dump may contain non-ASCII text, and the locale
    # default encoding could raise while writing it. ``errors="replace"``
    # keeps this diagnostic write from failing on unencodable characters.
    debug_file.write_text((raw_output or "")[:5000], encoding="utf-8", errors="replace")
    return debug_file
|
|
182
|
+
|
|
183
|
+
|
|
136
184
|
def _safe_session_slug(session_id: str) -> str:
|
|
137
185
|
return (
|
|
138
186
|
session_id
|
|
@@ -215,6 +263,8 @@ def analyze_session(
|
|
|
215
263
|
if not line.strip().startswith("Post-mortem") and line.strip()
|
|
216
264
|
)
|
|
217
265
|
parsed = extract_json_from_response(output)
|
|
266
|
+
debug_output = output
|
|
267
|
+
parse_failure_kind = "json_parse"
|
|
218
268
|
|
|
219
269
|
# Fallback: if Claude returned text instead of JSON, ask a short conversion call
|
|
220
270
|
if not parsed and len(output.strip()) > 50:
|
|
@@ -231,17 +281,23 @@ def analyze_session(
|
|
|
231
281
|
append_system_prompt=json_system_prompt,
|
|
232
282
|
)
|
|
233
283
|
if convert_result.returncode == 0:
|
|
284
|
+
debug_output = convert_result.stdout
|
|
234
285
|
parsed = extract_json_from_response(convert_result.stdout)
|
|
235
286
|
if parsed:
|
|
236
287
|
print(f" Conversion succeeded")
|
|
237
288
|
|
|
289
|
+
if parsed and not _is_valid_extraction(parsed, expected_session_id=session_id):
|
|
290
|
+
parse_failure_kind = "json_schema"
|
|
291
|
+
debug_output = json.dumps(parsed, indent=2, ensure_ascii=False)
|
|
292
|
+
parsed = None
|
|
293
|
+
|
|
238
294
|
if not parsed:
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
return None,
|
|
295
|
+
debug_file = _write_debug_extract(session_id, parse_failure_kind, debug_output)
|
|
296
|
+
print(
|
|
297
|
+
f" Failed to validate extraction ({parse_failure_kind}). Raw output saved to {debug_file}",
|
|
298
|
+
file=sys.stderr,
|
|
299
|
+
)
|
|
300
|
+
return None, parse_failure_kind
|
|
245
301
|
|
|
246
302
|
return parsed, None
|
|
247
303
|
|
|
@@ -71,6 +71,25 @@ RUNNERS = [
|
|
|
71
71
|
]
|
|
72
72
|
|
|
73
73
|
|
|
74
|
+
def _row_value(row: sqlite3.Row | tuple, key: str):
|
|
75
|
+
if isinstance(row, sqlite3.Row):
|
|
76
|
+
return row[key]
|
|
77
|
+
column_index = {
|
|
78
|
+
"exit_code": 0,
|
|
79
|
+
"error": 1,
|
|
80
|
+
"started_at": 2,
|
|
81
|
+
}
|
|
82
|
+
return row[column_index[key]]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _is_benign_supervisor_interrupt(row: sqlite3.Row | tuple) -> bool:
    """Tell whether a run row records a SIGTERM kill with exit code 143.

    Both conditions must hold: the exit code (a missing value counts as 0)
    equals 143, and the error text contains ``Killed by SIGTERM``.
    """
    raw_code = _row_value(row, "exit_code")
    raw_error = _row_value(row, "error")
    return int(raw_code or 0) == 143 and "Killed by SIGTERM" in str(raw_error or "")
|
|
91
|
+
|
|
92
|
+
|
|
74
93
|
def _recent_summary_evidence(conn: sqlite3.Connection, cron_id: str, cutoff: str) -> Optional[dict]:
|
|
75
94
|
row = conn.execute(
|
|
76
95
|
"SELECT summary, started_at FROM cron_runs WHERE cron_id=? AND started_at > ? AND summary != '' ORDER BY started_at DESC LIMIT 1",
|
|
@@ -113,27 +132,33 @@ def _recent_log_evidence(now: datetime, max_age_hours: int, *sources: tuple[str,
|
|
|
113
132
|
|
|
114
133
|
|
|
115
134
|
def _last_error_state(conn: sqlite3.Connection, cron_id: str) -> Optional[dict]:
|
|
116
|
-
|
|
117
|
-
"SELECT error, started_at FROM cron_runs WHERE cron_id=? AND error != '' AND error IS NOT NULL ORDER BY started_at DESC
|
|
135
|
+
rows = conn.execute(
|
|
136
|
+
"SELECT exit_code, error, started_at FROM cron_runs WHERE cron_id=? AND error != '' AND error IS NOT NULL ORDER BY started_at DESC",
|
|
118
137
|
(cron_id,),
|
|
119
|
-
).
|
|
120
|
-
if not
|
|
138
|
+
).fetchall()
|
|
139
|
+
row = next((candidate for candidate in rows if not _is_benign_supervisor_interrupt(candidate)), None)
|
|
140
|
+
if row is None:
|
|
121
141
|
return None
|
|
122
142
|
|
|
123
143
|
successful_since = conn.execute(
|
|
124
|
-
"SELECT
|
|
125
|
-
(cron_id, row
|
|
126
|
-
).
|
|
144
|
+
"SELECT exit_code, error, started_at FROM cron_runs WHERE cron_id=? AND started_at > ?",
|
|
145
|
+
(cron_id, _row_value(row, "started_at")),
|
|
146
|
+
).fetchall()
|
|
147
|
+
successful_count = sum(
|
|
148
|
+
1
|
|
149
|
+
for candidate in successful_since
|
|
150
|
+
if int(_row_value(candidate, "exit_code") or 0) == 0 or _is_benign_supervisor_interrupt(candidate)
|
|
151
|
+
)
|
|
127
152
|
age_row = conn.execute(
|
|
128
153
|
"SELECT ROUND((julianday('now') - julianday(?)) * 24, 1)",
|
|
129
|
-
(row
|
|
154
|
+
(_row_value(row, "started_at"),),
|
|
130
155
|
).fetchone()
|
|
131
156
|
|
|
132
157
|
return {
|
|
133
|
-
"last_error": row[
|
|
134
|
-
"last_error_at": row
|
|
158
|
+
"last_error": str(_row_value(row, "error") or "")[:200],
|
|
159
|
+
"last_error_at": _row_value(row, "started_at"),
|
|
135
160
|
"last_error_age_hours": age_row[0] if age_row else None,
|
|
136
|
-
"successful_runs_since_last_error":
|
|
161
|
+
"successful_runs_since_last_error": successful_count,
|
|
137
162
|
}
|
|
138
163
|
|
|
139
164
|
|
|
@@ -167,11 +192,15 @@ def check_runner(conn: sqlite3.Connection, runner: dict) -> dict:
|
|
|
167
192
|
result["issues"].append(f"No runs in the last {MAX_HOURS_NO_RUN}h (last: {last_run})")
|
|
168
193
|
|
|
169
194
|
# Check 2: Successful runs in the last week
|
|
170
|
-
|
|
171
|
-
"SELECT
|
|
195
|
+
run_rows_7d = conn.execute(
|
|
196
|
+
"SELECT exit_code, error, started_at FROM cron_runs WHERE cron_id=? AND started_at > ?",
|
|
172
197
|
(cron_id, cutoff_7d),
|
|
173
|
-
).
|
|
174
|
-
success_7d =
|
|
198
|
+
).fetchall()
|
|
199
|
+
success_7d = sum(
|
|
200
|
+
1
|
|
201
|
+
for row in run_rows_7d
|
|
202
|
+
if int(_row_value(row, "exit_code") or 0) == 0 or _is_benign_supervisor_interrupt(row)
|
|
203
|
+
)
|
|
175
204
|
result["successful_runs_last_7d"] = success_7d
|
|
176
205
|
|
|
177
206
|
if success_7d < runner["min_weekly"]:
|
|
@@ -183,11 +212,11 @@ def check_runner(conn: sqlite3.Connection, runner: dict) -> dict:
|
|
|
183
212
|
)
|
|
184
213
|
|
|
185
214
|
# Check 3: Error rate in last week
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
215
|
+
errors_7d = sum(
|
|
216
|
+
1
|
|
217
|
+
for row in run_rows_7d
|
|
218
|
+
if int(_row_value(row, "exit_code") or 0) != 0 and not _is_benign_supervisor_interrupt(row)
|
|
219
|
+
)
|
|
191
220
|
total_7d = success_7d + errors_7d
|
|
192
221
|
result["errors_last_7d"] = errors_7d
|
|
193
222
|
result["total_runs_last_7d"] = total_7d
|
|
@@ -266,6 +295,7 @@ def main() -> int:
|
|
|
266
295
|
return 1
|
|
267
296
|
|
|
268
297
|
conn = sqlite3.connect(str(DB_PATH), timeout=10)
|
|
298
|
+
conn.row_factory = sqlite3.Row
|
|
269
299
|
now = datetime.now(timezone.utc)
|
|
270
300
|
|
|
271
301
|
report = {
|