nexo-brain 2.4.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -4
- package/bin/nexo-brain.js +238 -12
- package/bin/nexo.js +55 -0
- package/community/skills/.gitkeep +1 -0
- package/package.json +11 -3
- package/src/auto_update.py +193 -9
- package/src/cli.py +719 -0
- package/src/cognitive/_ingest.py +1 -1
- package/src/cognitive/_memory.py +4 -4
- package/src/crons/manifest.json +8 -0
- package/src/dashboard/app.py +700 -35
- package/src/dashboard/templates/adaptive.html +112 -218
- package/src/dashboard/templates/artifacts.html +133 -0
- package/src/dashboard/templates/backups.html +136 -0
- package/src/dashboard/templates/base.html +413 -0
- package/src/dashboard/templates/calendar.html +523 -654
- package/src/dashboard/templates/chat.html +356 -0
- package/src/dashboard/templates/claims.html +259 -0
- package/src/dashboard/templates/cortex.html +262 -0
- package/src/dashboard/templates/credentials.html +128 -0
- package/src/dashboard/templates/crons.html +370 -0
- package/src/dashboard/templates/dashboard.html +383 -578
- package/src/dashboard/templates/dreams.html +252 -0
- package/src/dashboard/templates/email.html +160 -0
- package/src/dashboard/templates/evolution.html +189 -0
- package/src/dashboard/templates/feed.html +249 -0
- package/src/dashboard/templates/followup_health.html +170 -0
- package/src/dashboard/templates/graph.html +191 -269
- package/src/dashboard/templates/guard.html +259 -0
- package/src/dashboard/templates/inbox.html +220 -346
- package/src/dashboard/templates/memory.html +317 -197
- package/src/dashboard/templates/operations.html +521 -698
- package/src/dashboard/templates/plugins.html +185 -0
- package/src/dashboard/templates/rules.html +246 -0
- package/src/dashboard/templates/sentiment.html +247 -0
- package/src/dashboard/templates/sessions.html +215 -182
- package/src/dashboard/templates/skills.html +329 -0
- package/src/dashboard/templates/somatic.html +68 -172
- package/src/dashboard/templates/triggers.html +133 -0
- package/src/dashboard/templates/trust.html +360 -0
- package/src/db/__init__.py +5 -0
- package/src/db/_schema.py +16 -1
- package/src/db/_sessions.py +22 -0
- package/src/db/_skills.py +980 -274
- package/src/doctor/__init__.py +1 -0
- package/src/doctor/formatters.py +52 -0
- package/src/doctor/models.py +44 -0
- package/src/doctor/orchestrator.py +42 -0
- package/src/doctor/providers/__init__.py +1 -0
- package/src/doctor/providers/boot.py +206 -0
- package/src/doctor/providers/deep.py +292 -0
- package/src/doctor/providers/runtime.py +686 -0
- package/src/evolution_cycle.py +86 -6
- package/src/hooks/post-compact.sh +5 -1
- package/src/hooks/pre-compact.sh +1 -1
- package/src/plugins/doctor.py +36 -0
- package/src/plugins/evolution.py +11 -3
- package/src/plugins/skills.py +135 -175
- package/src/requirements.txt +1 -0
- package/src/script_registry.py +322 -0
- package/src/scripts/deep-sleep/apply_findings.py +63 -48
- package/src/scripts/deep-sleep/extract-prompt.md +14 -0
- package/src/scripts/deep-sleep/synthesize-prompt.md +36 -0
- package/src/scripts/deep-sleep/synthesize.py +37 -1
- package/src/scripts/nexo-dashboard.sh +29 -0
- package/src/scripts/nexo-day-orchestrator.sh +139 -0
- package/src/scripts/nexo-evolution-run.py +141 -54
- package/src/scripts/nexo-learning-housekeep.py +1 -1
- package/src/scripts/nexo-watchdog.sh +1 -1
- package/src/server.py +9 -5
- package/src/skills/run-runtime-doctor/guide.md +12 -0
- package/src/skills/run-runtime-doctor/script.py +21 -0
- package/src/skills/run-runtime-doctor/skill.json +25 -0
- package/src/skills_runtime.py +347 -0
- package/src/tools_menu.py +3 -2
- package/src/tools_sessions.py +126 -0
- package/src/user_context.py +46 -0
- package/templates/nexo_helper.py +45 -0
- package/templates/script-template.py +44 -0
- package/templates/skill-script-template.py +39 -0
- package/templates/skill-template.md +33 -0
|
@@ -0,0 +1,686 @@
|
|
|
1
|
+
"""Runtime tier checks — read-only health checks from existing artifacts. Target <5s."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import datetime as dt
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import platform
|
|
8
|
+
import plistlib
|
|
9
|
+
import subprocess
|
|
10
|
+
import sys
|
|
11
|
+
import time
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from doctor.models import DoctorCheck
|
|
15
|
+
|
|
16
|
+
# Runtime data root; the NEXO_HOME environment variable overrides the ~/.nexo default.
NEXO_HOME = Path(os.getenv("NEXO_HOME", str(Path.home().joinpath(".nexo"))))
# Code root; the NEXO_CODE environment variable overrides the default, which is
# the third ancestor directory of this file (parents[2]).
NEXO_CODE = Path(os.getenv("NEXO_CODE", str(Path(__file__).resolve().parents[2])))
# Per-user directory that is searched for com.nexo.<id>.plist files.
LAUNCH_AGENTS_DIR = Path.home().joinpath("Library", "LaunchAgents")

# Freshness thresholds in seconds
IMMUNE_FRESHNESS = 60 * 60  # 1 hour (runs every 30 min)
WATCHDOG_FRESHNESS = 60 * 60  # 1 hour (runs every 30 min)
DEFAULT_CRON_THRESHOLD = 2 * 60 * 60  # Fallback when manifest data is unavailable
# Ids treated as managed LaunchAgents in addition to those listed in the cron manifest.
SPECIAL_LAUNCHAGENT_IDS = {"tcc-approve", "prevent-sleep"}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _file_age_seconds(path: Path) -> float | None:
    """Return how many seconds ago *path* was last modified.

    Returns None when *path* is not a regular file, or when inspecting it
    raises any exception (the failure is deliberately treated as "not found").
    """
    try:
        if not path.is_file():
            return None
        now = time.time()
        return now - path.stat().st_mtime
    except Exception:
        return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _load_json(path: Path) -> dict:
    """Read *path* and return the decoded JSON value.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the process locale.

    Raises:
        OSError: if the file cannot be read.
        ValueError: if the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses of ``ValueError``).
    """
    return json.loads(path.read_text(encoding="utf-8"))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _count_checks(checks) -> int:
    """Count the entries in a ``checks`` payload.

    A list counts as its length. A dict contributes, per value, the length of
    a list value or 1 for any other truthy value. Any other type counts as 0.
    """
    if isinstance(checks, list):
        return len(checks)
    if not isinstance(checks, dict):
        return 0
    return sum(
        len(entry) if isinstance(entry, list) else int(bool(entry))
        for entry in checks.values()
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _parse_timestamp(value: str) -> dt.datetime | None:
    """Parse a timestamp string into a datetime, or return None.

    Formats are tried in order: ``%Y-%m-%d %H:%M:%S``, ``%Y-%m-%d %H:%M``,
    then ISO 8601 via ``datetime.fromisoformat``. Surrounding whitespace is
    ignored and a trailing ``Z`` is read as ``+00:00`` so it parses on Python
    versions whose ``fromisoformat`` rejects it.

    Non-string input returns None instead of raising ``TypeError``. The result
    is naive unless the input carries a UTC offset.
    """
    if not isinstance(value, str):
        return None
    text = value.strip()
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M"):
        try:
            return dt.datetime.strptime(text, fmt)
        except ValueError:
            continue
    if text.endswith("Z"):
        text = f"{text[:-1]}+00:00"
    try:
        return dt.datetime.fromisoformat(text)
    except ValueError:
        return None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _cron_expectations() -> dict[str, dict]:
    """Build per-cron staleness expectations from the first usable manifest.

    ``crons/manifest.json`` is looked up under NEXO_HOME first, then NEXO_CODE;
    the first one that exists and parses is used. Entries without an ``id`` or
    flagged ``run_at_load`` are left out.

    Returns:
        Mapping of cron id to ``{"threshold": seconds, "label": text}``, or an
        empty dict when no manifest could be loaded.
    """
    for base_dir in (NEXO_HOME, NEXO_CODE):
        candidate = base_dir / "crons" / "manifest.json"
        if not candidate.is_file():
            continue
        try:
            manifest = _load_json(candidate)
        except Exception:
            continue

        result = {}
        for entry in manifest.get("crons", []):
            ident = entry.get("id")
            if not ident or entry.get("run_at_load"):
                continue

            every = entry.get("interval_seconds")
            when = entry.get("schedule") or {}
            if every:
                seconds = int(every)
                # Allow three missed intervals, but never less than interval + 10 minutes.
                limit = max(seconds * 3, seconds + 600)
                tag = f"every {seconds // 60}m"
            elif "weekday" in when:
                limit, tag = 8 * 86400, "weekly"
            elif "hour" in when and "minute" in when:
                limit, tag = 36 * 3600, "daily"
            else:
                limit, tag = DEFAULT_CRON_THRESHOLD, "custom"

            result[ident] = {"threshold": limit, "label": tag}
        return result
    return {}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _run_at_load_cron_ids() -> set[str]:
    """Return the ids of manifest crons whose expected schedule has RunAtLoad set to True."""
    selected = set()
    for ident, spec in _launchagent_schedule_expectations().items():
        if spec.get("RunAtLoad") is True:
            selected.add(ident)
    return selected
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _launchagent_schedule_expectations() -> dict[str, dict]:
    """Derive the expected launchd schedule keys for each cron in the manifest.

    ``crons/manifest.json`` is looked up under NEXO_HOME first, then NEXO_CODE;
    the first one that exists and parses is used. For every entry with an
    ``id`` exactly one of these is filled in, checked in this order:
    ``run_at_load`` -> ``RunAtLoad``, ``interval_seconds`` -> ``StartInterval``,
    ``schedule`` -> ``StartCalendarInterval`` (Hour/Minute/Weekday as present).

    Returns:
        Mapping of cron id to a dict with the keys ``StartInterval``,
        ``StartCalendarInterval`` and ``RunAtLoad`` (None when not expected),
        or an empty dict when no manifest could be loaded.
    """
    calendar_keys = (("hour", "Hour"), ("minute", "Minute"), ("weekday", "Weekday"))
    for base_dir in (NEXO_HOME, NEXO_CODE):
        candidate = base_dir / "crons" / "manifest.json"
        if not candidate.is_file():
            continue
        try:
            manifest = _load_json(candidate)
        except Exception:
            continue

        result = {}
        for entry in manifest.get("crons", []):
            ident = entry.get("id")
            if not ident:
                continue

            spec = dict.fromkeys(("StartInterval", "StartCalendarInterval", "RunAtLoad"))
            if entry.get("run_at_load"):
                spec["RunAtLoad"] = True
            elif "interval_seconds" in entry:
                spec["StartInterval"] = int(entry["interval_seconds"])
            elif "schedule" in entry:
                when = entry.get("schedule") or {}
                spec["StartCalendarInterval"] = {
                    plist_key: when[manifest_key]
                    for manifest_key, plist_key in calendar_keys
                    if manifest_key in when
                }
            result[ident] = spec
        return result
    return {}
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _managed_launchagent_plists() -> list[tuple[str, Path]]:
    """List managed LaunchAgents whose plist file exists on disk.

    Managed ids are SPECIAL_LAUNCHAGENT_IDS plus every id from the cron
    manifest. The result is ``(id, plist path)`` pairs sorted by id, where the
    path is ``LAUNCH_AGENTS_DIR / "com.nexo.<id>.plist"``.
    """
    managed_ids = set(SPECIAL_LAUNCHAGENT_IDS).union(_launchagent_schedule_expectations())
    candidates = (
        (ident, LAUNCH_AGENTS_DIR / f"com.nexo.{ident}.plist")
        for ident in sorted(managed_ids)
    )
    return [(ident, plist) for ident, plist in candidates if plist.is_file()]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _extract_launchctl_value(output: str, prefix: str) -> str | None:
    """Return the value of the first line in *output* that starts with *prefix*.

    Lines are stripped before matching, and the text after *prefix* is
    stripped again before being returned. Returns None when no line matches.
    """
    trimmed_lines = (raw.strip() for raw in output.splitlines())
    matches = (
        candidate[len(prefix):].strip()
        for candidate in trimmed_lines
        if candidate.startswith(prefix)
    )
    return next(matches, None)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _repair_launchagents(items: list[tuple[str, Path]]) -> tuple[bool, list[str]]:
    """Reload each given LaunchAgent from its plist via ``launchctl``.

    For every ``(cron_id, plist_path)`` pair the job ``com.nexo.<cron_id>`` is
    booted out of the current user's GUI domain and then bootstrapped again
    from *plist_path*.

    Returns:
        ``(ok, evidence)``. ``ok`` is False if any bootstrap failed; each
        failure adds one ``"<label>: <detail>"`` line to ``evidence``. A
        launchctl timeout or a missing launchctl binary is reported the same
        way rather than raised, so a repair attempt cannot crash the caller.
    """
    evidence: list[str] = []
    ok = True
    if not items:
        return ok, evidence

    uid = str(os.getuid())
    for cron_id, plist_path in items:
        label = f"com.nexo.{cron_id}"
        try:
            # Best-effort unload: the outcome is ignored on purpose, because
            # any real problem surfaces in the bootstrap step below.
            subprocess.run(
                ["launchctl", "bootout", f"gui/{uid}/{label}"],
                capture_output=True,
                text=True,
                timeout=3,
            )
        except (OSError, subprocess.SubprocessError):
            pass

        try:
            result = subprocess.run(
                ["launchctl", "bootstrap", f"gui/{uid}", str(plist_path)],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (OSError, subprocess.SubprocessError) as e:
            ok = False
            evidence.append(f"{label}: launchctl bootstrap failed ({e})")
            continue

        if result.returncode != 0:
            ok = False
            evidence.append(f"{label}: {result.stderr.strip() or result.stdout.strip() or 'bootstrap failed'}")
    return ok, evidence
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _sync_launchagents_from_manifest() -> tuple[bool, list[str]]:
    """Run ``NEXO_CODE/crons/sync.py`` with the current Python interpreter.

    The child process gets the current environment with NEXO_HOME and
    NEXO_CODE set to this module's values, and a 30 second timeout.

    Returns:
        ``(True, [])`` when the script exits with 0, otherwise ``(False,
        [detail])`` where detail describes the missing script, the exception,
        or the script's stderr/stdout.
    """
    script = NEXO_CODE / "crons" / "sync.py"
    if not script.is_file():
        return False, [f"cron sync script not found at {script}"]

    try:
        child_env = dict(os.environ)
        child_env.update(NEXO_HOME=str(NEXO_HOME), NEXO_CODE=str(NEXO_CODE))
        completed = subprocess.run(
            [sys.executable, str(script)],
            capture_output=True,
            text=True,
            timeout=30,
            env=child_env,
        )
    except Exception as exc:
        return False, [f"cron sync failed: {exc}"]

    if completed.returncode == 0:
        return True, []
    return False, [completed.stderr.strip() or completed.stdout.strip() or "cron sync failed"]
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def check_immune_status() -> DoctorCheck:
    """Report on ``coordination/immune-status.json`` under NEXO_HOME.

    The result is degraded when the file is missing, older than
    IMMUNE_FRESHNESS, or unreadable. Otherwise the status follows the file's
    ``counts``: critical if any FAIL, degraded if any WARN, healthy if neither.
    """
    status_file = NEXO_HOME / "coordination" / "immune-status.json"
    common = {"id": "runtime.immune_freshness", "tier": "runtime"}
    age = _file_age_seconds(status_file)

    if age is None:
        return DoctorCheck(
            **common,
            status="degraded",
            severity="warn",
            summary="Immune status file not found",
            evidence=[f"Expected: {status_file}"],
            repair_plan=["Check if immune cron is installed and running"],
            escalation_prompt="Immune system has never run or status file was deleted.",
        )

    age_min = age / 60
    if age > IMMUNE_FRESHNESS:
        return DoctorCheck(
            **common,
            status="degraded",
            severity="warn",
            summary=f"Immune status stale ({age_min:.0f} min old, threshold {IMMUNE_FRESHNESS // 60} min)",
            evidence=[f"{status_file} last modified {age_min:.0f} minutes ago"],
            repair_plan=[
                "Check LaunchAgent/systemd timer for immune cron",
                "nexo scripts call nexo_schedule_status --input '{}'",
            ],
            escalation_prompt="Investigate why immune system stopped refreshing.",
        )

    # The file is fresh: read it for the per-severity counters.
    try:
        payload = _load_json(status_file)
        counts = payload.get("counts") or {}
        ok_count, warn_count, fail_count = (
            int(counts.get(key, 0) or 0) for key in ("OK", "WARN", "FAIL")
        )
        checks_count = _count_checks(payload.get("checks"))

        if fail_count > 0:
            status, severity, overall = "critical", "error", "fail"
        elif warn_count > 0:
            status, severity, overall = "degraded", "warn", "warn"
        else:
            status, severity, overall = "healthy", "info", "ok"

        return DoctorCheck(
            **common,
            status=status,
            severity=severity,
            summary=(
                f"Immune: {overall} "
                f"({ok_count} OK, {warn_count} WARN, {fail_count} FAIL; "
                f"{checks_count} checks, {age_min:.0f} min ago)"
            ),
        )
    except Exception as e:
        return DoctorCheck(
            **common,
            status="degraded",
            severity="warn",
            summary=f"Immune status unreadable ({age_min:.0f} min ago)",
            evidence=[str(e)],
        )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def check_watchdog_status() -> DoctorCheck:
    """Report on ``operations/watchdog-status.json`` under NEXO_HOME.

    The result is degraded when the file is missing, older than
    WATCHDOG_FRESHNESS, or unreadable. Otherwise the status follows the file's
    ``summary`` section: critical on an overall FAIL or any fail count,
    degraded on an overall WARN or any warn count, healthy otherwise.
    """
    status_file = NEXO_HOME / "operations" / "watchdog-status.json"
    common = {"id": "runtime.watchdog_freshness", "tier": "runtime"}
    age = _file_age_seconds(status_file)

    if age is None:
        return DoctorCheck(
            **common,
            status="degraded",
            severity="warn",
            summary="Watchdog status file not found",
            evidence=[f"Expected: {status_file}"],
            repair_plan=["Check if watchdog cron is installed and running"],
            escalation_prompt="Watchdog has never run or status file was deleted.",
        )

    age_min = age / 60
    if age > WATCHDOG_FRESHNESS:
        return DoctorCheck(
            **common,
            status="degraded",
            severity="warn",
            summary=f"Watchdog status stale ({age_min:.0f} min old)",
            evidence=[
                f"{status_file} last modified {age_min:.0f} minutes ago",
                f"Expected freshness threshold: {WATCHDOG_FRESHNESS // 60} minutes",
            ],
            repair_plan=[
                "Inspect LaunchAgent or systemd timer for watchdog",
                "Check for macOS sandbox errors in stderr logs",
            ],
            escalation_prompt="Investigate why watchdog stopped refreshing despite timer being installed.",
        )

    # The file is fresh: read it for the monitor totals.
    try:
        totals = _load_json(status_file).get("summary") or {}
        monitors = totals.get("total", "?")
        passes = totals.get("pass", "?")
        warns = int(totals.get("warn", 0) or 0)
        fails = int(totals.get("fail", 0) or 0)
        overall = str(totals.get("overall", "UNKNOWN")).upper()

        if overall == "FAIL" or fails > 0:
            status, severity = "critical", "error"
        elif overall == "WARN" or warns > 0:
            status, severity = "degraded", "warn"
        else:
            status, severity = "healthy", "info"

        return DoctorCheck(
            **common,
            status=status,
            severity=severity,
            summary=(
                f"Watchdog: {passes}/{monitors} pass, {warns} warn, {fails} fail "
                f"({age_min:.0f} min ago)"
            ),
        )
    except Exception as e:
        return DoctorCheck(
            **common,
            status="degraded",
            severity="warn",
            summary=f"Watchdog status unreadable ({age_min:.0f} min ago)",
            evidence=[str(e)],
        )
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def check_stale_sessions() -> DoctorCheck:
    """Count sessions in ``data/nexo.db`` whose last update is 2h to 24h old.

    Returns a healthy check when the database file does not exist or no such
    session is found, a degraded check when at least one is found, and a
    degraded check carrying the error text when anything raises (for example
    a missing ``sessions`` table).
    """
    try:
        import sqlite3
        from contextlib import closing

        db_path = NEXO_HOME / "data" / "nexo.db"
        if not db_path.is_file():
            return DoctorCheck(
                id="runtime.stale_sessions",
                tier="runtime",
                status="healthy",
                severity="info",
                summary="No DB to check sessions",
            )

        now = time.time()
        cutoff = now - 7200
        day_ago = now - 86400
        # closing() guarantees the connection is released even when the query
        # raises; a bare close() after the query would be skipped on error.
        with closing(sqlite3.connect(str(db_path), timeout=2)) as conn:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT COUNT(*) as cnt FROM sessions WHERE last_update_epoch < ? AND last_update_epoch > ?",
                (cutoff, day_ago),
            ).fetchone()

        count = row["cnt"] if row else 0
        if count > 0:
            return DoctorCheck(
                id="runtime.stale_sessions",
                tier="runtime",
                status="degraded",
                severity="warn",
                summary=f"{count} stale session{'s' if count > 1 else ''} (no heartbeat >2h)",
                repair_plan=["auto_close_sessions cron should handle this automatically"],
            )
        return DoctorCheck(
            id="runtime.stale_sessions",
            tier="runtime",
            status="healthy",
            severity="info",
            summary="No stale sessions",
        )
    except Exception as e:
        return DoctorCheck(
            id="runtime.stale_sessions",
            tier="runtime",
            status="degraded",
            severity="warn",
            summary=f"Session check failed: {e}",
        )
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def check_cron_freshness() -> DoctorCheck:
    """Compare each cron's latest ``cron_runs.started_at`` with its expected cadence.

    Thresholds come from ``_cron_expectations()``, and crons missing from it
    fall back to DEFAULT_CRON_THRESHOLD. Crons listed by
    ``_run_at_load_cron_ids()`` are skipped.

    Returns a healthy check when the database or the ``cron_runs`` table does
    not exist or nothing is stale, a degraded check listing stale crons and
    unparseable timestamps, and a degraded check carrying the error text when
    anything raises.
    """
    try:
        import sqlite3
        from contextlib import closing

        db_path = NEXO_HOME / "data" / "nexo.db"
        if not db_path.is_file():
            return DoctorCheck(
                id="runtime.cron_freshness",
                tier="runtime",
                status="healthy",
                severity="info",
                summary="No DB to check cron runs",
            )

        # closing() guarantees the connection is released on every exit path,
        # including the early return and any exception raised by a query.
        with closing(sqlite3.connect(str(db_path), timeout=2)) as conn:
            # Check if cron_runs table exists
            tables = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='cron_runs'"
            ).fetchone()
            if not tables:
                return DoctorCheck(
                    id="runtime.cron_freshness",
                    tier="runtime",
                    status="healthy",
                    severity="info",
                    summary="No cron_runs table yet",
                )
            # Latest run per cron
            rows = conn.execute(
                "SELECT cron_id, MAX(started_at) as last_run FROM cron_runs GROUP BY cron_id"
            ).fetchall()

        stale = []
        expectations = _cron_expectations()
        ignored_crons = _run_at_load_cron_ids()
        now = time.time()
        for row in rows:
            cron_id = row[0]
            if cron_id in ignored_crons:
                continue
            parsed = _parse_timestamp(row[1]) if row[1] else None
            if parsed is None:
                stale.append(f"{cron_id}: unreadable timestamp {row[1]!r}")
                continue

            # NOTE(review): a naive datetime is interpreted as local time by
            # .timestamp(); confirm started_at is not stored in UTC.
            age = now - parsed.timestamp()
            expected = expectations.get(cron_id, {"threshold": DEFAULT_CRON_THRESHOLD, "label": "runtime default"})
            if age > expected["threshold"]:
                stale.append(f"{cron_id}: {int(age / 3600)}h ago (expected {expected['label']})")

        if stale:
            return DoctorCheck(
                id="runtime.cron_freshness",
                tier="runtime",
                status="degraded",
                severity="warn",
                summary=f"{len(stale)} cron(s) haven't run recently",
                evidence=stale,
            )
        return DoctorCheck(
            id="runtime.cron_freshness",
            tier="runtime",
            status="healthy",
            severity="info",
            summary=f"All {len(rows)} tracked crons ran recently",
        )
    except Exception as e:
        return DoctorCheck(
            id="runtime.cron_freshness",
            tier="runtime",
            status="degraded",
            severity="warn",
            summary=f"Cron check failed: {e}",
        )
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def check_launchagent_integrity(fix: bool = False) -> DoctorCheck:
    """Check that managed LaunchAgents are loaded from their on-disk plists.

    For every managed plist found on disk, ``launchctl print`` is queried and
    compared against the plist: the loaded path, the NEXO_HOME / NEXO_CODE
    environment values, and the schedule keys expected from the cron manifest.

    Args:
        fix: When True and drift was found, re-sync plists from the manifest,
            reload the affected jobs and re-run the check once. A failure in
            the reload step is reported as evidence rather than raised.

    Returns:
        A healthy check on non-macOS platforms, when no managed plist exists,
        or when nothing drifted. Otherwise a degraded check, or a critical one
        when a ``/tmp/`` path is involved. After a successful fix the healthy
        re-check is returned with ``fixed`` set.
    """
    if platform.system() != "Darwin":
        return DoctorCheck(
            id="runtime.launchagents",
            tier="runtime",
            status="healthy",
            severity="info",
            summary="LaunchAgent integrity check skipped on non-macOS",
        )

    managed = _managed_launchagent_plists()
    if not managed:
        return DoctorCheck(
            id="runtime.launchagents",
            tier="runtime",
            status="healthy",
            severity="info",
            summary="No managed LaunchAgents found on disk",
        )

    uid = str(os.getuid())
    problems = []
    problem_items: list[tuple[str, Path]] = []
    tmp_drift = False
    schedule_expectations = _launchagent_schedule_expectations()
    for cron_id, plist_path in managed:
        label = f"com.nexo.{cron_id}"
        had_problem = False
        try:
            result = subprocess.run(
                ["launchctl", "print", f"gui/{uid}/{label}"],
                capture_output=True,
                text=True,
                timeout=3,
            )
        except Exception as e:
            problems.append(f"{label}: launchctl print failed ({e})")
            continue

        output = (result.stdout or "") + (result.stderr or "")
        if result.returncode != 0 or "Could not find service" in output:
            problems.append(f"{label}: not loaded")
            had_problem = True
            problem_items.append((cron_id, plist_path))
            continue

        # The job is loaded: verify it was loaded from the plist on disk.
        expected_path = str(plist_path)
        actual_path = _extract_launchctl_value(output, "path = ")
        if actual_path != expected_path:
            problems.append(f"{label}: loaded from {actual_path or 'unknown path'}")
            had_problem = True
            if actual_path and "/tmp/" in actual_path:
                tmp_drift = True

        try:
            with plist_path.open("rb") as fh:
                plist_data = plistlib.load(fh)
            env = plist_data.get("EnvironmentVariables") or {}
        except Exception as e:
            problems.append(f"{label}: plist unreadable ({e})")
            continue

        # Each env value declared in the plist must appear in the loaded job.
        for env_key in ("NEXO_HOME", "NEXO_CODE"):
            expected_value = env.get(env_key)
            if not expected_value:
                continue
            marker = f"{env_key} => {expected_value}"
            if marker not in output:
                problems.append(f"{label}: {env_key} drift")
                had_problem = True
                if "/tmp/" in output:
                    tmp_drift = True

        # Ids without a manifest entry (the special ones) have no schedule to compare.
        expected_schedule = schedule_expectations.get(cron_id)
        if expected_schedule is not None:
            actual_schedule = {
                "StartInterval": plist_data.get("StartInterval"),
                "StartCalendarInterval": plist_data.get("StartCalendarInterval"),
                "RunAtLoad": plist_data.get("RunAtLoad"),
            }
            if actual_schedule != expected_schedule:
                problems.append(
                    f"{label}: schedule drift "
                    f"(actual={actual_schedule}, expected={expected_schedule})"
                )
                had_problem = True

        if had_problem:
            problem_items.append((cron_id, plist_path))

    if not problems:
        return DoctorCheck(
            id="runtime.launchagents",
            tier="runtime",
            status="healthy",
            severity="info",
            summary=f"LaunchAgents aligned for {len(managed)} managed job(s)",
        )

    check = DoctorCheck(
        id="runtime.launchagents",
        tier="runtime",
        status="critical" if tmp_drift else "degraded",
        severity="error" if tmp_drift else "warn",
        summary=f"LaunchAgent drift detected in {len(problems)} job(s)",
        evidence=problems[:10],
        repair_plan=[
            "Reload the affected LaunchAgents from ~/Library/LaunchAgents",
            "Re-sync core cron plists from crons/manifest.json if the schedule drifted",
            "If any job is loaded from /tmp, boot it out before bootstrapping the real plist",
        ],
        escalation_prompt="Launchd is serving stale or drifted NEXO jobs. Compare loaded job paths with plist paths on disk.",
    )

    if fix:
        sync_ok, sync_evidence = _sync_launchagents_from_manifest()
        try:
            repaired, repair_evidence = _repair_launchagents(problem_items)
        except Exception as e:
            # A launchctl timeout or missing binary must not abort the whole
            # doctor run; report it as evidence on the unfixed check instead.
            repaired, repair_evidence = False, [f"LaunchAgent reload failed: {e}"]
        if sync_ok and repaired:
            # Re-run read-only to confirm that the repair actually took effect.
            post_check = check_launchagent_integrity(fix=False)
            if post_check.status == "healthy":
                post_check.fixed = True
                post_check.summary += " (fixed)"
                return post_check
        check.evidence.extend((sync_evidence + repair_evidence)[:10])
    return check
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def check_skill_health(fix: bool = False) -> DoctorCheck:
    """Summarize the report returned by ``db.get_skill_health_report``.

    Args:
        fix: Passed through to ``get_skill_health_report``; also recorded as
            ``fixed`` on the healthy result.

    Returns:
        A healthy check when the report has no issues, a critical check when
        any issue has severity ``"error"``, a degraded check for other issues,
        and a degraded check carrying the error text when importing or calling
        the report function raises.
    """
    try:
        from db import get_skill_health_report
        report = get_skill_health_report(fix=fix)
    except Exception as e:
        return DoctorCheck(
            id="runtime.skills",
            tier="runtime",
            status="degraded",
            severity="warn",
            summary=f"Skill health check failed: {e}",
        )

    issues = report.get("issues", [])
    if not issues:
        summary = f"Skills consistent ({report.get('checked', 0)} checked)"
        if fix:
            summary += " (fixed)"
        return DoctorCheck(
            id="runtime.skills",
            tier="runtime",
            status="healthy",
            severity="info",
            summary=summary,
            fixed=fix,
        )

    has_errors = any(issue.get("severity") == "error" for issue in issues)
    # .get() keeps an incomplete issue entry from raising KeyError here, where
    # no surrounding try would catch it.
    evidence = [
        f"{issue.get('skill_id', 'unknown')}: {issue.get('message', 'no details')}"
        for issue in issues[:10]
    ]
    return DoctorCheck(
        id="runtime.skills",
        tier="runtime",
        status="critical" if has_errors else "degraded",
        severity="error" if has_errors else "warn",
        summary=f"Skill issues detected in {len(issues)} item(s)",
        evidence=evidence,
        repair_plan=[
            "Run nexo skills sync to reconcile filesystem definitions",
            "Auto-reconcile execution metadata for executable skills",
            "Fix or restore missing executable files for execute/hybrid skills",
        ],
        escalation_prompt="Skill metadata and filesystem artifacts are out of sync or an executable skill is missing artifacts.",
    )
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
    """Run every runtime-tier check in a fixed order and return the results.

    *fix* is forwarded only to the LaunchAgent and skill checks; the other
    four checks take no arguments.
    """
    results = [
        probe()
        for probe in (
            check_immune_status,
            check_watchdog_status,
            check_stale_sessions,
            check_cron_freshness,
        )
    ]
    results.append(check_launchagent_integrity(fix=fix))
    results.append(check_skill_health(fix=fix))
    return results
|