nexo-brain 5.3.20 → 5.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/package.json +1 -1
  3. package/src/auto_update.py +11 -8
  4. package/src/dashboard/static/favicon 2.svg +32 -0
  5. package/src/dashboard/static/nexo-logo 2.png +0 -0
  6. package/src/dashboard/static/nexo-logo 2.svg +40 -0
  7. package/src/dashboard/static/style 2.css +2458 -0
  8. package/src/dashboard/templates/adaptive 2.html +118 -0
  9. package/src/dashboard/templates/artifacts 2.html +133 -0
  10. package/src/dashboard/templates/backups 2.html +136 -0
  11. package/src/dashboard/templates/base 2.html +417 -0
  12. package/src/dashboard/templates/calendar 2.html +591 -0
  13. package/src/dashboard/templates/chat 2.html +356 -0
  14. package/src/dashboard/templates/claims 2.html +259 -0
  15. package/src/dashboard/templates/cortex 2.html +321 -0
  16. package/src/dashboard/templates/credentials 2.html +128 -0
  17. package/src/dashboard/templates/crons 2.html +370 -0
  18. package/src/dashboard/templates/dashboard 2.html +494 -0
  19. package/src/dashboard/templates/dreams 2.html +252 -0
  20. package/src/dashboard/templates/email 2.html +160 -0
  21. package/src/dashboard/templates/evolution 2.html +189 -0
  22. package/src/dashboard/templates/feed 2.html +249 -0
  23. package/src/dashboard/templates/followup_health 2.html +170 -0
  24. package/src/dashboard/templates/graph 2.html +201 -0
  25. package/src/dashboard/templates/guard 2.html +259 -0
  26. package/src/dashboard/templates/inbox 2.html +251 -0
  27. package/src/dashboard/templates/memory 2.html +420 -0
  28. package/src/dashboard/templates/operations 2.html +608 -0
  29. package/src/dashboard/templates/plugins 2.html +185 -0
  30. package/src/dashboard/templates/protocol 2.html +199 -0
  31. package/src/dashboard/templates/rules 2.html +246 -0
  32. package/src/dashboard/templates/sentiment 2.html +247 -0
  33. package/src/dashboard/templates/sessions 2.html +218 -0
  34. package/src/dashboard/templates/skills 2.html +329 -0
  35. package/src/dashboard/templates/somatic 2.html +73 -0
  36. package/src/dashboard/templates/triggers 2.html +133 -0
  37. package/src/dashboard/templates/trust 2.html +360 -0
  38. package/src/db/__init__ 2.py +259 -0
  39. package/src/db/_core 2.py +437 -0
  40. package/src/db/_credentials 2.py +124 -0
  41. package/src/db/_episodic 2.py +762 -0
  42. package/src/db/_evolution 2.py +54 -0
  43. package/src/db/_fts 2.py +406 -0
  44. package/src/db/_goal_profiles 2.py +376 -0
  45. package/src/db/_hot_context 2.py +660 -0
  46. package/src/db/_outcomes 2.py +800 -0
  47. package/src/db/_personal_scripts 2.py +582 -0
  48. package/src/db/_sessions 2.py +330 -0
  49. package/src/db/_tasks 2.py +91 -0
  50. package/src/db/_watchers 2.py +173 -0
  51. package/src/doctor/formatters 2.py +52 -0
  52. package/src/doctor/models 2.py +69 -0
  53. package/src/doctor/planes 2.py +87 -0
  54. package/src/doctor/providers/__init__ 2.py +1 -0
  55. package/src/doctor/providers/deep 2.py +367 -0
  56. package/src/evolution_cycle 2.py +519 -0
  57. package/src/hooks/auto_capture 2.py +208 -0
  58. package/src/hooks/caffeinate-guard 2.sh +8 -0
  59. package/src/hooks/capture-session 2.sh +21 -0
  60. package/src/hooks/capture-tool-logs 2.sh +158 -0
  61. package/src/hooks/daily-briefing-check 2.sh +33 -0
  62. package/src/hooks/heartbeat-enforcement 2.py +90 -0
  63. package/src/hooks/heartbeat-posttool 2.sh +18 -0
  64. package/src/hooks/inbox-hook 2.sh +76 -0
  65. package/src/hooks/post-compact 2.sh +152 -0
  66. package/src/hooks/pre-compact 2.sh +169 -0
  67. package/src/hooks/protocol-guardrail 2.sh +10 -0
  68. package/src/hooks/protocol-pretool-guardrail 2.sh +9 -0
  69. package/src/hooks/session-stop 2.sh +52 -0
  70. package/src/kg_populate 2.py +292 -0
  71. package/src/maintenance 2.py +53 -0
  72. package/src/memory_backends 2.py +71 -0
  73. package/src/migrate_embeddings 2.py +124 -0
  74. package/src/nexo_sdk 2.py +103 -0
  75. package/src/observability 2.py +199 -0
  76. package/src/plugin_loader 2.py +217 -0
  77. package/src/plugins/__init__ 2.py +0 -0
  78. package/src/plugins/artifact_registry 2.py +450 -0
  79. package/src/plugins/backup 2.py +127 -0
  80. package/src/plugins/claims_tools 2.py +119 -0
  81. package/src/plugins/cognitive_memory 2.py +609 -0
  82. package/src/plugins/core_rules 2.py +252 -0
  83. package/src/plugins/cortex 2.py +1155 -0
  84. package/src/plugins/entities 2.py +67 -0
  85. package/src/plugins/episodic_memory 2.py +560 -0
  86. package/src/plugins/evolution 2.py +167 -0
  87. package/src/plugins/goal_engine 2.py +142 -0
  88. package/src/plugins/guard 2.py +862 -0
  89. package/src/plugins/impact 2.py +29 -0
  90. package/src/plugins/knowledge_graph_tools 2.py +137 -0
  91. package/src/plugins/media_memory_tools 2.py +98 -0
  92. package/src/plugins/memory_export 2.py +196 -0
  93. package/src/plugins/outcomes 2.py +130 -0
  94. package/src/plugins/personal_scripts 2.py +117 -0
  95. package/src/plugins/preferences 2.py +47 -0
  96. package/src/plugins/protocol 2.py +1449 -0
  97. package/src/plugins/simple_api 2.py +106 -0
  98. package/src/plugins/skills 2.py +341 -0
  99. package/src/plugins/state_watchers 2.py +79 -0
  100. package/src/plugins/update 2.py +986 -0
  101. package/src/plugins/user_state_tools 2.py +43 -0
  102. package/src/plugins/workflow 2.py +588 -0
  103. package/src/protocol_settings 2.py +59 -0
  104. package/src/public_contribution 2.py +466 -0
  105. package/src/public_evolution_queue 2.py +241 -0
  106. package/src/requirements 2.txt +14 -0
  107. package/src/retroactive_learnings 2.py +373 -0
  108. package/src/rules/__init__ 2.py +0 -0
  109. package/src/rules/core-rules 2.json +331 -0
  110. package/src/rules/migrate 2.py +207 -0
  111. package/src/runtime_power 2.py +874 -0
  112. package/src/script_registry 2.py +1559 -0
  113. package/src/scripts/check-context 2.py +272 -0
  114. package/src/scripts/deep-sleep/apply_findings 2.py +2327 -0
  115. package/src/scripts/deep-sleep/collect 2.py +928 -0
  116. package/src/scripts/deep-sleep/extract 2.py +330 -0
  117. package/src/scripts/deep-sleep/extract-prompt 2.md +285 -0
  118. package/src/scripts/deep-sleep/synthesize 2.py +312 -0
  119. package/src/scripts/deep-sleep/synthesize-prompt 2.md +336 -0
  120. package/src/scripts/nexo-agent-run 2.py +75 -0
  121. package/src/scripts/nexo-auto-update 2.py +6 -0
  122. package/src/scripts/nexo-backup 2.sh +25 -0
  123. package/src/scripts/nexo-brain-activation 2.sh +140 -0
  124. package/src/scripts/nexo-catchup 2.py +300 -0
  125. package/src/scripts/nexo-cognitive-decay 2.py +257 -0
  126. package/src/scripts/nexo-cortex-cycle 2.py +293 -0
  127. package/src/scripts/nexo-cron-wrapper 2.sh +53 -0
  128. package/src/scripts/nexo-daily-self-audit 2.py +2161 -0
  129. package/src/scripts/nexo-dashboard 2.sh +29 -0
  130. package/src/scripts/nexo-deep-sleep 2.sh +86 -0
  131. package/src/scripts/nexo-evolution-run 2.py +1664 -0
  132. package/src/scripts/nexo-followup-hygiene 2.py +139 -0
  133. package/src/scripts/nexo-hook-record 2.py +42 -0
  134. package/src/scripts/nexo-immune 2.py +936 -0
  135. package/src/scripts/nexo-impact-scorer 2.py +117 -0
  136. package/src/scripts/nexo-inbox-hook 2.sh +74 -0
  137. package/src/scripts/nexo-install 2.py +6 -0
  138. package/src/scripts/nexo-learning-housekeep 2.py +401 -0
  139. package/src/scripts/nexo-learning-validator 2.py +266 -0
  140. package/src/scripts/nexo-migrate 2.py +260 -0
  141. package/src/scripts/nexo-outcome-checker 2.py +127 -0
  142. package/src/scripts/nexo-postmortem-consolidator 2.py +456 -0
  143. package/src/scripts/nexo-pre-commit 2.py +120 -0
  144. package/src/scripts/nexo-prevent-sleep 2.sh +35 -0
  145. package/src/scripts/nexo-proactive-dashboard 2.py +354 -0
  146. package/src/scripts/nexo-reflection 2.py +256 -0
  147. package/src/scripts/nexo-runtime-preflight 2.py +274 -0
  148. package/src/scripts/nexo-sleep 2.py +631 -0
  149. package/src/scripts/nexo-snapshot-restore 2.sh +35 -0
  150. package/src/scripts/nexo-sync-clients 2.py +16 -0
  151. package/src/scripts/nexo-synthesis 2.py +475 -0
  152. package/src/scripts/nexo-tcc-approve 2.sh +79 -0
  153. package/src/scripts/nexo-update 2.sh +306 -0
  154. package/src/scripts/nexo-watchdog 2.sh +1207 -0
  155. package/src/scripts/nexo-watchdog-smoke 2.py +119 -0
  156. package/src/scripts/rehydrate_learnings_from_archive 2.py +245 -0
  157. package/src/server 2.py +1296 -0
  158. package/src/skills/run-nexo-audit-phase/guide 2.md +43 -0
  159. package/src/skills/run-nexo-audit-phase/skill 2.json +59 -0
  160. package/src/skills/run-nexo-core-fix-cycle/guide 2.md +17 -0
  161. package/src/skills/run-nexo-core-fix-cycle/script 2.py +276 -0
  162. package/src/skills/run-nexo-core-fix-cycle/skill 2.json +58 -0
  163. package/src/skills/run-release-final-audit/guide 2.md +16 -0
  164. package/src/skills/run-release-final-audit/script 2.py +259 -0
  165. package/src/skills/run-release-final-audit/skill 2.json +77 -0
  166. package/src/skills/run-runtime-doctor/guide 2.md +12 -0
  167. package/src/skills/run-runtime-doctor/script 2.py +21 -0
  168. package/src/skills/run-runtime-doctor/skill 2.json +25 -0
  169. package/src/skills_runtime 2.py +932 -0
  170. package/src/state_watchers_runtime 2.py +475 -0
  171. package/src/storage_router 2.py +32 -0
  172. package/src/system_catalog 2.py +786 -0
  173. package/src/tools_coordination 2.py +103 -0
  174. package/src/tools_credentials 2.py +68 -0
  175. package/src/tools_drive 2.py +487 -0
  176. package/src/tools_hot_context 2.py +163 -0
  177. package/src/tools_learnings 2.py +612 -0
  178. package/src/tools_menu 2.py +229 -0
  179. package/src/tools_reminders 2.py +88 -0
  180. package/src/tools_reminders_crud 2.py +363 -0
  181. package/src/tools_sessions 2.py +1054 -0
  182. package/src/tools_system_catalog 2.py +19 -0
  183. package/src/tools_task_history 2.py +57 -0
  184. package/src/tools_transcripts 2.py +98 -0
  185. package/src/transcript_utils 2.py +412 -0
  186. package/src/user_context 2.py +46 -0
  187. package/src/user_data_portability 2.py +328 -0
  188. package/src/user_state_model 2.py +170 -0
  189. package/templates/CLAUDE.md 2.template +108 -0
  190. package/templates/CODEX.AGENTS.md 2.template +66 -0
  191. package/templates/launchagents/README 2.md +132 -0
  192. package/templates/launchagents/com.nexo.auto-close-sessions 2.plist +39 -0
  193. package/templates/launchagents/com.nexo.catchup 2.plist +39 -0
  194. package/templates/launchagents/com.nexo.cognitive-decay 2.plist +40 -0
  195. package/templates/launchagents/com.nexo.dashboard 2.plist +43 -0
  196. package/templates/launchagents/com.nexo.deep-sleep 2.plist +43 -0
  197. package/templates/launchagents/com.nexo.evolution 2.plist +44 -0
  198. package/templates/launchagents/com.nexo.followup-hygiene 2.plist +45 -0
  199. package/templates/launchagents/com.nexo.immune 2.plist +41 -0
  200. package/templates/launchagents/com.nexo.postmortem 2.plist +45 -0
  201. package/templates/launchagents/com.nexo.self-audit 2.plist +47 -0
  202. package/templates/launchagents/com.nexo.synthesis 2.plist +45 -0
  203. package/templates/launchagents/com.nexo.watchdog 2.plist +37 -0
  204. package/templates/nexo_helper 2.py +301 -0
  205. package/templates/openclaw 2.json +13 -0
  206. package/templates/plugin-template 2.py +40 -0
  207. package/templates/script-template 2.py +59 -0
  208. package/templates/script-template 2.sh +13 -0
  209. package/templates/skill-script-template 2.py +48 -0
  210. package/templates/skill-template 2.md +33 -0
@@ -0,0 +1,936 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ NEXO Immune System — Health monitor & auto-repair.
4
+
5
+ Runs every 30 minutes via LaunchAgent. Checks tokens, LaunchAgents, DBs,
6
+ scripts, logs, disk, and remote server crons. Auto-repairs what it can,
7
+ alerts via notification on NEW failures.
8
+
9
+ Zero external dependencies. Stdlib + sqlite3 + urllib only.
10
+ """
11
+
12
+ import fcntl
13
+ import json
14
+ import os
15
+ import re
16
+ import shlex
17
+ import signal
18
+ import sqlite3
19
+ import ssl
20
+ import subprocess
21
+ import sys
22
+ import time
23
+ from datetime import datetime, date, timedelta
24
+ from pathlib import Path
25
+
26
+
27
+ try:
28
+ from client_preferences import resolve_user_model as _resolve_user_model
29
+ _USER_MODEL = _resolve_user_model()
30
+ except Exception:
31
+ _USER_MODEL = ""
32
+
33
+ NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
34
+ _script_dir = Path(__file__).resolve().parent
35
+ _repo_src = _script_dir.parent
36
+ NEXO_CODE = Path(os.environ.get("NEXO_CODE", str(_repo_src) if (_repo_src / "server.py").exists() else str(NEXO_HOME)))
37
+ if str(NEXO_CODE) not in sys.path:
38
+ sys.path.insert(0, str(NEXO_CODE))
39
+
40
+ from agent_runner import AutomationBackendUnavailableError, run_automation_prompt
41
+
42
+ from urllib.request import Request, urlopen
43
+ from urllib.error import URLError, HTTPError
44
+
45
+ # ─── SSL context for macOS (certifi or system certs) ─────────────────────────
46
def _make_ssl_context():
    """Create a certificate-verifying SSL context that works on macOS.

    CA bundles are tried in order: ``certifi`` (when installed), a list of
    well-known system/Homebrew bundle paths, and finally Python's default
    trust store.

    Returns:
        ssl.SSLContext: a context produced by ``ssl.create_default_context``,
        i.e. with hostname checking and certificate verification enabled.
    """
    # Try certifi first (pip-installed)
    try:
        import certifi
        return ssl.create_default_context(cafile=certifi.where())
    except ImportError:
        pass
    except (OSError, ssl.SSLError):
        # certifi is importable but its bundle cannot be loaded; keep looking.
        pass

    # Try macOS system certificates
    for ca_path in (
        "/etc/ssl/cert.pem",
        "/usr/local/etc/openssl/cert.pem",
        "/usr/local/etc/openssl@3/cert.pem",
        "/opt/homebrew/etc/openssl@3/cert.pem",
    ):
        if not os.path.exists(ca_path):
            continue
        try:
            return ssl.create_default_context(cafile=ca_path)
        except (OSError, ssl.SSLError):
            # Unreadable or corrupt bundle: try the next candidate.
            continue

    # Last resort: Python's default trust store, with verification left ON.
    # Verification is deliberately not disabled here: token checks send
    # credentials over this context, and a failed handshake is surfaced by
    # http_get() as an error result rather than a crash.
    return ssl.create_default_context()
70
+
71
+ SSL_CTX = _make_ssl_context()
72
+
73
+ # ─── Paths ────────────────────────────────────────────────────────────────────
74
+ HOME = Path.home()
75
+ CLAUDE_DIR = NEXO_HOME
76
+ COORD_DIR = CLAUDE_DIR / "coordination"
77
+ BRAIN_DIR = CLAUDE_DIR / "brain"
78
+ SCRIPTS_DIR = CLAUDE_DIR / "scripts"
79
+
80
+ IMMUNE_STATUS = COORD_DIR / "immune-status.json"
81
+ IMMUNE_LOG = COORD_DIR / "immune-log.json"
82
+ LOCK_FILE = COORD_DIR / "immune-process.lock"
83
+
84
+ # Configure your alert script here (optional)
85
+ # ALERT_SCRIPT = SCRIPTS_DIR / "my-notify.sh"
86
+
87
+ CLAUDE_MEM_DB = HOME / ".claude-mem" / "claude-mem.db"
88
+
89
+ LAUNCH_AGENTS_DIR = HOME / "Library" / "LaunchAgents"
90
+ CLAUDE_CLI = HOME / ".local" / "bin" / "claude"
91
+
92
+ NOW = datetime.now()
93
+ TODAY = date.today()
94
+
95
+ # ─── Config ───────────────────────────────────────────────────────────────────
96
+
97
+ # Token checks — configure for your services.
98
+ # Supported types: file_text (read file, optional test_url), json_field (check for refresh_token),
99
+ # service_account (check for private_key/client_email), hardcoded (direct URL test)
100
+ TOKEN_CHECKS = [
101
+ # Example: uncomment and configure for your services
102
+ # {
103
+ # "name": "My API",
104
+ # "path": "~/.nexo/my_api_token.txt",
105
+ # "type": "file_text",
106
+ # "test_url": "https://api.example.com/health?token={token}",
107
+ # },
108
+ # {
109
+ # "name": "My Service Account",
110
+ # "path": "~/.nexo/service-account.json",
111
+ # "type": "service_account",
112
+ # },
113
+ ]
114
+
115
+ EXPECTED_AGENTS = [
116
+ "com.nexo.immune",
117
+ "com.nexo.sleep",
118
+ "com.nexo.synthesis",
119
+ ]
120
+
121
+ # SSH check interval — only every 2 hours, not every 30 min
122
+ SSH_CHECK_INTERVAL_HOURS = 2
123
+
124
+ # Log size thresholds (bytes)
125
+ LOG_WARN_SIZE = 10 * 1024 * 1024 # 10 MB
126
+ LOG_FAIL_SIZE = 50 * 1024 * 1024 # 50 MB
127
+ LOG_TRUNCATE_SIZE = 50 * 1024 * 1024 # 50 MB — auto-truncate threshold
128
+
129
+ # Disk thresholds (percentage used)
130
+ DISK_WARN_PCT = 85
131
+ DISK_FAIL_PCT = 95
132
+
133
+ # Quiet hours — no WhatsApp alerts
134
+ QUIET_START = 23 # 23:00
135
+ QUIET_END = 7 # 07:00
136
+
137
+ # Skip execution hours (deep night)
138
+ SKIP_START = 0 # 00:00
139
+ SKIP_END = 6 # 06:00
140
+
141
+ # Max entries in immune-log.json
142
+ MAX_LOG_ENTRIES = 500
143
+
144
+ # HTTP timeout for token checks
145
+ HTTP_TIMEOUT = 10
146
+
147
+ # SSH timeout
148
+ SSH_TIMEOUT = 15
149
+
150
+
151
+ # ─── Helpers ──────────────────────────────────────────────────────────────────
152
+
153
def load_json(path, default=None):
    """Read and parse a JSON file, falling back to a default on any failure.

    Args:
        path: ``pathlib.Path`` of the file to read.
        default: Value returned when the file is missing, unreadable, or not
            valid JSON. ``None`` (also when passed explicitly) means "return
            a new empty dict", so callers cannot get ``None`` back from here.

    Returns:
        The decoded JSON value, or the fallback described above.
    """
    fallback = {} if default is None else default
    if not path.exists():
        return fallback
    try:
        # save_json() writes non-ASCII characters verbatim
        # (ensure_ascii=False), so decode explicitly as UTF-8 instead of
        # relying on the locale's default encoding.
        return json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        # Best-effort by design: a corrupt state file must not stop the run.
        return fallback
160
+
161
+
162
def save_json(path, data):
    """Serialize ``data`` as indented UTF-8 JSON and write it to ``path``.

    The document is first written to a sibling temporary file and then moved
    over the target with ``os.replace``, so an interrupted write cannot leave
    a half-written, unparseable file at ``path``.

    Args:
        path: ``pathlib.Path`` of the destination file.
        data: Any JSON-serializable value.

    Raises:
        TypeError, ValueError: If ``data`` is not JSON-serializable.
        OSError: If the file cannot be written or replaced.
    """
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    # The PID suffix keeps concurrent writers from sharing a temp file.
    tmp_path = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        os.replace(str(tmp_path), str(path))
    except BaseException:
        # Do not leave the temporary file behind when the write fails.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise
164
+
165
+
166
def is_quiet_hours():
    """Tell whether the hour of ``NOW`` lies in the alert quiet window.

    The window is ``[QUIET_START, QUIET_END)`` in whole hours. When
    ``QUIET_START`` is greater than ``QUIET_END`` the window is treated as
    wrapping past midnight (the configured 23 -> 7 case).
    """
    hour = NOW.hour
    wraps_midnight = QUIET_START > QUIET_END
    if not wraps_midnight:
        return QUIET_START <= hour < QUIET_END
    # Wrapping window: quiet late in the evening OR early in the morning.
    return hour >= QUIET_START or hour < QUIET_END
172
+
173
+
174
def is_skip_hours():
    """Tell whether the hour of ``NOW`` lies in the skip-execution window.

    The window is ``[SKIP_START, SKIP_END)`` in whole hours (0 -> 6 as
    configured). A window whose start is greater than its end is treated as
    wrapping past midnight, the same way ``is_quiet_hours`` treats the quiet
    window.
    """
    hour = NOW.hour
    if SKIP_START > SKIP_END:
        # e.g. 22 -> 6: skip late in the evening OR early in the morning.
        return hour >= SKIP_START or hour < SKIP_END
    return SKIP_START <= hour < SKIP_END
177
+
178
+
179
def send_alert(title, message):
    """Emit an alert line unless quiet hours are active.

    This default implementation only prints. The ``ALERT_SCRIPT`` setting
    near the top of the file is a commented-out placeholder and is not read
    here; replace this function to deliver alerts by another channel.

    Args:
        title: Short alert headline.
        message: Alert body; printed only when the alert is not suppressed.

    Returns:
        bool: ``True`` if the alert was printed, ``False`` if it was
        suppressed because of quiet hours.
    """
    if not is_quiet_hours():
        # Default: log only. Configure ALERT_SCRIPT for active notifications.
        print(f" [ALERT] {title}: {message}")
        return True

    print(f" [QUIET] Suppressed alert: {title}")
    return False
191
+
192
+
193
def http_get(url, headers=None, timeout=HTTP_TIMEOUT):
    """Perform a simple HTTP GET.

    Args:
        url: Absolute URL to fetch.
        headers: Optional mapping of request header names to values.
        timeout: Socket timeout in seconds.

    Returns:
        tuple: ``(status_code, body)`` for any HTTP response, including error
        responses (4xx/5xx), or ``(0, error_string)`` when no HTTP response
        was obtained (DNS, TLS, connection, timeout, ...).
    """
    try:
        req = Request(url)
        if headers:
            for k, v in headers.items():
                req.add_header(k, v)
        with urlopen(req, timeout=timeout, context=SSL_CTX) as resp:
            body = resp.read().decode("utf-8", errors="replace")
            return resp.status, body
    except HTTPError as e:
        # Return the server's response body, not just "HTTP Error 4xx: ...":
        # callers inspect the body (e.g. for "expired") to classify failures.
        try:
            body = e.read().decode("utf-8", errors="replace")
        except Exception:
            body = ""
        finally:
            e.close()
        return e.code, body or str(e)
    except URLError as e:
        return 0, str(e.reason)
    except Exception as e:
        return 0, str(e)
209
+
210
+
211
def run_cmd(cmd, timeout=30):
    """Execute a command directly (no shell) and capture its output.

    Args:
        cmd: Either a command string, which is tokenized with
            ``shlex.split``, or an iterable of argv items.
        timeout: Seconds to wait before giving up on the process.

    Returns:
        tuple: ``(returncode, stdout, stderr)`` with both streams stripped.
        A timeout yields ``(-1, "", "timeout")``; any other failure to run
        the command yields ``(-1, "", str(error))``.
    """
    try:
        if isinstance(cmd, str):
            argv = shlex.split(cmd)
        else:
            argv = list(cmd)
        proc = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return -1, "", "timeout"
    except Exception as exc:
        return -1, "", str(exc)
    return proc.returncode, proc.stdout.strip(), proc.stderr.strip()
223
+
224
+
225
def pid_alive(pid):
    """Check whether a process with the given PID currently exists.

    Uses ``os.kill(pid, 0)``, which performs the existence/permission check
    without delivering a signal.

    Args:
        pid: Integer process id.

    Returns:
        bool: ``True`` if the process exists (even when it belongs to
        another user), ``False`` otherwise.
    """
    if pid <= 0:
        # 0 and negative values address process groups, not a single process.
        return False
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        # The process exists but we may not signal it: it is still alive, so
        # callers must not treat its lock file as stale.
        return True
    except (OSError, OverflowError):
        # OverflowError: value too large to be a valid pid.
        return False
    return True
232
+
233
+
234
+ # ─── Check Functions ──────────────────────────────────────────────────────────
235
+
236
def _read_json_object(path):
    """Return the JSON object stored at ``path``, or ``None`` if the file is
    unreadable, not valid JSON, or does not contain a JSON object."""
    try:
        data = json.loads(path.read_text())
    except (OSError, ValueError):
        return None
    return data if isinstance(data, dict) else None


def check_tokens():
    """Check every entry of ``TOKEN_CHECKS``.

    Supported entry types:
      * ``file_text`` - token stored in a text file; optionally verified by
        requesting ``test_url`` with ``{token}`` substituted.
      * ``json_field`` - JSON file that must contain ``refresh_token``.
      * ``service_account`` - JSON file that must contain ``private_key``
        and ``client_email``.
      * ``hardcoded`` - request ``test_url`` sending ``token`` in ``header``.

    Returns:
        list[dict]: One ``{"name", "status", "detail"}`` dict per entry, with
        status ``"OK"`` or ``"FAIL"``. Any exception raised while checking an
        entry is reported as a FAIL for that entry.
    """
    results = []

    for tc in TOKEN_CHECKS:
        name = tc["name"]
        result = {"name": name, "status": "OK", "detail": ""}

        try:
            kind = tc["type"]

            if kind == "file_text":
                path = Path(tc["path"]).expanduser()
                if not path.exists():
                    result["status"] = "FAIL"
                    result["detail"] = f"Token file missing: {path}"
                else:
                    token = path.read_text().strip()
                    if not token:
                        result["status"] = "FAIL"
                        result["detail"] = "Token file empty"
                    elif "test_url" in tc:
                        url = tc["test_url"].format(token=token)
                        code, body = http_get(url)
                        if code == 200:
                            result["detail"] = "HTTP 200 OK"
                        # NOTE(review): 190 is not an HTTP status code, so in
                        # practice only the "expired" body match can trigger
                        # this branch; confirm what the 190 was meant to match.
                        elif code == 190 or (isinstance(body, str) and "expired" in body.lower()):
                            result["status"] = "FAIL"
                            result["detail"] = f"Token expired (HTTP {code})"
                        else:
                            result["status"] = "FAIL"
                            result["detail"] = f"HTTP {code}: {body[:200]}"

            elif kind == "json_field":
                path = Path(tc["path"]).expanduser()
                if not path.exists():
                    result["status"] = "FAIL"
                    result["detail"] = f"Token file missing: {path}"
                else:
                    # Parsed here rather than via load_json(): load_json()
                    # never returns None, which made "Invalid JSON"
                    # undetectable.
                    data = _read_json_object(path)
                    if data is None:
                        result["status"] = "FAIL"
                        result["detail"] = "Invalid JSON"
                    elif "refresh_token" not in data:
                        result["status"] = "FAIL"
                        result["detail"] = "No refresh_token in JSON"
                    else:
                        result["detail"] = "refresh_token present"

            elif kind == "service_account":
                path = Path(tc["path"]).expanduser()
                if not path.exists():
                    result["status"] = "FAIL"
                    result["detail"] = f"Service account file missing: {path}"
                else:
                    data = _read_json_object(path)
                    if data is None:
                        result["status"] = "FAIL"
                        result["detail"] = "Invalid JSON"
                    elif "private_key" not in data or "client_email" not in data:
                        result["status"] = "FAIL"
                        result["detail"] = "Missing private_key or client_email"
                    else:
                        result["detail"] = f"SA: {data.get('client_email', '?')[:40]}"

            elif kind == "hardcoded":
                url = tc["test_url"]
                headers = {tc["header"]: tc["token"]}
                code, body = http_get(url, headers=headers)
                if code == 200:
                    result["detail"] = "HTTP 200 OK"
                elif code == 401:
                    result["status"] = "FAIL"
                    result["detail"] = "Token unauthorized (401)"
                else:
                    result["status"] = "FAIL"
                    result["detail"] = f"HTTP {code}: {body[:200]}"

            else:
                # A misspelled type must not be reported as a healthy token.
                result["status"] = "FAIL"
                result["detail"] = f"Unknown token check type: {kind}"

        except Exception as e:
            result["status"] = "FAIL"
            result["detail"] = f"Exception: {str(e)[:200]}"

        results.append(result)

    return results
319
+
320
+
321
def check_launch_agents():
    """Check that every agent in ``EXPECTED_AGENTS`` is loaded in launchd.

    Agents missing from ``launchctl list`` are auto-repaired by loading
    ``<LAUNCH_AGENTS_DIR>/<label>.plist`` when that file exists.

    Returns:
        list[dict]: One ``{"name", "status", "detail", "repaired"}`` dict per
        expected agent. Status is ``"OK"`` when already loaded, ``"WARN"``
        when it had to be loaded, ``"FAIL"`` when loading failed or the plist
        is missing.
    """
    results = []

    # Get list of loaded agents. Rows are tab-separated with the label in the
    # third column.
    rc, stdout, _ = run_cmd(["launchctl", "list"])
    loaded_labels = set()
    if rc == 0:
        for line in stdout.splitlines():
            parts = line.split("\t")
            if len(parts) >= 3:
                loaded_labels.add(parts[2])
    # NOTE(review): when `launchctl list` itself fails, loaded_labels stays
    # empty and every agent goes through the auto-repair path below.

    for agent in EXPECTED_AGENTS:
        result = {"name": agent, "status": "OK", "detail": "", "repaired": False}

        if agent in loaded_labels:
            result["detail"] = "Loaded"
        else:
            # Try auto-repair
            plist = LAUNCH_AGENTS_DIR / f"{agent}.plist"
            if plist.exists():
                # Pass an argv list so a path containing quotes or spaces is
                # never re-tokenized by shlex inside run_cmd().
                rc, out, err = run_cmd(["launchctl", "load", str(plist)])
                if rc == 0:
                    result["status"] = "WARN"
                    result["detail"] = "Was unloaded, auto-loaded successfully"
                    result["repaired"] = True
                else:
                    result["status"] = "FAIL"
                    result["detail"] = f"Unloaded, auto-load failed: {err[:100]}"
            else:
                result["status"] = "FAIL"
                result["detail"] = f"Unloaded, plist not found: {plist}"

        results.append(result)

    return results
358
+
359
+
360
def check_databases():
    """Run ``PRAGMA integrity_check`` on the known SQLite databases.

    The databases are ``nexo.db`` and ``cognitive.db`` under
    ``NEXO_HOME / "data"`` plus ``CLAUDE_MEM_DB``.

    Returns:
        list[dict]: One ``{"name", "status", "detail"}`` dict per database.
        Status is ``"FAIL"`` when the file is missing, cannot be opened or
        checked, or the first integrity_check row is not ``"ok"``.
    """
    results = []

    dbs = [
        ("nexo.db", NEXO_HOME / "data" / "nexo.db"),
        ("cognitive.db", NEXO_HOME / "data" / "cognitive.db"),
        ("claude-mem.db", CLAUDE_MEM_DB),
    ]

    for name, path in dbs:
        result = {"name": name, "status": "OK", "detail": ""}

        if not path.exists():
            result["status"] = "FAIL"
            result["detail"] = f"File missing: {path}"
        else:
            try:
                conn = sqlite3.connect(str(path), timeout=5)
                try:
                    cursor = conn.execute("PRAGMA integrity_check")
                    check_result = cursor.fetchone()[0]
                finally:
                    # Close even when the PRAGMA raises (e.g. the file is not
                    # a database), so the handle is never leaked.
                    conn.close()
                if check_result == "ok":
                    size_mb = path.stat().st_size / (1024 * 1024)
                    result["detail"] = f"Integrity OK ({size_mb:.1f} MB)"
                else:
                    result["status"] = "FAIL"
                    result["detail"] = f"Integrity failed: {check_result[:200]}"
            except Exception as e:
                result["status"] = "FAIL"
                result["detail"] = f"Error: {str(e)[:200]}"

        results.append(result)

    return results
395
+
396
+
397
def check_scripts():
    """Inspect ``*.lock`` files in ``COORD_DIR`` and remove stale ones.

    A lock whose content is a PID is considered stale when ``pid_alive``
    reports that PID as gone; stale locks are deleted. This script's own
    ``LOCK_FILE`` is ignored.

    Returns:
        list[dict]: One ``{"name", "status", "detail", "repaired"}`` dict per
        inspected lock. Status is ``"WARN"`` when a stale lock was removed
        and ``"OK"`` otherwise (including when inspection itself failed, in
        which case ``detail`` carries the error).
    """
    results = []

    # Materialize the listing first: locks may be deleted inside the loop.
    lock_files = list(COORD_DIR.glob("*.lock"))
    for lf in lock_files:
        if lf == LOCK_FILE:
            continue  # Skip our own lock
        result = {"name": f"lock:{lf.name}", "status": "OK", "detail": "", "repaired": False}
        try:
            content = lf.read_text().strip()
            if not content:
                result["detail"] = "Empty lock file"
            elif content.isdigit():
                pid = int(content)
                if pid_alive(pid):
                    result["detail"] = f"PID {pid} alive"
                else:
                    # Auto-repair: remove stale lock
                    lf.unlink()
                    result["status"] = "WARN"
                    result["detail"] = f"PID {pid} dead — lock removed"
                    result["repaired"] = True
            else:
                # Lock owned by something that does not record a PID; it is
                # reported but never removed.
                result["detail"] = f"Non-PID content: {content[:50]}"
        except Exception as e:
            result["detail"] = f"Error checking: {e}"
        results.append(result)

    return results
432
+
433
+
434
def check_logs():
    """Check the size of known log files in ``COORD_DIR``.

    Files at or above ``LOG_FAIL_SIZE`` are reported as FAIL and truncated in
    place (JSON logs to their last 200 entries, text logs to their last 1000
    lines). Files at or above ``LOG_WARN_SIZE`` are reported as WARN. Files
    that do not exist are omitted from the result.

    Returns:
        list[dict]: One ``{"name", "status", "detail", "repaired"}`` dict per
        existing log. ``repaired`` is ``True`` only when truncation actually
        made the file smaller.
    """
    results = []

    # JSON logs to check
    json_logs = [
        COORD_DIR / "heartbeat-log.json",
        COORD_DIR / "reflection-log.json",
        COORD_DIR / "immune-log.json",
        COORD_DIR / "ops-board.json",
        COORD_DIR / "messages.json",
    ]

    # Text logs to check
    text_logs = [
        COORD_DIR / "heartbeat-stdout.log",
        COORD_DIR / "heartbeat-stderr.log",
        COORD_DIR / "reflection-stdout.log",
        COORD_DIR / "reflection-stderr.log",
        COORD_DIR / "immune-stdout.log",
        COORD_DIR / "immune-stderr.log",
    ]

    for log_path in json_logs + text_logs:
        if not log_path.exists():
            continue

        result = {"name": log_path.name, "status": "OK", "detail": "", "repaired": False}
        size = log_path.stat().st_size
        size_mb = size / (1024 * 1024)

        if size >= LOG_FAIL_SIZE:
            result["status"] = "FAIL"
            result["detail"] = f"{size_mb:.1f} MB — exceeds {LOG_FAIL_SIZE // (1024*1024)} MB"

            # Auto-truncate
            try:
                if log_path.suffix == ".json":
                    _truncate_json_log(log_path, keep_entries=200)
                else:
                    _truncate_text_log(log_path, keep_lines=1000)
                new_bytes = log_path.stat().st_size
                new_size = new_bytes / (1024 * 1024)
                result["detail"] += f" -> truncated to {new_size:.1f} MB"
                # The truncate helpers can be no-ops (few but huge entries or
                # lines); only claim a repair when the file really shrank.
                result["repaired"] = new_bytes < size
            except Exception as e:
                result["detail"] += f" -> truncate failed: {e}"

        elif size >= LOG_WARN_SIZE:
            result["status"] = "WARN"
            result["detail"] = f"{size_mb:.1f} MB — approaching limit"
        else:
            result["detail"] = f"{size_mb:.2f} MB"

        results.append(result)

    return results
490
+
491
+
492
def _truncate_json_log(path, keep_entries=200):
    """Trim a JSON log so lists keep only their newest ``keep_entries`` items.

    Args:
        path: ``pathlib.Path`` of the JSON log.
        keep_entries: Number of trailing list items to retain.

    A top-level list is rewritten only when it is longer than the limit. A
    top-level dict has each over-long list value trimmed and is always
    written back. Any other content leaves the file untouched.
    """
    data = load_json(path, default=[])

    if isinstance(data, dict):
        # Some logs are dicts with a list value
        for key, value in data.items():
            if isinstance(value, list) and len(value) > keep_entries:
                data[key] = value[-keep_entries:]
        save_json(path, data)
        return

    if isinstance(data, list) and len(data) > keep_entries:
        save_json(path, data[-keep_entries:])
504
+
505
+
506
def _truncate_text_log(path, keep_lines=1000):
    """Truncate a text log in place to its last ``keep_lines`` lines.

    Args:
        path: ``pathlib.Path`` of the log file.
        keep_lines: Number of trailing lines to retain; ``0`` empties the
            file.

    The file is left untouched when it has ``keep_lines`` lines or fewer.
    """
    # Logs can contain arbitrary bytes written by child processes; decode
    # leniently so one bad byte cannot make truncation fail.
    lines = path.read_text(encoding="utf-8", errors="replace").splitlines()
    if len(lines) <= keep_lines:
        return
    # lines[-0:] would keep everything, so handle keep_lines == 0 explicitly.
    tail = lines[-keep_lines:] if keep_lines > 0 else []
    text = "\n".join(tail) + "\n" if tail else ""
    path.write_text(text, encoding="utf-8")
511
+
512
+
513
def check_disk():
    """Report usage of the root filesystem, measured with ``os.statvfs``.

    Returns:
        list[dict]: A single ``{"name", "status", "detail"}`` dict named
        ``"disk:/"``. Status is ``"FAIL"`` at or above ``DISK_FAIL_PCT``
        percent used (or when the measurement itself fails), ``"WARN"`` at
        or above ``DISK_WARN_PCT``, otherwise ``"OK"``.
    """
    entry = {"name": "disk:/", "status": "OK", "detail": ""}

    try:
        vfs = os.statvfs("/")
        total = vfs.f_frsize * vfs.f_blocks
        avail = vfs.f_frsize * vfs.f_bavail
        pct = ((total - avail) / total) * 100 if total > 0 else 0

        avail_gb = avail / (1024 ** 3)
        total_gb = total / (1024 ** 3)

        # The same summary text is used for every severity level.
        if pct >= DISK_FAIL_PCT:
            entry["status"] = "FAIL"
        elif pct >= DISK_WARN_PCT:
            entry["status"] = "WARN"
        entry["detail"] = f"{pct:.1f}% used ({avail_gb:.1f} GB free of {total_gb:.0f} GB)"
    except Exception as e:
        entry["status"] = "FAIL"
        entry["detail"] = f"Error: {e}"

    return [entry]
542
+
543
+
544
def check_server_crons():
    """Run the optional remote-server health command, rate-limited.

    Set ``SSH_SERVER_CMD`` below to enable the check; while it is empty the
    check is reported as not configured. When enabled, the command is only
    executed if the ``last_ssh_check`` timestamp stored in ``IMMUNE_STATUS``
    is missing, unparseable, or at least ``SSH_CHECK_INTERVAL_HOURS`` old.

    Returns:
        tuple: ``(results, ran)`` where ``results`` is a one-element list
        holding a ``{"name", "status", "detail"}`` dict and ``ran`` tells
        whether the remote command was actually executed this time.
    """
    # Configure your SSH health check command here (empty = skip)
    # Example: 'ssh -p 22 user@myserver.example.com "echo OK"'
    SSH_SERVER_CMD = ""

    entry = {"name": "remote-server", "status": "OK", "detail": ""}

    if not SSH_SERVER_CMD:
        entry["detail"] = "No remote server configured (SSH_SERVER_CMD empty)"
        return [entry], False

    # Check if we should run (every 2 hours based on last check)
    last_ssh_str = load_json(IMMUNE_STATUS).get("last_ssh_check", "")
    due = True

    if last_ssh_str:
        try:
            last_ssh = datetime.strptime(last_ssh_str, "%Y-%m-%d %H:%M")
            hours_ago = (NOW - last_ssh).total_seconds() / 3600
            if hours_ago < SSH_CHECK_INTERVAL_HOURS:
                entry["detail"] = f"Skipped (last check {hours_ago:.1f}h ago, interval {SSH_CHECK_INTERVAL_HOURS}h)"
                due = False
        except Exception:
            # An unreadable timestamp simply means the check runs now.
            pass

    if due:
        rc, stdout, stderr = run_cmd(SSH_SERVER_CMD, timeout=SSH_TIMEOUT)

        if rc != 0:
            entry["status"] = "FAIL"
            err_short = (stderr or "unknown error")[:150]
            entry["detail"] = f"SSH failed (rc={rc}): {err_short}"
        else:
            entry["detail"] = f"Server OK: {stdout[:100]}"

    return [entry], due
589
+
590
+
591
+ # ─── Alerting ─────────────────────────────────────────────────────────────────
592
+
593
def get_system_uptime_minutes():
    """Return minutes since boot, derived from ``sysctl -n kern.boottime``.

    The boot timestamp is taken from the first ``sec = <digits>`` field of
    the command output. If the command fails, times out, or its output does
    not match, ``9999`` is returned so callers treat the system as
    long-running.
    """
    try:
        proc = subprocess.run(
            ["sysctl", "-n", "kern.boottime"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if proc.returncode == 0:
            # Format: { sec = 1709000000, usec = 0 } ...
            match = re.search(r'sec\s*=\s*(\d+)', proc.stdout)
            if match:
                return (time.time() - int(match.group(1))) / 60
    except Exception:
        pass
    return 9999  # Assume long uptime if we can't determine
610
+
611
+
612
def detect_new_failures(current_results, previous_status):
    """Compare current results with the previous run to find NEW failures.

    Rules:
      * Boot grace: nothing is reported during the first 10 minutes of
        system uptime.
      * Regular checks are reported when they become FAIL (previous status
        was not FAIL) or become WARN (previous status was OK).
      * Debounced checks (category ``"server"`` or a key containing
        ``"ssh"``) are reported once, on the run where their streak of
        consecutive non-OK results reaches 2.

    Streak counters are persisted in
    ``COORD_DIR / "immune-consecutive-failures.json"``.

    Args:
        current_results: Mapping of category name to list of result dicts.
        previous_status: Previously saved status document; its ``"checks"``
            mapping has the same shape as ``current_results``.

    Returns:
        list[dict]: The result dicts that should trigger an alert.
    """
    # Boot grace period — suppress alerts when network may still be settling
    uptime = get_system_uptime_minutes()
    if uptime < 10:
        print(f" [GRACE] System uptime {uptime:.0f}min < 10min — suppressing alerts")
        return []

    prev_checks = {}
    for category, items in previous_status.get("checks", {}).items():
        for item in items:
            key = f"{category}:{item.get('name', '')}"
            prev_checks[key] = item.get("status", "OK")

    # Load consecutive failure counts for debounce
    consec_file = COORD_DIR / "immune-consecutive-failures.json"
    consec = load_json(consec_file, default={})
    if not isinstance(consec, dict):
        # A valid-JSON but wrongly shaped file must not break alerting.
        consec = {}

    new_failures = []
    for category, items in current_results.items():
        for item in items:
            key = f"{category}:{item.get('name', '')}"
            current_status = item.get("status", "OK")
            prev_stat = prev_checks.get(key, "OK")

            # Debounce: server/SSH checks need 2+ consecutive failures
            is_server_check = category == "server" or "ssh" in key.lower()
            min_consecutive = 2 if is_server_check else 1

            # check_server_crons() reports rate-limited runs as OK with a
            # detail starting with "Skipped". Such a run carries no new
            # information, so it must neither extend nor reset the streak.
            if is_server_check and str(item.get("detail", "")).startswith("Skipped"):
                continue

            if current_status not in ("FAIL", "WARN"):
                consec[key] = 0
                continue

            streak = consec.get(key, 0) + 1
            consec[key] = streak

            if min_consecutive == 1:
                should_alert = (
                    (current_status == "FAIL" and prev_stat != "FAIL")
                    or (current_status == "WARN" and prev_stat == "OK")
                )
            else:
                # The previous status cannot be used here: by the time the
                # streak reaches the threshold the previous run is already
                # FAIL, so a "status changed" test would never fire. Alert
                # exactly once, on the run where the threshold is reached.
                should_alert = streak == min_consecutive

            if should_alert:
                new_failures.append(item)

    save_json(consec_file, consec)
    return new_failures
659
+
660
+
661
def send_failure_alerts(new_failures):
    """Send one alert summarising newly detected failures or warnings.

    At most one alert goes out per 30 minutes: the time of the last sent
    alert is kept in ``immune-last-alert.txt`` inside COORD_DIR. FAIL items
    take priority; WARN items are only reported when there is no FAIL.

    Args:
        new_failures: List of result dicts with ``name``, ``status`` and
            ``detail`` keys. An empty list is a no-op.
    """
    if not new_failures:
        return

    # Global alert cooldown — max 1 alert per 30 minutes
    stamp_format = "%Y-%m-%d %H:%M"
    cooldown_file = COORD_DIR / "immune-last-alert.txt"
    if cooldown_file.exists():
        try:
            previous_alert = datetime.strptime(cooldown_file.read_text().strip(), stamp_format)
            elapsed_min = (NOW - previous_alert).total_seconds() / 60
            if elapsed_min < 30:
                print(f" [COOLDOWN] Last alert {elapsed_min:.0f}min ago — suppressing")
                return
        except Exception:
            # Unreadable or malformed timestamp: behave as if no alert was sent.
            pass

    fails = [entry for entry in new_failures if entry["status"] == "FAIL"]
    warns = [entry for entry in new_failures if entry["status"] == "WARN"]

    sent = False
    if fails:
        body = "\n".join(f"- {entry['name']}: {entry['detail']}" for entry in fails[:5])
        overflow = len(fails) - 5
        if overflow > 0:
            body += f"\n... +{overflow} more"
        sent = send_alert(
            "NEXO Immune FAIL",
            f"{len(fails)} new failure(s):\n{body}"
        )
    elif warns:
        body = "\n".join(f"- {entry['name']}: {entry['detail']}" for entry in warns[:3])
        sent = send_alert(
            "NEXO Immune WARN",
            f"{len(warns)} new warning(s):\n{body}"
        )

    # Only start a new cooldown window when an alert actually went out.
    if sent:
        cooldown_file.write_text(NOW.strftime(stamp_format))
702
+
703
+
704
+ # ─── Main ─────────────────────────────────────────────────────────────────────
705
+
706
def main():
    """Entry point: print the banner, apply the run gates, then run all checks.

    The run is skipped during the configured skip hours, and when another
    instance already holds the exclusive lock on LOCK_FILE. The lock is
    released and the file closed once the checks finish, even on error.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"NEXO Immune System — {NOW.strftime('%Y-%m-%d %H:%M:%S')}")
    print(rule)

    # Skip hours gate
    if is_skip_hours():
        print(f"[SKIP] Hour {NOW.hour} is within skip range ({SKIP_START}:00-{SKIP_END}:00). Exiting.")
        return

    # Ensure coordination directory exists
    COORD_DIR.mkdir(parents=True, exist_ok=True)

    # Process lock (fcntl): non-blocking, so a second instance exits at once
    lock_handle = None
    try:
        lock_handle = open(LOCK_FILE, "w")
        fcntl.flock(lock_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except (IOError, OSError):
        print("[LOCKED] Another immune instance is running. Exiting.")
        if lock_handle is not None:
            lock_handle.close()
        return

    try:
        _run_checks(lock_handle)
    finally:
        # Best-effort cleanup; a failure to unlock must not mask the real error.
        try:
            fcntl.flock(lock_handle, fcntl.LOCK_UN)
            lock_handle.close()
        except Exception:
            pass
738
+
739
+
740
def _run_checks(lock_fd):
    """Run the seven health checks, report them, alert, and persist the outcome.

    Steps, in order: run each check and print its results; print a summary
    with any auto-repairs; detect and alert on new failures; save the status
    file; append a summary row to the bounded log; and, when there are
    failures, more than two warnings, or repairs, hand the findings to the
    CLI triage step.

    Args:
        lock_fd: The held lock file object. It is not used in this function;
            the caller keeps ownership and releases it.
    """
    previous_status = load_json(IMMUNE_STATUS)
    stamp = NOW.strftime("%Y-%m-%d %H:%M")

    all_results = {}
    repairs = []

    def _report(section, repair_note=None):
        # Print one line per result; record a repair note for repaired items.
        for entry in all_results[section]:
            print(f" [{entry['status']}] {entry['name']}: {entry['detail']}")
            if repair_note is not None and entry.get("repaired"):
                repairs.append(repair_note.format(name=entry['name']))

    def _section(section, heading, runner, repair_note=None):
        # Announce, execute and report a single check category.
        print(heading)
        all_results[section] = runner()
        _report(section, repair_note)

    # 1-6. Local checks
    _section("tokens", "\n[1/7] Checking tokens...", check_tokens)
    _section("agents", "\n[2/7] Checking LaunchAgents...", check_launch_agents,
             "LaunchAgent {name} reloaded")
    _section("databases", "\n[3/7] Checking databases...", check_databases)
    _section("scripts", "\n[4/7] Checking scripts & locks...", check_scripts,
             "Stale lock {name} removed")
    _section("logs", "\n[5/7] Checking log sizes...", check_logs,
             "Log {name} truncated")
    _section("disk", "\n[6/7] Checking disk usage...", check_disk)

    # 7. Server crons (also reports whether the SSH probe actually ran)
    print("\n[7/7] Checking server crons...")
    server_results, ssh_ran = check_server_crons()
    all_results["server"] = server_results
    _report("server")

    # ─── Summary ──────────────────────────────────────────────────────────
    counts = {"OK": 0, "WARN": 0, "FAIL": 0}
    for group in all_results.values():
        for entry in group:
            label = entry.get("status", "OK")
            if label in counts:
                counts[label] += 1

    total = sum(counts.values())
    divider = "─" * 60

    print(f"\n{divider}")
    print(f"SUMMARY: {total} checks — {counts['OK']} OK, {counts['WARN']} WARN, {counts['FAIL']} FAIL")
    if repairs:
        print(f"AUTO-REPAIRS: {len(repairs)}")
        for note in repairs:
            print(f" - {note}")
    print(f"{divider}\n")

    # ─── Detect new failures & alert ──────────────────────────────────────
    new_failures = detect_new_failures(all_results, previous_status)
    if not new_failures:
        print("[OK] No new failures.")
    else:
        print(f"[ALERT] {len(new_failures)} new failure(s)/warning(s) detected:")
        for failure in new_failures:
            print(f" - [{failure['status']}] {failure['name']}: {failure['detail']}")
        send_failure_alerts(new_failures)

    # ─── Save status ──────────────────────────────────────────────────────
    status = {
        "last_run": stamp,
        "counts": counts,
        "repairs": repairs,
        "new_failures": len(new_failures),
        "checks": all_results,
    }
    # Carry the last real SSH probe time forward when this run skipped it.
    if ssh_ran:
        status["last_ssh_check"] = stamp
    elif "last_ssh_check" in previous_status:
        status["last_ssh_check"] = previous_status["last_ssh_check"]

    save_json(IMMUNE_STATUS, status)

    # ─── Append to log ────────────────────────────────────────────────────
    log_entry = {
        "ts": stamp,
        "ok": counts["OK"],
        "warn": counts["WARN"],
        "fail": counts["FAIL"],
        "repairs": len(repairs),
        "new_failures": len(new_failures),
    }

    history = load_json(IMMUNE_LOG, default=[])
    if not isinstance(history, list):
        history = []
    history.append(log_entry)
    if len(history) > MAX_LOG_ENTRIES:
        # Keep only the most recent entries.
        history = history[-MAX_LOG_ENTRIES:]
    save_json(IMMUNE_LOG, history)

    print(f"Status saved to {IMMUNE_STATUS}")
    print(f"Log appended to {IMMUNE_LOG} ({len(history)} entries)")

    # ─── Stage B: CLI interpretation (only when issues found) ────────────
    needs_triage = counts["FAIL"] > 0 or counts["WARN"] > 2 or bool(repairs)
    if needs_triage:
        _run_cli_triage(all_results, repairs, counts)
870
+
871
+
872
def _run_cli_triage(all_results: dict, repairs: list, counts: dict):
    """Ask the configured automation backend to triage the scan findings.

    Serialises the findings to JSON, embeds them in a prompt that tells the
    backend to write a short triage report to ``immune-triage.md`` in
    COORD_DIR, and prints the outcome. Every failure mode is reported on
    stdout and swallowed, so triage never breaks the scan itself.

    Args:
        all_results: Mapping of category name -> list of check result dicts.
        repairs: Descriptions of the auto-repairs made during this run.
        counts: Totals per status (``OK`` / ``WARN`` / ``FAIL``).
    """
    # One value feeds both the backend call and the timeout message, so the
    # log line cannot drift from the timeout actually in force.
    timeout_seconds = 21600

    triage_file = COORD_DIR / "immune-triage.md"
    findings_json = json.dumps({
        "timestamp": NOW.strftime("%Y-%m-%d %H:%M"),
        "counts": counts,
        "repairs": repairs,
        "checks": all_results,
    }, indent=2, default=str)

    prompt = f"""You are the NEXO Immune System triage analyst.

Below are the raw health check results from a scheduled scan. Your job:

1. Identify which failures are REAL problems vs transient/expected
2. Group related issues (e.g. SSH failure + server cron failure = same root cause)
3. Prioritize: what needs attention NOW vs can wait
4. For each real issue, suggest a specific remediation action
5. Note any patterns across recent runs if visible

Write a concise triage report to: {triage_file}

Format:
## Immune Triage — YYYY-MM-DD HH:MM

### Critical (act now)
- ...

### Monitor (watch next run)
- ...

### Resolved (auto-repaired)
- ...

### Patterns
- ...

Raw findings:
{findings_json}

Write the report. Be concise — max 40 lines."""

    print("\n[TRIAGE] Running CLI interpretation...")
    try:
        result = run_automation_prompt(
            prompt,
            model=_USER_MODEL or "opus",
            timeout=timeout_seconds,
            output_format="text",
            allowed_tools="Read,Write,Edit,Glob,Grep,Bash,mcp__nexo__*",
        )
        if result.returncode == 0:
            print(f"[TRIAGE] Report written to {triage_file}")
        else:
            # stderr may be None when it was not captured; avoid slicing None.
            stderr_excerpt = (result.stderr or "")[:200]
            print(f"[TRIAGE] CLI exited {result.returncode}: {stderr_excerpt}")
    except AutomationBackendUnavailableError as e:
        print(f"[TRIAGE] Skipping triage: {e}")
    except subprocess.TimeoutExpired:
        print(f"[TRIAGE] CLI timed out ({timeout_seconds}s)")
    except Exception as e:
        print(f"[TRIAGE] Error: {e}")
933
+
934
+
935
# Run the full health scan only when executed as a script, not on import.
if __name__ == "__main__":
    main()