nexo-brain 2.3.0 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/nexo-brain.js +92 -9
- package/bin/postinstall.js +22 -15
- package/package.json +7 -4
- package/src/auto_update.py +194 -5
- package/src/crons/sync.py +6 -2
- package/src/db/_core.py +1 -0
- package/src/db/_entities.py +1 -0
- package/src/db/_episodic.py +1 -0
- package/src/db/_learnings.py +1 -0
- package/src/db/_reminders.py +1 -0
- package/src/db/_schema.py +11 -1
- package/src/db/_sessions.py +1 -0
- package/src/db/_skills.py +1 -0
- package/src/hooks/capture-tool-logs.sh +23 -6
- package/src/hooks/session-start.sh +4 -3
- package/src/plugin_loader.py +1 -0
- package/src/plugins/update.py +377 -26
- package/src/scripts/deep-sleep/apply_findings.py +1 -0
- package/src/scripts/deep-sleep/collect.py +1 -0
- package/src/scripts/deep-sleep/extract.py +1 -0
- package/src/scripts/deep-sleep/synthesize.py +1 -0
- package/src/scripts/nexo-catchup.py +29 -4
- package/src/scripts/nexo-daily-self-audit.py +21 -1
- package/src/scripts/nexo-evolution-run.py +21 -1
- package/src/scripts/nexo-learning-housekeep.py +1 -0
- package/src/scripts/nexo-postmortem-consolidator.py +34 -9
- package/src/scripts/nexo-sleep.py +32 -10
- package/src/scripts/nexo-synthesis.py +29 -9
- package/src/scripts/nexo-update.sh +109 -7
- package/src/scripts/nexo-watchdog.sh +122 -58
- package/src/server.py +66 -1
- package/src/tools_coordination.py +1 -0
- package/src/tools_sessions.py +1 -0
- package/scripts/migrate-to-unified 2.sh +0 -813
- package/scripts/migrate-to-unified.sh +0 -813
- package/scripts/migrate-v1.5-to-v1.6 2.py +0 -778
- package/scripts/migrate-v1.5-to-v1.6.py +0 -778
- package/scripts/migrate-v1.7-to-v1.8 2.py +0 -214
- package/scripts/migrate-v1.7-to-v1.8.py +0 -214
- package/scripts/nexo-preflight.sh +0 -236
- package/scripts/pre-commit-check 2.sh +0 -55
- package/scripts/pre-commit-check.sh +0 -55
- package/src/__pycache__/auto_close_sessions.cpython-314.pyc +0 -0
- package/src/__pycache__/auto_update.cpython-310.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-310.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-314.pyc +0 -0
- package/src/__pycache__/kg_populate.cpython-310.pyc +0 -0
- package/src/__pycache__/knowledge_graph.cpython-310.pyc +0 -0
- package/src/__pycache__/plugin_loader.cpython-310.pyc +0 -0
- package/src/__pycache__/plugin_loader.cpython-314.pyc +0 -0
- package/src/__pycache__/tools_coordination.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_credentials.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_learnings.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_menu.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_reminders.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_reminders_crud.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_sessions.cpython-310.pyc +0 -0
- package/src/__pycache__/tools_task_history.cpython-310.pyc +0 -0
- package/src/auto_close_sessions 2.py +0 -159
- package/src/auto_update 2.py +0 -634
- package/src/claim_graph 2.py +0 -323
- package/src/cognitive/__init__ 2.py +0 -62
- package/src/cognitive/__pycache__/__init__.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_core.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_core.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_core.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_decay.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_decay.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_decay.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_ingest.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_ingest.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_ingest.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_memory.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_memory.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_memory.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_search.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_search.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_search.cpython-314.pyc +0 -0
- package/src/cognitive/__pycache__/_trust.cpython-310.pyc +0 -0
- package/src/cognitive/__pycache__/_trust.cpython-312.pyc +0 -0
- package/src/cognitive/__pycache__/_trust.cpython-314.pyc +0 -0
- package/src/cognitive/_core 2.py +0 -567
- package/src/cognitive/_decay 2.py +0 -382
- package/src/cognitive/_ingest 2.py +0 -892
- package/src/cognitive/_memory 2.py +0 -912
- package/src/cognitive/_search 2.py +0 -949
- package/src/cognitive/_trust 2.py +0 -464
- package/src/crons/__pycache__/sync.cpython-314.pyc +0 -0
- package/src/crons/manifest 2.json +0 -106
- package/src/crons/sync 2.py +0 -217
- package/src/dashboard/__init__ 2.py +0 -0
- package/src/dashboard/__pycache__/__init__.cpython-310.pyc +0 -0
- package/src/dashboard/__pycache__/app.cpython-310.pyc +0 -0
- package/src/dashboard/app 2.py +0 -789
- package/src/db/__init__ 2.py +0 -89
- package/src/db/__pycache__/__init__.cpython-310.pyc +0 -0
- package/src/db/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/db/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_core.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_core.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_core.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_credentials.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_credentials.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_credentials.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_cron_runs.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_cron_runs.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_entities.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_entities.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_entities.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_episodic.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_episodic.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_episodic.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_evolution.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_evolution.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_evolution.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_fts.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_fts.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_fts.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_learnings.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_learnings.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_learnings.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_reminders.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_reminders.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_reminders.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_schema.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_schema.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_schema.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_sessions.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_sessions.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_sessions.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_skills.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_skills.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_skills.cpython-314.pyc +0 -0
- package/src/db/__pycache__/_tasks.cpython-310.pyc +0 -0
- package/src/db/__pycache__/_tasks.cpython-312.pyc +0 -0
- package/src/db/__pycache__/_tasks.cpython-314.pyc +0 -0
- package/src/db/_core 2.py +0 -417
- package/src/db/_credentials 2.py +0 -124
- package/src/db/_entities 2.py +0 -178
- package/src/db/_episodic 2.py +0 -738
- package/src/db/_evolution 2.py +0 -54
- package/src/db/_fts 2.py +0 -406
- package/src/db/_learnings 2.py +0 -168
- package/src/db/_reminders 2.py +0 -338
- package/src/db/_schema 2.py +0 -364
- package/src/db/_sessions 2.py +0 -300
- package/src/db/_tasks 2.py +0 -91
- package/src/evolution_cycle 2.py +0 -266
- package/src/hnsw_index 2.py +0 -254
- package/src/hooks/auto_capture 2.py +0 -208
- package/src/hooks/caffeinate-guard 2.sh +0 -8
- package/src/hooks/capture-session 2.sh +0 -21
- package/src/hooks/capture-tool-logs 2.sh +0 -127
- package/src/hooks/daily-briefing-check 2.sh +0 -33
- package/src/hooks/inbox-hook 2.sh +0 -76
- package/src/hooks/post-compact 2.sh +0 -148
- package/src/hooks/pre-compact 2.sh +0 -151
- package/src/hooks/session-start 2.sh +0 -268
- package/src/hooks/session-stop 2.sh +0 -140
- package/src/kg_populate 2.py +0 -290
- package/src/knowledge_graph 2.py +0 -257
- package/src/maintenance 2.py +0 -59
- package/src/migrate_embeddings 2.py +0 -122
- package/src/plugin_loader 2.py +0 -202
- package/src/plugins/__init__ 2.py +0 -0
- package/src/plugins/__pycache__/__init__ 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/__init__.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/plugins/__pycache__/adaptive_mode 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/adaptive_mode.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/adaptive_mode.cpython-314.pyc +0 -0
- package/src/plugins/__pycache__/agents 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/agents.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/artifact_registry 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/artifact_registry.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/backup 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/backup.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/cognitive_memory 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/cognitive_memory.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/core_rules 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/core_rules.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/cortex 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/cortex.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/entities 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/entities.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/episodic_memory 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/episodic_memory.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/evolution 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/evolution.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/guard 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/guard.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/knowledge_graph_tools 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/knowledge_graph_tools.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/preferences 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/preferences.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/schedule.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/schedule.cpython-314.pyc +0 -0
- package/src/plugins/__pycache__/skills.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/skills.cpython-314.pyc +0 -0
- package/src/plugins/__pycache__/update 2.cpython-310.pyc +0 -0
- package/src/plugins/__pycache__/update.cpython-310.pyc +0 -0
- package/src/plugins/adaptive_mode 2.py +0 -805
- package/src/plugins/agents 2.py +0 -52
- package/src/plugins/artifact_registry 2.py +0 -450
- package/src/plugins/backup 2.py +0 -104
- package/src/plugins/cognitive_memory 2.py +0 -564
- package/src/plugins/core_rules 2.py +0 -252
- package/src/plugins/cortex 2.py +0 -299
- package/src/plugins/entities 2.py +0 -67
- package/src/plugins/episodic_memory 2.py +0 -533
- package/src/plugins/evolution 2.py +0 -115
- package/src/plugins/guard 2.py +0 -746
- package/src/plugins/knowledge_graph_tools 2.py +0 -105
- package/src/plugins/preferences 2.py +0 -47
- package/src/plugins/update 2.py +0 -256
- package/src/requirements 2.txt +0 -12
- package/src/rules/__init__ 2.py +0 -0
- package/src/rules/core-rules 2.json +0 -331
- package/src/rules/migrate 2.py +0 -207
- package/src/scripts/__pycache__/nexo-auto-update.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-catchup.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-cognitive-decay.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-daily-self-audit.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-evolution-run.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-followup-hygiene.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-immune.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-install.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-learning-housekeep.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-learning-validator.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-migrate.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-postmortem-consolidator.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-pre-commit.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-proactive-dashboard.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-reflection.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-runtime-preflight.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-send-email.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-send-reply.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-sleep.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-synthesis.cpython-314.pyc +0 -0
- package/src/scripts/__pycache__/nexo-watchdog-smoke.cpython-314.pyc +0 -0
- package/src/scripts/check-context 2.py +0 -264
- package/src/scripts/nexo-auto-update 2.py +0 -6
- package/src/scripts/nexo-backup 2.sh +0 -25
- package/src/scripts/nexo-brain-activation 2.sh +0 -140
- package/src/scripts/nexo-catchup 2.py +0 -242
- package/src/scripts/nexo-cognitive-decay 2.py +0 -182
- package/src/scripts/nexo-daily-self-audit 2.py +0 -552
- package/src/scripts/nexo-deep-sleep 2.sh +0 -97
- package/src/scripts/nexo-evolution-run 2.py +0 -597
- package/src/scripts/nexo-followup-hygiene 2.py +0 -112
- package/src/scripts/nexo-github-monitor 2.py +0 -256
- package/src/scripts/nexo-immune 2.py +0 -927
- package/src/scripts/nexo-inbox-hook 2.sh +0 -74
- package/src/scripts/nexo-install 2.py +0 -6
- package/src/scripts/nexo-learning-housekeep 2.py +0 -245
- package/src/scripts/nexo-learning-validator 2.py +0 -207
- package/src/scripts/nexo-migrate 2.py +0 -232
- package/src/scripts/nexo-postmortem-consolidator 2.py +0 -421
- package/src/scripts/nexo-pre-commit 2.py +0 -120
- package/src/scripts/nexo-prevent-sleep 2.sh +0 -29
- package/src/scripts/nexo-proactive-dashboard 2.py +0 -345
- package/src/scripts/nexo-reflection 2.py +0 -253
- package/src/scripts/nexo-runtime-preflight 2.py +0 -274
- package/src/scripts/nexo-send-email 2.py +0 -25
- package/src/scripts/nexo-send-email.py +0 -25
- package/src/scripts/nexo-send-reply 2.py +0 -178
- package/src/scripts/nexo-send-reply.py +0 -178
- package/src/scripts/nexo-sleep 2.py +0 -592
- package/src/scripts/nexo-snapshot-restore 2.sh +0 -35
- package/src/scripts/nexo-synthesis 2.py +0 -253
- package/src/scripts/nexo-tcc-approve 2.sh +0 -79
- package/src/scripts/nexo-update 2.sh +0 -161
- package/src/scripts/nexo-watchdog 2.sh +0 -878
- package/src/scripts/nexo-watchdog-smoke 2.py +0 -119
- package/src/server 2.py +0 -733
- package/src/storage_router 2.py +0 -32
- package/src/tools_coordination 2.py +0 -102
- package/src/tools_credentials 2.py +0 -68
- package/src/tools_learnings 2.py +0 -220
- package/src/tools_menu 2.py +0 -227
- package/src/tools_reminders 2.py +0 -86
- package/src/tools_reminders_crud 2.py +0 -159
- package/src/tools_sessions 2.py +0 -476
- package/src/tools_task_history 2.py +0 -57
- package/templates/CLAUDE.md 2.template +0 -63
- package/templates/openclaw 2.json +0 -13
- package/tests/__init__ 2.py +0 -0
- package/tests/__init__.py +0 -0
- package/tests/conftest 2.py +0 -71
- package/tests/conftest.py +0 -71
- package/tests/test_cognitive 2.py +0 -205
- package/tests/test_cognitive.py +0 -205
- package/tests/test_knowledge_graph 2.py +0 -140
- package/tests/test_knowledge_graph.py +0 -140
- package/tests/test_migrations 2.py +0 -137
- package/tests/test_migrations.py +0 -137
|
@@ -1,878 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# ============================================================================
|
|
3
|
-
# NEXO Watchdog — Comprehensive health monitor for all NEXO services
|
|
4
|
-
# Cron: */5 * * * * NEXO_HOME/scripts/nexo-watchdog.sh
|
|
5
|
-
# ============================================================================
|
|
6
|
-
# Monitors ALL LaunchAgents, cron jobs, and background processes.
|
|
7
|
-
# Outputs: watchdog-status.json (machine), watchdog-report.txt (human),
|
|
8
|
-
# .watchdog-alert (if any FAIL detected)
|
|
9
|
-
# ============================================================================
|
|
10
|
-
set -uo pipefail
|
|
11
|
-
|
|
12
|
-
# === PATHS ===
|
|
13
|
-
HOME_DIR="$HOME"
|
|
14
|
-
NEXO_HOME="${NEXO_HOME:-$HOME/.nexo}"
|
|
15
|
-
NEXO_DIR="$NEXO_HOME"
|
|
16
|
-
CORTEX_DIR="$NEXO_HOME/brain"
|
|
17
|
-
OPS_DIR="$NEXO_HOME/operations"
|
|
18
|
-
LOG_DIR="$NEXO_HOME/logs"
|
|
19
|
-
LOG="$LOG_DIR/watchdog.log"
|
|
20
|
-
STATUS_JSON="$OPS_DIR/watchdog-status.json"
|
|
21
|
-
REPORT_TXT="$OPS_DIR/watchdog-report.txt"
|
|
22
|
-
ALERT_FILE="$OPS_DIR/.watchdog-alert"
|
|
23
|
-
HASH_REGISTRY="$NEXO_HOME/scripts/.watchdog-hashes"
|
|
24
|
-
FAIL_COUNT_FILE="$NEXO_HOME/scripts/.watchdog-fails"
|
|
25
|
-
MAX_FAILS=3
|
|
26
|
-
|
|
27
|
-
mkdir -p "$LOG_DIR" "$OPS_DIR"
|
|
28
|
-
|
|
29
|
-
TS=$(date "+%Y-%m-%d %H:%M:%S")
|
|
30
|
-
TS_EPOCH=$(date +%s)
|
|
31
|
-
|
|
32
|
-
# Append one line to the watchdog log file ($LOG), prefixed with the
# run timestamp ($TS) in square brackets.
#   $1 - message text
log() {
    printf '%s\n' "[$TS] $1" >> "$LOG"
}
|
|
33
|
-
|
|
34
|
-
# ============================================================================
|
|
35
|
-
# MONITOR REGISTRY — Add new monitors here
|
|
36
|
-
# ============================================================================
|
|
37
|
-
# Format: NAME|PLIST_ID|LOG_STDOUT|LOG_STDERR|MAX_STALE_SECS|PROCESS_GREP|SCHEDULE_DESC[|TYPE]  (TYPE defaults to "core" when omitted)
|
|
38
|
-
#
|
|
39
|
-
# MAX_STALE_SECS: how old stdout log can be before WARN.
|
|
40
|
-
# 0 = skip staleness check (for one-shot or infrequent tasks)
|
|
41
|
-
# WARN at MAX_STALE_SECS, FAIL at 3x MAX_STALE_SECS
|
|
42
|
-
# PROCESS_GREP: pattern to grep in ps (empty = skip process check)
|
|
43
|
-
# ============================================================================
|
|
44
|
-
# TYPE field: "core" = part of NEXO (goes to public repo), "personal" = user-specific
|
|
45
|
-
# Format: NAME|PLIST_ID|LOG_STDOUT|LOG_STDERR|MAX_STALE_SECS|PROCESS_GREP|SCHEDULE_DESC|TYPE
|
|
46
|
-
# Add your own monitors below. Core NEXO services are listed as examples.
|
|
47
|
-
MONITORS=(
|
|
48
|
-
"Catchup|com.nexo.catchup|$NEXO_HOME/logs/catchup-stdout.log|$NEXO_HOME/logs/catchup-stderr.log|0||RunAtLoad once|core"
|
|
49
|
-
"Cognitive Decay|com.nexo.cognitive-decay|$NEXO_HOME/logs/cognitive-decay-stdout.log|$NEXO_HOME/logs/cognitive-decay-stderr.log|90000||Daily 3:00 AM|core"
|
|
50
|
-
"Evolution|com.nexo.evolution|$NEXO_HOME/logs/evolution-stdout.log|$NEXO_HOME/logs/evolution-stderr.log|0||Weekly Sun 3:00 AM|core"
|
|
51
|
-
"GitHub Monitor|com.nexo.github-monitor|$NEXO_HOME/logs/github-monitor-stdout.log|$NEXO_HOME/logs/github-monitor-stderr.log|90000||Daily 8:00 AM|core"
|
|
52
|
-
"Immune|com.nexo.immune|$NEXO_HOME/logs/immune-stdout.log|$NEXO_HOME/logs/immune-stderr.log|3600||Every 30 min|core"
|
|
53
|
-
"Postmortem|com.nexo.postmortem|$NEXO_HOME/logs/postmortem-stdout.log|$NEXO_HOME/logs/postmortem-stderr.log|90000||Daily 23:30|core"
|
|
54
|
-
"Prevent Sleep|com.nexo.prevent-sleep|||0|caffeinate|KeepAlive|core"
|
|
55
|
-
"Self Audit|com.nexo.self-audit|$NEXO_HOME/logs/self-audit-stdout.log|$NEXO_HOME/logs/self-audit-stderr.log|90000||Daily 7:00 AM|core"
|
|
56
|
-
"Sleep|com.nexo.sleep|$NEXO_HOME/logs/sleep-stdout.log|$NEXO_HOME/logs/sleep-stderr.log|90000||Daily 4:00 AM|core"
|
|
57
|
-
"Synthesis|com.nexo.synthesis|$NEXO_HOME/logs/synthesis-stdout.log|$NEXO_HOME/logs/synthesis-stderr.log|10800||Every 2 hours|core"
|
|
58
|
-
"Deep Sleep|com.nexo.deep-sleep|$NEXO_HOME/logs/deep-sleep-stdout.log|$NEXO_HOME/logs/deep-sleep-stderr.log|90000||Daily 4:30 AM|core"
|
|
59
|
-
"Followup Hygiene|com.nexo.followup-hygiene|$NEXO_HOME/logs/followup-hygiene-stdout.log|$NEXO_HOME/logs/followup-hygiene-stderr.log|604800||Weekly Sun 5:00 AM|core"
|
|
60
|
-
# Add your own personal monitors below (type "personal"):
|
|
61
|
-
# "My Service|com.nexo.my-service|$NEXO_HOME/logs/my-service.log||3600||Every 30 min|personal"
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
# Cron jobs to check (NAME|SCRIPT|CHECK_PATH|MAX_STALE_SECS|SCHEDULE)
|
|
65
|
-
CRON_MONITORS=(
|
|
66
|
-
"Backup Cron|$NEXO_DIR/backup_cron.sh|$NEXO_DIR/backups/|7200|Hourly"
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
# Error patterns to search in stderr logs (last 50 lines)
|
|
70
|
-
ERROR_PATTERNS="Traceback|Error:|CRITICAL|FATAL|ModuleNotFoundError|PermissionError|FileNotFoundError|ConnectionRefused|Errno"
|
|
71
|
-
|
|
72
|
-
# ============================================================================
|
|
73
|
-
# HELPER FUNCTIONS
|
|
74
|
-
# ============================================================================
|
|
75
|
-
|
|
76
|
-
UID_NUM=$(id -u)
|
|
77
|
-
REPAIR_LOG="$LOG_DIR/watchdog-repairs.log"
|
|
78
|
-
TOTAL_HEALED=0
|
|
79
|
-
IS_MACOS=false
|
|
80
|
-
[ "$(uname)" = "Darwin" ] && IS_MACOS=true
|
|
81
|
-
|
|
82
|
-
# Record a repair action in two places: a "[$TS] REPAIR: <msg>" line is
# appended to $REPAIR_LOG, and the same "REPAIR: <msg>" text is passed to log().
#   $1 - description of the repair that was performed
log_repair() {
    local entry="REPAIR: $1"
    printf '%s\n' "[$TS] $entry" >> "$REPAIR_LOG"
    log "$entry"
}
|
|
83
|
-
|
|
84
|
-
# Succeeds when `launchctl list <label>` succeeds for the given label.
# Always fails when $IS_MACOS is false. All launchctl output is discarded.
#   $1 - launchd label (plist id)
is_loaded() {
    if ! $IS_MACOS; then
        return 1
    fi
    launchctl list "$1" > /dev/null 2>&1
}
|
|
87
|
-
|
|
88
|
-
# ============================================================================
|
|
89
|
-
# AUTO-REPAIR FUNCTIONS
|
|
90
|
-
# ============================================================================
|
|
91
|
-
|
|
92
|
-
# Attempt a first-level repair of a LaunchAgent (macOS only).
#   $1 - launchd label (plist id)
#   $2 - process pattern handed to process_running (may be empty)
# Two repairs are tried, in this order:
#   1. Agent not loaded: bootstrap its plist from ~/Library/LaunchAgents
#      into the gui/<uid> domain, then re-check that it is loaded.
#   2. Agent loaded but the expected process is absent: kickstart it,
#      then re-check that the process is running.
# Returns 0 only when a repair was performed and confirmed; 1 otherwise.
try_repair_launchagent() {
    $IS_MACOS || return 1
    local plist_id="$1"
    local proc_grep="$2"
    local plist_file="$HOME_DIR/Library/LaunchAgents/${plist_id}.plist"
    local domain="gui/$UID_NUM"

    # Repair 1: not loaded — bootstrap, then confirm
    if ! is_loaded "$plist_id"; then
        [ -f "$plist_file" ] || return 1
        launchctl bootstrap "$domain" "$plist_file" 2>/dev/null
        sleep 1
        is_loaded "$plist_id" || return 1
        log_repair "$plist_id: bootstrapped successfully"
        return 0
    fi

    # Repair 2: loaded, but the expected process is missing — kickstart
    [ -n "$proc_grep" ] || return 1
    if process_running "$proc_grep"; then
        return 1
    fi
    launchctl kickstart "$domain/$plist_id" 2>/dev/null
    sleep 2
    process_running "$proc_grep" || return 1
    log_repair "$plist_id: kickstarted process '$proc_grep'"
    return 0
}
|
|
123
|
-
|
|
124
|
-
# Repair a cron script whose only problem is a missing executable bit.
#   $1 - path to the script
# Returns 0 when the file existed, was not executable, and is executable
# after chmod +x (the repair is logged via log_repair); 1 in every other case.
try_repair_cron() {
    local script="$1"

    # Only a regular file that is currently non-executable is repairable.
    [ -f "$script" ] || return 1
    if [ -x "$script" ]; then
        return 1
    fi

    chmod +x "$script"
    [ -x "$script" ] || return 1

    log_repair "$script: made executable"
    return 0
}
|
|
138
|
-
|
|
139
|
-
# Print the shell-quoted command line stored in a LaunchAgent plist.
#   $1 - path to the plist file
# Fails (exit 1, no output) when the plist cannot be read, has no
# ProgramArguments, is a KeepAlive service, or has neither
# StartCalendarInterval nor StartInterval (i.e. is not a scheduled job).
# The path is passed as argv rather than pasted into the Python source, and
# every argument is quoted with shlex.quote so arguments containing spaces or
# shell metacharacters survive the later `bash -c`.
_watchdog_plist_command() {
    python3 -c '
import plistlib, shlex, sys
try:
    with open(sys.argv[1], "rb") as f:
        d = plistlib.load(f)
except Exception:
    sys.exit(1)
args = d.get("ProgramArguments", [])
# Skip KeepAlive services (they should be running, not re-executed)
if d.get("KeepAlive"):
    sys.exit(1)
# Skip services without a schedule (RunAtLoad only)
if not d.get("StartCalendarInterval") and not d.get("StartInterval"):
    sys.exit(1)
if not args:
    sys.exit(1)
print(" ".join(shlex.quote(str(a)) for a in args))
' "$1" 2>/dev/null
}

# Re-execute a scheduled LaunchAgent job that missed its run (macOS only).
#   $1 - launchd label (plist id); the plist is looked up in
#        $HOME_DIR/Library/LaunchAgents/<id>.plist
# The job's ProgramArguments are run in the background with output appended to
# $LOG_DIR/watchdog-reexec.log, limited to 5 minutes when a timeout utility
# (timeout or gtimeout) is installed.
# Returns 0 when the command is still running, or has already exited
# successfully, two seconds after launch; 1 otherwise.
try_reexecute_missed_cron() {
    $IS_MACOS || return 1
    local plist_id="$1"
    local plist_file="$HOME_DIR/Library/LaunchAgents/${plist_id}.plist"

    if [ ! -f "$plist_file" ]; then
        log "Re-execute skipped: no plist for $plist_id"
        return 1
    fi

    # Extract the full command from the plist. The status is checked directly
    # on the assignment: testing $? after `[ -z "$cmd" ]` would read the
    # status of that test instead of python3's.
    local cmd
    cmd=$(_watchdog_plist_command "$plist_file") || return 1
    [ -n "$cmd" ] || return 1

    log "Re-executing missed cron: $plist_id → $cmd"

    # Stock macOS has no timeout(1); GNU coreutils may provide it as gtimeout.
    local timeout_bin=""
    if command -v timeout > /dev/null 2>&1; then
        timeout_bin="timeout"
    elif command -v gtimeout > /dev/null 2>&1; then
        timeout_bin="gtimeout"
    fi

    # Run in background (5 min max when a timeout utility exists)
    if [ -n "$timeout_bin" ]; then
        "$timeout_bin" 300 bash -c "$cmd" >> "$LOG_DIR/watchdog-reexec.log" 2>&1 &
    else
        log "No timeout utility found; running $plist_id without a time limit"
        bash -c "$cmd" >> "$LOG_DIR/watchdog-reexec.log" 2>&1 &
    fi
    local pid=$!

    # Wait briefly and check if it started ok: either still alive, or it
    # already finished with exit status 0.
    sleep 2
    if kill -0 "$pid" 2>/dev/null || wait "$pid" 2>/dev/null; then
        log_repair "$plist_id: re-executed missed cron (PID $pid)"
        return 0
    else
        log "Re-execute failed for $plist_id"
        return 1
    fi
}
|
|
189
|
-
|
|
190
|
-
# Verify that a repaired LaunchAgent is healthy (macOS only).
#   $1 - launchd label (plist id)
#   $2 - stdout log path of the service (may be empty)
#   $3 - process pattern for process_running (may be empty)
# Checks, in order:
#   1. the agent must be loaded, otherwise fail;
#   2. if a process pattern is given, poll every 5 s for up to 30 s until the
#      process is running — the result of this poll is final;
#   3. otherwise, a stdout log younger than 300 s counts as success;
#   4. otherwise, being loaded is considered sufficient.
# Every outcome is written to the watchdog log. Returns 0 on success, 1 on failure.
try_verify_repair() {
    $IS_MACOS || return 1
    local plist_id="$1"
    local log_stdout="$2"
    local proc_grep="$3"
    local max_wait=30
    local waited age

    log "Verifying repair for $plist_id..."

    # Check 1: must be loaded
    if ! is_loaded "$plist_id"; then
        log "Verify FAILED: $plist_id still not loaded"
        return 1
    fi

    # Check 2: for services with a process pattern, wait for the process
    if [ -n "$proc_grep" ]; then
        for (( waited = 0; waited < max_wait; waited += 5 )); do
            if process_running "$proc_grep"; then
                log "Verify OK: $plist_id process running after ${waited}s"
                return 0
            fi
            sleep 5
        done
        log "Verify FAILED: $plist_id process not running after ${max_wait}s"
        return 1
    fi

    # Check 3: scheduled job — was its log touched recently?
    if [ -n "$log_stdout" ] && [ -f "$log_stdout" ]; then
        age=$(file_age "$log_stdout")
        if [ "$age" -lt 300 ]; then
            log "Verify OK: $plist_id log updated ${age}s ago"
            return 0
        fi
    fi

    # Loaded is good enough for a scheduled service
    log "Verify OK: $plist_id is loaded (scheduled service)"
    return 0
}
|
|
235
|
-
|
|
236
|
-
# Try to produce a fresh backup by running $NEXO_DIR/backup_cron.sh.
# Takes no arguments.
# After the script returns, the most recently modified
# $NEXO_DIR/backups/nexo-*.db file is located and its age (via file_age,
# i.e. relative to $TS_EPOCH) must be under 60 seconds.
# Returns 0 when such a fresh backup exists (and logs the repair);
# 1 when the script is missing/not executable or no fresh backup is found.
try_repair_backup() {
    local backup_script="$NEXO_DIR/backup_cron.sh"
    [ -x "$backup_script" ] || return 1

    "$backup_script" 2>/dev/null
    sleep 1

    # Pick the newest backup with a glob and -nt instead of parsing `ls`,
    # so unusual characters in the path cannot corrupt the result.
    local candidate newest=""
    for candidate in "$NEXO_DIR/backups/nexo-"*.db; do
        [ -e "$candidate" ] || continue   # unmatched glob stays literal
        if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
            newest="$candidate"
        fi
    done
    [ -n "$newest" ] || return 1

    # Reuse the shared helper rather than repeating the per-platform stat
    # call; it also copes with a failing stat instead of breaking the
    # arithmetic expansion.
    local age
    age=$(file_age "$newest")
    if [ "$age" -lt 60 ]; then
        log_repair "backup_cron.sh: ran successfully, fresh backup created"
        return 0
    fi
    return 1
}
|
|
253
|
-
|
|
254
|
-
# Print the age of a file in whole seconds, measured against $TS_EPOCH
# (the epoch time captured when the watchdog run started).
#   $1 - path to the file
# Prints 999999 when the path is not a regular file. If stat fails, the
# modification time is treated as 0.
# Because $TS_EPOCH is a snapshot, a file modified later in the same run
# would yield a negative value; that is clamped to 0 so callers and
# format_age never see a negative age.
file_age() {
    local path="$1"
    local mod_epoch age

    if [ ! -f "$path" ]; then
        echo 999999
        return
    fi

    # BSD stat (macOS) and GNU stat use different flags for the mtime epoch.
    if $IS_MACOS; then
        mod_epoch=$(stat -f %m "$path" 2>/dev/null || echo 0)
    else
        mod_epoch=$(stat -c %Y "$path" 2>/dev/null || echo 0)
    fi

    age=$(( TS_EPOCH - ${mod_epoch:-0} ))
    if [ "$age" -lt 0 ]; then
        age=0
    fi
    echo "$age"
}
|
|
267
|
-
|
|
268
|
-
# Render an age in seconds as a short human-readable string.
#   $1 - age in seconds
# Output by range:
#   >= 999999 -> "never"
#   >= 86400  -> "<d>d <h>h ago"
#   >= 3600   -> "<h>h <m>m ago"
#   >= 60     -> "<m>m ago"
#   otherwise -> "<s>s ago"
format_age() {
    local secs="$1"
    local days hours mins

    if [ "$secs" -ge 999999 ]; then
        echo "never"
        return
    fi

    if [ "$secs" -ge 86400 ]; then
        days=$(( secs / 86400 ))
        hours=$(( secs % 86400 / 3600 ))
        echo "${days}d ${hours}h ago"
        return
    fi

    if [ "$secs" -ge 3600 ]; then
        hours=$(( secs / 3600 ))
        mins=$(( secs % 3600 / 60 ))
        echo "${hours}h ${mins}m ago"
        return
    fi

    if [ "$secs" -ge 60 ]; then
        mins=$(( secs / 60 ))
        echo "${mins}m ago"
        return
    fi

    echo "${secs}s ago"
}
|
|
282
|
-
|
|
283
|
-
# Count lines matching $ERROR_PATTERNS (extended regex) within the last
# 50 lines of a log file and print the count.
#   $1 - path to the log file
# Prints 0 when the file is missing or empty.
check_errors() {
    local logfile="$1"
    local count

    if [ ! -f "$logfile" ] || [ ! -s "$logfile" ]; then
        echo 0
        return
    fi

    # grep -c exits non-zero when nothing matches; `|| true` keeps that from
    # being treated as a failure of the assignment.
    count=$(tail -n 50 "$logfile" 2>/dev/null | grep -cE "$ERROR_PATTERNS" 2>/dev/null) || true
    echo "${count:-0}"
}
|
|
293
|
-
|
|
294
|
-
# Succeeds when `pgrep -f` finds a process matching the given pattern.
#   $1 - pattern for pgrep -f
# An empty pattern means "no process check requested" and counts as success.
process_running() {
    [ -n "$1" ] || return 0
    pgrep -f "$1" > /dev/null 2>&1
}
|
|
301
|
-
|
|
302
|
-
# Escape strings for JSON
|
|
303
|
-
# Escape a string for embedding inside a double-quoted JSON string value.
#   $1 - raw text
# Backslashes and double quotes are backslash-escaped; tabs, carriage returns
# and newlines are each replaced by a single space. The trailing newline of
# the input line becomes a trailing space, so the result always ends in one.
# printf is used instead of echo so that inputs such as "-n" or "-e" are
# emitted literally rather than being taken as echo options.
# NOTE(review): the third sed expression in SOURCE replaces a space with a
# space (a no-op), presumably a literal tab lost in transit — the tab handling
# here restores what that expression appears to have been meant for; confirm.
json_escape() {
    printf '%s\n' "$1" \
        | sed 's/\\/\\\\/g; s/"/\\"/g' \
        | tr '\t\r\n' '   '
}
|
|
306
|
-
|
|
307
|
-
# ============================================================================
|
|
308
|
-
# RUN CHECKS
|
|
309
|
-
# ============================================================================
|
|
310
|
-
|
|
311
|
-
# ----------------------------------------------------------------------------
# LaunchAgent monitor sweep
#
# Iterates over MONITORS. Each entry is a pipe-separated record:
#   name|plist_id|log_stdout|log_stderr|max_stale|proc_grep|schedule|mon_type
# For every entry four checks run in order:
#   1. is the LaunchAgent loaded (bootstrap repair attempted if not)
#   2. is the expected process alive (kickstart repair attempted if not)
#   3. is the stdout log fresh (missed cron re-executed if severely stale)
#   4. how many errors does check_errors report for the stderr log
# Results are accumulated into TOTAL_PASS / TOTAL_WARN / TOTAL_FAIL,
# JSON_AGENTS, REPORT_LINES and FAILED_MONITORS.
# TOTAL_HEALED is incremented here but not initialised in this section; it is
# expected to be set before this point.
# ----------------------------------------------------------------------------
TOTAL_PASS=0
TOTAL_WARN=0
TOTAL_FAIL=0
JSON_AGENTS=""
REPORT_LINES=""
FAILED_MONITORS=()  # Track failed monitors for Level 2 repair
_WD_NL=$'\n'        # newline used when assembling multi-line strings

for monitor in "${MONITORS[@]}"; do
  # Skip comment lines
  [[ "$monitor" =~ ^[[:space:]]*# ]] && continue
  IFS='|' read -r name plist_id log_stdout log_stderr max_stale proc_grep schedule mon_type <<< "$monitor"
  mon_type="${mon_type:-core}"
  # A missing or malformed staleness limit disables the staleness comparison
  # instead of making the numeric tests below print errors.
  [[ "$max_stale" =~ ^[0-9]+$ ]] || max_stale=0

  status="PASS"
  details=""
  loaded="unknown"
  stale_age="n/a"
  error_count=0
  proc_alive="n/a"

  # Check 1: LaunchAgent loaded?
  if is_loaded "$plist_id"; then
    loaded="yes"
  else
    loaded="no"
    # AUTO-REPAIR: try to bootstrap
    if try_repair_launchagent "$plist_id" "$proc_grep"; then
      loaded="yes"
      status="HEALED"
      details="${details}Self-healed: bootstrapped. "
      TOTAL_HEALED=$((TOTAL_HEALED + 1))
    else
      status="FAIL"
      details="${details}Not loaded in launchctl (repair failed). "
    fi
  fi

  # Check 2: Process alive? (only for KeepAlive / long-running)
  if [ -n "$proc_grep" ]; then
    if process_running "$proc_grep"; then
      proc_alive="yes"
    else
      proc_alive="no"
      # AUTO-REPAIR: try to kickstart
      if [ "$status" != "FAIL" ] && [ "$status" != "HEALED" ]; then
        if try_repair_launchagent "$plist_id" "$proc_grep"; then
          proc_alive="yes"
          status="HEALED"
          details="${details}Self-healed: kickstarted. "
          TOTAL_HEALED=$((TOTAL_HEALED + 1))
        else
          status="WARN"
          details="${details}Process '$proc_grep' not running (repair failed). "
        fi
      elif [ "$status" = "HEALED" ]; then
        # Already healed by bootstrap, check if process came up
        sleep 1
        if process_running "$proc_grep"; then
          proc_alive="yes"
        else
          details="${details}Process '$proc_grep' still not running after bootstrap. "
        fi
      fi
    fi
  fi

  # Check 3: Log staleness + AUTO RE-EXECUTE missed crons
  if [ -n "$log_stdout" ] && [ "$max_stale" -gt 0 ]; then
    age=$(file_age "$log_stdout")
    stale_age=$(format_age "$age")
    if [ "$age" -gt $(( max_stale * 3 )) ]; then
      # Severely stale — try to re-execute the missed cron
      if try_reexecute_missed_cron "$plist_id"; then
        # A successful re-run must not hide an earlier FAIL (e.g. the agent
        # could not be loaded); only upgrade non-failed states to HEALED.
        [ "$status" = "FAIL" ] || status="HEALED"
        details="${details}Self-healed: re-executed missed cron (was stale: $stale_age). "
        TOTAL_HEALED=$((TOTAL_HEALED + 1))
      else
        status="FAIL"
        details="${details}Log stale: $stale_age (limit: $(format_age "$max_stale")). Re-execute failed. "
      fi
    elif [ "$age" -gt "$max_stale" ]; then
      [ "$status" = "PASS" ] && status="WARN"
      details="${details}Log slightly stale: $stale_age. "
    fi
  elif [ -n "$log_stdout" ]; then
    # No staleness limit configured: report the age for information only.
    if [ -f "$log_stdout" ]; then
      age=$(file_age "$log_stdout")
      stale_age=$(format_age "$age")
    else
      stale_age="no log file"
    fi
  fi

  # Check 4: Errors in stderr log
  if [ -n "$log_stderr" ]; then
    error_count=$(check_errors "$log_stderr")
    # error_count is emitted as a bare JSON number below, so it must be numeric.
    [[ "$error_count" =~ ^[0-9]+$ ]] || error_count=0
    if [ "$error_count" -gt 5 ]; then
      [ "$status" = "PASS" ] && status="WARN"
      details="${details}${error_count} errors in recent stderr. "
    fi
  fi

  [ -z "$details" ] && details="All checks passed"

  # HEALED counts as PASS for overall status
  case "$status" in
    PASS|HEALED) TOTAL_PASS=$((TOTAL_PASS + 1)) ;;
    WARN) TOTAL_WARN=$((TOTAL_WARN + 1)) ;;
    FAIL)
      TOTAL_FAIL=$((TOTAL_FAIL + 1))
      FAILED_MONITORS+=("${name}|${plist_id}|${log_stdout}|${log_stderr}|${proc_grep}|${schedule}|${mon_type}|${details}")
      ;;
  esac

  # JSON — every free-text field goes through json_escape, not just details,
  # so a quote or backslash in a monitor definition cannot break the document.
  printf -v json_item ' {"name":"%s","plist":"%s","status":"%s","type":"%s","loaded":"%s","process":"%s","last_activity":"%s","stderr_errors":%s,"schedule":"%s","details":"%s"}' \
    "$(json_escape "$name")" \
    "$(json_escape "$plist_id")" \
    "$status" \
    "$(json_escape "$mon_type")" \
    "$loaded" \
    "$proc_alive" \
    "$(json_escape "$stale_age")" \
    "$error_count" \
    "$(json_escape "$schedule")" \
    "$(json_escape "$details")"
  if [ -n "$JSON_AGENTS" ]; then
    JSON_AGENTS="${JSON_AGENTS},${_WD_NL}${json_item}"
  else
    JSON_AGENTS="$json_item"
  fi

  # Report
  case "$status" in
    PASS) icon="PASS" ;;
    HEALED) icon="HEAL" ;;
    WARN) icon="WARN" ;;
    FAIL) icon="FAIL" ;;
    *) icon="$status" ;;
  esac
  REPORT_LINES="${REPORT_LINES} [${icon}] ${name} (${schedule})${_WD_NL}Loaded: ${loaded} | Process: ${proc_alive} | Last: ${stale_age} | Errors: ${error_count}${_WD_NL}${details}${_WD_NL}"
done
|
|
440
|
-
|
|
441
|
-
# --- Cron job checks ---
# Each CRON_MONITORS entry is a pipe-separated record:
#   name|script|check_path|max_stale|schedule
# Two things are verified per entry: the script is executable (try_repair_cron
# is attempted otherwise) and the output at check_path is fresh. check_path may
# be a single file or a directory; for a directory the most recently modified
# entry is used. Results go to CRON_JSON, CRON_REPORT and the TOTAL_* counters.
CRON_JSON=""
CRON_REPORT=""
_WD_NL=$'\n'  # newline used when assembling multi-line strings
for cron_entry in "${CRON_MONITORS[@]}"; do
  IFS='|' read -r name script check_path max_stale schedule <<< "$cron_entry"

  c_status="PASS"
  c_details=""
  age_str="n/a"

  # Check script exists and is executable
  if [ ! -x "$script" ]; then
    # AUTO-REPAIR: try chmod
    if try_repair_cron "$script"; then
      c_status="HEALED"
      c_details="Self-healed: made executable. "
      TOTAL_HEALED=$((TOTAL_HEALED + 1))
    else
      c_status="FAIL"
      c_details="Script not executable or missing (repair failed). "
    fi
  fi

  # Check output freshness: first resolve which file to measure...
  fresh_target=""
  if [ -d "$check_path" ]; then
    newest=$(ls -t "$check_path" 2>/dev/null | head -1)
    if [ -n "$newest" ]; then
      # Join with exactly one slash so check_path works with or without a
      # trailing "/".
      fresh_target="${check_path%/}/${newest}"
    else
      # Never downgrade an existing FAIL (broken script) to WARN.
      [ "$c_status" = "FAIL" ] || c_status="WARN"
      c_details="${c_details}No output files found. "
      age_str="no files"
    fi
  elif [ -f "$check_path" ]; then
    fresh_target="$check_path"
  fi

  # ...then apply the same staleness thresholds to file and directory targets.
  if [ -n "$fresh_target" ]; then
    age=$(file_age "$fresh_target")
    age_str=$(format_age "$age")
    if [ "$age" -gt $(( max_stale * 3 )) ]; then
      c_status="FAIL"
      c_details="${c_details}Output stale: $age_str. "
    elif [ "$age" -gt "$max_stale" ]; then
      [ "$c_status" = "PASS" ] && c_status="WARN"
      c_details="${c_details}Output slightly stale: $age_str. "
    fi
  fi

  [ -z "$c_details" ] && c_details="All checks passed"

  case "$c_status" in
    PASS|HEALED) TOTAL_PASS=$((TOTAL_PASS + 1)) ;;
    WARN) TOTAL_WARN=$((TOTAL_WARN + 1)) ;;
    FAIL) TOTAL_FAIL=$((TOTAL_FAIL + 1)) ;;
  esac

  # Escape every free-text field, not only the details, to keep the JSON valid.
  printf -v cron_item ' {"name":"%s","script":"%s","status":"%s","last_output":"%s","schedule":"%s","details":"%s"}' \
    "$(json_escape "$name")" \
    "$(json_escape "$script")" \
    "$c_status" \
    "$(json_escape "$age_str")" \
    "$(json_escape "$schedule")" \
    "$(json_escape "$c_details")"
  if [ -n "$CRON_JSON" ]; then
    CRON_JSON="${CRON_JSON},${_WD_NL}${cron_item}"
  else
    CRON_JSON="$cron_item"
  fi

  case "$c_status" in
    PASS) icon="PASS" ;;
    HEALED) icon="HEAL" ;;
    WARN) icon="WARN" ;;
    FAIL) icon="FAIL" ;;
    *) icon="$c_status" ;;
  esac
  CRON_REPORT="${CRON_REPORT} [${icon}] ${name} (${schedule})${_WD_NL}Last output: ${age_str}${_WD_NL}${c_details}${_WD_NL}"
done
|
|
515
|
-
|
|
516
|
-
# ============================================================================
|
|
517
|
-
# INFRASTRUCTURE CHECKS
|
|
518
|
-
# ============================================================================
|
|
519
|
-
|
|
520
|
-
# --- SQLite integrity ---
# Runs PRAGMA integrity_check on the main database. Anything other than the
# single word "ok" (including a failing sqlite3 invocation, reported as
# "CORRUPT") counts as a FAIL and triggers a restore from the newest
# nexo-*.db backup. The restore only happens when the backup itself passes
# the integrity check, the damaged file is kept next to the database, and
# success is only reported when the copy actually succeeded.
SQLITE_STATUS="PASS"
SQLITE_DETAIL=""
SQLITE_DB="$NEXO_DIR/data/nexo.db"
INTEGRITY=$(sqlite3 "$SQLITE_DB" "PRAGMA integrity_check;" 2>/dev/null || echo "CORRUPT")
if [ "$INTEGRITY" != "ok" ]; then
  SQLITE_STATUS="FAIL"
  SQLITE_DETAIL="Integrity check: $INTEGRITY"
  log "CRITICAL: SQLite integrity check failed: $INTEGRITY"
  TOTAL_FAIL=$((TOTAL_FAIL + 1))
  LATEST_BACKUP=$(ls -t "$NEXO_DIR/backups/nexo-"*.db 2>/dev/null | head -1)
  if [ -n "$LATEST_BACKUP" ]; then
    BACKUP_INTEGRITY=$(sqlite3 "$LATEST_BACKUP" "PRAGMA integrity_check;" 2>/dev/null || echo "CORRUPT")
    if [ "$BACKUP_INTEGRITY" != "ok" ]; then
      # Replacing a damaged database with a damaged backup gains nothing.
      log "CRITICAL: latest backup $LATEST_BACKUP failed integrity check, not restoring"
      SQLITE_DETAIL="${SQLITE_DETAIL}. Latest backup is also corrupt, not restored."
    else
      # Keep the damaged file for later inspection; best effort only.
      cp -p "$SQLITE_DB" "${SQLITE_DB}.corrupt" 2>/dev/null || true
      # NOTE(review): leftover nexo.db-wal / nexo.db-shm files belong to the
      # old database; confirm whether they should be removed on restore.
      if cp "$LATEST_BACKUP" "$SQLITE_DB"; then
        log "RESTORED from $LATEST_BACKUP"
        SQLITE_DETAIL="${SQLITE_DETAIL}. Restored from backup."
      else
        log "CRITICAL: restore from $LATEST_BACKUP failed"
        SQLITE_DETAIL="${SQLITE_DETAIL}. Restore from backup failed."
      fi
    fi
  fi
else
  SQLITE_DETAIL="Integrity OK"
  TOTAL_PASS=$((TOTAL_PASS + 1))
fi
|
|
539
|
-
|
|
540
|
-
# --- Immutable file integrity ---
# HASH_REGISTRY holds one "filepath|sha256" record per line. Every registered
# file that exists is hashed with shasum and compared with the recorded value.
# A mismatch is logged, counted, and the file is restored from the newest
# directory under $NEXO_HOME/snapshots when a copy exists there. If anything
# was tampered with, evolution_enabled is set to false in
# $CORTEX_DIR/evolution-objective.json. A missing registry counts as PASS.
IMMUTABLE_STATUS="PASS"
IMMUTABLE_DETAIL=""
if [ -f "$HASH_REGISTRY" ]; then
  TAMPERED=0
  # "|| [ -n ... ]" keeps the final record when the registry lacks a trailing
  # newline (read returns non-zero at EOF but still fills the variables).
  while IFS='|' read -r filepath expected_hash || [ -n "$filepath" ]; do
    # NOTE(review): a registered file that no longer exists is skipped and is
    # not counted as tampered — confirm that this is intended.
    [ -f "$filepath" ] || continue
    ACTUAL=$(shasum -a 256 "$filepath" | cut -d' ' -f1)
    [ "$ACTUAL" != "$expected_hash" ] || continue

    TAMPERED=$((TAMPERED + 1))
    log "CRITICAL: Immutable file modified: $filepath"
    LATEST_SNAP=$(ls -td "$NEXO_HOME/snapshots/"*/ 2>/dev/null | head -1)
    # Quote HOME_DIR inside the expansion so it is stripped literally and not
    # interpreted as a glob pattern.
    SNAP_COPY="${LATEST_SNAP}files/${filepath#"$HOME_DIR"/}"
    if [ -n "$LATEST_SNAP" ] && [ -f "$SNAP_COPY" ]; then
      if cp "$SNAP_COPY" "$filepath"; then
        log "RESTORED immutable file from snapshot"
      else
        log "CRITICAL: could not restore $filepath from snapshot"
      fi
    fi
  done < "$HASH_REGISTRY"

  if [ "$TAMPERED" -gt 0 ]; then
    IMMUTABLE_STATUS="FAIL"
    IMMUTABLE_DETAIL="$TAMPERED immutable files tampered"
    TOTAL_FAIL=$((TOTAL_FAIL + 1))
    OBJECTIVE="$CORTEX_DIR/evolution-objective.json"
    if [ -f "$OBJECTIVE" ]; then
      # The path is handed over as an argument instead of being pasted into
      # the Python source, so quotes or backslashes in it cannot break or
      # alter the program. The Python lines stay at column 0 on purpose.
      if python3 -c '
import json
import sys

path = sys.argv[1]
with open(path) as f:
    d = json.load(f)
d["evolution_enabled"] = False
d["disabled_reason"] = "Immutable file tampered — watchdog disabled Evolution"
with open(path, "w") as f:
    json.dump(d, f, indent=2)
' "$OBJECTIVE" 2>/dev/null; then
        log "DISABLED Evolution due to immutable file tampering"
      else
        log "CRITICAL: could not disable Evolution in $OBJECTIVE"
      fi
    fi
  else
    IMMUTABLE_DETAIL="All files intact"
    TOTAL_PASS=$((TOTAL_PASS + 1))
  fi
else
  IMMUTABLE_DETAIL="No hash registry (skipped)"
  TOTAL_PASS=$((TOTAL_PASS + 1))
fi
|
|
582
|
-
|
|
583
|
-
# --- Backup freshness ---
# Looks at the most recently modified nexo-*.db file under $NEXO_DIR/backups.
# No backup at all is a FAIL. A backup older than 7200 seconds triggers
# try_repair_backup: success is reported as HEALED (and counted as a pass),
# failure as WARN. Anything younger passes.
BACKUP_STATUS="PASS"
BACKUP_DETAIL=""
LATEST_BACKUP=$(ls -t "$NEXO_DIR/backups/nexo-"*.db 2>/dev/null | head -1)
if [ -z "$LATEST_BACKUP" ]; then
  BACKUP_STATUS="FAIL"
  BACKUP_DETAIL="No backups found"
  TOTAL_FAIL=$((TOTAL_FAIL + 1))
else
  # stat takes different flags for the modification time on macOS and elsewhere.
  if $IS_MACOS; then
    BACKUP_AGE=$(( TS_EPOCH - $(stat -f %m "$LATEST_BACKUP") ))
  else
    BACKUP_AGE=$(( TS_EPOCH - $(stat -c %Y "$LATEST_BACKUP") ))
  fi
  BACKUP_AGE_STR=$(format_age "$BACKUP_AGE")

  if ! [ "$BACKUP_AGE" -gt 7200 ]; then
    BACKUP_DETAIL="Last backup: $BACKUP_AGE_STR"
    TOTAL_PASS=$((TOTAL_PASS + 1))
  elif try_repair_backup; then
    # AUTO-REPAIR succeeded: a fresh backup was produced just now
    BACKUP_STATUS="HEALED"
    BACKUP_DETAIL="Self-healed: backup was stale ($BACKUP_AGE_STR), ran fresh backup"
    TOTAL_HEALED=$((TOTAL_HEALED + 1))
    TOTAL_PASS=$((TOTAL_PASS + 1))
  else
    BACKUP_STATUS="WARN"
    BACKUP_DETAIL="Last backup: $BACKUP_AGE_STR (>2h, repair failed)"
    TOTAL_WARN=$((TOTAL_WARN + 1))
  fi
fi
|
|
611
|
-
|
|
612
|
-
# --- Cognitive DB check ---
# A missing cognitive.db is only a WARN. When the file exists, PRAGMA
# integrity_check must answer exactly "ok"; any other output, or a failing
# sqlite3 call (reported as "CORRUPT"), is a FAIL.
COG_STATUS="PASS"
COG_DETAIL=""
COG_DB="$NEXO_DIR/data/cognitive.db"
if [ ! -f "$COG_DB" ]; then
  COG_STATUS="WARN"
  COG_DETAIL="cognitive.db not found"
  TOTAL_WARN=$((TOTAL_WARN + 1))
else
  COG_INT=$(sqlite3 "$COG_DB" "PRAGMA integrity_check;" 2>/dev/null || echo "CORRUPT")
  if [ "$COG_INT" = "ok" ]; then
    COG_DETAIL="Integrity OK"
    TOTAL_PASS=$((TOTAL_PASS + 1))
  else
    COG_STATUS="FAIL"
    COG_DETAIL="Cognitive DB integrity: $COG_INT"
    TOTAL_FAIL=$((TOTAL_FAIL + 1))
  fi
fi
|
|
631
|
-
|
|
632
|
-
# ============================================================================
|
|
633
|
-
# WRITE JSON STATUS
|
|
634
|
-
# ============================================================================
|
|
635
|
-
# Derive the summary figures and write the machine-readable status document.
# OVERALL is the worst state seen: FAIL beats WARN, WARN beats PASS.
TOTAL=$((TOTAL_PASS + TOTAL_WARN + TOTAL_FAIL))
if [ "$TOTAL_FAIL" -gt 0 ]; then
  OVERALL="FAIL"
elif [ "$TOTAL_WARN" -gt 0 ]; then
  OVERALL="WARN"
else
  OVERALL="PASS"
fi

# Escape the free-text infrastructure details up front so the template below
# contains plain variable references only.
sqlite_detail_json=$(json_escape "$SQLITE_DETAIL")
cog_detail_json=$(json_escape "$COG_DETAIL")
immutable_detail_json=$(json_escape "$IMMUTABLE_DETAIL")
backup_detail_json=$(json_escape "$BACKUP_DETAIL")

cat > "$STATUS_JSON" <<JSONEOF
{
"timestamp": "$TS",
"summary": {
"total": $TOTAL,
"pass": $TOTAL_PASS,
"warn": $TOTAL_WARN,
"fail": $TOTAL_FAIL,
"healed": $TOTAL_HEALED,
"overall": "$OVERALL"
},
"launch_agents": [
$JSON_AGENTS
],
"cron_jobs": [
$CRON_JSON
],
"infrastructure": {
"sqlite": {"status": "$SQLITE_STATUS", "detail": "$sqlite_detail_json"},
"cognitive_db": {"status": "$COG_STATUS", "detail": "$cog_detail_json"},
"immutable_files": {"status": "$IMMUTABLE_STATUS", "detail": "$immutable_detail_json"},
"backups": {"status": "$BACKUP_STATUS", "detail": "$backup_detail_json"}
}
}
JSONEOF
|
|
665
|
-
|
|
666
|
-
# ============================================================================
|
|
667
|
-
# WRITE HUMAN-READABLE REPORT
|
|
668
|
-
# ============================================================================
|
|
669
|
-
# Write the human-readable report. The LaunchAgents header shows how many
# monitors were actually evaluated: entries that are comment lines are skipped
# by the monitor loop, so they are left out of the count here as well.
MONITOR_COUNT=0
for _monitor_entry in ${MONITORS[@]+"${MONITORS[@]}"}; do
  [[ "$_monitor_entry" =~ ^[[:space:]]*# ]] || MONITOR_COUNT=$((MONITOR_COUNT + 1))
done

cat > "$REPORT_TXT" <<REPORTEOF
======================================================
NEXO WATCHDOG REPORT — $TS
======================================================
PASS: $TOTAL_PASS | HEALED: $TOTAL_HEALED | WARN: $TOTAL_WARN | FAIL: $TOTAL_FAIL | TOTAL: $TOTAL
OVERALL: $OVERALL
======================================================

-- LaunchAgents (${MONITOR_COUNT}) ---------------------
$REPORT_LINES
-- Cron Jobs ------------------------------------------
$CRON_REPORT
-- Infrastructure -------------------------------------
[$SQLITE_STATUS] SQLite nexo.db: $SQLITE_DETAIL
[$COG_STATUS] Cognitive DB: $COG_DETAIL
[$IMMUTABLE_STATUS] Immutable Files: $IMMUTABLE_DETAIL
[$BACKUP_STATUS] Backups: $BACKUP_DETAIL

-- End of Report --------------------------------------
REPORTEOF
|
|
689
|
-
|
|
690
|
-
# ============================================================================
|
|
691
|
-
# ALERT FILE
|
|
692
|
-
# ============================================================================
|
|
693
|
-
# The alert file exists only while at least one check is failing: it is
# removed on a clean run and otherwise rewritten with the counters plus up to
# ten [FAIL] lines taken from the text report.
if [ "$TOTAL_FAIL" -eq 0 ]; then
  rm -f "$ALERT_FILE"
else
  {
    printf '%s\n' \
      "timestamp=$TS" \
      "fail_count=$TOTAL_FAIL" \
      "warn_count=$TOTAL_WARN" \
      "failures:"
    grep '\[FAIL\]' "$REPORT_TXT" | head -10 | sed 's/^/ /'
  } > "$ALERT_FILE"
  log "ALERT: $TOTAL_FAIL failures detected"
fi
|
|
705
|
-
|
|
706
|
-
# ============================================================================
|
|
707
|
-
# CONSECUTIVE FAILURE TRACKING + NOTIFICATION
|
|
708
|
-
# ============================================================================
|
|
709
|
-
# FAIL_COUNT_FILE stores how many runs in a row ended with at least one FAIL.
# A clean run resets it to 0; a failing run increments it and, once MAX_FAILS
# is reached, writes the alert and notification lines to the log.
FAILS=$(cat "$FAIL_COUNT_FILE" 2>/dev/null || echo 0)
if [ "$TOTAL_FAIL" -eq 0 ]; then
  printf '%s\n' "0" > "$FAIL_COUNT_FILE"
else
  FAILS=$((FAILS + 1))
  printf '%s\n' "$FAILS" > "$FAIL_COUNT_FILE"
  if [ "$FAILS" -ge "$MAX_FAILS" ]; then
    log "ALERT: $FAILS consecutive runs with failures"
    # Configure your own notification method here (optional)
    # Example: send email, Slack webhook, desktop notification, etc.
    log "NOTIFICATION: $FAILS consecutive failures ($TOTAL_FAIL items FAIL)"
  fi
fi
|
|
722
|
-
|
|
723
|
-
# ============================================================================
|
|
724
|
-
# LEVEL 2 AUTO-REPAIR: Launch NEXO for intelligent diagnosis
|
|
725
|
-
# ============================================================================
|
|
726
|
-
# Only triggers if: (a) there are FAILs after mechanical repair, (b) no NEXO
|
|
727
|
-
# repair is already running, (c) no interactive session is active (avoid conflict)
|
|
728
|
-
# Level 2 repair: when failures remain, hand the failed LaunchAgent monitors
# to the claude CLI with a generated prompt, then verify each one afterwards.
#
# Flow:
#   1. Skip when the lock file is younger than REPAIR_COOLDOWN seconds.
#   2. Build FAIL_DETAILS from FAILED_MONITORS (log tails included).
#   3. Locate the claude binary; abort cleanly when it cannot be found.
#   4. Write the prompt to a temp file and start claude in the background.
#   5. A background watcher waits up to 5 minutes (60 polls x 5 s), kills the
#      repair on timeout, otherwise re-checks every failed monitor with
#      try_verify_repair.
# The PID capture, the "launched" log line and the watcher all live inside the
# branch that actually started the process, so a missing CLI can no longer
# produce a bogus PID or a watcher polling an unrelated/empty PID.
REPAIR_LOCK="$NEXO_HOME/scripts/.watchdog-nexo-repair.lock"
REPAIR_COOLDOWN=1800  # 30 min between NEXO repair attempts

if [ "$TOTAL_FAIL" -gt 0 ]; then
  # Check cooldown — don't spam NEXO invocations
  LOCK_AGE=999999
  SKIP_REPAIR=false
  if [ -f "$REPAIR_LOCK" ]; then
    if $IS_MACOS; then
      LOCK_AGE=$(( TS_EPOCH - $(stat -f %m "$REPAIR_LOCK" 2>/dev/null || echo 0) ))
    else
      LOCK_AGE=$(( TS_EPOCH - $(stat -c %Y "$REPAIR_LOCK" 2>/dev/null || echo 0) ))
    fi
    if [ "$LOCK_AGE" -lt "$REPAIR_COOLDOWN" ]; then
      log "NEXO repair skipped: cooldown (${LOCK_AGE}s < ${REPAIR_COOLDOWN}s)"
      SKIP_REPAIR=true
    fi
  fi

  if ! $SKIP_REPAIR; then
    # Collect failure details from tracked FAILED_MONITORS array
    FAIL_DETAILS=""
    HAS_CORE_FAILS=false
    for failed in ${FAILED_MONITORS[@]+"${FAILED_MONITORS[@]}"}; do
      IFS='|' read -r m_name m_plist m_stdout m_stderr m_proc m_sched m_type m_details <<< "$failed"
      STDERR_TAIL=""
      if [ -n "$m_stderr" ] && [ -f "$m_stderr" ]; then
        STDERR_TAIL=$(tail -20 "$m_stderr" 2>/dev/null)
      fi
      STDOUT_TAIL=""
      if [ -n "$m_stdout" ] && [ -f "$m_stdout" ]; then
        STDOUT_TAIL=$(tail -10 "$m_stdout" 2>/dev/null)
      fi
      [ "$m_type" = "core" ] && HAS_CORE_FAILS=true
      # One argument per output line; the leading empty argument yields the
      # blank separator line in front of every entry.
      printf -v fail_entry '%s\n' \
        "" \
        "--- ${m_name} (${m_plist}) [${m_type}] ---" \
        "Schedule: ${m_sched}" \
        "Type: ${m_type}" \
        "Failure reason: ${m_details}" \
        "Plist: ~/Library/LaunchAgents/${m_plist}.plist" \
        "Process grep: ${m_proc}" \
        "Stderr (last 20 lines):" \
        "${STDERR_TAIL}" \
        "Stdout (last 10 lines):" \
        "${STDOUT_TAIL}"
      FAIL_DETAILS="${FAIL_DETAILS}${fail_entry}"
    done

    # Only launch if we actually have fail details
    if [ -n "$FAIL_DETAILS" ]; then
      touch "$REPAIR_LOCK"
      log "Launching NEXO Level 2 repair..."

      # Build propagation instructions if core services failed
      # Only the origin maintainer (user) propagates fixes to the public repo
      PROPAGATE_BLOCK=""
      # If you maintain a public NEXO repo, configure propagation here
      NEXO_PUBLIC_REPO="${NEXO_PUBLIC_REPO:-}"
      if $HAS_CORE_FAILS && [ -n "$NEXO_PUBLIC_REPO" ] && [ -d "$NEXO_PUBLIC_REPO/.git" ]; then
        PROPAGATE_BLOCK="
PROPAGATION (for [core] fixes ONLY):
If your fix modifies a file under $NEXO_HOME/ (server.py, db/, plugins/, scripts/):
1. Commit the fix locally with a descriptive message
2. Copy the changed files (sanitized — no personal data) to $NEXO_PUBLIC_REPO/src/
3. Bump patch version in $NEXO_PUBLIC_REPO/package.json
4. Commit + push
5. Create a GitHub release with gh release create
Do NOT propagate fixes for [personal] services — those stay local only."
      fi

      # Ensure claude CLI is in PATH (cron/LaunchAgent may have minimal PATH)
      CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "$HOME_DIR/.claude/local/bin/claude")
      if [ ! -x "$CLAUDE_BIN" ]; then
        # Also accept symlinks: package managers commonly install CLI entry
        # points as links, which "-type f" alone would not match.
        CLAUDE_BIN=$(find /usr/local/bin /opt/homebrew/bin "$HOME_DIR/.local/bin" "$HOME_DIR/.npm-global/bin" -name claude \( -type f -o -type l \) 2>/dev/null | head -1)
      fi

      if [ -z "$CLAUDE_BIN" ] || [ ! -x "$CLAUDE_BIN" ]; then
        log "NEXO repair ABORTED: claude CLI not found in PATH"
      elif ! REPAIR_PROMPT_FILE=$(mktemp /tmp/nexo-repair-XXXXXXXX); then
        log "NEXO repair ABORTED: could not create prompt file"
      else
        # Write prompt to temp file (avoids heredoc quoting issues in subshell)
        cat > "$REPAIR_PROMPT_FILE" <<NEXOPROMPT
WATCHDOG LEVEL 2 REPAIR — Automated diagnostic session.

The watchdog detected services that failed mechanical repair (launchctl bootstrap/kickstart).
Your job: diagnose the root cause and fix it. Do NOT ask the user anything — resolve autonomously.

Each failure is tagged [core] or [personal]:
- [core] = part of NEXO public package — fix may need propagation to public repo
- [personal] = user-specific service — fix stays local only

FAILURES:
${FAIL_DETAILS}

STEPS:
1. Read the plist file to understand the service configuration
2. Check stderr/stdout logs for the actual error
3. Fix the root cause (missing file, bad config, dependency issue, etc.)
4. Reload the service and verify it is running
5. Log what you did to $NEXO_HOME/logs/watchdog-repair-result.log
${PROPAGATE_BLOCK}

CONSTRAINTS:
- Do NOT modify CLAUDE.md or any protected file
- Do NOT start interactive conversations
- Keep it under 5 minutes
- Log what you did to $NEXO_HOME/logs/watchdog-repair-result.log
NEXOPROMPT

        # Launch NEXO in background with repair task. Paths are passed as
        # positional parameters ($1 binary, $2 prompt file, $3 log file) so
        # that quotes or spaces in them cannot alter the inner command line.
        nohup bash -c '"$1" --print --dangerously-skip-permissions -p "$(cat "$2")" >> "$3" 2>&1; rm -f "$2"' \
          nexo-repair "$CLAUDE_BIN" "$REPAIR_PROMPT_FILE" "$LOG_DIR/watchdog-nexo-repair.log" &
        REPAIR_PID=$!
        log "NEXO repair launched (PID: $REPAIR_PID)"

        # Wait for repair to complete (max 5 min) then verify
        (
          wait_count=0
          while kill -0 "$REPAIR_PID" 2>/dev/null && [ "$wait_count" -lt 60 ]; do
            sleep 5
            wait_count=$((wait_count + 1))
          done

          # Decide on the process state itself rather than the poll counter, so
          # a repair that finishes during the last poll is not called a timeout.
          if kill -0 "$REPAIR_PID" 2>/dev/null; then
            log "NEXO repair timed out after 5 min"
            kill "$REPAIR_PID" 2>/dev/null
            # The killed wrapper never reaches its own cleanup step.
            rm -f "$REPAIR_PROMPT_FILE"
          else
            log "NEXO repair completed. Verifying fixes..."
            # Verify each failed monitor
            VERIFY_PASS=0
            VERIFY_FAIL=0
            for failed in ${FAILED_MONITORS[@]+"${FAILED_MONITORS[@]}"}; do
              IFS='|' read -r v_name v_plist v_stdout v_stderr v_proc v_sched v_type v_details <<< "$failed"
              if try_verify_repair "$v_plist" "$v_stdout" "$v_proc"; then
                VERIFY_PASS=$((VERIFY_PASS + 1))
                log "VERIFY OK: $v_name"
              else
                VERIFY_FAIL=$((VERIFY_FAIL + 1))
                log "VERIFY FAIL: $v_name — still broken after repair"
              fi
            done
            log "Post-repair verification: $VERIFY_PASS passed, $VERIFY_FAIL failed"
            echo "[$(date '+%Y-%m-%d %H:%M:%S')] Verification: $VERIFY_PASS OK, $VERIFY_FAIL FAIL" >> "$LOG_DIR/watchdog-nexo-repair.log"
          fi
        ) &
      fi
    fi
  fi
fi

# ============================================================================
# LOG SUMMARY
# ============================================================================
log "Complete: PASS=$TOTAL_PASS HEALED=$TOTAL_HEALED WARN=$TOTAL_WARN FAIL=$TOTAL_FAIL"
|