nexo-brain 2.3.1 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/nexo-brain.js +92 -9
- package/bin/postinstall.js +22 -15
- package/package.json +2 -2
- package/src/auto_update.py +193 -5
- package/src/crons/sync.py +5 -0
- package/src/db/_schema.py +11 -1
- package/src/hooks/capture-tool-logs.sh +23 -6
- package/src/hooks/session-start.sh +4 -3
- package/src/plugins/update.py +376 -26
- package/src/scripts/nexo-catchup.py +29 -4
- package/src/scripts/nexo-daily-self-audit.py +21 -1
- package/src/scripts/nexo-evolution-run.py +21 -1
- package/src/scripts/nexo-postmortem-consolidator.py +34 -9
- package/src/scripts/nexo-sleep.py +32 -10
- package/src/scripts/nexo-synthesis.py +29 -9
- package/src/scripts/nexo-update.sh +109 -7
- package/src/scripts/nexo-watchdog.sh +103 -47
- package/src/server.py +65 -1
|
@@ -42,7 +42,27 @@ MEMORY_DIR = NEXO_HOME / "memory"
|
|
|
42
42
|
MEMORY_INDEX = MEMORY_DIR / "MEMORY.md"
|
|
43
43
|
HISTORY_FILE = NEXO_HOME / "coordination" / "postmortem-history.json"
|
|
44
44
|
CONSOLIDATION_LOG = NEXO_HOME / "logs" / "postmortem-consolidation.log"
|
|
45
|
-
|
|
45
|
+
def _resolve_claude_cli() -> Path:
|
|
46
|
+
"""Find claude CLI: saved path > PATH > common locations."""
|
|
47
|
+
import shutil as _shutil
|
|
48
|
+
saved = NEXO_HOME / "config" / "claude-cli-path"
|
|
49
|
+
if saved.exists():
|
|
50
|
+
p = Path(saved.read_text().strip())
|
|
51
|
+
if p.exists():
|
|
52
|
+
return p
|
|
53
|
+
found = _shutil.which("claude")
|
|
54
|
+
if found:
|
|
55
|
+
return Path(found)
|
|
56
|
+
for candidate in [
|
|
57
|
+
HOME / ".local" / "bin" / "claude",
|
|
58
|
+
HOME / ".npm-global" / "bin" / "claude",
|
|
59
|
+
Path("/usr/local/bin/claude"),
|
|
60
|
+
]:
|
|
61
|
+
if candidate.exists():
|
|
62
|
+
return candidate
|
|
63
|
+
return HOME / ".local" / "bin" / "claude"
|
|
64
|
+
|
|
65
|
+
CLAUDE_CLI = _resolve_claude_cli()
|
|
46
66
|
SESSION_BUFFER = NEXO_HOME / "brain" / "session_buffer.jsonl"
|
|
47
67
|
|
|
48
68
|
TODAY = date.today()
|
|
@@ -379,6 +399,7 @@ def main():
|
|
|
379
399
|
return
|
|
380
400
|
|
|
381
401
|
log("=== NEXO Post-Mortem Consolidator v2 starting ===")
|
|
402
|
+
had_errors = False
|
|
382
403
|
|
|
383
404
|
# Stage 1: Collect data
|
|
384
405
|
data = collect_data()
|
|
@@ -392,27 +413,31 @@ def main():
|
|
|
392
413
|
if not success:
|
|
393
414
|
log("Stage 2 failed (CLI unavailable or error). "
|
|
394
415
|
"Skipping intelligent consolidation. Stage 3 (sensory + force) will still run.")
|
|
416
|
+
had_errors = True
|
|
395
417
|
|
|
396
418
|
# Stage 3: Sensory Register (mechanical, kept from v1)
|
|
397
419
|
try:
|
|
398
420
|
process_sensory_register()
|
|
399
421
|
except Exception as e:
|
|
400
422
|
log(f"Sensory register failed: {e}")
|
|
423
|
+
had_errors = True
|
|
401
424
|
|
|
402
425
|
# Stage 3b: Force analysis (mechanical, kept from v1)
|
|
403
426
|
try:
|
|
404
427
|
analyze_force_events()
|
|
405
428
|
except Exception as e:
|
|
406
429
|
log(f"Force analysis failed: {e}")
|
|
430
|
+
had_errors = True
|
|
407
431
|
|
|
408
|
-
# Register successful run
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
432
|
+
# Register successful run only if no stages failed
|
|
433
|
+
if not had_errors:
|
|
434
|
+
try:
|
|
435
|
+
state_file = NEXO_HOME / "operations" / ".catchup-state.json"
|
|
436
|
+
state = json.loads(state_file.read_text()) if state_file.exists() else {}
|
|
437
|
+
state["postmortem"] = datetime.now().isoformat()
|
|
438
|
+
state_file.write_text(json.dumps(state, indent=2))
|
|
439
|
+
except Exception:
|
|
440
|
+
pass
|
|
416
441
|
|
|
417
442
|
mark_done()
|
|
418
443
|
log("=== Consolidation v2 complete ===")
|
|
@@ -49,7 +49,26 @@ SLEEP_LOG = COORD_DIR / "sleep-log.json"
|
|
|
49
49
|
MEMORY_MD = NEXO_HOME / "memory" / "MEMORY.md"
|
|
50
50
|
NEXO_DB = NEXO_HOME / "data" / "nexo.db"
|
|
51
51
|
CLAUDE_MEM_DB = Path.home() / ".claude-mem" / "claude-mem.db"
|
|
52
|
-
|
|
52
|
+
def _resolve_claude_cli() -> Path:
|
|
53
|
+
"""Find claude CLI: saved path > PATH > common locations."""
|
|
54
|
+
saved = NEXO_HOME / "config" / "claude-cli-path"
|
|
55
|
+
if saved.exists():
|
|
56
|
+
p = Path(saved.read_text().strip())
|
|
57
|
+
if p.exists():
|
|
58
|
+
return p
|
|
59
|
+
found = shutil.which("claude")
|
|
60
|
+
if found:
|
|
61
|
+
return Path(found)
|
|
62
|
+
for candidate in [
|
|
63
|
+
Path.home() / ".local" / "bin" / "claude",
|
|
64
|
+
Path.home() / ".npm-global" / "bin" / "claude",
|
|
65
|
+
Path("/usr/local/bin/claude"),
|
|
66
|
+
]:
|
|
67
|
+
if candidate.exists():
|
|
68
|
+
return candidate
|
|
69
|
+
return Path.home() / ".local" / "bin" / "claude"
|
|
70
|
+
|
|
71
|
+
CLAUDE_CLI = _resolve_claude_cli()
|
|
53
72
|
|
|
54
73
|
LAST_RUN_FILE = COORD_DIR / "sleep-last-run"
|
|
55
74
|
LOCK_FILE = COORD_DIR / "sleep.lock"
|
|
@@ -534,6 +553,7 @@ def main():
|
|
|
534
553
|
|
|
535
554
|
run_log = {"date": str(TODAY), "started": TIMESTAMP,
|
|
536
555
|
"stage_a": None, "stage_b": None, "completed": None}
|
|
556
|
+
sleep_had_errors = False
|
|
537
557
|
|
|
538
558
|
# Stage A: Housekeeping (mechanical)
|
|
539
559
|
if start_phase == "stage_a":
|
|
@@ -555,7 +575,8 @@ def main():
|
|
|
555
575
|
|
|
556
576
|
if "error" in dream_result:
|
|
557
577
|
log(f"Stage B: Dreaming failed ({dream_result['error']}). "
|
|
558
|
-
"Stage A cleanup completed successfully.
|
|
578
|
+
"Stage A cleanup completed successfully. Not marking catchup to allow retry.")
|
|
579
|
+
sleep_had_errors = True
|
|
559
580
|
else:
|
|
560
581
|
# Stage B2: Execute actions from CLI output
|
|
561
582
|
actions_file = COORD_DIR / "sleep-actions.json"
|
|
@@ -575,14 +596,15 @@ def main():
|
|
|
575
596
|
append_sleep_log(run_log)
|
|
576
597
|
log(f"NEXO Sleep v2 complete at {run_log['completed']}")
|
|
577
598
|
|
|
578
|
-
# Register for catch-up
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
599
|
+
# Register for catch-up only if all stages succeeded
|
|
600
|
+
if not sleep_had_errors:
|
|
601
|
+
try:
|
|
602
|
+
state_file = NEXO_HOME / "operations" / ".catchup-state.json"
|
|
603
|
+
st = json.loads(state_file.read_text()) if state_file.exists() else {}
|
|
604
|
+
st["sleep"] = datetime.now().isoformat()
|
|
605
|
+
state_file.write_text(json.dumps(st, indent=2))
|
|
606
|
+
except Exception:
|
|
607
|
+
pass
|
|
586
608
|
|
|
587
609
|
finally:
|
|
588
610
|
try:
|
|
@@ -26,7 +26,27 @@ NEXO_DB = NEXO_HOME / "data" / "nexo.db"
|
|
|
26
26
|
OUTPUT_FILE = COORD_DIR / "daily-synthesis.md"
|
|
27
27
|
LAST_RUN_FILE = COORD_DIR / "synthesis-last-run"
|
|
28
28
|
LOCK_FILE = COORD_DIR / "synthesis.lock"
|
|
29
|
-
|
|
29
|
+
def _resolve_claude_cli() -> Path:
|
|
30
|
+
"""Find claude CLI: saved path > PATH > common locations."""
|
|
31
|
+
import shutil as _shutil
|
|
32
|
+
saved = NEXO_HOME / "config" / "claude-cli-path"
|
|
33
|
+
if saved.exists():
|
|
34
|
+
p = Path(saved.read_text().strip())
|
|
35
|
+
if p.exists():
|
|
36
|
+
return p
|
|
37
|
+
found = _shutil.which("claude")
|
|
38
|
+
if found:
|
|
39
|
+
return Path(found)
|
|
40
|
+
for candidate in [
|
|
41
|
+
HOME / ".local" / "bin" / "claude",
|
|
42
|
+
HOME / ".npm-global" / "bin" / "claude",
|
|
43
|
+
Path("/usr/local/bin/claude"),
|
|
44
|
+
]:
|
|
45
|
+
if candidate.exists():
|
|
46
|
+
return candidate
|
|
47
|
+
return HOME / ".local" / "bin" / "claude"
|
|
48
|
+
|
|
49
|
+
CLAUDE_CLI = _resolve_claude_cli()
|
|
30
50
|
|
|
31
51
|
TODAY = date.today()
|
|
32
52
|
TODAY_STR = TODAY.isoformat()
|
|
@@ -109,17 +129,17 @@ def collect_data() -> dict:
|
|
|
109
129
|
(TODAY_STR,)
|
|
110
130
|
)
|
|
111
131
|
|
|
112
|
-
# Overdue reminders
|
|
132
|
+
# Overdue reminders (schema: description, date, status uppercase)
|
|
113
133
|
data["overdue_reminders"] = safe_query(
|
|
114
|
-
"SELECT id,
|
|
115
|
-
"WHERE status='PENDING' AND
|
|
134
|
+
"SELECT id, description, date FROM reminders "
|
|
135
|
+
"WHERE status='PENDING' AND date <= ? ORDER BY date",
|
|
116
136
|
(TODAY_STR,)
|
|
117
137
|
)
|
|
118
138
|
|
|
119
|
-
# Pending followups
|
|
139
|
+
# Pending followups (schema: description, date, status uppercase)
|
|
120
140
|
data["pending_followups"] = safe_query(
|
|
121
|
-
"SELECT id,
|
|
122
|
-
"WHERE status='
|
|
141
|
+
"SELECT id, description, date FROM followups "
|
|
142
|
+
"WHERE status='PENDING' ORDER BY date"
|
|
123
143
|
)
|
|
124
144
|
|
|
125
145
|
# Guard stats
|
|
@@ -240,13 +260,13 @@ def fallback_synthesis(data: dict):
|
|
|
240
260
|
if data.get("overdue_reminders"):
|
|
241
261
|
lines.append("## Overdue Reminders")
|
|
242
262
|
for r in data["overdue_reminders"][:10]:
|
|
243
|
-
lines.append(f"- #{r.get('id', '?')} {r.get('
|
|
263
|
+
lines.append(f"- #{r.get('id', '?')} {r.get('description', '')} (due {r.get('date', '?')})")
|
|
244
264
|
lines.append("")
|
|
245
265
|
|
|
246
266
|
if data.get("pending_followups"):
|
|
247
267
|
lines.append("## Pending Followups")
|
|
248
268
|
for f in data["pending_followups"][:10]:
|
|
249
|
-
lines.append(f"- #{f.get('id', '?')} {f.get('
|
|
269
|
+
lines.append(f"- #{f.get('id', '?')} {f.get('description', '')} (due {f.get('date', '?')})")
|
|
250
270
|
lines.append("")
|
|
251
271
|
|
|
252
272
|
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -34,13 +34,20 @@ read_version() {
|
|
|
34
34
|
python3 -c "import json; print(json.load(open('$PACKAGE_JSON')).get('version','unknown'))" 2>/dev/null || echo "unknown"
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
# ---
|
|
38
|
-
|
|
37
|
+
# --- Check if this is a git repo ---
|
|
38
|
+
if [ ! -d "$REPO_DIR/.git" ] && [ ! -f "$REPO_DIR/.git" ]; then
|
|
39
|
+
err "ABORTED: Not a git repository at $REPO_DIR"
|
|
40
|
+
err "For packaged installs, use: npm update -g nexo-brain"
|
|
41
|
+
exit 1
|
|
42
|
+
fi
|
|
43
|
+
|
|
44
|
+
# --- Step 1: Check for uncommitted changes in entire worktree ---
|
|
45
|
+
log "Checking for uncommitted changes..."
|
|
39
46
|
cd "$REPO_DIR"
|
|
40
47
|
|
|
41
|
-
if [ -n "$(git status --porcelain
|
|
42
|
-
err "ABORTED: Uncommitted changes in
|
|
43
|
-
git status --short
|
|
48
|
+
if [ -n "$(git status --porcelain 2>/dev/null)" ]; then
|
|
49
|
+
err "ABORTED: Uncommitted changes in worktree"
|
|
50
|
+
git status --short
|
|
44
51
|
exit 1
|
|
45
52
|
fi
|
|
46
53
|
log "Working tree clean."
|
|
@@ -48,6 +55,11 @@ log "Working tree clean."
|
|
|
48
55
|
# Record current state
|
|
49
56
|
OLD_VERSION="$(read_version)"
|
|
50
57
|
OLD_COMMIT="$(git rev-parse HEAD)"
|
|
58
|
+
REQ_FILE="$SRC_DIR/requirements.txt"
|
|
59
|
+
OLD_REQ_HASH=""
|
|
60
|
+
if [ -f "$REQ_FILE" ]; then
|
|
61
|
+
OLD_REQ_HASH="$(shasum -a 256 "$REQ_FILE" | cut -d' ' -f1)"
|
|
62
|
+
fi
|
|
51
63
|
log "Current: v${OLD_VERSION} (${OLD_COMMIT:0:8})"
|
|
52
64
|
|
|
53
65
|
# --- Step 2: Backup databases ---
|
|
@@ -94,6 +106,54 @@ fi
|
|
|
94
106
|
NEW_VERSION="$(read_version)"
|
|
95
107
|
log "New version: v${NEW_VERSION}"
|
|
96
108
|
|
|
109
|
+
# --- Step 4b: Reinstall Python dependencies if requirements.txt changed ---
|
|
110
|
+
NEW_REQ_HASH=""
|
|
111
|
+
if [ -f "$REQ_FILE" ]; then
|
|
112
|
+
NEW_REQ_HASH="$(shasum -a 256 "$REQ_FILE" | cut -d' ' -f1)"
|
|
113
|
+
fi
|
|
114
|
+
|
|
115
|
+
DEPS_CHANGED=false
|
|
116
|
+
if [ "$OLD_REQ_HASH" != "$NEW_REQ_HASH" ]; then
|
|
117
|
+
DEPS_CHANGED=true
|
|
118
|
+
fi
|
|
119
|
+
|
|
120
|
+
reinstall_pip_deps() {
|
|
121
|
+
local VENV_PIP="$NEXO_HOME/.venv/bin/pip"
|
|
122
|
+
if [ -f "$REQ_FILE" ]; then
|
|
123
|
+
if [ -x "$VENV_PIP" ]; then
|
|
124
|
+
"$VENV_PIP" install --quiet -r "$REQ_FILE" || return 1
|
|
125
|
+
else
|
|
126
|
+
python3 -m pip install --quiet -r "$REQ_FILE" --break-system-packages 2>/dev/null || return 1
|
|
127
|
+
fi
|
|
128
|
+
fi
|
|
129
|
+
return 0
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
if [ "$DEPS_CHANGED" = true ] || [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
|
|
133
|
+
log "Reinstalling Python dependencies..."
|
|
134
|
+
if ! reinstall_pip_deps; then
|
|
135
|
+
err "pip install failed! Rolling back..."
|
|
136
|
+
git reset --hard "$OLD_COMMIT"
|
|
137
|
+
reinstall_pip_deps || warn "pip rollback also had issues"
|
|
138
|
+
if [ -d "$BACKUP_DIR" ]; then
|
|
139
|
+
for db in "$BACKUP_DIR"/*.db; do
|
|
140
|
+
[ -f "$db" ] || continue
|
|
141
|
+
BASENAME="$(basename "$db")"
|
|
142
|
+
for candidate in "$NEXO_HOME/data/$BASENAME" "$NEXO_HOME/$BASENAME" "$SRC_DIR/$BASENAME"; do
|
|
143
|
+
if [ -f "$candidate" ]; then
|
|
144
|
+
cp "$db" "$candidate"
|
|
145
|
+
warn " Restored: $BASENAME"
|
|
146
|
+
break
|
|
147
|
+
fi
|
|
148
|
+
done
|
|
149
|
+
done
|
|
150
|
+
fi
|
|
151
|
+
err "Rolled back to ${OLD_COMMIT:0:8}. Databases restored."
|
|
152
|
+
exit 1
|
|
153
|
+
fi
|
|
154
|
+
log "Python dependencies updated."
|
|
155
|
+
fi
|
|
156
|
+
|
|
97
157
|
# --- Step 5: Run migrations if version changed ---
|
|
98
158
|
if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
|
|
99
159
|
log "Version changed: ${OLD_VERSION} -> ${NEW_VERSION}"
|
|
@@ -101,6 +161,8 @@ if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
|
|
|
101
161
|
if ! (cd "$SRC_DIR" && python3 -c "import db; db.init_db()" 2>&1); then
|
|
102
162
|
err "Migration failed! Rolling back..."
|
|
103
163
|
git reset --hard "$OLD_COMMIT"
|
|
164
|
+
# Reinstall pip deps from restored old requirements.txt
|
|
165
|
+
reinstall_pip_deps || warn "pip rollback also had issues"
|
|
104
166
|
# Restore DB backups
|
|
105
167
|
if [ -d "$BACKUP_DIR" ]; then
|
|
106
168
|
for db in "$BACKUP_DIR"/*.db; do
|
|
@@ -115,7 +177,7 @@ if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
|
|
|
115
177
|
done
|
|
116
178
|
done
|
|
117
179
|
fi
|
|
118
|
-
err "Rolled back to ${OLD_COMMIT:0:8}. Databases restored."
|
|
180
|
+
err "Rolled back to ${OLD_COMMIT:0:8}. Databases and deps restored."
|
|
119
181
|
exit 1
|
|
120
182
|
fi
|
|
121
183
|
log "Migrations applied."
|
|
@@ -128,6 +190,8 @@ log "Verifying server.py import..."
|
|
|
128
190
|
if ! (cd "$SRC_DIR" && python3 -c "import server" 2>&1); then
|
|
129
191
|
err "Import verification failed! Rolling back..."
|
|
130
192
|
git reset --hard "$OLD_COMMIT"
|
|
193
|
+
# Reinstall pip deps from restored old requirements.txt
|
|
194
|
+
reinstall_pip_deps || warn "pip rollback also had issues"
|
|
131
195
|
if [ -d "$BACKUP_DIR" ]; then
|
|
132
196
|
for db in "$BACKUP_DIR"/*.db; do
|
|
133
197
|
[ -f "$db" ] || continue
|
|
@@ -141,10 +205,48 @@ if ! (cd "$SRC_DIR" && python3 -c "import server" 2>&1); then
|
|
|
141
205
|
done
|
|
142
206
|
done
|
|
143
207
|
fi
|
|
144
|
-
err "Rolled back to ${OLD_COMMIT:0:8}. Databases restored."
|
|
208
|
+
err "Rolled back to ${OLD_COMMIT:0:8}. Databases and deps restored."
|
|
145
209
|
exit 1
|
|
146
210
|
fi
|
|
147
211
|
|
|
212
|
+
# --- Step 7: Sync hooks to NEXO_HOME ---
|
|
213
|
+
HOOKS_SRC="$SRC_DIR/hooks"
|
|
214
|
+
HOOKS_DEST="$NEXO_HOME/hooks"
|
|
215
|
+
if [ -d "$HOOKS_SRC" ]; then
|
|
216
|
+
mkdir -p "$HOOKS_DEST"
|
|
217
|
+
SYNCED=0
|
|
218
|
+
for hook in "$HOOKS_SRC"/*.sh; do
|
|
219
|
+
[ -f "$hook" ] || continue
|
|
220
|
+
cp "$hook" "$HOOKS_DEST/$(basename "$hook")"
|
|
221
|
+
chmod 755 "$HOOKS_DEST/$(basename "$hook")"
|
|
222
|
+
SYNCED=$((SYNCED + 1))
|
|
223
|
+
done
|
|
224
|
+
if [ "$SYNCED" -gt 0 ]; then
|
|
225
|
+
log "Synced $SYNCED hook(s) to $HOOKS_DEST"
|
|
226
|
+
fi
|
|
227
|
+
fi
|
|
228
|
+
|
|
229
|
+
# --- Step 8: Sync cron definitions with manifest ---
|
|
230
|
+
CRON_SYNC="$SRC_DIR/crons/sync.py"
|
|
231
|
+
CRON_SYNC_OK=false
|
|
232
|
+
if [ -f "$CRON_SYNC" ]; then
|
|
233
|
+
log "Syncing cron definitions..."
|
|
234
|
+
if NEXO_HOME="$NEXO_HOME" NEXO_CODE="$SRC_DIR" python3 "$CRON_SYNC" 2>&1; then
|
|
235
|
+
log "Cron definitions synced."
|
|
236
|
+
CRON_SYNC_OK=true
|
|
237
|
+
else
|
|
238
|
+
warn "Cron sync failed (non-fatal). Installed manifest NOT refreshed to avoid divergence."
|
|
239
|
+
fi
|
|
240
|
+
fi
|
|
241
|
+
|
|
242
|
+
# --- Step 8b: Refresh installed manifest for catchup/watchdog (only if sync succeeded) ---
|
|
243
|
+
if $CRON_SYNC_OK && [ -d "$SRC_DIR/crons" ]; then
|
|
244
|
+
mkdir -p "$NEXO_HOME/crons"
|
|
245
|
+
cp -f "$SRC_DIR/crons/"*.json "$NEXO_HOME/crons/" 2>/dev/null
|
|
246
|
+
cp -f "$SRC_DIR/crons/"*.py "$NEXO_HOME/crons/" 2>/dev/null
|
|
247
|
+
log "Refreshed installed crons manifest."
|
|
248
|
+
fi
|
|
249
|
+
|
|
148
250
|
# --- Done ---
|
|
149
251
|
echo ""
|
|
150
252
|
log "========================================="
|
|
@@ -45,7 +45,12 @@ log() { echo "[$TS] $1" >> "$LOG"; }
|
|
|
45
45
|
# The NEXO_CODE env var must point to the repo src/ directory.
|
|
46
46
|
# Add personal (non-manifest) monitors to PERSONAL_MONITORS below.
|
|
47
47
|
NEXO_CODE="${NEXO_CODE:-$(cd "$(dirname "$0")/.." 2>/dev/null && pwd)}"
|
|
48
|
-
|
|
48
|
+
# Look for manifest in NEXO_HOME first (packaged install), then NEXO_CODE (dev/repo)
|
|
49
|
+
if [ -f "$NEXO_HOME/crons/manifest.json" ]; then
|
|
50
|
+
MANIFEST_FILE="$NEXO_HOME/crons/manifest.json"
|
|
51
|
+
else
|
|
52
|
+
MANIFEST_FILE="$NEXO_CODE/crons/manifest.json"
|
|
53
|
+
fi
|
|
49
54
|
|
|
50
55
|
_build_monitors_from_manifest() {
|
|
51
56
|
if [ ! -f "$MANIFEST_FILE" ]; then
|
|
@@ -53,18 +58,22 @@ _build_monitors_from_manifest() {
|
|
|
53
58
|
return
|
|
54
59
|
fi
|
|
55
60
|
python3 -c "
|
|
56
|
-
import json, sys
|
|
61
|
+
import json, sys, platform
|
|
57
62
|
|
|
58
63
|
nexo_home = '$NEXO_HOME'
|
|
64
|
+
is_mac = platform.system() == 'Darwin'
|
|
59
65
|
|
|
60
66
|
with open('$MANIFEST_FILE') as f:
|
|
61
67
|
data = json.load(f)
|
|
62
68
|
|
|
63
69
|
for c in data.get('crons', []):
|
|
64
70
|
cid = c['id']
|
|
65
|
-
# Derive human-readable name from id
|
|
66
71
|
name = cid.replace('-', ' ').title()
|
|
67
|
-
|
|
72
|
+
# Use the right service identifier per platform
|
|
73
|
+
if is_mac:
|
|
74
|
+
svc_id = 'com.nexo.' + cid
|
|
75
|
+
else:
|
|
76
|
+
svc_id = 'nexo-' + cid + '.timer'
|
|
68
77
|
stdout_log = nexo_home + '/logs/' + cid + '-stdout.log'
|
|
69
78
|
stderr_log = nexo_home + '/logs/' + cid + '-stderr.log'
|
|
70
79
|
|
|
@@ -98,7 +107,7 @@ for c in data.get('crons', []):
|
|
|
98
107
|
mon_type = 'core' if c.get('core') else 'personal'
|
|
99
108
|
proc_grep = '' # manifest crons are one-shot, no persistent process
|
|
100
109
|
|
|
101
|
-
print(f'{name}|{
|
|
110
|
+
print(f'{name}|{svc_id}|{stdout_log}|{stderr_log}|{max_stale}|{proc_grep}|{schedule_desc}|{mon_type}')
|
|
102
111
|
" 2>/dev/null
|
|
103
112
|
}
|
|
104
113
|
|
|
@@ -140,7 +149,12 @@ IS_MACOS=false
|
|
|
140
149
|
log_repair() { echo "[$TS] REPAIR: $1" >> "$REPAIR_LOG"; log "REPAIR: $1"; }
|
|
141
150
|
|
|
142
151
|
is_loaded() {
|
|
143
|
-
$IS_MACOS
|
|
152
|
+
if $IS_MACOS; then
|
|
153
|
+
launchctl list "$1" &>/dev/null
|
|
154
|
+
else
|
|
155
|
+
# On Linux, check if the systemd timer is enabled
|
|
156
|
+
systemctl --user is-enabled "$1" &>/dev/null
|
|
157
|
+
fi
|
|
144
158
|
}
|
|
145
159
|
|
|
146
160
|
# ============================================================================
|
|
@@ -179,6 +193,36 @@ try_repair_launchagent() {
|
|
|
179
193
|
return 1
|
|
180
194
|
}
|
|
181
195
|
|
|
196
|
+
try_repair_systemd() {
|
|
197
|
+
$IS_MACOS && return 1
|
|
198
|
+
local timer_unit="$1"
|
|
199
|
+
local service_unit="${timer_unit%.timer}.service"
|
|
200
|
+
|
|
201
|
+
# Repair 1: Timer not enabled — try to enable and start
|
|
202
|
+
if ! systemctl --user is-enabled "$timer_unit" &>/dev/null; then
|
|
203
|
+
systemctl --user daemon-reload 2>/dev/null
|
|
204
|
+
systemctl --user enable --now "$timer_unit" 2>/dev/null
|
|
205
|
+
sleep 1
|
|
206
|
+
if systemctl --user is-enabled "$timer_unit" &>/dev/null; then
|
|
207
|
+
log_repair "$timer_unit: enabled and started"
|
|
208
|
+
return 0
|
|
209
|
+
fi
|
|
210
|
+
return 1
|
|
211
|
+
fi
|
|
212
|
+
|
|
213
|
+
# Repair 2: Timer enabled but not active — start it
|
|
214
|
+
if ! systemctl --user is-active "$timer_unit" &>/dev/null; then
|
|
215
|
+
systemctl --user start "$timer_unit" 2>/dev/null
|
|
216
|
+
sleep 1
|
|
217
|
+
if systemctl --user is-active "$timer_unit" &>/dev/null; then
|
|
218
|
+
log_repair "$timer_unit: restarted"
|
|
219
|
+
return 0
|
|
220
|
+
fi
|
|
221
|
+
fi
|
|
222
|
+
|
|
223
|
+
return 1
|
|
224
|
+
}
|
|
225
|
+
|
|
182
226
|
try_repair_cron() {
|
|
183
227
|
local script="$1"
|
|
184
228
|
|
|
@@ -195,29 +239,26 @@ try_repair_cron() {
|
|
|
195
239
|
}
|
|
196
240
|
|
|
197
241
|
try_reexecute_missed_cron() {
|
|
198
|
-
$
|
|
199
|
-
# Re-execute a cron that missed its scheduled run
|
|
200
|
-
# Extracts ProgramArguments from the plist and runs them
|
|
201
|
-
local plist_id="$1"
|
|
202
|
-
local plist_file="$HOME_DIR/Library/LaunchAgents/${plist_id}.plist"
|
|
242
|
+
local svc_id="$1"
|
|
203
243
|
|
|
204
|
-
if
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
fi
|
|
244
|
+
if $IS_MACOS; then
|
|
245
|
+
# macOS: extract command from plist and run it
|
|
246
|
+
local plist_file="$HOME_DIR/Library/LaunchAgents/${svc_id}.plist"
|
|
208
247
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
248
|
+
if [ ! -f "$plist_file" ]; then
|
|
249
|
+
log "Re-execute skipped: no plist for $svc_id"
|
|
250
|
+
return 1
|
|
251
|
+
fi
|
|
252
|
+
|
|
253
|
+
local cmd
|
|
254
|
+
cmd=$(python3 -c "
|
|
212
255
|
import plistlib, sys
|
|
213
256
|
try:
|
|
214
257
|
with open('$plist_file', 'rb') as f:
|
|
215
258
|
d = plistlib.load(f)
|
|
216
259
|
args = d.get('ProgramArguments', [])
|
|
217
|
-
# Skip KeepAlive services (they should be running, not re-executed)
|
|
218
260
|
if d.get('KeepAlive'):
|
|
219
261
|
sys.exit(1)
|
|
220
|
-
# Skip services without a schedule (RunAtLoad only)
|
|
221
262
|
if not d.get('StartCalendarInterval') and not d.get('StartInterval'):
|
|
222
263
|
sys.exit(1)
|
|
223
264
|
print(' '.join(args))
|
|
@@ -225,28 +266,36 @@ except:
|
|
|
225
266
|
sys.exit(1)
|
|
226
267
|
" 2>/dev/null)
|
|
227
268
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
log "Re-executing missed cron: $plist_id → $cmd"
|
|
233
|
-
# Run in background with timeout (5 min max)
|
|
234
|
-
timeout 300 bash -c "$cmd" >> "$LOG_DIR/watchdog-reexec.log" 2>&1 &
|
|
235
|
-
local pid=$!
|
|
269
|
+
if [ -z "$cmd" ] || [ $? -ne 0 ]; then
|
|
270
|
+
return 1
|
|
271
|
+
fi
|
|
236
272
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
273
|
+
log "Re-executing missed cron: $svc_id → $cmd"
|
|
274
|
+
timeout 300 bash -c "$cmd" >> "$LOG_DIR/watchdog-reexec.log" 2>&1 &
|
|
275
|
+
local pid=$!
|
|
276
|
+
sleep 2
|
|
277
|
+
if kill -0 "$pid" 2>/dev/null || wait "$pid" 2>/dev/null; then
|
|
278
|
+
log_repair "$svc_id: re-executed missed cron (PID $pid)"
|
|
279
|
+
return 0
|
|
280
|
+
else
|
|
281
|
+
log "Re-execute failed for $svc_id"
|
|
282
|
+
return 1
|
|
283
|
+
fi
|
|
242
284
|
else
|
|
243
|
-
|
|
244
|
-
|
|
285
|
+
# Linux: start the corresponding service unit directly
|
|
286
|
+
local service_unit="${svc_id%.timer}.service"
|
|
287
|
+
log "Re-executing missed cron: $svc_id → systemctl start $service_unit"
|
|
288
|
+
if systemctl --user start "$service_unit" 2>/dev/null; then
|
|
289
|
+
log_repair "$svc_id: re-executed via systemctl start $service_unit"
|
|
290
|
+
return 0
|
|
291
|
+
else
|
|
292
|
+
log "Re-execute failed for $svc_id"
|
|
293
|
+
return 1
|
|
294
|
+
fi
|
|
245
295
|
fi
|
|
246
296
|
}
|
|
247
297
|
|
|
248
298
|
try_verify_repair() {
|
|
249
|
-
$IS_MACOS || return 1
|
|
250
299
|
# After Level 2 repair, wait and verify the service is healthy
|
|
251
300
|
local plist_id="$1"
|
|
252
301
|
local log_stdout="$2"
|
|
@@ -388,20 +437,26 @@ for monitor in "${MONITORS[@]}"; do
|
|
|
388
437
|
error_count=0
|
|
389
438
|
proc_alive="n/a"
|
|
390
439
|
|
|
391
|
-
# Check 1:
|
|
440
|
+
# Check 1: Service loaded? (launchd on macOS, systemd on Linux)
|
|
392
441
|
if is_loaded "$plist_id"; then
|
|
393
442
|
loaded="yes"
|
|
394
443
|
else
|
|
395
444
|
loaded="no"
|
|
396
|
-
# AUTO-REPAIR: try
|
|
397
|
-
|
|
445
|
+
# AUTO-REPAIR: try platform-appropriate repair
|
|
446
|
+
repair_ok=false
|
|
447
|
+
if $IS_MACOS; then
|
|
448
|
+
try_repair_launchagent "$plist_id" "$proc_grep" && repair_ok=true
|
|
449
|
+
else
|
|
450
|
+
try_repair_systemd "$plist_id" && repair_ok=true
|
|
451
|
+
fi
|
|
452
|
+
if $repair_ok; then
|
|
398
453
|
loaded="yes"
|
|
399
454
|
status="HEALED"
|
|
400
|
-
details="${details}Self-healed:
|
|
455
|
+
details="${details}Self-healed: service re-registered. "
|
|
401
456
|
TOTAL_HEALED=$((TOTAL_HEALED + 1))
|
|
402
457
|
else
|
|
403
458
|
status="FAIL"
|
|
404
|
-
details="${details}
|
|
459
|
+
details="${details}Service not loaded (repair failed). "
|
|
405
460
|
fi
|
|
406
461
|
fi
|
|
407
462
|
|
|
@@ -411,9 +466,10 @@ for monitor in "${MONITORS[@]}"; do
|
|
|
411
466
|
proc_alive="yes"
|
|
412
467
|
else
|
|
413
468
|
proc_alive="no"
|
|
414
|
-
# AUTO-REPAIR: try to kickstart
|
|
469
|
+
# AUTO-REPAIR: try to kickstart (platform-appropriate)
|
|
415
470
|
if [ "$status" != "FAIL" ] && [ "$status" != "HEALED" ]; then
|
|
416
|
-
if try_repair_launchagent "$plist_id" "$proc_grep"
|
|
471
|
+
if ($IS_MACOS && try_repair_launchagent "$plist_id" "$proc_grep") || \
|
|
472
|
+
(! $IS_MACOS && try_repair_systemd "$plist_id"); then
|
|
417
473
|
proc_alive="yes"
|
|
418
474
|
status="HEALED"
|
|
419
475
|
details="${details}Self-healed: kickstarted. "
|
|
@@ -820,7 +876,7 @@ if [ "$TOTAL_FAIL" -gt 0 ]; then
|
|
|
820
876
|
Schedule: ${m_sched}
|
|
821
877
|
Type: ${m_type}
|
|
822
878
|
Failure reason: ${m_details}
|
|
823
|
-
|
|
879
|
+
Service config: $($IS_MACOS && echo "~/Library/LaunchAgents/${m_plist}.plist" || echo "~/.config/systemd/user/${m_plist}")
|
|
824
880
|
Process grep: ${m_proc}
|
|
825
881
|
Stderr (last 20 lines):
|
|
826
882
|
${STDERR_TAIL}
|
|
@@ -857,7 +913,7 @@ Do NOT propagate fixes for [personal] services — those stay local only."
|
|
|
857
913
|
cat > "$REPAIR_PROMPT_FILE" <<NEXOPROMPT
|
|
858
914
|
WATCHDOG LEVEL 2 REPAIR — Automated diagnostic session.
|
|
859
915
|
|
|
860
|
-
The watchdog detected services that failed mechanical repair (launchctl
|
|
916
|
+
The watchdog detected services that failed mechanical repair (launchctl/systemctl re-registration).
|
|
861
917
|
Your job: diagnose the root cause and fix it. Do NOT ask the user anything — resolve autonomously.
|
|
862
918
|
|
|
863
919
|
Each failure is tagged [core] or [personal]:
|
|
@@ -868,10 +924,10 @@ FAILURES:
|
|
|
868
924
|
${FAIL_DETAILS}
|
|
869
925
|
|
|
870
926
|
STEPS:
|
|
871
|
-
1. Read the plist
|
|
927
|
+
1. Read the service config (plist on macOS, systemd unit on Linux) to understand the service
|
|
872
928
|
2. Check stderr/stdout logs for the actual error
|
|
873
929
|
3. Fix the root cause (missing file, bad config, dependency issue, etc.)
|
|
874
|
-
4. Reload the service and verify it is running
|
|
930
|
+
4. Reload the service and verify it is running (launchctl on macOS, systemctl on Linux)
|
|
875
931
|
5. Log what you did to $NEXO_HOME/logs/watchdog-repair-result.log
|
|
876
932
|
${PROPAGATE_BLOCK}
|
|
877
933
|
|