nexo-brain 2.3.1 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,7 +42,27 @@ MEMORY_DIR = NEXO_HOME / "memory"
42
42
  MEMORY_INDEX = MEMORY_DIR / "MEMORY.md"
43
43
  HISTORY_FILE = NEXO_HOME / "coordination" / "postmortem-history.json"
44
44
  CONSOLIDATION_LOG = NEXO_HOME / "logs" / "postmortem-consolidation.log"
45
- CLAUDE_CLI = HOME / ".local" / "bin" / "claude"
45
def _resolve_claude_cli() -> Path:
    """Locate the ``claude`` CLI executable.

    Lookup order:

    1. The path stored in ``NEXO_HOME/config/claude-cli-path``, when that
       file is readable, non-empty and names an existing file.
    2. The first ``claude`` found on ``PATH``.
    3. A short list of common install locations.

    Returns:
        The first match, or ``HOME/.local/bin/claude`` when nothing is
        found. The fallback is not checked for existence, so callers must
        still cope with a missing binary.
    """
    import shutil as _shutil

    default = HOME / ".local" / "bin" / "claude"

    saved = NEXO_HOME / "config" / "claude-cli-path"
    try:
        saved_text = saved.read_text().strip()
    except (OSError, UnicodeDecodeError):
        # Missing, unreadable or non-text file: this runs at import time,
        # so fall through to the other lookups instead of raising.
        saved_text = ""
    # An empty value must be rejected explicitly: Path("") is Path("."),
    # which always exists and would be returned as the "CLI".
    if saved_text:
        p = Path(saved_text)
        if p.is_file():
            return p

    found = _shutil.which("claude")
    if found:
        return Path(found)

    for candidate in (
        default,
        HOME / ".npm-global" / "bin" / "claude",
        Path("/usr/local/bin/claude"),
    ):
        if candidate.exists():
            return candidate
    return default
64
+
65
+ CLAUDE_CLI = _resolve_claude_cli()
46
66
  SESSION_BUFFER = NEXO_HOME / "brain" / "session_buffer.jsonl"
47
67
 
48
68
  TODAY = date.today()
@@ -379,6 +399,7 @@ def main():
379
399
  return
380
400
 
381
401
  log("=== NEXO Post-Mortem Consolidator v2 starting ===")
402
+ had_errors = False
382
403
 
383
404
  # Stage 1: Collect data
384
405
  data = collect_data()
@@ -392,27 +413,31 @@ def main():
392
413
  if not success:
393
414
  log("Stage 2 failed (CLI unavailable or error). "
394
415
  "Skipping intelligent consolidation. Stage 3 (sensory + force) will still run.")
416
+ had_errors = True
395
417
 
396
418
  # Stage 3: Sensory Register (mechanical, kept from v1)
397
419
  try:
398
420
  process_sensory_register()
399
421
  except Exception as e:
400
422
  log(f"Sensory register failed: {e}")
423
+ had_errors = True
401
424
 
402
425
  # Stage 3b: Force analysis (mechanical, kept from v1)
403
426
  try:
404
427
  analyze_force_events()
405
428
  except Exception as e:
406
429
  log(f"Force analysis failed: {e}")
430
+ had_errors = True
407
431
 
408
- # Register successful run
409
- try:
410
- state_file = NEXO_HOME / "operations" / ".catchup-state.json"
411
- state = json.loads(state_file.read_text()) if state_file.exists() else {}
412
- state["postmortem"] = datetime.now().isoformat()
413
- state_file.write_text(json.dumps(state, indent=2))
414
- except Exception:
415
- pass
432
+ # Register successful run only if no stages failed
433
+ if not had_errors:
434
+ try:
435
+ state_file = NEXO_HOME / "operations" / ".catchup-state.json"
436
+ state = json.loads(state_file.read_text()) if state_file.exists() else {}
437
+ state["postmortem"] = datetime.now().isoformat()
438
+ state_file.write_text(json.dumps(state, indent=2))
439
+ except Exception:
440
+ pass
416
441
 
417
442
  mark_done()
418
443
  log("=== Consolidation v2 complete ===")
@@ -49,7 +49,26 @@ SLEEP_LOG = COORD_DIR / "sleep-log.json"
49
49
  MEMORY_MD = NEXO_HOME / "memory" / "MEMORY.md"
50
50
  NEXO_DB = NEXO_HOME / "data" / "nexo.db"
51
51
  CLAUDE_MEM_DB = Path.home() / ".claude-mem" / "claude-mem.db"
52
- CLAUDE_CLI = Path.home() / ".local" / "bin" / "claude"
52
def _resolve_claude_cli() -> Path:
    """Locate the ``claude`` CLI executable.

    Lookup order:

    1. The path stored in ``NEXO_HOME/config/claude-cli-path``, when that
       file is readable, non-empty and names an existing file.
    2. The first ``claude`` found on ``PATH``.
    3. A short list of common install locations under the user's home
       directory and ``/usr/local/bin``.

    Returns:
        The first match, or ``~/.local/bin/claude`` when nothing is found.
        The fallback is not checked for existence, so callers must still
        cope with a missing binary.
    """
    home = Path.home()
    default = home / ".local" / "bin" / "claude"

    saved = NEXO_HOME / "config" / "claude-cli-path"
    try:
        saved_text = saved.read_text().strip()
    except (OSError, UnicodeDecodeError):
        # Missing, unreadable or non-text file: this runs at import time,
        # so fall through to the other lookups instead of raising.
        saved_text = ""
    # An empty value must be rejected explicitly: Path("") is Path("."),
    # which always exists and would be returned as the "CLI".
    if saved_text:
        p = Path(saved_text)
        if p.is_file():
            return p

    found = shutil.which("claude")
    if found:
        return Path(found)

    for candidate in (
        default,
        home / ".npm-global" / "bin" / "claude",
        Path("/usr/local/bin/claude"),
    ):
        if candidate.exists():
            return candidate
    return default
70
+
71
+ CLAUDE_CLI = _resolve_claude_cli()
53
72
 
54
73
  LAST_RUN_FILE = COORD_DIR / "sleep-last-run"
55
74
  LOCK_FILE = COORD_DIR / "sleep.lock"
@@ -534,6 +553,7 @@ def main():
534
553
 
535
554
  run_log = {"date": str(TODAY), "started": TIMESTAMP,
536
555
  "stage_a": None, "stage_b": None, "completed": None}
556
+ sleep_had_errors = False
537
557
 
538
558
  # Stage A: Housekeeping (mechanical)
539
559
  if start_phase == "stage_a":
@@ -555,7 +575,8 @@ def main():
555
575
 
556
576
  if "error" in dream_result:
557
577
  log(f"Stage B: Dreaming failed ({dream_result['error']}). "
558
- "Stage A cleanup completed successfully. Marking done to avoid retry loop.")
578
+ "Stage A cleanup completed successfully. Not marking catchup to allow retry.")
579
+ sleep_had_errors = True
559
580
  else:
560
581
  # Stage B2: Execute actions from CLI output
561
582
  actions_file = COORD_DIR / "sleep-actions.json"
@@ -575,14 +596,15 @@ def main():
575
596
  append_sleep_log(run_log)
576
597
  log(f"NEXO Sleep v2 complete at {run_log['completed']}")
577
598
 
578
- # Register for catch-up
579
- try:
580
- state_file = NEXO_HOME / "operations" / ".catchup-state.json"
581
- st = json.loads(state_file.read_text()) if state_file.exists() else {}
582
- st["sleep"] = datetime.now().isoformat()
583
- state_file.write_text(json.dumps(st, indent=2))
584
- except Exception:
585
- pass
599
+ # Register for catch-up only if all stages succeeded
600
+ if not sleep_had_errors:
601
+ try:
602
+ state_file = NEXO_HOME / "operations" / ".catchup-state.json"
603
+ st = json.loads(state_file.read_text()) if state_file.exists() else {}
604
+ st["sleep"] = datetime.now().isoformat()
605
+ state_file.write_text(json.dumps(st, indent=2))
606
+ except Exception:
607
+ pass
586
608
 
587
609
  finally:
588
610
  try:
@@ -26,7 +26,27 @@ NEXO_DB = NEXO_HOME / "data" / "nexo.db"
26
26
  OUTPUT_FILE = COORD_DIR / "daily-synthesis.md"
27
27
  LAST_RUN_FILE = COORD_DIR / "synthesis-last-run"
28
28
  LOCK_FILE = COORD_DIR / "synthesis.lock"
29
- CLAUDE_CLI = HOME / ".local" / "bin" / "claude"
29
def _resolve_claude_cli() -> Path:
    """Locate the ``claude`` CLI executable.

    Lookup order:

    1. The path stored in ``NEXO_HOME/config/claude-cli-path``, when that
       file is readable, non-empty and names an existing file.
    2. The first ``claude`` found on ``PATH``.
    3. A short list of common install locations.

    Returns:
        The first match, or ``HOME/.local/bin/claude`` when nothing is
        found. The fallback is not checked for existence, so callers must
        still cope with a missing binary.
    """
    import shutil as _shutil

    fallback = HOME / ".local" / "bin" / "claude"
    saved = NEXO_HOME / "config" / "claude-cli-path"

    try:
        recorded = saved.read_text().strip()
    except (OSError, UnicodeDecodeError):
        # Missing, unreadable or non-text file: this runs at import time,
        # so continue with the other lookups instead of raising.
        recorded = ""
    # Guard against an empty value: Path("") is Path("."), which always
    # exists and would otherwise be returned as the "CLI".
    if recorded and Path(recorded).is_file():
        return Path(recorded)

    found = _shutil.which("claude")
    if found:
        return Path(found)

    common_locations = (
        fallback,
        HOME / ".npm-global" / "bin" / "claude",
        Path("/usr/local/bin/claude"),
    )
    for candidate in common_locations:
        if candidate.exists():
            return candidate
    return fallback
48
+
49
+ CLAUDE_CLI = _resolve_claude_cli()
30
50
 
31
51
  TODAY = date.today()
32
52
  TODAY_STR = TODAY.isoformat()
@@ -109,17 +129,17 @@ def collect_data() -> dict:
109
129
  (TODAY_STR,)
110
130
  )
111
131
 
112
- # Overdue reminders
132
+ # Overdue reminders (schema: description, date, status uppercase)
113
133
  data["overdue_reminders"] = safe_query(
114
- "SELECT id, title, due_date FROM reminders "
115
- "WHERE status='PENDING' AND due_date <= ? ORDER BY due_date",
134
+ "SELECT id, description, date FROM reminders "
135
+ "WHERE status='PENDING' AND date <= ? ORDER BY date",
116
136
  (TODAY_STR,)
117
137
  )
118
138
 
119
- # Pending followups
139
+ # Pending followups (schema: description, date, status uppercase)
120
140
  data["pending_followups"] = safe_query(
121
- "SELECT id, title, description, due_date FROM followups "
122
- "WHERE status='pending' ORDER BY due_date"
141
+ "SELECT id, description, date FROM followups "
142
+ "WHERE status='PENDING' ORDER BY date"
123
143
  )
124
144
 
125
145
  # Guard stats
@@ -240,13 +260,13 @@ def fallback_synthesis(data: dict):
240
260
  if data.get("overdue_reminders"):
241
261
  lines.append("## Overdue Reminders")
242
262
  for r in data["overdue_reminders"][:10]:
243
- lines.append(f"- #{r.get('id', '?')} {r.get('title', '')} (due {r.get('due_date', '?')})")
263
+ lines.append(f"- #{r.get('id', '?')} {r.get('description', '')} (due {r.get('date', '?')})")
244
264
  lines.append("")
245
265
 
246
266
  if data.get("pending_followups"):
247
267
  lines.append("## Pending Followups")
248
268
  for f in data["pending_followups"][:10]:
249
- lines.append(f"- #{f.get('id', '?')} {f.get('title', '')} (due {f.get('due_date', '?')})")
269
+ lines.append(f"- #{f.get('id', '?')} {f.get('description', '')} (due {f.get('date', '?')})")
250
270
  lines.append("")
251
271
 
252
272
  OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
@@ -34,13 +34,20 @@ read_version() {
34
34
  python3 -c "import json; print(json.load(open('$PACKAGE_JSON')).get('version','unknown'))" 2>/dev/null || echo "unknown"
35
35
  }
36
36
 
37
- # --- Step 1: Check for uncommitted changes in src/ ---
38
- log "Checking for uncommitted changes in src/..."
37
+ # --- Check if this is a git repo ---
38
+ if [ ! -d "$REPO_DIR/.git" ] && [ ! -f "$REPO_DIR/.git" ]; then
39
+ err "ABORTED: Not a git repository at $REPO_DIR"
40
+ err "For packaged installs, use: npm update -g nexo-brain"
41
+ exit 1
42
+ fi
43
+
44
+ # --- Step 1: Check for uncommitted changes in entire worktree ---
45
+ log "Checking for uncommitted changes..."
39
46
  cd "$REPO_DIR"
40
47
 
41
- if [ -n "$(git status --porcelain -- src/ 2>/dev/null)" ]; then
42
- err "ABORTED: Uncommitted changes in src/"
43
- git status --short -- src/
48
+ if [ -n "$(git status --porcelain 2>/dev/null)" ]; then
49
+ err "ABORTED: Uncommitted changes in worktree"
50
+ git status --short
44
51
  exit 1
45
52
  fi
46
53
  log "Working tree clean."
@@ -48,6 +55,11 @@ log "Working tree clean."
48
55
  # Record current state
49
56
  OLD_VERSION="$(read_version)"
50
57
  OLD_COMMIT="$(git rev-parse HEAD)"
58
+ REQ_FILE="$SRC_DIR/requirements.txt"
59
+ OLD_REQ_HASH=""
60
+ if [ -f "$REQ_FILE" ]; then
61
+ OLD_REQ_HASH="$(shasum -a 256 "$REQ_FILE" | cut -d' ' -f1)"
62
+ fi
51
63
  log "Current: v${OLD_VERSION} (${OLD_COMMIT:0:8})"
52
64
 
53
65
  # --- Step 2: Backup databases ---
@@ -94,6 +106,54 @@ fi
94
106
  NEW_VERSION="$(read_version)"
95
107
  log "New version: v${NEW_VERSION}"
96
108
 
109
+ # --- Step 4b: Reinstall Python dependencies if requirements.txt changed ---
110
+ NEW_REQ_HASH=""
111
+ if [ -f "$REQ_FILE" ]; then
112
+ NEW_REQ_HASH="$(shasum -a 256 "$REQ_FILE" | cut -d' ' -f1)"
113
+ fi
114
+
115
+ DEPS_CHANGED=false
116
+ if [ "$OLD_REQ_HASH" != "$NEW_REQ_HASH" ]; then
117
+ DEPS_CHANGED=true
118
+ fi
119
+
120
# Install the Python packages listed in $REQ_FILE.
#
# Uses the NEXO virtualenv's pip ($NEXO_HOME/.venv/bin/pip) when it is
# executable; otherwise falls back to the system interpreter's pip.
#
# Globals read: NEXO_HOME, REQ_FILE
# Returns: 0 on success or when $REQ_FILE does not exist; 1 when pip fails.
reinstall_pip_deps() {
  local venv_pip="$NEXO_HOME/.venv/bin/pip"

  # Nothing to install without a requirements file.
  [ -f "$REQ_FILE" ] || return 0

  if [ -x "$venv_pip" ]; then
    "$venv_pip" install --quiet -r "$REQ_FILE" || return 1
    return 0
  fi

  # System pip: try a plain install first. Older pip releases reject
  # --break-system-packages as an unknown option, which would make the
  # updater roll back on hosts where a plain install works fine.
  if python3 -m pip install --quiet -r "$REQ_FILE" 2>/dev/null; then
    return 0
  fi

  # Retry for externally-managed (PEP 668) interpreters. stderr is left
  # visible here so the cause of a real failure reaches the user.
  python3 -m pip install --quiet -r "$REQ_FILE" --break-system-packages || return 1
  return 0
}
131
+
132
+ if [ "$DEPS_CHANGED" = true ] || [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
133
+ log "Reinstalling Python dependencies..."
134
+ if ! reinstall_pip_deps; then
135
+ err "pip install failed! Rolling back..."
136
+ git reset --hard "$OLD_COMMIT"
137
+ reinstall_pip_deps || warn "pip rollback also had issues"
138
+ if [ -d "$BACKUP_DIR" ]; then
139
+ for db in "$BACKUP_DIR"/*.db; do
140
+ [ -f "$db" ] || continue
141
+ BASENAME="$(basename "$db")"
142
+ for candidate in "$NEXO_HOME/data/$BASENAME" "$NEXO_HOME/$BASENAME" "$SRC_DIR/$BASENAME"; do
143
+ if [ -f "$candidate" ]; then
144
+ cp "$db" "$candidate"
145
+ warn " Restored: $BASENAME"
146
+ break
147
+ fi
148
+ done
149
+ done
150
+ fi
151
+ err "Rolled back to ${OLD_COMMIT:0:8}. Databases restored."
152
+ exit 1
153
+ fi
154
+ log "Python dependencies updated."
155
+ fi
156
+
97
157
  # --- Step 5: Run migrations if version changed ---
98
158
  if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
99
159
  log "Version changed: ${OLD_VERSION} -> ${NEW_VERSION}"
@@ -101,6 +161,8 @@ if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
101
161
  if ! (cd "$SRC_DIR" && python3 -c "import db; db.init_db()" 2>&1); then
102
162
  err "Migration failed! Rolling back..."
103
163
  git reset --hard "$OLD_COMMIT"
164
+ # Reinstall pip deps from restored old requirements.txt
165
+ reinstall_pip_deps || warn "pip rollback also had issues"
104
166
  # Restore DB backups
105
167
  if [ -d "$BACKUP_DIR" ]; then
106
168
  for db in "$BACKUP_DIR"/*.db; do
@@ -115,7 +177,7 @@ if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
115
177
  done
116
178
  done
117
179
  fi
118
- err "Rolled back to ${OLD_COMMIT:0:8}. Databases restored."
180
+ err "Rolled back to ${OLD_COMMIT:0:8}. Databases and deps restored."
119
181
  exit 1
120
182
  fi
121
183
  log "Migrations applied."
@@ -128,6 +190,8 @@ log "Verifying server.py import..."
128
190
  if ! (cd "$SRC_DIR" && python3 -c "import server" 2>&1); then
129
191
  err "Import verification failed! Rolling back..."
130
192
  git reset --hard "$OLD_COMMIT"
193
+ # Reinstall pip deps from restored old requirements.txt
194
+ reinstall_pip_deps || warn "pip rollback also had issues"
131
195
  if [ -d "$BACKUP_DIR" ]; then
132
196
  for db in "$BACKUP_DIR"/*.db; do
133
197
  [ -f "$db" ] || continue
@@ -141,10 +205,48 @@ if ! (cd "$SRC_DIR" && python3 -c "import server" 2>&1); then
141
205
  done
142
206
  done
143
207
  fi
144
- err "Rolled back to ${OLD_COMMIT:0:8}. Databases restored."
208
+ err "Rolled back to ${OLD_COMMIT:0:8}. Databases and deps restored."
145
209
  exit 1
146
210
  fi
147
211
 
212
+ # --- Step 7: Sync hooks to NEXO_HOME ---
213
+ HOOKS_SRC="$SRC_DIR/hooks"
214
+ HOOKS_DEST="$NEXO_HOME/hooks"
215
+ if [ -d "$HOOKS_SRC" ]; then
216
+ mkdir -p "$HOOKS_DEST"
217
+ SYNCED=0
218
+ for hook in "$HOOKS_SRC"/*.sh; do
219
+ [ -f "$hook" ] || continue
220
+ cp "$hook" "$HOOKS_DEST/$(basename "$hook")"
221
+ chmod 755 "$HOOKS_DEST/$(basename "$hook")"
222
+ SYNCED=$((SYNCED + 1))
223
+ done
224
+ if [ "$SYNCED" -gt 0 ]; then
225
+ log "Synced $SYNCED hook(s) to $HOOKS_DEST"
226
+ fi
227
+ fi
228
+
229
+ # --- Step 8: Sync cron definitions with manifest ---
230
+ CRON_SYNC="$SRC_DIR/crons/sync.py"
231
+ CRON_SYNC_OK=false
232
+ if [ -f "$CRON_SYNC" ]; then
233
+ log "Syncing cron definitions..."
234
+ if NEXO_HOME="$NEXO_HOME" NEXO_CODE="$SRC_DIR" python3 "$CRON_SYNC" 2>&1; then
235
+ log "Cron definitions synced."
236
+ CRON_SYNC_OK=true
237
+ else
238
+ warn "Cron sync failed (non-fatal). Installed manifest NOT refreshed to avoid divergence."
239
+ fi
240
+ fi
241
+
242
+ # --- Step 8b: Refresh installed manifest for catchup/watchdog (only if sync succeeded) ---
243
+ if $CRON_SYNC_OK && [ -d "$SRC_DIR/crons" ]; then
244
+ mkdir -p "$NEXO_HOME/crons"
245
+ cp -f "$SRC_DIR/crons/"*.json "$NEXO_HOME/crons/" 2>/dev/null
246
+ cp -f "$SRC_DIR/crons/"*.py "$NEXO_HOME/crons/" 2>/dev/null
247
+ log "Refreshed installed crons manifest."
248
+ fi
249
+
148
250
  # --- Done ---
149
251
  echo ""
150
252
  log "========================================="
@@ -45,7 +45,12 @@ log() { echo "[$TS] $1" >> "$LOG"; }
45
45
  # The NEXO_CODE env var must point to the repo src/ directory.
46
46
  # Add personal (non-manifest) monitors to PERSONAL_MONITORS below.
47
47
  NEXO_CODE="${NEXO_CODE:-$(cd "$(dirname "$0")/.." 2>/dev/null && pwd)}"
48
- MANIFEST_FILE="$NEXO_CODE/crons/manifest.json"
48
+ # Look for manifest in NEXO_HOME first (packaged install), then NEXO_CODE (dev/repo)
49
+ if [ -f "$NEXO_HOME/crons/manifest.json" ]; then
50
+ MANIFEST_FILE="$NEXO_HOME/crons/manifest.json"
51
+ else
52
+ MANIFEST_FILE="$NEXO_CODE/crons/manifest.json"
53
+ fi
49
54
 
50
55
  _build_monitors_from_manifest() {
51
56
  if [ ! -f "$MANIFEST_FILE" ]; then
@@ -53,18 +58,22 @@ _build_monitors_from_manifest() {
53
58
  return
54
59
  fi
55
60
  python3 -c "
56
- import json, sys
61
+ import json, sys, platform
57
62
 
58
63
  nexo_home = '$NEXO_HOME'
64
+ is_mac = platform.system() == 'Darwin'
59
65
 
60
66
  with open('$MANIFEST_FILE') as f:
61
67
  data = json.load(f)
62
68
 
63
69
  for c in data.get('crons', []):
64
70
  cid = c['id']
65
- # Derive human-readable name from id
66
71
  name = cid.replace('-', ' ').title()
67
- plist_id = 'com.nexo.' + cid
72
+ # Use the right service identifier per platform
73
+ if is_mac:
74
+ svc_id = 'com.nexo.' + cid
75
+ else:
76
+ svc_id = 'nexo-' + cid + '.timer'
68
77
  stdout_log = nexo_home + '/logs/' + cid + '-stdout.log'
69
78
  stderr_log = nexo_home + '/logs/' + cid + '-stderr.log'
70
79
 
@@ -98,7 +107,7 @@ for c in data.get('crons', []):
98
107
  mon_type = 'core' if c.get('core') else 'personal'
99
108
  proc_grep = '' # manifest crons are one-shot, no persistent process
100
109
 
101
- print(f'{name}|{plist_id}|{stdout_log}|{stderr_log}|{max_stale}|{proc_grep}|{schedule_desc}|{mon_type}')
110
+ print(f'{name}|{svc_id}|{stdout_log}|{stderr_log}|{max_stale}|{proc_grep}|{schedule_desc}|{mon_type}')
102
111
  " 2>/dev/null
103
112
  }
104
113
 
@@ -140,7 +149,12 @@ IS_MACOS=false
140
149
  log_repair() { echo "[$TS] REPAIR: $1" >> "$REPAIR_LOG"; log "REPAIR: $1"; }
141
150
 
142
151
  is_loaded() {
143
- $IS_MACOS && launchctl list "$1" &>/dev/null
152
+ if $IS_MACOS; then
153
+ launchctl list "$1" &>/dev/null
154
+ else
155
+ # On Linux, check if the systemd timer is enabled
156
+ systemctl --user is-enabled "$1" &>/dev/null
157
+ fi
144
158
  }
145
159
 
146
160
  # ============================================================================
@@ -179,6 +193,36 @@ try_repair_launchagent() {
179
193
  return 1
180
194
  }
181
195
 
196
# Attempt a mechanical repair of a systemd user timer (non-macOS only).
#
# Arguments:
#   $1 - timer unit name
# Globals read: IS_MACOS. Calls log_repair when a repair succeeds.
# Returns: 0 if this call enabled or started the timer; 1 otherwise
#          (macOS, systemctl unavailable, repair failed, or the timer was
#          already enabled and active so there was nothing to repair).
try_repair_systemd() {
  $IS_MACOS && return 1
  # Without systemctl every step below fails anyway; bail out before
  # spending time on daemon-reload and sleep.
  command -v systemctl >/dev/null 2>&1 || return 1

  local timer_unit="$1"

  # Repair 1: Timer not enabled — try to enable and start
  if ! systemctl --user is-enabled "$timer_unit" &>/dev/null; then
    systemctl --user daemon-reload 2>/dev/null
    systemctl --user enable --now "$timer_unit" 2>/dev/null
    sleep 1  # give systemd a moment before re-checking
    if systemctl --user is-enabled "$timer_unit" &>/dev/null; then
      log_repair "$timer_unit: enabled and started"
      return 0
    fi
    return 1
  fi

  # Repair 2: Timer enabled but not active — start it
  if ! systemctl --user is-active "$timer_unit" &>/dev/null; then
    systemctl --user start "$timer_unit" 2>/dev/null
    sleep 1
    if systemctl --user is-active "$timer_unit" &>/dev/null; then
      log_repair "$timer_unit: restarted"
      return 0
    fi
  fi

  return 1
}
225
+
182
226
  try_repair_cron() {
183
227
  local script="$1"
184
228
 
@@ -195,29 +239,26 @@ try_repair_cron() {
195
239
  }
196
240
 
197
241
  try_reexecute_missed_cron() {
198
- $IS_MACOS || return 1
199
- # Re-execute a cron that missed its scheduled run
200
- # Extracts ProgramArguments from the plist and runs them
201
- local plist_id="$1"
202
- local plist_file="$HOME_DIR/Library/LaunchAgents/${plist_id}.plist"
242
+ local svc_id="$1"
203
243
 
204
- if [ ! -f "$plist_file" ]; then
205
- log "Re-execute skipped: no plist for $plist_id"
206
- return 1
207
- fi
244
+ if $IS_MACOS; then
245
+ # macOS: extract command from plist and run it
246
+ local plist_file="$HOME_DIR/Library/LaunchAgents/${svc_id}.plist"
208
247
 
209
- # Extract the full command from plist
210
- local cmd
211
- cmd=$(python3 -c "
248
+ if [ ! -f "$plist_file" ]; then
249
+ log "Re-execute skipped: no plist for $svc_id"
250
+ return 1
251
+ fi
252
+
253
+ local cmd
254
+ cmd=$(python3 -c "
212
255
  import plistlib, sys
213
256
  try:
214
257
  with open('$plist_file', 'rb') as f:
215
258
  d = plistlib.load(f)
216
259
  args = d.get('ProgramArguments', [])
217
- # Skip KeepAlive services (they should be running, not re-executed)
218
260
  if d.get('KeepAlive'):
219
261
  sys.exit(1)
220
- # Skip services without a schedule (RunAtLoad only)
221
262
  if not d.get('StartCalendarInterval') and not d.get('StartInterval'):
222
263
  sys.exit(1)
223
264
  print(' '.join(args))
@@ -225,28 +266,36 @@ except:
225
266
  sys.exit(1)
226
267
  " 2>/dev/null)
227
268
 
228
- if [ -z "$cmd" ] || [ $? -ne 0 ]; then
229
- return 1
230
- fi
231
-
232
- log "Re-executing missed cron: $plist_id → $cmd"
233
- # Run in background with timeout (5 min max)
234
- timeout 300 bash -c "$cmd" >> "$LOG_DIR/watchdog-reexec.log" 2>&1 &
235
- local pid=$!
269
+ if [ -z "$cmd" ] || [ $? -ne 0 ]; then
270
+ return 1
271
+ fi
236
272
 
237
- # Wait briefly and check if it started ok
238
- sleep 2
239
- if kill -0 "$pid" 2>/dev/null || wait "$pid" 2>/dev/null; then
240
- log_repair "$plist_id: re-executed missed cron (PID $pid)"
241
- return 0
273
+ log "Re-executing missed cron: $svc_id $cmd"
274
+ timeout 300 bash -c "$cmd" >> "$LOG_DIR/watchdog-reexec.log" 2>&1 &
275
+ local pid=$!
276
+ sleep 2
277
+ if kill -0 "$pid" 2>/dev/null || wait "$pid" 2>/dev/null; then
278
+ log_repair "$svc_id: re-executed missed cron (PID $pid)"
279
+ return 0
280
+ else
281
+ log "Re-execute failed for $svc_id"
282
+ return 1
283
+ fi
242
284
  else
243
- log "Re-execute failed for $plist_id"
244
- return 1
285
+ # Linux: start the corresponding service unit directly
286
+ local service_unit="${svc_id%.timer}.service"
287
+ log "Re-executing missed cron: $svc_id → systemctl start $service_unit"
288
+ if systemctl --user start "$service_unit" 2>/dev/null; then
289
+ log_repair "$svc_id: re-executed via systemctl start $service_unit"
290
+ return 0
291
+ else
292
+ log "Re-execute failed for $svc_id"
293
+ return 1
294
+ fi
245
295
  fi
246
296
  }
247
297
 
248
298
  try_verify_repair() {
249
- $IS_MACOS || return 1
250
299
  # After Level 2 repair, wait and verify the service is healthy
251
300
  local plist_id="$1"
252
301
  local log_stdout="$2"
@@ -388,20 +437,26 @@ for monitor in "${MONITORS[@]}"; do
388
437
  error_count=0
389
438
  proc_alive="n/a"
390
439
 
391
- # Check 1: LaunchAgent loaded?
440
+ # Check 1: Service loaded? (launchd on macOS, systemd on Linux)
392
441
  if is_loaded "$plist_id"; then
393
442
  loaded="yes"
394
443
  else
395
444
  loaded="no"
396
- # AUTO-REPAIR: try to bootstrap
397
- if try_repair_launchagent "$plist_id" "$proc_grep"; then
445
+ # AUTO-REPAIR: try platform-appropriate repair
446
+ repair_ok=false
447
+ if $IS_MACOS; then
448
+ try_repair_launchagent "$plist_id" "$proc_grep" && repair_ok=true
449
+ else
450
+ try_repair_systemd "$plist_id" && repair_ok=true
451
+ fi
452
+ if $repair_ok; then
398
453
  loaded="yes"
399
454
  status="HEALED"
400
- details="${details}Self-healed: bootstrapped. "
455
+ details="${details}Self-healed: service re-registered. "
401
456
  TOTAL_HEALED=$((TOTAL_HEALED + 1))
402
457
  else
403
458
  status="FAIL"
404
- details="${details}Not loaded in launchctl (repair failed). "
459
+ details="${details}Service not loaded (repair failed). "
405
460
  fi
406
461
  fi
407
462
 
@@ -411,9 +466,10 @@ for monitor in "${MONITORS[@]}"; do
411
466
  proc_alive="yes"
412
467
  else
413
468
  proc_alive="no"
414
- # AUTO-REPAIR: try to kickstart
469
+ # AUTO-REPAIR: try to kickstart (platform-appropriate)
415
470
  if [ "$status" != "FAIL" ] && [ "$status" != "HEALED" ]; then
416
- if try_repair_launchagent "$plist_id" "$proc_grep"; then
471
+ if ($IS_MACOS && try_repair_launchagent "$plist_id" "$proc_grep") || \
472
+ (! $IS_MACOS && try_repair_systemd "$plist_id"); then
417
473
  proc_alive="yes"
418
474
  status="HEALED"
419
475
  details="${details}Self-healed: kickstarted. "
@@ -820,7 +876,7 @@ if [ "$TOTAL_FAIL" -gt 0 ]; then
820
876
  Schedule: ${m_sched}
821
877
  Type: ${m_type}
822
878
  Failure reason: ${m_details}
823
- Plist: ~/Library/LaunchAgents/${m_plist}.plist
879
+ Service config: $($IS_MACOS && echo "~/Library/LaunchAgents/${m_plist}.plist" || echo "~/.config/systemd/user/${m_plist}")
824
880
  Process grep: ${m_proc}
825
881
  Stderr (last 20 lines):
826
882
  ${STDERR_TAIL}
@@ -857,7 +913,7 @@ Do NOT propagate fixes for [personal] services — those stay local only."
857
913
  cat > "$REPAIR_PROMPT_FILE" <<NEXOPROMPT
858
914
  WATCHDOG LEVEL 2 REPAIR — Automated diagnostic session.
859
915
 
860
- The watchdog detected services that failed mechanical repair (launchctl bootstrap/kickstart).
916
+ The watchdog detected services that failed mechanical repair (launchctl/systemctl re-registration).
861
917
  Your job: diagnose the root cause and fix it. Do NOT ask the user anything — resolve autonomously.
862
918
 
863
919
  Each failure is tagged [core] or [personal]:
@@ -868,10 +924,10 @@ FAILURES:
868
924
  ${FAIL_DETAILS}
869
925
 
870
926
  STEPS:
871
- 1. Read the plist file to understand the service configuration
927
+ 1. Read the service config (plist on macOS, systemd unit on Linux) to understand the service
872
928
  2. Check stderr/stdout logs for the actual error
873
929
  3. Fix the root cause (missing file, bad config, dependency issue, etc.)
874
- 4. Reload the service and verify it is running
930
+ 4. Reload the service and verify it is running (launchctl on macOS, systemctl on Linux)
875
931
  5. Log what you did to $NEXO_HOME/logs/watchdog-repair-result.log
876
932
  ${PROPAGATE_BLOCK}
877
933