nexo-brain 2.3.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,7 +45,12 @@ log() { echo "[$TS] $1" >> "$LOG"; }
45
45
  # The NEXO_CODE env var must point to the repo src/ directory.
46
46
  # Add personal (non-manifest) monitors to PERSONAL_MONITORS below.
47
47
  NEXO_CODE="${NEXO_CODE:-$(cd "$(dirname "$0")/.." 2>/dev/null && pwd)}"
48
- MANIFEST_FILE="$NEXO_CODE/crons/manifest.json"
48
+ # Look for manifest in NEXO_HOME first (packaged install), then NEXO_CODE (dev/repo)
49
+ if [ -f "$NEXO_HOME/crons/manifest.json" ]; then
50
+ MANIFEST_FILE="$NEXO_HOME/crons/manifest.json"
51
+ else
52
+ MANIFEST_FILE="$NEXO_CODE/crons/manifest.json"
53
+ fi
49
54
 
50
55
  _build_monitors_from_manifest() {
51
56
  if [ ! -f "$MANIFEST_FILE" ]; then
@@ -53,18 +58,22 @@ _build_monitors_from_manifest() {
53
58
  return
54
59
  fi
55
60
  python3 -c "
56
- import json, sys
61
+ import json, sys, platform
57
62
 
58
63
  nexo_home = '$NEXO_HOME'
64
+ is_mac = platform.system() == 'Darwin'
59
65
 
60
66
  with open('$MANIFEST_FILE') as f:
61
67
  data = json.load(f)
62
68
 
63
69
  for c in data.get('crons', []):
64
70
  cid = c['id']
65
- # Derive human-readable name from id
66
71
  name = cid.replace('-', ' ').title()
67
- plist_id = 'com.nexo.' + cid
72
+ # Use the right service identifier per platform
73
+ if is_mac:
74
+ svc_id = 'com.nexo.' + cid
75
+ else:
76
+ svc_id = 'nexo-' + cid + '.timer'
68
77
  stdout_log = nexo_home + '/logs/' + cid + '-stdout.log'
69
78
  stderr_log = nexo_home + '/logs/' + cid + '-stderr.log'
70
79
 
@@ -98,7 +107,7 @@ for c in data.get('crons', []):
98
107
  mon_type = 'core' if c.get('core') else 'personal'
99
108
  proc_grep = '' # manifest crons are one-shot, no persistent process
100
109
 
101
- print(f'{name}|{plist_id}|{stdout_log}|{stderr_log}|{max_stale}|{proc_grep}|{schedule_desc}|{mon_type}')
110
+ print(f'{name}|{svc_id}|{stdout_log}|{stderr_log}|{max_stale}|{proc_grep}|{schedule_desc}|{mon_type}')
102
111
  " 2>/dev/null
103
112
  }
104
113
 
@@ -140,7 +149,12 @@ IS_MACOS=false
140
149
  log_repair() { echo "[$TS] REPAIR: $1" >> "$REPAIR_LOG"; log "REPAIR: $1"; }
141
150
 
142
151
  is_loaded() {
143
- $IS_MACOS && launchctl list "$1" &>/dev/null
152
+ if $IS_MACOS; then
153
+ launchctl list "$1" &>/dev/null
154
+ else
155
+ # On Linux, check if the systemd timer is enabled
156
+ systemctl --user is-enabled "$1" &>/dev/null
157
+ fi
144
158
  }
145
159
 
146
160
  # ============================================================================
@@ -179,6 +193,36 @@ try_repair_launchagent() {
179
193
  return 1
180
194
  }
181
195
 
196
+ try_repair_systemd() {
197
+ $IS_MACOS && return 1
198
+ local timer_unit="$1"
199
+ local service_unit="${timer_unit%.timer}.service"
200
+
201
+ # Repair 1: Timer not enabled — try to enable and start
202
+ if ! systemctl --user is-enabled "$timer_unit" &>/dev/null; then
203
+ systemctl --user daemon-reload 2>/dev/null
204
+ systemctl --user enable --now "$timer_unit" 2>/dev/null
205
+ sleep 1
206
+ if systemctl --user is-enabled "$timer_unit" &>/dev/null; then
207
+ log_repair "$timer_unit: enabled and started"
208
+ return 0
209
+ fi
210
+ return 1
211
+ fi
212
+
213
+ # Repair 2: Timer enabled but not active — start it
214
+ if ! systemctl --user is-active "$timer_unit" &>/dev/null; then
215
+ systemctl --user start "$timer_unit" 2>/dev/null
216
+ sleep 1
217
+ if systemctl --user is-active "$timer_unit" &>/dev/null; then
218
+ log_repair "$timer_unit: restarted"
219
+ return 0
220
+ fi
221
+ fi
222
+
223
+ return 1
224
+ }
225
+
182
226
  try_repair_cron() {
183
227
  local script="$1"
184
228
 
@@ -195,29 +239,26 @@ try_repair_cron() {
195
239
  }
196
240
 
197
241
  try_reexecute_missed_cron() {
198
- $IS_MACOS || return 1
199
- # Re-execute a cron that missed its scheduled run
200
- # Extracts ProgramArguments from the plist and runs them
201
- local plist_id="$1"
202
- local plist_file="$HOME_DIR/Library/LaunchAgents/${plist_id}.plist"
242
+ local svc_id="$1"
203
243
 
204
- if [ ! -f "$plist_file" ]; then
205
- log "Re-execute skipped: no plist for $plist_id"
206
- return 1
207
- fi
244
+ if $IS_MACOS; then
245
+ # macOS: extract command from plist and run it
246
+ local plist_file="$HOME_DIR/Library/LaunchAgents/${svc_id}.plist"
208
247
 
209
- # Extract the full command from plist
210
- local cmd
211
- cmd=$(python3 -c "
248
+ if [ ! -f "$plist_file" ]; then
249
+ log "Re-execute skipped: no plist for $svc_id"
250
+ return 1
251
+ fi
252
+
253
+ local cmd
254
+ cmd=$(python3 -c "
212
255
  import plistlib, sys
213
256
  try:
214
257
  with open('$plist_file', 'rb') as f:
215
258
  d = plistlib.load(f)
216
259
  args = d.get('ProgramArguments', [])
217
- # Skip KeepAlive services (they should be running, not re-executed)
218
260
  if d.get('KeepAlive'):
219
261
  sys.exit(1)
220
- # Skip services without a schedule (RunAtLoad only)
221
262
  if not d.get('StartCalendarInterval') and not d.get('StartInterval'):
222
263
  sys.exit(1)
223
264
  print(' '.join(args))
@@ -225,28 +266,36 @@ except:
225
266
  sys.exit(1)
226
267
  " 2>/dev/null)
227
268
 
228
- if [ -z "$cmd" ] || [ $? -ne 0 ]; then
229
- return 1
230
- fi
231
-
232
- log "Re-executing missed cron: $plist_id → $cmd"
233
- # Run in background with timeout (5 min max)
234
- timeout 300 bash -c "$cmd" >> "$LOG_DIR/watchdog-reexec.log" 2>&1 &
235
- local pid=$!
269
+ if [ -z "$cmd" ] || [ $? -ne 0 ]; then
270
+ return 1
271
+ fi
236
272
 
237
- # Wait briefly and check if it started ok
238
- sleep 2
239
- if kill -0 "$pid" 2>/dev/null || wait "$pid" 2>/dev/null; then
240
- log_repair "$plist_id: re-executed missed cron (PID $pid)"
241
- return 0
273
+ log "Re-executing missed cron: $svc_id $cmd"
274
+ timeout 300 bash -c "$cmd" >> "$LOG_DIR/watchdog-reexec.log" 2>&1 &
275
+ local pid=$!
276
+ sleep 2
277
+ if kill -0 "$pid" 2>/dev/null || wait "$pid" 2>/dev/null; then
278
+ log_repair "$svc_id: re-executed missed cron (PID $pid)"
279
+ return 0
280
+ else
281
+ log "Re-execute failed for $svc_id"
282
+ return 1
283
+ fi
242
284
  else
243
- log "Re-execute failed for $plist_id"
244
- return 1
285
+ # Linux: start the corresponding service unit directly
286
+ local service_unit="${svc_id%.timer}.service"
287
+ log "Re-executing missed cron: $svc_id → systemctl start $service_unit"
288
+ if systemctl --user start "$service_unit" 2>/dev/null; then
289
+ log_repair "$svc_id: re-executed via systemctl start $service_unit"
290
+ return 0
291
+ else
292
+ log "Re-execute failed for $svc_id"
293
+ return 1
294
+ fi
245
295
  fi
246
296
  }
247
297
 
248
298
  try_verify_repair() {
249
- $IS_MACOS || return 1
250
299
  # After Level 2 repair, wait and verify the service is healthy
251
300
  local plist_id="$1"
252
301
  local log_stdout="$2"
@@ -388,20 +437,26 @@ for monitor in "${MONITORS[@]}"; do
388
437
  error_count=0
389
438
  proc_alive="n/a"
390
439
 
391
- # Check 1: LaunchAgent loaded?
440
+ # Check 1: Service loaded? (launchd on macOS, systemd on Linux)
392
441
  if is_loaded "$plist_id"; then
393
442
  loaded="yes"
394
443
  else
395
444
  loaded="no"
396
- # AUTO-REPAIR: try to bootstrap
397
- if try_repair_launchagent "$plist_id" "$proc_grep"; then
445
+ # AUTO-REPAIR: try platform-appropriate repair
446
+ repair_ok=false
447
+ if $IS_MACOS; then
448
+ try_repair_launchagent "$plist_id" "$proc_grep" && repair_ok=true
449
+ else
450
+ try_repair_systemd "$plist_id" && repair_ok=true
451
+ fi
452
+ if $repair_ok; then
398
453
  loaded="yes"
399
454
  status="HEALED"
400
- details="${details}Self-healed: bootstrapped. "
455
+ details="${details}Self-healed: service re-registered. "
401
456
  TOTAL_HEALED=$((TOTAL_HEALED + 1))
402
457
  else
403
458
  status="FAIL"
404
- details="${details}Not loaded in launchctl (repair failed). "
459
+ details="${details}Service not loaded (repair failed). "
405
460
  fi
406
461
  fi
407
462
 
@@ -411,9 +466,10 @@ for monitor in "${MONITORS[@]}"; do
411
466
  proc_alive="yes"
412
467
  else
413
468
  proc_alive="no"
414
- # AUTO-REPAIR: try to kickstart
469
+ # AUTO-REPAIR: try to kickstart (platform-appropriate)
415
470
  if [ "$status" != "FAIL" ] && [ "$status" != "HEALED" ]; then
416
- if try_repair_launchagent "$plist_id" "$proc_grep"; then
471
+ if ($IS_MACOS && try_repair_launchagent "$plist_id" "$proc_grep") || \
472
+ (! $IS_MACOS && try_repair_systemd "$plist_id"); then
417
473
  proc_alive="yes"
418
474
  status="HEALED"
419
475
  details="${details}Self-healed: kickstarted. "
@@ -820,7 +876,7 @@ if [ "$TOTAL_FAIL" -gt 0 ]; then
820
876
  Schedule: ${m_sched}
821
877
  Type: ${m_type}
822
878
  Failure reason: ${m_details}
823
- Plist: ~/Library/LaunchAgents/${m_plist}.plist
879
+ Service config: $($IS_MACOS && echo "~/Library/LaunchAgents/${m_plist}.plist" || echo "~/.config/systemd/user/${m_plist}")
824
880
  Process grep: ${m_proc}
825
881
  Stderr (last 20 lines):
826
882
  ${STDERR_TAIL}
@@ -857,7 +913,7 @@ Do NOT propagate fixes for [personal] services — those stay local only."
857
913
  cat > "$REPAIR_PROMPT_FILE" <<NEXOPROMPT
858
914
  WATCHDOG LEVEL 2 REPAIR — Automated diagnostic session.
859
915
 
860
- The watchdog detected services that failed mechanical repair (launchctl bootstrap/kickstart).
916
+ The watchdog detected services that failed mechanical repair (launchctl/systemctl re-registration).
861
917
  Your job: diagnose the root cause and fix it. Do NOT ask the user anything — resolve autonomously.
862
918
 
863
919
  Each failure is tagged [core] or [personal]:
@@ -868,10 +924,10 @@ FAILURES:
868
924
  ${FAIL_DETAILS}
869
925
 
870
926
  STEPS:
871
- 1. Read the plist file to understand the service configuration
927
+ 1. Read the service config (plist on macOS, systemd unit on Linux) to understand the service
872
928
  2. Check stderr/stdout logs for the actual error
873
929
  3. Fix the root cause (missing file, bad config, dependency issue, etc.)
874
- 4. Reload the service and verify it is running
930
+ 4. Reload the service and verify it is running (launchctl on macOS, systemctl on Linux)
875
931
  5. Log what you did to $NEXO_HOME/logs/watchdog-repair-result.log
876
932
  ${PROPAGATE_BLOCK}
877
933
 
package/src/server.py CHANGED
@@ -55,7 +55,71 @@ def _server_init():
55
55
  with open(_pid_file, "w") as f:
56
56
  f.write(str(os.getpid()))
57
57
 
58
- init_db()
58
+ # ── Database initialization with recovery ─────────────────────
59
+ import sqlite3
60
+ try:
61
+ init_db()
62
+ except sqlite3.DatabaseError as exc:
63
+ # Corruption or unreadable DB — attempt restore from backup
64
+ print(f"[NEXO] DB init failed: {exc}", file=sys.stderr)
65
+ _recovered = False
66
+ try:
67
+ from db._core import DB_PATH as _db_path
68
+ import glob as _glob
69
+ _backup_dir = os.path.join(
70
+ os.environ.get("NEXO_HOME", os.path.join(os.path.expanduser("~"), ".nexo")),
71
+ "backups",
72
+ )
73
+ _backups = sorted(_glob.glob(os.path.join(_backup_dir, "nexo-*.db")), reverse=True)
74
+ for _bk in _backups:
75
+ try:
76
+ _test = sqlite3.connect(_bk)
77
+ _result = _test.execute("PRAGMA integrity_check").fetchone()
78
+ _test.close()
79
+ if _result and _result[0] == "ok":
80
+ # Valid backup found — replace corrupt DB
81
+ import shutil
82
+ # Close any open connection before replacing
83
+ try:
84
+ close_db()
85
+ except Exception:
86
+ pass
87
+ shutil.copy2(_bk, _db_path)
88
+ print(f"[NEXO] Restored DB from backup: {os.path.basename(_bk)}", file=sys.stderr)
89
+ init_db()
90
+ _recovered = True
91
+ break
92
+ except Exception:
93
+ continue
94
+ except Exception as restore_exc:
95
+ print(f"[NEXO] Backup restore failed: {restore_exc}", file=sys.stderr)
96
+
97
+ if not _recovered:
98
+ # No valid backup — nuke corrupt file and start fresh
99
+ try:
100
+ close_db()
101
+ except Exception:
102
+ pass
103
+ try:
104
+ from db._core import DB_PATH as _db_path
105
+ if os.path.exists(_db_path):
106
+ _corrupt_path = _db_path + ".corrupt"
107
+ os.rename(_db_path, _corrupt_path)
108
+ print(f"[NEXO] Corrupt DB moved to {os.path.basename(_corrupt_path)}", file=sys.stderr)
109
+ # Remove WAL/SHM files too
110
+ for _ext in (".db-wal", ".db-shm"):
111
+ _wal = _db_path.replace(".db", _ext)
112
+ if os.path.exists(_wal):
113
+ os.remove(_wal)
114
+ except Exception:
115
+ pass
116
+ try:
117
+ init_db()
118
+ print("[NEXO] Fresh database created.", file=sys.stderr)
119
+ except Exception as fresh_exc:
120
+ print(f"[NEXO] FATAL: Cannot initialize database: {fresh_exc}", file=sys.stderr)
121
+ print("[NEXO] Check permissions on NEXO_HOME/data/ and disk space.", file=sys.stderr)
122
+ sys.exit(1)
59
123
 
60
124
  # ── Auto-update check (non-blocking, max 5s) ──────────────────
61
125
  try: