bgo-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bgo_cli/_core.py ADDED
@@ -0,0 +1,2041 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ bgo - A lightweight background process manager
4
+ Like pm2, but simpler, lighter, and without the forking headaches.
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+ import os
10
+ import re
11
+ import shutil
12
+ import signal
13
+ import subprocess
14
+ import sys
15
+ import termios
16
+ import time
17
+ import tty
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+
21
+ # Config
22
# Root of all bgo state, under the invoking user's home directory.
BGO_DIR = Path.home() / ".bgo"
# Holds one "<name>.json" state file per managed process.
PROCS_DIR = BGO_DIR / "procs"
# Holds "<name>.out.log", "<name>.err.log" and "<name>.watcher.log".
LOGS_DIR = BGO_DIR / "logs"
25
+
26
+ # ANSI colors
27
COLORS = {
    "green": "\033[32m",
    "red": "\033[31m",
    "yellow": "\033[33m",
    "blue": "\033[34m",
    "gray": "\033[90m",
    "reset": "\033[0m",
    "bold": "\033[1m",
}

# Matches SGR colour/style sequences such as "\033[31m" or "\033[0m".
ANSI_RE = re.compile(r"\033\[[0-9;]*m")


def color(name: str, text: str) -> str:
    """Wrap `text` in the named colour code, only if stdout is a TTY.

    Unknown colour names add no prefix; the reset code is still appended.
    When stdout is not a TTY the text is returned as a plain `str`.
    """
    if not sys.stdout.isatty():
        return str(text)
    return f"{COLORS.get(name, '')}{text}{COLORS['reset']}"


def strip_ansi(s: str) -> str:
    """Remove ANSI colour escape codes from a string."""
    return ANSI_RE.sub("", s)


def truncate(s: str, width: int) -> str:
    """Truncate `s` to at most `width` visible characters.

    ANSI colour codes take no visible space, so they are never counted
    and never cut: every escape sequence in `s` is kept (including a
    trailing reset, so colour cannot bleed into following output) and
    only visible characters are dropped. A truncated result ends in
    "..."; when `width` is smaller than 3 the ellipsis itself is
    shortened to fit, and a non-positive width yields no visible text.

    Strings that already fit are returned unchanged.
    """
    if len(strip_ansi(s)) <= width:
        return s

    ellipsis = "..."[: max(width, 0)]
    budget = max(width, 0) - len(ellipsis)

    # Split into alternating visible-text / escape-code segments.
    segments = []
    cursor = 0
    for match in ANSI_RE.finditer(s):
        segments.append((False, s[cursor:match.start()]))
        segments.append((True, match.group()))
        cursor = match.end()
    segments.append((False, s[cursor:]))

    out = []
    cut = False
    for is_code, text in segments:
        if is_code:
            out.append(text)
        elif cut or not text:
            continue
        elif len(text) <= budget:
            out.append(text)
            budget -= len(text)
        else:
            # The total visible length exceeds `width`, so this branch
            # is always reached exactly once.
            out.append(text[:budget] + ellipsis)
            cut = True
    return "".join(out)
58
+
59
+
60
+ # --- Terminal capability detection ---
61
+
62
+ # Three levels:
63
+ # plain — no color, ASCII-only dashes, no glyphs (CI logs, dumb terms,
64
+ # non-TTY pipes)
65
+ # normal — ANSI color + ASCII dashes (default for color-capable TTYs)
66
+ # fancy — ANSI color + Unicode box-drawing + heavier visual structure
67
+ # (UTF-8-capable TTYs)
68
+ LEVEL_PLAIN = "plain"
69
+ LEVEL_NORMAL = "normal"
70
+ LEVEL_FANCY = "fancy"
71
+
72
+
73
+ def _detect_table_level(force: str | None = None) -> str:
74
+ """Decide which rendering level to use.
75
+
76
+ Resolution order:
77
+ 1. explicit `force` (from --plain/--fancy or BGO_TABLE env)
78
+ 2. non-TTY stdout -> plain
79
+ 3. TERM=dumb -> plain
80
+ 4. LANG/LC_* lacks UTF-8 -> normal
81
+ 5. otherwise -> fancy
82
+ """
83
+ if force in (LEVEL_PLAIN, LEVEL_NORMAL, LEVEL_FANCY):
84
+ return force
85
+ env_force = os.environ.get("BGO_TABLE", "").strip().lower()
86
+ if env_force in (LEVEL_PLAIN, LEVEL_NORMAL, LEVEL_FANCY):
87
+ return env_force
88
+ if not sys.stdout.isatty():
89
+ return LEVEL_PLAIN
90
+ if os.environ.get("TERM", "") == "dumb":
91
+ return LEVEL_PLAIN
92
+ lc = (
93
+ os.environ.get("LC_ALL")
94
+ or os.environ.get("LC_CTYPE")
95
+ or os.environ.get("LANG")
96
+ or ""
97
+ ).upper()
98
+ if "UTF-8" not in lc and "UTF8" not in lc:
99
+ return LEVEL_NORMAL
100
+ return LEVEL_FANCY
101
+
102
+
103
+ # Glyph set per level. Keys must be identical across levels so callers
104
+ # can index without branching.
105
+ GLYPHS = {
106
+ LEVEL_PLAIN: {
107
+ "hline": "-", "vline": "|", "cross": "+",
108
+ "tl": "+", "tr": "+", "bl": "+", "br": "+",
109
+ "tdown": "+", "tup": "+", "tleft": "+", "tright": "+",
110
+ "online": "ON", "stopped": "OFF",
111
+ "watching": "[W]", "errored": "[!]", "watcher_dead": "[?]",
112
+ "watch_none": "-",
113
+ "ok": "OK", "fail": "FAIL", "warn": "WARN",
114
+ "tombstone": "[X]", "rocket": "[+]", "eye": "[W]",
115
+ },
116
+ LEVEL_NORMAL: {
117
+ "hline": "─", "vline": "│", "cross": "┼",
118
+ "tl": "┌", "tr": "┐", "bl": "└", "br": "┘",
119
+ "tdown": "┬", "tup": "┴", "tleft": "┤", "tright": "├",
120
+ "online": "online", "stopped": "stopped",
121
+ "watching": "✓", "errored": "⚠", "watcher_dead": "!",
122
+ "watch_none": "-",
123
+ "ok": "✅", "fail": "❌", "warn": "⚠️",
124
+ "tombstone": "🗑️", "rocket": "🚀", "eye": "👁",
125
+ },
126
+ LEVEL_FANCY: {
127
+ "hline": "━", "vline": "┃", "cross": "╋",
128
+ "tl": "┏", "tr": "┓", "bl": "┗", "br": "┛",
129
+ "tdown": "┳", "tup": "┻", "tleft": "┫", "tright": "┣",
130
+ "online": "● online", "stopped": "○ stopped",
131
+ "watching": "✓", "errored": "⚠", "watcher_dead": "!",
132
+ "watch_none": "·",
133
+ "ok": "✅", "fail": "❌", "warn": "⚠️",
134
+ "tombstone": "🗑️", "rocket": "🚀", "eye": "👁",
135
+ },
136
+ }
137
+
138
+
139
+ def glyphs(level: str | None = None) -> dict:
140
+ """Return the glyph set for a level (default: auto-detect)."""
141
+ return GLYPHS[level or _detect_table_level()]
142
+
143
+
144
def init_dirs():
    """Create the state and log directories (with parents) if missing."""
    for directory in (PROCS_DIR, LOGS_DIR):
        directory.mkdir(parents=True, exist_ok=True)
148
+
149
+
150
+ # --- State management (one file per process) ---
151
+
152
+
153
def proc_file(name: str) -> Path:
    """Path of the JSON state file for process `name`."""
    return PROCS_DIR.joinpath(f"{name}.json")


def log_path(name: str, stream: str = "out") -> Path:
    """Path of the captured-output log for `name` ("out" or "err" stream)."""
    return LOGS_DIR.joinpath(f"{name}.{stream}.log")


def watcher_log_path(name: str) -> Path:
    """Path of the watcher's own log file for `name`."""
    return LOGS_DIR.joinpath(f"{name}.watcher.log")
163
+
164
+
165
def watcher_log(name: str, msg: str):
    """Append one "[timestamp] msg" line to the watcher log of `name`.

    Best effort: an OSError while writing is swallowed so logging can
    never take the caller down.
    """
    try:
        stamp = datetime.now().isoformat(timespec="seconds")
        line = f"[{stamp}] {msg}\n"
        with open(watcher_log_path(name), "a") as handle:
            handle.write(line)
    except OSError:
        return
173
+
174
+
175
def load_proc(name: str) -> dict | None:
    """Load the saved state of process `name`.

    Returns the decoded state dict, or None when the state file is
    missing, unreadable, not valid JSON/UTF-8, or does not contain a
    JSON object. Callers index the result as a dict, so anything else
    (e.g. a file holding `null` or `[]`) is treated as "no state".
    """
    try:
        data = json.loads(proc_file(name).read_text())
    except (OSError, ValueError):
        # OSError covers a missing file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None
    return data if isinstance(data, dict) else None
183
+
184
+
185
def save_proc(name: str, info: dict):
    """Atomically write the state of process `name`: tmp file + os.replace.

    Prevents torn JSON if the process is killed mid-write or if the
    watcher and the foreground CLI both save concurrently. os.replace
    is atomic on POSIX when src and dst are on the same filesystem,
    which holds here because the temp file sits next to the target.

    The temp file name includes this process's pid. With a shared temp
    path, one writer could publish a file another writer was still
    filling in. The name still ends in ".tmp", so the "*.json" glob in
    load_all_procs never picks it up.
    """
    pf = proc_file(name)
    payload = json.dumps(info, indent=2)  # serialise before touching disk
    tmp = pf.with_name(f"{pf.name}.{os.getpid()}.tmp")
    try:
        tmp.write_text(payload)
        os.replace(tmp, pf)
    finally:
        # No-op after a successful replace; removes the leftover on failure.
        tmp.unlink(missing_ok=True)
197
+
198
+
199
def delete_proc(name: str, keep_logs: bool = False):
    """Delete the state file of `name`, and its logs unless `keep_logs`.

    Missing files are ignored.
    """
    proc_file(name).unlink(missing_ok=True)
    if not keep_logs:
        doomed = [
            log_path(name, "out"),
            log_path(name, "err"),
            watcher_log_path(name),
        ]
        for path in doomed:
            path.unlink(missing_ok=True)
207
+
208
+
209
def load_all_procs() -> dict[str, dict]:
    """Load every saved process state, keyed by process name.

    State files are visited in sorted path order. The key is the "name"
    field of the state, falling back to the file stem. Files that are
    unreadable, not valid JSON/UTF-8, or not a JSON object are skipped
    so one corrupt file cannot break the whole listing.
    """
    procs = {}
    for pf in sorted(PROCS_DIR.glob("*.json")):
        try:
            info = json.loads(pf.read_text())
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(info, dict):
            continue
        procs[info.get("name", pf.stem)] = info
    return procs
219
+
220
+
221
+ # --- Utilities ---
222
+
223
+
224
+ def _is_zombie(pid: int) -> bool:
225
+ """Return True if pid is a zombie/defunct process. Platform-aware."""
226
+ if sys.platform.startswith("linux"):
227
+ try:
228
+ with open(f"/proc/{pid}/stat", "r") as f:
229
+ stat = f.read()
230
+ # stat format: pid (comm) state ... comm may contain spaces/parens.
231
+ rparen = stat.rfind(")")
232
+ if rparen != -1:
233
+ state = stat[rparen + 2 : rparen + 3]
234
+ return state == "Z"
235
+ except (OSError, IndexError):
236
+ return False
237
+ return False
238
+ if sys.platform == "darwin":
239
+ try:
240
+ result = subprocess.run(
241
+ ["ps", "-p", str(pid), "-o", "stat=", ],
242
+ capture_output=True, text=True, timeout=2,
243
+ )
244
+ if result.returncode == 0:
245
+ state = result.stdout.strip()
246
+ # macOS ps stat: 'Z' is zombie; may be prefixed with flags
247
+ return state.startswith("Z")
248
+ except (subprocess.SubprocessError, OSError):
249
+ return False
250
+ return False
251
+
252
+
253
+ def is_running(pid: int | None) -> bool:
254
+ """Return True if pid is alive AND not a zombie/defunct process."""
255
+ if pid is None:
256
+ return False
257
+ try:
258
+ os.kill(pid, 0)
259
+ except (ProcessLookupError, OSError):
260
+ return False
261
+ return not _is_zombie(pid)
262
+
263
+
264
# Placeholder row used when ps reports nothing for a pid.
_BLANK_PINFO = {"cpu": "-", "mem": "-", "time": "-"}


def get_process_info(pid: int) -> dict:
    """Get CPU/MEM/uptime for a single pid via ps. Prefer batch lookup."""
    return get_process_info_batch([pid]).get(pid, dict(_BLANK_PINFO))


def get_process_info_batch(pids: list[int]) -> dict[int, dict]:
    """Single ps call for many pids. Returns {pid: {cpu, mem, time}}.

    Every requested pid gets an entry; pids that ps did not report (or
    all of them, if ps failed or is unavailable) map to "-" placeholders.
    An empty `pids` list returns an empty dict without running ps.
    """
    result_map: dict[int, dict] = {}
    if not pids:
        return result_map
    # ps -p accepts comma-separated pids on both Linux and macOS.
    pid_arg = ",".join(str(p) for p in pids)
    # Suppress the header with empty "name=" column titles rather than
    # the GNU-only --no-headers flag, which BSD/macOS ps rejects. One -o
    # per column, because "a=,b=" is parsed differently across ps
    # implementations.
    argv = [
        "ps", "-p", pid_arg,
        "-o", "pid=", "-o", "%cpu=", "-o", "%mem=", "-o", "etime=",
    ]
    try:
        result = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=4,
        )
    except (subprocess.SubprocessError, OSError):
        result = None
    if result is not None and result.returncode == 0:
        for line in result.stdout.splitlines():
            parts = line.split(None, 3)
            if len(parts) < 4:
                continue
            try:
                row_pid = int(parts[0])
            except ValueError:
                continue
            result_map[row_pid] = {
                "cpu": parts[1], "mem": parts[2], "time": parts[3],
            }
    # Fill misses with blanks
    for p in pids:
        result_map.setdefault(p, dict(_BLANK_PINFO))
    return result_map
302
+
303
+
304
def _looks_like_command(arg: str) -> bool:
    """Guess whether `arg` is an executable command rather than a bare name.

    True when the argument contains a path separator, starts with "./",
    contains a dot, or can be found on PATH.
    """
    is_path = os.sep in arg or arg.startswith("./")
    has_dot = "." in arg
    return bool(is_path or has_dot or shutil.which(arg))
314
+
315
+
316
def derive_name(cmd: list[str]) -> str:
    """Build a process name from the first element of `cmd`.

    Takes the basename and strips known script/executable extensions,
    checking each extension once, in a fixed order.
    """
    stem = os.path.basename(cmd[0])
    known_suffixes = (".py", ".sh", ".js", ".ts", ".rb", ".pl", ".exe")
    for suffix in known_suffixes:
        if stem.endswith(suffix):
            stem = stem[: len(stem) - len(suffix)]
    return stem
323
+
324
+
325
def resolve_command(cmd: list[str]) -> list[str]:
    """Return `cmd` with its executable replaced by the full PATH match.

    When the executable cannot be resolved, the original list is
    returned as is; otherwise a new list is built and the input is left
    untouched.
    """
    located = shutil.which(cmd[0])
    if not located:
        return cmd
    return [located, *cmd[1:]]
332
+
333
+
334
def kill_process(pid: int, pgid: int | None, force: bool = False) -> bool:
    """Terminate `pid`, signalling its whole process group when `pgid` is set.

    Sends SIGTERM (SIGKILL when `force`), polls for up to 5 seconds, and
    escalates a non-forced stop to SIGKILL if the process is still alive.
    Returns True once the process is gone, False if it survived or the
    initial signal was refused for lack of permission.
    """

    def _deliver(signum):
        # Prefer the process group so children die with the parent.
        if pgid:
            os.killpg(pgid, signum)
        else:
            os.kill(pid, signum)

    first_signal = signal.SIGKILL if force else signal.SIGTERM
    try:
        _deliver(first_signal)
    except ProcessLookupError:
        # Already gone.
        return True
    except PermissionError:
        print(f"{color('red', '❌')} Permission denied killing PID {pid}")
        return False

    # Poll every 0.1s, 50 times: 5 seconds in total.
    polls_left = 50
    while polls_left > 0:
        if not is_running(pid):
            return True
        time.sleep(0.1)
        polls_left -= 1

    # SIGTERM was ignored: escalate, then give the kernel a moment.
    if not force and is_running(pid):
        try:
            _deliver(signal.SIGKILL)
            time.sleep(0.3)
        except (ProcessLookupError, PermissionError):
            pass

    return not is_running(pid)
367
+
368
+
369
+ # --- Watch helpers ---
370
+
371
+
372
+ WATCH_DEFAULTS = {
373
+ "interval": 3,
374
+ "min_uptime": 2,
375
+ "on_fast_crash": "backoff", # backoff | stop | retry
376
+ }
377
+ BACKOFF_SCHEDULE = [2, 4, 8]
378
+ TAIL_BYTES = 2048
379
+
380
+
381
+ def _resolve_watch_block(
382
+ want_watch: bool,
383
+ overrides: dict | None,
384
+ prior_watch: dict | None,
385
+ ) -> dict | None:
386
+ """Decide what watch block a (re)starting proc should have.
387
+
388
+ Three paths, in priority order:
389
+ 1. want_watch=True -> fresh default block, optionally with overrides
390
+ 2. prior_watch enabled (internal restart path) -> carry forward,
391
+ clear runtime fields (watcher_pid, errored, error_reason,
392
+ last_stderr_tail) but PRESERVE restart counters
393
+ 3. otherwise -> None (no watch)
394
+
395
+ Pure: no side effects. Returns a new dict in all non-None cases.
396
+ """
397
+ if want_watch:
398
+ return _default_watch_config(overrides)
399
+ if prior_watch and prior_watch.get("enabled"):
400
+ carried = dict(prior_watch)
401
+ carried["watcher_pid"] = None
402
+ carried["watcher_pgid"] = None
403
+ carried["errored"] = False
404
+ carried["error_reason"] = None
405
+ carried["last_stderr_tail"] = None
406
+ return carried
407
+ return None
408
+
409
+
410
+ def _default_watch_config(overrides: dict | None = None) -> dict:
411
+ """Build a fresh watch config block with defaults."""
412
+ cfg = {
413
+ "enabled": True,
414
+ "interval": WATCH_DEFAULTS["interval"],
415
+ "min_uptime": WATCH_DEFAULTS["min_uptime"],
416
+ "on_fast_crash": WATCH_DEFAULTS["on_fast_crash"],
417
+ "watcher_pid": None,
418
+ "watcher_pgid": None,
419
+ "restarts": 0,
420
+ "last_restart_at": None,
421
+ "errored": False,
422
+ "error_reason": None,
423
+ "last_stderr_tail": None,
424
+ }
425
+ if overrides:
426
+ for k, v in overrides.items():
427
+ if v is not None and k in cfg:
428
+ cfg[k] = v
429
+ return cfg
430
+
431
+
432
def _spawn_watcher(name: str) -> tuple[int | None, int | None]:
    """Detach a watcher process for `name`. Returns (pid, pgid).

    The watcher is this same file re-run with the `__watcher__`
    sub-command, in its own session, with stdout/stderr appended to the
    watcher log. On any failure the error is written to the watcher log
    and (None, None) is returned. pgid is None if it cannot be read.
    """
    try:
        # `with` closes our copy of the log handle even when Popen
        # raises; the child keeps its own inherited descriptor.
        with open(watcher_log_path(name), "a") as wlog:
            proc = subprocess.Popen(
                [sys.executable, os.path.abspath(__file__), "__watcher__", name],
                stdout=wlog,
                stderr=wlog,
                stdin=subprocess.DEVNULL,
                start_new_session=True,
            )
    except Exception as e:
        watcher_log(name, f"failed to spawn watcher: {e}")
        return None, None
    try:
        pgid = os.getpgid(proc.pid)
    except OSError:
        pgid = None
    return proc.pid, pgid
452
+
453
+
454
def _kill_watcher(info: dict) -> None:
    """Kill the watcher recorded in `info` (if any) and clear its pids in place.

    Does nothing when `info` has no watch block, or when the block is
    not a dict (e.g. a state file that stored `"watch": null`). The
    caller is responsible for saving `info` afterwards.
    """
    w = info.get("watch")
    if not isinstance(w, dict):
        return
    wpid = w.get("watcher_pid")
    if wpid and is_running(wpid):
        kill_process(wpid, w.get("watcher_pgid"))
    w["watcher_pid"] = None
    w["watcher_pgid"] = None
464
+
465
+
466
def _tail_stderr(name: str, nbytes: int = TAIL_BYTES) -> str:
    """Return the stripped final `nbytes` of the stderr log of `name`.

    When the log is longer than `nbytes`, the first (probably partial)
    line of the window is dropped. Undecodable bytes are replaced. A
    missing or unreadable log yields an empty string.
    """
    err_log = log_path(name, "err")
    if not err_log.exists():
        return ""
    try:
        with open(err_log, "rb") as fh:
            total = fh.seek(0, os.SEEK_END)
            fh.seek(max(0, total - nbytes))
            text = fh.read().decode("utf-8", errors="replace")
    except OSError:
        return ""
    window_is_partial = total > nbytes
    if window_is_partial and "\n" in text:
        text = text.split("\n", 1)[1]
    return text.strip()
483
+
484
+
485
def _restart_proc_inplace(info: dict) -> tuple[int | None, int | None, str | None]:
    """Spawn the target command again, return (pid, pgid, err_msg).

    Reads "name", "command" and optional "cwd" from `info` (falling back
    to the current directory). A restart marker is appended to both the
    stdout and stderr logs, then the command is started detached in its
    own session with output appended to those logs.

    On success err_msg is None. On failure pid and pgid are None and
    err_msg describes the problem. pgid is None if it cannot be read.
    """
    name = info["name"]
    command = info["command"]
    cwd = info.get("cwd") or os.getcwd()
    try:
        # `with` closes both handles on every path, including a failed
        # Popen; the child keeps its own inherited descriptors.
        with open(log_path(name, "out"), "a") as out_f, \
                open(log_path(name, "err"), "a") as err_f:
            ts = datetime.now().isoformat()
            marker = f"\n=== [{ts}] [watch restart] {' '.join(command)} ===\n"
            out_f.write(marker)
            err_f.write(marker)
            # Flush so the marker lands before the child's own output.
            out_f.flush()
            err_f.flush()
            proc = subprocess.Popen(
                command,
                stdout=out_f,
                stderr=err_f,
                stdin=subprocess.DEVNULL,
                start_new_session=True,
                cwd=cwd,
            )
    except FileNotFoundError:
        return None, None, f"command not found: {command[0]}"
    except PermissionError:
        return None, None, f"permission denied: {command[0]}"
    except Exception as e:
        return None, None, f"failed to start: {e}"
    try:
        pgid = os.getpgid(proc.pid)
    except OSError:
        pgid = None
    return proc.pid, pgid, None
522
+
523
+
524
def cmd_watcher_loop(name: str) -> int:
    """Internal: watcher loop entry point. Invoked as `bgo __watcher__ <name>`.

    Polls the saved state of `name` and restarts the target whenever its
    pid is no longer running. A death sooner than `min_uptime` seconds
    after start counts as a fast crash and is handled according to the
    `on_fast_crash` mode:
      - "stop":    mark the watch errored, set status "stopped", exit
      - "backoff": sleep through BACKOFF_SCHEDULE one step per fast
                   crash; once exhausted, mark errored and exit
      - "retry":   sleep per BACKOFF_SCHEDULE (capped at its last step)
                   and keep restarting
    The loop exits when the state file disappears, the watch is
    disabled, the status becomes "stopped", or a restart fails.

    Always returns 0.
    """
    # Auto-reap children so dead procs disappear instead of lingering as zombies
    try:
        signal.signal(signal.SIGCHLD, signal.SIG_IGN)
    except (ValueError, OSError):
        pass

    info = load_proc(name)
    if not info or not info.get("watch", {}).get("enabled"):
        return 0

    watcher_log(name, f"watcher started for pid={info.get('pid')}")
    # Index of the next BACKOFF_SCHEDULE step to use.
    backoff_idx = 0
    # Start time of the instance currently being watched (ISO string).
    current_started_at = info.get("started_at")
    # True right after a (re)start: run the fast min_uptime poll first.
    needs_early_check = True

    def _start_epoch() -> float:
        # Start time as epoch seconds; falls back to "now" when the
        # stored value is missing or cannot be parsed.
        try:
            s = current_started_at.replace("Z", "+00:00")
            return datetime.fromisoformat(s).timestamp()
        except Exception:
            return time.time()

    while True:
        interval = info.get("watch", {}).get("interval", WATCH_DEFAULTS["interval"])
        min_uptime_cfg = info.get("watch", {}).get("min_uptime", WATCH_DEFAULTS["min_uptime"])

        if needs_early_check:
            # High-frequency poll during the min_uptime window so fast-crashes
            # are caught with accurate short uptime readings (even when the
            # routine poll interval is larger than min_uptime).
            deadline = _start_epoch() + min_uptime_cfg
            died_early = False
            while time.time() < deadline:
                time.sleep(0.2)
                cur_pid = info.get("pid")
                if not is_running(cur_pid):
                    died_early = True
                    break
            needs_early_check = False
            if not died_early:
                # Sleep the remainder of the normal interval
                remaining = max(0.0, (_start_epoch() + min_uptime_cfg + interval) - time.time())
                if remaining > 0:
                    time.sleep(min(remaining, interval))
        else:
            time.sleep(max(1, interval))

        # Re-read state each cycle: the CLI may have changed it meanwhile.
        info = load_proc(name)
        if not info:
            watcher_log(name, "proc state vanished; exiting")
            return 0
        w = info.get("watch") or {}
        if not w.get("enabled"):
            watcher_log(name, "watch disabled; exiting")
            return 0
        if info.get("status") == "stopped":
            watcher_log(name, "proc manually stopped; exiting")
            return 0

        pid = info.get("pid")
        if is_running(pid):
            continue

        # Process died. Compute uptime.
        try:
            started = datetime.fromisoformat(current_started_at.replace("Z", "+00:00"))
            uptime = (datetime.now(timezone.utc) - started).total_seconds()
        except Exception:
            # Unknown start time: treated as zero uptime.
            uptime = 0.0

        min_uptime = w.get("min_uptime", WATCH_DEFAULTS["min_uptime"])
        mode = w.get("on_fast_crash", WATCH_DEFAULTS["on_fast_crash"])
        fast = uptime < min_uptime

        if fast:
            tail = _tail_stderr(name)
            watcher_log(name, f"fast-crash: uptime={uptime:.2f}s mode={mode}")

            if mode == "stop":
                info["watch"]["errored"] = True
                info["watch"]["error_reason"] = f"fast-crash (uptime {uptime:.2f}s, mode=stop)"
                info["watch"]["last_stderr_tail"] = tail
                info["watch"]["watcher_pid"] = None
                info["watch"]["watcher_pgid"] = None
                info["status"] = "stopped"
                save_proc(name, info)
                watcher_log(name, "errored; exiting")
                return 0

            if mode == "backoff":
                if backoff_idx >= len(BACKOFF_SCHEDULE):
                    # Every backoff step has been used up: give up.
                    info["watch"]["errored"] = True
                    info["watch"]["error_reason"] = f"{len(BACKOFF_SCHEDULE) + 1} consecutive fast-crashes"
                    info["watch"]["last_stderr_tail"] = tail
                    info["watch"]["watcher_pid"] = None
                    info["watch"]["watcher_pgid"] = None
                    info["status"] = "stopped"
                    save_proc(name, info)
                    watcher_log(name, "backoff exhausted; errored; exiting")
                    return 0
                wait = BACKOFF_SCHEDULE[backoff_idx]
                watcher_log(name, f"backoff sleep {wait}s (step {backoff_idx + 1}/{len(BACKOFF_SCHEDULE)})")
                time.sleep(wait)
                backoff_idx += 1
            elif mode == "retry":
                # Same delays as backoff, but capped at the last step
                # and never giving up.
                wait = BACKOFF_SCHEDULE[min(backoff_idx, len(BACKOFF_SCHEDULE) - 1)]
                watcher_log(name, f"retry mode: sleep {wait}s")
                time.sleep(wait)
                backoff_idx = min(backoff_idx + 1, len(BACKOFF_SCHEDULE) - 1)
        else:
            # The process outlived min_uptime: start the schedule over.
            backoff_idx = 0

        # Reload (state may have shifted during sleep)
        info = load_proc(name)
        if not info or info.get("status") == "stopped" or not info.get("watch", {}).get("enabled"):
            watcher_log(name, "state changed during backoff; exiting")
            return 0

        new_pid, new_pgid, err = _restart_proc_inplace(info)
        if err:
            watcher_log(name, f"restart failed: {err}")
            info["watch"]["errored"] = True
            info["watch"]["error_reason"] = err
            info["watch"]["last_stderr_tail"] = _tail_stderr(name)
            info["watch"]["watcher_pid"] = None
            info["watch"]["watcher_pgid"] = None
            info["status"] = "stopped"
            save_proc(name, info)
            return 0

        # Record the new instance and count the restart.
        now_iso = datetime.now(timezone.utc).isoformat()
        info["pid"] = new_pid
        info["pgid"] = new_pgid
        info["started_at"] = now_iso
        info["status"] = "running"
        info["watch"]["restarts"] = info["watch"].get("restarts", 0) + 1
        info["watch"]["last_restart_at"] = now_iso
        save_proc(name, info)
        current_started_at = now_iso
        needs_early_check = True
        watcher_log(name, f"restart #{info['watch']['restarts']} pid={new_pid} (prev uptime {uptime:.2f}s)")
667
+
668
+
669
+ # --- Commands ---
670
+
671
+
672
def cmd_start(args):
    """Start a process in the background.

    Reads `args.name`, the command from `args.cmd` or `args.command`,
    and `args.cwd`; the optional `watch`, `interval`, `min_uptime` and
    `on_fast_crash` attributes configure a watcher. The command runs
    detached in its own session with stdout/stderr appended to the
    per-process logs, and its state is saved under the given name.

    Returns 0 on success; 1 if no command was given, the name is
    already running, the command could not be started, or it exited
    within the first 0.1 seconds.
    """
    init_dirs()

    name = args.name
    command = getattr(args, "cmd", None) or getattr(args, "command", [])

    # Filter out leading '--' if present
    if command and command[0] == "--":
        command = command[1:]

    if not command:
        print(f"{color('red', '❌')} No command specified")
        print(f" Usage: bgo start {name} -- <command> [args...]")
        return 1

    # Check if already running
    existing = load_proc(name)
    if existing and is_running(existing.get("pid")):
        print(
            f"{color('red', '❌')} Process '{name}' is already running (PID: {existing['pid']})"
        )
        print(f" Use 'bgo stop {name}' first or choose a different name.")
        return 1

    command = resolve_command(command)

    # Prepare log files
    stdout_log = log_path(name, "out")
    stderr_log = log_path(name, "err")

    with open(stdout_log, "a") as out_f, open(stderr_log, "a") as err_f:
        # Write start marker
        timestamp = datetime.now().isoformat()
        marker = f"\n=== [{timestamp}] Starting: {' '.join(command)} ===\n"
        out_f.write(marker)
        err_f.write(marker)
        # Flush so the marker lands before the child's own output.
        out_f.flush()
        err_f.flush()

        try:
            process = subprocess.Popen(
                command,
                stdout=out_f,
                stderr=err_f,
                stdin=subprocess.DEVNULL,
                start_new_session=True,
                cwd=args.cwd or os.getcwd(),
            )
        except FileNotFoundError:
            print(f"{color('red', '❌')} Command not found: {command[0]}")
            return 1
        except PermissionError:
            print(f"{color('red', '❌')} Permission denied: {command[0]}")
            return 1
        except Exception as e:
            print(f"{color('red', '❌')} Failed to start: {e}")
            return 1

    # Brief check for immediate crash
    time.sleep(0.1)
    if not is_running(process.pid):
        print(
            f"{color('red', '❌')} Process '{name}' failed to start (exited immediately)"
        )
        print(f" Check logs: bgo logs {name}")
        return 1

    # Get pgid for process group killing
    try:
        pgid = os.getpgid(process.pid)
    except OSError:
        pgid = None

    # A previous (now stopped) state under this name may carry a watch
    # block; _resolve_watch_block decides whether it is carried forward.
    prior_watch = (existing or {}).get("watch") if existing else None
    watch_block = _resolve_watch_block(
        want_watch=getattr(args, "watch", False),
        overrides={
            "interval": getattr(args, "interval", None),
            "min_uptime": getattr(args, "min_uptime", None),
            "on_fast_crash": getattr(args, "on_fast_crash", None),
        },
        prior_watch=prior_watch,
    )

    info = {
        "name": name,
        "pid": process.pid,
        "pgid": pgid,
        "command": command,
        "cwd": args.cwd or os.getcwd(),
        "started_at": datetime.now(timezone.utc).isoformat(),
        "status": "running",
    }
    if watch_block:
        info["watch"] = watch_block
    # State is saved before the watcher starts: the watcher loads it on startup.
    save_proc(name, info)

    print(f"{color('green', '✅')} Started '{name}' (PID: {process.pid})")

    if watch_block:
        wpid, wpgid = _spawn_watcher(name)
        if wpid:
            # Record the watcher so stop/restart can kill it later.
            info["watch"]["watcher_pid"] = wpid
            info["watch"]["watcher_pgid"] = wpgid
            save_proc(name, info)
            print(
                f" {color('blue', '👁')} Watching "
                f"(interval={watch_block['interval']}s, "
                f"min-uptime={watch_block['min_uptime']}s, "
                f"on-fast-crash={watch_block['on_fast_crash']})"
            )
        else:
            print(f" {color('yellow', '⚠️')} Failed to spawn watcher")

    print(f" Logs: bgo logs {name}")
    return 0
789
+
790
+
791
def cmd_stop(args):
    """Stop the process named `args.name`.

    Returns 1 when no such process is recorded or it could not be
    stopped; 0 when it was stopped or was not running in the first
    place. `args.force` (optional) sends SIGKILL straight away.
    """
    name = args.name
    state = load_proc(name)
    if not state:
        print(f"{color('red', '❌')} No process named '{name}' found")
        return 1

    target_pid = state.get("pid")
    if not is_running(target_pid):
        print(f"{color('yellow', '⚠️')} Process '{name}' is not running")
        state["status"] = "stopped"
        save_proc(name, state)
        return 0

    use_force = getattr(args, "force", False)
    target_pgid = state.get("pgid")

    # Persist the stopped flag before any signal goes out, so a watcher
    # that notices the death reads "stopped" and exits.
    state["status"] = "stopped"
    save_proc(name, state)

    # The watcher goes first so it cannot restart the target.
    _kill_watcher(state)

    if not kill_process(target_pid, target_pgid, force=use_force):
        print(
            f"{color('red', '❌')} Failed to stop '{name}' (PID: {target_pid}). Try --force"
        )
        return 1

    if use_force:
        label = "💀 Killed"
    else:
        label = "🛑 Stopped"
    print(f"{label} '{name}' (PID: {target_pid})")
    state["stopped_at"] = datetime.now(timezone.utc).isoformat()
    save_proc(name, state)
    return 0
832
+
833
+
834
def cmd_restart(args):
    """Restart `args.name` with its recorded command and working directory.

    Clears any errored watch state, kills the watcher and the target,
    then hands over to cmd_start, whose return code is returned.
    Returns 1 when no such process is recorded.
    """
    name = args.name
    state = load_proc(name)
    if not state:
        print(f"{color('red', '❌')} No process named '{name}' found")
        return 1

    # The restart counter survives manual restarts by default, so
    # chronic crashers stay visible; --reset-counters wipes it.
    wipe_counters = getattr(args, "reset_counters", False)
    if "watch" in state:
        watch = state["watch"]
        watch["errored"] = False
        watch["error_reason"] = None
        watch["last_stderr_tail"] = None
        if wipe_counters:
            watch["restarts"] = 0
            watch["last_restart_at"] = None
        save_proc(name, state)

    # Watcher first, otherwise it could race us and restart the target.
    _kill_watcher(state)
    save_proc(name, state)

    # Then the target itself, if it is still alive.
    old_pid = state.get("pid")
    if is_running(old_pid):
        print(f"🛑 Stopping '{name}'...")
        kill_process(old_pid, state.get("pgid"))

    # cmd_start reloads the saved state, finds the watch block there,
    # and spawns a new watcher when it is enabled.
    start_args = argparse.Namespace(
        name=name,
        command=state["command"],
        cwd=state.get("cwd"),
    )
    return cmd_start(start_args)
873
+
874
+
875
def cmd_watch(args):
    """Attach a watcher to the running process `args.name`.

    Any previous watcher is killed first. With `args.reset`, or when the
    process has no watch block yet, a default block is built; otherwise
    the existing block is re-enabled with its error state cleared.
    Non-None `interval`, `min_uptime` and `on_fast_crash` attributes
    override the stored values.

    Returns 0 on success; 1 if the process is unknown, not running, or
    the watcher could not be spawned.
    """
    name = args.name
    state = load_proc(name)
    if not state:
        print(f"{color('red', '❌')} No process named '{name}' found")
        return 1

    if not is_running(state.get("pid")):
        print(f"{color('yellow', '⚠️')} Process '{name}' is not running. Start it first.")
        return 1

    # Only one watcher per process.
    _kill_watcher(state)

    start_fresh = getattr(args, "reset", False)
    previous = state.get("watch") or {}
    overrides = {
        key: getattr(args, key, None)
        for key in ("interval", "min_uptime", "on_fast_crash")
    }

    if start_fresh or not previous:
        watch_block = _default_watch_config(overrides)
    else:
        watch_block = {
            **previous,
            "enabled": True,
            "errored": False,
            "error_reason": None,
            "last_stderr_tail": None,
        }
        watch_block.update(
            {key: value for key, value in overrides.items() if value is not None}
        )

    # Save before spawning: the watcher loads this state on startup.
    state["watch"] = watch_block
    save_proc(name, state)

    watcher_pid, watcher_pgid = _spawn_watcher(name)
    if not watcher_pid:
        print(f"{color('red', '❌')} Failed to spawn watcher")
        return 1

    state["watch"]["watcher_pid"] = watcher_pid
    state["watch"]["watcher_pgid"] = watcher_pgid
    save_proc(name, state)
    print(
        f"{color('blue', '👁')} Watching '{name}' "
        f"(interval={watch_block['interval']}s, "
        f"min-uptime={watch_block['min_uptime']}s, "
        f"on-fast-crash={watch_block['on_fast_crash']})"
    )
    return 0
928
+
929
+
930
def cmd_unwatch(args):
    """Stop watching `args.name` without touching the process itself.

    Returns 1 when no such process is recorded, otherwise 0 (also when
    the process was not being watched).
    """
    name = args.name
    state = load_proc(name)
    if not state:
        print(f"{color('red', '❌')} No process named '{name}' found")
        return 1

    currently_watched = state.get("watch", {}).get("enabled")
    if not currently_watched:
        print(f"{color('yellow', '⚠️')} '{name}' is not being watched")
        return 0

    _kill_watcher(state)
    state["watch"]["enabled"] = False
    save_proc(name, state)
    print(f"{color('gray', '👁 unwatched')} '{name}'")
    return 0
945
+
946
+
947
def _watch_cell(info: dict, level: str | None = None) -> str:
    """Build the WATCH column text for one row of the status table.

    Shows, in order of precedence: the "none" glyph when the process is
    not watched, a red "errored" marker, a yellow "dead" marker when a
    recorded watcher pid is no longer running, or otherwise the green
    watching glyph followed by the restart count.
    """
    glyph_set = glyphs(level)
    watch = info.get("watch")
    if not (watch and watch.get("enabled")):
        return glyph_set["watch_none"]
    if watch.get("errored"):
        return color("red", f"{glyph_set['errored']} errored")

    watcher_pid = watch.get("watcher_pid")
    if watcher_pid and not is_running(watcher_pid):
        return color("yellow", f"{glyph_set['watcher_dead']} dead")

    restart_count = watch.get("restarts", 0)
    return color("green", f"{glyph_set['watching']} {restart_count}")
959
+
960
+
961
def _clear_screen():
    """Clear the terminal and move the cursor home; no-op unless stdout is a TTY."""
    if not sys.stdout.isatty():
        return
    sys.stdout.write("\033[2J\033[H")
    sys.stdout.flush()
966
+
967
+
968
def _status_snapshot(procs: dict) -> list[dict]:
    """Build a status snapshot for `procs`: one row dict per process.

    Rows are sorted by name and carry name, pid, status ("online" or
    "stopped"), cpu, mem, uptime, command, cwd, started_at and watch.

    No printing. Side effect: persists status='stopped' for procs whose
    pid is no longer running, so subsequent status calls stay
    consistent. Liveness is probed once per process and reused for both
    the ps lookup and the row, so a row cannot disagree with itself and
    no process is probed twice. The ps lookup is a single batched call.
    """
    alive = {
        name: is_running(info.get("pid")) for name, info in procs.items()
    }
    running_pids = [
        info.get("pid") for name, info in procs.items() if alive[name]
    ]
    pinfo_map = get_process_info_batch(running_pids)

    rows = []
    for name, info in sorted(procs.items()):
        pid = info.get("pid")
        running = alive[name]
        if running:
            pinfo = pinfo_map.get(pid, dict(_BLANK_PINFO))
        else:
            pinfo = dict(_BLANK_PINFO)
            if info.get("status") != "stopped":
                # Bring the saved state in line with reality.
                info["status"] = "stopped"
                save_proc(name, info)
        rows.append({
            "name": name,
            "pid": pid,
            "status": "online" if running else "stopped",
            "cpu": pinfo["cpu"],
            "mem": pinfo["mem"],
            "uptime": pinfo["time"],
            "command": info.get("command", []),
            "cwd": info.get("cwd"),
            "started_at": info.get("started_at"),
            "watch": info.get("watch"),
        })
    return rows
1006
+
1007
+
1008
def _visible_width(s: str) -> int:
    """Return the length of `s` once ANSI escape codes are removed."""
    plain = strip_ansi(s)
    return len(plain)
1011
+
1012
+
1013
def _pad(text: str, width: int, align: str = "left") -> str:
    """Space-pad `text` to `width` visible characters.

    ANSI codes in `text` do not count towards its width. `align="right"`
    puts the padding before the text; any other value puts it after.
    Text already at or beyond `width` is returned unchanged.
    """
    missing = width - _visible_width(text)
    filler = " " * missing if missing > 0 else ""
    if align == "right":
        return filler + text
    return text + filler
1019
+
1020
+
1021
+ def _print_status_table(rows: list[dict], level: str | None = None):
1022
+ """Render the status table for a snapshot."""
1023
+ if not rows:
1024
+ return
1025
+ level = level or _detect_table_level()
1026
+ g = GLYPHS[level]
1027
+ term_width = shutil.get_terminal_size().columns
1028
+
1029
+ # column widths (visible chars only)
1030
+ name_w = max(12, max(len(r["name"]) for r in rows) + 1)
1031
+ status_w = max(len(g["online"]), len(g["stopped"])) + 1
1032
+ pid_w = 8
1033
+ cpu_w = 6
1034
+ mem_w = 6
1035
+ uptime_w = 12
1036
+ watch_w = 12
1037
+
1038
+ fixed = name_w + status_w + pid_w + cpu_w + mem_w + uptime_w + watch_w
1039
+ # column separators: plain/normal use spaces; fancy uses vertical bars
1040
+ sep_w = 7 # 7 inter-column gaps
1041
+ cmd_w = max(20, term_width - fixed - sep_w - 2)
1042
+
1043
+ # --- header ---
1044
+ headers = ["NAME", "STATUS", "PID", "CPU", "MEM", "UPTIME", "WATCH", "COMMAND"]
1045
+ widths = [name_w, status_w, pid_w, cpu_w, mem_w, uptime_w, watch_w, cmd_w]
1046
+ if level == LEVEL_FANCY:
1047
+ rule = g["hline"] * (sum(widths) + sep_w + 2)
1048
+ top = g["tl"] + rule[1:-1] + g["tr"]
1049
+ bottom = g["bl"] + rule[1:-1] + g["br"]
1050
+ mid_rule = g["hline"] * (sum(widths) + sep_w + 2)
1051
+ print(color("gray", top))
1052
+ cells = [color("bold", _pad(h, w)) for h, w in zip(headers, widths)]
1053
+ print(g["vline"] + " " + (" ").join(cells) + " " + g["vline"])
1054
+ print(color("gray", g["tright"] + mid_rule[1:-1] + g["tleft"]))
1055
+ else:
1056
+ cells = [color("bold", _pad(h, w)) for h, w in zip(headers, widths)]
1057
+ print(" ".join(cells))
1058
+ print(color("gray", g["hline"] * min(term_width, sum(widths) + sep_w)))
1059
+
1060
+ running_count = 0
1061
+ stopped_count = 0
1062
+ errored_procs = []
1063
+
1064
+ for r in rows:
1065
+ cmd_str = truncate(" ".join(r["command"]), cmd_w)
1066
+ watch_str = _watch_cell({"watch": r["watch"]}, level)
1067
+ w = r["watch"] or {}
1068
+ if w.get("errored"):
1069
+ errored_procs.append((r["name"], w.get("error_reason", "unknown")))
1070
+
1071
+ if r["status"] == "online":
1072
+ running_count += 1
1073
+ status_str = color("green", g["online"])
1074
+ cells = [
1075
+ _pad(r["name"], name_w),
1076
+ _pad(status_str, status_w),
1077
+ _pad(color("gray", str(r["pid"])), pid_w),
1078
+ _pad(str(r["cpu"]), cpu_w),
1079
+ _pad(str(r["mem"]), mem_w),
1080
+ _pad(str(r["uptime"]), uptime_w),
1081
+ _pad(watch_str, watch_w),
1082
+ cmd_str,
1083
+ ]
1084
+ else:
1085
+ stopped_count += 1
1086
+ status_str = color("red", g["stopped"])
1087
+ cells = [
1088
+ _pad(r["name"], name_w),
1089
+ _pad(status_str, status_w),
1090
+ _pad("-", pid_w),
1091
+ _pad("-", cpu_w),
1092
+ _pad("-", mem_w),
1093
+ _pad("-", uptime_w),
1094
+ _pad(watch_str, watch_w),
1095
+ color("gray", cmd_str),
1096
+ ]
1097
+
1098
+ if level == LEVEL_FANCY:
1099
+ print(g["vline"] + " " + " ".join(cells) + " " + g["vline"])
1100
+ else:
1101
+ print(" ".join(cells))
1102
+
1103
+ if level == LEVEL_FANCY:
1104
+ print(color("gray", bottom))
1105
+ else:
1106
+ print(color("gray", g["hline"] * min(term_width, sum(widths) + sep_w)))
1107
+
1108
+ summary = (
1109
+ f"Total: {len(rows)} | {color('green', g['online'])}: {running_count} | "
1110
+ f"{color('red', g['stopped'])}: {stopped_count}"
1111
+ )
1112
+ print(summary)
1113
+ if errored_procs:
1114
+ print()
1115
+ print(color("red", f"{g['errored']} {len(errored_procs)} errored:"))
1116
+ for n, reason in errored_procs:
1117
+ print(f" {color('red', n)} — {reason}")
1118
+ print(f" {color('gray', f'bgo logs {n} --watcher | bgo restart {n}')}")
1119
+
1120
+
1121
+ def _level_from_args(args) -> str | None:
1122
+ """Resolve --plain / --fancy CLI flags into a level string, or None for auto."""
1123
+ if getattr(args, "plain", False):
1124
+ return LEVEL_PLAIN
1125
+ if getattr(args, "fancy", False):
1126
+ return LEVEL_FANCY
1127
+ return None
1128
+
1129
+
1130
def cmd_status(args):
    """Show process status as JSON, a refreshing table, a table, or a detail view.

    Modes, checked in this order:
      * ``args.json``  - print the snapshot (one row if ``args.name`` is set,
        else all rows) as JSON.
      * ``args.watch`` - redraw the table every ``args.interval`` seconds
        (minimum 1, default 2) until Ctrl-C.
      * otherwise      - detail view for ``args.name`` if given, else a
        one-shot table.

    Returns 0, or 1 when a named process does not exist.
    """
    target = getattr(args, "name", None)

    # JSON output mode bypasses everything else
    if getattr(args, "json", False):
        all_procs = load_all_procs()
        if not target:
            print(json.dumps(_status_snapshot(all_procs), indent=2, default=str))
            return 0
        record = load_proc(args.name)
        if not record:
            print(json.dumps({"error": f"no process named '{args.name}'"}))
            return 1
        snapshot = _status_snapshot({args.name: record})
        print(json.dumps(snapshot[0], indent=2, default=str))
        return 0

    level_override = _level_from_args(args)

    # Watch mode: refresh until Ctrl-C
    if getattr(args, "watch", False):
        interval = max(1, getattr(args, "interval", None) or 2)
        try:
            while True:
                _clear_screen()
                current = load_all_procs()
                if current:
                    _print_status_table(_status_snapshot(current), level=level_override)
                else:
                    print("No processes registered.")
                    print("Usage: bgo start <n> -- <command> [args...]")
                print(color("gray", f"\nRefreshing every {interval}s. Ctrl-C to exit."))
                time.sleep(interval)
        except KeyboardInterrupt:
            # Ctrl-C during the redraw or the sleep ends watch mode cleanly.
            return 0

    # Default: one-shot table or detail view
    all_procs = load_all_procs()
    if not all_procs:
        print("No processes registered.")
        print("Usage: bgo start <n> -- <command> [args...]")
        print(" bgo <command> [args...]")
        return 0

    if not target:
        _print_status_table(_status_snapshot(all_procs), level=level_override)
        return 0

    name = args.name
    record = load_proc(name)
    if not record:
        print(f"{color('red', '❌')} No process named '{name}' found")
        return 1
    _print_proc_detail(record)
    return 0
1190
+
1191
+
1192
def _print_proc_detail(info: dict):
    """Print the detail view for one proc record.

    Shows identity and start info, live CPU/MEM/uptime when the pid is
    running, the size and path of each existing stdout/stderr log, and, if
    the record has a "watch" entry, the watcher configuration, state,
    restart count, error details and watcher log.
    """
    proc_name = info["name"]
    pid = info["pid"]
    running = is_running(pid)
    if running:
        status_str = color("green", "online")
    else:
        status_str = color("red", "stopped")

    print(f"{color('bold', 'Process:')} {proc_name}")
    print(f" Status: {status_str}")
    print(f" PID: {pid}")
    print(f" PGID: {info.get('pgid', 'N/A')}")
    print(f" Command: {' '.join(info['command'])}")
    print(f" CWD: {info.get('cwd', 'N/A')}")
    print(f" Started: {info.get('started_at', 'N/A')}")

    if running:
        live = get_process_info(pid)
        print(f" CPU: {live['cpu']}%")
        print(f" MEM: {live['mem']}%")
        print(f" Uptime: {live['time']}")

    for stream, label in (("out", "stdout"), ("err", "stderr")):
        log_file = log_path(proc_name, stream)
        if not log_file.exists():
            continue
        print(f" {label}: {_human_size(log_file.stat().st_size)} ({log_file})")

    watch_cfg = info.get("watch")
    if not watch_cfg:
        return

    print()
    print(color("bold", "Watch:"))
    if watch_cfg.get("enabled"):
        print(
            f" Enabled: yes "
            f"(interval {watch_cfg.get('interval')}s, "
            f"min-uptime {watch_cfg.get('min_uptime')}s, "
            f"mode {watch_cfg.get('on_fast_crash')})"
        )
    else:
        print(" Enabled: no")

    watcher_pid = watch_cfg.get("watcher_pid")
    if watcher_pid:
        if is_running(watcher_pid):
            watcher_state = color("green", "alive")
        else:
            watcher_state = color("yellow", "dead")
        print(f" Watcher: PID {watcher_pid} ({watcher_state})")
    else:
        print(f" Watcher: {color('gray', 'not running')}")

    print(f" Restarts: {watch_cfg.get('restarts', 0)}")
    if watch_cfg.get("last_restart_at"):
        print(f" Last: {watch_cfg['last_restart_at']}")

    if watch_cfg.get("errored"):
        print(f" Errored: {color('red', 'YES')} — {watch_cfg.get('error_reason')}")
        stderr_tail = watch_cfg.get("last_stderr_tail")
        if stderr_tail:
            print(" Last err:")
            # Only the last five lines of the stored tail are shown.
            for tail_line in stderr_tail.splitlines()[-5:]:
                print(f" {color('gray', tail_line)}")
        hint = f"bgo logs {proc_name} --watcher | bgo restart {proc_name}"
        print(f" Recover: {color('gray', hint)}")
    else:
        print(" Errored: no")

    watcher_log = watcher_log_path(proc_name)
    if watcher_log.exists() and watcher_log.stat().st_size > 0:
        print(f" watcher: {_human_size(watcher_log.stat().st_size)} ({watcher_log})")
1258
+
1259
+
1260
+ def _human_size(n: int) -> str:
1261
+ for unit in ("B", "KB", "MB", "GB"):
1262
+ if n < 1024:
1263
+ return f"{n:.0f}{unit}" if unit == "B" else f"{n:.1f}{unit}"
1264
+ n /= 1024
1265
+ return f"{n:.1f}TB"
1266
+
1267
+
1268
def cmd_logs(args):
    """Show (or follow) the captured logs of a registered process.

    Stream selection, in priority order: ``args.watcher`` -> watcher log,
    ``args.stderr`` -> stderr only, ``args.stdout`` -> stdout only,
    otherwise stdout followed by stderr.

    ``args.follow`` hands the file(s) to ``tail -f``; otherwise the last
    ``args.lines`` lines are printed (all lines when it is 0 or negative).

    In the default (both streams) static view an empty or missing stdout log
    no longer hides stderr: the command only returns early when there is
    nothing to show at all. Log files are decoded with ``errors="replace"``
    so undecodable bytes written by the process cannot crash the viewer.

    Returns 0, or 1 when the process is unknown or a log file vanishes
    while being read.
    """
    name = args.name
    info = load_proc(name)

    if not info:
        print(f"{color('red', '❌')} No process named '{name}' found")
        return 1

    show_watcher = getattr(args, "watcher", False)
    only_stderr = getattr(args, "stderr", False)
    only_stdout = getattr(args, "stdout", False)
    follow = getattr(args, "follow", False)
    lines_to_show = getattr(args, "lines", 50)

    # Determine which log to show
    if show_watcher:
        stream = "watcher"
        lf = watcher_log_path(name)
    elif only_stderr:
        stream = "err"
        lf = log_path(name, stream)
    else:
        stream = "out"
        lf = log_path(name, stream)

    def has_content(path) -> bool:
        """True when the log file exists and is non-empty."""
        return path.exists() and path.stat().st_size > 0

    # stderr accompanies stdout only when no explicit stream filter was given.
    both_streams = stream == "out" and not only_stdout
    err_log = log_path(name, "err") if both_streams else None
    primary_ok = has_content(lf)
    err_ok = err_log is not None and has_content(err_log)

    if not primary_ok:
        label = {"err": "stderr", "out": "stdout", "watcher": "watcher"}.get(stream, stream)
        print(f"No {label} logs found for '{name}'")
        # Follow mode carries on; static mode does too if stderr has content.
        if not follow and not err_ok:
            return 0

    if follow:
        # Follow mode: use tail -f for reliability
        tail_args = ["tail", "-f", "-n", str(lines_to_show)]
        files = [str(lf)]
        if err_ok:
            files.append(str(err_log))

        print(color("gray", "─" * 50))
        print(color("yellow", f"Following logs for '{name}' (Ctrl+C to exit)"))
        print(color("gray", "─" * 50))

        try:
            subprocess.run(tail_args + files)
        except KeyboardInterrupt:
            print(color("gray", "\n[stopped following]"))
        return 0

    def read_tail(path):
        """Return (lines, clipped): the last `lines_to_show` lines of `path`."""
        with open(path, errors="replace") as f:
            lines = f.readlines()
        clipped = lines_to_show > 0 and len(lines) > lines_to_show
        if clipped:
            lines = lines[-lines_to_show:]
        return lines, clipped

    # Static view
    try:
        if primary_ok:
            lines, clipped = read_tail(lf)
            if clipped:
                print(f"... (showing last {lines_to_show} lines)\n")
            for line in lines:
                print(line, end="")

        # Also show stderr if not filtered and has content
        if err_ok:
            err_lines, _ = read_tail(err_log)
            if err_lines:
                print(f"\n{color('gray', '─── stderr ───')}")
                for line in err_lines:
                    print(line, end="")
    except FileNotFoundError as exc:
        # A log can disappear between the existence check and the read.
        print(f"Log file not found: {exc.filename or lf}")
        return 1

    return 0
1358
+
1359
+
1360
def cmd_logs_follow(args):
    """Follow a process's logs: `cmd_logs` with follow forced on.

    When `args.lines` is missing or None it defaults to 10.
    """
    if getattr(args, "lines", None) is None:
        args.lines = 10
    args.follow = True
    return cmd_logs(args)
1366
+
1367
+
1368
def cmd_clean(args):
    """Delete every registered process whose pid is not running. Always returns 0."""
    removed = []
    for proc_name, record in load_all_procs().items():
        if is_running(record.get("pid")):
            continue
        removed.append(proc_name)
        delete_proc(proc_name)

    if not removed:
        print("Nothing to clean up.")
        return 0

    print(
        f"🧹 Cleaned up {len(removed)} stopped process(es): {', '.join(removed)}"
    )
    return 0
1387
+
1388
+
1389
def cmd_resurrect(args):
    """Start again every process recorded as 'running' whose pid is gone.

    Each candidate is started through `cmd_start` with its stored name,
    command and cwd. Returns 1 if any start fails, otherwise 0.
    """
    registry = load_all_procs()
    if not registry:
        print("No processes registered.")
        return 0

    # Stored status says running, but the pid is dead.
    candidates = [
        (proc_name, record)
        for proc_name, record in registry.items()
        if not is_running(record.get("pid")) and record.get("status") == "running"
    ]
    if not candidates:
        print("No processes to resurrect (all running or already stopped).")
        return 0

    total = len(candidates)
    print(f"🔄 Resurrecting {total} process(es)...")

    failures = 0
    for proc_name, record in candidates:
        start_args = argparse.Namespace(
            name=proc_name,
            command=record["command"],
            cwd=record.get("cwd"),
        )
        if cmd_start(start_args) != 0:
            failures += 1

    if failures == 0:
        print(f"\n✅ All {total} process(es) resurrected")
        return 0

    print(f"\n⚠️ {failures}/{total} process(es) failed to start")
    return 1
1426
+
1427
+
1428
+ def _interactive_multiselect(title: str, options: list[tuple[str, str]]) -> list[str] | None:
1429
+ """Render a TTY checkbox list. Returns selected keys, or None if cancelled.
1430
+
1431
+ options: list of (key, label) tuples. Keys returned; labels displayed.
1432
+ Controls: ↑/↓ navigate, space toggle, a toggle-all, enter confirm, q/esc cancel.
1433
+ Falls back to a numbered prompt if stdin/stdout isn't a TTY.
1434
+ """
1435
+ if not options:
1436
+ return []
1437
+
1438
+ if not (sys.stdin.isatty() and sys.stdout.isatty()):
1439
+ # Non-TTY fallback: numbered prompt
1440
+ print(title)
1441
+ for i, (_, label) in enumerate(options, 1):
1442
+ print(f" {i}) {label}")
1443
+ print("Enter numbers (comma/space separated), 'a' for all, blank to cancel:")
1444
+ try:
1445
+ raw = input("> ").strip()
1446
+ except EOFError:
1447
+ return None
1448
+ if not raw:
1449
+ return None
1450
+ if raw.lower() == "a":
1451
+ return [k for k, _ in options]
1452
+ picks: list[str] = []
1453
+ for tok in re.split(r"[,\s]+", raw):
1454
+ if not tok:
1455
+ continue
1456
+ try:
1457
+ idx = int(tok) - 1
1458
+ if 0 <= idx < len(options):
1459
+ picks.append(options[idx][0])
1460
+ except ValueError:
1461
+ pass
1462
+ return picks
1463
+
1464
+ selected = [False] * len(options)
1465
+ cursor = 0
1466
+ fd = sys.stdin.fileno()
1467
+ old_settings = termios.tcgetattr(fd)
1468
+
1469
+ def render(first: bool):
1470
+ if not first:
1471
+ # Move cursor up to redraw
1472
+ sys.stdout.write(f"\033[{len(options) + 3}A")
1473
+ sys.stdout.write("\033[J") # clear from cursor down
1474
+ sys.stdout.write(f"{title}\n")
1475
+ sys.stdout.write(color("gray", " ↑/↓ move · space toggle · a all · enter confirm · q cancel\n"))
1476
+ for i, (_, label) in enumerate(options):
1477
+ mark = color("green", "[x]") if selected[i] else "[ ]"
1478
+ line = f" {mark} {label}"
1479
+ if i == cursor:
1480
+ line = color("blue", "▶ ") + line.lstrip()
1481
+ else:
1482
+ line = " " + line
1483
+ sys.stdout.write(line + "\n")
1484
+ sys.stdout.write(color("gray", f" ({sum(selected)} selected)\n"))
1485
+ sys.stdout.flush()
1486
+
1487
+ try:
1488
+ tty.setcbreak(fd)
1489
+ render(first=True)
1490
+ while True:
1491
+ ch = sys.stdin.read(1)
1492
+ if ch == "\x1b":
1493
+ # Escape sequence (arrow key) or plain ESC
1494
+ next1 = sys.stdin.read(1)
1495
+ if next1 == "":
1496
+ return None # ESC alone
1497
+ if next1 == "[":
1498
+ arrow = sys.stdin.read(1)
1499
+ if arrow == "A":
1500
+ cursor = (cursor - 1) % len(options)
1501
+ elif arrow == "B":
1502
+ cursor = (cursor + 1) % len(options)
1503
+ render(first=False)
1504
+ continue
1505
+ if ch == " ":
1506
+ selected[cursor] = not selected[cursor]
1507
+ render(first=False)
1508
+ elif ch.lower() == "a":
1509
+ new_state = not all(selected)
1510
+ selected = [new_state] * len(options)
1511
+ render(first=False)
1512
+ elif ch in ("\r", "\n"):
1513
+ return [options[i][0] for i, s in enumerate(selected) if s]
1514
+ elif ch.lower() == "q" or ch == "\x03": # q or Ctrl-C
1515
+ return None
1516
+ finally:
1517
+ termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
1518
+ sys.stdout.write("\n")
1519
+ sys.stdout.flush()
1520
+
1521
+
1522
def _restart_one(name: str, info: dict) -> int:
    """Kill the proc if its pid is running, then start it again via `cmd_start`.

    Returns whatever `cmd_start` returns (0 on success).
    """
    current_pid = info.get("pid")
    if is_running(current_pid):
        kill_process(current_pid, info.get("pgid"))

    start_args = argparse.Namespace(
        name=name, command=info["command"], cwd=info.get("cwd")
    )
    return cmd_start(start_args)
1533
+
1534
+
1535
def cmd_restart_stopped(args):
    """Restart processes whose pid is not currently running.

    That covers explicitly stopped procs as well as procs still recorded as
    'running' whose pid is dead (e.g. after a reboot).

    Target selection: names in ``args.names`` if given (unknown names abort
    with 1, already-running names are skipped with a warning); else every
    candidate when ``args.all`` is set; else an interactive multi-select.
    Returns 1 if any restart fails or a requested name is unknown, else 0.
    """
    registry = load_all_procs()
    if not registry:
        print("No processes registered.")
        return 0

    # name -> record for everything that is not running, in registry order
    stopped = {
        proc_name: record
        for proc_name, record in registry.items()
        if not is_running(record.get("pid"))
    }

    requested = getattr(args, "names", None) or []
    use_all = getattr(args, "all", False)

    if requested:
        unknown = [n for n in requested if n not in registry]
        not_stopped = [n for n in requested if n in registry and n not in stopped]
        if unknown:
            print(f"{color('red', '❌')} Unknown process(es): {', '.join(unknown)}")
            return 1
        if not_stopped:
            print(f"{color('yellow', '⚠️')} Skipping (already running): {', '.join(not_stopped)}")
        targets = [(n, stopped[n]) for n in requested if n in stopped]
        if not targets:
            print("Nothing to restart.")
            return 0
    elif not stopped:
        print("No stopped processes to restart.")
        return 0
    elif use_all:
        targets = list(stopped.items())
    else:
        menu = []
        for proc_name, record in stopped.items():
            command_text = " ".join(record.get("command", []))
            when = record.get("stopped_at") or record.get("started_at") or "?"
            menu.append(
                (proc_name, f"{proc_name:<20} {color('gray', when[:19])} {command_text[:60]}")
            )
        chosen = _interactive_multiselect(
            color("bold", f"Select stopped processes to restart ({len(stopped)} available):"),
            menu,
        )
        if chosen is None:
            print("Cancelled.")
            return 0
        if not chosen:
            print("Nothing selected.")
            return 0
        targets = [(n, stopped[n]) for n in chosen]

    if not targets:
        print("No matching stopped processes.")
        return 0

    total = len(targets)
    print(f"🔄 Restarting {total} process(es)...")
    failures = 0
    for proc_name, record in targets:
        if _restart_one(proc_name, record) != 0:
            failures += 1

    if failures == 0:
        print(f"\n✅ All {total} process(es) restarted")
        return 0
    print(f"\n⚠️ {failures}/{total} process(es) failed to start")
    return 1
1608
+
1609
+
1610
def cmd_restart_last(args):
    """Restart not-running processes, most recently stopped/started first.

    Candidates are ordered by ``stopped_at`` (falling back to ``started_at``)
    descending. With ``args.all`` every candidate is restarted in that
    order; otherwise an interactive menu in the same order is shown with
    nothing pre-selected. Returns 1 if any restart fails, else 0.
    """
    registry = load_all_procs()
    if not registry:
        print("No processes registered.")
        return 0

    def recency(item):
        # Timestamp used for ordering; records without one sort last.
        record = item[1]
        return record.get("stopped_at") or record.get("started_at") or ""

    not_running = [
        (proc_name, record)
        for proc_name, record in registry.items()
        if not is_running(record.get("pid"))
    ]
    if not not_running:
        print("No stopped processes to restart.")
        return 0

    candidates = sorted(not_running, key=recency, reverse=True)

    if getattr(args, "all", False):
        targets = candidates
    else:
        menu = []
        for proc_name, record in candidates:
            command_text = " ".join(record.get("command", []))
            when = record.get("stopped_at") or record.get("started_at") or "?"
            menu.append(
                (proc_name, f"{proc_name:<20} {color('gray', when[:19])} {command_text[:60]}")
            )
        chosen = _interactive_multiselect(
            color("bold", f"Select processes to restart (most recent first, {len(candidates)} available):"),
            menu,
        )
        if chosen is None:
            print("Cancelled.")
            return 0
        if not chosen:
            print("Nothing selected.")
            return 0
        by_name = dict(candidates)
        targets = [(n, by_name[n]) for n in chosen]

    total = len(targets)
    print(f"🔄 Restarting {total} process(es)...")
    failures = 0
    for proc_name, record in targets:
        if _restart_one(proc_name, record) != 0:
            failures += 1

    if failures == 0:
        print(f"\n✅ All {total} process(es) restarted")
        return 0
    print(f"\n⚠️ {failures}/{total} process(es) failed to start")
    return 1
1672
+
1673
+
1674
def cmd_delete(args):
    """Delete a process from state (stop if running first).

    A running process is only stopped after confirmation unless
    ``args.yes`` is set. The answer is stripped and compared
    case-insensitively; "y" or "yes" confirm, anything else cancels. If
    stdin is closed (no answer can be read) the deletion is cancelled
    rather than crashing with EOFError.

    ``args.keep_logs`` is forwarded to `delete_proc`. Returns 1 when the
    process is unknown, otherwise 0.
    """
    name = args.name
    info = load_proc(name)

    if not info:
        print(f"{color('red', '❌')} No process named '{name}' found")
        return 1

    # Stop if running
    pid = info.get("pid")
    if is_running(pid):
        if not getattr(args, "yes", False):
            try:
                confirm = input(f"Process '{name}' is running. Stop and delete? [y/N] ")
            except EOFError:
                # Non-interactive stdin: take the safe default (No).
                print()
                confirm = ""
            if confirm.strip().lower() not in ("y", "yes"):
                print("Cancelled.")
                return 0
        kill_process(pid, info.get("pgid"))

    keep_logs = getattr(args, "keep_logs", False)
    delete_proc(name, keep_logs=keep_logs)
    suffix = " (logs kept)" if keep_logs else ""
    print(f"🗑️ Deleted '{name}'{suffix}")
    return 0
1698
+
1699
+
1700
+ # --- CLI ---
1701
+
1702
+
1703
+ def main():
1704
+ init_dirs()
1705
+
1706
+ # Hidden subcommand for internal watcher loop
1707
+ if len(sys.argv) >= 3 and sys.argv[1] == "__watcher__":
1708
+ return cmd_watcher_loop(sys.argv[2])
1709
+
1710
+ known_commands = {
1711
+ "start", "stop", "restart", "restart-stopped", "restart-last",
1712
+ "status", "logs", "clean", "delete", "resurrect",
1713
+ "watch", "unwatch",
1714
+ "ls", "list", "follow", "tail", "-h", "--help",
1715
+ "open", "kill", "rm",
1716
+ }
1717
+
1718
+ # Direct mode: bgo <n> -- <cmd> OR bgo <cmd> [args...]
1719
+ # Single-token invocation NEVER falls through to start — it routes to
1720
+ # status detail if it names a registered proc, or prints help.
1721
+ # Direct-mode start requires either an explicit '--' separator, a -w
1722
+ # flag, OR at least 2 positional tokens (name + command).
1723
+ if len(sys.argv) > 1 and sys.argv[1] not in known_commands:
1724
+ remaining = sys.argv[1:]
1725
+
1726
+ # Single arg → status detail if registered, else help
1727
+ if len(remaining) == 1 and remaining[0] not in ("-w", "--watch"):
1728
+ tok = remaining[0]
1729
+ if tok.startswith("-"):
1730
+ # Unknown flag — argparse handles it (prints error + usage)
1731
+ pass
1732
+ elif load_proc(tok) is not None:
1733
+ args = argparse.Namespace(name=tok, watch=False, json=False, interval=None)
1734
+ return cmd_status(args)
1735
+ else:
1736
+ print(f"{color('red', '❌')} Unknown command or process: '{tok}'")
1737
+ print(f" Run {color('bold', 'bgo --help')} for usage, or "
1738
+ f"{color('bold', 'bgo start ' + tok + ' -- <command>')} to start a new process.")
1739
+ return 1
1740
+
1741
+ # Strip -w / --watch in head (positions before '--', or first two positions if no '--').
1742
+ want_watch = False
1743
+ if "--" in remaining:
1744
+ sep = remaining.index("--")
1745
+ head, tail = remaining[:sep], remaining[sep:]
1746
+ new_head = []
1747
+ for tok in head:
1748
+ if tok in ("-w", "--watch"):
1749
+ want_watch = True
1750
+ else:
1751
+ new_head.append(tok)
1752
+ remaining = new_head + tail
1753
+ else:
1754
+ new_remaining = []
1755
+ for idx, tok in enumerate(remaining):
1756
+ if idx < 2 and tok in ("-w", "--watch"):
1757
+ want_watch = True
1758
+ continue
1759
+ new_remaining.append(tok)
1760
+ remaining = new_remaining
1761
+
1762
+ if not remaining:
1763
+ print("Usage: bgo <n> -- <command> [args...]")
1764
+ return 1
1765
+
1766
+ # Check if there's a '--' separator → explicit name mode
1767
+ if "--" in remaining:
1768
+ sep_idx = remaining.index("--")
1769
+ name = remaining[0] if sep_idx > 0 else None
1770
+ command = remaining[sep_idx + 1 :]
1771
+
1772
+ if not name or not command:
1773
+ print("Usage: bgo [-w] <n> -- <command> [args...]")
1774
+ return 1
1775
+
1776
+ args = argparse.Namespace(
1777
+ name=name, command=command, cwd=None, watch=want_watch,
1778
+ interval=None, min_uptime=None, on_fast_crash=None,
1779
+ )
1780
+ return cmd_start(args)
1781
+ else:
1782
+ # No '--' → heuristic: if first arg is NOT an executable but
1783
+ # second arg IS (or looks like a path/command), treat first as name.
1784
+ if len(remaining) >= 2 and not _looks_like_command(remaining[0]) and _looks_like_command(remaining[1]):
1785
+ name = remaining[0]
1786
+ command = remaining[1:]
1787
+ else:
1788
+ command = remaining
1789
+ name = derive_name(command)
1790
+ args = argparse.Namespace(
1791
+ name=name, command=command, cwd=None, watch=want_watch,
1792
+ interval=None, min_uptime=None, on_fast_crash=None,
1793
+ )
1794
+ return cmd_start(args)
1795
+
1796
+ parser = argparse.ArgumentParser(
1797
+ prog="bgo",
1798
+ description="A lightweight background process manager",
1799
+ formatter_class=argparse.RawDescriptionHelpFormatter,
1800
+ epilog="""
1801
+ Examples:
1802
+ bgo python3 server.py # Start (auto-name: 'server')
1803
+ bgo myapp python3 server.py --port 8080 # Start with name + args
1804
+ bgo start myapp -- python3 server.py # Start with explicit name
1805
+ bgo open myapp -- python3 server.py # Alias for start
1806
+ bgo myapp -- ./api-server --port 8080 # Shorthand with name
1807
+ bgo status # Show all processes
1808
+ bgo status myapp # Detailed view of one process
1809
+ bgo logs myapp # View logs
1810
+ bgo logs myapp -f # Follow logs (tail -f)
1811
+ bgo follow myapp # Shorthand for logs -f
1812
+ bgo stop myapp # Stop (SIGTERM → SIGKILL)
1813
+ bgo kill myapp # Alias for stop
1814
+ bgo stop myapp --force # Force kill (SIGKILL)
1815
+ bgo restart myapp # Restart a process
1816
+ bgo restart-stopped # Pick stopped procs to restart (interactive)
1817
+ bgo restart-stopped --all # Restart all stopped procs
1818
+ bgo restart-stopped foo bar # Restart named stopped procs
1819
+ bgo restart-last # Pick from most-recent-first menu
1820
+ bgo restart-last --all # Restart all not-running, recent first
1821
+ bgo delete myapp # Remove from list + logs
1822
+ bgo rm myapp # Alias for delete
1823
+ bgo clean # Remove all stopped entries
1824
+ bgo resurrect # Restart all previously running procs
1825
+
1826
+ Watch mode (auto-restart on crash):
1827
+ bgo start -w myapp -- python3 server.py # Start with watcher
1828
+ bgo -w myapp python3 server.py # Direct mode with watcher
1829
+ bgo watch myapp # Attach watcher to existing process
1830
+ bgo watch myapp --interval 5 --min-uptime 3
1831
+ bgo unwatch myapp # Detach watcher (keeps proc)
1832
+ bgo logs myapp --watcher # View watcher events
1833
+ """,
1834
+ )
1835
+
1836
+ subparsers = parser.add_subparsers(dest="command", help="Commands")
1837
+
1838
+ # start
1839
+ start_parser = subparsers.add_parser(
1840
+ "start", aliases=["open"], help="Start a process"
1841
+ )
1842
+ start_parser.add_argument("name", help="Name for this process")
1843
+ start_parser.add_argument(
1844
+ "cmd", nargs=argparse.REMAINDER, help="Command to run (use -- before command)"
1845
+ )
1846
+ start_parser.add_argument("--cwd", help="Working directory for the process")
1847
+ start_parser.add_argument(
1848
+ "-w", "--watch", action="store_true",
1849
+ help="Auto-restart this process if it crashes",
1850
+ )
1851
+ start_parser.add_argument(
1852
+ "--interval", type=int, default=None,
1853
+ help=f"Watch poll interval in seconds (default: {WATCH_DEFAULTS['interval']})",
1854
+ )
1855
+ start_parser.add_argument(
1856
+ "--min-uptime", dest="min_uptime", type=int, default=None,
1857
+ help=f"Crash threshold in seconds (default: {WATCH_DEFAULTS['min_uptime']})",
1858
+ )
1859
+ start_parser.add_argument(
1860
+ "--on-fast-crash", dest="on_fast_crash",
1861
+ choices=["backoff", "stop", "retry"], default=None,
1862
+ help=f"Fast-crash policy (default: {WATCH_DEFAULTS['on_fast_crash']})",
1863
+ )
1864
+
1865
+ # stop
1866
+ stop_parser = subparsers.add_parser(
1867
+ "stop", aliases=["kill"], help="Stop a process"
1868
+ )
1869
+ stop_parser.add_argument("name", help="Name of the process")
1870
+ stop_parser.add_argument(
1871
+ "-f", "--force", action="store_true", help="Force kill (SIGKILL)"
1872
+ )
1873
+
1874
+ # restart
1875
+ restart_parser = subparsers.add_parser("restart", help="Restart a process")
1876
+ restart_parser.add_argument("name", help="Name of the process")
1877
+ restart_parser.add_argument(
1878
+ "--reset-counters", dest="reset_counters", action="store_true",
1879
+ help="Also zero the watch restart counter (default: preserve)",
1880
+ )
1881
+
1882
+ # watch
1883
+ watch_parser = subparsers.add_parser(
1884
+ "watch", help="Attach a watcher to an existing running process",
1885
+ )
1886
+ watch_parser.add_argument("name", help="Name of the process")
1887
+ watch_parser.add_argument(
1888
+ "--interval", type=int, default=None,
1889
+ help=f"Poll interval in seconds (default: {WATCH_DEFAULTS['interval']})",
1890
+ )
1891
+ watch_parser.add_argument(
1892
+ "--min-uptime", dest="min_uptime", type=int, default=None,
1893
+ help=f"Crash threshold in seconds (default: {WATCH_DEFAULTS['min_uptime']})",
1894
+ )
1895
+ watch_parser.add_argument(
1896
+ "--on-fast-crash", dest="on_fast_crash",
1897
+ choices=["backoff", "stop", "retry"], default=None,
1898
+ help=f"Fast-crash policy (default: {WATCH_DEFAULTS['on_fast_crash']})",
1899
+ )
1900
+ watch_parser.add_argument(
1901
+ "--reset", action="store_true",
1902
+ help="Reset watch config to defaults (ignore prior settings)",
1903
+ )
1904
+
1905
+ # unwatch
1906
+ unwatch_parser = subparsers.add_parser(
1907
+ "unwatch", help="Detach watcher from a process (keeps proc running)",
1908
+ )
1909
+ unwatch_parser.add_argument("name", help="Name of the process")
1910
+
1911
+ # restart-stopped
1912
+ rs_parser = subparsers.add_parser(
1913
+ "restart-stopped",
1914
+ help="Restart stopped processes (interactive menu, --all, or by name)",
1915
+ )
1916
+ rs_parser.add_argument("names", nargs="*", help="Specific process names (optional)")
1917
+ rs_parser.add_argument(
1918
+ "-a", "--all", action="store_true", help="Restart all stopped processes without prompting"
1919
+ )
1920
+
1921
+ # restart-last
1922
+ rl_parser = subparsers.add_parser(
1923
+ "restart-last",
1924
+ help="Restart processes ordered by most recent (interactive menu or --all)",
1925
+ )
1926
+ rl_parser.add_argument(
1927
+ "-a", "--all", action="store_true", help="Restart all not-running processes (most recent first)"
1928
+ )
1929
+
1930
+ # status
1931
+ status_parser = subparsers.add_parser(
1932
+ "status", aliases=["ls", "list"], help="Show process status"
1933
+ )
1934
+ status_parser.add_argument(
1935
+ "name", nargs="?", default=None, help="Show details for specific process"
1936
+ )
1937
+ status_parser.add_argument(
1938
+ "-w", "--watch", action="store_true", help="Watch mode (auto-refresh)"
1939
+ )
1940
+ status_parser.add_argument(
1941
+ "--interval", type=int, default=None,
1942
+ help="Refresh interval in seconds for --watch (default: 2)",
1943
+ )
1944
+ status_parser.add_argument(
1945
+ "--json", action="store_true",
1946
+ help="Output as JSON (machine-readable, no colors)",
1947
+ )
1948
+ status_level_group = status_parser.add_mutually_exclusive_group()
1949
+ status_level_group.add_argument(
1950
+ "--plain", action="store_true",
1951
+ help="Force plain rendering (no color, ASCII only)",
1952
+ )
1953
+ status_level_group.add_argument(
1954
+ "--fancy", action="store_true",
1955
+ help="Force fancy rendering (Unicode box-drawing)",
1956
+ )
1957
+
1958
+ # logs
1959
+ logs_parser = subparsers.add_parser("logs", help="View process logs")
1960
+ logs_parser.add_argument("name", help="Name of the process")
1961
+ logs_parser.add_argument(
1962
+ "-f", "--follow", action="store_true", help="Follow log output"
1963
+ )
1964
+ logs_parser.add_argument(
1965
+ "-n", "--lines", type=int, default=50, help="Number of lines (default: 50)"
1966
+ )
1967
+ logs_parser.add_argument("--stdout", action="store_true", help="Show only stdout")
1968
+ logs_parser.add_argument("--stderr", action="store_true", help="Show only stderr")
1969
+ logs_parser.add_argument(
1970
+ "--watcher", action="store_true",
1971
+ help="Show the watcher log (restart events, errors)",
1972
+ )
1973
+
1974
+ # follow
1975
+ follow_parser = subparsers.add_parser(
1976
+ "follow", aliases=["tail"], help="Follow logs (shorthand for logs -f)"
1977
+ )
1978
+ follow_parser.add_argument("name", help="Name of the process")
1979
+ follow_parser.add_argument(
1980
+ "-n", "--lines", type=int, default=10, help="Initial lines (default: 10)"
1981
+ )
1982
+ follow_parser.add_argument("--stdout", action="store_true", help="Show only stdout")
1983
+ follow_parser.add_argument("--stderr", action="store_true", help="Show only stderr")
1984
+
1985
+ # clean
1986
+ subparsers.add_parser("clean", help="Remove stopped processes from list")
1987
+
1988
+ # resurrect
1989
+ subparsers.add_parser("resurrect", help="Restart all processes that were running before shutdown")
1990
+
1991
+ # delete
1992
+ delete_parser = subparsers.add_parser(
1993
+ "delete", aliases=["rm"], help="Delete a process completely"
1994
+ )
1995
+ delete_parser.add_argument("name", help="Name of the process")
1996
+ delete_parser.add_argument(
1997
+ "-y", "--yes", action="store_true", help="Skip confirmation"
1998
+ )
1999
+ delete_parser.add_argument(
2000
+ "--keep-logs", dest="keep_logs", action="store_true",
2001
+ help="Preserve out/err/watcher log files after delete",
2002
+ )
2003
+
2004
+ args = parser.parse_args()
2005
+
2006
+ if not args.command:
2007
+ parser.print_help()
2008
+ return 0
2009
+
2010
+ handlers = {
2011
+ "start": cmd_start,
2012
+ "open": cmd_start,
2013
+ "stop": cmd_stop,
2014
+ "kill": cmd_stop,
2015
+ "restart": cmd_restart,
2016
+ "restart-stopped": cmd_restart_stopped,
2017
+ "restart-last": cmd_restart_last,
2018
+ "status": cmd_status,
2019
+ "ls": cmd_status,
2020
+ "list": cmd_status,
2021
+ "logs": cmd_logs,
2022
+ "follow": cmd_logs_follow,
2023
+ "tail": cmd_logs_follow,
2024
+ "clean": cmd_clean,
2025
+ "resurrect": cmd_resurrect,
2026
+ "delete": cmd_delete,
2027
+ "rm": cmd_delete,
2028
+ "watch": cmd_watch,
2029
+ "unwatch": cmd_unwatch,
2030
+ }
2031
+
2032
+ handler = handlers.get(args.command)
2033
+ if handler:
2034
+ return handler(args)
2035
+
2036
+ parser.print_help()
2037
+ return 0
2038
+
2039
+
2040
# Script entry point: run the CLI only when this file is executed directly
# (not when imported), and hand main()'s return value to the interpreter as
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)