@os-eco/overstory-cli 0.9.4 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/README.md +50 -19
  2. package/agents/builder.md +19 -9
  3. package/agents/coordinator.md +6 -6
  4. package/agents/lead.md +204 -87
  5. package/agents/merger.md +25 -14
  6. package/agents/reviewer.md +22 -16
  7. package/agents/scout.md +17 -12
  8. package/package.json +6 -3
  9. package/src/agents/capabilities.test.ts +85 -0
  10. package/src/agents/capabilities.ts +125 -0
  11. package/src/agents/headless-mail-injector.test.ts +448 -0
  12. package/src/agents/headless-mail-injector.ts +219 -0
  13. package/src/agents/headless-prompt.test.ts +102 -0
  14. package/src/agents/headless-prompt.ts +68 -0
  15. package/src/agents/hooks-deployer.test.ts +514 -14
  16. package/src/agents/hooks-deployer.ts +141 -0
  17. package/src/agents/mail-poll-detect.test.ts +153 -0
  18. package/src/agents/mail-poll-detect.ts +73 -0
  19. package/src/agents/overlay.test.ts +60 -4
  20. package/src/agents/overlay.ts +63 -8
  21. package/src/agents/scope-detect.test.ts +190 -0
  22. package/src/agents/scope-detect.ts +146 -0
  23. package/src/agents/turn-lock.test.ts +181 -0
  24. package/src/agents/turn-lock.ts +235 -0
  25. package/src/agents/turn-runner-dispatch.test.ts +182 -0
  26. package/src/agents/turn-runner-dispatch.ts +105 -0
  27. package/src/agents/turn-runner.test.ts +2312 -0
  28. package/src/agents/turn-runner.ts +1383 -0
  29. package/src/commands/agents.ts +9 -0
  30. package/src/commands/clean.ts +54 -0
  31. package/src/commands/coordinator.test.ts +254 -0
  32. package/src/commands/coordinator.ts +273 -8
  33. package/src/commands/dashboard.test.ts +188 -0
  34. package/src/commands/dashboard.ts +14 -4
  35. package/src/commands/doctor.ts +3 -1
  36. package/src/commands/group.test.ts +94 -0
  37. package/src/commands/group.ts +49 -20
  38. package/src/commands/init.test.ts +8 -0
  39. package/src/commands/init.ts +8 -1
  40. package/src/commands/log.test.ts +187 -11
  41. package/src/commands/log.ts +171 -71
  42. package/src/commands/mail.test.ts +162 -0
  43. package/src/commands/mail.ts +64 -9
  44. package/src/commands/merge.test.ts +230 -1
  45. package/src/commands/merge.ts +68 -12
  46. package/src/commands/nudge.test.ts +351 -4
  47. package/src/commands/nudge.ts +356 -34
  48. package/src/commands/run.test.ts +43 -7
  49. package/src/commands/serve/build.test.ts +202 -0
  50. package/src/commands/serve/build.ts +206 -0
  51. package/src/commands/serve/coordinator-actions.test.ts +339 -0
  52. package/src/commands/serve/coordinator-actions.ts +408 -0
  53. package/src/commands/serve/dev.test.ts +168 -0
  54. package/src/commands/serve/dev.ts +117 -0
  55. package/src/commands/serve/mail-actions.test.ts +312 -0
  56. package/src/commands/serve/mail-actions.ts +167 -0
  57. package/src/commands/serve/rest.test.ts +1323 -0
  58. package/src/commands/serve/rest.ts +708 -0
  59. package/src/commands/serve/static.ts +51 -0
  60. package/src/commands/serve/ws.test.ts +361 -0
  61. package/src/commands/serve/ws.ts +332 -0
  62. package/src/commands/serve.test.ts +459 -0
  63. package/src/commands/serve.ts +565 -0
  64. package/src/commands/sling.test.ts +177 -1
  65. package/src/commands/sling.ts +243 -71
  66. package/src/commands/status.test.ts +9 -0
  67. package/src/commands/status.ts +12 -4
  68. package/src/commands/stop.test.ts +255 -1
  69. package/src/commands/stop.ts +107 -8
  70. package/src/commands/watch.test.ts +43 -0
  71. package/src/commands/watch.ts +153 -28
  72. package/src/config.ts +23 -0
  73. package/src/doctor/consistency.test.ts +106 -0
  74. package/src/doctor/consistency.ts +48 -1
  75. package/src/doctor/serve.test.ts +95 -0
  76. package/src/doctor/serve.ts +86 -0
  77. package/src/doctor/types.ts +2 -1
  78. package/src/doctor/watchdog.ts +57 -1
  79. package/src/events/tailer.test.ts +234 -1
  80. package/src/events/tailer.ts +90 -0
  81. package/src/index.ts +57 -6
  82. package/src/insights/quality-gates.test.ts +141 -0
  83. package/src/insights/quality-gates.ts +156 -0
  84. package/src/json.ts +29 -0
  85. package/src/logging/theme.ts +4 -0
  86. package/src/mail/client.ts +15 -2
  87. package/src/mail/store.test.ts +82 -0
  88. package/src/mail/store.ts +41 -4
  89. package/src/merge/lock.test.ts +149 -0
  90. package/src/merge/lock.ts +140 -0
  91. package/src/merge/predict.test.ts +387 -0
  92. package/src/merge/predict.ts +249 -0
  93. package/src/merge/resolver.ts +1 -1
  94. package/src/mulch/client.ts +3 -3
  95. package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
  96. package/src/runtimes/claude.test.ts +791 -1
  97. package/src/runtimes/claude.ts +323 -1
  98. package/src/runtimes/connections.test.ts +141 -1
  99. package/src/runtimes/connections.ts +73 -4
  100. package/src/runtimes/headless-connection.test.ts +264 -0
  101. package/src/runtimes/headless-connection.ts +158 -0
  102. package/src/runtimes/types.ts +10 -0
  103. package/src/schema-consistency.test.ts +1 -0
  104. package/src/sessions/store.test.ts +657 -29
  105. package/src/sessions/store.ts +286 -23
  106. package/src/test-setup.test.ts +31 -0
  107. package/src/test-setup.ts +28 -0
  108. package/src/types.ts +107 -2
  109. package/src/utils/pid.test.ts +85 -1
  110. package/src/utils/pid.ts +86 -1
  111. package/src/utils/process-scan.test.ts +53 -0
  112. package/src/utils/process-scan.ts +76 -0
  113. package/src/watchdog/daemon.test.ts +1607 -376
  114. package/src/watchdog/daemon.ts +462 -88
  115. package/src/watchdog/health.test.ts +282 -0
  116. package/src/watchdog/health.ts +126 -27
  117. package/src/worktree/manager.test.ts +218 -1
  118. package/src/worktree/manager.ts +55 -0
  119. package/src/worktree/process.test.ts +71 -0
  120. package/src/worktree/process.ts +25 -5
  121. package/src/worktree/tmux.test.ts +28 -0
  122. package/src/worktree/tmux.ts +27 -3
  123. package/templates/CLAUDE.md.tmpl +19 -8
  124. package/templates/overlay.md.tmpl +5 -2
@@ -15,6 +15,7 @@
15
15
  import { mkdir, unlink } from "node:fs/promises";
16
16
  import { join } from "node:path";
17
17
  import { Command } from "commander";
18
+ import { buildInitialHeadlessPrompt, formatMailSection } from "../agents/headless-prompt.ts";
18
19
  import { createIdentity, loadIdentity } from "../agents/identity.ts";
19
20
  import { createManifestLoader, resolveModel } from "../agents/manifest.ts";
20
21
  import { loadConfig } from "../config.ts";
@@ -29,6 +30,8 @@ import { createRunStore, createSessionStore } from "../sessions/store.ts";
29
30
  import { resolveBackend, trackerCliName } from "../tracker/factory.ts";
30
31
  import type { AgentSession } from "../types.ts";
31
32
  import { isProcessRunning } from "../watchdog/health.ts";
33
+ import type { SpawnHeadlessOptions } from "../worktree/process.ts";
34
+ import { spawnHeadlessAgent } from "../worktree/process.ts";
32
35
  import type { SessionState } from "../worktree/tmux.ts";
33
36
  import {
34
37
  capturePaneContent,
@@ -46,7 +49,7 @@ import { nudgeAgent } from "./nudge.ts";
46
49
  import { isRunningAsRoot } from "./sling.ts";
47
50
 
48
51
  /** Default coordinator agent name. */
49
- const COORDINATOR_NAME = "coordinator";
52
+ export const COORDINATOR_NAME = "coordinator";
50
53
 
51
54
  export interface PersistentAgentSpec {
52
55
  commandName: string;
@@ -120,6 +123,15 @@ export interface CoordinatorDeps {
120
123
  _capturePaneContent?: (name: string, lines?: number) => Promise<string | null>;
121
124
  /** Override poll interval for ask subcommand (default: ASK_POLL_INTERVAL_MS). Used in tests. */
122
125
  _pollIntervalMs?: number;
126
+ /** Override headless spawn (used by tests to avoid forking real subprocesses). */
127
+ _spawnHeadless?: (
128
+ argv: string[],
129
+ opts: SpawnHeadlessOptions,
130
+ ) => Promise<{
131
+ pid: number;
132
+ stdin: { write(data: string | Uint8Array): number | Promise<number> };
133
+ stdout: ReadableStream<Uint8Array> | null;
134
+ }>;
123
135
  }
124
136
 
125
137
  /**
@@ -332,6 +344,21 @@ export interface CoordinatorSessionOptions {
332
344
  displayName?: string;
333
345
  /** Custom beacon builder. Receives tracker CLI name, returns beacon string. */
334
346
  beaconBuilder?: (trackerCli: string) => string;
347
+ /**
348
+ * When true, spawn the coordinator headless (no tmux pane). The runtime must
349
+ * implement buildDirectSpawn(). The CLI command `ov coordinator start` does
350
+ * not yet pass this flag — it is consumed by the headless start path used by
351
+ * the web UI's POST /api/coordinator/start endpoint.
352
+ */
353
+ headless?: boolean;
354
+ /**
355
+ * Acknowledge that a watchdog daemon from a previous session may already be
356
+ * running and should be allowed to supervise this coordinator. Without this
357
+ * (or `--watchdog`), the start command refuses to spawn when a leftover
358
+ * daemon is detected, to surface the "watchdog persists across runs" trap
359
+ * that overstory-3f0c was filed for.
360
+ */
361
+ acceptExistingWatchdog?: boolean;
335
362
  }
336
363
 
337
364
  /**
@@ -365,6 +392,8 @@ export async function startCoordinatorSession(
365
392
  agentDefFile: agentDefFileOpt,
366
393
  displayName: displayNameOpt,
367
394
  beaconBuilder: beaconBuilderOpt,
395
+ headless: headlessFlag,
396
+ acceptExistingWatchdog: acceptExistingWatchdogFlag,
368
397
  } = opts;
369
398
 
370
399
  const coordinatorName = agentNameOpt ?? coordinatorNameOpt ?? COORDINATOR_NAME;
@@ -386,6 +415,25 @@ export async function startCoordinatorSession(
386
415
  const monitor = deps._monitor ?? createDefaultMonitor(projectRoot);
387
416
  const tmuxSession = coordinatorTmuxSession(config.project.name, coordinatorName);
388
417
 
418
+ // Detect leftover watchdog daemon from a previous session (overstory-3f0c).
419
+ // If a watchdog is already running and the operator did not pass --watchdog
420
+ // or --accept-existing-watchdog, refuse to start: a persistent daemon will
421
+ // supervise this coordinator with policy decided by the original invocation,
422
+ // not the current one. This prevents "I didn't run --watchdog, why is the
423
+ // watchdog killing things?" surprises.
424
+ const watchdogAlreadyRunning = await watchdog.isRunning();
425
+ if (watchdogAlreadyRunning && !watchdogFlag && !acceptExistingWatchdogFlag) {
426
+ const existingPid = await readWatchdogPid(projectRoot);
427
+ const pidLabel = existingPid !== null ? `PID ${existingPid}` : "unknown PID";
428
+ throw new AgentError(
429
+ `Watchdog daemon (${pidLabel}) is already running from a previous session. ` +
430
+ `It will supervise this ${displayName.toLowerCase()} run and may take escalation actions you did not opt into. ` +
431
+ `To proceed: pass --watchdog to acknowledge, pass --accept-existing-watchdog to suppress this check, ` +
432
+ `or run 'ov watch --kill-others' (or remove .overstory/watchdog.pid) first.`,
433
+ { agentName: coordinatorName },
434
+ );
435
+ }
436
+
389
437
  // Check for existing coordinator session with the same name
390
438
  const overstoryDir = join(projectRoot, ".overstory");
391
439
  const { store } = openSessionStore(overstoryDir);
@@ -459,6 +507,170 @@ export async function startCoordinatorSession(
459
507
  });
460
508
  }
461
509
 
510
+ // Headless start path: bypass tmux entirely and spawn the coordinator
511
+ // process directly via runtime.buildDirectSpawn(). Same hooks, identity,
512
+ // and run-tracking as the tmux path — only the spawn mechanism differs.
513
+ if (headlessFlag === true) {
514
+ if (!runtime.buildDirectSpawn) {
515
+ throw new ValidationError(
516
+ `Headless coordinator start requires a runtime with buildDirectSpawn (got: ${runtime.id})`,
517
+ { field: "runtime", value: runtime.id },
518
+ );
519
+ }
520
+
521
+ const spawnHeadless = deps._spawnHeadless ?? spawnHeadlessAgent;
522
+ const directEnv: Record<string, string> = {
523
+ ...runtime.buildEnv(resolvedModel),
524
+ OVERSTORY_AGENT_NAME: coordinatorName,
525
+ OVERSTORY_PROJECT_ROOT: projectRoot,
526
+ ...(profileFlag ? { OVERSTORY_PROFILE: profileFlag } : {}),
527
+ };
528
+ const argv = runtime.buildDirectSpawn({
529
+ cwd: projectRoot,
530
+ env: directEnv,
531
+ ...(resolvedModel.isExplicitOverride ? { model: resolvedModel.model } : {}),
532
+ instructionPath: runtime.instructionPath,
533
+ });
534
+
535
+ // Per-session log dir mirrors sling.ts headless path.
536
+ const logTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
537
+ const headlessLogDir = join(overstoryDir, "logs", "coordinator", logTimestamp);
538
+ await mkdir(headlessLogDir, { recursive: true });
539
+
540
+ const headlessProc = await spawnHeadless(argv, {
541
+ cwd: projectRoot,
542
+ env: { ...(process.env as Record<string, string>), ...directEnv },
543
+ stdoutFile: join(headlessLogDir, "stdout.log"),
544
+ stderrFile: join(headlessLogDir, "stderr.log"),
545
+ agentName: coordinatorName,
546
+ });
547
+
548
+ // Build the initial stdin prompt from agent definition + pending dispatch
549
+ // mail + activation beacon. Replaces SessionStart hooks (no-op headless).
550
+ const agentDefPath = join(projectRoot, ".overstory", "agent-defs", agentDefFile);
551
+ const agentDefHandle = Bun.file(agentDefPath);
552
+ const primeContext = (await agentDefHandle.exists()) ? await agentDefHandle.text() : "";
553
+
554
+ const mailDbPath = join(overstoryDir, "mail.db");
555
+ const pendingMailStore = createMailStore(mailDbPath);
556
+ let mailSection = "";
557
+ try {
558
+ const pendingMailClient = createMailClient(pendingMailStore);
559
+ const pendingMessages = pendingMailClient.check(coordinatorName);
560
+ mailSection = formatMailSection(pendingMessages);
561
+ } finally {
562
+ pendingMailStore.close();
563
+ }
564
+
565
+ const resolvedBackend = await resolveBackend(config.taskTracker.backend, config.project.root);
566
+ const trackerCli = trackerCliName(resolvedBackend);
567
+ const beacon = beaconBuilder(trackerCli);
568
+ const initialPrompt = buildInitialHeadlessPrompt(
569
+ primeContext || undefined,
570
+ mailSection || undefined,
571
+ beacon,
572
+ );
573
+ await headlessProc.stdin.write(initialPrompt);
574
+
575
+ // Create run record + current-run.txt + session row.
576
+ const sessionId = `session-${Date.now()}-${coordinatorName}`;
577
+ const runId = `run-${new Date().toISOString().replace(/[:.]/g, "-")}`;
578
+ const runStore = createRunStore(join(overstoryDir, "sessions.db"));
579
+ try {
580
+ runStore.createRun({
581
+ id: runId,
582
+ startedAt: new Date().toISOString(),
583
+ coordinatorSessionId: sessionId,
584
+ coordinatorName,
585
+ status: "active",
586
+ });
587
+ } finally {
588
+ runStore.close();
589
+ }
590
+ await Bun.write(join(overstoryDir, "current-run.txt"), runId);
591
+
592
+ const session: AgentSession = {
593
+ id: sessionId,
594
+ agentName: coordinatorName,
595
+ capability,
596
+ worktreePath: projectRoot,
597
+ branchName: config.project.canonicalBranch,
598
+ taskId: "",
599
+ tmuxSession: "", // headless: no tmux pane
600
+ state: "booting",
601
+ pid: headlessProc.pid,
602
+ parentAgent: null,
603
+ depth: 0,
604
+ runId,
605
+ startedAt: new Date().toISOString(),
606
+ lastActivity: new Date().toISOString(),
607
+ escalationLevel: 0,
608
+ stalledSince: null,
609
+ transcriptPath: null,
610
+ };
611
+ store.upsert(session);
612
+
613
+ // Auto-start watchdog / monitor (same as tmux path).
614
+ let watchdogPid: number | undefined;
615
+ if (watchdogFlag) {
616
+ const watchdogResult = await watchdog.start();
617
+ if (watchdogResult) {
618
+ watchdogPid = watchdogResult.pid;
619
+ if (!json) printHint("Watchdog started");
620
+ } else if (watchdogAlreadyRunning) {
621
+ // createDefaultWatchdog.start() returns null when an existing PID
622
+ // is alive — that's a no-op success, not a failure. Reuse the
623
+ // existing daemon. The -1 sentinel (not a real PID) keeps
624
+ // `watchdogPid !== undefined` true in the JSON output.
625
+ watchdogPid = -1;
626
+ if (!json) printHint("Watchdog already running, reusing existing daemon");
627
+ } else {
628
+ if (!json) printWarning("Watchdog failed to start");
629
+ }
630
+ } else if (watchdogAlreadyRunning && acceptExistingWatchdogFlag) {
631
+ // --accept-existing-watchdog without --watchdog: surface that an
632
+ // existing daemon is supervising this run, but do not call start().
633
+ watchdogPid = -1;
634
+ if (!json) printHint("Watchdog already running, reusing existing daemon");
635
+ }
636
+ let monitorPid: number | undefined;
637
+ if (monitorFlag) {
638
+ if (!config.watchdog.tier2Enabled) {
639
+ if (!json) printWarning("Monitor skipped", "watchdog.tier2Enabled is false");
640
+ } else {
641
+ const monitorResult = await monitor.start([]);
642
+ if (monitorResult) {
643
+ monitorPid = monitorResult.pid;
644
+ if (!json) printHint("Monitor started");
645
+ } else {
646
+ if (!json) printWarning("Monitor failed to start");
647
+ }
648
+ }
649
+ }
650
+
651
+ const output = {
652
+ agentName: coordinatorName,
653
+ capability,
654
+ tmuxSession: "",
655
+ projectRoot,
656
+ pid: headlessProc.pid,
657
+ headless: true,
658
+ watchdog: watchdogPid !== undefined,
659
+ watchdogPreexisting: watchdogAlreadyRunning,
660
+ monitor: monitorFlag ? monitorPid !== undefined : false,
661
+ };
662
+
663
+ if (json) {
664
+ jsonOutput(`${capability} start`, output);
665
+ } else {
666
+ printSuccess(`${displayName} started (headless)`);
667
+ process.stdout.write(` Root: ${projectRoot}\n`);
668
+ process.stdout.write(` PID: ${headlessProc.pid}\n`);
669
+ process.stdout.write(` Logs: ${headlessLogDir}\n`);
670
+ }
671
+ return;
672
+ }
673
+
462
674
  // Preflight: verify tmux is installed before attempting to spawn.
463
675
  // Without this check, a missing tmux leads to cryptic errors later.
464
676
  await tmux.ensureTmuxAvailable();
@@ -584,16 +796,28 @@ export async function startCoordinatorSession(
584
796
  await tmux.sendKeys(tmuxSession, "");
585
797
  }
586
798
 
587
- // Auto-start watchdog if --watchdog flag is present
799
+ // Auto-start watchdog if --watchdog flag is present.
588
800
  let watchdogPid: number | undefined;
589
801
  if (watchdogFlag) {
590
802
  const watchdogResult = await watchdog.start();
591
803
  if (watchdogResult) {
592
804
  watchdogPid = watchdogResult.pid;
593
805
  if (!json) printHint("Watchdog started");
806
+ } else if (watchdogAlreadyRunning) {
807
+ // createDefaultWatchdog.start() returns null when an existing PID
808
+ // is alive — that's a no-op success, not a failure. Reuse the
809
+ // existing daemon. The -1 sentinel (not a real PID) keeps
810
+ // `watchdogPid !== undefined` true in the JSON output.
811
+ watchdogPid = -1;
812
+ if (!json) printHint("Watchdog already running, reusing existing daemon");
594
813
  } else {
595
814
  if (!json) printWarning("Watchdog failed to start");
596
815
  }
816
+ } else if (watchdogAlreadyRunning && acceptExistingWatchdogFlag) {
817
+ // --accept-existing-watchdog without --watchdog: surface that an
818
+ // existing daemon is supervising this run, but do not call start().
819
+ watchdogPid = -1;
820
+ if (!json) printHint("Watchdog already running, reusing existing daemon");
597
821
  }
598
822
 
599
823
  // Auto-start monitor if --monitor flag is present and tier2 is enabled
@@ -618,7 +842,8 @@ export async function startCoordinatorSession(
618
842
  tmuxSession,
619
843
  projectRoot,
620
844
  pid,
621
- watchdog: watchdogFlag ? watchdogPid !== undefined : false,
845
+ watchdog: watchdogPid !== undefined,
846
+ watchdogPreexisting: watchdogAlreadyRunning,
622
847
  monitor: monitorFlag ? monitorPid !== undefined : false,
623
848
  };
624
849
 
@@ -629,6 +854,7 @@ export async function startCoordinatorSession(
629
854
  process.stdout.write(` Tmux: ${tmuxSession}\n`);
630
855
  process.stdout.write(` Root: ${projectRoot}\n`);
631
856
  process.stdout.write(` PID: ${pid}\n`);
857
+ printHint("Open the UI: `ov serve` then http://localhost:7321 — primary operator surface");
632
858
  }
633
859
 
634
860
  if (shouldAttach) {
@@ -643,7 +869,14 @@ export async function startCoordinatorSession(
643
869
 
644
870
  async function startPersistentAgent(
645
871
  spec: PersistentAgentSpec,
646
- opts: { json: boolean; attach: boolean; watchdog: boolean; monitor: boolean; profile?: string },
872
+ opts: {
873
+ json: boolean;
874
+ attach: boolean;
875
+ watchdog: boolean;
876
+ monitor: boolean;
877
+ profile?: string;
878
+ acceptExistingWatchdog?: boolean;
879
+ },
647
880
  deps: CoordinatorDeps = {},
648
881
  ): Promise<void> {
649
882
  await startCoordinatorSession(
@@ -679,6 +912,18 @@ function isActivePersistentAgentSession(
679
912
  * 3. Mark session as completed in SessionStore
680
913
  * 4. Auto-complete the active run (if current-run.txt exists)
681
914
  */
915
+ /**
916
+ * Stop the default coordinator. Handles both tmux and headless sessions.
917
+ * Exposed for callers outside the CLI command surface (e.g. the web-UI POST
918
+ * /api/coordinator/stop endpoint, which lives in coordinator-actions.ts).
919
+ */
920
+ export async function stopCoordinatorSession(
921
+ opts: { json: boolean },
922
+ deps: CoordinatorDeps = {},
923
+ ): Promise<void> {
924
+ await stopPersistentAgent(COORDINATOR_SPEC, opts, deps);
925
+ }
926
+
682
927
  async function stopPersistentAgent(
683
928
  spec: PersistentAgentSpec,
684
929
  opts: { json: boolean },
@@ -712,10 +957,24 @@ async function stopPersistentAgent(
712
957
  });
713
958
  }
714
959
 
715
- // Kill tmux session with process tree cleanup
716
- const alive = await tmux.isSessionAlive(session.tmuxSession);
717
- if (alive) {
718
- await tmux.killSession(session.tmuxSession);
960
+ // Headless sessions have no tmux pane (tmuxSession === ""). Tear down via
961
+ // the connection registry (SIGTERM-with-SIGKILL-escalation) and skip tmux.
962
+ if (session.tmuxSession === "") {
963
+ const { removeConnection } = await import("../runtimes/connections.ts");
964
+ removeConnection(spec.agentName);
965
+ if (session.pid !== null && isProcessRunning(session.pid)) {
966
+ try {
967
+ process.kill(session.pid, "SIGTERM");
968
+ } catch {
969
+ // process may have exited between the check and the signal
970
+ }
971
+ }
972
+ } else {
973
+ // Kill tmux session with process tree cleanup
974
+ const alive = await tmux.isSessionAlive(session.tmuxSession);
975
+ if (alive) {
976
+ await tmux.killSession(session.tmuxSession);
977
+ }
719
978
  }
720
979
 
721
980
  // Always attempt to stop watchdog
@@ -1359,6 +1618,10 @@ export function createPersistentAgentCommand(
1359
1618
  .option("--attach", "Always attach to tmux session after start")
1360
1619
  .option("--no-attach", "Never attach to tmux session after start")
1361
1620
  .option("--watchdog", `Auto-start watchdog daemon with ${spec.commandName}`)
1621
+ .option(
1622
+ "--accept-existing-watchdog",
1623
+ "Continue when a watchdog daemon from a previous session is already running (it will supervise this run)",
1624
+ )
1362
1625
  .option("--monitor", `Auto-start Tier 2 monitor agent with ${spec.commandName}`)
1363
1626
  .option("--profile <name>", "Canopy profile to apply to spawned agents")
1364
1627
  .option("--json", "Output as JSON")
@@ -1366,6 +1629,7 @@ export function createPersistentAgentCommand(
1366
1629
  async (opts: {
1367
1630
  attach?: boolean;
1368
1631
  watchdog?: boolean;
1632
+ acceptExistingWatchdog?: boolean;
1369
1633
  monitor?: boolean;
1370
1634
  json?: boolean;
1371
1635
  profile?: string;
@@ -1378,6 +1642,7 @@ export function createPersistentAgentCommand(
1378
1642
  json: opts.json ?? false,
1379
1643
  attach: shouldAttach,
1380
1644
  watchdog: opts.watchdog ?? false,
1645
+ acceptExistingWatchdog: opts.acceptExistingWatchdog ?? false,
1381
1646
  monitor: opts.monitor ?? false,
1382
1647
  profile: opts.profile,
1383
1648
  },
@@ -295,6 +295,7 @@ function makeDashboardData(
295
295
  worktrees: [],
296
296
  tmuxSessions: [],
297
297
  unreadMailCount: 0,
298
+ unreadMailScope: "orchestrator",
298
299
  mergeQueueCount: 0,
299
300
  recentMetricsCount: 0,
300
301
  },
@@ -447,6 +448,7 @@ describe("renderAgentPanel", () => {
447
448
  worktrees: [],
448
449
  tmuxSessions: [], // no tmux sessions
449
450
  unreadMailCount: 0,
451
+ unreadMailScope: "orchestrator",
450
452
  mergeQueueCount: 0,
451
453
  recentMetricsCount: 0,
452
454
  },
@@ -487,6 +489,7 @@ describe("renderAgentPanel", () => {
487
489
  worktrees: [],
488
490
  tmuxSessions: [],
489
491
  unreadMailCount: 0,
492
+ unreadMailScope: "orchestrator",
490
493
  mergeQueueCount: 0,
491
494
  recentMetricsCount: 0,
492
495
  },
@@ -495,6 +498,191 @@ describe("renderAgentPanel", () => {
495
498
  expect(out).toContain("x");
496
499
  expect(out).toContain("dead-headless");
497
500
  });
501
+
502
+ test("renders mixed tmux + headless agents in same frame with correct liveness", () => {
503
+ const data = {
504
+ ...makeDashboardData({}),
505
+ status: {
506
+ currentRunId: null,
507
+ agents: [
508
+ {
509
+ id: "sess-tmux-1",
510
+ agentName: "pane-agent",
511
+ capability: "builder",
512
+ worktreePath: "/tmp/wt/pane-agent",
513
+ branchName: "overstory/pane-agent/task-t1",
514
+ taskId: "task-t1",
515
+ tmuxSession: "overstory-pane-agent",
516
+ state: "working" as const,
517
+ pid: 99999,
518
+ parentAgent: null,
519
+ depth: 0,
520
+ runId: null,
521
+ startedAt: new Date(Date.now() - 10_000).toISOString(),
522
+ lastActivity: new Date().toISOString(),
523
+ escalationLevel: 0,
524
+ stalledSince: null,
525
+ transcriptPath: null,
526
+ },
527
+ {
528
+ id: "sess-headless-1",
529
+ agentName: "live-headless",
530
+ capability: "builder",
531
+ worktreePath: "/tmp/wt/live-headless",
532
+ branchName: "overstory/live-headless/task-h1",
533
+ taskId: "task-h1",
534
+ tmuxSession: "", // headless
535
+ state: "working" as const,
536
+ pid: process.pid, // own PID — guaranteed alive
537
+ parentAgent: null,
538
+ depth: 0,
539
+ runId: null,
540
+ startedAt: new Date(Date.now() - 10_000).toISOString(),
541
+ lastActivity: new Date().toISOString(),
542
+ escalationLevel: 0,
543
+ stalledSince: null,
544
+ transcriptPath: null,
545
+ },
546
+ ],
547
+ worktrees: [],
548
+ tmuxSessions: [{ name: "overstory-pane-agent", pid: 99998 }],
549
+ unreadMailCount: 0,
550
+ unreadMailScope: "orchestrator",
551
+ mergeQueueCount: 0,
552
+ recentMetricsCount: 0,
553
+ },
554
+ };
555
+ const out = renderAgentPanel(data, 100, 12, 3);
556
+ expect(out).toContain("pane-agent");
557
+ expect(out).toContain("live-headless");
558
+ const aliveMarkers = (out.match(/>/g) ?? []).length;
559
+ expect(aliveMarkers).toBeGreaterThanOrEqual(2);
560
+ expect(out).not.toContain("x");
561
+ });
562
+
563
+ test("spawn-per-turn worker (no tmux, no pid) renders alive when state is non-terminal (overstory-7a34)", () => {
564
+ // Repro: freshly slung headless lead has tmuxSession='' and pid=null.
565
+ // Previously fell into the tmux path → never matched → red "x" while
566
+ // ov feed showed live tool events from the same agent.
567
+ const data = {
568
+ ...makeDashboardData({}),
569
+ status: {
570
+ currentRunId: null,
571
+ agents: [
572
+ {
573
+ id: "sess-spt-1",
574
+ agentName: "freshly-slung",
575
+ capability: "lead",
576
+ worktreePath: "/tmp/wt/freshly-slung",
577
+ branchName: "overstory/freshly-slung/task-l1",
578
+ taskId: "task-l1",
579
+ tmuxSession: "", // headless
580
+ state: "working" as const,
581
+ pid: null, // spawn-per-turn: no persistent process between turns
582
+ parentAgent: null,
583
+ depth: 0,
584
+ runId: null,
585
+ startedAt: new Date(Date.now() - 5_000).toISOString(),
586
+ lastActivity: new Date().toISOString(),
587
+ escalationLevel: 0,
588
+ stalledSince: null,
589
+ transcriptPath: null,
590
+ },
591
+ ],
592
+ worktrees: [],
593
+ tmuxSessions: [],
594
+ unreadMailCount: 0,
595
+ unreadMailScope: "orchestrator",
596
+ mergeQueueCount: 0,
597
+ recentMetricsCount: 0,
598
+ },
599
+ };
600
+ const out = renderAgentPanel(data, 100, 12, 3);
601
+ expect(out).toContain("freshly-slung");
602
+ // Green ">" — agent is logically alive between turns
603
+ expect(out).toContain(">");
604
+ // No red marker should be present (name 'freshly-slung' has no 'x')
605
+ expect(out).not.toContain("x");
606
+ });
607
+
608
+ test("spawn-per-turn worker in zombie state renders dead marker (overstory-7a34)", () => {
609
+ const data = {
610
+ ...makeDashboardData({}),
611
+ status: {
612
+ currentRunId: null,
613
+ agents: [
614
+ {
615
+ id: "sess-spt-2",
616
+ agentName: "abandoned-spt",
617
+ capability: "builder",
618
+ worktreePath: "/tmp/wt/abandoned-spt",
619
+ branchName: "overstory/abandoned-spt/task-a1",
620
+ taskId: "task-a1",
621
+ tmuxSession: "",
622
+ state: "zombie" as const,
623
+ pid: null,
624
+ parentAgent: null,
625
+ depth: 0,
626
+ runId: null,
627
+ startedAt: new Date(Date.now() - 600_000).toISOString(),
628
+ lastActivity: new Date(Date.now() - 600_000).toISOString(),
629
+ escalationLevel: 0,
630
+ stalledSince: null,
631
+ transcriptPath: null,
632
+ },
633
+ ],
634
+ worktrees: [],
635
+ tmuxSessions: [],
636
+ unreadMailCount: 0,
637
+ unreadMailScope: "orchestrator",
638
+ mergeQueueCount: 0,
639
+ recentMetricsCount: 0,
640
+ },
641
+ };
642
+ const out = renderAgentPanel(data, 100, 12, 3);
643
+ expect(out).toContain("abandoned-spt");
644
+ expect(out).toContain("x");
645
+ });
646
+
647
+ test("headless agent renders dead marker when tmux session list is non-empty", () => {
648
+ const deadPid = 2_147_483_647;
649
+ const data = {
650
+ ...makeDashboardData({}),
651
+ status: {
652
+ currentRunId: null,
653
+ agents: [
654
+ {
655
+ id: "sess-dead-headless-1",
656
+ agentName: "gone-headless",
657
+ capability: "builder",
658
+ worktreePath: "/tmp/wt/gone-headless",
659
+ branchName: "overstory/gone-headless/task-g1",
660
+ taskId: "task-g1",
661
+ tmuxSession: "", // headless
662
+ state: "working" as const,
663
+ pid: deadPid,
664
+ parentAgent: null,
665
+ depth: 0,
666
+ runId: null,
667
+ startedAt: new Date(Date.now() - 10_000).toISOString(),
668
+ lastActivity: new Date().toISOString(),
669
+ escalationLevel: 0,
670
+ stalledSince: null,
671
+ transcriptPath: null,
672
+ },
673
+ ],
674
+ worktrees: [],
675
+ tmuxSessions: [{ name: "overstory-other-tmux", pid: 11111 }],
676
+ unreadMailCount: 0,
677
+ unreadMailScope: "orchestrator",
678
+ mergeQueueCount: 0,
679
+ recentMetricsCount: 0,
680
+ },
681
+ };
682
+ const out = renderAgentPanel(data, 100, 12, 3);
683
+ expect(out).toContain("x");
684
+ expect(out).toContain("gone-headless");
685
+ });
498
686
  });
499
687
 
500
688
  describe("openDashboardStores", () => {
@@ -434,6 +434,7 @@ async function loadDashboardData(
434
434
  worktrees,
435
435
  tmuxSessions,
436
436
  unreadMailCount,
437
+ unreadMailScope: "orchestrator",
437
438
  mergeQueueCount,
438
439
  recentMetricsCount,
439
440
  };
@@ -614,7 +615,7 @@ export function renderAgentPanel(
614
615
 
615
616
  // Sort agents: active first, then completed, then zombie
616
617
  const agents = [...data.status.agents].sort((a, b) => {
617
- const activeStates = ["working", "booting", "stalled"];
618
+ const activeStates = ["working", "in_turn", "between_turns", "booting", "stalled"];
618
619
  const aActive = activeStates.includes(a.state);
619
620
  const bActive = activeStates.includes(b.state);
620
621
  if (aActive && !bActive) return -1;
@@ -644,10 +645,19 @@ export function renderAgentPanel(
644
645
  : now;
645
646
  const duration = formatDuration(endTime - new Date(agent.startedAt).getTime());
646
647
  const durationPadded = pad(duration, 9);
648
+ // Three liveness topologies (overstory-7a34):
649
+ // tmux: tmuxSession !== "" → tmux session must exist
650
+ // long-lived headless: tmuxSession === "" && pid !== null → PID must be alive
651
+ // spawn-per-turn: tmuxSession === "" && pid === null → no process between
652
+ // turns is normal, so liveness reduces to "state is non-terminal".
653
+ // Time-based stale/zombie classification is handled in evaluateHealth.
647
654
  const isHeadless = agent.tmuxSession === "" && agent.pid !== null;
648
- const alive = isHeadless
649
- ? agent.pid !== null && isProcessAlive(agent.pid)
650
- : data.status.tmuxSessions.some((s) => s.name === agent.tmuxSession);
655
+ const isSpawnPerTurn = agent.tmuxSession === "" && agent.pid === null;
656
+ const alive = isSpawnPerTurn
657
+ ? agent.state !== "zombie" && agent.state !== "completed"
658
+ : isHeadless
659
+ ? agent.pid !== null && isProcessAlive(agent.pid)
660
+ : data.status.tmuxSessions.some((s) => s.name === agent.tmuxSession);
651
661
  const aliveDot = alive ? color.green(">") : color.red("x");
652
662
 
653
663
  const lineContent = `${dimBox.vertical} ${stateColorFn(icon)} ${name} ${capability} ${color.dim(runtime)} ${stateColorFn(state)} ${taskId} ${durationPadded} ${aliveDot} `;
@@ -16,6 +16,7 @@ import { checkEcosystem } from "../doctor/ecosystem.ts";
16
16
  import { checkLogs } from "../doctor/logs.ts";
17
17
  import { checkMergeQueue } from "../doctor/merge-queue.ts";
18
18
  import { checkProviders } from "../doctor/providers.ts";
19
+ import { checkServe } from "../doctor/serve.ts";
19
20
  import { checkStructure } from "../doctor/structure.ts";
20
21
  import type { DoctorCategory, DoctorCheck, DoctorCheckFn } from "../doctor/types.ts";
21
22
  import { checkVersion } from "../doctor/version.ts";
@@ -39,6 +40,7 @@ const ALL_CHECKS: Array<{ category: DoctorCategory; fn: DoctorCheckFn }> = [
39
40
  { category: "ecosystem", fn: checkEcosystem },
40
41
  { category: "providers", fn: checkProviders },
41
42
  { category: "watchdog", fn: checkWatchdog },
43
+ { category: "serve", fn: checkServe },
42
44
  ];
43
45
 
44
46
  /**
@@ -241,7 +243,7 @@ function buildDoctorCommand(
241
243
  .option("--fix", "Attempt to auto-fix issues")
242
244
  .addHelpText(
243
245
  "after",
244
- "\nCategories: dependencies, structure, config, databases, consistency, agents, merge, logs, version, ecosystem, providers, watchdog",
246
+ "\nCategories: dependencies, structure, config, databases, consistency, agents, merge, logs, version, ecosystem, providers, watchdog, serve",
245
247
  )
246
248
  .action(async (opts: DoctorActionOpts) => {
247
249
  onResult(await runDoctorChecks(opts, checkRunners));