@os-eco/overstory-cli 0.9.4 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/agents/builder.md +19 -9
- package/agents/coordinator.md +6 -6
- package/agents/lead.md +204 -87
- package/agents/merger.md +25 -14
- package/agents/reviewer.md +22 -16
- package/agents/scout.md +17 -12
- package/package.json +6 -3
- package/src/agents/capabilities.test.ts +85 -0
- package/src/agents/capabilities.ts +125 -0
- package/src/agents/headless-mail-injector.test.ts +448 -0
- package/src/agents/headless-mail-injector.ts +219 -0
- package/src/agents/headless-prompt.test.ts +102 -0
- package/src/agents/headless-prompt.ts +68 -0
- package/src/agents/hooks-deployer.test.ts +514 -14
- package/src/agents/hooks-deployer.ts +141 -0
- package/src/agents/mail-poll-detect.test.ts +153 -0
- package/src/agents/mail-poll-detect.ts +73 -0
- package/src/agents/overlay.test.ts +60 -4
- package/src/agents/overlay.ts +63 -8
- package/src/agents/scope-detect.test.ts +190 -0
- package/src/agents/scope-detect.ts +146 -0
- package/src/agents/turn-lock.test.ts +181 -0
- package/src/agents/turn-lock.ts +235 -0
- package/src/agents/turn-runner-dispatch.test.ts +182 -0
- package/src/agents/turn-runner-dispatch.ts +105 -0
- package/src/agents/turn-runner.test.ts +2312 -0
- package/src/agents/turn-runner.ts +1383 -0
- package/src/commands/agents.ts +9 -0
- package/src/commands/clean.ts +54 -0
- package/src/commands/coordinator.test.ts +254 -0
- package/src/commands/coordinator.ts +273 -8
- package/src/commands/dashboard.test.ts +188 -0
- package/src/commands/dashboard.ts +14 -4
- package/src/commands/doctor.ts +3 -1
- package/src/commands/group.test.ts +94 -0
- package/src/commands/group.ts +49 -20
- package/src/commands/init.test.ts +8 -0
- package/src/commands/init.ts +8 -1
- package/src/commands/log.test.ts +187 -11
- package/src/commands/log.ts +171 -71
- package/src/commands/mail.test.ts +162 -0
- package/src/commands/mail.ts +64 -9
- package/src/commands/merge.test.ts +230 -1
- package/src/commands/merge.ts +68 -12
- package/src/commands/nudge.test.ts +351 -4
- package/src/commands/nudge.ts +356 -34
- package/src/commands/run.test.ts +43 -7
- package/src/commands/serve/build.test.ts +202 -0
- package/src/commands/serve/build.ts +206 -0
- package/src/commands/serve/coordinator-actions.test.ts +339 -0
- package/src/commands/serve/coordinator-actions.ts +408 -0
- package/src/commands/serve/dev.test.ts +168 -0
- package/src/commands/serve/dev.ts +117 -0
- package/src/commands/serve/mail-actions.test.ts +312 -0
- package/src/commands/serve/mail-actions.ts +167 -0
- package/src/commands/serve/rest.test.ts +1323 -0
- package/src/commands/serve/rest.ts +708 -0
- package/src/commands/serve/static.ts +51 -0
- package/src/commands/serve/ws.test.ts +361 -0
- package/src/commands/serve/ws.ts +332 -0
- package/src/commands/serve.test.ts +459 -0
- package/src/commands/serve.ts +565 -0
- package/src/commands/sling.test.ts +177 -1
- package/src/commands/sling.ts +243 -71
- package/src/commands/status.test.ts +9 -0
- package/src/commands/status.ts +12 -4
- package/src/commands/stop.test.ts +255 -1
- package/src/commands/stop.ts +107 -8
- package/src/commands/watch.test.ts +43 -0
- package/src/commands/watch.ts +153 -28
- package/src/config.ts +23 -0
- package/src/doctor/consistency.test.ts +106 -0
- package/src/doctor/consistency.ts +48 -1
- package/src/doctor/serve.test.ts +95 -0
- package/src/doctor/serve.ts +86 -0
- package/src/doctor/types.ts +2 -1
- package/src/doctor/watchdog.ts +57 -1
- package/src/events/tailer.test.ts +234 -1
- package/src/events/tailer.ts +90 -0
- package/src/index.ts +57 -6
- package/src/insights/quality-gates.test.ts +141 -0
- package/src/insights/quality-gates.ts +156 -0
- package/src/json.ts +29 -0
- package/src/logging/theme.ts +4 -0
- package/src/mail/client.ts +15 -2
- package/src/mail/store.test.ts +82 -0
- package/src/mail/store.ts +41 -4
- package/src/merge/lock.test.ts +149 -0
- package/src/merge/lock.ts +140 -0
- package/src/merge/predict.test.ts +387 -0
- package/src/merge/predict.ts +249 -0
- package/src/merge/resolver.ts +1 -1
- package/src/mulch/client.ts +3 -3
- package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
- package/src/runtimes/claude.test.ts +791 -1
- package/src/runtimes/claude.ts +323 -1
- package/src/runtimes/connections.test.ts +141 -1
- package/src/runtimes/connections.ts +73 -4
- package/src/runtimes/headless-connection.test.ts +264 -0
- package/src/runtimes/headless-connection.ts +158 -0
- package/src/runtimes/types.ts +10 -0
- package/src/schema-consistency.test.ts +1 -0
- package/src/sessions/store.test.ts +657 -29
- package/src/sessions/store.ts +286 -23
- package/src/test-setup.test.ts +31 -0
- package/src/test-setup.ts +28 -0
- package/src/types.ts +107 -2
- package/src/utils/pid.test.ts +85 -1
- package/src/utils/pid.ts +86 -1
- package/src/utils/process-scan.test.ts +53 -0
- package/src/utils/process-scan.ts +76 -0
- package/src/watchdog/daemon.test.ts +1607 -376
- package/src/watchdog/daemon.ts +462 -88
- package/src/watchdog/health.test.ts +282 -0
- package/src/watchdog/health.ts +126 -27
- package/src/worktree/manager.test.ts +218 -1
- package/src/worktree/manager.ts +55 -0
- package/src/worktree/process.test.ts +71 -0
- package/src/worktree/process.ts +25 -5
- package/src/worktree/tmux.test.ts +28 -0
- package/src/worktree/tmux.ts +27 -3
- package/templates/CLAUDE.md.tmpl +19 -8
- package/templates/overlay.md.tmpl +5 -2
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import { mkdir, unlink } from "node:fs/promises";
|
|
16
16
|
import { join } from "node:path";
|
|
17
17
|
import { Command } from "commander";
|
|
18
|
+
import { buildInitialHeadlessPrompt, formatMailSection } from "../agents/headless-prompt.ts";
|
|
18
19
|
import { createIdentity, loadIdentity } from "../agents/identity.ts";
|
|
19
20
|
import { createManifestLoader, resolveModel } from "../agents/manifest.ts";
|
|
20
21
|
import { loadConfig } from "../config.ts";
|
|
@@ -29,6 +30,8 @@ import { createRunStore, createSessionStore } from "../sessions/store.ts";
|
|
|
29
30
|
import { resolveBackend, trackerCliName } from "../tracker/factory.ts";
|
|
30
31
|
import type { AgentSession } from "../types.ts";
|
|
31
32
|
import { isProcessRunning } from "../watchdog/health.ts";
|
|
33
|
+
import type { SpawnHeadlessOptions } from "../worktree/process.ts";
|
|
34
|
+
import { spawnHeadlessAgent } from "../worktree/process.ts";
|
|
32
35
|
import type { SessionState } from "../worktree/tmux.ts";
|
|
33
36
|
import {
|
|
34
37
|
capturePaneContent,
|
|
@@ -46,7 +49,7 @@ import { nudgeAgent } from "./nudge.ts";
|
|
|
46
49
|
import { isRunningAsRoot } from "./sling.ts";
|
|
47
50
|
|
|
48
51
|
/** Default coordinator agent name. */
|
|
49
|
-
const COORDINATOR_NAME = "coordinator";
|
|
52
|
+
export const COORDINATOR_NAME = "coordinator";
|
|
50
53
|
|
|
51
54
|
export interface PersistentAgentSpec {
|
|
52
55
|
commandName: string;
|
|
@@ -120,6 +123,15 @@ export interface CoordinatorDeps {
|
|
|
120
123
|
_capturePaneContent?: (name: string, lines?: number) => Promise<string | null>;
|
|
121
124
|
/** Override poll interval for ask subcommand (default: ASK_POLL_INTERVAL_MS). Used in tests. */
|
|
122
125
|
_pollIntervalMs?: number;
|
|
126
|
+
/** Override headless spawn (used by tests to avoid forking real subprocesses). */
|
|
127
|
+
_spawnHeadless?: (
|
|
128
|
+
argv: string[],
|
|
129
|
+
opts: SpawnHeadlessOptions,
|
|
130
|
+
) => Promise<{
|
|
131
|
+
pid: number;
|
|
132
|
+
stdin: { write(data: string | Uint8Array): number | Promise<number> };
|
|
133
|
+
stdout: ReadableStream<Uint8Array> | null;
|
|
134
|
+
}>;
|
|
123
135
|
}
|
|
124
136
|
|
|
125
137
|
/**
|
|
@@ -332,6 +344,21 @@ export interface CoordinatorSessionOptions {
|
|
|
332
344
|
displayName?: string;
|
|
333
345
|
/** Custom beacon builder. Receives tracker CLI name, returns beacon string. */
|
|
334
346
|
beaconBuilder?: (trackerCli: string) => string;
|
|
347
|
+
/**
|
|
348
|
+
* When true, spawn the coordinator headless (no tmux pane). The runtime must
|
|
349
|
+
* implement buildDirectSpawn(). The CLI command `ov coordinator start` does
|
|
350
|
+
* not yet pass this flag — it is consumed by the headless start path used by
|
|
351
|
+
* the web UI's POST /api/coordinator/start endpoint.
|
|
352
|
+
*/
|
|
353
|
+
headless?: boolean;
|
|
354
|
+
/**
|
|
355
|
+
* Acknowledge that a watchdog daemon from a previous session may already be
|
|
356
|
+
* running and should be allowed to supervise this coordinator. Without this
|
|
357
|
+
* (or `--watchdog`), the start command refuses to spawn when a leftover
|
|
358
|
+
* daemon is detected, to surface the "watchdog persists across runs" trap
|
|
359
|
+
* that overstory-3f0c was filed for.
|
|
360
|
+
*/
|
|
361
|
+
acceptExistingWatchdog?: boolean;
|
|
335
362
|
}
|
|
336
363
|
|
|
337
364
|
/**
|
|
@@ -365,6 +392,8 @@ export async function startCoordinatorSession(
|
|
|
365
392
|
agentDefFile: agentDefFileOpt,
|
|
366
393
|
displayName: displayNameOpt,
|
|
367
394
|
beaconBuilder: beaconBuilderOpt,
|
|
395
|
+
headless: headlessFlag,
|
|
396
|
+
acceptExistingWatchdog: acceptExistingWatchdogFlag,
|
|
368
397
|
} = opts;
|
|
369
398
|
|
|
370
399
|
const coordinatorName = agentNameOpt ?? coordinatorNameOpt ?? COORDINATOR_NAME;
|
|
@@ -386,6 +415,25 @@ export async function startCoordinatorSession(
|
|
|
386
415
|
const monitor = deps._monitor ?? createDefaultMonitor(projectRoot);
|
|
387
416
|
const tmuxSession = coordinatorTmuxSession(config.project.name, coordinatorName);
|
|
388
417
|
|
|
418
|
+
// Detect leftover watchdog daemon from a previous session (overstory-3f0c).
|
|
419
|
+
// If a watchdog is already running and the operator did not pass --watchdog
|
|
420
|
+
// or --accept-existing-watchdog, refuse to start: a persistent daemon will
|
|
421
|
+
// supervise this coordinator with policy decided by the original invocation,
|
|
422
|
+
// not the current one. This prevents "I didn't run --watchdog, why is the
|
|
423
|
+
// watchdog killing things?" surprises.
|
|
424
|
+
const watchdogAlreadyRunning = await watchdog.isRunning();
|
|
425
|
+
if (watchdogAlreadyRunning && !watchdogFlag && !acceptExistingWatchdogFlag) {
|
|
426
|
+
const existingPid = await readWatchdogPid(projectRoot);
|
|
427
|
+
const pidLabel = existingPid !== null ? `PID ${existingPid}` : "unknown PID";
|
|
428
|
+
throw new AgentError(
|
|
429
|
+
`Watchdog daemon (${pidLabel}) is already running from a previous session. ` +
|
|
430
|
+
`It will supervise this ${displayName.toLowerCase()} run and may take escalation actions you did not opt into. ` +
|
|
431
|
+
`To proceed: pass --watchdog to acknowledge, pass --accept-existing-watchdog to suppress this check, ` +
|
|
432
|
+
`or run 'ov watch --kill-others' (or remove .overstory/watchdog.pid) first.`,
|
|
433
|
+
{ agentName: coordinatorName },
|
|
434
|
+
);
|
|
435
|
+
}
|
|
436
|
+
|
|
389
437
|
// Check for existing coordinator session with the same name
|
|
390
438
|
const overstoryDir = join(projectRoot, ".overstory");
|
|
391
439
|
const { store } = openSessionStore(overstoryDir);
|
|
@@ -459,6 +507,170 @@ export async function startCoordinatorSession(
|
|
|
459
507
|
});
|
|
460
508
|
}
|
|
461
509
|
|
|
510
|
+
// Headless start path: bypass tmux entirely and spawn the coordinator
|
|
511
|
+
// process directly via runtime.buildDirectSpawn(). Same hooks, identity,
|
|
512
|
+
// and run-tracking as the tmux path — only the spawn mechanism differs.
|
|
513
|
+
if (headlessFlag === true) {
|
|
514
|
+
if (!runtime.buildDirectSpawn) {
|
|
515
|
+
throw new ValidationError(
|
|
516
|
+
`Headless coordinator start requires a runtime with buildDirectSpawn (got: ${runtime.id})`,
|
|
517
|
+
{ field: "runtime", value: runtime.id },
|
|
518
|
+
);
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
const spawnHeadless = deps._spawnHeadless ?? spawnHeadlessAgent;
|
|
522
|
+
const directEnv: Record<string, string> = {
|
|
523
|
+
...runtime.buildEnv(resolvedModel),
|
|
524
|
+
OVERSTORY_AGENT_NAME: coordinatorName,
|
|
525
|
+
OVERSTORY_PROJECT_ROOT: projectRoot,
|
|
526
|
+
...(profileFlag ? { OVERSTORY_PROFILE: profileFlag } : {}),
|
|
527
|
+
};
|
|
528
|
+
const argv = runtime.buildDirectSpawn({
|
|
529
|
+
cwd: projectRoot,
|
|
530
|
+
env: directEnv,
|
|
531
|
+
...(resolvedModel.isExplicitOverride ? { model: resolvedModel.model } : {}),
|
|
532
|
+
instructionPath: runtime.instructionPath,
|
|
533
|
+
});
|
|
534
|
+
|
|
535
|
+
// Per-session log dir mirrors sling.ts headless path.
|
|
536
|
+
const logTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
537
|
+
const headlessLogDir = join(overstoryDir, "logs", "coordinator", logTimestamp);
|
|
538
|
+
await mkdir(headlessLogDir, { recursive: true });
|
|
539
|
+
|
|
540
|
+
const headlessProc = await spawnHeadless(argv, {
|
|
541
|
+
cwd: projectRoot,
|
|
542
|
+
env: { ...(process.env as Record<string, string>), ...directEnv },
|
|
543
|
+
stdoutFile: join(headlessLogDir, "stdout.log"),
|
|
544
|
+
stderrFile: join(headlessLogDir, "stderr.log"),
|
|
545
|
+
agentName: coordinatorName,
|
|
546
|
+
});
|
|
547
|
+
|
|
548
|
+
// Build the initial stdin prompt from agent definition + pending dispatch
|
|
549
|
+
// mail + activation beacon. Replaces SessionStart hooks (no-op headless).
|
|
550
|
+
const agentDefPath = join(projectRoot, ".overstory", "agent-defs", agentDefFile);
|
|
551
|
+
const agentDefHandle = Bun.file(agentDefPath);
|
|
552
|
+
const primeContext = (await agentDefHandle.exists()) ? await agentDefHandle.text() : "";
|
|
553
|
+
|
|
554
|
+
const mailDbPath = join(overstoryDir, "mail.db");
|
|
555
|
+
const pendingMailStore = createMailStore(mailDbPath);
|
|
556
|
+
let mailSection = "";
|
|
557
|
+
try {
|
|
558
|
+
const pendingMailClient = createMailClient(pendingMailStore);
|
|
559
|
+
const pendingMessages = pendingMailClient.check(coordinatorName);
|
|
560
|
+
mailSection = formatMailSection(pendingMessages);
|
|
561
|
+
} finally {
|
|
562
|
+
pendingMailStore.close();
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
const resolvedBackend = await resolveBackend(config.taskTracker.backend, config.project.root);
|
|
566
|
+
const trackerCli = trackerCliName(resolvedBackend);
|
|
567
|
+
const beacon = beaconBuilder(trackerCli);
|
|
568
|
+
const initialPrompt = buildInitialHeadlessPrompt(
|
|
569
|
+
primeContext || undefined,
|
|
570
|
+
mailSection || undefined,
|
|
571
|
+
beacon,
|
|
572
|
+
);
|
|
573
|
+
await headlessProc.stdin.write(initialPrompt);
|
|
574
|
+
|
|
575
|
+
// Create run record + current-run.txt + session row.
|
|
576
|
+
const sessionId = `session-${Date.now()}-${coordinatorName}`;
|
|
577
|
+
const runId = `run-${new Date().toISOString().replace(/[:.]/g, "-")}`;
|
|
578
|
+
const runStore = createRunStore(join(overstoryDir, "sessions.db"));
|
|
579
|
+
try {
|
|
580
|
+
runStore.createRun({
|
|
581
|
+
id: runId,
|
|
582
|
+
startedAt: new Date().toISOString(),
|
|
583
|
+
coordinatorSessionId: sessionId,
|
|
584
|
+
coordinatorName,
|
|
585
|
+
status: "active",
|
|
586
|
+
});
|
|
587
|
+
} finally {
|
|
588
|
+
runStore.close();
|
|
589
|
+
}
|
|
590
|
+
await Bun.write(join(overstoryDir, "current-run.txt"), runId);
|
|
591
|
+
|
|
592
|
+
const session: AgentSession = {
|
|
593
|
+
id: sessionId,
|
|
594
|
+
agentName: coordinatorName,
|
|
595
|
+
capability,
|
|
596
|
+
worktreePath: projectRoot,
|
|
597
|
+
branchName: config.project.canonicalBranch,
|
|
598
|
+
taskId: "",
|
|
599
|
+
tmuxSession: "", // headless: no tmux pane
|
|
600
|
+
state: "booting",
|
|
601
|
+
pid: headlessProc.pid,
|
|
602
|
+
parentAgent: null,
|
|
603
|
+
depth: 0,
|
|
604
|
+
runId,
|
|
605
|
+
startedAt: new Date().toISOString(),
|
|
606
|
+
lastActivity: new Date().toISOString(),
|
|
607
|
+
escalationLevel: 0,
|
|
608
|
+
stalledSince: null,
|
|
609
|
+
transcriptPath: null,
|
|
610
|
+
};
|
|
611
|
+
store.upsert(session);
|
|
612
|
+
|
|
613
|
+
// Auto-start watchdog / monitor (same as tmux path).
|
|
614
|
+
let watchdogPid: number | undefined;
|
|
615
|
+
if (watchdogFlag) {
|
|
616
|
+
const watchdogResult = await watchdog.start();
|
|
617
|
+
if (watchdogResult) {
|
|
618
|
+
watchdogPid = watchdogResult.pid;
|
|
619
|
+
if (!json) printHint("Watchdog started");
|
|
620
|
+
} else if (watchdogAlreadyRunning) {
|
|
621
|
+
// createDefaultWatchdog.start() returns null when an existing PID
|
|
622
|
+
// is alive — that's a no-op success, not a failure. Reuse the
|
|
623
|
+
// existing daemon. Sentinel value keeps `watchdogPid !== undefined`
|
|
624
|
+
// truthy in the JSON output.
|
|
625
|
+
watchdogPid = -1;
|
|
626
|
+
if (!json) printHint("Watchdog already running, reusing existing daemon");
|
|
627
|
+
} else {
|
|
628
|
+
if (!json) printWarning("Watchdog failed to start");
|
|
629
|
+
}
|
|
630
|
+
} else if (watchdogAlreadyRunning && acceptExistingWatchdogFlag) {
|
|
631
|
+
// --accept-existing-watchdog without --watchdog: surface that an
|
|
632
|
+
// existing daemon is supervising this run, but do not call start().
|
|
633
|
+
watchdogPid = -1;
|
|
634
|
+
if (!json) printHint("Watchdog already running, reusing existing daemon");
|
|
635
|
+
}
|
|
636
|
+
let monitorPid: number | undefined;
|
|
637
|
+
if (monitorFlag) {
|
|
638
|
+
if (!config.watchdog.tier2Enabled) {
|
|
639
|
+
if (!json) printWarning("Monitor skipped", "watchdog.tier2Enabled is false");
|
|
640
|
+
} else {
|
|
641
|
+
const monitorResult = await monitor.start([]);
|
|
642
|
+
if (monitorResult) {
|
|
643
|
+
monitorPid = monitorResult.pid;
|
|
644
|
+
if (!json) printHint("Monitor started");
|
|
645
|
+
} else {
|
|
646
|
+
if (!json) printWarning("Monitor failed to start");
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
const output = {
|
|
652
|
+
agentName: coordinatorName,
|
|
653
|
+
capability,
|
|
654
|
+
tmuxSession: "",
|
|
655
|
+
projectRoot,
|
|
656
|
+
pid: headlessProc.pid,
|
|
657
|
+
headless: true,
|
|
658
|
+
watchdog: watchdogPid !== undefined,
|
|
659
|
+
watchdogPreexisting: watchdogAlreadyRunning,
|
|
660
|
+
monitor: monitorFlag ? monitorPid !== undefined : false,
|
|
661
|
+
};
|
|
662
|
+
|
|
663
|
+
if (json) {
|
|
664
|
+
jsonOutput(`${capability} start`, output);
|
|
665
|
+
} else {
|
|
666
|
+
printSuccess(`${displayName} started (headless)`);
|
|
667
|
+
process.stdout.write(` Root: ${projectRoot}\n`);
|
|
668
|
+
process.stdout.write(` PID: ${headlessProc.pid}\n`);
|
|
669
|
+
process.stdout.write(` Logs: ${headlessLogDir}\n`);
|
|
670
|
+
}
|
|
671
|
+
return;
|
|
672
|
+
}
|
|
673
|
+
|
|
462
674
|
// Preflight: verify tmux is installed before attempting to spawn.
|
|
463
675
|
// Without this check, a missing tmux leads to cryptic errors later.
|
|
464
676
|
await tmux.ensureTmuxAvailable();
|
|
@@ -584,16 +796,28 @@ export async function startCoordinatorSession(
|
|
|
584
796
|
await tmux.sendKeys(tmuxSession, "");
|
|
585
797
|
}
|
|
586
798
|
|
|
587
|
-
// Auto-start watchdog if --watchdog flag is present
|
|
799
|
+
// Auto-start watchdog if --watchdog flag is present.
|
|
588
800
|
let watchdogPid: number | undefined;
|
|
589
801
|
if (watchdogFlag) {
|
|
590
802
|
const watchdogResult = await watchdog.start();
|
|
591
803
|
if (watchdogResult) {
|
|
592
804
|
watchdogPid = watchdogResult.pid;
|
|
593
805
|
if (!json) printHint("Watchdog started");
|
|
806
|
+
} else if (watchdogAlreadyRunning) {
|
|
807
|
+
// createDefaultWatchdog.start() returns null when an existing PID
|
|
808
|
+
// is alive — that's a no-op success, not a failure. Reuse the
|
|
809
|
+
// existing daemon. Sentinel value keeps `watchdogPid !== undefined`
|
|
810
|
+
// truthy in the JSON output.
|
|
811
|
+
watchdogPid = -1;
|
|
812
|
+
if (!json) printHint("Watchdog already running, reusing existing daemon");
|
|
594
813
|
} else {
|
|
595
814
|
if (!json) printWarning("Watchdog failed to start");
|
|
596
815
|
}
|
|
816
|
+
} else if (watchdogAlreadyRunning && acceptExistingWatchdogFlag) {
|
|
817
|
+
// --accept-existing-watchdog without --watchdog: surface that an
|
|
818
|
+
// existing daemon is supervising this run, but do not call start().
|
|
819
|
+
watchdogPid = -1;
|
|
820
|
+
if (!json) printHint("Watchdog already running, reusing existing daemon");
|
|
597
821
|
}
|
|
598
822
|
|
|
599
823
|
// Auto-start monitor if --monitor flag is present and tier2 is enabled
|
|
@@ -618,7 +842,8 @@ export async function startCoordinatorSession(
|
|
|
618
842
|
tmuxSession,
|
|
619
843
|
projectRoot,
|
|
620
844
|
pid,
|
|
621
|
-
watchdog:
|
|
845
|
+
watchdog: watchdogPid !== undefined,
|
|
846
|
+
watchdogPreexisting: watchdogAlreadyRunning,
|
|
622
847
|
monitor: monitorFlag ? monitorPid !== undefined : false,
|
|
623
848
|
};
|
|
624
849
|
|
|
@@ -629,6 +854,7 @@ export async function startCoordinatorSession(
|
|
|
629
854
|
process.stdout.write(` Tmux: ${tmuxSession}\n`);
|
|
630
855
|
process.stdout.write(` Root: ${projectRoot}\n`);
|
|
631
856
|
process.stdout.write(` PID: ${pid}\n`);
|
|
857
|
+
printHint("Open the UI: `ov serve` then http://localhost:7321 — primary operator surface");
|
|
632
858
|
}
|
|
633
859
|
|
|
634
860
|
if (shouldAttach) {
|
|
@@ -643,7 +869,14 @@ export async function startCoordinatorSession(
|
|
|
643
869
|
|
|
644
870
|
async function startPersistentAgent(
|
|
645
871
|
spec: PersistentAgentSpec,
|
|
646
|
-
opts: {
|
|
872
|
+
opts: {
|
|
873
|
+
json: boolean;
|
|
874
|
+
attach: boolean;
|
|
875
|
+
watchdog: boolean;
|
|
876
|
+
monitor: boolean;
|
|
877
|
+
profile?: string;
|
|
878
|
+
acceptExistingWatchdog?: boolean;
|
|
879
|
+
},
|
|
647
880
|
deps: CoordinatorDeps = {},
|
|
648
881
|
): Promise<void> {
|
|
649
882
|
await startCoordinatorSession(
|
|
@@ -679,6 +912,18 @@ function isActivePersistentAgentSession(
|
|
|
679
912
|
* 3. Mark session as completed in SessionStore
|
|
680
913
|
* 4. Auto-complete the active run (if current-run.txt exists)
|
|
681
914
|
*/
|
|
915
|
+
/**
|
|
916
|
+
* Stop the default coordinator. Handles both tmux and headless sessions.
|
|
917
|
+
* Exposed for callers outside the CLI command surface (e.g. the web-UI POST
|
|
918
|
+
* /api/coordinator/stop endpoint, which lives in coordinator-actions.ts).
|
|
919
|
+
*/
|
|
920
|
+
export async function stopCoordinatorSession(
|
|
921
|
+
opts: { json: boolean },
|
|
922
|
+
deps: CoordinatorDeps = {},
|
|
923
|
+
): Promise<void> {
|
|
924
|
+
await stopPersistentAgent(COORDINATOR_SPEC, opts, deps);
|
|
925
|
+
}
|
|
926
|
+
|
|
682
927
|
async function stopPersistentAgent(
|
|
683
928
|
spec: PersistentAgentSpec,
|
|
684
929
|
opts: { json: boolean },
|
|
@@ -712,10 +957,24 @@ async function stopPersistentAgent(
|
|
|
712
957
|
});
|
|
713
958
|
}
|
|
714
959
|
|
|
715
|
-
//
|
|
716
|
-
|
|
717
|
-
if (
|
|
718
|
-
await
|
|
960
|
+
// Headless sessions have no tmux pane (tmuxSession === ""). Tear down via
|
|
961
|
+
// the connection registry (SIGTERM-with-SIGKILL-escalation) and skip tmux.
|
|
962
|
+
if (session.tmuxSession === "") {
|
|
963
|
+
const { removeConnection } = await import("../runtimes/connections.ts");
|
|
964
|
+
removeConnection(spec.agentName);
|
|
965
|
+
if (session.pid !== null && isProcessRunning(session.pid)) {
|
|
966
|
+
try {
|
|
967
|
+
process.kill(session.pid, "SIGTERM");
|
|
968
|
+
} catch {
|
|
969
|
+
// process may have exited between the check and the signal
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
} else {
|
|
973
|
+
// Kill tmux session with process tree cleanup
|
|
974
|
+
const alive = await tmux.isSessionAlive(session.tmuxSession);
|
|
975
|
+
if (alive) {
|
|
976
|
+
await tmux.killSession(session.tmuxSession);
|
|
977
|
+
}
|
|
719
978
|
}
|
|
720
979
|
|
|
721
980
|
// Always attempt to stop watchdog
|
|
@@ -1359,6 +1618,10 @@ export function createPersistentAgentCommand(
|
|
|
1359
1618
|
.option("--attach", "Always attach to tmux session after start")
|
|
1360
1619
|
.option("--no-attach", "Never attach to tmux session after start")
|
|
1361
1620
|
.option("--watchdog", `Auto-start watchdog daemon with ${spec.commandName}`)
|
|
1621
|
+
.option(
|
|
1622
|
+
"--accept-existing-watchdog",
|
|
1623
|
+
"Continue when a watchdog daemon from a previous session is already running (it will supervise this run)",
|
|
1624
|
+
)
|
|
1362
1625
|
.option("--monitor", `Auto-start Tier 2 monitor agent with ${spec.commandName}`)
|
|
1363
1626
|
.option("--profile <name>", "Canopy profile to apply to spawned agents")
|
|
1364
1627
|
.option("--json", "Output as JSON")
|
|
@@ -1366,6 +1629,7 @@ export function createPersistentAgentCommand(
|
|
|
1366
1629
|
async (opts: {
|
|
1367
1630
|
attach?: boolean;
|
|
1368
1631
|
watchdog?: boolean;
|
|
1632
|
+
acceptExistingWatchdog?: boolean;
|
|
1369
1633
|
monitor?: boolean;
|
|
1370
1634
|
json?: boolean;
|
|
1371
1635
|
profile?: string;
|
|
@@ -1378,6 +1642,7 @@ export function createPersistentAgentCommand(
|
|
|
1378
1642
|
json: opts.json ?? false,
|
|
1379
1643
|
attach: shouldAttach,
|
|
1380
1644
|
watchdog: opts.watchdog ?? false,
|
|
1645
|
+
acceptExistingWatchdog: opts.acceptExistingWatchdog ?? false,
|
|
1381
1646
|
monitor: opts.monitor ?? false,
|
|
1382
1647
|
profile: opts.profile,
|
|
1383
1648
|
},
|
|
@@ -295,6 +295,7 @@ function makeDashboardData(
|
|
|
295
295
|
worktrees: [],
|
|
296
296
|
tmuxSessions: [],
|
|
297
297
|
unreadMailCount: 0,
|
|
298
|
+
unreadMailScope: "orchestrator",
|
|
298
299
|
mergeQueueCount: 0,
|
|
299
300
|
recentMetricsCount: 0,
|
|
300
301
|
},
|
|
@@ -447,6 +448,7 @@ describe("renderAgentPanel", () => {
|
|
|
447
448
|
worktrees: [],
|
|
448
449
|
tmuxSessions: [], // no tmux sessions
|
|
449
450
|
unreadMailCount: 0,
|
|
451
|
+
unreadMailScope: "orchestrator",
|
|
450
452
|
mergeQueueCount: 0,
|
|
451
453
|
recentMetricsCount: 0,
|
|
452
454
|
},
|
|
@@ -487,6 +489,7 @@ describe("renderAgentPanel", () => {
|
|
|
487
489
|
worktrees: [],
|
|
488
490
|
tmuxSessions: [],
|
|
489
491
|
unreadMailCount: 0,
|
|
492
|
+
unreadMailScope: "orchestrator",
|
|
490
493
|
mergeQueueCount: 0,
|
|
491
494
|
recentMetricsCount: 0,
|
|
492
495
|
},
|
|
@@ -495,6 +498,191 @@ describe("renderAgentPanel", () => {
|
|
|
495
498
|
expect(out).toContain("x");
|
|
496
499
|
expect(out).toContain("dead-headless");
|
|
497
500
|
});
|
|
501
|
+
|
|
502
|
+
test("renders mixed tmux + headless agents in same frame with correct liveness", () => {
|
|
503
|
+
const data = {
|
|
504
|
+
...makeDashboardData({}),
|
|
505
|
+
status: {
|
|
506
|
+
currentRunId: null,
|
|
507
|
+
agents: [
|
|
508
|
+
{
|
|
509
|
+
id: "sess-tmux-1",
|
|
510
|
+
agentName: "pane-agent",
|
|
511
|
+
capability: "builder",
|
|
512
|
+
worktreePath: "/tmp/wt/pane-agent",
|
|
513
|
+
branchName: "overstory/pane-agent/task-t1",
|
|
514
|
+
taskId: "task-t1",
|
|
515
|
+
tmuxSession: "overstory-pane-agent",
|
|
516
|
+
state: "working" as const,
|
|
517
|
+
pid: 99999,
|
|
518
|
+
parentAgent: null,
|
|
519
|
+
depth: 0,
|
|
520
|
+
runId: null,
|
|
521
|
+
startedAt: new Date(Date.now() - 10_000).toISOString(),
|
|
522
|
+
lastActivity: new Date().toISOString(),
|
|
523
|
+
escalationLevel: 0,
|
|
524
|
+
stalledSince: null,
|
|
525
|
+
transcriptPath: null,
|
|
526
|
+
},
|
|
527
|
+
{
|
|
528
|
+
id: "sess-headless-1",
|
|
529
|
+
agentName: "live-headless",
|
|
530
|
+
capability: "builder",
|
|
531
|
+
worktreePath: "/tmp/wt/live-headless",
|
|
532
|
+
branchName: "overstory/live-headless/task-h1",
|
|
533
|
+
taskId: "task-h1",
|
|
534
|
+
tmuxSession: "", // headless
|
|
535
|
+
state: "working" as const,
|
|
536
|
+
pid: process.pid, // own PID — guaranteed alive
|
|
537
|
+
parentAgent: null,
|
|
538
|
+
depth: 0,
|
|
539
|
+
runId: null,
|
|
540
|
+
startedAt: new Date(Date.now() - 10_000).toISOString(),
|
|
541
|
+
lastActivity: new Date().toISOString(),
|
|
542
|
+
escalationLevel: 0,
|
|
543
|
+
stalledSince: null,
|
|
544
|
+
transcriptPath: null,
|
|
545
|
+
},
|
|
546
|
+
],
|
|
547
|
+
worktrees: [],
|
|
548
|
+
tmuxSessions: [{ name: "overstory-pane-agent", pid: 99998 }],
|
|
549
|
+
unreadMailCount: 0,
|
|
550
|
+
unreadMailScope: "orchestrator",
|
|
551
|
+
mergeQueueCount: 0,
|
|
552
|
+
recentMetricsCount: 0,
|
|
553
|
+
},
|
|
554
|
+
};
|
|
555
|
+
const out = renderAgentPanel(data, 100, 12, 3);
|
|
556
|
+
expect(out).toContain("pane-agent");
|
|
557
|
+
expect(out).toContain("live-headless");
|
|
558
|
+
const aliveMarkers = (out.match(/>/g) ?? []).length;
|
|
559
|
+
expect(aliveMarkers).toBeGreaterThanOrEqual(2);
|
|
560
|
+
expect(out).not.toContain("x");
|
|
561
|
+
});
|
|
562
|
+
|
|
563
|
+
test("spawn-per-turn worker (no tmux, no pid) renders alive when state is non-terminal (overstory-7a34)", () => {
|
|
564
|
+
// Repro: freshly slung headless lead has tmuxSession='' and pid=null.
|
|
565
|
+
// Previously fell into the tmux path → never matched → red "x" while
|
|
566
|
+
// ov feed showed live tool events from the same agent.
|
|
567
|
+
const data = {
|
|
568
|
+
...makeDashboardData({}),
|
|
569
|
+
status: {
|
|
570
|
+
currentRunId: null,
|
|
571
|
+
agents: [
|
|
572
|
+
{
|
|
573
|
+
id: "sess-spt-1",
|
|
574
|
+
agentName: "freshly-slung",
|
|
575
|
+
capability: "lead",
|
|
576
|
+
worktreePath: "/tmp/wt/freshly-slung",
|
|
577
|
+
branchName: "overstory/freshly-slung/task-l1",
|
|
578
|
+
taskId: "task-l1",
|
|
579
|
+
tmuxSession: "", // headless
|
|
580
|
+
state: "working" as const,
|
|
581
|
+
pid: null, // spawn-per-turn: no persistent process between turns
|
|
582
|
+
parentAgent: null,
|
|
583
|
+
depth: 0,
|
|
584
|
+
runId: null,
|
|
585
|
+
startedAt: new Date(Date.now() - 5_000).toISOString(),
|
|
586
|
+
lastActivity: new Date().toISOString(),
|
|
587
|
+
escalationLevel: 0,
|
|
588
|
+
stalledSince: null,
|
|
589
|
+
transcriptPath: null,
|
|
590
|
+
},
|
|
591
|
+
],
|
|
592
|
+
worktrees: [],
|
|
593
|
+
tmuxSessions: [],
|
|
594
|
+
unreadMailCount: 0,
|
|
595
|
+
unreadMailScope: "orchestrator",
|
|
596
|
+
mergeQueueCount: 0,
|
|
597
|
+
recentMetricsCount: 0,
|
|
598
|
+
},
|
|
599
|
+
};
|
|
600
|
+
const out = renderAgentPanel(data, 100, 12, 3);
|
|
601
|
+
expect(out).toContain("freshly-slung");
|
|
602
|
+
// Green ">" — agent is logically alive between turns
|
|
603
|
+
expect(out).toContain(">");
|
|
604
|
+
// No red marker should be present (name 'freshly-slung' has no 'x')
|
|
605
|
+
expect(out).not.toContain("x");
|
|
606
|
+
});
|
|
607
|
+
|
|
608
|
+
test("spawn-per-turn worker in zombie state renders dead marker (overstory-7a34)", () => {
|
|
609
|
+
const data = {
|
|
610
|
+
...makeDashboardData({}),
|
|
611
|
+
status: {
|
|
612
|
+
currentRunId: null,
|
|
613
|
+
agents: [
|
|
614
|
+
{
|
|
615
|
+
id: "sess-spt-2",
|
|
616
|
+
agentName: "abandoned-spt",
|
|
617
|
+
capability: "builder",
|
|
618
|
+
worktreePath: "/tmp/wt/abandoned-spt",
|
|
619
|
+
branchName: "overstory/abandoned-spt/task-a1",
|
|
620
|
+
taskId: "task-a1",
|
|
621
|
+
tmuxSession: "",
|
|
622
|
+
state: "zombie" as const,
|
|
623
|
+
pid: null,
|
|
624
|
+
parentAgent: null,
|
|
625
|
+
depth: 0,
|
|
626
|
+
runId: null,
|
|
627
|
+
startedAt: new Date(Date.now() - 600_000).toISOString(),
|
|
628
|
+
lastActivity: new Date(Date.now() - 600_000).toISOString(),
|
|
629
|
+
escalationLevel: 0,
|
|
630
|
+
stalledSince: null,
|
|
631
|
+
transcriptPath: null,
|
|
632
|
+
},
|
|
633
|
+
],
|
|
634
|
+
worktrees: [],
|
|
635
|
+
tmuxSessions: [],
|
|
636
|
+
unreadMailCount: 0,
|
|
637
|
+
unreadMailScope: "orchestrator",
|
|
638
|
+
mergeQueueCount: 0,
|
|
639
|
+
recentMetricsCount: 0,
|
|
640
|
+
},
|
|
641
|
+
};
|
|
642
|
+
const out = renderAgentPanel(data, 100, 12, 3);
|
|
643
|
+
expect(out).toContain("abandoned-spt");
|
|
644
|
+
expect(out).toContain("x");
|
|
645
|
+
});
|
|
646
|
+
|
|
647
|
+
test("headless agent renders dead marker when tmux session list is non-empty", () => {
|
|
648
|
+
const deadPid = 2_147_483_647;
|
|
649
|
+
const data = {
|
|
650
|
+
...makeDashboardData({}),
|
|
651
|
+
status: {
|
|
652
|
+
currentRunId: null,
|
|
653
|
+
agents: [
|
|
654
|
+
{
|
|
655
|
+
id: "sess-dead-headless-1",
|
|
656
|
+
agentName: "gone-headless",
|
|
657
|
+
capability: "builder",
|
|
658
|
+
worktreePath: "/tmp/wt/gone-headless",
|
|
659
|
+
branchName: "overstory/gone-headless/task-g1",
|
|
660
|
+
taskId: "task-g1",
|
|
661
|
+
tmuxSession: "", // headless
|
|
662
|
+
state: "working" as const,
|
|
663
|
+
pid: deadPid,
|
|
664
|
+
parentAgent: null,
|
|
665
|
+
depth: 0,
|
|
666
|
+
runId: null,
|
|
667
|
+
startedAt: new Date(Date.now() - 10_000).toISOString(),
|
|
668
|
+
lastActivity: new Date().toISOString(),
|
|
669
|
+
escalationLevel: 0,
|
|
670
|
+
stalledSince: null,
|
|
671
|
+
transcriptPath: null,
|
|
672
|
+
},
|
|
673
|
+
],
|
|
674
|
+
worktrees: [],
|
|
675
|
+
tmuxSessions: [{ name: "overstory-other-tmux", pid: 11111 }],
|
|
676
|
+
unreadMailCount: 0,
|
|
677
|
+
unreadMailScope: "orchestrator",
|
|
678
|
+
mergeQueueCount: 0,
|
|
679
|
+
recentMetricsCount: 0,
|
|
680
|
+
},
|
|
681
|
+
};
|
|
682
|
+
const out = renderAgentPanel(data, 100, 12, 3);
|
|
683
|
+
expect(out).toContain("x");
|
|
684
|
+
expect(out).toContain("gone-headless");
|
|
685
|
+
});
|
|
498
686
|
});
|
|
499
687
|
|
|
500
688
|
describe("openDashboardStores", () => {
|
|
@@ -434,6 +434,7 @@ async function loadDashboardData(
|
|
|
434
434
|
worktrees,
|
|
435
435
|
tmuxSessions,
|
|
436
436
|
unreadMailCount,
|
|
437
|
+
unreadMailScope: "orchestrator",
|
|
437
438
|
mergeQueueCount,
|
|
438
439
|
recentMetricsCount,
|
|
439
440
|
};
|
|
@@ -614,7 +615,7 @@ export function renderAgentPanel(
|
|
|
614
615
|
|
|
615
616
|
// Sort agents: active first, then completed, then zombie
|
|
616
617
|
const agents = [...data.status.agents].sort((a, b) => {
|
|
617
|
-
const activeStates = ["working", "booting", "stalled"];
|
|
618
|
+
const activeStates = ["working", "in_turn", "between_turns", "booting", "stalled"];
|
|
618
619
|
const aActive = activeStates.includes(a.state);
|
|
619
620
|
const bActive = activeStates.includes(b.state);
|
|
620
621
|
if (aActive && !bActive) return -1;
|
|
@@ -644,10 +645,19 @@ export function renderAgentPanel(
|
|
|
644
645
|
: now;
|
|
645
646
|
const duration = formatDuration(endTime - new Date(agent.startedAt).getTime());
|
|
646
647
|
const durationPadded = pad(duration, 9);
|
|
648
|
+
// Three liveness topologies (overstory-7a34):
|
|
649
|
+
// tmux: tmuxSession !== "" → tmux session must exist
|
|
650
|
+
// long-lived headless: tmuxSession === "" && pid !== null → PID must be alive
|
|
651
|
+
// spawn-per-turn: tmuxSession === "" && pid === null → no process between
|
|
652
|
+
// turns is normal, so liveness reduces to "state is non-terminal".
|
|
653
|
+
// Time-based stale/zombie classification is handled in evaluateHealth.
|
|
647
654
|
const isHeadless = agent.tmuxSession === "" && agent.pid !== null;
|
|
648
|
-
const
|
|
649
|
-
|
|
650
|
-
|
|
655
|
+
const isSpawnPerTurn = agent.tmuxSession === "" && agent.pid === null;
|
|
656
|
+
const alive = isSpawnPerTurn
|
|
657
|
+
? agent.state !== "zombie" && agent.state !== "completed"
|
|
658
|
+
: isHeadless
|
|
659
|
+
? agent.pid !== null && isProcessAlive(agent.pid)
|
|
660
|
+
: data.status.tmuxSessions.some((s) => s.name === agent.tmuxSession);
|
|
651
661
|
const aliveDot = alive ? color.green(">") : color.red("x");
|
|
652
662
|
|
|
653
663
|
const lineContent = `${dimBox.vertical} ${stateColorFn(icon)} ${name} ${capability} ${color.dim(runtime)} ${stateColorFn(state)} ${taskId} ${durationPadded} ${aliveDot} `;
|
package/src/commands/doctor.ts
CHANGED
|
@@ -16,6 +16,7 @@ import { checkEcosystem } from "../doctor/ecosystem.ts";
|
|
|
16
16
|
import { checkLogs } from "../doctor/logs.ts";
|
|
17
17
|
import { checkMergeQueue } from "../doctor/merge-queue.ts";
|
|
18
18
|
import { checkProviders } from "../doctor/providers.ts";
|
|
19
|
+
import { checkServe } from "../doctor/serve.ts";
|
|
19
20
|
import { checkStructure } from "../doctor/structure.ts";
|
|
20
21
|
import type { DoctorCategory, DoctorCheck, DoctorCheckFn } from "../doctor/types.ts";
|
|
21
22
|
import { checkVersion } from "../doctor/version.ts";
|
|
@@ -39,6 +40,7 @@ const ALL_CHECKS: Array<{ category: DoctorCategory; fn: DoctorCheckFn }> = [
|
|
|
39
40
|
{ category: "ecosystem", fn: checkEcosystem },
|
|
40
41
|
{ category: "providers", fn: checkProviders },
|
|
41
42
|
{ category: "watchdog", fn: checkWatchdog },
|
|
43
|
+
{ category: "serve", fn: checkServe },
|
|
42
44
|
];
|
|
43
45
|
|
|
44
46
|
/**
|
|
@@ -241,7 +243,7 @@ function buildDoctorCommand(
|
|
|
241
243
|
.option("--fix", "Attempt to auto-fix issues")
|
|
242
244
|
.addHelpText(
|
|
243
245
|
"after",
|
|
244
|
-
"\nCategories: dependencies, structure, config, databases, consistency, agents, merge, logs, version, ecosystem, providers, watchdog",
|
|
246
|
+
"\nCategories: dependencies, structure, config, databases, consistency, agents, merge, logs, version, ecosystem, providers, watchdog, serve",
|
|
245
247
|
)
|
|
246
248
|
.action(async (opts: DoctorActionOpts) => {
|
|
247
249
|
onResult(await runDoctorChecks(opts, checkRunners));
|