npm - @os-eco/overstory-cli - Versions diffs - 0.9.3 → 0.10.3 - Mend

@os-eco/overstory-cli 0.9.3 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

package/README.md +49 -18
package/agents/builder.md +9 -8
package/agents/coordinator.md +6 -6
package/agents/lead.md +98 -82
package/agents/merger.md +25 -14
package/agents/reviewer.md +22 -16
package/agents/scout.md +17 -12
package/package.json +6 -3
package/src/agents/capabilities.test.ts +85 -0
package/src/agents/capabilities.ts +125 -0
package/src/agents/headless-mail-injector.test.ts +448 -0
package/src/agents/headless-mail-injector.ts +211 -0
package/src/agents/headless-prompt.test.ts +102 -0
package/src/agents/headless-prompt.ts +68 -0
package/src/agents/hooks-deployer.test.ts +514 -14
package/src/agents/hooks-deployer.ts +141 -0
package/src/agents/overlay.test.ts +4 -4
package/src/agents/overlay.ts +30 -8
package/src/agents/turn-lock.test.ts +181 -0
package/src/agents/turn-lock.ts +235 -0
package/src/agents/turn-runner-dispatch.test.ts +182 -0
package/src/agents/turn-runner-dispatch.ts +105 -0
package/src/agents/turn-runner.test.ts +1450 -0
package/src/agents/turn-runner.ts +1166 -0
package/src/commands/clean.ts +56 -1
package/src/commands/completions.test.ts +4 -1
package/src/commands/coordinator.test.ts +127 -0
package/src/commands/coordinator.ts +205 -6
package/src/commands/dashboard.test.ts +188 -0
package/src/commands/dashboard.ts +13 -3
package/src/commands/doctor.ts +94 -77
package/src/commands/group.test.ts +94 -0
package/src/commands/group.ts +49 -20
package/src/commands/init.test.ts +8 -0
package/src/commands/init.ts +8 -1
package/src/commands/log.test.ts +56 -11
package/src/commands/log.ts +134 -69
package/src/commands/mail.test.ts +162 -0
package/src/commands/mail.ts +64 -9
package/src/commands/merge.test.ts +112 -1
package/src/commands/merge.ts +17 -4
package/src/commands/monitor.ts +2 -1
package/src/commands/nudge.test.ts +351 -4
package/src/commands/nudge.ts +356 -34
package/src/commands/run.test.ts +43 -7
package/src/commands/serve/build.test.ts +202 -0
package/src/commands/serve/build.ts +206 -0
package/src/commands/serve/coordinator-actions.test.ts +339 -0
package/src/commands/serve/coordinator-actions.ts +408 -0
package/src/commands/serve/dev.test.ts +168 -0
package/src/commands/serve/dev.ts +117 -0
package/src/commands/serve/mail-actions.test.ts +312 -0
package/src/commands/serve/mail-actions.ts +167 -0
package/src/commands/serve/rest.test.ts +1323 -0
package/src/commands/serve/rest.ts +708 -0
package/src/commands/serve/static.ts +51 -0
package/src/commands/serve/ws.test.ts +361 -0
package/src/commands/serve/ws.ts +332 -0
package/src/commands/serve.test.ts +459 -0
package/src/commands/serve.ts +565 -0
package/src/commands/sling.test.ts +85 -1
package/src/commands/sling.ts +153 -64
package/src/commands/status.test.ts +9 -0
package/src/commands/status.ts +12 -4
package/src/commands/stop.test.ts +174 -1
package/src/commands/stop.ts +107 -8
package/src/commands/supervisor.ts +2 -1
package/src/commands/watch.test.ts +49 -4
package/src/commands/watch.ts +153 -28
package/src/commands/worktree.test.ts +319 -3
package/src/commands/worktree.ts +86 -0
package/src/config.test.ts +78 -0
package/src/config.ts +43 -1
package/src/doctor/consistency.test.ts +106 -0
package/src/doctor/consistency.ts +50 -3
package/src/doctor/serve.test.ts +95 -0
package/src/doctor/serve.ts +86 -0
package/src/doctor/types.ts +2 -1
package/src/doctor/watchdog.ts +57 -1
package/src/events/tailer.test.ts +234 -1
package/src/events/tailer.ts +90 -0
package/src/index.ts +53 -6
package/src/json.ts +29 -0
package/src/mail/client.ts +15 -2
package/src/mail/store.test.ts +82 -0
package/src/mail/store.ts +41 -4
package/src/merge/lock.test.ts +149 -0
package/src/merge/lock.ts +140 -0
package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
package/src/runtimes/claude.test.ts +791 -1
package/src/runtimes/claude.ts +323 -1
package/src/runtimes/connections.test.ts +141 -1
package/src/runtimes/connections.ts +73 -4
package/src/runtimes/headless-connection.test.ts +264 -0
package/src/runtimes/headless-connection.ts +158 -0
package/src/runtimes/types.ts +10 -0
package/src/schema-consistency.test.ts +1 -0
package/src/sessions/store.test.ts +390 -24
package/src/sessions/store.ts +184 -19
package/src/test-setup.test.ts +31 -0
package/src/test-setup.ts +28 -0
package/src/types.ts +56 -1
package/src/utils/pid.test.ts +85 -1
package/src/utils/pid.ts +86 -1
package/src/utils/process-scan.test.ts +53 -0
package/src/utils/process-scan.ts +76 -0
package/src/watchdog/daemon.test.ts +1520 -411
package/src/watchdog/daemon.ts +442 -83
package/src/watchdog/health.test.ts +157 -0
package/src/watchdog/health.ts +92 -25
package/src/worktree/process.test.ts +71 -0
package/src/worktree/process.ts +25 -5
package/src/worktree/tmux.test.ts +39 -0
package/src/worktree/tmux.ts +23 -3
package/templates/CLAUDE.md.tmpl +19 -8
package/templates/overlay.md.tmpl +3 -2

package/src/commands/group.ts CHANGED Viewed

@@ -79,6 +79,44 @@ function generateGroupId(): string {
 	return `group-${crypto.randomUUID().slice(0, 8)}`;
 }
+/**
+ * Resolve a group by ID or name.
+ *
+ * Names are not enforced unique by `createGroup`, so live `groups.json` files
+ * contain duplicate names — a naive name lookup would silently pick the wrong
+ * group. Resolution precedence:
+ *   1. Exact ID match wins (generated group IDs are unambiguous, unlike names).
+ *   2. Otherwise filter by name. If exactly one match, return it.
+ *   3. If multiple name matches, prefer a single `active` one. If still
+ *      ambiguous, throw with the matching IDs so the caller can disambiguate
+ *      by passing the group ID.
+ *
+ * @internal Exported for testing.
+ */
+export function resolveGroup(groups: TaskGroup[], identifier: string): TaskGroup {
+	const byId = groups.find((g) => g.id === identifier);
+	if (byId) return byId;
+	const byName = groups.filter((g) => g.name === identifier);
+	if (byName.length === 1) {
+		const only = byName[0];
+		if (only) return only;
+	}
+	if (byName.length > 1) {
+		const active = byName.filter((g) => g.status === "active");
+		if (active.length === 1) {
+			const only = active[0];
+			if (only) return only;
+		}
+		const ids = byName.map((g) => g.id).join(", ");
+		throw new GroupError(
+			`Group name "${identifier}" is ambiguous (matches: ${ids}). Use the group ID.`,
+			{ groupId: identifier },
+		);
+	}
+	throw new GroupError(`Group "${identifier}" not found`, { groupId: identifier });
+}
 /**
  * Create a new task group.
  * @internal Exported for testing.
@@ -140,16 +178,13 @@ export async function addToGroup(
 	}
 	const groups = await loadGroups(projectRoot);
-	const group = groups.find((g) => g.id === groupId);
-	if (!group) {
-		throw new GroupError(`Group "${groupId}" not found`, { groupId });
-	}
+	const group = resolveGroup(groups, groupId);
 	// Check for duplicates against existing members
 	for (const id of issueIds) {
 		if (group.memberIssueIds.includes(id)) {
-			throw new GroupError(`Issue "${id}" is already a member of group "${groupId}"`, {
-				groupId,
+			throw new GroupError(`Issue "${id}" is already a member of group "${group.id}"`, {
+				groupId: group.id,
 			});
 		}
 	}
@@ -187,16 +222,13 @@ export async function removeFromGroup(
 	}
 	const groups = await loadGroups(projectRoot);
-	const group = groups.find((g) => g.id === groupId);
-	if (!group) {
-		throw new GroupError(`Group "${groupId}" not found`, { groupId });
-	}
+	const group = resolveGroup(groups, groupId);
 	// Validate all issues are members
 	for (const id of issueIds) {
 		if (!group.memberIssueIds.includes(id)) {
-			throw new GroupError(`Issue "${id}" is not a member of group "${groupId}"`, {
-				groupId,
+			throw new GroupError(`Issue "${id}" is not a member of group "${group.id}"`, {
+				groupId: group.id,
 			});
 		}
 	}
@@ -204,7 +236,7 @@ export async function removeFromGroup(
 	// Check that removal won't empty the group
 	const remaining = group.memberIssueIds.filter((id) => !issueIds.includes(id));
 	if (remaining.length === 0) {
-		throw new GroupError("Cannot remove all issues from a group", { groupId });
+		throw new GroupError("Cannot remove all issues from a group", { groupId: group.id });
 	}
 	group.memberIssueIds = remaining;
@@ -347,7 +379,7 @@ export function createGroupCommand(): Command {
 	cmd
 		.command("status")
 		.description("Show progress for one or all groups")
-		.argument("[group-id]", "Group ID (optional, shows all if omitted)")
+		.argument("[group-id-or-name]", "Group ID or name (optional, shows all if omitted)")
 		.option("--json", "Output as JSON")
 		.option("--skip-validation", "Skip task validation (for offline use)")
 		.action(
@@ -361,10 +393,7 @@ export function createGroupCommand(): Command {
 				const groups = await loadGroups(projectRoot);
 				if (groupId) {
-					const group = groups.find((g) => g.id === groupId);
-					if (!group) {
-						throw new GroupError(`Group "${groupId}" not found`, { groupId });
-					}
+					const group = resolveGroup(groups, groupId);
 					const progress = await getGroupProgress(projectRoot, group, groups, tracker);
 					if (json) {
 						jsonOutput("group status", { ...progress });
@@ -401,7 +430,7 @@ export function createGroupCommand(): Command {
 	cmd
 		.command("add")
 		.description("Add issues to a group")
-		.argument("<group-id>", "Group ID")
+		.argument("<group-id-or-name>", "Group ID or name")
 		.argument("<ids...>", "Issue IDs to add")
 		.option("--json", "Output as JSON")
 		.option("--skip-validation", "Skip task validation (for offline use)")
@@ -437,7 +466,7 @@ export function createGroupCommand(): Command {
 	cmd
 		.command("remove")
 		.description("Remove issues from a group")
-		.argument("<group-id>", "Group ID")
+		.argument("<group-id-or-name>", "Group ID or name")
 		.argument("<ids...>", "Issue IDs to remove")
 		.option("--json", "Output as JSON")
 		.action(async (groupId: string, ids: string[], opts: { json?: boolean }) => {

package/src/commands/init.test.ts CHANGED Viewed

@@ -353,6 +353,14 @@ describe("initCommand: canonical branch detection", () => {
 		const content = await Bun.file(configPath).text();
 		expect(content).toContain("canonicalBranch: main");
 	});
+	test("generated config opts into headless Claude by default (overstory-caec)", async () => {
+		await initCommand({ _spawner: noopSpawner });
+		const configPath = join(tempDir, ".overstory", "config.yaml");
+		const content = await Bun.file(configPath).text();
+		expect(content).toContain("claudeHeadlessByDefault: true");
+	});
 });
 describe("initCommand: --yes flag", () => {

package/src/commands/init.ts CHANGED Viewed

@@ -816,6 +816,10 @@ export async function initCommand(opts: InitOptions): Promise<void> {
 	config.project.canonicalBranch = canonicalBranch;
 	if (config.runtime) {
 		config.runtime.default = defaultRuntime;
+		// New projects default to headless Claude spawns; the UI (`ov serve`) is the
+		// primary operator surface and tmux is opt-in via `--no-headless`. Existing
+		// projects keep tmux until they edit their config (overstory-caec).
+		config.runtime.claudeHeadlessByDefault = true;
 	}
 	const configYaml = serializeConfigToYaml(config);
@@ -956,5 +960,8 @@ export async function initCommand(opts: InitOptions): Promise<void> {
 	printSuccess("Initialized");
 	printHint("Next: run `ov hooks install` to enable Claude Code hooks.");
-	printHint("Then: run `ov status` to see the current state.");
+	printHint("Then: `ov coordinator start` and `ov serve` — open http://localhost:7321");
+	printHint(
+		"       (UI is the primary operator surface; pass `--no-headless` to ov sling for tmux attach)",
+	);
 }

package/src/commands/log.test.ts CHANGED Viewed

@@ -633,8 +633,55 @@ describe("logCommand", () => {
 		});
 	});
-	test("session-end writes pending-nudge marker for coordinator when lead completes", async () => {
-		// Create sessions.db with a lead agent
+	test("session-end does NOT transition lead to completed (persistent agent)", async () => {
+		// Regression test for overstory-49a7:
+		// The lead's Stop hook fires every turn (interactive Claude Code), not just at
+		// true session end. session-end must NOT mark leads completed, or they vanish
+		// from getActive() after their first turn while their tmux is still alive.
+		const dbPath = join(tempDir, ".overstory", "sessions.db");
+		const session: AgentSession = {
+			id: "session-lead",
+			agentName: "lead-alpha",
+			capability: "lead",
+			worktreePath: tempDir,
+			branchName: "lead-alpha-branch",
+			taskId: "bead-lead-001",
+			tmuxSession: "overstory-lead-alpha",
+			state: "working",
+			pid: 33333,
+			parentAgent: null,
+			depth: 0,
+			runId: null,
+			startedAt: new Date().toISOString(),
+			lastActivity: new Date(Date.now() - 60_000).toISOString(),
+			escalationLevel: 0,
+			stalledSince: null,
+			transcriptPath: null,
+		};
+		const store = createSessionStore(dbPath);
+		store.upsert(session);
+		store.close();
+		await logCommand(["session-end", "--agent", "lead-alpha"]);
+		// Lead should remain 'working', not transition to 'completed'
+		const readStore = createSessionStore(dbPath);
+		const updatedSession = readStore.getByName("lead-alpha");
+		readStore.close();
+		expect(updatedSession).toBeDefined();
+		expect(updatedSession?.state).toBe("working");
+		// But lastActivity should be updated
+		expect(new Date(updatedSession?.lastActivity ?? "").getTime()).toBeGreaterThan(
+			new Date(session.lastActivity).getTime(),
+		);
+	});
+	test("session-end does NOT write pending-nudge marker for leads (moved to ov stop)", async () => {
+		// Regression test for overstory-49a7:
+		// The lead_completed nudge used to fire from the per-turn Stop hook, spamming
+		// the coordinator with false completion signals every turn. It is now emitted
+		// only by `ov stop <lead>` (the real completion signal).
 		const dbPath = join(tempDir, ".overstory", "sessions.db");
 		const session: AgentSession = {
 			id: "session-lead",
@@ -661,17 +708,10 @@ describe("logCommand", () => {
 		await logCommand(["session-end", "--agent", "lead-alpha"]);
-		// Verify the pending-nudge marker was written for the coordinator
+		// No pending-nudge marker should be written from session-end
 		const markerPath = join(tempDir, ".overstory", "pending-nudges", "coordinator.json");
 		const markerFile = Bun.file(markerPath);
-		expect(await markerFile.exists()).toBe(true);
-		const marker = JSON.parse(await markerFile.text());
-		expect(marker.from).toBe("lead-alpha");
-		expect(marker.reason).toBe("lead_completed");
-		expect(marker.subject).toContain("lead-alpha");
-		expect(marker.messageId).toContain("auto-nudge-lead-alpha-");
-		expect(marker.createdAt).toBeDefined();
+		expect(await markerFile.exists()).toBe(false);
 	});
 	test("session-end does NOT write pending-nudge marker for non-lead agents", async () => {
@@ -1312,6 +1352,10 @@ try {
 			stdin: "pipe",
 			stdout: "pipe",
 			stderr: "pipe",
+			// Pin project root to tempDir. Without this, a subprocess started from
+			// inside an `ov sling`-spawned worktree inherits OVERSTORY_PROJECT_ROOT
+			// pointing at the parent project, and writes events to prod's events.db.
+			env: { ...process.env, OVERSTORY_PROJECT_ROOT: tempDir },
 		});
 		// Write the JSON payload to stdin and close
@@ -1501,6 +1545,7 @@ try {
 			stdin: "pipe",
 			stdout: "pipe",
 			stderr: "pipe",
+			env: { ...process.env, OVERSTORY_PROJECT_ROOT: tempDir },
 		});
 		// Write empty string and close immediately

package/src/commands/log.ts CHANGED Viewed

@@ -12,6 +12,7 @@
 import { join } from "node:path";
 import { Command } from "commander";
+import { isStopHookPersistentCapability } from "../agents/capabilities.ts";
 import { updateIdentity } from "../agents/identity.ts";
 import { loadConfig } from "../config.ts";
 import { ValidationError } from "../errors.ts";
@@ -66,8 +67,12 @@ function updateLastActivity(projectRoot: string, agentName: string): void {
 			const session = store.getByName(agentName);
 			if (session) {
 				store.updateLastActivity(agentName);
-				if (session.state === "booting" || session.state === "zombie") {
-					store.updateState(agentName, "working");
+				// Tool-use observed: try booting → working. Matrix-guarded so a
+				// zombie classification (set by watchdog) is NOT silently revived
+				// here — that revival was a contributor to the schizophrenic
+				// state=zombie + tool-use-active symptom in overstory-a993.
+				if (session.state === "booting") {
+					store.tryTransitionState(agentName, "working");
 				}
 			}
 		} finally {
@@ -79,63 +84,144 @@ function updateLastActivity(projectRoot: string, agentName: string): void {
 }
 /**
- * Agent capabilities that run as persistent interactive sessions.
- * The Stop hook fires every turn for these agents (not just at session end),
- * so they must NOT auto-transition to 'completed' on session-end events.
+ * Maximum retry attempts for the session-end transition.
+ *
+ * The Stop hook is the only signal that turns sessions.db state from
+ * "working" to "completed" for headless legacy paths and tmux sessions.
+ * If it loses that signal due to a transient SQLite contention error
+ * (e.g. "database is locked" while the watchdog ticks against the same
+ * file), the row stays in "working" forever and the watchdog later
+ * promotes it to "zombie". Retrying with exponential backoff lets brief
+ * lock contention resolve before we give up. (overstory-e74b)
  */
-const PERSISTENT_CAPABILITIES = new Set(["coordinator", "orchestrator", "monitor"]);
+const TRANSITION_MAX_ATTEMPTS = 5;
+const TRANSITION_BACKOFF_BASE_MS = 50;
 /**
- * Transition agent state to 'completed' in the SessionStore.
- * Called when session-end event fires.
- *
- * Skips the transition for persistent agent types (coordinator, orchestrator, monitor)
- * whose Stop hook fires every turn, not just at true session end.
+ * One attempt at the session-end state transition.
  *
- * Non-fatal: silently ignores errors to avoid breaking hook execution.
+ * Throws on transient failures (e.g. SQLite "database is locked") so the
+ * caller can retry. The body is the original logic from
+ * `transitionToCompleted`.
  */
-function transitionToCompleted(projectRoot: string, agentName: string): void {
+function transitionToCompletedOnce(projectRoot: string, agentName: string): void {
+	const overstoryDir = join(projectRoot, ".overstory");
+	const { store } = openSessionStore(overstoryDir);
 	try {
-		const overstoryDir = join(projectRoot, ".overstory");
-		const { store } = openSessionStore(overstoryDir);
-		try {
-			const session = store.getByName(agentName);
-			if (session && PERSISTENT_CAPABILITIES.has(session.capability)) {
-				// Check if a persistent top-level agent self-exited by verifying the run
-				// is already completed.
-				// If `ov run complete` was called before session-end, the run status is 'completed'
-				// and we should transition the persistent session to completed too.
-				if (
-					(session.capability === "coordinator" || session.capability === "orchestrator") &&
-					session.runId
-				) {
-					const runStore = createRunStore(join(overstoryDir, "sessions.db"));
-					try {
-						const run = runStore.getRun(session.runId);
-						if (run && run.status === "completed") {
-							// Self-exit: the persistent agent called ov run complete before session ended
-							store.updateState(agentName, "completed");
-							store.updateLastActivity(agentName);
-							return;
-						}
-					} finally {
-						runStore.close();
+		const session = store.getByName(agentName);
+		if (session && isStopHookPersistentCapability(session.capability)) {
+			// Check if a persistent top-level agent self-exited by verifying the run
+			// is already completed.
+			// If `ov run complete` was called before session-end, the run status is 'completed'
+			// and we should transition the persistent session to completed too.
+			if (
+				(session.capability === "coordinator" || session.capability === "orchestrator") &&
+				session.runId
+			) {
+				const runStore = createRunStore(join(overstoryDir, "sessions.db"));
+				try {
+					const run = runStore.getRun(session.runId);
+					if (run && run.status === "completed") {
+						// Self-exit: the persistent agent called ov run complete before session ended
+						store.updateState(agentName, "completed");
+						store.updateLastActivity(agentName);
+						return;
 					}
+				} finally {
+					runStore.close();
 				}
-				// Normal persistent agent: only update activity, don't mark completed
-				store.updateLastActivity(agentName);
-				return;
 			}
-			store.updateState(agentName, "completed");
+			// Normal persistent agent: only update activity, don't mark completed
 			store.updateLastActivity(agentName);
+			return;
+		}
+		store.updateState(agentName, "completed");
+		store.updateLastActivity(agentName);
+	} finally {
+		store.close();
+	}
+}
+/**
+ * Best-effort: log a session-end hook failure to events.db so it surfaces in
+ * `ov errors` and trace timelines. Swallows secondary errors (events.db may
+ * also be locked when the primary write failed).
+ */
+async function logHookFailure(
+	projectRoot: string,
+	agentName: string,
+	hookName: string,
+	error: unknown,
+	attempts: number,
+): Promise<void> {
+	try {
+		const eventsDbPath = join(projectRoot, ".overstory", "events.db");
+		const eventStore = createEventStore(eventsDbPath);
+		try {
+			eventStore.insert({
+				runId: null,
+				agentName,
+				sessionId: null,
+				eventType: "error",
+				toolName: null,
+				toolArgs: null,
+				toolDurationMs: null,
+				level: "error",
+				data: JSON.stringify({
+					hook: hookName,
+					attempts,
+					message: error instanceof Error ? error.message : String(error),
+				}),
+			});
 		} finally {
-			store.close();
+			eventStore.close();
 		}
 	} catch {
-		// Non-fatal: don't break logging if session update fails
+		// Non-fatal: events.db may also be unavailable when the primary write failed.
 	}
 }
+/**
+ * Transition agent state to 'completed' in the SessionStore.
+ * Called when session-end event fires.
+ *
+ * Retries on transient SQLite contention with exponential backoff
+ * (50/100/200/400ms between the 5 attempts). On persistent failure, records an `error` event
+ * to events.db so the missed signal shows up in observability tooling and
+ * the watchdog's stale-but-tmux-dead fallback can recognize it.
+ * (overstory-e74b)
+ *
+ * Skips the transition for capabilities in `STOP_HOOK_PERSISTENT_CAPABILITIES`
+ * (coordinator, orchestrator, monitor, lead) whose Stop hook fires every model
+ * turn rather than once at true session end. See
+ * `src/agents/capabilities.ts` for the full rationale and consumer list.
+ *
+ * Non-fatal: silently ignores errors to avoid breaking hook execution.
+ */
+async function transitionToCompleted(projectRoot: string, agentName: string): Promise<void> {
+	let lastError: unknown;
+	for (let attempt = 0; attempt < TRANSITION_MAX_ATTEMPTS; attempt++) {
+		try {
+			transitionToCompletedOnce(projectRoot, agentName);
+			return;
+		} catch (err) {
+			lastError = err;
+			if (attempt < TRANSITION_MAX_ATTEMPTS - 1) {
+				await Bun.sleep(TRANSITION_BACKOFF_BASE_MS * 2 ** attempt);
+			}
+		}
+	}
+	// All retries failed — surface the missed signal via events.db.
+	await logHookFailure(
+		projectRoot,
+		agentName,
+		"session-end:transitionToCompleted",
+		lastError,
+		TRANSITION_MAX_ATTEMPTS,
+	);
+}
 /**
  * Look up an agent's session record.
  * Returns null if not found.
@@ -629,8 +715,9 @@ async function runLog(opts: {
 		}
 		case "session-end":
 			logger.info("session.end", { agentName: opts.agent });
-			// Transition agent state to completed
-			transitionToCompleted(config.project.root, opts.agent);
+			// Transition agent state to completed (with retry/backoff and
+			// events.db fallback on persistent failure — overstory-e74b).
+			await transitionToCompleted(config.project.root, opts.agent);
 			// Look up agent session for identity update and metrics recording
 			{
 				const agentSession = getAgentSession(config.project.root, opts.agent);
@@ -647,28 +734,6 @@ async function runLog(opts: {
 					// Non-fatal: identity may not exist for this agent
 				}
-				// Auto-nudge coordinator when a lead completes so it wakes up
-				// to process merge_ready / worker_done messages without waiting
-				// for user input (see decision mx-728f8d).
-				if (agentSession?.capability === "lead") {
-					try {
-						const nudgesDir = join(config.project.root, ".overstory", "pending-nudges");
-						const { mkdir } = await import("node:fs/promises");
-						await mkdir(nudgesDir, { recursive: true });
-						const markerPath = join(nudgesDir, "coordinator.json");
-						const marker = {
-							from: opts.agent,
-							reason: "lead_completed",
-							subject: `Lead ${opts.agent} completed — check mail for merge_ready/worker_done`,
-							messageId: `auto-nudge-${opts.agent}-${Date.now()}`,
-							createdAt: new Date().toISOString(),
-						};
-						await Bun.write(markerPath, `${JSON.stringify(marker, null, "\t")}\n`);
-					} catch {
-						// Non-fatal: nudge failure should not break session-end
-					}
-				}
 				// Record session metrics (with optional token data from transcript)
 				if (agentSession) {
 					// NOTE: We intentionally do NOT auto-complete the run here for coordinator agents.
@@ -730,7 +795,7 @@ async function runLog(opts: {
 					// Auto-record expertise via mulch learn + record (post-session).
 					// Skip persistent agents whose Stop hook fires every turn.
-					if (!PERSISTENT_CAPABILITIES.has(agentSession.capability)) {
+					if (!isStopHookPersistentCapability(agentSession.capability)) {
 						try {
 							const mulchClient = createMulchClient(config.project.root);
 							const mailDbPath = join(config.project.root, ".overstory", "mail.db");
@@ -751,7 +816,7 @@ async function runLog(opts: {
 					// Append outcomes to applied mulch records (outcome feedback loop).
 					// Reads applied-records.json written by sling.ts at spawn time.
-					if (!PERSISTENT_CAPABILITIES.has(agentSession.capability)) {
+					if (!isStopHookPersistentCapability(agentSession.capability)) {
 						try {
 							const mulchClient = createMulchClient(config.project.root);
 							await appendOutcomeToAppliedRecords({