npm - @os-eco/overstory-cli - Versions diffs - 0.7.0 → 0.7.3 - Mend

@os-eco/overstory-cli 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

package/README.md +7 -6
package/agents/builder.md +1 -1
package/agents/coordinator.md +12 -11
package/agents/lead.md +6 -6
package/agents/monitor.md +4 -4
package/agents/reviewer.md +1 -1
package/agents/scout.md +5 -5
package/agents/supervisor.md +36 -32
package/package.json +1 -1
package/src/agents/guard-rules.ts +97 -0
package/src/agents/hooks-deployer.test.ts +6 -5
package/src/agents/hooks-deployer.ts +7 -90
package/src/agents/identity.test.ts +3 -2
package/src/agents/manifest.test.ts +4 -3
package/src/agents/overlay.test.ts +10 -9
package/src/agents/overlay.ts +5 -5
package/src/commands/agents.test.ts +10 -4
package/src/commands/clean.test.ts +3 -0
package/src/commands/completions.test.ts +8 -5
package/src/commands/completions.ts +38 -2
package/src/commands/coordinator.test.ts +1 -0
package/src/commands/coordinator.ts +15 -11
package/src/commands/costs.test.ts +9 -3
package/src/commands/dashboard.test.ts +265 -6
package/src/commands/dashboard.ts +367 -64
package/src/commands/doctor.test.ts +3 -2
package/src/commands/errors.test.ts +3 -2
package/src/commands/feed.test.ts +3 -2
package/src/commands/feed.ts +2 -29
package/src/commands/init.test.ts +1 -2
package/src/commands/init.ts +1 -8
package/src/commands/inspect.test.ts +17 -2
package/src/commands/log.test.ts +262 -8
package/src/commands/log.ts +232 -110
package/src/commands/logs.test.ts +3 -2
package/src/commands/mail.test.ts +8 -2
package/src/commands/metrics.test.ts +4 -3
package/src/commands/monitor.ts +15 -11
package/src/commands/nudge.test.ts +4 -2
package/src/commands/prime.test.ts +4 -2
package/src/commands/prime.ts +6 -2
package/src/commands/replay.test.ts +3 -2
package/src/commands/run.test.ts +3 -1
package/src/commands/sling.test.ts +142 -1
package/src/commands/sling.ts +145 -24
package/src/commands/status.test.ts +9 -8
package/src/commands/stop.test.ts +1 -0
package/src/commands/supervisor.ts +19 -12
package/src/commands/trace.test.ts +4 -2
package/src/commands/watch.test.ts +3 -2
package/src/commands/worktree.test.ts +9 -0
package/src/config.test.ts +3 -3
package/src/config.ts +29 -0
package/src/doctor/agents.test.ts +3 -2
package/src/doctor/consistency.test.ts +14 -0
package/src/doctor/logs.test.ts +3 -2
package/src/doctor/structure.test.ts +3 -2
package/src/e2e/init-sling-lifecycle.test.ts +3 -5
package/src/index.ts +3 -1
package/src/logging/color.ts +1 -1
package/src/logging/format.test.ts +110 -0
package/src/logging/format.ts +42 -1
package/src/logging/logger.test.ts +3 -2
package/src/mail/broadcast.test.ts +1 -0
package/src/mail/client.test.ts +3 -2
package/src/mail/store.test.ts +3 -2
package/src/merge/queue.test.ts +3 -2
package/src/merge/resolver.test.ts +39 -0
package/src/merge/resolver.ts +24 -5
package/src/mulch/client.test.ts +63 -2
package/src/mulch/client.ts +62 -1
package/src/runtimes/claude.test.ts +5 -4
package/src/runtimes/pi-guards.test.ts +457 -0
package/src/runtimes/pi-guards.ts +349 -0
package/src/runtimes/pi.test.ts +620 -0
package/src/runtimes/pi.ts +244 -0
package/src/runtimes/registry.test.ts +33 -0
package/src/runtimes/registry.ts +15 -2
package/src/runtimes/types.ts +63 -0
package/src/schema-consistency.test.ts +5 -2
package/src/sessions/compat.test.ts +3 -2
package/src/sessions/compat.ts +1 -0
package/src/sessions/store.test.ts +34 -2
package/src/sessions/store.ts +37 -4
package/src/test-helpers.ts +20 -1
package/src/types.ts +17 -0
package/src/watchdog/daemon.test.ts +11 -7
package/src/watchdog/daemon.ts +1 -1
package/src/watchdog/health.test.ts +1 -0
package/src/watchdog/triage.test.ts +3 -2
package/src/watchdog/triage.ts +14 -4

package/src/commands/replay.test.ts CHANGED Viewed

@@ -9,11 +9,12 @@
  */
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import { mkdtemp, rm } from "node:fs/promises";
+import { mkdtemp } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { ValidationError } from "../errors.ts";
 import { createEventStore } from "../events/store.ts";
+import { cleanupTempDir } from "../test-helpers.ts";
 import type { InsertEvent } from "../types.ts";
 import { replayCommand } from "./replay.ts";
@@ -64,7 +65,7 @@ describe("replayCommand", () => {
 	afterEach(async () => {
 		process.stdout.write = originalWrite;
 		process.chdir(originalCwd);
-		await rm(tempDir, { recursive: true, force: true });
+		await cleanupTempDir(tempDir);
 	});
 	function output(): string {

package/src/commands/run.test.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import type { SessionStore } from "../sessions/store.ts";
 import { createRunStore, createSessionStore } from "../sessions/store.ts";
+import { cleanupTempDir } from "../test-helpers.ts";
 import type { AgentSession, InsertRun, RunStore } from "../types.ts";
 let tempDir: string;
@@ -31,7 +32,7 @@ beforeEach(async () => {
 afterEach(async () => {
 	runStore.close();
 	sessionStore.close();
-	await rm(tempDir, { recursive: true, force: true });
+	await cleanupTempDir(tempDir);
 });
 /** Write a run ID to current-run.txt. */
@@ -79,6 +80,7 @@ function makeSession(overrides: Partial<AgentSession> = {}): AgentSession {
 		lastActivity: "2026-02-13T10:30:00.000Z",
 		escalationLevel: 0,
 		stalledSince: null,
+		transcriptPath: null,
 		...overrides,
 	};
 }

package/src/commands/sling.test.ts CHANGED Viewed

@@ -14,9 +14,11 @@ import {
 	checkParentAgentLimit,
 	checkRunSessionLimit,
 	checkTaskLock,
+	extractMulchRecordIds,
 	inferDomainsFromFiles,
 	isRunningAsRoot,
 	parentHasScouts,
+	shouldShowScoutWarning,
 	validateHierarchy,
 } from "./sling.ts";
@@ -275,6 +277,65 @@ describe("parentHasScouts", () => {
 	});
 });
+/**
+ * Tests for shouldShowScoutWarning (overstory-6eyw).
+ *
+ * shouldShowScoutWarning determines whether the "spawning builder without scouts"
+ * warning should be emitted. It is a pure function extracted from slingCommand
+ * so it can be suppressed via --no-scout-check or --skip-scout.
+ */
+describe("shouldShowScoutWarning", () => {
+	function makeSession(
+		parentAgent: string | null,
+		capability: string,
+	): { parentAgent: string | null; capability: string } {
+		return { parentAgent, capability };
+	}
+	const withScout = [makeSession("lead-alpha", "scout"), makeSession("lead-alpha", "builder")];
+	const withoutScout = [makeSession("lead-alpha", "builder")];
+	const empty: { parentAgent: string | null; capability: string }[] = [];
+	test("returns true when builder has parent but no scouts", () => {
+		expect(shouldShowScoutWarning("builder", "lead-alpha", withoutScout, false, false)).toBe(true);
+	});
+	test("returns false when builder has parent and scouts exist", () => {
+		expect(shouldShowScoutWarning("builder", "lead-alpha", withScout, false, false)).toBe(false);
+	});
+	test("returns false when capability is not builder", () => {
+		expect(shouldShowScoutWarning("scout", "lead-alpha", empty, false, false)).toBe(false);
+		expect(shouldShowScoutWarning("reviewer", "lead-alpha", empty, false, false)).toBe(false);
+		expect(shouldShowScoutWarning("lead", "lead-alpha", empty, false, false)).toBe(false);
+	});
+	test("returns false when parentAgent is null (coordinator spawn)", () => {
+		expect(shouldShowScoutWarning("builder", null, withoutScout, false, false)).toBe(false);
+	});
+	test("returns false when noScoutCheck is true (flag suppresses warning)", () => {
+		expect(shouldShowScoutWarning("builder", "lead-alpha", withoutScout, true, false)).toBe(false);
+	});
+	test("returns false when skipScout is true (lead opted out of scouting)", () => {
+		expect(shouldShowScoutWarning("builder", "lead-alpha", withoutScout, false, true)).toBe(false);
+	});
+	test("returns false when both noScoutCheck and skipScout are true", () => {
+		expect(shouldShowScoutWarning("builder", "lead-alpha", withoutScout, true, true)).toBe(false);
+	});
+	test("returns false with empty sessions and no parent", () => {
+		expect(shouldShowScoutWarning("builder", null, empty, false, false)).toBe(false);
+	});
+	test("returns true with empty sessions and a parent (no scouts ever spawned)", () => {
+		expect(shouldShowScoutWarning("builder", "lead-alpha", empty, false, false)).toBe(true);
+	});
+});
 /**
  * Tests for hierarchy validation in sling.
  *
@@ -369,6 +430,7 @@ function makeBeaconOpts(overrides?: Partial<BeaconOptions>): BeaconOptions {
 		taskId: "overstory-abc",
 		parentAgent: null,
 		depth: 0,
+		instructionPath: ".claude/CLAUDE.md",
 		...overrides,
 	};
 }
@@ -409,12 +471,20 @@ describe("buildBeacon", () => {
 		const opts = makeBeaconOpts({ agentName: "scout-1", taskId: "overstory-xyz" });
 		const beacon = buildBeacon(opts);
-		expect(beacon).toContain("read .claude/CLAUDE.md");
+		expect(beacon).toContain(`read ${opts.instructionPath}`);
 		expect(beacon).toContain("mulch prime");
 		expect(beacon).toContain("ov mail check --agent scout-1");
 		expect(beacon).toContain("begin task overstory-xyz");
 	});
+	test("uses custom instructionPath in startup instructions", () => {
+		const opts = makeBeaconOpts({ instructionPath: "AGENTS.md" });
+		const beacon = buildBeacon(opts);
+		expect(beacon).toContain("read AGENTS.md");
+		expect(beacon).not.toContain(".claude/CLAUDE.md");
+	});
 	test("uses agent name in mail check command", () => {
 		const beacon = buildBeacon(makeBeaconOpts({ agentName: "reviewer-beta" }));
@@ -1001,6 +1071,7 @@ function makeAutoDispatchOpts(overrides?: Partial<AutoDispatchOptions>): AutoDis
 		capability: "builder",
 		specPath: "/path/to/spec.md",
 		parentAgent: "lead-alpha",
+		instructionPath: ".claude/CLAUDE.md",
 		...overrides,
 	};
 }
@@ -1013,6 +1084,7 @@ describe("buildAutoDispatch", () => {
 			capability: "builder",
 			specPath: "/path/to/spec.md",
 			parentAgent: "lead-alpha",
+			instructionPath: ".claude/CLAUDE.md",
 		});
 		expect(dispatch.from).toBe("lead-alpha");
 		expect(dispatch.to).toBe("builder-1");
@@ -1027,6 +1099,7 @@ describe("buildAutoDispatch", () => {
 			capability: "lead",
 			specPath: null,
 			parentAgent: null,
+			instructionPath: ".claude/CLAUDE.md",
 		});
 		expect(dispatch.from).toBe("orchestrator");
 		expect(dispatch.body).toContain("No spec file");
@@ -1039,6 +1112,7 @@ describe("buildAutoDispatch", () => {
 			capability: "scout",
 			specPath: null,
 			parentAgent: "lead-alpha",
+			instructionPath: ".claude/CLAUDE.md",
 		});
 		expect(dispatch.body).toContain("scout");
 	});
@@ -1050,6 +1124,7 @@ describe("buildAutoDispatch", () => {
 			capability: "builder",
 			specPath: "/abs/path/to/spec.md",
 			parentAgent: "lead-alpha",
+			instructionPath: ".claude/CLAUDE.md",
 		});
 		expect(dispatch.body).toContain("/abs/path/to/spec.md");
 	});
@@ -1133,3 +1208,69 @@ describe("sling runtime integration", () => {
 		expect(state.phase).toBe("loading");
 	});
 });
+describe("extractMulchRecordIds", () => {
+	test("returns empty array for empty string", () => {
+		expect(extractMulchRecordIds("")).toEqual([]);
+	});
+	test("returns empty when no mx-IDs present", () => {
+		const text = "## agents (2 records)\n- convention without ID";
+		expect(extractMulchRecordIds(text)).toEqual([]);
+	});
+	test("extracts single ID from a domain", () => {
+		const text = "## agents (1 records)\n- [convention] Some. (mx-abc123)";
+		expect(extractMulchRecordIds(text)).toEqual([{ id: "mx-abc123", domain: "agents" }]);
+	});
+	test("extracts multiple IDs from same domain", () => {
+		const text = ["## typescript", "- first. (mx-aaa111)", "- second. (mx-bbb222)"].join("\n");
+		expect(extractMulchRecordIds(text)).toEqual([
+			{ id: "mx-aaa111", domain: "typescript" },
+			{ id: "mx-bbb222", domain: "typescript" },
+		]);
+	});
+	test("extracts IDs from multiple domains", () => {
+		const text = ["## agents", "- agent. (mx-111aaa)", "## typescript", "- ts. (mx-222bbb)"].join(
+			"\n",
+		);
+		expect(extractMulchRecordIds(text)).toEqual([
+			{ id: "mx-111aaa", domain: "agents" },
+			{ id: "mx-222bbb", domain: "typescript" },
+		]);
+	});
+	test("ignores non-domain headings with no mx-IDs", () => {
+		const text = [
+			"## Quick Reference",
+			"- use mulch search",
+			"## agents",
+			"- real. (mx-deadbeef)",
+		].join("\n");
+		expect(extractMulchRecordIds(text)).toEqual([{ id: "mx-deadbeef", domain: "agents" }]);
+	});
+	test("deduplicates repeated pairs", () => {
+		const text = ["## agents", "- first. (mx-aabbcc)", "- dup. (mx-aabbcc)"].join("\n");
+		expect(extractMulchRecordIds(text)).toEqual([{ id: "mx-aabbcc", domain: "agents" }]);
+	});
+	test("handles realistic ml prime output", () => {
+		const text = [
+			"## agents (3 records, updated just now)",
+			"- [convention] lead.md convention. (mx-636708)",
+			"- [convention] writeOverlay(). (mx-b7fa3d)",
+			"## typescript (2 records, updated just now)",
+			"- [convention] No any types. (mx-2ce43d)",
+			"## Quick Reference",
+			"- mulch search",
+		].join("\n");
+		const result = extractMulchRecordIds(text);
+		expect(result).toHaveLength(3);
+		expect(result).toContainEqual({ id: "mx-636708", domain: "agents" });
+		expect(result).toContainEqual({ id: "mx-b7fa3d", domain: "agents" });
+		expect(result).toContainEqual({ id: "mx-2ce43d", domain: "typescript" });
+	});
+});

package/src/commands/sling.ts CHANGED Viewed

@@ -20,7 +20,6 @@
 import { mkdir } from "node:fs/promises";
 import { join, resolve } from "node:path";
-import { deployHooks } from "../agents/hooks-deployer.ts";
 import { createIdentity, loadIdentity } from "../agents/identity.ts";
 import { createManifestLoader, resolveModel } from "../agents/manifest.ts";
 import { writeOverlay } from "../agents/overlay.ts";
@@ -124,6 +123,7 @@ export interface SlingOptions {
 	skipReview?: boolean;
 	dispatchMaxAgents?: string;
 	runtime?: string;
+	noScoutCheck?: boolean;
 }
 export interface AutoDispatchOptions {
@@ -132,6 +132,7 @@ export interface AutoDispatchOptions {
 	capability: string;
 	specPath: string | null;
 	parentAgent: string | null;
+	instructionPath: string;
 }
 /**
@@ -154,7 +155,7 @@ export function buildAutoDispatch(opts: AutoDispatchOptions): {
 	const body = [
 		`You have been assigned task ${opts.taskId} as a ${opts.capability} agent.`,
 		specLine,
-		`Read your overlay at .claude/CLAUDE.md and begin immediately.`,
+		`Read your overlay at ${opts.instructionPath} and begin immediately.`,
 	].join(" ");
 	return {
@@ -174,6 +175,7 @@ export interface BeaconOptions {
 	taskId: string;
 	parentAgent: string | null;
 	depth: number;
+	instructionPath: string;
 }
 /**
@@ -198,7 +200,7 @@ export function buildBeacon(opts: BeaconOptions): string {
 	const parts = [
 		`[OVERSTORY] ${opts.agentName} (${opts.capability}) ${timestamp} task:${opts.taskId}`,
 		`Depth: ${opts.depth} | Parent: ${parent}`,
-		`Startup: read .claude/CLAUDE.md, run mulch prime, check mail (ov mail check --agent ${opts.agentName}), then begin task ${opts.taskId}`,
+		`Startup: read ${opts.instructionPath}, run mulch prime, check mail (ov mail check --agent ${opts.agentName}), then begin task ${opts.taskId}`,
 	];
 	return parts.join(" — ");
 }
@@ -214,6 +216,38 @@ export function parentHasScouts(
 	return sessions.some((s) => s.parentAgent === parentAgent && s.capability === "scout");
 }
+/**
+ * Determine whether to emit the scout-before-build warning.
+ *
+ * Returns true when all of the following hold:
+ *  - The incoming capability is "builder" (only builders trigger the check)
+ *  - A parent agent is set (orphaned builders don't trigger it)
+ *  - The parent has not yet spawned any scouts
+ *  - noScoutCheck is false (caller has not suppressed the warning)
+ *  - skipScout is false (the lead is not intentionally running without scouts)
+ *
+ * Extracted from slingCommand for testability (overstory-6eyw).
+ *
+ * @param capability - The requested agent capability
+ * @param parentAgent - The --parent flag value (null = coordinator/human)
+ * @param sessions - All sessions (not just active) for parentHasScouts query
+ * @param noScoutCheck - True when --no-scout-check flag is set
+ * @param skipScout - True when --skip-scout flag is set (lead opted out of scouting)
+ */
+export function shouldShowScoutWarning(
+	capability: string,
+	parentAgent: string | null,
+	sessions: ReadonlyArray<{ parentAgent: string | null; capability: string }>,
+	noScoutCheck: boolean,
+	skipScout: boolean,
+): boolean {
+	if (capability !== "builder") return false;
+	if (parentAgent === null) return false;
+	if (noScoutCheck) return false;
+	if (skipScout) return false;
+	return !parentHasScouts(sessions, parentAgent);
+}
 /**
  * Check if any active agent is already working on the given task ID.
  * Returns the agent name if locked, or null if the task is free.
@@ -289,7 +323,7 @@ export function checkParentAgentLimit(
  *
  * When parentAgent is null, the caller is the coordinator or a human.
  * Only "lead" capability is allowed in that case. All other capabilities
- * (builder, scout, reviewer, merger) must be spawned by a lead or supervisor
+ * (builder, scout, reviewer, merger) must be spawned by a lead
  * that passes --parent.
  *
  * @param parentAgent - The --parent flag value (null = coordinator/human)
@@ -318,6 +352,43 @@ export function validateHierarchy(
 	}
 }
+/**
+ * Extract mulch record IDs and their domains from mulch prime output text.
+ * Parses the markdown structure produced by ml prime: domain headings
+ * (## <name>) followed by record lines containing (mx-XXXXXX) identifiers.
+ * @param primeText - The output text from ml prime
+ * @returns Array of {id, domain} pairs. Deduplicated.
+ */
+export function extractMulchRecordIds(primeText: string): Array<{ id: string; domain: string }> {
+	const results: Array<{ id: string; domain: string }> = [];
+	const seen = new Set<string>();
+	let currentDomain = "";
+	for (const line of primeText.split("\n")) {
+		const domainMatch = line.match(/^## ([\w-]+)/);
+		if (domainMatch) {
+			currentDomain = domainMatch[1] ?? "";
+			continue;
+		}
+		if (currentDomain) {
+			const idRegex = /\(mx-([a-f0-9]+)\)/g;
+			let match = idRegex.exec(line);
+			while (match !== null) {
+				const shortId = match[1] ?? "";
+				if (shortId) {
+					const key = `${currentDomain}:mx-${shortId}`;
+					if (!seen.has(key)) {
+						seen.add(key);
+						results.push({ id: `mx-${shortId}`, domain: currentDomain });
+					}
+				}
+				match = idRegex.exec(line);
+			}
+		}
+	}
+	return results;
+}
 /**
  * Entry point for `ov sling <task-id> [flags]`.
  *
@@ -543,7 +614,16 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 		// 5c. Structural enforcement: warn when a lead spawns a builder without prior scouts.
 		// This is a non-blocking warning — it does not prevent the spawn, but surfaces
 		// the scout-skip pattern so agents and operators can see it happening.
-		if (capability === "builder" && parentAgent && !parentHasScouts(store.getAll(), parentAgent)) {
+		// Use --no-scout-check to suppress this warning when intentionally skipping scouts.
+		if (
+			shouldShowScoutWarning(
+				capability,
+				parentAgent,
+				store.getAll(),
+				opts.noScoutCheck ?? false,
+				skipScout,
+			)
+		) {
 			process.stderr.write(
 				`Warning: "${parentAgent}" is spawning builder "${name}" without having spawned any scouts.\n`,
 			);
@@ -595,7 +675,10 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 		if (config.mulch.enabled && fileScope.length > 0) {
 			try {
 				const mulch = createMulchClient(config.project.root);
-				mulchExpertise = await mulch.prime(undefined, undefined, { files: fileScope });
+				mulchExpertise = await mulch.prime(undefined, undefined, {
+					files: fileScope,
+					sortByScore: true,
+				});
 			} catch {
 				// Non-fatal: mulch expertise is supplementary context
 				mulchExpertise = undefined;
@@ -629,8 +712,11 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			trackerName: resolvedBackend,
 		};
+		// Resolve runtime before writeOverlay so we can pass runtime.instructionPath
+		const runtime = getRuntime(opts.runtime, config);
 		try {
-			await writeOverlay(worktreePath, overlayConfig, config.project.root);
+			await writeOverlay(worktreePath, overlayConfig, config.project.root, runtime.instructionPath);
 		} catch (err) {
 			// Clean up the orphaned worktree created in step 7 (overstory-p4st)
 			try {
@@ -646,8 +732,16 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			throw err;
 		}
-		// 9. Deploy hooks config (capability-specific guards)
-		await deployHooks(worktreePath, name, capability, config.project.qualityGates);
+		// 9. Resolve runtime + model (needed for deployConfig, spawn, and beacon)
+		const resolvedModel = resolveModel(config, manifest, capability, agentDef.model);
+		// 9a. Deploy hooks config (capability-specific guards)
+		await runtime.deployConfig(worktreePath, undefined, {
+			agentName: name,
+			capability,
+			worktreePath,
+			qualityGates: config.project.qualityGates,
+		});
 		// 9b. Send auto-dispatch mail so it exists when SessionStart hook fires.
 		// This eliminates the race where coordinator sends dispatch AFTER agent boots.
@@ -657,6 +751,7 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			capability,
 			specPath: absoluteSpecPath,
 			parentAgent,
+			instructionPath: runtime.instructionPath,
 		});
 		const mailStore = createMailStore(join(overstoryDir, "mail.db"));
 		try {
@@ -696,13 +791,27 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			});
 		}
-		// 11b. Preflight: verify tmux is available before attempting session creation
+		// 11b. Save applied mulch record IDs for session-end outcome tracking.
+		// Written to .overstory/agents/{name}/applied-records.json so log.ts
+		// can append outcomes when the session completes.
+		if (mulchExpertise) {
+			const appliedRecords = extractMulchRecordIds(mulchExpertise);
+			if (appliedRecords.length > 0) {
+				const appliedRecordsPath = join(identityBaseDir, name, "applied-records.json");
+				const appliedData = { taskId, agentName: name, capability, records: appliedRecords };
+				try {
+					await Bun.write(appliedRecordsPath, `${JSON.stringify(appliedData, null, "\t")}\n`);
+				} catch {
+					// Non-fatal: outcome tracking is supplementary context
+				}
+			}
+		}
+		// 11c. Preflight: verify tmux is available before attempting session creation
 		await ensureTmuxAvailable();
 		// 12. Create tmux session running claude in interactive mode
 		const tmuxSessionName = `overstory-${config.project.name}-${name}`;
-		const resolvedModel = resolveModel(config, manifest, capability, agentDef.model);
-		const runtime = getRuntime(opts.runtime, config);
 		const spawnCmd = runtime.buildSpawnCommand({
 			model: resolvedModel.model,
 			permissionMode: "bypass",
@@ -740,6 +849,7 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			lastActivity: new Date().toISOString(),
 			escalationLevel: 0,
 			stalledSince: null,
+			transcriptPath: null,
 		};
 		store.upsert(session);
@@ -765,6 +875,7 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			taskId,
 			parentAgent,
 			depth,
+			instructionPath: runtime.instructionPath,
 		});
 		await sendKeys(tmuxSessionName, beacon);
@@ -780,20 +891,30 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 		// screen (detectReady returns "ready"), resend the beacon. Claude Code's TUI
 		// sometimes consumes the Enter keystroke during late initialization, swallowing
 		// the beacon text entirely (overstory-3271).
-		const verifyAttempts = 5;
-		for (let v = 0; v < verifyAttempts; v++) {
-			await Bun.sleep(2_000);
-			const paneContent = await capturePaneContent(tmuxSessionName);
-			if (paneContent) {
-				const readyState = runtime.detectReady(paneContent);
-				if (readyState.phase !== "ready") {
-					break; // Agent is processing — beacon was received
+		//
+		// Skipped for runtimes that return false from requiresBeaconVerification().
+		// Pi's TUI idle and processing states are indistinguishable via detectReady
+		// (both show "pi v..." header and the token-usage status bar), so the loop
+		// would incorrectly conclude the beacon was not received and spam duplicate
+		// startup messages.
+		const needsVerification =
+			!runtime.requiresBeaconVerification || runtime.requiresBeaconVerification();
+		if (needsVerification) {
+			const verifyAttempts = 5;
+			for (let v = 0; v < verifyAttempts; v++) {
+				await Bun.sleep(2_000);
+				const paneContent = await capturePaneContent(tmuxSessionName);
+				if (paneContent) {
+					const readyState = runtime.detectReady(paneContent);
+					if (readyState.phase !== "ready") {
+						break; // Agent is processing — beacon was received
+					}
 				}
+				// Still at welcome/idle screen — resend beacon
+				await sendKeys(tmuxSessionName, beacon);
+				await Bun.sleep(1_000);
+				await sendKeys(tmuxSessionName, ""); // Follow-up Enter
 			}
-			// Still at welcome/idle screen — resend beacon
-			await sendKeys(tmuxSessionName, beacon);
-			await Bun.sleep(1_000);
-			await sendKeys(tmuxSessionName, ""); // Follow-up Enter
 		}
 		// 14. Output result

package/src/commands/status.test.ts CHANGED Viewed

@@ -1,10 +1,10 @@
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import { mkdir, mkdtemp, rm } from "node:fs/promises";
+import { mkdir, mkdtemp } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { stripAnsi } from "../logging/color.ts";
 import { createSessionStore } from "../sessions/store.ts";
-import { createTempGitRepo } from "../test-helpers.ts";
+import { cleanupTempDir, createTempGitRepo } from "../test-helpers.ts";
 import type { AgentSession } from "../types.ts";
 import {
 	gatherStatus,
@@ -40,6 +40,7 @@ function makeAgent(overrides: Partial<AgentSession> = {}): AgentSession {
 		lastActivity: new Date().toISOString(),
 		escalationLevel: 0,
 		stalledSince: null,
+		transcriptPath: null,
 		...overrides,
 	};
 }
@@ -343,7 +344,7 @@ describe("run scoping", () => {
 			// out-of-scope builder must NOT appear
 			expect(names).not.toContain("builder-2");
 		} finally {
-			await rm(tempDir, { recursive: true, force: true });
+			await cleanupTempDir(tempDir);
 		}
 	});
 });
@@ -390,7 +391,7 @@ describe("--watch deprecation", () => {
 		} finally {
 			process.stderr.write = originalStderr;
 			process.chdir(originalCwd);
-			await rm(tmpDir, { recursive: true, force: true });
+			await cleanupTempDir(tmpDir);
 		}
 		const err = stderrChunks.join("");
@@ -431,7 +432,7 @@ describe("gatherStatus reconciliation", () => {
 			expect(agent).toBeDefined();
 			expect(agent?.state).toBe("zombie");
 		} finally {
-			await rm(tempDir, { recursive: true, force: true });
+			await cleanupTempDir(tempDir);
 		}
 	});
@@ -460,7 +461,7 @@ describe("gatherStatus reconciliation", () => {
 			expect(agent).toBeDefined();
 			expect(agent?.state).toBe("completed");
 		} finally {
-			await rm(tempDir, { recursive: true, force: true });
+			await cleanupTempDir(tempDir);
 		}
 	});
@@ -490,7 +491,7 @@ describe("gatherStatus reconciliation", () => {
 			expect(agent).toBeDefined();
 			expect(agent?.state).toBe("zombie");
 		} finally {
-			await rm(tempDir, { recursive: true, force: true });
+			await cleanupTempDir(tempDir);
 		}
 	});
 });
@@ -521,7 +522,7 @@ describe("subprocess caching (invalidateStatusCache)", () => {
 			expect(Array.isArray(result1.worktrees)).toBe(true);
 			expect(Array.isArray(result2.worktrees)).toBe(true);
 		} finally {
-			await rm(tempDir, { recursive: true, force: true });
+			await cleanupTempDir(tempDir);
 		}
 	});
 });

package/src/commands/stop.test.ts CHANGED Viewed

@@ -148,6 +148,7 @@ function makeAgentSession(overrides: Partial<AgentSession> = {}): AgentSession {
 		lastActivity: new Date().toISOString(),
 		escalationLevel: 0,
 		stalledSince: null,
+		transcriptPath: null,
 		...overrides,
 	};
 }