npm - @os-eco/overstory-cli - Version diff - 0.7.2 → 0.7.4 - Mend

@os-eco/overstory-cli 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

package/README.md +21 -9
package/agents/builder.md +6 -0
package/agents/coordinator.md +2 -2
package/agents/lead.md +4 -1
package/agents/merger.md +3 -2
package/agents/monitor.md +1 -1
package/agents/reviewer.md +1 -0
package/agents/scout.md +1 -0
package/package.json +2 -2
package/src/agents/hooks-deployer.test.ts +6 -5
package/src/agents/identity.test.ts +3 -2
package/src/agents/manifest.test.ts +4 -3
package/src/agents/overlay.test.ts +3 -2
package/src/commands/agents.test.ts +5 -4
package/src/commands/agents.ts +18 -8
package/src/commands/completions.test.ts +8 -5
package/src/commands/completions.ts +37 -1
package/src/commands/costs.test.ts +4 -3
package/src/commands/dashboard.test.ts +265 -6
package/src/commands/dashboard.ts +367 -64
package/src/commands/doctor.test.ts +3 -2
package/src/commands/errors.test.ts +3 -2
package/src/commands/feed.test.ts +3 -2
package/src/commands/feed.ts +2 -29
package/src/commands/inspect.test.ts +3 -2
package/src/commands/log.test.ts +248 -8
package/src/commands/log.ts +193 -110
package/src/commands/logs.test.ts +3 -2
package/src/commands/mail.test.ts +3 -2
package/src/commands/metrics.test.ts +4 -3
package/src/commands/nudge.test.ts +3 -2
package/src/commands/prime.test.ts +3 -2
package/src/commands/prime.ts +1 -16
package/src/commands/replay.test.ts +3 -2
package/src/commands/run.test.ts +2 -1
package/src/commands/sling.test.ts +127 -0
package/src/commands/sling.ts +101 -3
package/src/commands/status.test.ts +8 -8
package/src/commands/trace.test.ts +3 -2
package/src/commands/watch.test.ts +3 -2
package/src/config.test.ts +3 -3
package/src/doctor/agents.test.ts +3 -2
package/src/doctor/logs.test.ts +3 -2
package/src/doctor/structure.test.ts +3 -2
package/src/index.ts +3 -1
package/src/logging/color.ts +1 -1
package/src/logging/format.test.ts +110 -0
package/src/logging/format.ts +42 -1
package/src/logging/logger.test.ts +3 -2
package/src/mail/client.test.ts +3 -2
package/src/mail/store.test.ts +3 -2
package/src/merge/queue.test.ts +3 -2
package/src/merge/resolver.test.ts +39 -0
package/src/merge/resolver.ts +1 -1
package/src/metrics/pricing.ts +80 -0
package/src/metrics/transcript.test.ts +58 -1
package/src/metrics/transcript.ts +9 -68
package/src/mulch/client.test.ts +63 -2
package/src/mulch/client.ts +62 -1
package/src/runtimes/claude.test.ts +4 -3
package/src/runtimes/pi-guards.test.ts +55 -2
package/src/runtimes/pi-guards.ts +26 -9
package/src/schema-consistency.test.ts +4 -2
package/src/sessions/compat.test.ts +3 -2
package/src/sessions/store.test.ts +3 -2
package/src/test-helpers.ts +20 -1
package/src/tracker/beads.test.ts +454 -0
package/src/tracker/seeds.test.ts +461 -0
package/src/watchdog/daemon.test.ts +4 -3
package/src/watchdog/triage.test.ts +3 -2

package/src/merge/resolver.test.ts CHANGED Viewed

@@ -203,6 +203,9 @@ function createMockMulchClient(
 				action: "analyze",
 			};
 		},
+		async appendOutcome() {
+			// No-op stub: resolver tests don't exercise outcome appending
+		},
 	};
 }
@@ -1440,6 +1443,42 @@ describe("createMergeResolver", () => {
 		});
 	});
+	describe("queryConflictHistory uses sortByScore", () => {
+		test("passes sortByScore: true to mulch search when querying conflict history", async () => {
+			const repoDir = await createTempGitRepo();
+			try {
+				const defaultBranch = await getDefaultBranch(repoDir);
+				await setupContentConflict(repoDir, defaultBranch);
+				const entry = makeTestEntry({
+					branchName: "feature-branch",
+					filesModified: ["src/test.ts"],
+				});
+				// Capture search call options
+				let capturedSearchOptions: unknown;
+				const mockMulchClient = createMockMulchClient();
+				mockMulchClient.search = async (_query, options) => {
+					capturedSearchOptions = options;
+					return "";
+				};
+				const resolver = createMergeResolver({
+					aiResolveEnabled: false,
+					reimagineEnabled: false,
+					mulchClient: mockMulchClient,
+				});
+				await resolver.resolve(entry, defaultBranch, repoDir);
+				// Verify sortByScore was passed to search
+				expect(capturedSearchOptions).toMatchObject({ sortByScore: true });
+			} finally {
+				await cleanupTempDir(repoDir);
+			}
+		});
+	});
 	describe("AI-resolve with history context", () => {
 		test("includes historical context in AI prompt when available", async () => {
 			const repoDir = await createTempGitRepo();

package/src/merge/resolver.ts CHANGED Viewed

@@ -514,7 +514,7 @@ async function queryConflictHistory(
 	entry: MergeEntry,
 ): Promise<ConflictHistory> {
 	try {
-		const searchOutput = await mulchClient.search("merge-conflict");
+		const searchOutput = await mulchClient.search("merge-conflict", { sortByScore: true });
 		const patterns = parseConflictPatterns(searchOutput);
 		return buildConflictHistory(patterns, entry.filesModified);
 	} catch {

package/src/metrics/pricing.ts ADDED Viewed

@@ -0,0 +1,80 @@
+/**
+ * Runtime-agnostic pricing and cost estimation for AI models.
+ *
+ * Extracted from transcript.ts so any runtime can use cost estimation
+ * without pulling in Claude Code-specific JSONL parsing logic.
+ *
+ * To add support for a new provider model, add an entry to MODEL_PRICING
+ * using a lowercase substring that uniquely identifies the model tier
+ * (e.g. "opus", "sonnet", "haiku").
+ */
+/** Canonical token usage representation shared across all runtimes. */
+export interface TokenUsage {
+	inputTokens: number;
+	outputTokens: number;
+	cacheReadTokens: number;
+	cacheCreationTokens: number;
+	modelUsed: string | null;
+}
+/** Pricing per million tokens (USD). */
+export interface ModelPricing {
+	inputPerMTok: number;
+	outputPerMTok: number;
+	cacheReadPerMTok: number;
+	cacheCreationPerMTok: number;
+}
+/** Hardcoded pricing for known Claude models. */
+const MODEL_PRICING: Record<string, ModelPricing> = {
+	opus: {
+		inputPerMTok: 15,
+		outputPerMTok: 75,
+		cacheReadPerMTok: 1.5, // 10% of input
+		cacheCreationPerMTok: 3.75, // 25% of input
+	},
+	sonnet: {
+		inputPerMTok: 3,
+		outputPerMTok: 15,
+		cacheReadPerMTok: 0.3, // 10% of input
+		cacheCreationPerMTok: 0.75, // 25% of input
+	},
+	haiku: {
+		inputPerMTok: 0.8,
+		outputPerMTok: 4,
+		cacheReadPerMTok: 0.08, // 10% of input
+		cacheCreationPerMTok: 0.2, // 25% of input
+	},
+};
+/**
+ * Determine the pricing tier for a given model string.
+ * Matches on substring: "opus" -> opus pricing, "sonnet" -> sonnet, "haiku" -> haiku.
+ * Returns null if unrecognized.
+ */
+export function getPricingForModel(model: string): ModelPricing | null {
+	const lower = model.toLowerCase();
+	if (lower.includes("opus")) return MODEL_PRICING.opus ?? null;
+	if (lower.includes("sonnet")) return MODEL_PRICING.sonnet ?? null;
+	if (lower.includes("haiku")) return MODEL_PRICING.haiku ?? null;
+	return null;
+}
+/**
+ * Calculate the estimated cost in USD for a given usage and model.
+ * Returns null if the model is unrecognized.
+ */
+export function estimateCost(usage: TokenUsage): number | null {
+	if (usage.modelUsed === null) return null;
+	const pricing = getPricingForModel(usage.modelUsed);
+	if (pricing === null) return null;
+	const inputCost = (usage.inputTokens / 1_000_000) * pricing.inputPerMTok;
+	const outputCost = (usage.outputTokens / 1_000_000) * pricing.outputPerMTok;
+	const cacheReadCost = (usage.cacheReadTokens / 1_000_000) * pricing.cacheReadPerMTok;
+	const cacheCreationCost = (usage.cacheCreationTokens / 1_000_000) * pricing.cacheCreationPerMTok;
+	return inputCost + outputCost + cacheReadCost + cacheCreationCost;
+}

package/src/metrics/transcript.test.ts CHANGED Viewed

@@ -1,8 +1,13 @@
 /**
- * Tests for Claude Code transcript JSONL parser.
+ * Tests for Claude Code transcript JSONL parser and pricing.ts module.
  *
  * Uses temp files with real-format JSONL data. No mocks.
  * Philosophy: "never mock what you can use for real" (mx-252b16).
+ *
+ * Coverage:
+ *   - parseTranscriptUsage (transcript.ts)
+ *   - estimateCost re-export (transcript.ts -> pricing.ts)
+ *   - getPricingForModel (pricing.ts)
  */
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
@@ -10,6 +15,7 @@ import { mkdtemp } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { cleanupTempDir } from "../test-helpers.ts";
+import { getPricingForModel, estimateCost as pricingEstimateCost } from "./pricing.ts";
 import { estimateCost, parseTranscriptUsage } from "./transcript.ts";
 let tempDir: string;
@@ -354,3 +360,54 @@ describe("estimateCost", () => {
 		}
 	});
 });
+// === getPricingForModel (pricing.ts) ===
+describe("getPricingForModel", () => {
+	test("matches opus substring", () => {
+		const pricing = getPricingForModel("claude-opus-4-6");
+		expect(pricing).not.toBeNull();
+		if (pricing !== null) {
+			expect(pricing.inputPerMTok).toBe(15);
+			expect(pricing.outputPerMTok).toBe(75);
+		}
+	});
+	test("matches sonnet substring", () => {
+		const pricing = getPricingForModel("claude-sonnet-4-20250514");
+		expect(pricing).not.toBeNull();
+		if (pricing !== null) {
+			expect(pricing.inputPerMTok).toBe(3);
+			expect(pricing.outputPerMTok).toBe(15);
+		}
+	});
+	test("matches haiku substring", () => {
+		const pricing = getPricingForModel("claude-haiku-3-5-20241022");
+		expect(pricing).not.toBeNull();
+		if (pricing !== null) {
+			expect(pricing.inputPerMTok).toBe(0.8);
+			expect(pricing.outputPerMTok).toBe(4);
+		}
+	});
+	test("returns null for unknown model", () => {
+		const pricing = getPricingForModel("gpt-4o");
+		expect(pricing).toBeNull();
+	});
+});
+// === re-export parity ===
+describe("estimateCost re-export parity", () => {
+	test("transcript.estimateCost and pricing.estimateCost produce same result", () => {
+		const usage = {
+			inputTokens: 1_000_000,
+			outputTokens: 1_000_000,
+			cacheReadTokens: 1_000_000,
+			cacheCreationTokens: 1_000_000,
+			modelUsed: "claude-opus-4-6",
+		};
+		expect(estimateCost(usage)).toBe(pricingEstimateCost(usage));
+	});
+});

package/src/metrics/transcript.ts CHANGED Viewed

@@ -1,8 +1,12 @@
 /**
  * Parser for Claude Code transcript JSONL files.
  *
- * Extracts token usage data from assistant-type entries in transcript files
- * at ~/.claude/projects/{project-slug}/{session-id}.jsonl.
+ * This is a Claude Code-specific JSONL parser that extracts token usage data
+ * from assistant-type entries in transcript files at
+ * ~/.claude/projects/{project-slug}/{session-id}.jsonl.
+ *
+ * Runtime-agnostic pricing logic lives in ./pricing.ts. Other runtimes
+ * implement their own transcript parsing via AgentRuntime.parseTranscript().
  *
  * Each assistant entry contains per-turn usage:
  * {
@@ -19,74 +23,11 @@
  * }
  */
-export interface TranscriptUsage {
-	inputTokens: number;
-	outputTokens: number;
-	cacheReadTokens: number;
-	cacheCreationTokens: number;
-	modelUsed: string | null;
-}
-/** Pricing per million tokens (USD). */
-interface ModelPricing {
-	inputPerMTok: number;
-	outputPerMTok: number;
-	cacheReadPerMTok: number;
-	cacheCreationPerMTok: number;
-}
-/** Hardcoded pricing for known Claude models. */
-const MODEL_PRICING: Record<string, ModelPricing> = {
-	opus: {
-		inputPerMTok: 15,
-		outputPerMTok: 75,
-		cacheReadPerMTok: 1.5, // 10% of input
-		cacheCreationPerMTok: 3.75, // 25% of input
-	},
-	sonnet: {
-		inputPerMTok: 3,
-		outputPerMTok: 15,
-		cacheReadPerMTok: 0.3, // 10% of input
-		cacheCreationPerMTok: 0.75, // 25% of input
-	},
-	haiku: {
-		inputPerMTok: 0.8,
-		outputPerMTok: 4,
-		cacheReadPerMTok: 0.08, // 10% of input
-		cacheCreationPerMTok: 0.2, // 25% of input
-	},
-};
-/**
- * Determine the pricing tier for a given model string.
- * Matches on substring: "opus" -> opus pricing, "sonnet" -> sonnet, "haiku" -> haiku.
- * Returns null if unrecognized.
- */
-function getPricingForModel(model: string): ModelPricing | null {
-	const lower = model.toLowerCase();
-	if (lower.includes("opus")) return MODEL_PRICING.opus ?? null;
-	if (lower.includes("sonnet")) return MODEL_PRICING.sonnet ?? null;
-	if (lower.includes("haiku")) return MODEL_PRICING.haiku ?? null;
-	return null;
-}
-/**
- * Calculate the estimated cost in USD for a given usage and model.
- * Returns null if the model is unrecognized.
- */
-export function estimateCost(usage: TranscriptUsage): number | null {
-	if (usage.modelUsed === null) return null;
+import type { TokenUsage } from "./pricing.ts";
-	const pricing = getPricingForModel(usage.modelUsed);
-	if (pricing === null) return null;
+export type TranscriptUsage = TokenUsage;
-	const inputCost = (usage.inputTokens / 1_000_000) * pricing.inputPerMTok;
-	const outputCost = (usage.outputTokens / 1_000_000) * pricing.outputPerMTok;
-	const cacheReadCost = (usage.cacheReadTokens / 1_000_000) * pricing.cacheReadPerMTok;
-	const cacheCreationCost = (usage.cacheCreationTokens / 1_000_000) * pricing.cacheCreationPerMTok;
-	return inputCost + outputCost + cacheReadCost + cacheCreationCost;
-}
+export { estimateCost } from "./pricing.ts";
 /**
  * Narrow an unknown value to determine if it looks like a transcript assistant entry.

package/src/mulch/client.test.ts CHANGED Viewed

@@ -6,10 +6,11 @@
  */
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import { mkdtemp, rm } from "node:fs/promises";
+import { mkdtemp } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { AgentError } from "../errors.ts";
+import { cleanupTempDir } from "../test-helpers.ts";
 import { createMulchClient } from "./client.ts";
 // Check if mulch is available
@@ -30,7 +31,7 @@ describe("createMulchClient", () => {
 	});
 	afterEach(async () => {
-		await rm(tempDir, { recursive: true, force: true });
+		await cleanupTempDir(tempDir);
 	});
 	/**
@@ -162,6 +163,33 @@ describe("createMulchClient", () => {
 			});
 			expect(typeof result).toBe("string");
 		});
+		test.skipIf(!hasMulch)("passes --sort-by-score flag in prime options", async () => {
+			await initMulch();
+			const client = createMulchClient(tempDir);
+			// mulch prime --sort-by-score may not be supported in older mulch versions;
+			// the interface and impl are forward-looking — test accepts both outcomes.
+			try {
+				const result = await client.prime([], "markdown", { sortByScore: true });
+				expect(typeof result).toBe("string");
+			} catch (error) {
+				expect(error).toBeInstanceOf(AgentError);
+			}
+		});
+		test.skipIf(!hasMulch)("passes --sort-by-score with --files together", async () => {
+			await initMulch();
+			const client = createMulchClient(tempDir);
+			try {
+				const result = await client.prime([], "markdown", {
+					files: ["src/config.ts"],
+					sortByScore: true,
+				});
+				expect(typeof result).toBe("string");
+			} catch (error) {
+				expect(error).toBeInstanceOf(AgentError);
+			}
+		});
 	});
 	describe("status", () => {
@@ -452,6 +480,39 @@ describe("createMulchClient", () => {
 			expect(typeof result).toBe("string");
 		});
+		test.skipIf(!hasMulch)("passes --classification flag when provided", async () => {
+			await initMulch();
+			const client = createMulchClient(tempDir);
+			const result = await client.search("test", { classification: "foundational" });
+			expect(typeof result).toBe("string");
+		});
+		test.skipIf(!hasMulch)("passes --outcome-status flag when provided (success)", async () => {
+			await initMulch();
+			const client = createMulchClient(tempDir);
+			const result = await client.search("test", { outcomeStatus: "success" });
+			expect(typeof result).toBe("string");
+		});
+		test.skipIf(!hasMulch)("passes --outcome-status flag when provided (failure)", async () => {
+			await initMulch();
+			const client = createMulchClient(tempDir);
+			const result = await client.search("test", { outcomeStatus: "failure" });
+			expect(typeof result).toBe("string");
+		});
+		test.skipIf(!hasMulch)("passes all search filters together", async () => {
+			await initMulch();
+			const client = createMulchClient(tempDir);
+			const result = await client.search("test", {
+				classification: "tactical",
+				outcomeStatus: "success",
+				sortByScore: true,
+				file: "src/config.ts",
+			});
+			expect(typeof result).toBe("string");
+		});
 		test.skipIf(!hasMulch)("roundtrip: record via API then search and find it", async () => {
 			await initMulch();
 			const addProc = Bun.spawn(["ml", "add", "roundtrip"], {

package/src/mulch/client.ts CHANGED Viewed

@@ -28,9 +28,22 @@ export interface MulchClient {
 		options?: {
 			files?: string[];
 			excludeDomain?: string[];
+			sortByScore?: boolean;
 		},
 	): Promise<string>;
+	/** Append an outcome entry to an existing record by ID in the given domain. */
+	appendOutcome(
+		domain: string,
+		id: string,
+		outcome: {
+			status: "success" | "failure" | "partial";
+			agent?: string;
+			notes?: string;
+			duration?: number;
+		},
+	): Promise<void>;
 	/** Show domain statistics. */
 	status(): Promise<MulchStatus>;
@@ -58,7 +71,15 @@ export interface MulchClient {
 	query(domain?: string): Promise<string>;
 	/** Search records across all domains. */
-	search(query: string, options?: { file?: string; sortByScore?: boolean }): Promise<string>;
+	search(
+		query: string,
+		options?: {
+			file?: string;
+			sortByScore?: boolean;
+			classification?: string;
+			outcomeStatus?: "success" | "failure";
+		},
+	): Promise<string>;
 	/** Show expertise record changes since a git ref. */
 	diff(options?: { since?: string }): Promise<MulchDiffResult>;
@@ -214,6 +235,8 @@ interface MulchProgrammaticApi {
 			type?: string;
 			tag?: string;
 			classification?: string;
+			outcomeStatus?: "success" | "failure";
+			sortByScore?: boolean;
 			file?: string;
 			cwd?: string;
 		},
@@ -222,6 +245,22 @@ interface MulchProgrammaticApi {
 		domain: string,
 		options?: { type?: string; classification?: string; file?: string; cwd?: string },
 	): Promise<MulchExpertiseRecord[]>;
+	appendOutcome(
+		domain: string,
+		id: string,
+		outcome: {
+			status: "success" | "failure" | "partial";
+			agent?: string;
+			notes?: string;
+			duration?: number;
+			recorded_at?: string;
+		},
+		options?: { cwd?: string },
+	): Promise<{
+		record: MulchExpertiseRecord;
+		outcome: { status: string; agent?: string; notes?: string; recorded_at?: string };
+		total_outcomes: number;
+	}>;
 }
 const MULCH_PKG = "@os-eco/mulch-cli";
@@ -406,6 +445,9 @@ export function createMulchClient(cwd: string): MulchClient {
 			if (options?.excludeDomain && options.excludeDomain.length > 0) {
 				args.push("--exclude-domain", ...options.excludeDomain);
 			}
+			if (options?.sortByScore) {
+				args.push("--sort-by-score");
+			}
 			const { stdout } = await runMulch(args, "prime");
 			return stdout;
 		},
@@ -472,6 +514,9 @@ export function createMulchClient(cwd: string): MulchClient {
 				const api = await loadMulchApi();
 				const results = await api.searchExpertise(query, {
 					file: options?.file,
+					classification: options?.classification,
+					outcomeStatus: options?.outcomeStatus,
+					sortByScore: options?.sortByScore,
 					cwd,
 				});
 				return formatSearchResults(results);
@@ -595,5 +640,21 @@ export function createMulchClient(cwd: string): MulchClient {
 				throw new AgentError(`Failed to parse JSON from mulch compact: ${trimmed.slice(0, 200)}`);
 			}
 		},
+		async appendOutcome(domain, id, outcome) {
+			const api = await loadMulchApi();
+			try {
+				await api.appendOutcome(
+					domain,
+					id,
+					{ ...outcome, recorded_at: new Date().toISOString() },
+					{ cwd },
+				);
+			} catch (error) {
+				throw new AgentError(
+					`mulch appendOutcome ${domain}/${id} failed: ${error instanceof Error ? error.message : String(error)}`,
+				);
+			}
+		},
 	};
 }

package/src/runtimes/claude.test.ts CHANGED Viewed

@@ -1,7 +1,8 @@
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import { mkdtemp, rm } from "node:fs/promises";
+import { mkdtemp } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
+import { cleanupTempDir } from "../test-helpers.ts";
 import type { ResolvedModel } from "../types.ts";
 import { ClaudeRuntime } from "./claude.ts";
 import type { SpawnOpts } from "./types.ts";
@@ -239,7 +240,7 @@ describe("ClaudeRuntime", () => {
 		});
 		afterEach(async () => {
-			await rm(tempDir, { recursive: true, force: true });
+			await cleanupTempDir(tempDir);
 		});
 		test("writes overlay to .claude/CLAUDE.md when overlay is provided", async () => {
@@ -373,7 +374,7 @@ describe("ClaudeRuntime", () => {
 		});
 		afterEach(async () => {
-			await rm(tempDir, { recursive: true, force: true });
+			await cleanupTempDir(tempDir);
 		});
 		test("returns null for non-existent file", async () => {

package/src/runtimes/pi-guards.test.ts CHANGED Viewed

@@ -349,7 +349,9 @@ describe("generatePiGuardExtension", () => {
 		test("generated code contains pi.exec ov log tool-start in tool_call handler", () => {
 			const generated = generatePiGuardExtension(builderHooks());
-			expect(generated).toContain('pi.exec("ov", ["log", "tool-start", "--agent", AGENT_NAME])');
+			expect(generated).toContain(
+				'pi.exec("ov", ["log", "tool-start", "--agent", AGENT_NAME, "--tool-name", event.toolName])',
+			);
 		});
 		test('generated code contains pi.on("tool_execution_end", ...)', () => {
@@ -359,7 +361,9 @@ describe("generatePiGuardExtension", () => {
 		test("generated code contains pi.exec ov log tool-end in tool_execution_end handler", () => {
 			const generated = generatePiGuardExtension(builderHooks());
-			expect(generated).toContain('pi.exec("ov", ["log", "tool-end", "--agent", AGENT_NAME])');
+			expect(generated).toContain(
+				'pi.exec("ov", ["log", "tool-end", "--agent", AGENT_NAME, "--tool-name", event.toolName])',
+			);
 		});
 		test('generated code contains pi.on("session_shutdown", ...)', () => {
@@ -373,6 +377,55 @@ describe("generatePiGuardExtension", () => {
 				'await pi.exec("ov", ["log", "session-end", "--agent", AGENT_NAME])',
 			);
 		});
+		test("tool_call handler passes --tool-name event.toolName to tool-start", () => {
+			const generated = generatePiGuardExtension(builderHooks());
+			expect(generated).toContain(
+				'pi.exec("ov", ["log", "tool-start", "--agent", AGENT_NAME, "--tool-name", event.toolName])',
+			);
+		});
+		test("tool_execution_end handler passes --tool-name event.toolName to tool-end", () => {
+			const generated = generatePiGuardExtension(builderHooks());
+			expect(generated).toContain(
+				'pi.exec("ov", ["log", "tool-end", "--agent", AGENT_NAME, "--tool-name", event.toolName])',
+			);
+		});
+		test("tool_execution_end handler uses named event parameter (not _event)", () => {
+			const generated = generatePiGuardExtension(builderHooks());
+			expect(generated).toContain('pi.on("tool_execution_end", async (event) => {');
+			expect(generated).not.toContain('pi.on("tool_execution_end", async (_event) => {');
+		});
+		test('generated code contains pi.on("agent_end", ...)', () => {
+			const generated = generatePiGuardExtension(builderHooks());
+			expect(generated).toContain('pi.on("agent_end",');
+		});
+		test("generated code awaits pi.exec ov log session-end in agent_end handler", () => {
+			const generated = generatePiGuardExtension(builderHooks());
+			// agent_end handler must await (not fire-and-forget) so it completes
+			// before Pi moves on, ensuring the SessionStore is updated.
+			const agentEndIdx = generated.indexOf('pi.on("agent_end"');
+			const sessionShutdownIdx = generated.indexOf('pi.on("session_shutdown"');
+			expect(agentEndIdx).toBeGreaterThan(-1);
+			expect(sessionShutdownIdx).toBeGreaterThan(-1);
+			// agent_end must come before session_shutdown
+			expect(agentEndIdx).toBeLessThan(sessionShutdownIdx);
+			// Extract the agent_end handler body
+			const handlerBody = generated.slice(agentEndIdx, sessionShutdownIdx);
+			expect(handlerBody).toContain(
+				'await pi.exec("ov", ["log", "session-end", "--agent", AGENT_NAME])',
+			);
+		});
+		test("agent_end handler is present for all capabilities", () => {
+			for (const hooks of [builderHooks(), scoutHooks(), coordinatorHooks()]) {
+				const generated = generatePiGuardExtension(hooks);
+				expect(generated).toContain('pi.on("agent_end",');
+			}
+		});
 	});
 	describe("PiRuntime integration", () => {