npm - pi-crew - Versions diffs - 0.1.46 → 0.1.49 - Mend

pi-crew 0.1.46 → 0.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (253) hide show

package/CHANGELOG.md +97 -0
package/agents/analyst.md +11 -11
package/agents/critic.md +11 -11
package/agents/executor.md +11 -11
package/agents/explorer.md +11 -11
package/agents/planner.md +11 -11
package/agents/reviewer.md +11 -11
package/agents/security-reviewer.md +11 -11
package/agents/test-engineer.md +11 -11
package/agents/verifier.md +11 -11
package/agents/writer.md +11 -11
package/docs/next-upgrade-roadmap.md +117 -42
package/docs/refactor-tasks-phase3.md +394 -394
package/docs/refactor-tasks-phase4.md +564 -564
package/docs/refactor-tasks-phase5.md +402 -402
package/docs/refactor-tasks-phase6.md +662 -662
package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +261 -0
package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +111 -0
package/docs/research/AUDIT_OH_MY_PI.md +261 -0
package/docs/research/AUDIT_PI_CREW.md +457 -0
package/docs/research/CAVEMAN-DEEP-RESEARCH.md +281 -0
package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +264 -0
package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +343 -0
package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +480 -0
package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +354 -0
package/docs/research/IMPLEMENTATION_PLAN.md +385 -0
package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +502 -0
package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +266 -0
package/docs/research/REMAINING-GAPS-PLAN.md +363 -0
package/docs/research/SESSION-SUMMARY-2026-05-08.md +146 -0
package/docs/research/UI-RESPONSIVENESS-AUDIT.md +173 -0
package/docs/research-awesome-agent-skills-distillation.md +100 -100
package/docs/research-extension-examples.md +297 -297
package/docs/research-extension-system.md +324 -324
package/docs/research-oh-my-pi-distillation.md +56 -9
package/docs/research-optimization-plan.md +548 -548
package/docs/research-phase10-distillation.md +198 -198
package/docs/research-phase11-distillation.md +201 -201
package/docs/research-pi-coding-agent.md +357 -357
package/docs/research-source-pi-crew-reference.md +174 -174
package/docs/runtime-flow.md +148 -148
package/docs/source-runtime-refactor-map.md +107 -107
package/index.ts +6 -6
package/package.json +99 -98
package/schema.json +8 -0
package/skills/async-worker-recovery/SKILL.md +42 -42
package/skills/context-artifact-hygiene/SKILL.md +52 -52
package/skills/delegation-patterns/SKILL.md +54 -54
package/skills/mailbox-interactive/SKILL.md +40 -40
package/skills/model-routing-context/SKILL.md +39 -39
package/skills/multi-perspective-review/SKILL.md +58 -58
package/skills/observability-reliability/SKILL.md +41 -41
package/skills/orchestration/SKILL.md +157 -0
package/skills/ownership-session-security/SKILL.md +41 -41
package/skills/pi-extension-lifecycle/SKILL.md +39 -39
package/skills/requirements-to-task-packet/SKILL.md +63 -63
package/skills/resource-discovery-config/SKILL.md +41 -41
package/skills/runtime-state-reader/SKILL.md +44 -44
package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
package/skills/state-mutation-locking/SKILL.md +42 -42
package/skills/systematic-debugging/SKILL.md +67 -67
package/skills/ui-render-performance/SKILL.md +39 -39
package/skills/verification-before-done/SKILL.md +57 -57
package/skills/worktree-isolation/SKILL.md +39 -39
package/src/agents/agent-config.ts +6 -0
package/src/agents/agent-search.ts +98 -0
package/src/agents/agent-serializer.ts +4 -0
package/src/agents/discover-agents.ts +17 -4
package/src/config/config.ts +24 -0
package/src/config/defaults.ts +11 -0
package/src/extension/autonomous-policy.ts +26 -33
package/src/extension/cross-extension-rpc.ts +82 -82
package/src/extension/help.ts +1 -0
package/src/extension/management.ts +5 -0
package/src/extension/register.ts +58 -13
package/src/extension/registration/commands.ts +33 -1
package/src/extension/registration/compaction-guard.ts +125 -125
package/src/extension/registration/team-tool.ts +6 -4
package/src/extension/run-bundle-schema.ts +89 -89
package/src/extension/run-index.ts +24 -18
package/src/extension/run-maintenance.ts +68 -62
package/src/extension/team-tool/api.ts +23 -2
package/src/extension/team-tool/cancel.ts +86 -11
package/src/extension/team-tool/context.ts +3 -0
package/src/extension/team-tool/handle-settings.ts +188 -188
package/src/extension/team-tool/inspect.ts +41 -41
package/src/extension/team-tool/intent-policy.ts +42 -0
package/src/extension/team-tool/lifecycle-actions.ts +47 -18
package/src/extension/team-tool/parallel-dispatch.ts +156 -0
package/src/extension/team-tool/plan.ts +19 -19
package/src/extension/team-tool/respond.ts +10 -2
package/src/extension/team-tool/run.ts +3 -2
package/src/extension/team-tool/status.ts +1 -1
package/src/extension/team-tool-types.ts +1 -0
package/src/extension/team-tool.ts +13 -3
package/src/hooks/registry.ts +61 -0
package/src/hooks/types.ts +41 -0
package/src/i18n.ts +184 -184
package/src/observability/exporters/otlp-exporter.ts +77 -77
package/src/prompt/prompt-runtime.ts +72 -72
package/src/runtime/agent-control.ts +108 -2
package/src/runtime/agent-memory.ts +72 -72
package/src/runtime/agent-observability.ts +114 -114
package/src/runtime/async-marker.ts +26 -26
package/src/runtime/async-runner.ts +3 -1
package/src/runtime/attention-events.ts +28 -28
package/src/runtime/background-runner.ts +19 -0
package/src/runtime/cancellation-token.ts +89 -0
package/src/runtime/cancellation.ts +61 -51
package/src/runtime/capability-inventory.ts +116 -0
package/src/runtime/child-pi.ts +2 -1
package/src/runtime/code-summary.ts +247 -0
package/src/runtime/completion-guard.ts +190 -190
package/src/runtime/crash-recovery.ts +181 -0
package/src/runtime/crew-agent-records.ts +35 -7
package/src/runtime/crew-agent-runtime.ts +1 -0
package/src/runtime/custom-tools/irc-tool.ts +201 -0
package/src/runtime/custom-tools/submit-result-tool.ts +90 -0
package/src/runtime/delivery-coordinator.ts +3 -1
package/src/runtime/direct-run.ts +35 -35
package/src/runtime/effectiveness.ts +81 -76
package/src/runtime/event-stream-bridge.ts +90 -0
package/src/runtime/foreground-control.ts +82 -82
package/src/runtime/green-contract.ts +46 -46
package/src/runtime/group-join.ts +106 -106
package/src/runtime/heartbeat-gradient.ts +28 -28
package/src/runtime/heartbeat-watcher.ts +124 -124
package/src/runtime/live-agent-control.ts +88 -88
package/src/runtime/live-agent-manager.ts +78 -2
package/src/runtime/live-control-realtime.ts +36 -36
package/src/runtime/live-extension-bridge.ts +150 -0
package/src/runtime/live-irc.ts +92 -0
package/src/runtime/live-session-health.ts +100 -0
package/src/runtime/live-session-runtime.ts +297 -7
package/src/runtime/mcp-proxy.ts +113 -0
package/src/runtime/notebook-helpers.ts +90 -0
package/src/runtime/orphan-sentinel.ts +7 -0
package/src/runtime/output-validator.ts +187 -0
package/src/runtime/parallel-research.ts +44 -44
package/src/runtime/parallel-utils.ts +57 -0
package/src/runtime/parent-guard.ts +80 -0
package/src/runtime/pi-json-output.ts +111 -111
package/src/runtime/policy-engine.ts +79 -79
package/src/runtime/progress-event-coalescer.ts +43 -43
package/src/runtime/prose-compressor.ts +164 -0
package/src/runtime/recovery-recipes.ts +74 -74
package/src/runtime/result-extractor.ts +121 -0
package/src/runtime/role-permission.ts +39 -39
package/src/runtime/runtime-resolver.ts +1 -4
package/src/runtime/semaphore.ts +131 -0
package/src/runtime/sensitive-paths.ts +92 -0
package/src/runtime/session-resources.ts +25 -25
package/src/runtime/session-snapshot.ts +59 -59
package/src/runtime/session-usage.ts +79 -79
package/src/runtime/sidechain-output.ts +29 -29
package/src/runtime/stream-preview.ts +177 -0
package/src/runtime/subagent-manager.ts +3 -2
package/src/runtime/subprocess-tool-registry.ts +67 -0
package/src/runtime/supervisor-contact.ts +59 -59
package/src/runtime/task-display.ts +38 -38
package/src/runtime/task-output-context.ts +59 -9
package/src/runtime/task-runner/capabilities.ts +78 -78
package/src/runtime/task-runner/live-executor.ts +2 -0
package/src/runtime/task-runner/progress.ts +119 -119
package/src/runtime/task-runner/prompt-builder.ts +70 -8
package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
package/src/runtime/task-runner/result-utils.ts +14 -14
package/src/runtime/task-runner/run-projection.ts +104 -0
package/src/runtime/task-runner/state-helpers.ts +22 -22
package/src/runtime/task-runner.ts +75 -4
package/src/runtime/team-runner.ts +60 -8
package/src/runtime/worker-heartbeat.ts +21 -21
package/src/runtime/worker-startup.ts +57 -57
package/src/runtime/workspace-tree.ts +298 -0
package/src/runtime/yield-handler.ts +189 -0
package/src/schema/config-schema.ts +6 -0
package/src/schema/team-tool-schema.ts +11 -1
package/src/skills/discover-skills.ts +67 -0
package/src/state/active-run-registry.ts +4 -2
package/src/state/artifact-store.ts +4 -1
package/src/state/atomic-write.ts +50 -1
package/src/state/blob-store.ts +117 -0
package/src/state/contracts.ts +1 -0
package/src/state/event-log-rotation.ts +158 -0
package/src/state/event-log.ts +52 -2
package/src/state/mailbox.ts +87 -7
package/src/state/state-store.ts +24 -4
package/src/state/task-claims.ts +44 -44
package/src/state/types.ts +20 -0
package/src/state/usage.ts +29 -29
package/src/subagents/async-entry.ts +1 -1
package/src/subagents/index.ts +3 -3
package/src/subagents/live/control.ts +1 -1
package/src/subagents/live/manager.ts +1 -1
package/src/subagents/live/realtime.ts +1 -1
package/src/subagents/live/session-runtime.ts +1 -1
package/src/subagents/manager.ts +1 -1
package/src/subagents/spawn.ts +1 -1
package/src/teams/team-serializer.ts +38 -38
package/src/types/diff.d.ts +18 -18
package/src/ui/agent-management-overlay.ts +144 -0
package/src/ui/crew-footer.ts +101 -101
package/src/ui/crew-select-list.ts +111 -111
package/src/ui/crew-widget.ts +11 -2
package/src/ui/dashboard-panes/cancellation-pane.ts +43 -0
package/src/ui/dashboard-panes/capability-pane.ts +60 -0
package/src/ui/dashboard-panes/mailbox-pane.ts +35 -11
package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
package/src/ui/dynamic-border.ts +25 -25
package/src/ui/layout-primitives.ts +106 -106
package/src/ui/live-run-sidebar.ts +4 -0
package/src/ui/loaders.ts +158 -158
package/src/ui/powerbar-publisher.ts +77 -15
package/src/ui/render-coalescer.ts +51 -0
package/src/ui/render-diff.ts +119 -119
package/src/ui/render-scheduler.ts +143 -143
package/src/ui/run-dashboard.ts +4 -0
package/src/ui/run-event-bus.ts +209 -0
package/src/ui/run-snapshot-cache.ts +68 -16
package/src/ui/snapshot-types.ts +8 -0
package/src/ui/spinner.ts +17 -17
package/src/ui/status-colors.ts +58 -58
package/src/ui/syntax-highlight.ts +116 -116
package/src/ui/transcript-entries.ts +258 -0
package/src/utils/atomic-write.ts +33 -33
package/src/utils/completion-dedupe.ts +63 -63
package/src/utils/frontmatter.ts +68 -68
package/src/utils/git.ts +262 -262
package/src/utils/ids.ts +17 -12
package/src/utils/incremental-reader.ts +104 -0
package/src/utils/names.ts +27 -27
package/src/utils/redaction.ts +44 -44
package/src/utils/safe-paths.ts +47 -47
package/src/utils/scan-cache.ts +137 -0
package/src/utils/sleep.ts +32 -32
package/src/utils/sse-parser.ts +134 -0
package/src/utils/task-name-generator.ts +337 -0
package/src/utils/visual.ts +33 -2
package/src/workflows/validate-workflow.ts +40 -40
package/src/worktree/branch-freshness.ts +45 -45
package/src/worktree/cleanup.ts +2 -1
package/teams/default.team.md +12 -12
package/teams/fast-fix.team.md +11 -11
package/teams/implementation.team.md +18 -18
package/teams/parallel-research.team.md +14 -14
package/teams/research.team.md +11 -11
package/teams/review.team.md +12 -12
package/workflows/default.workflow.md +29 -29
package/workflows/fast-fix.workflow.md +22 -22
package/workflows/implementation.workflow.md +38 -38
package/workflows/parallel-research.workflow.md +46 -46
package/workflows/research.workflow.md +22 -22
package/workflows/review.workflow.md +30 -30

package/src/runtime/live-session-health.ts ADDED Viewed

@@ -0,0 +1,100 @@
+/**
+ * Phase 8: Monitoring and observability for live-session workers.
+ *
+ * Provides health checks, metrics collection, and diagnostics
+ * for live-session workers running in-process.
+ */
+export interface LiveSessionHealth { // Aggregate, point-in-time counters as returned by collectLiveSessionHealth().
+	/** Total number of live agents the snapshot was built from; includes agents whose status matches none of the four counters below. */
+	totalAgents: number;
+	/** Number of agents whose status is "running". */
+	runningAgents: number;
+	/** Number of agents whose status is "idle". */
+	idleAgents: number;
+	/** Number of agents whose status is "completed". */
+	completedAgents: number;
+	/** Number of agents whose status is "failed". */
+	failedAgents: number;
+	/** Sum of input + output tokens over the agents for which usage data was available. */
+	totalTokens: number;
+	/** ISO 8601 time (Date#toISOString) at which this health snapshot was built. */
+	timestamp: string;
+}
+export interface LiveSessionMetrics { // Per-agent metrics record; declared here but not produced by any function in this module.
+	agentId: string;
+	taskId: string;
+	status: string; // free-form string; NOTE(review): presumably the same status values collectLiveSessionHealth() counts - confirm
+	/** Accumulated usage from session stats. All fields are optional. */
+	usage?: {
+		input?: number;
+		output?: number;
+		cacheRead?: number;
+		cacheWrite?: number;
+		cost?: number;
+		turns?: number;
+	};
+	/** Session duration in milliseconds. */
+	durationMs?: number;
+	/** Number of IRC messages received. */
+	ircMessagesReceived?: number;
+	/** Number of yield reminders sent. */
+	yieldReminders?: number;
+	/** Whether yield was called. NOTE(review): presumably refers to the submit_result tool call used by the live-session runtime - confirm. */
+	yieldCalled: boolean;
+}
+/**
+ * Collect health snapshot from live agent handles.
+ *
+ * Makes two passes over the agent list: the first counts agents per status,
+ * the second sums token usage through the getUsage lookup.
+ *
+ * @param agents Live agent handles. Only the status field is declared in the
+ *   parameter type; an agentId property is read from each element when present.
+ * @param getUsage Lookup from agent id to usage counters. Returning undefined
+ *   means the agent contributes nothing to the token total.
+ * @returns A snapshot with per-status counts, the token total and the time
+ *   (ISO 8601) at which the snapshot was built.
+ */
+export function collectLiveSessionHealth(
+	agents: Array<{ status: string }>,
+	getUsage: (agentId: string) => { input?: number; output?: number; turns?: number } | undefined,
+): LiveSessionHealth {
+	let running = 0;
+	let idle = 0;
+	let completed = 0;
+	let failed = 0;
+	let totalTokens = 0;
+	for (const agent of agents) {
+		switch (agent.status) { // no default case: any other status is reflected only in totalAgents
+			case "running": running++; break;
+			case "idle": idle++; break;
+			case "completed": completed++; break;
+			case "failed": failed++; break;
+		}
+	}
+	// Sum tokens from usage data
+	for (const agent of agents) {
+		const agentAny = agent as Record<string, unknown>; // widened because the declared element type has no agentId field
+		const agentId = agentAny.agentId as string | undefined; // type assertion only; the value is not checked at runtime
+		if (agentId) { // agents with a missing or empty agentId are skipped
+			const usage = getUsage(agentId);
+			if (usage) {
+				totalTokens += (usage.input ?? 0) + (usage.output ?? 0); // usage.turns is not part of the token total
+			}
+		}
+	}
+	return {
+		totalAgents: agents.length,
+		runningAgents: running,
+		idleAgents: idle,
+		completedAgents: completed,
+		failedAgents: failed,
+		totalTokens,
+		timestamp: new Date().toISOString(),
+	};
+}
+/**
+ * Build a diagnostic summary string for logging.
+ *
+ * The result is one line: the prefix "[Live-Session Health]" followed by
+ * space-separated key=value pairs for the agent counters and the token total.
+ * The snapshot timestamp is not included in the output.
+ *
+ * @param health Snapshot to render.
+ * @returns The formatted summary line.
+ */
+export function formatLiveSessionDiagnostics(health: LiveSessionHealth): string {
+	return [
+		`[Live-Session Health] agents=${health.totalAgents} running=${health.runningAgents} idle=${health.idleAgents} completed=${health.completedAgents} failed=${health.failedAgents} tokens=${health.totalTokens}`,
+	].join("\n"); // the array has a single element, so the result contains no newline
+}

package/src/runtime/live-session-runtime.ts CHANGED Viewed

@@ -12,6 +12,19 @@ import type { WorkflowStep } from "../workflows/workflow-config.ts";
 import { isLiveSessionRuntimeAvailable } from "./runtime-resolver.ts";
 import { redactSecrets } from "../utils/redaction.ts";
 import { buildConfiguredModelRouting } from "./model-fallback.ts";
+import { DEFAULT_LIVE_SESSION } from "../config/defaults.ts";
+import { buildYieldReminder, hasYieldInOutput, isYieldEvent, extractYieldResult, validateYieldData, DEFAULT_YIELD_CONFIG, type YieldResult } from "./yield-handler.ts";
+import { buildMcpProxyFromSession } from "./mcp-proxy.ts";
+import { createSubmitResultTool } from "./custom-tools/submit-result-tool.ts";
+import { createIrcTool } from "./custom-tools/irc-tool.ts";
+import { buildExtensionBridge } from "./live-extension-bridge.ts";
+import { logInternalError } from "../utils/internal-error.ts";
+// prose-compressor imported for custom tool descriptions below;
+// tool description compression for SDK-managed tools awaits SDK support.
+import { compressToolDescription } from "./prose-compressor.ts";
+import { buildSensitivePathConstraint } from "./sensitive-paths.ts";
+import { collectLiveSessionHealth, formatLiveSessionDiagnostics, type LiveSessionHealth } from "./live-session-health.ts";
+import { listLiveAgents } from "./live-agent-manager.ts";
 export interface LiveSessionSpawnInput {
 	manifest: TeamRunManifest;
@@ -30,6 +43,8 @@ export interface LiveSessionSpawnInput {
 	modelOverride?: string;
 	teamRoleModel?: string;
 	isCurrent?: () => boolean;
+	/** Phase 2: Output schema for validating yield data. */
+	outputSchema?: unknown;
 }
 export interface LiveSessionRunResult {
@@ -40,6 +55,8 @@ export interface LiveSessionRunResult {
 	jsonEvents: number;
 	usage?: UsageState;
 	error?: string;
+	/** Phase 1: Extracted yield result from submit_result tool call. */
+	yieldResult?: YieldResult;
 }
 export interface LiveSessionUnavailableResult {
@@ -136,16 +153,106 @@ function modelFromRegistry(modelRegistry: unknown, modelId: string | undefined):
 	}
 }
+/**
+ * Communication intensity by role (caveman-inspired token optimization).
+ * Roles that are not listed here are treated as "full" by buildCommunicationStyle().
+ */
+const ROLE_INTENSITY: Record<string, "lite" | "full" | "ultra"> = {
+	explorer: "ultra",
+	analyst: "full",
+	planner: "full",
+	critic: "full",
+	executor: "full",
+	reviewer: "full",
+	"security-reviewer": "full",
+	"test-engineer": "full",
+	verifier: "full",
+	writer: "lite",
+};
+function buildCommunicationStyle(role: string): string { // Returns the "## Communication" prompt section matching the role's intensity.
+	const intensity = ROLE_INTENSITY[role] ?? "full"; // roles missing from the table fall back to "full"
+	if (intensity === "lite") return "## Communication\nProfessional concise. No filler/hedging. Full sentences OK."; // lite: concise, full sentences allowed
+	if (intensity === "ultra") return [ // ultra: strongest compression (adds abbreviations and arrows)
+		"## Communication (ultra-compressed)",
+		"Drop: articles, filler, hedging, pleasantries. Fragments OK.",
+		"Pattern: [thing] [action] [reason].",
+		"Code/paths/symbols: exact, never abbreviated. Errors quoted exact.",
+		"Abbreviate prose words: DB/auth/config/req/res/fn/impl.",
+		"Arrows for causality: X → Y. One word when one word enough.",
+		"Security/destructive: write normal English. Resume compressed after.",
+	].join("\n");
+	return [ // remaining case ("full"): default compressed style
+		"## Communication (compressed)",
+		"Drop: articles (a/an/the), filler (just/really/basically/actually/simply), hedging, pleasantries.",
+		"Short synonyms. Fragments OK. Pattern: [thing] [action] [reason]. [next step].",
+		"Code/paths/symbols: exact. Errors quoted exact.",
+		"Security/destructive: write normal English. Resume compressed after.",
+	].join("\n");
+}
+function buildOutputContract(role: string): string { // Returns the "## Output Contract" prompt section for the role, or "" when the role has none.
+	if (role === "explorer") return [ // explorer: one located symbol per line, grouped, with totals
+		"## Output Contract",
+		"<path>:<line> — `<symbol>` — <≤6 word note>",
+		"Group: Defs: / Refs: / Callers: / Tests: / Sites:",
+		"Zero hits → \"No match.\"",
+		"Last line → totals: N defs, M refs.",
+	].join("\n");
+	if (role === "executor") return [ // executor: change summary plus re-read verification
+		"## Output Contract",
+		"<path>:<line-range> — <change ≤10 words>.",
+		"verified: <re-read OK | mismatch @ path:line>.",
+		"Refusal tokens: too-big. / needs-confirm. / ambiguous. / regressed.",
+	].join("\n");
+	if (role === "reviewer" || role === "security-reviewer") return [ // both reviewer roles share one findings format
+		"## Output Contract",
+		"<path>:<line>: <emoji> <severity>: <problem>. <fix>.",
+		"Severity: 🔴 bug, 🟡 risk, 🔵 nit, ❓ question.",
+		"Zero findings → \"No issues.\"",
+		"Sorted: file order → ascending line numbers.",
+	].join("\n");
+	if (role === "verifier") return [ // verifier: PASS/FAIL lines with evidence
+		"## Output Contract",
+		"PASS: <what verified> — <evidence ≤20 words>.",
+		"FAIL: <what failed> — <reason>. <expected vs actual>.",
+		"Evidence: file paths, test output, or diffs.",
+	].join("\n");
+	if (role === "writer") return "## Output Contract\nWrite clear documentation. Full sentences. No compression.";
+	return ""; // every other role (e.g. planner, critic, analyst, test-engineer): no strict format
+}
+/**
+ * Phase 3 (caveman): Placeholder for compressing tool descriptions in a live
+ * session to reduce input token cost per tool call. MCP tools often have verbose descriptions
+ * (e.g. "This tool allows you to search for files in the filesystem..." → "Search files in filesystem.").
+ * The intent is to compress only description text and never modify tool names or parameters.
+ *
+ * Current behavior: no-op. The function returns without touching the session,
+ * whether or not the session exposes getActiveToolNames.
+ *
+ * @param session Live session whose tool descriptions would be compressed.
+ */
+function compressSessionToolDescriptions(session: LiveSessionLike): void {
+	if (typeof session.getActiveToolNames !== "function") return;
+	// NOTE: currently a no-op. Nothing below this guard mutates the session.
+	// According to the original author, the Pi SDK does not expose a
+	// setDescription API, so tool descriptions cannot be rewritten yet.
+	// Once the SDK supports tool description mutation, the compression
+	// logic belongs here; compressToolDescription is already imported at the
+	// top of this file but is not called from this function.
+}
 function liveSystemPrompt(input: LiveSessionSpawnInput): string {
 	const memory = input.agent.memory ? buildMemoryBlock(input.agent.name, input.agent.memory, input.task.cwd, Boolean(input.agent.tools?.some((tool) => tool === "write" || tool === "edit"))) : "";
+	const role = input.task.role;
+	const styleBlock = buildCommunicationStyle(role);
+	const contractBlock = buildOutputContract(role);
+	const sensitiveConstraint = buildSensitivePathConstraint();
 	return [
 		"# pi-crew Live Subagent",
 		`Run ID: ${input.manifest.runId}`,
 		`Task ID: ${input.task.id}`,
-		`Role: ${input.task.role}`,
+		`Role: ${role}`,
 		`Agent: ${input.agent.name}`,
 		`Working directory: ${input.task.cwd}`,
 		"",
+		styleBlock,
+		contractBlock,
+		sensitiveConstraint,
+		"",
 		input.agent.systemPrompt || "Follow the user task exactly and report verification evidence.",
 		memory ? `\n${memory}` : "",
 	].filter(Boolean).join("\n");
@@ -174,11 +281,15 @@ function usageFromStats(stats: unknown): UsageState | undefined {
 export async function probeLiveSessionRuntime(): Promise<LiveSessionUnavailableResult | LiveSessionPlannedResult> {
 	const availability = await isLiveSessionRuntimeAvailable();
 	if (!availability.available) return { available: false, reason: availability.reason ?? "Live-session runtime is unavailable." };
-	return { available: true, reason: "Live-session SDK exports are available and pi-crew can run experimental in-process live agents when runtime.mode=live-session." };
+	return { available: true, reason: "Live-session SDK exports are available. pi-crew can run in-process live agents when runtime.mode=live-session." };
 }
 export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<LiveSessionRunResult> {
 	const isCurrent = input.isCurrent ?? (() => true);
+	// G1: Capture yield result from custom tool callback
+	let customToolYieldResult: YieldResult | undefined;
+	let customToolYieldResolved = false;
 	if (process.env.PI_CREW_MOCK_LIVE_SESSION === "success") {
 		const agentId = `${input.manifest.runId}:${input.task.id}`;
 		const inherited = input.runtimeConfig?.inheritContext === true && input.parentContext ? ` with inherited context: ${input.parentContext}` : "";
@@ -205,6 +316,8 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 	let controlTimer: ReturnType<typeof setInterval> | undefined;
 	let stdout = "";
 	let jsonEvents = 0;
+	const collectedJsonEvents: Record<string, unknown>[] = [];
+	let yieldResult: YieldResult | undefined;
 	try {
 		const agentDir = typeof mod.getAgentDir === "function" ? mod.getAgentDir() : undefined;
 		let resourceLoader: unknown;
@@ -222,6 +335,19 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 		}
 		const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: input.parentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd });
 		const resolvedModel = modelFromRegistry(input.modelRegistry, modelRouting.candidates[0] ?? modelRouting.requested) ?? input.parentModel;
+		// Phase 4: MCP proxy — will be determined after session creation
+		// (we check parent's MCP tools and share connections when available)
+		const mcpProxy = buildMcpProxyFromSession([], { shareMcp: true });
+		// G1: Build custom tools (submit_result + irc)
+		const agentId = `${input.manifest.runId}:${input.task.id}`;
+		const submitResultTool = createSubmitResultTool((result) => {
+			customToolYieldResult = result;
+			customToolYieldResolved = true;
+		});
+		const ircTool = createIrcTool(agentId);
+		const customTools = [submitResultTool, ircTool];
 		const created = await mod.createAgentSession({
 			cwd: input.task.cwd,
 			...(agentDir ? { agentDir } : {}),
@@ -231,11 +357,34 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 			...(input.modelRegistry ? { modelRegistry: input.modelRegistry } : {}),
 			...(resolvedModel ? { model: resolvedModel } : {}),
 			...(input.agent.thinking ? { thinkingLevel: input.agent.thinking } : {}),
+			...(mcpProxy.enableMcp ? {} : { enableMCP: false }),
+			customTools,
 		});
 		session = created.session;
 		filterActiveTools(session, input.agent);
 		await session.bindExtensions?.({});
-		const agentId = `${input.manifest.runId}:${input.task.id}`;
+		// Phase 3 (caveman): Compress tool descriptions to reduce input token cost
+		compressSessionToolDescriptions(session);
+		// Phase 5: Initialize extension runner bridge if available
+		// The bridge provides extension-like APIs (sendMessage, setActiveTools, etc.)
+		// to the extension runner if the session exposes one.
+		const extensionBridge = buildExtensionBridge(session as never);
+		if (extensionBridge) {
+			const extRunner = (session as Record<string, unknown>).extensionRunner;
+			if (extRunner && typeof (extRunner as Record<string, unknown>).initialize === "function") {
+				try {
+					(extRunner as { initialize: (apis: unknown, host: unknown) => void }).initialize(extensionBridge.apis, extensionBridge.host);
+					if (typeof (extRunner as Record<string, unknown>).emit === "function") {
+						await (extRunner as { emit: (event: unknown) => Promise<void> }).emit({ type: "session_start" });
+					}
+				} catch {
+					// Extension runner initialization failure should not block the session
+				}
+			}
+		}
 		registerLiveAgent({ agentId, runId: input.manifest.runId, taskId: input.task.id, session, status: "running" });
 		let controlCursor: LiveAgentControlCursor = { offset: 0 };
 		const seenControlRequestIds = new Set<string>();
@@ -286,6 +435,10 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 					stdout += `${text}\n`;
 					input.onOutput?.(text);
 				}
+				// Phase 1: collect events for yield detection
+				if (event && typeof event === "object" && !Array.isArray(event)) {
+					collectedJsonEvents.push(event as Record<string, unknown>);
+				}
 			});
 		}
 		if (input.signal) {
@@ -293,17 +446,154 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 			else input.signal.addEventListener("abort", () => { void session?.abort?.(); }, { once: true });
 		}
 		const effectivePrompt = input.runtimeConfig?.inheritContext === true && input.parentContext ? `${input.parentContext}\n\n---\n# Live Subagent Task\n${input.prompt}` : input.prompt;
-		await session.prompt?.(effectivePrompt, { source: "api", expandPromptTemplates: false });
+		// Phase 3: Wrap session.prompt with timeout for graceful cancellation
+		const sessionTimeoutMs = DEFAULT_LIVE_SESSION.responseTimeoutMs;
+		const promptPromise = session.prompt?.(effectivePrompt, { source: "api", expandPromptTemplates: false });
+		if (promptPromise) {
+			const timeoutPromise = new Promise<void>((_, reject) => {
+				const timer = setTimeout(() => reject(new Error(`Live-session timed out after ${sessionTimeoutMs}ms`)), sessionTimeoutMs);
+				timer.unref();
+				input.signal?.addEventListener("abort", () => clearTimeout(timer), { once: true });
+			});
+			try {
+				await Promise.race([promptPromise, timeoutPromise]);
+			} catch (promptError) {
+				const msg = promptError instanceof Error ? promptError.message : String(promptError);
+				if (msg.includes("timed out")) {
+					await session.abort?.();
+					updateLiveAgentStatus(agentId, "failed");
+					return { available: true, exitCode: 1, stdout: stdout.trim(), stderr: msg, jsonEvents, error: msg };
+				}
+				throw promptError;
+			}
+		}
+		// --- Phase 1: Yield enforcement loop ---
+		// After the initial prompt completes, check if the worker called submit_result.
+		// Priority: 1) custom tool callback (G1), 2) JSON event detection (legacy).
+		const yieldConfig = input.runtimeConfig?.yield ?? { enabled: DEFAULT_YIELD_CONFIG.enabled };
+		const yieldEnabled = yieldConfig.enabled !== false;
+		if (yieldEnabled && session) {
+			// Check custom tool callback first (G1)
+			if (customToolYieldResolved && customToolYieldResult) {
+				yieldResult = customToolYieldResult;
+			} else {
+				// Legacy: detect from JSON events
+				const alreadyYielded = hasYieldInOutput(collectedJsonEvents);
+				if (alreadyYielded) {
+					const yieldEvent = collectedJsonEvents.find((e) => isYieldEvent(e));
+					if (yieldEvent) yieldResult = extractYieldResult(yieldEvent);
+				}
+			}
+			// Phase 2: Validate yield data against output schema if provided
+			let schemaFailures = 0;
+			const maxSchemaFailures = 2;
+			if (yieldResult && input.outputSchema) {
+				const validation = await validateYieldData(yieldResult.structuredData, input.outputSchema);
+				if (!validation.valid) {
+					schemaFailures++;
+					yieldResult = undefined;
+					customToolYieldResolved = false;
+					const schemaReminder = `Your submit_result data did not match the required schema: ${validation.error}. Please fix and call submit_result again with valid data.`;
+					try {
+						await session.prompt?.(schemaReminder, { source: "api", expandPromptTemplates: false });
+					} catch {
+						/* ignore */
+					}
+					await new Promise((resolve) => setTimeout(resolve, DEFAULT_LIVE_SESSION.yieldPollIntervalMs));
+					// Check again after schema reminder
+					if (customToolYieldResolved && customToolYieldResult) {
+						yieldResult = customToolYieldResult;
+					} else {
+						const newEvents = collectedJsonEvents.slice(-10);
+						if (hasYieldInOutput(newEvents)) {
+							const yieldEvent = newEvents.find((e) => isYieldEvent(e));
+							if (yieldEvent) {
+								const candidate = extractYieldResult(yieldEvent);
+								if (candidate && input.outputSchema) {
+									const revalidation = await validateYieldData(candidate.structuredData, input.outputSchema);
+									if (revalidation.valid || schemaFailures >= maxSchemaFailures) {
+										yieldResult = candidate;
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+			// Reminder loop — only if yield not yet received
+			const maxReminders = yieldConfig.maxReminders ?? DEFAULT_LIVE_SESSION.maxYieldRetries;
+			let retryCount = 0;
+			while (!customToolYieldResolved && !yieldResult && retryCount < maxReminders && !input.signal?.aborted) {
+				retryCount++;
+				const reminder = buildYieldReminder(retryCount, maxReminders, yieldConfig.reminderPrompt);
+				try {
+					// G6: Constrain tool set to submit_result before sending reminder
+					const prevTools = typeof session.getActiveToolNames === "function" ? session.getActiveToolNames() : [];
+					if (typeof session.setActiveToolsByName === "function" && prevTools.length > 0) {
+						session.setActiveToolsByName(["submit_result"]);
+					}
+					await session.prompt?.(reminder, { source: "api", expandPromptTemplates: false });
+					// Restore previous tools
+					if (typeof session.setActiveToolsByName === "function" && prevTools.length > 0) {
+						session.setActiveToolsByName(prevTools);
+					}
+				} catch {
+					break;
+				}
+				const pollInterval = DEFAULT_LIVE_SESSION.yieldPollIntervalMs;
+				await new Promise((resolve) => setTimeout(resolve, pollInterval));
+				// Check custom tool callback
+				if (customToolYieldResolved && customToolYieldResult) {
+					yieldResult = customToolYieldResult;
+					break;
+				}
+				// Legacy: check JSON events
+				if (hasYieldInOutput(collectedJsonEvents.slice(-10))) {
+					const yieldEvent = collectedJsonEvents.slice(-10).find((e) => isYieldEvent(e));
+					if (yieldEvent) yieldResult = extractYieldResult(yieldEvent);
+					break;
+				}
+			}
+			if (!customToolYieldResolved && !yieldResult && !input.signal?.aborted && retryCount >= maxReminders) {
+				input.onEvent?.({ type: "task.attention", runId: input.manifest.runId, taskId: input.task.id, message: "Live-session worker completed without calling submit_result tool.", data: { activityState: "needs_attention", reason: "no_yield", attempts: retryCount } });
+			}
+		}
 		const usage = usageFromStats(typeof session.getStats === "function" ? session.getStats() : session.stats);
 		updateLiveAgentStatus(agentId, "completed");
-		return { available: true, exitCode: 0, stdout: stdout.trim(), stderr: created.modelFallbackMessage ?? "", jsonEvents, usage };
+	return { available: true, exitCode: 0, stdout: stdout.trim(), stderr: created.modelFallbackMessage ?? "", jsonEvents, usage, yieldResult };
 	} catch (error) {
 		const message = error instanceof Error ? error.message : String(error);
+		// Phase 8: Log diagnostics on failure
+		try {
+			const agents = listLiveAgents();
+			const health = collectLiveSessionHealth(agents, () => undefined);
+			const diagnostics = formatLiveSessionDiagnostics(health);
+			input.onEvent?.({ type: "live-session.diagnostics", data: diagnostics });
+		} catch (diagError) {
+			logInternalError("live-session.diagnostics", diagError);
+		}
 		updateLiveAgentStatus(`${input.manifest.runId}:${input.task.id}`, "failed");
 		return { available: true, exitCode: 1, stdout: stdout.trim(), stderr: message, jsonEvents, error: message };
 	} finally {
-		if (controlTimer) clearInterval(controlTimer);
-		unsubscribeControlRealtime?.();
+		// H6: Unsubscribe listeners FIRST before clearing timer to prevent race
 		unsubscribe?.();
+		unsubscribeControlRealtime?.();
+		if (controlTimer) clearInterval(controlTimer);
+		// Phase 8: Emit final health snapshot
+		try {
+			const agents = listLiveAgents();
+			if (agents.length > 0) {
+				const health = collectLiveSessionHealth(agents, () => undefined);
+				input.onEvent?.({ type: "live-session.health", data: health });
+			}
+		} catch (healthError) {
+			logInternalError("live-session.health-snapshot", healthError);
+		}
 	}
 }

package/src/runtime/mcp-proxy.ts ADDED Viewed

@@ -0,0 +1,113 @@
+/**
+ * G2: MCP Proxy for live-session workers.
+ *
+ * When the parent process has MCP servers configured, live-session workers
+ * can reuse those connections instead of establishing their own. This module
+ * discovers MCP tools available in the parent environment and creates proxy
+ * tool definitions that forward calls through the parent's connections.
+ *
+ * Strategy:
+ * 1. If the Pi SDK session has MCP tools after bindExtensions → use them directly
+ * 2. If not → create proxy custom tools that wrap MCP calls
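+ * 3. If no MCP config exists → disable MCP in the session
+ *
+ * NOTE: steps 2–3 are not implemented yet. Proxy tool creation is a stub that
+ * returns no tools, so buildMcpProxyConfig currently always keeps MCP enabled
+ * in the child session and lets it discover MCP on its own.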
+ *
+ * The Pi SDK's `createAgentSession` accepts a `customTools` array for injecting
+ * proxy tools. The session also accepts `enableMCP: false` to skip MCP discovery
+ * when proxying from the parent.
+ */
+import { defineTool, type ToolDefinition } from "@mariozechner/pi-coding-agent";
+import { Type, type Static, type TSchema } from "@sinclair/typebox";
+export interface McpProxyConfig { // Describes how a live-session worker should obtain MCP access.
+	/**
+	 * Whether to enable MCP in the child session.
+	 * buildMcpProxyConfig sets this to false only when it also supplies a
+	 * non-empty proxyTools array; in every other case it stays true.
+	 */
+	enableMcp: boolean;
+	/**
+	 * Proxy tools to inject via customTools (replaces MCP connection).
+	 * Empty when nothing is being proxied.
+	 */
+	proxyTools: Array<ToolDefinition<TSchema, unknown>>;
+	/**
+	 * Names of MCP tools available (for metadata/tracking).
+	 * May be non-empty even when proxyTools is empty: the parent's tool names
+	 * are still reported when no proxy tools could be created for them.
+	 */
+	proxyToolNames: string[];
+}
+/**
+ * Build MCP proxy configuration for a live-session worker.
+ *
+ * Decision order:
+ * 1. `shareMcp === false` → MCP stays enabled in the child, nothing proxied,
+ *    no tool names reported.
+ * 2. No parent MCP tool names (missing or empty list) → same result as (1).
+ * 3. Parent tool names present but no proxy tools could be created → MCP stays
+ *    enabled in the child; the parent's names are reported in `proxyToolNames`.
+ * 4. Proxy tools created → MCP disabled in the child; the proxy tools and the
+ *    parent's names are returned.
+ *
+ * NOTE(review): createMcpProxyTools currently always returns an empty array,
+ * so case (4) cannot be reached yet and `enableMcp` is always true.
+ *
+ * @param options.parentMcpTools — MCP tool names from the parent session (if available)
+ * @param options.shareMcp — Whether to share MCP connections (default: true);
+ *   only an explicit `false` turns sharing off, `undefined` counts as sharing.
+ * @returns The configuration to apply to the child session. In cases (3) and
+ *   (4) `proxyToolNames` is the same array instance as `parentMcpTools`, not a copy.
+ */
+export function buildMcpProxyConfig(options: {
+	parentMcpTools?: string[];
+	shareMcp?: boolean;
+}): McpProxyConfig {
+	if (options.shareMcp === false) {
+		return { enableMcp: true, proxyTools: [], proxyToolNames: [] }; // sharing explicitly off: keep MCP enabled in the child, proxy nothing
+	}
+	const parentTools = options.parentMcpTools ?? []; // a missing list is treated like an empty one
+	if (parentTools.length === 0) {
+		// No MCP tools in parent — let session discover on its own
+		return { enableMcp: true, proxyTools: [], proxyToolNames: [] };
+	}
+	// MCP tools exist in parent — try to create proxy tools.
+	// If proxy tools are not available (stub), keep enableMcp: true
+	// so the child session can self-discover MCP instead of losing all access.
+	const proxyTools = createMcpProxyTools(parentTools);
+	if (proxyTools.length === 0) {
+		// No proxy tools available — let child discover MCP on its own
+		return { enableMcp: true, proxyTools: [], proxyToolNames: parentTools }; // names are still reported for tracking
+	}
+	return {
+		enableMcp: false, // proxies stand in for the child's own MCP connections
+		proxyTools,
+		proxyToolNames: parentTools,
+	};
+}
+/**
+ * Create lightweight proxy tools that represent MCP tools from the parent.
+ *
+ * Current behavior: this is a stub. It ignores `toolNames` and always returns
+ * an empty array, because a child session has no reference to the parent's
+ * MCP manager through which calls could be forwarded. Callers must therefore
+ * treat an empty result as "no proxies available".
+ *
+ * Intended behavior (not implemented): return one proxy tool per parent MCP
+ * tool, with execution forwarded through the parent's MCP connections.
+ * In a future iteration, these can be wired to the actual MCP connections
+ * via an inter-process bridge.
+ *
+ * @param toolNames — MCP tool names from the parent session (currently unused)
+ * @returns Always an empty array in the current implementation.
+ */
+function createMcpProxyTools(
+	toolNames: string[],
+): Array<ToolDefinition<TSchema, unknown>> {
+	// For now, we don't create individual proxy tools because we can't
+	// forward MCP calls without the parent's MCP manager reference.
+	//
+	// Instead, we let the child session discover MCP on its own (enableMcp: true)
+	// or share the parent's MCP config directory.
+	//
+	// This will be enhanced when we add inter-process MCP call forwarding.
+	return [];
+}
+/**
+ * Discover MCP tool names from a live session's active tools.
+ * MCP tools typically have names containing "__" (e.g., "mcp__filesystem__read_file").
+ *
+ * A name is kept when any of the following holds:
+ * - it starts with "mcp__";
+ * - it starts with "mcp-";
+ * - it contains "__" anywhere and does not start with "submit_result".
+ *
+ * NOTE(review): the last rule is a naming heuristic — any non-MCP tool whose
+ * name happens to contain "__" is also reported. Confirm this is acceptable
+ * for the tool names used by callers.
+ *
+ * @param activeToolNames — names of the tools active in the session
+ * @returns A new array with the matching names, in their original order.
+ */
+export function discoverMcpToolNames(activeToolNames: string[]): string[] {
+	return activeToolNames.filter(
+		(name) =>
+			name.startsWith("mcp__") ||
+			name.startsWith("mcp-") ||
+			(name.includes("__") && !name.startsWith("submit_result")), // "submit_result…" names are excluded from the "__" rule only
+	);
+}
+/**
+ * Build MCP proxy config from a real Pi SDK session's active tools.
+ * This is the preferred way — inspect what the parent session has available.
+ *
+ * Filters `activeToolNames` down to MCP-looking names with
+ * discoverMcpToolNames, then delegates to buildMcpProxyConfig.
+ *
+ * @param activeToolNames — names of the tools active in the parent session
+ * @param options.shareMcp — forwarded unchanged to buildMcpProxyConfig;
+ *   omitted or `undefined` means sharing is not disabled
+ * @returns The MCP proxy configuration for the child session.
+ */
+export function buildMcpProxyFromSession(
+	activeToolNames: string[],
+	options?: { shareMcp?: boolean },
+): McpProxyConfig {
+	const mcpTools = discoverMcpToolNames(activeToolNames);
+	return buildMcpProxyConfig({ parentMcpTools: mcpTools, shareMcp: options?.shareMcp });
+}