npm - pi-crew - Versions diffs - 0.1.51 → 0.2.0 - Mend

pi-crew 0.1.51 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (239) hide show

package/CHANGELOG.md +56 -1
package/README.md +176 -781
package/agents/analyst.md +11 -11
package/agents/critic.md +11 -11
package/agents/executor.md +11 -11
package/agents/explorer.md +11 -11
package/agents/planner.md +11 -11
package/agents/reviewer.md +11 -11
package/agents/security-reviewer.md +11 -11
package/agents/test-engineer.md +11 -11
package/agents/verifier.md +70 -11
package/agents/writer.md +11 -11
package/docs/actions-reference.md +595 -0
package/docs/commands-reference.md +347 -0
package/docs/runtime-flow.md +148 -148
package/index.ts +6 -6
package/package.json +99 -99
package/skills/async-worker-recovery/SKILL.md +42 -42
package/skills/context-artifact-hygiene/SKILL.md +52 -52
package/skills/delegation-patterns/SKILL.md +54 -54
package/skills/mailbox-interactive/SKILL.md +40 -40
package/skills/model-routing-context/SKILL.md +39 -39
package/skills/multi-perspective-review/SKILL.md +58 -58
package/skills/observability-reliability/SKILL.md +41 -41
package/skills/orchestration/SKILL.md +157 -157
package/skills/ownership-session-security/SKILL.md +41 -41
package/skills/pi-extension-lifecycle/SKILL.md +39 -39
package/skills/requirements-to-task-packet/SKILL.md +63 -63
package/skills/resource-discovery-config/SKILL.md +41 -41
package/skills/runtime-state-reader/SKILL.md +44 -44
package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
package/skills/state-mutation-locking/SKILL.md +42 -42
package/skills/systematic-debugging/SKILL.md +67 -67
package/skills/ui-render-performance/SKILL.md +39 -39
package/skills/verification-before-done/SKILL.md +57 -57
package/skills/worktree-isolation/SKILL.md +39 -39
package/src/adapters/claude-adapter.ts +25 -0
package/src/adapters/codex-adapter.ts +21 -0
package/src/adapters/cursor-adapter.ts +17 -0
package/src/adapters/export-util.ts +137 -0
package/src/adapters/index.ts +15 -0
package/src/adapters/registry.ts +18 -0
package/src/adapters/types.ts +23 -0
package/src/agents/agent-config.ts +2 -0
package/src/agents/agent-search.ts +98 -98
package/src/agents/discover-agents.ts +2 -1
package/src/config/config.ts +13 -1
package/src/config/drift-detector.ts +211 -0
package/src/config/markers.ts +327 -0
package/src/config/resilient-parser.ts +108 -0
package/src/config/suggestions.ts +74 -0
package/src/extension/cross-extension-rpc.ts +103 -94
package/src/extension/project-init.ts +21 -1
package/src/extension/register.ts +45 -14
package/src/extension/registration/commands.ts +77 -8
package/src/extension/registration/subagent-tools.ts +10 -1
package/src/extension/registration/team-tool.ts +10 -1
package/src/extension/registration/viewers.ts +48 -34
package/src/extension/run-bundle-schema.ts +89 -89
package/src/extension/run-import.ts +25 -1
package/src/extension/run-index.ts +5 -1
package/src/extension/run-maintenance.ts +142 -68
package/src/extension/team-manager-command.ts +10 -1
package/src/extension/team-tool/doctor.ts +28 -3
package/src/extension/team-tool/handle-settings.ts +195 -188
package/src/extension/team-tool/inspect.ts +41 -41
package/src/extension/team-tool/intent-policy.ts +42 -42
package/src/extension/team-tool/lifecycle-actions.ts +27 -8
package/src/extension/team-tool/plan.ts +19 -19
package/src/extension/team-tool/run.ts +12 -1
package/src/extension/team-tool.ts +11 -1
package/src/i18n.ts +184 -184
package/src/observability/exporters/otlp-exporter.ts +92 -77
package/src/prompt/prompt-runtime.ts +72 -72
package/src/runtime/agent-memory.ts +72 -72
package/src/runtime/agent-observability.ts +114 -114
package/src/runtime/async-marker.ts +26 -26
package/src/runtime/attention-events.ts +28 -28
package/src/runtime/auto-resume.ts +100 -0
package/src/runtime/background-runner.ts +11 -1
package/src/runtime/cancellation-token.ts +89 -89
package/src/runtime/cancellation.ts +61 -61
package/src/runtime/capability-inventory.ts +116 -116
package/src/runtime/child-pi.ts +7 -2
package/src/runtime/compaction-summary.ts +271 -0
package/src/runtime/completion-guard.ts +190 -190
package/src/runtime/crash-recovery.ts +33 -0
package/src/runtime/delta-conflict.ts +360 -0
package/src/runtime/direct-run.ts +35 -35
package/src/runtime/foreground-control.ts +82 -82
package/src/runtime/green-contract.ts +46 -46
package/src/runtime/group-join.ts +106 -106
package/src/runtime/heartbeat-gradient.ts +28 -28
package/src/runtime/heartbeat-watcher.ts +124 -124
package/src/runtime/iteration-hooks.ts +262 -0
package/src/runtime/live-agent-control.ts +88 -88
package/src/runtime/live-control-realtime.ts +36 -36
package/src/runtime/live-extension-bridge.ts +150 -150
package/src/runtime/live-irc.ts +92 -92
package/src/runtime/live-session-health.ts +100 -100
package/src/runtime/loop-gates.ts +129 -0
package/src/runtime/metric-parser.ts +40 -0
package/src/runtime/notebook-helpers.ts +90 -90
package/src/runtime/orphan-sentinel.ts +7 -7
package/src/runtime/parallel-research.ts +44 -44
package/src/runtime/phase-progress.ts +217 -0
package/src/runtime/pi-args.ts +38 -11
package/src/runtime/pi-json-output.ts +111 -111
package/src/runtime/pi-spawn.ts +57 -7
package/src/runtime/policy-engine.ts +79 -79
package/src/runtime/post-checks.ts +122 -0
package/src/runtime/progress-event-coalescer.ts +43 -43
package/src/runtime/prose-compressor.ts +164 -164
package/src/runtime/recovery-recipes.ts +74 -74
package/src/runtime/result-extractor.ts +121 -121
package/src/runtime/role-permission.ts +39 -39
package/src/runtime/sensitive-paths.ts +2 -2
package/src/runtime/session-resources.ts +25 -25
package/src/runtime/session-snapshot.ts +59 -59
package/src/runtime/session-usage.ts +79 -79
package/src/runtime/sidechain-output.ts +29 -29
package/src/runtime/stream-preview.ts +177 -177
package/src/runtime/supervisor-contact.ts +59 -59
package/src/runtime/task-display.ts +38 -38
package/src/runtime/task-graph.ts +207 -0
package/src/runtime/task-quality.ts +207 -0
package/src/runtime/task-runner/capabilities.ts +78 -78
package/src/runtime/task-runner/live-executor.ts +7 -1
package/src/runtime/task-runner/progress.ts +119 -119
package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
package/src/runtime/task-runner/result-utils.ts +14 -14
package/src/runtime/task-runner/run-projection.ts +103 -103
package/src/runtime/task-runner/state-helpers.ts +22 -22
package/src/runtime/team-runner.ts +117 -7
package/src/runtime/worker-heartbeat.ts +21 -21
package/src/runtime/worker-startup.ts +57 -57
package/src/runtime/workflow-state.ts +187 -0
package/src/runtime/workspace-tree.ts +298 -298
package/src/schema/config-schema.ts +11 -0
package/src/schema/validation-types.ts +148 -0
package/src/skills/skill-templates.ts +374 -0
package/src/state/active-run-registry.ts +35 -11
package/src/state/atomic-write.ts +33 -26
package/src/state/contracts.ts +1 -0
package/src/state/event-reconstructor.ts +217 -0
package/src/state/locks.ts +2 -13
package/src/state/mailbox.ts +4 -3
package/src/state/state-store.ts +32 -14
package/src/state/task-claims.ts +44 -44
package/src/state/types.ts +9 -0
package/src/state/usage.ts +29 -29
package/src/subagents/async-entry.ts +1 -1
package/src/subagents/index.ts +3 -3
package/src/subagents/live/control.ts +1 -1
package/src/subagents/live/manager.ts +1 -1
package/src/subagents/live/realtime.ts +1 -1
package/src/subagents/live/session-runtime.ts +1 -1
package/src/subagents/manager.ts +1 -1
package/src/subagents/spawn.ts +1 -1
package/src/teams/team-serializer.ts +38 -38
package/src/types/diff.d.ts +18 -18
package/src/ui/crew-footer.ts +101 -101
package/src/ui/crew-select-list.ts +111 -111
package/src/ui/crew-widget.ts +5 -2
package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
package/src/ui/dashboard-panes/capability-pane.ts +59 -59
package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
package/src/ui/dashboard-panes/progress-pane.ts +11 -0
package/src/ui/dynamic-border.ts +25 -25
package/src/ui/layout-primitives.ts +106 -106
package/src/ui/loaders.ts +158 -158
package/src/ui/render-coalescer.ts +51 -51
package/src/ui/render-diff.ts +119 -119
package/src/ui/render-scheduler.ts +143 -143
package/src/ui/run-action-dispatcher.ts +10 -1
package/src/ui/spinner.ts +17 -17
package/src/ui/status-colors.ts +58 -58
package/src/ui/syntax-highlight.ts +116 -116
package/src/ui/transcript-entries.ts +258 -258
package/src/utils/completion-dedupe.ts +63 -63
package/src/utils/frontmatter.ts +68 -68
package/src/utils/git.ts +262 -262
package/src/utils/ids.ts +17 -17
package/src/utils/incremental-reader.ts +104 -104
package/src/utils/names.ts +27 -27
package/src/utils/redaction.ts +44 -44
package/src/utils/safe-paths.ts +47 -47
package/src/utils/scan-cache.ts +136 -136
package/src/utils/sleep.ts +40 -26
package/src/utils/task-name-generator.ts +337 -337
package/src/workflows/validate-workflow.ts +40 -40
package/src/worktree/branch-freshness.ts +45 -45
package/teams/default.team.md +12 -12
package/teams/fast-fix.team.md +11 -11
package/teams/implementation.team.md +18 -18
package/teams/parallel-research.team.md +14 -14
package/teams/research.team.md +11 -11
package/teams/review.team.md +12 -12
package/workflows/default.workflow.md +30 -29
package/workflows/fast-fix.workflow.md +23 -22
package/workflows/implementation.workflow.md +43 -43
package/workflows/parallel-research.workflow.md +46 -46
package/workflows/research.workflow.md +22 -22
package/workflows/review.workflow.md +30 -30
package/docs/refactor-tasks-phase3.md +0 -394
package/docs/refactor-tasks-phase4.md +0 -564
package/docs/refactor-tasks-phase5.md +0 -402
package/docs/refactor-tasks-phase6.md +0 -662
package/docs/refactor-tasks.md +0 -1484
package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +0 -261
package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +0 -111
package/docs/research/AUDIT_OH_MY_PI.md +0 -261
package/docs/research/AUDIT_PI_CREW.md +0 -457
package/docs/research/CAVEMAN-DEEP-RESEARCH.md +0 -281
package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +0 -264
package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +0 -343
package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +0 -480
package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +0 -354
package/docs/research/IMPLEMENTATION_PLAN.md +0 -385
package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +0 -502
package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +0 -266
package/docs/research/REMAINING-GAPS-PLAN.md +0 -363
package/docs/research/SESSION-SUMMARY-2026-05-08.md +0 -146
package/docs/research/UI-RESPONSIVENESS-AUDIT.md +0 -173
package/docs/research-awesome-agent-skills-distillation.md +0 -100
package/docs/research-extension-examples.md +0 -297
package/docs/research-extension-system.md +0 -324
package/docs/research-oh-my-pi-distillation.md +0 -369
package/docs/research-optimization-plan.md +0 -548
package/docs/research-phase10-distillation.md +0 -199
package/docs/research-phase11-distillation.md +0 -201
package/docs/research-phase8-operator-experience-plan.md +0 -819
package/docs/research-phase9-observability-reliability-plan.md +0 -1190
package/docs/research-pi-coding-agent.md +0 -357
package/docs/research-source-pi-crew-reference.md +0 -174
package/docs/research-ui-optimization-plan.md +0 -480
package/docs/source-runtime-refactor-map.md +0 -107
package/src/utils/atomic-write.ts +0 -33

package/src/runtime/completion-guard.ts CHANGED Viewed

@@ -1,190 +1,190 @@
-import * as fs from "node:fs";
-import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
-// ============================================================================
-// Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
-// made no observable mutations. Used by task-runner.ts.
-// ============================================================================
-export interface CompletionMutationGuardInput {
-	role: string;
-	taskText?: string;
-	transcriptPath?: string;
-	stdout?: string;
-}
-export interface CompletionMutationGuardResult {
-	expectedMutation: boolean;
-	observedMutation: boolean;
-	reason?: "no_mutation_observed";
-	observedTools: string[];
-}
-const MUTATING_ROLES = new Set(["executor", "test-engineer"]);
-const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);
-const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;
-const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)\b/i;
-const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
-function asRecord(value: unknown): Record<string, unknown> | undefined {
-	return value && typeof value === "object" && !Array.isArray(value) ? value as Record<string, unknown> : undefined;
-}
-function commandText(value: unknown): string {
-	const record = asRecord(value);
-	if (!record) return typeof value === "string" ? value : "";
-	for (const key of ["command", "cmd", "script", "input"]) {
-		const raw = record[key];
-		if (typeof raw === "string") return raw;
-	}
-	return JSON.stringify(record);
-}
-function isMutatingTool(tool: string, args: unknown): boolean {
-	const normalized = tool.toLowerCase();
-	if (MUTATING_TOOLS.has(normalized)) return true;
-	if (normalized === "bash" || normalized === "shell" || normalized === "powershell") {
-		const command = commandText(args).trim();
-		if (!command) return false;
-		// Check mutating patterns first: sed -i is mutating even though plain sed is read-only.
-		if (MUTATING_COMMANDS.test(command)) return true;
-		if (READ_ONLY_COMMANDS.test(command)) return false;
-		// If the command doesn't match either list, treat unknown bash calls as potentially mutating.
-		return true;
-	}
-	return false;
-}
-function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
-	const record = asRecord(event);
-	if (!record) return [];
-	const calls: Array<{ tool: string; args?: unknown }> = [];
-	const directTool = record.toolName ?? record.name ?? record.tool;
-	if (typeof directTool === "string" && (record.type === "tool_execution_start" || record.type === "toolCall" || record.type === "tool_call")) {
-		calls.push({ tool: directTool, args: record.args ?? record.input });
-	}
-	const content = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
-	if (Array.isArray(content)) {
-		for (const part of content) {
-			const item = asRecord(part);
-			if (!item) continue;
-			const tool = item.name ?? item.toolName ?? item.tool;
-			if (typeof tool === "string" && (item.type === "toolCall" || item.type === "tool_call" || item.type === "tool_execution_start")) calls.push({ tool, args: item.input ?? item.args });
-		}
-	}
-	return calls;
-}
-function transcriptText(input: CompletionMutationGuardInput): string {
-	if (input.transcriptPath && fs.existsSync(input.transcriptPath)) return fs.readFileSync(input.transcriptPath, "utf-8");
-	return input.stdout ?? "";
-}
-export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
-	if (!MUTATING_ROLES.has(input.role)) return false;
-	return !READ_ONLY_HINTS.test(input.taskText ?? "");
-}
-export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
-	const expectedMutation = expectsImplementationMutation(input);
-	const observedTools: string[] = [];
-	let observedMutation = false;
-	const text = transcriptText(input);
-	for (const line of text.split("\n")) {
-		const trimmed = line.trim();
-		if (!trimmed) continue;
-		let event: unknown;
-		try { event = JSON.parse(trimmed); } catch { continue; }
-		for (const call of collectToolCallsFromEvent(event)) {
-			observedTools.push(call.tool);
-			if (isMutatingTool(call.tool, call.args)) observedMutation = true;
-		}
-	}
-	return {
-		expectedMutation,
-		observedMutation,
-		observedTools,
-		...(expectedMutation && !observedMutation ? { reason: "no_mutation_observed" as const } : {}),
-	};
-}
-// ============================================================================
-// Phase 11a: Artifact-based Completion Verification — a second layer that
-// checks whether a completed task actually produced meaningful artifacts.
-// ============================================================================
-/**
- * Guard against false-positive task completions.
- *
- * Checks whether a task that claims success actually produced meaningful output.
- * Returns a verification result with the green level (0-3) and any warnings.
- */
-export interface CompletionVerifyResult {
-	/** 0 = no output, 1 = minimal, 2 = moderate, 3 = strong */
-	greenLevel: number;
-	/** Warnings about potentially incomplete work */
-	warnings: string[];
-}
-const MAX_OUTPUT_PREVIEW = 200;
-function isTrivialError(error: string | undefined): boolean {
-	if (!error) return false;
-	return error.trim().length === 0;
-}
-export function verifyTaskCompletion(
-	task: TeamTaskState,
-	manifest: TeamRunManifest,
-): CompletionVerifyResult {
-	const warnings: string[] = [];
-	let greenLevel = 0;
-	// Check 1: Has an error?
-	if (task.error && !isTrivialError(task.error)) {
-		return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
-	}
-	// Check 2: Has result artifact?
-	if (task.resultArtifact) {
-		greenLevel += 1;
-	}
-	// Check 3: Has transcript?
-	if (task.transcriptArtifact) {
-		greenLevel += 1;
-	}
-	// Check 4: For implementation tasks, verify artifacts were actually produced
-	const runArtifacts = manifest.artifacts.filter(
-		(a) => a.producer === task.id || a.producer === task.agent,
-	);
-	if (runArtifacts.length > 0) {
-		greenLevel += 1;
-	} else if (greenLevel < 3) {
-		warnings.push("No run-level artifacts produced by this task");
-	}
-	// Check 5: Usage tracking — did the task actually consume tokens?
-	if (task.usage) {
-		const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
-		if (totalTokens === 0 && greenLevel < 3) {
-			warnings.push("Task reports zero token usage — may not have executed");
-		}
-	}
-	return {
-		greenLevel: Math.min(greenLevel, 3),
-		warnings,
-	};
-}
-/**
- * Format a preview of task output for diagnostic display.
- */
-export function formatOutputPreview(output: string | undefined): string {
-	if (!output) return "(no output)";
-	const trimmed = output.trim();
-	if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;
-	return trimmed.slice(0, MAX_OUTPUT_PREVIEW) + "...";
-}
+import * as fs from "node:fs";
+import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
+// ============================================================================
+// Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
+// made no observable mutations. Used by task-runner.ts.
+// ============================================================================
+export interface CompletionMutationGuardInput {
+	role: string;
+	taskText?: string;
+	transcriptPath?: string;
+	stdout?: string;
+}
+export interface CompletionMutationGuardResult {
+	expectedMutation: boolean;
+	observedMutation: boolean;
+	reason?: "no_mutation_observed";
+	observedTools: string[];
+}
+const MUTATING_ROLES = new Set(["executor", "test-engineer"]);
+const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);
+const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;
+const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)\b/i;
+const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
+function asRecord(value: unknown): Record<string, unknown> | undefined {
+	return value && typeof value === "object" && !Array.isArray(value) ? value as Record<string, unknown> : undefined;
+}
+function commandText(value: unknown): string {
+	const record = asRecord(value);
+	if (!record) return typeof value === "string" ? value : "";
+	for (const key of ["command", "cmd", "script", "input"]) {
+		const raw = record[key];
+		if (typeof raw === "string") return raw;
+	}
+	return JSON.stringify(record);
+}
+function isMutatingTool(tool: string, args: unknown): boolean {
+	const normalized = tool.toLowerCase();
+	if (MUTATING_TOOLS.has(normalized)) return true;
+	if (normalized === "bash" || normalized === "shell" || normalized === "powershell") {
+		const command = commandText(args).trim();
+		if (!command) return false;
+		// Check mutating patterns first: sed -i is mutating even though plain sed is read-only.
+		if (MUTATING_COMMANDS.test(command)) return true;
+		if (READ_ONLY_COMMANDS.test(command)) return false;
+		// If the command doesn't match either list, treat unknown bash calls as potentially mutating.
+		return true;
+	}
+	return false;
+}
+function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
+	const record = asRecord(event);
+	if (!record) return [];
+	const calls: Array<{ tool: string; args?: unknown }> = [];
+	const directTool = record.toolName ?? record.name ?? record.tool;
+	if (typeof directTool === "string" && (record.type === "tool_execution_start" || record.type === "toolCall" || record.type === "tool_call")) {
+		calls.push({ tool: directTool, args: record.args ?? record.input });
+	}
+	const content = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
+	if (Array.isArray(content)) {
+		for (const part of content) {
+			const item = asRecord(part);
+			if (!item) continue;
+			const tool = item.name ?? item.toolName ?? item.tool;
+			if (typeof tool === "string" && (item.type === "toolCall" || item.type === "tool_call" || item.type === "tool_execution_start")) calls.push({ tool, args: item.input ?? item.args });
+		}
+	}
+	return calls;
+}
+function transcriptText(input: CompletionMutationGuardInput): string {
+	if (input.transcriptPath && fs.existsSync(input.transcriptPath)) return fs.readFileSync(input.transcriptPath, "utf-8");
+	return input.stdout ?? "";
+}
+export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
+	if (!MUTATING_ROLES.has(input.role)) return false;
+	return !READ_ONLY_HINTS.test(input.taskText ?? "");
+}
+export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
+	const expectedMutation = expectsImplementationMutation(input);
+	const observedTools: string[] = [];
+	let observedMutation = false;
+	const text = transcriptText(input);
+	for (const line of text.split("\n")) {
+		const trimmed = line.trim();
+		if (!trimmed) continue;
+		let event: unknown;
+		try { event = JSON.parse(trimmed); } catch { continue; }
+		for (const call of collectToolCallsFromEvent(event)) {
+			observedTools.push(call.tool);
+			if (isMutatingTool(call.tool, call.args)) observedMutation = true;
+		}
+	}
+	return {
+		expectedMutation,
+		observedMutation,
+		observedTools,
+		...(expectedMutation && !observedMutation ? { reason: "no_mutation_observed" as const } : {}),
+	};
+}
+// ============================================================================
+// Phase 11a: Artifact-based Completion Verification — a second layer that
+// checks whether a completed task actually produced meaningful artifacts.
+// ============================================================================
+/**
+ * Guard against false-positive task completions.
+ *
+ * Checks whether a task that claims success actually produced meaningful output.
+ * Returns a verification result with the green level (0-3) and any warnings.
+ */
+export interface CompletionVerifyResult {
+	/** 0 = no output, 1 = minimal, 2 = moderate, 3 = strong */
+	greenLevel: number;
+	/** Warnings about potentially incomplete work */
+	warnings: string[];
+}
+const MAX_OUTPUT_PREVIEW = 200;
+function isTrivialError(error: string | undefined): boolean {
+	if (!error) return false;
+	return error.trim().length === 0;
+}
+export function verifyTaskCompletion(
+	task: TeamTaskState,
+	manifest: TeamRunManifest,
+): CompletionVerifyResult {
+	const warnings: string[] = [];
+	let greenLevel = 0;
+	// Check 1: Has an error?
+	if (task.error && !isTrivialError(task.error)) {
+		return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
+	}
+	// Check 2: Has result artifact?
+	if (task.resultArtifact) {
+		greenLevel += 1;
+	}
+	// Check 3: Has transcript?
+	if (task.transcriptArtifact) {
+		greenLevel += 1;
+	}
+	// Check 4: For implementation tasks, verify artifacts were actually produced
+	const runArtifacts = manifest.artifacts.filter(
+		(a) => a.producer === task.id || a.producer === task.agent,
+	);
+	if (runArtifacts.length > 0) {
+		greenLevel += 1;
+	} else if (greenLevel < 3) {
+		warnings.push("No run-level artifacts produced by this task");
+	}
+	// Check 5: Usage tracking — did the task actually consume tokens?
+	if (task.usage) {
+		const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
+		if (totalTokens === 0 && greenLevel < 3) {
+			warnings.push("Task reports zero token usage — may not have executed");
+		}
+	}
+	return {
+		greenLevel: Math.min(greenLevel, 3),
+		warnings,
+	};
+}
+/**
+ * Format a preview of task output for diagnostic display.
+ */
+export function formatOutputPreview(output: string | undefined): string {
+	if (!output) return "(no output)";
+	const trimmed = output.trim();
+	if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;
+	return trimmed.slice(0, MAX_OUTPUT_PREVIEW) + "...";
+}

package/src/runtime/crash-recovery.ts CHANGED Viewed

@@ -11,6 +11,8 @@ import { checkProcessLiveness } from "./process-status.ts";
 import { reconcileStaleRun, type ReconcileResult } from "./stale-reconciler.ts";
 import { executeHook, appendHookEvent } from "../hooks/registry.ts";
 import { activeRunEntries, unregisterActiveRun, readActiveRunRegistry } from "../state/active-run-registry.ts";
+import { resolveRealContainedPath } from "../utils/safe-paths.ts";
+import { projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
 export interface RecoveryPlan {
 	runId: string;
@@ -168,6 +170,32 @@ export function cancelOrphanedRuns(
  * This is the **global** cleanup that cancelOrphanedRuns (project-scoped)
  * cannot reach.
  */
+/**
+ * Best-effort removal of the stateRoot directory for a purged run (artifactsRoot is intentionally left in place).
+ * Uses resolveRealContainedPath to ensure we only delete paths that are safely
+ * contained within a known crew root (project or user level).
+ */
+function tryRemoveRunDirectories(entry: { stateRoot: string; cwd: string }): void {
+	const roots = [projectCrewRoot(entry.cwd), userCrewRoot()];
+	for (const root of roots) {
+		try {
+			resolveRealContainedPath(root, entry.stateRoot);
+			// If we get here, stateRoot is safely contained — remove it
+			fs.rmSync(entry.stateRoot, { recursive: true, force: true });
+			break;
+		} catch {
+			// Not contained in this root, try next
+		}
+	}
+	// NOTE: artifactsRoot is shared across runs and cleaned up by pruneFinishedRuns/pruneUserLevelRuns — not deleted here.
+}
+/**
+ * Purge the global active-run-index of entries whose manifest is no longer active.
+ *
+ * Note: This function only cleans user-level active run entries.
+ * Project-level stale runs are handled by session_start auto-prune triggered during run creation.
+ */
 export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.now()): { purged: string[]; kept: string[] } {
 	const purged: string[] = [];
 	const kept: string[] = [];
@@ -177,6 +205,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
 		// 1. Manifest file gone → definitely stale
 		if (!fs.existsSync(entry.manifestPath)) {
 			unregisterActiveRun(entry.runId);
+			tryRemoveRunDirectories(entry);
 			purged.push(entry.runId);
 			continue;
 		}
@@ -184,6 +213,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
 		// 2. CWD gone → temp dir cleaned up
 		if (!fs.existsSync(entry.cwd)) {
 			unregisterActiveRun(entry.runId);
+			tryRemoveRunDirectories(entry);
 			purged.push(entry.runId);
 			continue;
 		}
@@ -194,6 +224,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
 			manifest = JSON.parse(fs.readFileSync(entry.manifestPath, "utf-8"));
 		} catch {
 			unregisterActiveRun(entry.runId);
+			tryRemoveRunDirectories(entry);
 			purged.push(entry.runId);
 			continue;
 		}
@@ -202,6 +233,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
 		const terminalStatuses = new Set(["completed", "failed", "cancelled", "blocked"]);
 		if (manifest && terminalStatuses.has(manifest.status ?? "")) {
 			unregisterActiveRun(entry.runId);
+			tryRemoveRunDirectories(entry);
 			purged.push(entry.runId);
 			continue;
 		}
@@ -231,6 +263,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
 						// Best-effort manifest cleanup
 					}
 					unregisterActiveRun(entry.runId);
+					tryRemoveRunDirectories(entry);
 					purged.push(entry.runId);
 					continue;
 				}