npm - pi-crew - Versions diffs - 0.5.2 → 0.5.5 - Mend

pi-crew 0.5.2 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

package/CHANGELOG.md +67 -0
package/docs/bugs/cross-session-notification-leakage.md +82 -0
package/docs/coding-agent-optimization.md +268 -0
package/docs/deep-review-report.md +384 -0
package/docs/distillation/cybersecurity-patterns.md +294 -0
package/docs/migration-v0.4-v0.5.md +191 -0
package/docs/optimization-plan.md +642 -0
package/docs/pi-mono-opportunities.md +969 -0
package/docs/pi-mono-review.md +291 -0
package/docs/skills/REFERENCE.md +144 -0
package/package.json +7 -6
package/skills/artifact-analysis-loop/SKILL.md +302 -0
package/skills/async-worker-recovery/SKILL.md +19 -1
package/skills/child-pi-spawning/SKILL.md +19 -6
package/skills/context-artifact-hygiene/SKILL.md +19 -2
package/skills/delegation-patterns/SKILL.md +68 -3
package/skills/detection-pipeline-design/SKILL.md +285 -0
package/skills/event-log-tracing/SKILL.md +20 -6
package/skills/git-master/SKILL.md +20 -6
package/skills/hunting-investigation-loop/SKILL.md +401 -0
package/skills/incident-playbook-construction/SKILL.md +383 -0
package/skills/live-agent-lifecycle/SKILL.md +20 -6
package/skills/mailbox-interactive/SKILL.md +19 -6
package/skills/model-routing-context/SKILL.md +19 -1
package/skills/multi-perspective-review/SKILL.md +19 -4
package/skills/observability-reliability/SKILL.md +19 -2
package/skills/orchestration/SKILL.md +20 -2
package/skills/ownership-session-security/SKILL.md +20 -2
package/skills/pi-extension-lifecycle/SKILL.md +20 -2
package/skills/post-mortem/SKILL.md +7 -2
package/skills/read-only-explorer/SKILL.md +20 -6
package/skills/requirements-to-task-packet/SKILL.md +23 -3
package/skills/resource-discovery-config/SKILL.md +20 -2
package/skills/runtime-state-reader/SKILL.md +20 -2
package/skills/safe-bash/SKILL.md +21 -6
package/skills/scrutinize/SKILL.md +20 -2
package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
package/skills/security-review/SKILL.md +560 -0
package/skills/state-mutation-locking/SKILL.md +22 -2
package/skills/systematic-debugging/SKILL.md +8 -6
package/skills/threat-hypothesis-framework/SKILL.md +175 -0
package/skills/ui-render-performance/SKILL.md +20 -2
package/skills/verification-before-done/SKILL.md +17 -2
package/skills/widget-rendering/SKILL.md +21 -6
package/skills/workspace-isolation/SKILL.md +20 -6
package/skills/worktree-isolation/SKILL.md +20 -6
package/src/agents/agent-config.ts +40 -1
package/src/config/config.ts +22 -5
package/src/config/role-tools.ts +82 -0
package/src/config/types.ts +4 -0
package/src/extension/crew-cleanup.ts +114 -0
package/src/extension/register.ts +15 -3
package/src/extension/team-tool/run.ts +7 -7
package/src/observability/event-bus.ts +60 -0
package/src/runtime/background-runner.ts +8 -2
package/src/runtime/child-pi.ts +122 -34
package/src/runtime/crew-agent-runtime.ts +1 -0
package/src/runtime/foreground-control.ts +87 -17
package/src/runtime/pi-args.ts +11 -1
package/src/runtime/pi-json-output.ts +31 -0
package/src/runtime/progress-tracker.ts +124 -0
package/src/runtime/skill-effectiveness.ts +473 -0
package/src/runtime/skill-instructions.ts +37 -3
package/src/runtime/task-runner.ts +91 -17
package/src/runtime/team-runner.ts +11 -11
package/src/runtime/tool-progress.ts +10 -3
package/src/runtime/verification-gates.ts +367 -0
package/src/schema/team-tool-schema.ts +7 -0
package/src/state/decision-ledger.ts +92 -43
package/src/state/event-log.ts +136 -10
package/src/state/hook-instinct-bridge.ts +5 -5
package/src/state/state-store.ts +3 -1
package/src/state/types.ts +4 -0
package/src/types/new-api-types.ts +34 -0
package/src/ui/agent-management-overlay.ts +5 -1
package/src/ui/crew-widget.ts +29 -15
package/src/ui/powerbar-publisher.ts +100 -7
package/src/ui/tool-render.ts +15 -15
package/src/utils/session-utils.ts +52 -0
package/src/worktree/worktree-manager.ts +32 -13

package/src/runtime/verification-gates.ts ADDED Viewed

@@ -0,0 +1,367 @@
+/**
+ * Verification Gates — ECC VERIFICATION_LOOP Pattern Implementation
+ *
+ * Implements RED/GREEN phase gates for task verification.
+ * Sequential execution: cannot skip to Phase N+1 without Phase N passing.
+ *
+ * Based on: docs/distillation/ECC-10-skills.md §2 (verification-loop)
+ *
+ * @module verification-gates
+ */
+import { spawn } from "node:child_process";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import { writeArtifact } from "../state/artifact-store.ts";
+import type { VerificationContract, VerificationCommandResult, GreenLevel, ArtifactDescriptor } from "../state/types.ts";
+/** Outcome of a single phase gate as recorded by runPhaseGates. */
+export interface PhaseGateResult {
+	/** 1-based position of the gate within the sequence that was run. */
+	phase: number;
+	/** Gate name taken from the gate definition. */
+	name: string;
+	/** "passed" when the command exited with code 0, "failed" otherwise, "skipped" when the run was aborted before this gate started. */
+	status: "passed" | "failed" | "skipped";
+	/** Shell command line of the gate. */
+	command: string;
+	/** Exit code reported for the command; not set for skipped gates. -1 marks a spawn error or timeout. */
+	exitCode?: number | null;
+	/** Captured stdout and stderr of the command; not set for skipped gates. */
+	output?: string;
+	/** Time spent executing the command in milliseconds (0 for skipped gates). */
+	durationMs: number;
+	/** Short failure description (`Exit code: N` or "Aborted"); undefined for passed gates. */
+	error?: string;
+}
+/** Aggregate outcome of one runPhaseGates invocation. */
+export interface PhaseGateBundle {
+	/** Results in execution order; gates after the stopping point are not included. */
+	results: PhaseGateResult[];
+	/** Wall-clock time of the whole run in milliseconds. */
+	totalDurationMs: number;
+	/** True when every recorded result has status "passed" (also true when no gate was run). */
+	allPassed: boolean;
+	/** Set when the run ended early because of a critical failure or an abort. */
+	stoppedAt?: number; // phase number where stopped
+}
+/**
+ * Standard phase gate definitions for npm/TypeScript projects.
+ * Sequential enforcement: each phase must pass before proceeding.
+ *
+ * A gate passes only when its command exits with status 0, so the commands
+ * must propagate their real exit status. Do not append `|| true`: it forces
+ * status 0 and makes every gate pass regardless of the actual outcome.
+ * `2>&1` merges stderr into stdout so the captured log keeps emission order.
+ * Gates with `critical: false` are recorded when they fail but do not stop
+ * the sequence.
+ */
+export const NPM_TYPESCRIPT_GATES: Array<{ name: string; command: string; critical: boolean }> = [
+	{ name: "build", command: "npm run build 2>&1", critical: true },
+	{ name: "typecheck", command: "npx tsc --noEmit 2>&1", critical: true },
+	{ name: "lint", command: "npm run lint 2>&1", critical: false },
+	{ name: "tests", command: "npm test 2>&1", critical: true },
+];
+/**
+ * Cargo/Rust project phase gates.
+ *
+ * As with the npm gates, each command must report its real exit status
+ * (no `|| true`), because a gate passes only on exit status 0. Clippy is
+ * non-critical: its failure is recorded but does not stop the sequence.
+ */
+export const CARGO_RUST_GATES: Array<{ name: string; command: string; critical: boolean }> = [
+	{ name: "check", command: "cargo check 2>&1", critical: true },
+	{ name: "test", command: "cargo test 2>&1", critical: true },
+	{ name: "clippy", command: "cargo clippy 2>&1", critical: false },
+];
+/**
+ * Execute a single shell command and capture its combined stdout/stderr.
+ *
+ * The command runs through `sh -c` so compound command lines work. The
+ * returned promise never rejects: spawn errors and timeouts resolve with
+ * `exitCode: -1` and an explanatory message in `output`.
+ *
+ * @param command - Shell command line to run
+ * @param cwd - Working directory for the child process
+ * @param timeoutMs - Time limit in milliseconds before the child is killed (default 120000)
+ * @returns The exit code, the trailing 100000 characters of output, and the elapsed time in milliseconds
+ */
+async function executeCommand(
+	command: string,
+	cwd: string,
+	timeoutMs: number = 120000,
+): Promise<{ exitCode: number | null; output: string; durationMs: number }> {
+	const maxOutputChars = 100000;
+	const start = Date.now();
+	let output = "";
+	return new Promise((resolve) => {
+		let settled = false;
+		let timer: ReturnType<typeof setTimeout> | undefined;
+		// Single exit path: resolves at most once and always releases the
+		// timeout timer so it cannot keep the event loop alive or signal a
+		// process that has already exited.
+		const finish = (exitCode: number | null, text: string): void => {
+			if (settled) return;
+			settled = true;
+			if (timer !== undefined) clearTimeout(timer);
+			resolve({
+				exitCode,
+				output: text.slice(-maxOutputChars), // keep only the tail of very long logs
+				durationMs: Date.now() - start,
+			});
+		};
+		// Use shell to handle compound commands. The time limit is enforced by
+		// the timer below only, so there is one kill path rather than two.
+		const shell = spawn("sh", ["-c", command], {
+			cwd,
+			env: { ...process.env, FORCE_COLOR: "0" },
+		});
+		const append = (data: { toString(): string }): void => {
+			output += data.toString();
+		};
+		shell.stdout?.on("data", append);
+		shell.stderr?.on("data", append);
+		shell.on("close", (code) => finish(code, output));
+		shell.on("error", (err) => finish(-1, `Execution error: ${err.message}`));
+		// Handle timeout
+		timer = setTimeout(() => {
+			shell.kill("SIGKILL");
+			finish(-1, output + "\n[TIMEOUT: Command exceeded limit]");
+		}, timeoutMs);
+	});
+}
+/**
+ * Run phase gates sequentially, stopping on first critical failure.
+ *
+ * The abort signal is checked before each gate starts; a gate that is
+ * already running is not interrupted. When aborted, one "skipped" result is
+ * recorded for the gate that would have run next and the loop ends. That
+ * skipped result is not reported through `onPhase`.
+ *
+ * @param gates - Array of phase gate definitions
+ * @param cwd - Working directory to execute commands in
+ * @param signal - Optional abort signal
+ * @param onPhase - Optional callback invoked after each executed phase
+ * @returns Phase gate bundle with all recorded results
+ */
+export async function runPhaseGates(
+	gates: Array<{ name: string; command: string; critical: boolean }>,
+	cwd: string,
+	signal?: AbortSignal,
+	onPhase?: (result: PhaseGateResult) => void,
+): Promise<PhaseGateBundle> {
+	const results: PhaseGateResult[] = [];
+	const startTime = Date.now();
+	let stoppedAt: number | undefined;
+	for (const [index, gate] of gates.entries()) {
+		const phase = index + 1; // phases are reported 1-based
+		// Check abort signal
+		if (signal?.aborted) {
+			results.push({
+				phase,
+				name: gate.name,
+				status: "skipped",
+				command: gate.command,
+				durationMs: 0,
+				error: "Aborted",
+			});
+			stoppedAt = phase;
+			break;
+		}
+		// Execute the gate command
+		const { exitCode, output, durationMs } = await executeCommand(
+			gate.command,
+			cwd,
+			120000, // 2 minute timeout
+		);
+		const passed = exitCode === 0;
+		const result: PhaseGateResult = {
+			phase,
+			name: gate.name,
+			status: passed ? "passed" : "failed",
+			command: gate.command,
+			exitCode,
+			output,
+			durationMs,
+			error: passed ? undefined : `Exit code: ${exitCode}`,
+		};
+		results.push(result);
+		onPhase?.(result);
+		// Stop on critical failure; non-critical failures are recorded and the run continues
+		if (!passed && gate.critical) {
+			stoppedAt = phase;
+			break;
+		}
+	}
+	return {
+		results,
+		totalDurationMs: Date.now() - startTime,
+		allPassed: results.every((r) => r.status === "passed"),
+		stoppedAt,
+	};
+}
+/**
+ * Execute verification commands from a task's verification contract.
+ * Maps the contract commands to phase gates and runs them sequentially.
+ *
+ * Each executed command gets its own log artifact, and a JSON summary of the
+ * whole run is written afterwards. All commands are treated as critical, so
+ * the first failure stops the run; commands that were never executed
+ * (after a failure or an abort) are returned with status "not_run".
+ *
+ * @param contract - Verification contract with commands to execute
+ * @param cwd - Working directory
+ * @param runId - Run ID for artifact naming (not referenced by the current implementation)
+ * @param taskId - Task ID for artifact naming
+ * @param artifactsRoot - Artifacts root directory
+ * @param signal - Optional abort signal
+ * @returns One verification command result per contract command, in contract order
+ */
+export async function executeVerificationCommands(
+	contract: VerificationContract,
+	cwd: string,
+	runId: string,
+	taskId: string,
+	artifactsRoot: string,
+	signal?: AbortSignal,
+): Promise<VerificationCommandResult[]> {
+	if (!contract.commands || contract.commands.length === 0) {
+		return [];
+	}
+	const results: VerificationCommandResult[] = [];
+	// Map commands to phase gates
+	const gates = contract.commands.map((cmd, index) => ({
+		name: `verification-${index + 1}`,
+		command: cmd,
+		critical: true, // All verification commands are critical by default
+	}));
+	// Create artifacts directory. A recursive mkdir is a no-op when the
+	// directory already exists, so no separate existence check is needed.
+	fs.mkdirSync(path.join(artifactsRoot, "verification-gates"), { recursive: true });
+	// Run phase gates
+	const bundle = await runPhaseGates(gates, cwd, signal, (phaseResult) => {
+		// Write phase artifact immediately for observability
+		const phaseArtifact = writeArtifact(artifactsRoot, {
+			kind: "log",
+			relativePath: `verification-gates/${taskId}-phase-${phaseResult.phase}-${phaseResult.name}.log`,
+			content: [
+				`# Phase ${phaseResult.phase}: ${phaseResult.name}`,
+				`Status: ${phaseResult.status.toUpperCase()}`,
+				`Command: ${phaseResult.command}`,
+				`Duration: ${phaseResult.durationMs}ms`,
+				phaseResult.exitCode != null ? `Exit Code: ${phaseResult.exitCode}` : "",
+				phaseResult.error ? `Error: ${phaseResult.error}` : "",
+				"",
+				"## Output",
+				phaseResult.output || "(no output)",
+			].join("\n"),
+			producer: taskId,
+		});
+		results.push({
+			cmd: phaseResult.command,
+			status: phaseResult.status === "passed" ? "passed" : "failed",
+			exitCode: phaseResult.exitCode,
+			outputArtifact: phaseArtifact,
+		});
+	});
+	// Write summary artifact (written for its side effect; the descriptor is not needed here)
+	writeArtifact(artifactsRoot, {
+		kind: "metadata",
+		relativePath: `verification-gates/${taskId}-summary.json`,
+		content: JSON.stringify(bundle, null, 2),
+		producer: taskId,
+	});
+	// Fill in any remaining results (in case of early exit)
+	for (const gate of gates.slice(results.length)) {
+		results.push({
+			cmd: gate.command,
+			status: "not_run",
+		});
+	}
+	return results;
+}
+/**
+ * Compute observed green level from verification results.
+ *
+ * Rules, in order:
+ * - no commands, or any failed command: "none"
+ * - every command passed: the required level, or "targeted" when the
+ *   required level is "none"
+ * - some passed and the rest did not run: "targeted"
+ * - nothing passed: "none"
+ *
+ * @param commands - Array of verification command results
+ * @param requiredLevel - Required green level from contract
+ * @returns Observed green level
+ */
+export function computeGreenLevelFromResults(
+	commands: VerificationCommandResult[],
+	requiredLevel: GreenLevel,
+): GreenLevel {
+	if (commands.length === 0) {
+		return "none";
+	}
+	let passedCount = 0;
+	for (const command of commands) {
+		// Any failed verification vetoes the whole result
+		if (command.status === "failed") {
+			return "none";
+		}
+		if (command.status === "passed") {
+			passedCount++;
+		}
+	}
+	// All passed: report the level the contract asked for
+	if (passedCount === commands.length) {
+		return requiredLevel === "none" ? "targeted" : requiredLevel;
+	}
+	// Partial pass is "targeted"; nothing run is "none"
+	return passedCount > 0 ? "targeted" : "none";
+}
+/**
+ * Create a verification gate report artifact.
+ * Formatted for human review per ECC verification-loop pattern.
+ *
+ * The per-command table is built from `results`; the summary counts and the
+ * final verdict are built from `bundle`.
+ *
+ * @param taskId - Task ID shown in the report title
+ * @param contract - Verification contract the results belong to
+ * @param results - Per-command results, rendered one table row each
+ * @param bundle - Phase gate bundle used for the summary section
+ * @returns The report as a Markdown string
+ */
+export function createVerificationGateReport(
+	taskId: string,
+	contract: VerificationContract,
+	results: VerificationCommandResult[],
+	bundle: PhaseGateBundle,
+): string {
+	const lines = [
+		`# Verification Gate Report: ${taskId}`,
+		"",
+		`## Contract`,
+		`- Required Green Level: ${contract.requiredGreenLevel}`,
+		`- Allow Manual Evidence: ${contract.allowManualEvidence}`,
+		`- Commands: ${contract.commands.length}`,
+		"",
+		`## Results`,
+		"",
+		`| Phase | Command | Status | Exit Code | Duration |`,
+		`|-------|---------|--------|-----------|----------|`,
+	];
+	for (const [index, result] of results.entries()) {
+		const statusIcon = result.status === "passed" ? "✅" : result.status === "failed" ? "❌" : "⏭️";
+		// Escape pipes so shell operators such as `||` do not split the table cell.
+		const commandCell = truncate(result.cmd, 40).replace(/\|/g, "\\|");
+		// NOTE(review): results built by executeVerificationCommands carry no
+		// durationMs, so this column shows 0ms for them — confirm intended.
+		lines.push(
+			`| ${index + 1} | \`${commandCell}\` | ${statusIcon} ${result.status} | ${result.exitCode ?? "-"} | ${result.durationMs ?? 0}ms |`,
+		);
+	}
+	const countByStatus = (status: PhaseGateResult["status"]): number =>
+		bundle.results.filter((r) => r.status === status).length;
+	lines.push("");
+	lines.push(`## Summary`);
+	lines.push(`- Total Phases: ${bundle.results.length}`);
+	lines.push(`- Passed: ${countByStatus("passed")}`);
+	lines.push(`- Failed: ${countByStatus("failed")}`);
+	lines.push(`- Skipped: ${countByStatus("skipped")}`);
+	lines.push(`- Total Duration: ${bundle.totalDurationMs}ms`);
+	lines.push(`- All Passed: ${bundle.allPassed ? "YES ✅" : "NO ❌"}`);
+	if (bundle.stoppedAt) {
+		lines.push(`- Stopped At: Phase ${bundle.stoppedAt}`);
+	}
+	lines.push("");
+	lines.push("## VERIFICATION");
+	lines.push(bundle.allPassed ? "**PASSED** - All gates green ✅" : "**FAILED** - One or more gates red ❌");
+	return lines.join("\n");
+}
+/**
+ * Shorten a string to at most `maxLen` characters.
+ *
+ * Strings that already fit are returned unchanged. Longer strings are cut
+ * and end in "..." so the result is exactly `maxLen` characters. When
+ * `maxLen` is below 3 there is no room for the ellipsis, so the string is
+ * hard-cut instead (an empty string for zero or negative limits).
+ */
+function truncate(str: string, maxLen: number): string {
+	if (str.length <= maxLen) return str;
+	// A negative slice end would count from the end of the string and return
+	// more than maxLen characters, so small limits are handled separately.
+	if (maxLen < 3) return str.slice(0, Math.max(0, maxLen));
+	return str.slice(0, maxLen - 3) + "...";
+}

package/src/schema/team-tool-schema.ts CHANGED Viewed

@@ -204,6 +204,11 @@ export const TeamToolParams = Type.Object({
 	once: Type.Optional(
 		Type.Union([Type.String(), Type.Number()], { description: "ISO timestamp or epoch ms for a one-time scheduled run." }),
 	),
+	excludeContextBash: Type.Optional(
+		Type.Boolean({
+			description: "Mark certain bash commands as excludeFromContext to reduce context tokens (default: false).",
+		}),
+	),
 });
 export interface TeamToolParamsValue {
@@ -287,4 +292,6 @@ export interface TeamToolParamsValue {
 	cron?: string;
 	interval?: number;
 	once?: string | number;
+	/** Mark certain bash commands as excludeFromContext to reduce context tokens (default: false). */
+	excludeContextBash?: boolean;
 }

package/src/state/decision-ledger.ts CHANGED Viewed

@@ -107,7 +107,7 @@ export function appendEntry(runId: string, entry: RolloutEntry): RolloutEntry {
 	// Get existing entries to compute coherence
 	const ledger = getLedger(runId);
-	// Compute coherence marks
+	// Compute coherence
 	const coherenceMark = computeCoherence(entry, ledger);
 	const entryWithCoherence: RolloutEntry = {
 		...entry,
@@ -218,42 +218,76 @@ export function summarizeLedger(runId: string): string {
 	return lines.join("\n");
 }
+/**
+ * Replace the coherence mark on the most recent ledger entry of a run.
+ * Every entry is written back, so earlier entries are kept and only the
+ * final one changes.
+ *
+ * Throws when the run has no ledger entries.
+ */
+function overrideLastEntry(runId: string, coherenceMark: import("./types.js").CoherenceMark): RolloutEntry {
+	const entries = getLedger(runId);
+	const tailIndex = entries.length - 1;
+	if (tailIndex < 0) {
+		throw new Error(`No ledger entries found for run ${runId}`);
+	}
+	// Swap in a copy of the tail entry carrying the new mark
+	const updatedTail = { ...entries[tailIndex], coherenceMark };
+	entries[tailIndex] = updatedTail;
+	// Persist the full ledger as JSONL, one entry per line
+	const targetPath = getLedgerPath(runId);
+	const serialized = entries.map((item) => JSON.stringify(item)).join("\n") + "\n";
+	writeFileSync(targetPath, serialized, "utf-8");
+	return updatedTail;
+}
 /**
  * Promote a candidate by marking it as accepted with proper coherence.
  */
 export function promoteCandidate(runId: string, candidate: string): RolloutEntry {
 	const latestDecision = getLatestDecision(runId);
-	const entry: RolloutEntry = {
+	// Get existing entries to compute proper coherence
+	const ledger = getLedger(runId);
+	// Create entry without coherence first
+	const entryWithoutCoherence = {
 		rolloutId: `promote-${Date.now()}`,
 		timestamp: new Date().toISOString(),
 		priorWinner: latestDecision?.topCandidates[0],
 		searchSpace: latestDecision?.searchSpace || "unknown",
 		trialCount: (latestDecision?.trialCount || 0) + 1,
 		topCandidates: [candidate],
-		decisionMark: "accept",
-		coherenceMark: {
-			matchesPrior: false,
-			matchesRecursive: false,
-			promotionAllowed: true,
-			reason: "Manual promotion by user",
-		},
+		decisionMark: "accept" as const,
 	};
-	// Persist via appendEntry so ledger is consistent.
-	appendEntry(runId, entry);
-	const manualCoherence: import("./types.js").CoherenceMark = {
-		matchesPrior: false,
-		matchesRecursive: false,
-		promotionAllowed: true,
-		reason: "Manual promotion by user",
+	// Compute coherence (empty ledger = no matches)
+	const coherenceMark = computeCoherence(entryWithoutCoherence as RolloutEntry, ledger);
+	// Manual promotion always allows further promotion
+	coherenceMark.promotionAllowed = true;
+	coherenceMark.reason = "Manual promotion - promotion allowed";
+	// Create full entry with coherence
+	const entry: RolloutEntry = {
+		...entryWithoutCoherence,
+		coherenceMark,
 	};
-	// Manually override the last line in the JSONL to reflect the coherent
-	// decision we want, bypassing appendEntry's auto-compute for the returned value.
-	const lastLine = readFileSync(getLedgerPath(runId), "utf-8").trim().split("\n").filter(Boolean).at(-1)!;
-	const overridden: RolloutEntry = { ...JSON.parse(lastLine), coherenceMark: manualCoherence };
-	writeFileSync(getLedgerPath(runId), JSON.stringify(overridden) + "\n", "utf-8");
-	return overridden;
+	// Append the promotion as a new entry. The entry was never added to the
+	// ledger beforehand, so assigning it to the last index would overwrite
+	// (and lose) the most recent existing entry.
+	ledger.push(entry);
+	// Rewrite entire ledger to preserve all entries
+	const ledgerPath = getLedgerPath(runId);
+	const dir = dirname(ledgerPath);
+	if (!existsSync(dir)) {
+		mkdirSync(dir, { recursive: true });
+	}
+	writeFileSync(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n", "utf-8");
+	return entry;
 }
 /**
@@ -262,34 +296,49 @@ export function promoteCandidate(runId: string, candidate: string): RolloutEntry
 export function decayCandidate(runId: string, candidate: string): RolloutEntry {
 	const latestDecision = getLatestDecision(runId);
-	const entry: RolloutEntry = {
+	// Get existing entries to compute proper coherence
+	const ledger = getLedger(runId);
+	// Create entry without coherence first
+	const entryWithoutCoherence = {
 		rolloutId: `decay-${Date.now()}`,
 		timestamp: new Date().toISOString(),
 		priorWinner: latestDecision?.topCandidates[0],
 		searchSpace: latestDecision?.searchSpace || "unknown",
 		trialCount: (latestDecision?.trialCount || 0) + 1,
 		topCandidates: [candidate],
-		decisionMark: "decay",
-		coherenceMark: {
-			matchesPrior: false,
-			matchesRecursive: false,
-			promotionAllowed: false,
-			reason: "Manual decay by user",
-		},
+		decisionMark: "decay" as const,
 	};
-	// Persist via appendEntry so ledger is consistent.
-	appendEntry(runId, entry);
-	const manualCoherence: import("./types.js").CoherenceMark = {
-		matchesPrior: false,
-		matchesRecursive: false,
-		promotionAllowed: false,
-		reason: "Manual decay by user",
+	// Compute coherence (empty ledger = no matches)
+	const coherenceMark = computeCoherence(entryWithoutCoherence as RolloutEntry, ledger);
+	// Manual decay never allows promotion
+	coherenceMark.promotionAllowed = false;
+	coherenceMark.reason = "Manual decay - promotion not allowed";
+	// Create full entry with coherence
+	const entry: RolloutEntry = {
+		...entryWithoutCoherence,
+		coherenceMark,
 	};
-	// Manually override the last line in the JSONL to reflect the coherent
-	// decision we want, bypassing appendEntry's auto-compute for the returned value.
-	const lastLine = readFileSync(getLedgerPath(runId), "utf-8").trim().split("\n").filter(Boolean).at(-1)!;
-	const overridden: RolloutEntry = { ...JSON.parse(lastLine), coherenceMark: manualCoherence };
-	writeFileSync(getLedgerPath(runId), JSON.stringify(overridden) + "\n", "utf-8");
-	return overridden;
+	// Append the decay as a new entry. The entry was never added to the
+	// ledger beforehand, so assigning it to the last index would overwrite
+	// (and lose) the most recent existing entry.
+	ledger.push(entry);
+	// Rewrite entire ledger to preserve all entries
+	const ledgerPath = getLedgerPath(runId);
+	const dir = dirname(ledgerPath);
+	if (!existsSync(dir)) {
+		mkdirSync(dir, { recursive: true });
+	}
+	writeFileSync(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n", "utf-8");
+	return entry;
 }