npm - pi-crew - Versions diffs - 0.5.25 → 0.6.1 - Mend

pi-crew 0.5.25 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

package/CHANGELOG.md +99 -0
package/README.md +13 -11
package/docs/patterns/command-agent-skill.md +71 -0
package/package.json +1 -1
package/skills/council/SKILL.md +163 -0
package/src/agents/agent-config.ts +4 -1
package/src/agents/discover-agents.ts +1 -0
package/src/benchmark/feedback-loop.ts +4 -2
package/src/extension/cross-extension-rpc.ts +48 -0
package/src/extension/registration/commands.ts +2 -1
package/src/extension/registration/subagent-tools.ts +2 -0
package/src/extension/registration/team-tool.ts +2 -0
package/src/extension/registration/viewers.ts +1 -0
package/src/extension/run-export.ts +16 -1
package/src/extension/run-import.ts +16 -0
package/src/extension/team-tool/anchor.ts +5 -1
package/src/extension/team-tool/api.ts +9 -4
package/src/extension/team-tool/config-patch.ts +15 -1
package/src/extension/team-tool.ts +2 -1
package/src/hooks/registry.ts +9 -1
package/src/hooks/types.ts +14 -0
package/src/i18n.ts +15 -2
package/src/observability/exporters/otlp-exporter.ts +73 -0
package/src/runtime/adaptive-plan.ts +24 -0
package/src/runtime/agent-control.ts +6 -3
package/src/runtime/async-runner.ts +58 -3
package/src/runtime/background-runner.ts +1 -1
package/src/runtime/chain-parser.ts +192 -0
package/src/runtime/chain-runner.ts +58 -0
package/src/runtime/child-pi.ts +1 -1
package/src/runtime/crew-agent-records.ts +4 -3
package/src/runtime/cross-extension-rpc.ts +34 -8
package/src/runtime/diagnostic-export.ts +3 -4
package/src/runtime/dynamic-script-runner.ts +7 -7
package/src/runtime/foreground-watchdog.ts +2 -2
package/src/runtime/intercom-bridge.ts +178 -0
package/src/runtime/live-agent-manager.ts +6 -3
package/src/runtime/live-irc.ts +4 -2
package/src/runtime/parallel-utils.ts +2 -1
package/src/runtime/plan-templates.ts +200 -0
package/src/runtime/post-checks.ts +10 -3
package/src/runtime/run-drift.ts +220 -0
package/src/runtime/sandbox.ts +26 -20
package/src/runtime/semaphore.ts +2 -1
package/src/runtime/settings-store.ts +14 -2
package/src/runtime/skill-effectiveness.ts +4 -2
package/src/runtime/skill-instructions.ts +4 -1
package/src/runtime/subagent-manager.ts +20 -2
package/src/runtime/subprocess-tool-registry.ts +2 -2
package/src/runtime/task-graph.ts +79 -0
package/src/runtime/task-id.ts +148 -0
package/src/runtime/task-packet.ts +13 -1
package/src/runtime/task-runner/context-retrieval.ts +172 -0
package/src/runtime/task-runner.ts +39 -1
package/src/runtime/team-runner.ts +7 -0
package/src/runtime/usage-tracker.ts +4 -2
package/src/runtime/verification-gates.ts +36 -9
package/src/state/contracts.ts +2 -1
package/src/state/event-log.ts +16 -5
package/src/state/hook-instinct-bridge.ts +2 -1
package/src/state/locks.ts +9 -2
package/src/state/memory-store.ts +244 -0
package/src/state/observation-store.ts +177 -0
package/src/state/state-store.ts +4 -2
package/src/state/task-claims.ts +9 -2
package/src/tools/safe-bash.ts +69 -20
package/src/types/new-api-types.ts +10 -5
package/src/ui/keybinding-map.ts +2 -1
package/src/ui/run-action-dispatcher.ts +2 -1
package/src/ui/status-colors.ts +2 -1
package/src/ui/syntax-highlight.ts +2 -1
package/src/ui/tool-render.ts +13 -3
package/src/utils/fingerprint.ts +183 -0
package/src/utils/fs-watch.ts +4 -2
package/src/utils/gh-protocol.ts +2 -1
package/src/utils/safe-paths.ts +6 -0
package/src/workflows/discover-workflows.ts +5 -1
package/src/workflows/intermediate-store.ts +173 -0
package/src/workflows/workflow-config.ts +8 -0
package/src/worktree/cleanup.ts +8 -5
package/src/worktree/worktree-manager.ts +1 -1

package/src/runtime/task-runner/context-retrieval.ts ADDED Viewed

@@ -0,0 +1,172 @@
+/**
+ * Iterative retrieval loop — workers progressively discover needed context.
+ *
+ * Pattern origin: ECC/skills/iterative-retrieval/SKILL.md — 4-phase loop:
+ * Dispatch → Evaluate → Refine → Loop. Max 3 cycles. Convergence when
+ * ≥3 high-relevance files found AND no critical gaps.
+ *
+ * This module provides the scoring and convergence logic.
+ * The actual file discovery is delegated to the caller (prompt-builder or task-runner).
+ */
+// ── Types ────────────────────────────────────────────────────────────────
+export interface RetrievalQuery {
+	patterns: string[];     // returned unchanged by refineQuery
+	keywords: string[];     // refineQuery adds words mined from high-relevance paths
+	excludes: string[];     // refineQuery adds paths whose relevance is below 0.2
+	focusAreas?: string[];  // refineQuery adds every missingContext entry; undefined when there are none
+}
+export interface RelevanceEvaluation {
+	path: string;          // refineQuery splits this on slashes and backslashes to mine keywords
+	relevance: number;     // 0.0–1.0; >= 0.7 is high relevance, < 0.2 gets the path excluded by refineQuery
+	reason: string;        // not read by any function in this file
+	missingContext: string[]; // feeds refineQuery focusAreas and the hasConverged critical-gap check
+}
+export interface RetrievalResult {
+	query: RetrievalQuery;
+	evaluations: RelevanceEvaluation[];
+	cycle: number;         // NOTE(review): 0- or 1-based is not shown here; shouldContinue stops once cycle >= 3
+	converged: boolean;    // presumably the hasConverged(evaluations) result — confirm with the caller
+}
+// ── Scoring ──────────────────────────────────────────────────────────────
+/**
+ * Score relevance of a file to a task description.
+ *
+ * Uses case-insensitive substring matching as a heuristic. In production, this would be
+ * replaced by embedding-based similarity or BM25 scoring.
+ *
+ * @param filePath - Path to the file
+ * @param fileContent - Content of the file (or excerpt)
+ * @param keywords - Task-relevant keywords; an empty list yields 0
+ * @returns Relevance score clamped to 0.0–1.0
+ */
+export function scoreRelevance(
+	filePath: string,
+	fileContent: string,
+	keywords: string[],
+): number {
+	if (keywords.length === 0) return 0;
+	const pathLower = filePath.toLowerCase();
+	const contentLower = fileContent.toLowerCase();
+	let matchCount = 0;
+	let weightedScore = 0;
+	for (const keyword of keywords) {
+		const kw = keyword.toLowerCase();
+		// Path match is worth more (file naming is intentional): flat 0.3 versus at most 0.2 for content
+		if (pathLower.includes(kw)) {
+			matchCount++;
+			weightedScore += 0.3;
+		}
+		// Content match: count non-overlapping occurrences of kw. NOTE(review): an empty-string keyword matches every path and splits content per character — confirm callers filter those out
+		const contentMatches = contentLower.split(kw).length - 1;
+		if (contentMatches > 0) {
+			matchCount++;
+			// Diminishing returns for repeated matches: log2 growth, capped at 0.2 per keyword
+			weightedScore += Math.min(0.2, 0.05 * Math.log2(contentMatches + 1));
+		}
+	}
+	// Coverage term: a path hit and a content hit each increment matchCount, so coverage can reach 2.0; the final clamp bounds the result
+	const keywordCoverage = matchCount / keywords.length;
+	const rawScore = keywordCoverage * 0.6 + Math.min(weightedScore, 0.4);
+	return Math.min(1.0, Math.max(0.0, rawScore));
+}
+// ── Convergence ──────────────────────────────────────────────────────────
+const CONVERGENCE_MIN_HIGH_RELEVANCE = 3;
+const HIGH_RELEVANCE_THRESHOLD = 0.7;
+/**
+ * Check if retrieval has converged — enough high-relevance files found and no critical gaps.
+ *
+ * @param evaluations - Current relevance evaluations
+ * @returns true when at least 3 evaluations score >= 0.7 and none scoring below 0.3 lists missing context
+ */
+export function hasConverged(evaluations: RelevanceEvaluation[]): boolean {
+	const highRelevance = evaluations.filter((e) => e.relevance >= HIGH_RELEVANCE_THRESHOLD);
+	if (highRelevance.length < CONVERGENCE_MIN_HIGH_RELEVANCE) return false;
+	// Critical gap: a low-relevance (< 0.3) evaluation that still reports non-empty missingContext
+	const criticalGaps = evaluations.some(
+		(e) => e.relevance < 0.3 && e.missingContext.length > 0,
+	);
+	return !criticalGaps;
+}
+// ── Refinement ───────────────────────────────────────────────────────────
+/**
+ * Refine a retrieval query based on evaluation results.
+ *
+ * Extracts new keywords from the path segments of high-relevance files, records
+ * reported gaps as focus areas, and excludes low-relevance (below 0.2) paths.
+ *
+ * @param query - Original query (not mutated; its patterns array is reused as-is)
+ * @param evaluations - Results from the current cycle
+ * @returns Refined query for the next cycle
+ */
+export function refineQuery(
+	query: RetrievalQuery,
+	evaluations: RelevanceEvaluation[],
+): RetrievalQuery {
+	const newKeywords = new Set(query.keywords);
+	const newExcludes = new Set(query.excludes);
+	const newFocusAreas = new Set(query.focusAreas ?? []);
+	for (const eval_ of evaluations) {
+		if (eval_.relevance >= HIGH_RELEVANCE_THRESHOLD) {
+			// Extract potential keywords from the file path (file content is not inspected here)
+			const parts = eval_.path.replace(/[\\/]/g, "/").split("/"); // treat backslashes as slashes so both path styles split the same way
+			for (const part of parts) {
+				// Skip short (2 chars or fewer) and common non-informative segments
+				if (part.length > 2 && !["src", "lib", "test", "dist", "node_modules"].includes(part)) {
+					// Applies to every segment, directories included: drop one trailing extension, then split on dots and hyphens
+					const stem = part.replace(/\.[^.]+$/, "").replace(/[.-]/g, " ");
+					for (const word of stem.split(/\s+/)) {
+						if (word.length > 3) newKeywords.add(word);
+					}
+				}
+			}
+		}
+		if (eval_.relevance < 0.2) {
+			// Exclude confirmed-irrelevant paths
+			newExcludes.add(eval_.path);
+		}
+		// Track missing context as focus areas (from every evaluation, regardless of relevance)
+		for (const gap of eval_.missingContext) {
+			newFocusAreas.add(gap);
+		}
+	}
+	return {
+		patterns: query.patterns, // patterns don't change
+		keywords: [...newKeywords],
+		excludes: [...newExcludes],
+		focusAreas: newFocusAreas.size > 0 ? [...newFocusAreas] : undefined,
+	};
+}
+// ── Loop Control ─────────────────────────────────────────────────────────
+const MAX_CYCLES = 3;
+/**
+ * Determine if another retrieval cycle should run: false once cycle >= MAX_CYCLES or hasConverged(evaluations) is true.
+ */
+export function shouldContinue(evaluations: RelevanceEvaluation[], cycle: number): boolean {
+	if (cycle >= MAX_CYCLES) return false;
+	if (hasConverged(evaluations)) return false;
+	return true;
+}

package/src/runtime/task-runner.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import * as fs from "node:fs";
+import * as path from "node:path";
 import type { AgentConfig } from "../agents/agent-config.ts";
 import type { CrewLimitsConfig, CrewRuntimeConfig } from "../config/config.ts";
 import type {
@@ -267,6 +268,30 @@ export async function runTeamTask(
 		const skillNames = input.skillNames ?? renderedSkills?.names;
 		const skillPaths = input.skillPaths ?? renderedSkills?.paths;
+		// Deterministic pre-step: run script, inject stdout into worker prompt
+		let preStepOutput: string | undefined;
+		if (input.step.preStepScript) {
+			const scriptTimeout = input.step.preStepTimeout ?? 30_000;
+			const scriptArgs = input.step.preStepArgs ?? [];
+			// SECURITY: Validate preStepScript path is contained within cwd
+			const resolved = path.resolve(manifest.cwd, input.step.preStepScript);
+			if (!resolved.startsWith(path.resolve(manifest.cwd) + path.sep) && resolved !== path.resolve(manifest.cwd)) {
+				throw new Error(`Security: preStepScript path escapes working directory: ${input.step.preStepScript}`);
+			}
+			try {
+				const { execFileSync } = await import("node:child_process");
+				preStepOutput = execFileSync(input.step.preStepScript, scriptArgs, {
+					timeout: scriptTimeout,
+					encoding: "utf-8",
+					cwd: manifest.cwd,
+					maxBuffer: 1024 * 1024, // 1MB cap
+				});
+			} catch (err) {
+				const msg = err instanceof Error ? err.message : String(err);
+				throw new Error(`preStepScript failed: ${input.step.preStepScript}: ${msg}`);
+			}
+		}
 		const promptResult = await renderTaskPrompt(
 			manifest,
 			input.step,
@@ -274,7 +299,12 @@ export async function runTeamTask(
 			input.agent,
 			skillBlock,
 		);
-		const prompt = promptResult.full;
+		let prompt = promptResult.full;
+		// Inject deterministic pre-step output into prompt
+		if (preStepOutput) {
+			prompt += "\n\n---\n## Pre-Step Script Output\n\nThe following data was produced by a pre-step script. Use it as context for your task:\n\n<output>\n" + preStepOutput + "\n</output>\n";
+		}
 		const promptArtifact = writeArtifact(manifest.artifactsRoot, {
 			kind: "prompt",
 			relativePath: `prompts/${task.id}.md`,
@@ -502,6 +532,9 @@ export async function runTeamTask(
 							collectedJsonEvents.push(
 								event as Record<string, unknown>,
 							);
+							if (collectedJsonEvents.length > 1000) {
+								collectedJsonEvents.splice(0, collectedJsonEvents.length - 1000);
+							}
 						// Accumulate lifetime usage via message_end events (survives compaction)
 						if (event && typeof event === "object" && (event as Record<string, unknown>).type === "message_end") {
 							const msg = (event as Record<string, unknown>).message as Record<string, unknown> | undefined;
@@ -1211,6 +1244,11 @@ export async function runTeamTask(
 			taskId: task.id,
 			message: error,
 		});
+		// Execute after_task_complete lifecycle hook (non-blocking)
+		const afterTaskReport = await executeHook("after_task_complete", { runId: manifest.runId, taskId: task.id, cwd: manifest.cwd, status: error ? "failed" : noYield ? "needs_attention" : "completed" });
+		appendHookEvent(manifest, afterTaskReport);
 		return { manifest, tasks };
 	} finally {
 		streamBridge?.dispose();

package/src/runtime/team-runner.ts CHANGED Viewed

@@ -324,6 +324,13 @@ export async function executeTeamRun(input: ExecuteTeamRunInput): Promise<{ mani
 		// Emit run completion hook (100% reliable, fire-and-forget)
 		crewHooks.emit({ type: "run_completed", timestamp: new Date().toISOString(), runId: manifest.runId, data: { status: result.manifest.status, taskCount: result.tasks.length } });
+		// Execute after_run_complete lifecycle hook (non-blocking)
+		const afterRunReport = await executeHook("after_run_complete", { runId: manifest.runId, cwd: manifest.cwd, status: result.manifest.status });
+		appendHookEvent(manifest, afterRunReport);
+		if (afterRunReport.outcome === "block") {
+			logInternalError("team-runner.after_run_complete.blocked", new Error(afterRunReport.reason ?? "after_run_complete hook blocked"), `runId=${manifest.runId}`);
+		}
 		return result;
 	} catch (error) {
 		// P1: Catch unhandled errors — ensure manifest/tasks/agents are terminal so they don't stay "running" forever.

package/src/runtime/usage-tracker.ts CHANGED Viewed

@@ -16,7 +16,8 @@ export function addUsage(into: LifetimeUsage, delta: { input?: number; output?:
 	if (typeof delta.cacheWrite === "number") into.cacheWrite += delta.cacheWrite;
 }
-export function lifetimeUsageFromState(state: UsageState | undefined): LifetimeUsage {
+/** @internal */
+function lifetimeUsageFromState(state: UsageState | undefined): LifetimeUsage {
 	if (!state) return emptyLifetimeUsage();
 	return {
 		input: state.input ?? 0,
@@ -59,7 +60,8 @@ export const getTaskUsage = getTrackedTaskUsage;
 export const getRunUsage = getTrackedTaskUsage;
 export const clearAllTaskUsage = clearAllTrackedTaskUsage;
-export function aggregateTrackedUsageForRun(manifest: TeamRunManifest, tasks: TeamTaskState[]): UsageState {
+/** @internal */
+function aggregateTrackedUsageForRun(manifest: TeamRunManifest, tasks: TeamTaskState[]): UsageState {
 	const total = emptyLifetimeUsage();
 	for (const task of tasks) {
 		const tracked = getTrackedTaskUsage(task.id);

package/src/runtime/verification-gates.ts CHANGED Viewed

@@ -38,35 +38,61 @@ export interface PhaseGateBundle {
  * Sequential enforcement: each phase must pass before proceeding.
  */
 export const NPM_TYPESCRIPT_GATES: Array<{ name: string; command: string; critical: boolean }> = [
-	{ name: "build", command: "npm run build 2>&1 || true", critical: true },
-	{ name: "typecheck", command: "npx tsc --noEmit 2>&1 || true", critical: true },
-	{ name: "lint", command: "npm run lint 2>&1 || true", critical: false },
-	{ name: "tests", command: "npm test 2>&1 || true", critical: true },
+	{ name: "build", command: "npm run build 2>&1", critical: true },
+	{ name: "typecheck", command: "npx tsc --noEmit 2>&1", critical: true },
+	{ name: "lint", command: "npm run lint 2>&1", critical: false },
+	{ name: "tests", command: "npm test 2>&1", critical: true },
 ];
 /**
  * Cargo/Rust project phase gates.
  */
 export const CARGO_RUST_GATES: Array<{ name: string; command: string; critical: boolean }> = [
-	{ name: "check", command: "cargo check 2>&1 || true", critical: true },
-	{ name: "test", command: "cargo test 2>&1 || true", critical: true },
-	{ name: "clippy", command: "cargo clippy 2>&1 || true", critical: false },
+	{ name: "check", command: "cargo check 2>&1", critical: true },
+	{ name: "test", command: "cargo test 2>&1", critical: true },
+	{ name: "clippy", command: "cargo clippy 2>&1", critical: false },
 ];
 /**
  * Execute a single command and capture output.
  */
+/** Shell constructs rejected in gate commands: semicolon, &&, ||, command substitution, brace expansion, the words eval/exec, append redirect, and input redirect. */
+const DANGEROUS_SHELL_PATTERNS = /(?:;|&&|\|\||\$\(|`|\$\{|\b(eval|exec)\b|>>|<[^&])/;
+// Note: single `>` is NOT blocked here because `2>&1` is a safe redirect used by built-in gates.
+// `>>` (append) is still blocked. `<` without `&` (input redirect) is still blocked.
+/**
+ * Validate a verification gate command is safe to execute.
+ * Rejects commands with shell metacharacters that could enable injection.
+ * Denylist only: anything that does not match passes, including pipes (|), a single &, and 2>&1; throws Error on a match.
+ */
+function validateGateCommand(command: string): void {
+	const normalized = command
+		.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, '')  // strip ANSI escape sequences
+		.replace(/[\x00-\x08\x0b\x0c\x0e-\x1f]/g, '')  // strip control chars (tab, LF and CR are kept for the whitespace collapse below)
+		.replace(/\\\n/g, ' ')  // backslash-newline continuations become a space
+		.replace(/\s+/g, ' ')  // collapse whitespace. NOTE(review): raw newlines become spaces here, yet executeCommand passes the original command to sh -c, where a newline separates commands — confirm this is intended
+		.trim();
+	if (DANGEROUS_SHELL_PATTERNS.test(normalized)) {
+		throw new Error(
+			`Security: verification gate command rejected (dangerous shell pattern): ${command}`,
+		);
+	}
+}
 async function executeCommand(
 	command: string,
 	cwd: string,
 	timeoutMs: number = 120000,
 ): Promise<{ exitCode: number | null; output: string; durationMs: number }> {
+	// SECURITY: Validate command before shell execution to prevent injection.
+	validateGateCommand(command);
 	const start = Date.now();
 	let output = "";
 	let exitCode: number | null = null;
 	return new Promise((resolve) => {
-		// Use shell to handle compound commands
 		const shell = spawn("sh", ["-c", command], {
 			cwd,
 			timeout: timeoutMs,
@@ -313,7 +339,8 @@ export function computeGreenLevelFromResults(
  * Create a verification gate report artifact.
  * Formatted for human review per ECC verification-loop pattern.
  */
-export function createVerificationGateReport(
+/** @internal */
+function createVerificationGateReport(
 	taskId: string,
 	contract: VerificationContract,
 	results: VerificationCommandResult[],

package/src/state/contracts.ts CHANGED Viewed

@@ -28,7 +28,8 @@ export const TEAM_TASK_STATUS_TRANSITIONS: Readonly<Record<TeamTaskStatus, reado
 	needs_attention: ["queued", "running"],
 };
-export const TEAM_EVENT_TYPES = [
+/** @internal */
+const TEAM_EVENT_TYPES = [
 	"run.created",
 	"run.queued",
 	"run.planning",

package/src/state/event-log.ts CHANGED Viewed

@@ -80,7 +80,11 @@ export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
 	const lockDir = `${eventsPath}.lock`;
 	const pidFile = path.join(lockDir, "pid");
 	const start = Date.now();
-	const timeout = 120000; // 120s timeout for slow CI environments
+	// SECURITY (HIGH #2 fix): Reduced from 120s to 5s to prevent blocking the
+	// event loop indefinitely. 500 retries × 10ms = 5s max. After timeout, we
+	// throw a clear error instead of blocking forever. This ensures AbortSignal
+	// handlers, SIGTERM, and graceful shutdown can fire within seconds.
+	const timeout = 5000;
 	const staleMs = 10000;
 	let acquired = false;
 	while (true) {
@@ -91,10 +95,12 @@ export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
 			break;
 		} catch {
 			if (Date.now() - start > timeout) {
-				// Log error and continue without lock — lock is held by live process.
-				// Stale detection will clean up dead locks on next attempt.
-				logInternalError("event-log.lock-timeout", new Error(`Event log lock timeout for ${eventsPath}`), `lockDir=${lockDir}`);
-				break;
+				// SECURITY (HIGH #2 fix): Throw instead of continuing without lock.
+				// Previously this logged and broke out of the loop, executing the
+				// operation without lock protection. Now we throw so callers can retry.
+				throw new Error(
+					`Event log lock timeout for ${eventsPath}: could not acquire lock within ${timeout}ms`,
+				);
 			}
 			// Stale detection: if the owning process is dead, remove the stale lock.
 			try {
@@ -217,6 +223,11 @@ export function appendEvent(eventsPath: string, event: AppendTeamEvent): TeamEve
 // --- Async write queue (non-blocking alternative to withEventLogLockSync) ---
 const asyncQueues = new Map<string, Promise<unknown>>();
+/** Clear the module-level asyncQueues map (for testing only); nothing else is reset by this function. */
+export function resetEventLogMode(): void {
+	asyncQueues.clear();
+}
 /**
  * Append an event to the event log using non-blocking async I/O.
  *

package/src/state/hook-instinct-bridge.ts CHANGED Viewed

@@ -80,7 +80,8 @@ crewHooks.register("run_completed", async (event) => {
 /**
  * Get instinct-based recommendations.
  */
-export async function getInstinctRecommendations() {
+/** @internal */
+async function getInstinctRecommendations() {
 	try {
 		const store = await getStore();
 		return store.getInstincts().filter((i: { confidence: number }) => i.confidence >= 0.6);

package/src/state/locks.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import * as fs from "node:fs";
 import * as path from "node:path";
-import { randomUUID } from "node:crypto";
+import { randomUUID, timingSafeEqual } from "node:crypto";
 import type { TeamRunManifest } from "./types.ts";
 import { DEFAULT_LOCKS } from "../config/defaults.ts";
 import { sleepSync } from "../utils/sleep.ts";
@@ -103,9 +103,16 @@ function readLockToken(filePath: string): string | undefined {
  *
  * With token matching, A's release is a no-op for B's lock.
  */
+function timingSafeTokenMatch(a: string, b: string): boolean { // compare two lock tokens with crypto.timingSafeEqual
+	const bufA = Buffer.from(String(a)); // String() is a no-op for real strings; presumably a runtime guard — confirm
+	const bufB = Buffer.from(String(b));
+	if (bufA.length !== bufB.length) return false; // timingSafeEqual throws on unequal byte lengths, so check first
+	return timingSafeEqual(bufA, bufB);
+}
 function releaseLock(filePath: string, token: string): void {
 	const stored = readLockToken(filePath);
-	if (stored === undefined || stored === token) {
+	if (stored === undefined || timingSafeTokenMatch(stored, token)) {
 		try {
 			fs.rmSync(filePath, { force: true });
 		} catch {