npm - pi-crew - Versions diffs - 0.9.5 → 0.9.8 - Mend

pi-crew 0.9.5 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/CHANGELOG.md +556 -0
package/README.md +10 -3
package/docs/HARNESS_BACKLOG.md +51 -3
package/docs/dynamic-workflows.md +315 -2
package/docs/fix-plan-disabletools-exit-null.md +219 -0
package/docs/troubleshooting.md +76 -0
package/package.json +10 -3
package/src/config/defaults.ts +8 -4
package/src/extension/team-tool/doctor.ts +14 -0
package/src/extension/team-tool/run.ts +2 -0
package/src/runtime/background-runner.ts +1 -1
package/src/runtime/capability-inventory.ts +20 -1
package/src/runtime/child-pi.ts +109 -11
package/src/runtime/deterministic-ast.ts +161 -0
package/src/runtime/dwf-state-store.ts +97 -0
package/src/runtime/dynamic-workflow-context.ts +381 -7
package/src/runtime/dynamic-workflow-runner.ts +93 -2
package/src/runtime/pi-args.ts +11 -0
package/src/runtime/result-extractor.ts +72 -7
package/src/runtime/task-output-context.ts +25 -9
package/src/runtime/team-runner.ts +8 -3
package/src/runtime/zombie-scanner.ts +297 -0
package/src/schema/team-tool-schema.ts +28 -0
package/src/skills/discover-skills.ts +61 -8
package/src/skills/validate.ts +267 -0
package/src/state/contracts.ts +1 -0
package/src/state/state-store.ts +3 -0
package/src/state/types.ts +9 -0
package/src/ui/dashboard-panes/progress-pane.ts +5 -0
package/src/ui/dwf-phase-display.ts +151 -0
package/src/ui/keybinding-map.ts +128 -41
package/src/ui/run-event-bus.ts +83 -0
package/src/ui/run-snapshot-cache.ts +4 -0
package/src/ui/snapshot-types.ts +3 -0
package/src/workflows/workflow-config.ts +3 -0
package/src/worktree/worktree-manager.ts +94 -0
package/types/dwf.d.ts +187 -0

package/src/ui/run-event-bus.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { TeamEvent } from "../state/event-log.ts";
+import { readEventsCursor } from "../state/event-log.ts";
 export type RunEventType =
 	| "task_started"
@@ -59,6 +60,18 @@ export interface RunEventPayload {
 	timestamp?: string;
 	data?: unknown;
 	channel?: EventChannel;
+	/**
+	 * L1: monotonic sequence from the durable event log
+	 * (`TeamEvent.metadata.seq`). Present on events that originated from a
+	 * logged TeamEvent (via emitFromTeamEvent). Absent on transient live-only
+	 * events (e.g. worker_status from the stream bridge) that are never
+	 * persisted and therefore cannot be replayed or deduped.
+	 *
+	 * Used by onWithReplay() to dedup: a live event with seq <= the last seq
+	 * replayed to a subscriber is suppressed (it was already delivered from
+	 * the durable log).
+	 */
+	seq?: number;
 }
 export type RunEventCallback = (event: RunEventPayload) => void;
@@ -115,6 +128,73 @@ class RunEventBus {
 		};
 	}
+	/**
+	 * L1: subscribe to a run's live events after first catching up from the
+	 * durable event log.
+	 *
+	 * Overlays/widgets that are torn down and rebuilt (toggle, reconnect) miss
+	 * whatever was emitted while no listener was attached. To close that gap
+	 * this method first reads the logged TeamEvents newer than `lastSeenSeq`
+	 * through `readEventsCursor` and hands them straight to `callback`, and
+	 * only then registers the live listener. A live event whose `seq` is not
+	 * greater than the highest replayed seq is dropped, so an event available
+	 * both ways is delivered exactly once.
+	 *
+	 * The replay source is the run's durable JSONL log rather than an
+	 * in-memory ring buffer, so the catch-up data outlives a process crash.
+	 *
+	 * @param runId       Run whose live events should be delivered.
+	 * @param eventsPath  Location of the run's events JSONL (manifest.eventsPath).
+	 * @param lastSeenSeq Highest seq the caller has already handled; only
+	 *                    newer events are requested. Pass 0 to request the
+	 *                    whole log (the read is issued with `limit: 1000`).
+	 * @param callback    Invoked for replayed and live events alike. Replayed
+	 *                    events bypass emit(), so other subscribers do not
+	 *                    receive them again.
+	 * @returns the unsubscribe handle produced by `this.on` for the live listener.
+	 */
+	onWithReplay(
+		runId: string,
+		eventsPath: string,
+		lastSeenSeq: number,
+		callback: RunEventCallback,
+	): () => void {
+		// High-water mark of seqs already handed to `callback`. Starts at what
+		// the caller reports as seen and only grows while replaying.
+		let replayedUpTo = lastSeenSeq;
+		try {
+			const { events: loggedEvents } = readEventsCursor(eventsPath, { sinceSeq: lastSeenSeq, limit: 1000 });
+			for (const logged of loggedEvents) {
+				const mappedType = teamEventToRunEventType(logged);
+				// TeamEvents without a RunEventType counterpart are skipped.
+				if (mappedType) {
+					const replayed: RunEventPayload = {
+						type: mappedType,
+						runId: logged.runId,
+						taskId: logged.taskId,
+						timestamp: logged.time,
+						data: logged.data,
+						channel: classifyEventChannel(mappedType),
+						seq: logged.metadata?.seq,
+					};
+					try {
+						callback(replayed);
+					} catch {
+						// A throwing subscriber must not abort the replay.
+					}
+					const loggedSeq = logged.metadata?.seq;
+					if (typeof loggedSeq === "number") {
+						replayedUpTo = Math.max(replayedUpTo, loggedSeq);
+					}
+				}
+			}
+		} catch {
+			// Reading the log is best-effort (it may not exist yet for a
+			// brand-new run); on failure continue with a live-only subscription.
+		}
+		// Live delivery with dedup: anything at or below the replay high-water
+		// mark was already delivered from the log. Events that carry no seq
+		// (transient live-only ones such as worker_status) always pass through.
+		const dedupedLive: RunEventCallback = (event) => {
+			const alreadyReplayed = typeof event.seq === "number" && event.seq <= replayedUpTo;
+			if (!alreadyReplayed) callback(event);
+		};
+		return this.on(runId, dedupedLive);
+	}
 	emit(event: RunEventPayload): void {
 		// Auto-classify channel if not already set.
 		// M2: Use local variable for routing, but also set on event
@@ -206,5 +286,8 @@ export function emitFromTeamEvent(event: TeamEvent): void {
 		taskId: event.taskId,
 		timestamp: event.time,
 		data: event.data,
+		// L1: stamp the durable-log seq so onWithReplay() can dedup live
+		// delivery against replayed events.
+		seq: event.metadata?.seq,
 	});
 }

package/src/ui/run-snapshot-cache.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import { loadRunManifestById, loadRunManifestByIdAsync } from "../state/state-st
 import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
 import type { RunSnapshotCache as RunSnapshotCacheBase, RunUiGroupJoin, RunUiMailbox, RunUiProgress, RunUiSnapshot, RunUiUsage } from "./snapshot-types.ts";
 import { runEventBus } from "./run-event-bus.ts";
+import { extractDwfPhaseState } from "./dwf-phase-display.ts";
 import { sequencePath } from "../state/event-log.ts";
 export interface RunSnapshotCache extends RunSnapshotCacheBase {
@@ -566,6 +567,7 @@ function signatureFor(input: Omit<RunUiSnapshot, "signature" | "fetchedAt" | "sl
 		groupJoins: input.groupJoins,
 		events: input.recentEvents.map((event) => [event.metadata?.seq, event.time, event.type, event.taskId, event.message, event.data?.reason]),
 		cancellationReason: input.cancellationReason,
+		dwfPhaseState: input.dwfPhaseState,
 		output: input.recentOutputLines,
 		stamps,
 	}));
@@ -684,6 +686,7 @@ export function createRunSnapshotCache(cwd: string, options: RunSnapshotCacheOpt
 			mailbox,
 			groupJoins,
 			cancellationReason: cancellationReasonFromEvents(recentEvents),
+			dwfPhaseState: extractDwfPhaseState(recentEvents),
 			recentEvents,
 			recentOutputLines: recentOutputLines(loaded.manifest, agents, recentOutputLimit),
 		};
@@ -730,6 +733,7 @@ export function createRunSnapshotCache(cwd: string, options: RunSnapshotCacheOpt
 			mailbox,
 			groupJoins,
 			cancellationReason: cancellationReasonFromEvents(recentEvents),
+			dwfPhaseState: extractDwfPhaseState(recentEvents),
 			recentEvents,
 			recentOutputLines: recentOutput,
 		};

package/src/ui/snapshot-types.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import type { CrewAgentRecord } from "../runtime/crew-agent-runtime.ts";
 import type { TeamEvent } from "../state/event-log.ts";
 import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
+import type { DwfPhaseState } from "./dwf-phase-display.ts";
 export interface RunUiProgress {
 	total: number;
@@ -73,6 +74,8 @@ export interface RunUiSnapshot {
 	groupJoins?: RunUiGroupJoin[];
 	/** Structured cancellation reason from run.cancelled event data, when available. */
 	cancellationReason?: string;
+	/** DWF phase state derived from `recentEvents`. Null/absent for non-DWF runs. */
+	dwfPhaseState?: DwfPhaseState | null;
 	recentEvents: TeamEvent[];
 	recentOutputLines: string[];
 }

package/src/workflows/workflow-config.ts CHANGED Viewed

@@ -45,6 +45,9 @@ export interface WorkflowConfig {
 	runtime?: "static" | "dynamic";
 	/** For runtime:"dynamic" — relative/absolute path to the .dwf.ts script. Unused for static. */
 	dynamicScript?: string;
+	/** For runtime:"dynamic" — per-workflow token budget. When set, ctx.agent() auto-rejects with
+	 *  ok:false once exhausted. Accumulated from each agent run's reported usage. */
+	maxTokenBudget?: number;
 }
 /** A dynamic workflow (runtime === "dynamic"). steps is empty — the script is the source of truth. */

package/src/worktree/worktree-manager.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { execFileSync, spawnSync } from "node:child_process";
+import { randomBytes } from "node:crypto";
 import * as fs from "node:fs";
 import * as path from "node:path";
 import { WINDOWS_ESSENTIAL_ENV_VARS } from "../utils/env-allowlist.ts";
@@ -7,6 +8,7 @@ import { projectCrewRoot } from "../utils/paths.ts";
 import { DEFAULT_PATHS } from "../config/defaults.ts";
 import { logInternalError } from "../utils/internal-error.ts";
 import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
+import { writeArtifact } from "../state/artifact-store.ts";
 import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
 export interface PreparedTaskWorkspace {
@@ -460,3 +462,95 @@ export function captureWorktreeDiff(worktreePath: string): string {
 		return `Failed to capture worktree diff: ${message}`;
 	}
 }
+/**
+ * round-17 P2-4: Create an isolated git worktree for a single DWF agent call.
+ *
+ * Lightweight — does NOT require a TeamTaskState and does NOT depend on
+ * `manifest.workspaceMode === "worktree"` (DWF manifests use `single`). It
+ * reuses the same internal helpers as `prepareTaskWorkspace` (git, findGitRoot,
+ * assertCleanLeader, pruneStaleWorktrees, sanitizeBranchPart,
+ * linkNodeModulesIfPresent) but with a minimal, task-free signature.
+ *
+ * The worktree is placed under the project crew root at
+ * `<worktreesSubdir>/<sanitized runId>/<sanitized agentId>` and checked out on
+ * a new branch `pi-crew/<sanitized runId>/<sanitized agentId>` created from HEAD.
+ *
+ * Returns `undefined` when worktree creation is unavailable (no git repo, dirty
+ * leader, git error) so the caller (`ctx.agent`) can fall back gracefully. The
+ * reason is recorded through `logInternalError` instead of being dropped, so a
+ * silently skipped isolation can still be diagnosed. If a step fails AFTER
+ * `git worktree add` succeeded, the half-prepared worktree and branch are
+ * rolled back first; otherwise they would leak, because a caller that receives
+ * `undefined` has nothing to clean up.
+ *
+ * @param manifest Run manifest; `cwd` locates the repo/config and `runId`
+ *                 namespaces the worktree directory and branch.
+ * @param agentId  Identifier of the agent call; sanitized into the directory
+ *                 and branch name.
+ * @returns the prepared workspace, or `undefined` on any failure.
+ */
+export function prepareAgentWorktree(
+	manifest: TeamRunManifest,
+	agentId: string,
+): PreparedTaskWorkspace | undefined {
+	try {
+		const repoRoot = findGitRoot(manifest.cwd);
+		const loadedConfig = loadConfig(manifest.cwd);
+		if (loadedConfig.config.requireCleanWorktreeLeader !== false) assertCleanLeader(repoRoot);
+		const sanitizedRunId = manifest.runId.replace(/[^a-zA-Z0-9._-]/g, "-").replace(/^-+|-+$/g, "") || "run";
+		const worktreeRoot = path.join(projectCrewRoot(manifest.cwd), DEFAULT_PATHS.state.worktreesSubdir, sanitizedRunId);
+		fs.mkdirSync(worktreeRoot, { recursive: true });
+		const sanitizedAgentId = sanitizeBranchPart(agentId);
+		const worktreePath = path.join(worktreeRoot, sanitizedAgentId);
+		const branch = `pi-crew/${sanitizedRunId}/${sanitizedAgentId}`;
+		pruneStaleWorktrees(repoRoot);
+		git(repoRoot, ["worktree", "add", "-b", branch, worktreePath, "HEAD"]);
+		try {
+			const nodeModulesLinked = loadedConfig.config.worktree?.linkNodeModules === true
+				? linkNodeModulesIfPresent(repoRoot, worktreePath)
+				: false;
+			return { cwd: worktreePath, worktreePath, branch, nodeModulesLinked };
+		} catch (error) {
+			// The worktree and branch already exist at this point. Roll them back
+			// (cleanupAgentWorktree never throws) before taking the fallback path.
+			cleanupAgentWorktree(manifest, worktreePath, branch);
+			throw error;
+		}
+	} catch (error) {
+		// Graceful fallback: no git repo, dirty leader, or git error → run normally.
+		logInternalError("worktree.agent-prepare", error, `agentId=${agentId}`);
+		return undefined;
+	}
+}
+/**
+ * round-17 P2-4: Remove a DWF agent worktree after the agent completes.
+ *
+ * Steps, each best-effort and independently guarded:
+ *   1. capture the worktree diff and store it as a `diff` artifact;
+ *   2. remove the worktree (`git worktree remove --force`, falling back to
+ *      `fs.rmSync` when git fails);
+ *   3. prune stale worktree refs;
+ *   4. delete the ephemeral agent branch, when one was given.
+ *
+ * Pruning runs BEFORE the branch deletion on purpose: if step 2 had to fall
+ * back to `fs.rmSync`, git still has the worktree registered and refuses to
+ * delete a branch it considers checked out there. Pruning first drops that
+ * stale registration so the branch can actually be removed.
+ *
+ * NEVER throws — cleanup failures are logged via `logInternalError` so a
+ * worktree/branch leak never crashes a workflow.
+ *
+ * @param manifest     Run manifest; `cwd` locates the repo and `artifactsRoot`
+ *                     receives the captured diff.
+ * @param worktreePath Filesystem path of the worktree to remove.
+ * @param branch       Ephemeral branch to delete; skipped when omitted.
+ */
+export function cleanupAgentWorktree(manifest: TeamRunManifest, worktreePath: string, branch?: string): void {
+	// Capture diff as artifact (best-effort).
+	try {
+		const diff = captureWorktreeDiff(worktreePath);
+		// captureWorktreeDiff reports failures as a message string rather than
+		// throwing; do not persist that message as if it were a diff.
+		if (diff.trim() && !diff.startsWith("Failed to capture worktree diff")) {
+			writeArtifact(manifest.artifactsRoot, {
+				kind: "diff",
+				// Timestamp plus a short random suffix keeps names distinct when
+				// several agent worktrees are cleaned up within the same millisecond.
+				relativePath: `wf/worktree-diff-${Date.now()}-${randomBytes(2).toString("hex")}.diff`,
+				content: diff,
+				producer: "dynamic-workflow",
+			});
+		}
+	} catch (error) {
+		logInternalError("worktree.agent-cleanup.diff", error, `worktreePath=${worktreePath}`);
+	}
+	// Remove worktree (best-effort). Try git first, then fall back to fs.rm.
+	try {
+		const repoRoot = findGitRoot(manifest.cwd);
+		git(repoRoot, ["worktree", "remove", "--force", worktreePath]);
+	} catch (error) {
+		logInternalError("worktree.agent-cleanup.remove", error, `worktreePath=${worktreePath}`);
+		try {
+			fs.rmSync(worktreePath, { recursive: true, force: true });
+		} catch (rmError) {
+			logInternalError("worktree.agent-cleanup.rm", rmError, `worktreePath=${worktreePath}`);
+		}
+	}
+	// Prune stale worktree refs (best-effort). Must precede the branch deletion:
+	// after an fs.rm fallback the worktree is still registered and would block it.
+	try {
+		const repoRoot = findGitRoot(manifest.cwd);
+		git(repoRoot, ["worktree", "prune"]);
+	} catch (error) {
+		logInternalError("worktree.agent-cleanup.prune", error, `worktreePath=${worktreePath}`);
+	}
+	// Delete the ephemeral agent branch (best-effort) to avoid accumulation across
+	// many agent calls. The diff is already captured above; the branch holds no value.
+	if (branch) {
+		try {
+			const repoRoot = findGitRoot(manifest.cwd);
+			git(repoRoot, ["branch", "-D", branch]);
+		} catch (error) {
+			logInternalError("worktree.agent-cleanup.branch", error, `branch=${branch}`);
+		}
+	}
+}

package/types/dwf.d.ts ADDED Viewed

@@ -0,0 +1,187 @@
+/**
+ * Authoring types for pi-crew dynamic workflow scripts (`.dwf.ts`).
+ *
+ * Round-14 P1-1: gives TS users IDE IntelliSense for the `ctx` object passed to a
+ * workflow script's `export default async function(ctx) { ... }`.
+ *
+ * pi-crew passes `ctx` as a parameter (NOT as ambient globals), so the types here are
+ * named exports. Import them in your workflow script:
+ *
+ * ```ts
+ * import type { WorkflowCtx } from "pi-crew/workflow";
+ *
+ * export default async function run(ctx: WorkflowCtx): Promise<void> {
+ *   ctx.log("starting");
+ *   const res = await ctx.agent({ role: "explorer", prompt: "look around" });
+ *   ctx.setResult(res.artifactPath ?? "");
+ * }
+ * ```
+ *
+ * Alternatively, add a triple-slash reference so the package's type map is loaded:
+ *
+ * ```ts
+ * /// <reference types="pi-crew/workflow" />
+ * import type { WorkflowCtx } from "pi-crew/workflow";
+ * ```
+ *
+ * These interfaces mirror the runtime types in `src/runtime/dynamic-workflow-context.ts`.
+ * They are authoring-only (no runtime values); the real implementations live in the runner.
+ *
+ * ## Resume & Checkpoint (round-18 P2-3)
+ *
+ * The runner persists a checkpoint after every `ctx.agent()` call so that a crash
+ * (timeout, OOM, agent error) between calls does not lose all progress. When you run
+ * `team action='resume' runId='X'`, the runner re-executes the script from the top
+ * but **hydrates** `ctx.vars`, `ctx.budget.spent()`, the phase list, and the log
+ * buffer from the last checkpoint.
+ *
+ * Because the script re-runs from the top, write it **defensively** — check
+ * `ctx.vars` to skip already-completed work:
+ *
+ * ```ts
+ * export default async function run(ctx) {
+ *   // Defensive resume: skip the scan phase if it already ran. Use one flag per
+ *   // phase — a single "lastPhase" marker would re-run the scan as soon as a
+ *   // later phase overwrote it.
+ *   if (!ctx.vars.scanDone) {
+ *     const res = await ctx.agent({ role: "explorer", prompt: "scan" });
+ *     ctx.vars.scanDone = true;   // checkpointed after this call
+ *   }
+ *   // ... continue with analyze, using ctx.vars from the prior run
+ * }
+ * ```
+ *
+ * On a clean completion the checkpoint is deleted, so a re-run with the same runId
+ * starts fresh. A missing or corrupt checkpoint is treated as a fresh run.
+ */
+export interface AgentCallOpts { // Options accepted by ctx.agent(); `prompt` is the only required field.
+	prompt: string; // Task text handed to the agent.
+	/** Role name (resolved via 4-tier chain) OR explicit agent name. */
+	role?: string;
+	/** Explicit agent name — bypasses team-role lookup. */
+	agent?: string;
+	description?: string; // NOTE(review): presumably a short human-readable label for the call — confirm against the runtime context.
+	model?: string; // NOTE(review): presumably a model override for this call — confirm accepted values.
+	skill?: string[] | false; // NOTE(review): presumably skill names to attach, with `false` disabling skills — confirm.
+	maxTurns?: number; // NOTE(review): presumably the turn limit for this agent run — confirm.
+	graceTurns?: number; // NOTE(review): presumably extra turns allowed past maxTurns — confirm.
+	/** Dependency artifact paths injected into the agent prompt. */
+	inputs?: string[];
+	/** Disable ALL tools for this call (pure-judgment / verdict steps). */
+	disableTools?: boolean;
+	/** Override the resolved agent's system prompt. */
+	systemPrompt?: string;
+	/** Round-13: optional TypeBox schema. When set, output is validated; mismatch yields ok:false.
+	 *  Typed here as a plain readonly object, so this declaration file itself needs no TypeBox import. */
+	schema?: { readonly [key: string]: unknown };
+	/** round-17 P2-4: spawn this agent in an isolated git worktree. Useful when
+	 *  parallel agents modify files concurrently (avoids conflicts). The worktree
+	 *  is created from HEAD, the agent runs there, and on completion the diff is
+	 *  captured as an artifact before cleanup. Default false. If worktree creation
+	 *  fails (no git repo, dirty leader), the agent runs in the normal cwd with a
+	 *  warning. Backward compatible — omitting it is identical to `false`. */
+	worktree?: boolean;
+}
+export interface AgentResult { // Value resolved by ctx.agent() and ctx.retry().
+	ok: boolean; // false on failure, e.g. a schema mismatch or an exhausted token budget (see AgentCallOpts.schema, WorkflowCtx.budget).
+	text: string; // NOTE(review): presumably the agent's textual output — confirm.
+	structured?: unknown; // NOTE(review): presumably the schema-validated output when `schema` was passed — confirm.
+	usage?: { input?: number; output?: number; cost?: number; turns?: number }; // Reported usage; the workflow budget accumulates from it.
+	runId?: string;
+	taskId?: string; // NOTE(review): presumably the id accepted by ctx.review()/ctx.retry() — confirm.
+	artifactPath?: string; // Path of the produced artifact; can be passed to ctx.setResult().
+	error?: string; // NOTE(review): presumably a failure description when ok is false — confirm.
+	durationMs?: number;
+}
+/** Round-14 P1-2: per-workflow token budget.
+ *  Exposed to scripts as `ctx.budget`; once it is exhausted, ctx.agent() auto-rejects. */
+export interface WorkflowBudget {
+	/** Configured budget, or null when unbounded. */
+	total: number | null;
+	/** Tokens consumed so far (accumulated from each ctx.agent() run's usage). */
+	spent(): number;
+	/** Tokens remaining; Infinity when total is null. */
+	remaining(): number;
+}
+export interface ReviewResult { // Value resolved by ctx.review(): the reviewer's parsed {outcome, feedback}.
+	outcome: "accept" | "reject" | "changes_requested"; // The reviewer's verdict.
+	feedback: string; // Reviewer feedback text accompanying the verdict.
+}
+/** Options for ctx.mail().
+ *  All fields are optional; their exact semantics are defined by the runtime mailbox and are not visible in this file. */
+export interface MailOpts {
+	kind?: string; // NOTE(review): presumably a message category — confirm accepted values.
+	taskId?: string; // NOTE(review): presumably associates the message with a task — confirm.
+	replyTo?: string; // NOTE(review): presumably the id of the message being answered — confirm.
+	replyDeadline?: number; // NOTE(review): unit not visible here (duration vs. timestamp) — confirm.
+}
+/** Options for ctx.review().
+ *  Passed as the optional third argument, after the task id and reviewer role. */
+export interface ReviewOpts {
+	content?: string; // NOTE(review): presumably inline content to review instead of an artifact — confirm.
+	artifactPath?: string; // NOTE(review): presumably the artifact the reviewer should read — confirm.
+	disableTools?: boolean; // NOTE(review): presumably mirrors AgentCallOpts.disableTools for the reviewer call — confirm.
+}
+/** Options for ctx.retry().
+ *  ctx.retry() re-runs a task with feedback; this carries that feedback. */
+export interface RetryOpts {
+	feedback?: string; // Feedback text supplied to the re-run.
+}
+/**
+ * The capability-locked context object passed to a `.dwf.ts` script's
+ * `export default async function(ctx)`. Exposes ONLY the documented methods —
+ * no raw manifest/process/require leaks.
+ *
+ * NOTE: v1 has NO vm sandbox; the script CAN reach process/require directly.
+ * The frozen ctx is a contract surface, not a security boundary. `.dwf.ts`
+ * scripts are postinstall-equivalent trust.
+ */
+export interface WorkflowCtx {
+	cwd: string; // NOTE(review): presumably the run's working directory — confirm against dynamic-workflow-context.ts.
+	runId: string; // Id of the current run.
+	goal?: string; // NOTE(review): presumably the goal text the run was started with — confirm.
+	/** Script-local persistent variables.
+	 *
+	 *  On resume (round-18 P2-3), these are hydrated from the last checkpoint so a
+	 *  re-run continues where it left off. Write defensive scripts that inspect
+	 *  `ctx.vars` to skip work already done in a prior (crashed) run. */
+	vars: Record<string, unknown>;
+	/** Abort signal (cancel/stop). */
+	signal: AbortSignal;
+	/** Concurrency semaphore (bounded by ctx concurrency).
+	 *  NOTE(review): the type comes from an extensionless relative import into `src/`; confirm
+	 *  it resolves for package consumers, since the package sources import with explicit `.ts`. */
+	semaphore: import("../src/runtime/semaphore").Semaphore;
+	/** Spawn one agent, await result. Concurrency enforced by ctx.semaphore. */
+	agent(opts: AgentCallOpts): Promise<AgentResult>;
+	/** Bounded fan-out preserving order. */
+	fanOut<T>(items: T[], limit: number, fn: (item: T, i: number) => Promise<AgentResult>): Promise<AgentResult[]>;
+	/** Pipeline: sequential per-item stages, parallel across items (bounded by ctx.semaphore).
+	 *  Failed stage → null for that item (logged); other items continue. round-16 (P2-1).
+	 *  Each stage receives the previous stage's value, the original item and its index. */
+	pipeline<TItem, TResult = unknown>(
+		items: TItem[],
+		...stages: Array<(previous: TResult, original: TItem, index: number) => Promise<TResult> | TResult>
+	): Promise<(TResult | null)[]>;
+	/** Run a reviewer agent over an artifact; parse {outcome, feedback}. */
+	review(taskId: string, reviewerRole?: string, opts?: ReviewOpts): Promise<ReviewResult>;
+	/** Re-run a task with feedback (wraps executeWithRetry). */
+	retry(taskId: string, opts?: RetryOpts): Promise<AgentResult>;
+	/** Send a mailbox message to another agent/leader. Returns the message id. */
+	mail(to: string, body: string, opts?: MailOpts): string;
+	/** Block until N mailbox replies arrive or deadline. */
+	gatherReplies(messageIds: string[], deadlineMs: number): Promise<unknown[]>;
+	/** Render a built-in plan template (full-implementation / standard-review). */
+	renderTemplate(name: string, vars: Record<string, string>): unknown;
+	/** Mark the final result. ONLY this artifact reaches the main context. */
+	setResult(artifactPath: string, meta?: Record<string, unknown>): void;
+	/** Round-12: mark the start of a named workflow phase (emits dwf.phase_started/_completed). Idempotent on the same title. */
+	phase(title: string): void;
+	/** Round-14 P1-3: append a workflow-level log line (emits a dwf.log event). */
+	log(message: unknown): void;
+	/** Round-14 P1-2: per-workflow token budget; ctx.agent() auto-rejects when exhausted. */
+	budget: WorkflowBudget;
+	/** Round-14 P1-5: typed workflow arguments (sourced from manifest.args). Defaults to {}. */
+	args<T = unknown>(): T;
+}
+export {};