npm - pi-taskflow - Versions diffs - 0.0.9 → 0.0.11 - Mend

pi-taskflow 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +153 -216
package/examples/guarded-refactor.json +1 -1
package/extensions/index.ts +8 -0
package/extensions/render.ts +7 -2
package/extensions/runner.ts +68 -1
package/extensions/runtime.ts +41 -48
package/extensions/schema.ts +19 -6
package/extensions/store.ts +544 -55
package/package.json +1 -1
package/skills/taskflow/SKILL.md +1 -1

package/extensions/runner.ts CHANGED Viewed

@@ -42,12 +42,42 @@ export interface RunOptions {
 	signal?: AbortSignal;
 	/** Fires on each assistant turn with the latest activity + accumulated usage. */
 	onLive?: (live: LiveUpdate) => void;
+	/**
+	 * Idle watchdog: if the subagent produces no stdout for this many ms, it is
+	 * considered stalled (hung stream / provider stall / tool deadlock) and is
+	 * killed (SIGTERM → SIGKILL). Resets on every stdout chunk. A value <= 0
+	 * disables the watchdog (the prior behaviour); undefined uses DEFAULT_IDLE_TIMEOUT_MS.
+	 */
+	idleTimeoutMs?: number;
 }
+/**
+ * Default idle-watchdog window, in milliseconds (5 minutes). Applied when
+ * `RunOptions.idleTimeoutMs` is left undefined. A subagent that emits nothing
+ * on stdout for this long is treated as wedged and killed so a single stalled
+ * child cannot hang the entire taskflow forever (the only previous escape was
+ * a manual user abort). 5 minutes is generous enough for slow reasoning/long
+ * tool calls while still bounding a true hang.
+ */
+export const DEFAULT_IDLE_TIMEOUT_MS = 5 * 60_000;
 /**
  * Report whether a subagent run counts as failed: either the process exited
  * with a non-zero code, or its stop reason is "error" or "aborted".
  */
 export function isFailed(r: RunResult): boolean {
 	// A non-zero exit code is a failure regardless of the stop reason.
 	if (r.exitCode !== 0) return true;
 	const reason = r.stopReason;
 	return reason === "error" || reason === "aborted";
 }
+/**
+ * Heuristic: did this failure look like a transient/retryable provider error
+ * (rate limit, overload, timeout, 5xx)? Such errors should be retried inside
+ * the taskflow run with backoff rather than bubbled up — otherwise the calling
+ * agent tends to re-invoke the whole tool, producing duplicate progress blocks.
+ *
+ * The pattern is matched case-insensitively against the concatenation of the
+ * result's `errorMessage`, `stderr` and `output`, so a hit in any of the three
+ * marks the failure as transient. It covers rate-limit wording, the status
+ * codes 429/502/503/504 as standalone numbers, "overloaded"/"unavailable"
+ * phrases, timeouts, and ECONNRESET/ETIMEDOUT/"socket hang up" errors.
+ * A result whose `stopReason` is "aborted" is never classified as transient.
+ *
+ * NOTE(review): the "timeout" alternative also matches the idle-watchdog
+ * message ("… (idle timeout) — killed"), so a stalled subagent appears to be
+ * classified as transient — confirm that retrying it is intended.
+ */
+const TRANSIENT_ERROR_RE =
+	/rate[_\s-]?limit|too\s+many\s+requests|overloaded|\b429\b|\b503\b|\b502\b|\b504\b|service\s+unavailable|temporarily\s+unavailable|timeout|timed?\s+out|econnreset|etimedout|socket\s+hang\s*up/i;
+export function isTransientError(r: RunResult): boolean {
+	if (r.stopReason === "aborted") return false; // aborts are never transient
+	const hay = `${r.errorMessage ?? ""} ${r.stderr ?? ""} ${r.output ?? ""}`; // missing fields become ""
+	return TRANSIENT_ERROR_RE.test(hay);
+}
 /**
  * Placeholder written to a failed phase's `output` so downstream interpolation
  * can detect "upstream failed" without being polluted by raw HTML/JSON.
  */
 export const TRANSPORT_ERROR_PLACEHOLDER = "(upstream error: subagent failed; see error)";
@@ -292,6 +322,7 @@ export async function runAgentTask(
 		args.push(`Task: ${task}`);
 		let wasAborted = false;
+		let idleTimedOut = false;
 		const exitCode = await new Promise<number>((resolve) => {
 			const invocation = getPiInvocation(args);
 			const proc = spawn(invocation.command, invocation.args, {
@@ -301,12 +332,40 @@ export async function runAgentTask(
 			});
 			let buffer = "";
+			// Idle watchdog: a subagent that goes silent on stdout for too long is
+			// treated as wedged and killed, so one stalled child cannot hang the
+			// whole taskflow forever. The timer is reset on every stdout chunk and
+			// torn down on close/error.
+			const idleMs = opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS;
+			let idleTimer: ReturnType<typeof setTimeout> | undefined;
+			let forceKillTimer: ReturnType<typeof setTimeout> | undefined;
+			const clearTimers = () => {
+				if (idleTimer) clearTimeout(idleTimer);
+				if (forceKillTimer) clearTimeout(forceKillTimer);
+			};
+			const hardKill = () => {
+				proc.kill("SIGTERM");
+				forceKillTimer = setTimeout(() => proc.kill("SIGKILL"), 5000);
+				forceKillTimer.unref();
+			};
+			const armIdle = () => {
+				if (idleTimer) clearTimeout(idleTimer);
+				if (idleMs <= 0) return; // disabled
+				idleTimer = setTimeout(() => {
+					idleTimedOut = true;
+					hardKill();
+				}, idleMs);
+				idleTimer.unref();
+			};
+			armIdle();
 			const processLine = (line: string) => {
 				const live = foldEventLine(acc, line);
 				if (live && opts.onLive) opts.onLive(live);
 			};
 			proc.stdout.on("data", (data) => {
+				armIdle(); // progress observed — reset the idle watchdog
 				buffer += data.toString();
 				const lines = buffer.split("\n");
 				buffer = lines.pop() || "";
@@ -316,10 +375,12 @@ export async function runAgentTask(
 				result.stderr += data.toString();
 			});
 			proc.on("close", (code) => {
+				clearTimers();
 				if (buffer.trim()) processLine(buffer);
 				resolve(code ?? 0);
 			});
 			proc.on("error", (err) => {
+				clearTimers();
 				if (!result.stderr) result.stderr = err.message;
 				if (!result.errorMessage) result.errorMessage = err.message;
 				resolve(1);
@@ -350,7 +411,13 @@ export async function runAgentTask(
 		result.stopReason = acc.stopReason;
 		result.errorMessage = acc.errorMessage;
 		result.output = getFinalOutput(acc.messages);
-		if (wasAborted) {
+		if (idleTimedOut) {
+			// Distinct, actionable signal: the child was killed for being idle, not
+			// a user abort. stopReason "error" keeps it in the failed bucket so the
+			// runtime's retry/fail handling treats it as a real failure.
+			result.stopReason = "error";
+			result.errorMessage = `Subagent stalled: no output for ${Math.round((opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS) / 1000)}s (idle timeout) — killed`;
+		} else if (wasAborted) {
 			result.stopReason = "aborted";
 			result.errorMessage = "Subagent was aborted";
 		}

package/extensions/runtime.ts CHANGED Viewed

@@ -14,7 +14,7 @@ import * as path from "node:path";
 import * as fs from "node:fs";
 import type { AgentConfig } from "./agents.ts";
 import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
-import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
+import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
 import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
 import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
 import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
@@ -314,9 +314,20 @@ async function executePhase(
 	// Wrap each subagent call in the phase's retry policy. Usage is summed across
 	// attempts; the attempt count rides along on the result for the TUI.
+	//
+	// Even without an explicit `phase.retry`, transient provider errors (rate
+	// limits, overload, 5xx, timeouts) are retried with backoff so a momentary
+	// 429 is absorbed inside this run instead of bubbling up and provoking the
+	// calling agent to re-invoke the whole tool (which stacks duplicate progress
+	// blocks in the transcript).
 	const retry = phase.retry;
+	const DEFAULT_TRANSIENT_RETRIES = 3;
+	const DEFAULT_TRANSIENT_BACKOFF_MS = 2000;
+	const DEFAULT_TRANSIENT_FACTOR = 2;
 	const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
-		const maxAttempts = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
+		const explicitMax = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
+		// Allow enough attempts to cover whichever policy applies on a given attempt.
+		const maxAttempts = Math.max(explicitMax, 1 + DEFAULT_TRANSIENT_RETRIES);
 		const usages: UsageStats[] = [];
 		let last: RunResult | undefined;
 		for (let attempt = 0; attempt < maxAttempts; attempt++) {
@@ -330,10 +341,21 @@ async function executePhase(
 			if (!isFailed(last)) break;
 			// Stop retrying on abort or once the run is over budget.
 			if (deps.signal?.aborted || overBudget(state).over) break;
-			if (attempt < maxAttempts - 1) {
-				const wait = Math.min(60000, Math.round((retry?.backoffMs ?? 0) * (retry?.factor ?? 1) ** attempt));
-				await delay(wait, deps.signal);
-			}
+			// Decide whether THIS failure warrants another attempt. Explicit retry
+			// policy covers all failures up to its cap; the transient fallback covers
+			// only retryable provider errors. A non-transient failure with no explicit
+			// policy stops immediately (no point burning attempts on a hard error).
+			const withinExplicit = attempt < explicitMax - 1;
+			const transient = isTransientError(last);
+			const withinTransient = transient && attempt < DEFAULT_TRANSIENT_RETRIES;
+			if (!withinExplicit && !withinTransient) break;
+			// Backoff: prefer the explicit policy's curve when the phase defines one
+			// (covers transient retries too, and keeps tests fast with backoffMs:0),
+			// otherwise use the transient defaults.
+			const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
+			const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
+			const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
+			if (wait > 0) await delay(wait, deps.signal);
 		}
 		// Aborted before any attempt ran → return a clean aborted result (no crash).
 		if (!last) {
@@ -415,7 +437,7 @@ async function executePhase(
 		const { text } = interpolate(phase.task ?? "", ctx);
 		const fullTask = preRead + text;
 		const agentName = resolveAgent(phase.agent, deps, state);
-		const inputHash = hashInput(phase.id, agentName, fullTask);
+		const inputHash = hashInput(phase.id, agentName, phase.model ?? "", fullTask);
 		const cached = cachedPhase(prior, inputHash);
 		if (cached) return cached;
@@ -433,7 +455,7 @@ async function executePhase(
 				task: preRead + r.text,
 			};
 		});
-		const inputHash = hashInput(phase.id, JSON.stringify(branches));
+		const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(branches));
 		const cached = cachedPhase(prior, inputHash);
 		if (cached) return cached;
@@ -463,7 +485,7 @@ async function executePhase(
 				task: preRead + interpolate(phase.task ?? "", localCtx).text,
 			};
 		});
-		const inputHash = hashInput(phase.id, JSON.stringify(tasks));
+		const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(tasks));
 		const cached = cachedPhase(prior, inputHash);
 		if (cached) return cached;
@@ -474,7 +496,7 @@ async function executePhase(
 	if (type === "approval") {
 		const ctx = buildInterpolationContext(state, previousOutput);
 		const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
-		const inputHash = hashInput(phase.id, "approval", message);
+		const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
 		const cached = cachedPhase(prior, inputHash);
 		if (cached) return cached;
@@ -741,45 +763,8 @@ function safeProgress(deps: RuntimeDeps, state: RunState): void {
 /**
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
  */
-function ensureImplicitGate(def: Taskflow): void {
-	// Respect explicit opt-out
-	if ((def as any).implicitGate === false) return;
-	const hasGate = def.phases.some(
-		(p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
-	);
-	if (hasGate || def.phases.length === 0) return;
-	// The last existing phase is the effective "final" phase — pin it so the
-	// injected gate doesn't become the finalOutput.
-	const lastPhase = def.phases[def.phases.length - 1];
-	if (!lastPhase.final && !def.phases.some((p) => p.final)) {
-		lastPhase.final = true;
-	}
-	const allIds = def.phases.map((p) => p.id);
-	def.phases.push({
-		id: "_implicit-gate",
-		type: "gate",
-		dependsOn: allIds,
-		agent: "reviewer",
-		task: `Review all phase outputs from this taskflow for accuracy and consistency.
-For each upstream phase, scan its output for:
-1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
-2. **Internal contradictions**: Do any phases contradict each other?
-3. **Completeness**: Is any output truncated, empty, or anomalously short?
-4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
-Output:
-- If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
-- If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
-	});
-}
 export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
 	const def: Taskflow = state.def;
-	ensureImplicitGate(def);
 	try {
 		return await runTaskflowLayers(state, deps);
 	} catch (e) {
@@ -868,11 +853,19 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 			}
 			const startedAt = Date.now();
+			// Re-running a phase (resume after a previous failed/done attempt) must
+			// start from a clean "running" state. Spreading the prior PhaseState
+			// would carry over its terminal `endedAt` (and `error`/`gate`/`output`),
+			// leaving a running phase with an old endedAt < new startedAt — which
+			// renders as a frozen NEGATIVE elapsed time in the TUI. Keep only the
+			// fields that are still meaningful across attempts (model, attempts).
+			const priorPs = state.phases[phase.id];
 			state.phases[phase.id] = {
-				...(state.phases[phase.id] ?? { id: phase.id }),
 				id: phase.id,
 				status: "running",
 				startedAt,
+				...(priorPs?.model ? { model: priorPs.model } : {}),
+				...(priorPs?.attempts ? { attempts: priorPs.attempts } : {}),
 			};
 			safeProgress(deps, state);

package/extensions/schema.ts CHANGED Viewed

@@ -147,12 +147,6 @@ export const TaskflowSchema = Type.Object(
 			}),
 		),
 		phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
-		implicitGate: Type.Optional(
-			Type.Boolean({
-				description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
-				default: true,
-			}),
-		),
 	},
 	{ additionalProperties: false },
 );
@@ -342,6 +336,16 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 		if (p.join && !JOIN_MODES.includes(p.join as JoinMode)) {
 			errors.push(`Phase '${p.id}': unknown join mode '${p.join}'`);
 		}
+		// Agent name convention: hyphens only (per AGENTS.md naming convention)
+		if (p.agent && typeof p.agent === "string" && p.agent.includes("_")) {
+			errors.push(`Phase '${p.id}': agent name '${p.agent}' uses underscores — use hyphens (e.g. 'executor-code' not 'executor_code')`);
+		}
+		// Phase id convention: hyphens only (consistent with interpolation placeholders like {steps.audit-each.output})
+		if (p.id && p.id.includes("_")) {
+			errors.push(`Phase '${p.id}': id uses underscores — use hyphens for consistency with interpolation placeholders (e.g. {steps.audit-each.output})`);
+		}
 	}
 	// dependsOn / from references must exist
@@ -355,6 +359,15 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 		}
 	}
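+	// Agent name format validation: must start with a lowercase letter, followed by
+	// lowercase alphanumerics or hyphens. Names containing underscores are skipped
+	// here because the per-phase check above already reports them.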
+	const VALID_AGENT_RE = /^[a-z][a-z0-9-]*$/;
+	for (const p of flow.phases) {
+		if (!p?.id) continue;
+		if (p.agent && !p.agent.includes("_") && !VALID_AGENT_RE.test(p.agent)) {
+			errors.push(`Phase '${p.id}': agent '${p.agent}' has invalid name format (expected lowercase alphanumeric with hyphens)`);
+		}
+	}
 	// Cycle detection (Kahn)
 	if (errors.length === 0) {
 		const cycle = detectCycle(flow.phases as Phase[]);