pi-taskflow 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,12 +42,42 @@ export interface RunOptions {
42
42
  signal?: AbortSignal;
43
43
  /** Fires on each assistant turn with the latest activity + accumulated usage. */
44
44
  onLive?: (live: LiveUpdate) => void;
45
+ /**
46
+ * Idle watchdog: if the subagent produces no stdout for this many ms, it is
47
+ * considered stalled (hung stream / provider stall / tool deadlock) and is
48
+ * killed (SIGTERM → SIGKILL). Resets on every stdout chunk. 0 (or a negative
49
+ * value) disables the watchdog; undefined uses DEFAULT_IDLE_TIMEOUT_MS.
50
+ */
51
+ idleTimeoutMs?: number;
45
52
  }
46
53
 
54
+ /**
55
+ * Default idle-watchdog window. A subagent that emits nothing on stdout for this
56
+ * long is treated as wedged and killed so a single stalled child cannot hang the
57
+ * entire taskflow forever (the only previous escape was a manual user abort).
58
+ * 5 minutes is generous enough for slow reasoning/long tool calls while still
59
+ * bounding a true hang.
60
+ */
61
+ export const DEFAULT_IDLE_TIMEOUT_MS = 5 * 60_000;
62
+
47
63
  export function isFailed(r: RunResult): boolean {
48
64
  return r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted";
49
65
  }
50
66
 
67
+ /**
68
+ * Heuristic: did this failure look like a transient/retryable provider error
69
+ * (rate limit, overload, timeout, 5xx)? Such errors should be retried inside
70
+ * the taskflow run with backoff rather than bubbled up — otherwise the calling
71
+ * agent tends to re-invoke the whole tool, producing duplicate progress blocks.
72
+ */
73
+ const TRANSIENT_ERROR_RE =
74
+ /rate[_\s-]?limit|too\s+many\s+requests|overloaded|\b429\b|\b503\b|\b502\b|\b504\b|service\s+unavailable|temporarily\s+unavailable|timeout|timed?\s+out|econnreset|etimedout|socket\s+hang\s*up/i;
75
+ export function isTransientError(r: RunResult): boolean {
76
+ if (r.stopReason === "aborted") return false;
77
+ const hay = `${r.errorMessage ?? ""} ${r.stderr ?? ""} ${r.output ?? ""}`;
78
+ return TRANSIENT_ERROR_RE.test(hay);
79
+ }
80
+
51
81
  /** Placeholder written to a failed phase's `output` so downstream interpolation
52
82
  * can detect "upstream failed" without being polluted by raw HTML/JSON. */
53
83
  export const TRANSPORT_ERROR_PLACEHOLDER = "(upstream error: subagent failed; see error)";
@@ -292,6 +322,7 @@ export async function runAgentTask(
292
322
  args.push(`Task: ${task}`);
293
323
 
294
324
  let wasAborted = false;
325
+ let idleTimedOut = false;
295
326
  const exitCode = await new Promise<number>((resolve) => {
296
327
  const invocation = getPiInvocation(args);
297
328
  const proc = spawn(invocation.command, invocation.args, {
@@ -301,12 +332,40 @@ export async function runAgentTask(
301
332
  });
302
333
  let buffer = "";
303
334
 
335
+ // Idle watchdog: a subagent that goes silent on stdout for too long is
336
+ // treated as wedged and killed, so one stalled child cannot hang the
337
+ // whole taskflow forever. The timer is reset on every stdout chunk and
338
+ // torn down on close/error.
339
+ const idleMs = opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS;
340
+ let idleTimer: ReturnType<typeof setTimeout> | undefined;
341
+ let forceKillTimer: ReturnType<typeof setTimeout> | undefined;
342
+ const clearTimers = () => {
343
+ if (idleTimer) clearTimeout(idleTimer);
344
+ if (forceKillTimer) clearTimeout(forceKillTimer);
345
+ };
346
+ const hardKill = () => {
347
+ proc.kill("SIGTERM");
348
+ forceKillTimer = setTimeout(() => proc.kill("SIGKILL"), 5000);
349
+ forceKillTimer.unref();
350
+ };
351
+ const armIdle = () => {
352
+ if (idleTimer) clearTimeout(idleTimer);
353
+ if (idleMs <= 0) return; // disabled
354
+ idleTimer = setTimeout(() => {
355
+ idleTimedOut = true;
356
+ hardKill();
357
+ }, idleMs);
358
+ idleTimer.unref();
359
+ };
360
+ armIdle();
361
+
304
362
  const processLine = (line: string) => {
305
363
  const live = foldEventLine(acc, line);
306
364
  if (live && opts.onLive) opts.onLive(live);
307
365
  };
308
366
 
309
367
  proc.stdout.on("data", (data) => {
368
+ armIdle(); // progress observed — reset the idle watchdog
310
369
  buffer += data.toString();
311
370
  const lines = buffer.split("\n");
312
371
  buffer = lines.pop() || "";
@@ -316,10 +375,12 @@ export async function runAgentTask(
316
375
  result.stderr += data.toString();
317
376
  });
318
377
  proc.on("close", (code) => {
378
+ clearTimers();
319
379
  if (buffer.trim()) processLine(buffer);
320
380
  resolve(code ?? 0);
321
381
  });
322
382
  proc.on("error", (err) => {
383
+ clearTimers();
323
384
  if (!result.stderr) result.stderr = err.message;
324
385
  if (!result.errorMessage) result.errorMessage = err.message;
325
386
  resolve(1);
@@ -350,7 +411,13 @@ export async function runAgentTask(
350
411
  result.stopReason = acc.stopReason;
351
412
  result.errorMessage = acc.errorMessage;
352
413
  result.output = getFinalOutput(acc.messages);
353
- if (wasAborted) {
414
+ if (idleTimedOut) {
415
+ // Distinct, actionable signal: the child was killed for being idle, not
416
+ // a user abort. stopReason "error" keeps it in the failed bucket so the
417
+ // runtime's retry/fail handling treats it as a real failure.
418
+ result.stopReason = "error";
419
+ result.errorMessage = `Subagent stalled: no output for ${Math.round((opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS) / 1000)}s (idle timeout) — killed`;
420
+ } else if (wasAborted) {
354
421
  result.stopReason = "aborted";
355
422
  result.errorMessage = "Subagent was aborted";
356
423
  }
@@ -14,7 +14,7 @@ import * as path from "node:path";
14
14
  import * as fs from "node:fs";
15
15
  import type { AgentConfig } from "./agents.ts";
16
16
  import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
17
- import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
17
+ import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
18
18
  import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
19
19
  import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
20
20
  import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
@@ -314,9 +314,20 @@ async function executePhase(
314
314
 
315
315
  // Wrap each subagent call in the phase's retry policy. Usage is summed across
316
316
  // attempts; the attempt count rides along on the result for the TUI.
317
+ //
318
+ // Even without an explicit `phase.retry`, transient provider errors (rate
319
+ // limits, overload, 5xx, timeouts) are retried with backoff so a momentary
320
+ // 429 is absorbed inside this run instead of bubbling up and provoking the
321
+ // calling agent to re-invoke the whole tool (which stacks duplicate progress
322
+ // blocks in the transcript).
317
323
  const retry = phase.retry;
324
+ const DEFAULT_TRANSIENT_RETRIES = 3;
325
+ const DEFAULT_TRANSIENT_BACKOFF_MS = 2000;
326
+ const DEFAULT_TRANSIENT_FACTOR = 2;
318
327
  const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
319
- const maxAttempts = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
328
+ const explicitMax = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
329
+ // Allow enough attempts to cover whichever policy applies on a given attempt.
330
+ const maxAttempts = Math.max(explicitMax, 1 + DEFAULT_TRANSIENT_RETRIES);
320
331
  const usages: UsageStats[] = [];
321
332
  let last: RunResult | undefined;
322
333
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
@@ -330,10 +341,21 @@ async function executePhase(
330
341
  if (!isFailed(last)) break;
331
342
  // Stop retrying on abort or once the run is over budget.
332
343
  if (deps.signal?.aborted || overBudget(state).over) break;
333
- if (attempt < maxAttempts - 1) {
334
- const wait = Math.min(60000, Math.round((retry?.backoffMs ?? 0) * (retry?.factor ?? 1) ** attempt));
335
- await delay(wait, deps.signal);
336
- }
344
+ // Decide whether THIS failure warrants another attempt. Explicit retry
345
+ // policy covers all failures up to its cap; the transient fallback covers
346
+ // only retryable provider errors. A non-transient failure with no explicit
347
+ // policy stops immediately (no point burning attempts on a hard error).
348
+ const withinExplicit = attempt < explicitMax - 1;
349
+ const transient = isTransientError(last);
350
+ const withinTransient = transient && attempt < DEFAULT_TRANSIENT_RETRIES;
351
+ if (!withinExplicit && !withinTransient) break;
352
+ // Backoff: prefer the explicit policy's curve when the phase defines one
353
+ // (covers transient retries too, and keeps tests fast with backoffMs:0),
354
+ // otherwise use the transient defaults.
355
+ const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
356
+ const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
357
+ const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
358
+ if (wait > 0) await delay(wait, deps.signal);
337
359
  }
338
360
  // Aborted before any attempt ran → return a clean aborted result (no crash).
339
361
  if (!last) {
@@ -415,7 +437,7 @@ async function executePhase(
415
437
  const { text } = interpolate(phase.task ?? "", ctx);
416
438
  const fullTask = preRead + text;
417
439
  const agentName = resolveAgent(phase.agent, deps, state);
418
- const inputHash = hashInput(phase.id, agentName, fullTask);
440
+ const inputHash = hashInput(phase.id, agentName, phase.model ?? "", fullTask);
419
441
  const cached = cachedPhase(prior, inputHash);
420
442
  if (cached) return cached;
421
443
 
@@ -433,7 +455,7 @@ async function executePhase(
433
455
  task: preRead + r.text,
434
456
  };
435
457
  });
436
- const inputHash = hashInput(phase.id, JSON.stringify(branches));
458
+ const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(branches));
437
459
  const cached = cachedPhase(prior, inputHash);
438
460
  if (cached) return cached;
439
461
 
@@ -463,7 +485,7 @@ async function executePhase(
463
485
  task: preRead + interpolate(phase.task ?? "", localCtx).text,
464
486
  };
465
487
  });
466
- const inputHash = hashInput(phase.id, JSON.stringify(tasks));
488
+ const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(tasks));
467
489
  const cached = cachedPhase(prior, inputHash);
468
490
  if (cached) return cached;
469
491
 
@@ -474,7 +496,7 @@ async function executePhase(
474
496
  if (type === "approval") {
475
497
  const ctx = buildInterpolationContext(state, previousOutput);
476
498
  const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
477
- const inputHash = hashInput(phase.id, "approval", message);
499
+ const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
478
500
  const cached = cachedPhase(prior, inputHash);
479
501
  if (cached) return cached;
480
502
 
@@ -741,45 +763,8 @@ function safeProgress(deps: RuntimeDeps, state: RunState): void {
741
763
  /**
742
764
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
743
765
  */
744
- function ensureImplicitGate(def: Taskflow): void {
745
- // Respect explicit opt-out
746
- if ((def as any).implicitGate === false) return;
747
-
748
- const hasGate = def.phases.some(
749
- (p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
750
- );
751
- if (hasGate || def.phases.length === 0) return;
752
-
753
- // The last existing phase is the effective "final" phase — pin it so the
754
- // injected gate doesn't become the finalOutput.
755
- const lastPhase = def.phases[def.phases.length - 1];
756
- if (!lastPhase.final && !def.phases.some((p) => p.final)) {
757
- lastPhase.final = true;
758
- }
759
-
760
- const allIds = def.phases.map((p) => p.id);
761
- def.phases.push({
762
- id: "_implicit-gate",
763
- type: "gate",
764
- dependsOn: allIds,
765
- agent: "reviewer",
766
- task: `Review all phase outputs from this taskflow for accuracy and consistency.
767
-
768
- For each upstream phase, scan its output for:
769
- 1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
770
- 2. **Internal contradictions**: Do any phases contradict each other?
771
- 3. **Completeness**: Is any output truncated, empty, or anomalously short?
772
- 4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
773
-
774
- Output:
775
- - If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
776
- - If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
777
- });
778
- }
779
-
780
766
  export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
781
767
  const def: Taskflow = state.def;
782
- ensureImplicitGate(def);
783
768
  try {
784
769
  return await runTaskflowLayers(state, deps);
785
770
  } catch (e) {
@@ -868,11 +853,19 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
868
853
  }
869
854
 
870
855
  const startedAt = Date.now();
856
+ // Re-running a phase (resume after a previous failed/done attempt) must
857
+ // start from a clean "running" state. Spreading the prior PhaseState
858
+ // would carry over its terminal `endedAt` (and `error`/`gate`/`output`),
859
+ // leaving a running phase with an old endedAt < new startedAt — which
860
+ // renders as a frozen NEGATIVE elapsed time in the TUI. Keep only the
861
+ // fields that are still meaningful across attempts (model, attempts).
862
+ const priorPs = state.phases[phase.id];
871
863
  state.phases[phase.id] = {
872
- ...(state.phases[phase.id] ?? { id: phase.id }),
873
864
  id: phase.id,
874
865
  status: "running",
875
866
  startedAt,
867
+ ...(priorPs?.model ? { model: priorPs.model } : {}),
868
+ ...(priorPs?.attempts ? { attempts: priorPs.attempts } : {}),
876
869
  };
877
870
  safeProgress(deps, state);
878
871
 
@@ -147,12 +147,6 @@ export const TaskflowSchema = Type.Object(
147
147
  }),
148
148
  ),
149
149
  phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
150
- implicitGate: Type.Optional(
151
- Type.Boolean({
152
- description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
153
- default: true,
154
- }),
155
- ),
156
150
  },
157
151
  { additionalProperties: false },
158
152
  );
@@ -342,6 +336,16 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
342
336
  if (p.join && !JOIN_MODES.includes(p.join as JoinMode)) {
343
337
  errors.push(`Phase '${p.id}': unknown join mode '${p.join}'`);
344
338
  }
339
+
340
+ // Agent name convention: hyphens only (per AGENTS.md naming convention)
341
+ if (p.agent && typeof p.agent === "string" && p.agent.includes("_")) {
342
+ errors.push(`Phase '${p.id}': agent name '${p.agent}' uses underscores — use hyphens (e.g. 'executor-code' not 'executor_code')`);
343
+ }
344
+
345
+ // Phase id convention: hyphens only (consistent with interpolation placeholders like {steps.audit-each.output})
346
+ if (p.id && p.id.includes("_")) {
347
+ errors.push(`Phase '${p.id}': id uses underscores — use hyphens for consistency with interpolation placeholders (e.g. {steps.audit-each.output})`);
348
+ }
345
349
  }
346
350
 
347
351
  // dependsOn / from references must exist
@@ -355,6 +359,15 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
355
359
  }
356
360
  }
357
361
 
362
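+ // Agent name format validation: a lowercase letter followed by lowercase alphanumerics or hyphens. Names containing underscores are skipped here because the underscore check reports them separately.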
363
+ const VALID_AGENT_RE = /^[a-z][a-z0-9-]*$/;
364
+ for (const p of flow.phases) {
365
+ if (!p?.id) continue;
366
+ if (p.agent && !p.agent.includes("_") && !VALID_AGENT_RE.test(p.agent)) {
367
+ errors.push(`Phase '${p.id}': agent '${p.agent}' has invalid name format (expected lowercase alphanumeric with hyphens)`);
368
+ }
369
+ }
370
+
358
371
  // Cycle detection (Kahn)
359
372
  if (errors.length === 0) {
360
373
  const cycle = detectCycle(flow.phases as Phase[]);