pi-taskflow 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +153 -216
- package/examples/guarded-refactor.json +1 -1
- package/extensions/index.ts +8 -0
- package/extensions/render.ts +7 -2
- package/extensions/runner.ts +68 -1
- package/extensions/runtime.ts +41 -48
- package/extensions/schema.ts +19 -6
- package/extensions/store.ts +544 -55
- package/package.json +1 -1
- package/skills/taskflow/SKILL.md +1 -1
package/extensions/runner.ts
CHANGED
|
@@ -42,12 +42,42 @@ export interface RunOptions {
|
|
|
42
42
|
signal?: AbortSignal;
|
|
43
43
|
/** Fires on each assistant turn with the latest activity + accumulated usage. */
|
|
44
44
|
onLive?: (live: LiveUpdate) => void;
|
|
45
|
+
/**
|
|
46
|
+
* Idle watchdog: if the subagent produces no stdout for this many ms, it is
|
|
47
|
+
* considered stalled (hung stream / provider stall / tool deadlock) and is
|
|
48
|
+
* killed (SIGTERM → SIGKILL). Resets on every stdout chunk. 0/undefined keeps
|
|
49
|
+
* the prior behaviour (no idle timeout). Defaults to DEFAULT_IDLE_TIMEOUT_MS.
|
|
50
|
+
*/
|
|
51
|
+
idleTimeoutMs?: number;
|
|
45
52
|
}
|
|
46
53
|
|
|
54
|
+
/**
 * How long a subagent may stay silent on stdout before the idle watchdog
 * treats it as wedged and kills it. Bounding the silence keeps a single
 * stalled child from hanging the entire taskflow forever (previously the only
 * escape was a manual user abort). Five minutes is generous enough for slow
 * reasoning and long tool calls while still putting a ceiling on a true hang.
 */
export const DEFAULT_IDLE_TIMEOUT_MS = 300_000;
|
|
62
|
+
|
|
47
63
|
/**
 * Reports whether a subagent run counts as failed.
 *
 * @param r - The finished run result to inspect.
 * @returns `true` when the exit code is non-zero, or when the stop reason is
 *   `"error"` or `"aborted"`; `false` otherwise.
 */
export function isFailed(r: RunResult): boolean {
	// A non-zero exit code is a failure regardless of the reported stop reason.
	if (r.exitCode !== 0) return true;
	const reason = r.stopReason;
	return reason === "error" || reason === "aborted";
}
|
|
50
66
|
|
|
67
|
+
/**
 * Case-insensitive pattern for failure text that looks like a transient,
 * retryable provider problem: rate limiting, "too many requests", overload,
 * the status codes 429/502/503/504, service or temporary unavailability,
 * timeouts, and dropped connections (ECONNRESET, ETIMEDOUT, socket hang up).
 */
const TRANSIENT_ERROR_RE =
	/rate[_\s-]?limit|too\s+many\s+requests|overloaded|\b429\b|\b503\b|\b502\b|\b504\b|service\s+unavailable|temporarily\s+unavailable|timeout|timed?\s+out|econnreset|etimedout|socket\s+hang\s*up/i;

/**
 * Heuristic: does this failure look like a transient/retryable provider error
 * (rate limit, overload, timeout, gateway/unavailable status)? Such errors are
 * meant to be retried inside the taskflow run with backoff rather than bubbled
 * up — otherwise the calling agent tends to re-invoke the whole tool, producing
 * duplicate progress blocks.
 *
 * @param r - The run result whose `errorMessage`, `stderr` and `output` are scanned.
 * @returns `true` when any of the scanned text matches the transient-error
 *   pattern; always `false` for a run whose stop reason is `"aborted"`.
 */
export function isTransientError(r: RunResult): boolean {
	// An aborted run is never classified as transient, whatever its text says.
	if (r.stopReason === "aborted") return false;
	// Missing fields contribute an empty string; the pieces are space-separated
	// so a match cannot straddle two different fields without whitespace.
	const haystack = [r.errorMessage ?? "", r.stderr ?? "", r.output ?? ""].join(" ");
	return TRANSIENT_ERROR_RE.test(haystack);
}
|
|
80
|
+
|
|
51
81
|
/**
 * Stand-in text written to the `output` of a failed phase. It lets downstream
 * interpolation recognise "upstream failed" without being polluted by raw
 * HTML/JSON.
 */
export const TRANSPORT_ERROR_PLACEHOLDER = "(upstream error: subagent failed; see error)";
|
|
@@ -292,6 +322,7 @@ export async function runAgentTask(
|
|
|
292
322
|
args.push(`Task: ${task}`);
|
|
293
323
|
|
|
294
324
|
let wasAborted = false;
|
|
325
|
+
let idleTimedOut = false;
|
|
295
326
|
const exitCode = await new Promise<number>((resolve) => {
|
|
296
327
|
const invocation = getPiInvocation(args);
|
|
297
328
|
const proc = spawn(invocation.command, invocation.args, {
|
|
@@ -301,12 +332,40 @@ export async function runAgentTask(
|
|
|
301
332
|
});
|
|
302
333
|
let buffer = "";
|
|
303
334
|
|
|
335
|
+
// Idle watchdog: a subagent that goes silent on stdout for too long is
|
|
336
|
+
// treated as wedged and killed, so one stalled child cannot hang the
|
|
337
|
+
// whole taskflow forever. The timer is reset on every stdout chunk and
|
|
338
|
+
// torn down on close/error.
|
|
339
|
+
const idleMs = opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS;
|
|
340
|
+
let idleTimer: ReturnType<typeof setTimeout> | undefined;
|
|
341
|
+
let forceKillTimer: ReturnType<typeof setTimeout> | undefined;
|
|
342
|
+
const clearTimers = () => {
|
|
343
|
+
if (idleTimer) clearTimeout(idleTimer);
|
|
344
|
+
if (forceKillTimer) clearTimeout(forceKillTimer);
|
|
345
|
+
};
|
|
346
|
+
const hardKill = () => {
|
|
347
|
+
proc.kill("SIGTERM");
|
|
348
|
+
forceKillTimer = setTimeout(() => proc.kill("SIGKILL"), 5000);
|
|
349
|
+
forceKillTimer.unref();
|
|
350
|
+
};
|
|
351
|
+
const armIdle = () => {
|
|
352
|
+
if (idleTimer) clearTimeout(idleTimer);
|
|
353
|
+
if (idleMs <= 0) return; // disabled
|
|
354
|
+
idleTimer = setTimeout(() => {
|
|
355
|
+
idleTimedOut = true;
|
|
356
|
+
hardKill();
|
|
357
|
+
}, idleMs);
|
|
358
|
+
idleTimer.unref();
|
|
359
|
+
};
|
|
360
|
+
armIdle();
|
|
361
|
+
|
|
304
362
|
const processLine = (line: string) => {
|
|
305
363
|
const live = foldEventLine(acc, line);
|
|
306
364
|
if (live && opts.onLive) opts.onLive(live);
|
|
307
365
|
};
|
|
308
366
|
|
|
309
367
|
proc.stdout.on("data", (data) => {
|
|
368
|
+
armIdle(); // progress observed — reset the idle watchdog
|
|
310
369
|
buffer += data.toString();
|
|
311
370
|
const lines = buffer.split("\n");
|
|
312
371
|
buffer = lines.pop() || "";
|
|
@@ -316,10 +375,12 @@ export async function runAgentTask(
|
|
|
316
375
|
result.stderr += data.toString();
|
|
317
376
|
});
|
|
318
377
|
proc.on("close", (code) => {
|
|
378
|
+
clearTimers();
|
|
319
379
|
if (buffer.trim()) processLine(buffer);
|
|
320
380
|
resolve(code ?? 0);
|
|
321
381
|
});
|
|
322
382
|
proc.on("error", (err) => {
|
|
383
|
+
clearTimers();
|
|
323
384
|
if (!result.stderr) result.stderr = err.message;
|
|
324
385
|
if (!result.errorMessage) result.errorMessage = err.message;
|
|
325
386
|
resolve(1);
|
|
@@ -350,7 +411,13 @@ export async function runAgentTask(
|
|
|
350
411
|
result.stopReason = acc.stopReason;
|
|
351
412
|
result.errorMessage = acc.errorMessage;
|
|
352
413
|
result.output = getFinalOutput(acc.messages);
|
|
353
|
-
if (
|
|
414
|
+
if (idleTimedOut) {
|
|
415
|
+
// Distinct, actionable signal: the child was killed for being idle, not
|
|
416
|
+
// a user abort. stopReason "error" keeps it in the failed bucket so the
|
|
417
|
+
// runtime's retry/fail handling treats it as a real failure.
|
|
418
|
+
result.stopReason = "error";
|
|
419
|
+
result.errorMessage = `Subagent stalled: no output for ${Math.round((opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS) / 1000)}s (idle timeout) — killed`;
|
|
420
|
+
} else if (wasAborted) {
|
|
354
421
|
result.stopReason = "aborted";
|
|
355
422
|
result.errorMessage = "Subagent was aborted";
|
|
356
423
|
}
|
package/extensions/runtime.ts
CHANGED
|
@@ -14,7 +14,7 @@ import * as path from "node:path";
|
|
|
14
14
|
import * as fs from "node:fs";
|
|
15
15
|
import type { AgentConfig } from "./agents.ts";
|
|
16
16
|
import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
|
|
17
|
-
import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
|
|
17
|
+
import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
|
|
18
18
|
import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
|
|
19
19
|
import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
|
|
20
20
|
import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
|
|
@@ -314,9 +314,20 @@ async function executePhase(
|
|
|
314
314
|
|
|
315
315
|
// Wrap each subagent call in the phase's retry policy. Usage is summed across
|
|
316
316
|
// attempts; the attempt count rides along on the result for the TUI.
|
|
317
|
+
//
|
|
318
|
+
// Even without an explicit `phase.retry`, transient provider errors (rate
|
|
319
|
+
// limits, overload, 5xx, timeouts) are retried with backoff so a momentary
|
|
320
|
+
// 429 is absorbed inside this run instead of bubbling up and provoking the
|
|
321
|
+
// calling agent to re-invoke the whole tool (which stacks duplicate progress
|
|
322
|
+
// blocks in the transcript).
|
|
317
323
|
const retry = phase.retry;
|
|
324
|
+
const DEFAULT_TRANSIENT_RETRIES = 3;
|
|
325
|
+
const DEFAULT_TRANSIENT_BACKOFF_MS = 2000;
|
|
326
|
+
const DEFAULT_TRANSIENT_FACTOR = 2;
|
|
318
327
|
const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
|
|
319
|
-
const
|
|
328
|
+
const explicitMax = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
|
|
329
|
+
// Allow enough attempts to cover whichever policy applies on a given attempt.
|
|
330
|
+
const maxAttempts = Math.max(explicitMax, 1 + DEFAULT_TRANSIENT_RETRIES);
|
|
320
331
|
const usages: UsageStats[] = [];
|
|
321
332
|
let last: RunResult | undefined;
|
|
322
333
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
@@ -330,10 +341,21 @@ async function executePhase(
|
|
|
330
341
|
if (!isFailed(last)) break;
|
|
331
342
|
// Stop retrying on abort or once the run is over budget.
|
|
332
343
|
if (deps.signal?.aborted || overBudget(state).over) break;
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
344
|
+
// Decide whether THIS failure warrants another attempt. Explicit retry
|
|
345
|
+
// policy covers all failures up to its cap; the transient fallback covers
|
|
346
|
+
// only retryable provider errors. A non-transient failure with no explicit
|
|
347
|
+
// policy stops immediately (no point burning attempts on a hard error).
|
|
348
|
+
const withinExplicit = attempt < explicitMax - 1;
|
|
349
|
+
const transient = isTransientError(last);
|
|
350
|
+
const withinTransient = transient && attempt < DEFAULT_TRANSIENT_RETRIES;
|
|
351
|
+
if (!withinExplicit && !withinTransient) break;
|
|
352
|
+
// Backoff: prefer the explicit policy's curve when the phase defines one
|
|
353
|
+
// (covers transient retries too, and keeps tests fast with backoffMs:0),
|
|
354
|
+
// otherwise use the transient defaults.
|
|
355
|
+
const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
|
|
356
|
+
const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
|
|
357
|
+
const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
|
|
358
|
+
if (wait > 0) await delay(wait, deps.signal);
|
|
337
359
|
}
|
|
338
360
|
// Aborted before any attempt ran → return a clean aborted result (no crash).
|
|
339
361
|
if (!last) {
|
|
@@ -415,7 +437,7 @@ async function executePhase(
|
|
|
415
437
|
const { text } = interpolate(phase.task ?? "", ctx);
|
|
416
438
|
const fullTask = preRead + text;
|
|
417
439
|
const agentName = resolveAgent(phase.agent, deps, state);
|
|
418
|
-
const inputHash = hashInput(phase.id, agentName, fullTask);
|
|
440
|
+
const inputHash = hashInput(phase.id, agentName, phase.model ?? "", fullTask);
|
|
419
441
|
const cached = cachedPhase(prior, inputHash);
|
|
420
442
|
if (cached) return cached;
|
|
421
443
|
|
|
@@ -433,7 +455,7 @@ async function executePhase(
|
|
|
433
455
|
task: preRead + r.text,
|
|
434
456
|
};
|
|
435
457
|
});
|
|
436
|
-
const inputHash = hashInput(phase.id, JSON.stringify(branches));
|
|
458
|
+
const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(branches));
|
|
437
459
|
const cached = cachedPhase(prior, inputHash);
|
|
438
460
|
if (cached) return cached;
|
|
439
461
|
|
|
@@ -463,7 +485,7 @@ async function executePhase(
|
|
|
463
485
|
task: preRead + interpolate(phase.task ?? "", localCtx).text,
|
|
464
486
|
};
|
|
465
487
|
});
|
|
466
|
-
const inputHash = hashInput(phase.id, JSON.stringify(tasks));
|
|
488
|
+
const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(tasks));
|
|
467
489
|
const cached = cachedPhase(prior, inputHash);
|
|
468
490
|
if (cached) return cached;
|
|
469
491
|
|
|
@@ -474,7 +496,7 @@ async function executePhase(
|
|
|
474
496
|
if (type === "approval") {
|
|
475
497
|
const ctx = buildInterpolationContext(state, previousOutput);
|
|
476
498
|
const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
|
|
477
|
-
const inputHash = hashInput(phase.id, "approval", message);
|
|
499
|
+
const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
|
|
478
500
|
const cached = cachedPhase(prior, inputHash);
|
|
479
501
|
if (cached) return cached;
|
|
480
502
|
|
|
@@ -741,45 +763,8 @@ function safeProgress(deps: RuntimeDeps, state: RunState): void {
|
|
|
741
763
|
/**
|
|
742
764
|
* Execute a full taskflow. Mutates and persists `state` as it progresses.
|
|
743
765
|
*/
|
|
744
|
-
function ensureImplicitGate(def: Taskflow): void {
|
|
745
|
-
// Respect explicit opt-out
|
|
746
|
-
if ((def as any).implicitGate === false) return;
|
|
747
|
-
|
|
748
|
-
const hasGate = def.phases.some(
|
|
749
|
-
(p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
|
|
750
|
-
);
|
|
751
|
-
if (hasGate || def.phases.length === 0) return;
|
|
752
|
-
|
|
753
|
-
// The last existing phase is the effective "final" phase — pin it so the
|
|
754
|
-
// injected gate doesn't become the finalOutput.
|
|
755
|
-
const lastPhase = def.phases[def.phases.length - 1];
|
|
756
|
-
if (!lastPhase.final && !def.phases.some((p) => p.final)) {
|
|
757
|
-
lastPhase.final = true;
|
|
758
|
-
}
|
|
759
|
-
|
|
760
|
-
const allIds = def.phases.map((p) => p.id);
|
|
761
|
-
def.phases.push({
|
|
762
|
-
id: "_implicit-gate",
|
|
763
|
-
type: "gate",
|
|
764
|
-
dependsOn: allIds,
|
|
765
|
-
agent: "reviewer",
|
|
766
|
-
task: `Review all phase outputs from this taskflow for accuracy and consistency.
|
|
767
|
-
|
|
768
|
-
For each upstream phase, scan its output for:
|
|
769
|
-
1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
|
|
770
|
-
2. **Internal contradictions**: Do any phases contradict each other?
|
|
771
|
-
3. **Completeness**: Is any output truncated, empty, or anomalously short?
|
|
772
|
-
4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
|
|
773
|
-
|
|
774
|
-
Output:
|
|
775
|
-
- If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
|
|
776
|
-
- If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
|
|
777
|
-
});
|
|
778
|
-
}
|
|
779
|
-
|
|
780
766
|
export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
|
|
781
767
|
const def: Taskflow = state.def;
|
|
782
|
-
ensureImplicitGate(def);
|
|
783
768
|
try {
|
|
784
769
|
return await runTaskflowLayers(state, deps);
|
|
785
770
|
} catch (e) {
|
|
@@ -868,11 +853,19 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
868
853
|
}
|
|
869
854
|
|
|
870
855
|
const startedAt = Date.now();
|
|
856
|
+
// Re-running a phase (resume after a previous failed/done attempt) must
|
|
857
|
+
// start from a clean "running" state. Spreading the prior PhaseState
|
|
858
|
+
// would carry over its terminal `endedAt` (and `error`/`gate`/`output`),
|
|
859
|
+
// leaving a running phase with an old endedAt < new startedAt — which
|
|
860
|
+
// renders as a frozen NEGATIVE elapsed time in the TUI. Keep only the
|
|
861
|
+
// fields that are still meaningful across attempts (model, attempts).
|
|
862
|
+
const priorPs = state.phases[phase.id];
|
|
871
863
|
state.phases[phase.id] = {
|
|
872
|
-
...(state.phases[phase.id] ?? { id: phase.id }),
|
|
873
864
|
id: phase.id,
|
|
874
865
|
status: "running",
|
|
875
866
|
startedAt,
|
|
867
|
+
...(priorPs?.model ? { model: priorPs.model } : {}),
|
|
868
|
+
...(priorPs?.attempts ? { attempts: priorPs.attempts } : {}),
|
|
876
869
|
};
|
|
877
870
|
safeProgress(deps, state);
|
|
878
871
|
|
package/extensions/schema.ts
CHANGED
|
@@ -147,12 +147,6 @@ export const TaskflowSchema = Type.Object(
|
|
|
147
147
|
}),
|
|
148
148
|
),
|
|
149
149
|
phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
|
|
150
|
-
implicitGate: Type.Optional(
|
|
151
|
-
Type.Boolean({
|
|
152
|
-
description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
|
|
153
|
-
default: true,
|
|
154
|
-
}),
|
|
155
|
-
),
|
|
156
150
|
},
|
|
157
151
|
{ additionalProperties: false },
|
|
158
152
|
);
|
|
@@ -342,6 +336,16 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
342
336
|
if (p.join && !JOIN_MODES.includes(p.join as JoinMode)) {
|
|
343
337
|
errors.push(`Phase '${p.id}': unknown join mode '${p.join}'`);
|
|
344
338
|
}
|
|
339
|
+
|
|
340
|
+
// Agent name convention: hyphens only (per AGENTS.md naming convention)
|
|
341
|
+
if (p.agent && typeof p.agent === "string" && p.agent.includes("_")) {
|
|
342
|
+
errors.push(`Phase '${p.id}': agent name '${p.agent}' uses underscores — use hyphens (e.g. 'executor-code' not 'executor_code')`);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Phase id convention: hyphens only (consistent with interpolation placeholders like {steps.audit-each.output})
|
|
346
|
+
if (p.id && p.id.includes("_")) {
|
|
347
|
+
errors.push(`Phase '${p.id}': id uses underscores — use hyphens for consistency with interpolation placeholders (e.g. {steps.audit-each.output})`);
|
|
348
|
+
}
|
|
345
349
|
}
|
|
346
350
|
|
|
347
351
|
// dependsOn / from references must exist
|
|
@@ -355,6 +359,15 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
355
359
|
}
|
|
356
360
|
}
|
|
357
361
|
|
|
362
|
+
// Agent name format validation (AGENTS.md naming convention: hyphens only, no underscores)
|
|
363
|
+
const VALID_AGENT_RE = /^[a-z][a-z0-9-]*$/;
|
|
364
|
+
for (const p of flow.phases) {
|
|
365
|
+
if (!p?.id) continue;
|
|
366
|
+
if (p.agent && !p.agent.includes("_") && !VALID_AGENT_RE.test(p.agent)) {
|
|
367
|
+
errors.push(`Phase '${p.id}': agent '${p.agent}' has invalid name format (expected lowercase alphanumeric with hyphens)`);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
|
|
358
371
|
// Cycle detection (Kahn)
|
|
359
372
|
if (errors.length === 0) {
|
|
360
373
|
const cycle = detectCycle(flow.phases as Phase[]);
|