pi-taskflow 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -3
- package/extensions/agents.ts +47 -0
- package/extensions/index.ts +68 -12
- package/extensions/runtime.ts +76 -0
- package/extensions/schema.ts +13 -0
- package/extensions/verify.ts +367 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -7,7 +7,9 @@
|
|
|
7
7
|
<a href="https://www.npmjs.com/package/pi-taskflow"><img src="https://img.shields.io/npm/dm/pi-taskflow?style=flat-square&color=6E8BFF&label=downloads" alt="npm downloads"></a>
|
|
8
8
|
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-43D9AD?style=flat-square" alt="MIT license"></a>
|
|
9
9
|
<a href="#whats-inside"><img src="https://img.shields.io/badge/runtime%20deps-0-43D9AD?style=flat-square" alt="zero runtime dependencies"></a>
|
|
10
|
-
<a href="
|
|
10
|
+
<a href="https://github.com/heggria/pi-taskflow/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/heggria/pi-taskflow/ci.yml?branch=main&style=flat-square&label=CI" alt="CI status"></a>
|
|
11
|
+
<a href="#whats-inside"><img src="https://img.shields.io/badge/tests-394-6E8BFF?style=flat-square" alt="394 tests"></a>
|
|
12
|
+
<a href="#whats-inside"><img src="https://img.shields.io/badge/dogfooded-%E2%9C%93-43D9AD?style=flat-square" alt="dogfooded"></a>
|
|
11
13
|
<a href="https://pi.dev"><img src="https://img.shields.io/badge/for-Pi%20coding%20agent-B692FF?style=flat-square" alt="for the Pi coding agent"></a>
|
|
12
14
|
</p>
|
|
13
15
|
|
|
@@ -574,7 +576,7 @@ Copy one into `.pi/taskflows/<name>.json` (or `~/.pi/agent/taskflows/`) and it r
|
|
|
574
576
|
|
|
575
577
|
<div align="center">
|
|
576
578
|
|
|
577
|
-
**0 runtime dependencies** · **
|
|
579
|
+
**0 runtime dependencies** · **394 tests** · **10 phase types** · **cross-session resume** · **cross-run memoization** · **~4.9k LOC runtime**
|
|
578
580
|
|
|
579
581
|
</div>
|
|
580
582
|
|
|
@@ -583,7 +585,21 @@ Copy one into `.pi/taskflows/<name>.json` (or `~/.pi/agent/taskflows/`) and it r
|
|
|
583
585
|
- **Hardened by design.** Path-traversal defense (lexical + `realpath`), runId validation, HTML/error sanitization, atomic writes, stale-lock stealing via `rename`, and an idle watchdog that kills wedged subagents.
|
|
584
586
|
- **Dogfooded.** Every new feature has to survive the project's own `self-improve` taskflow before it ships.
|
|
585
587
|
|
|
586
|
-
|
|
588
|
+
## 🍽️ We eat our own dog food
|
|
589
|
+
|
|
590
|
+
Every feature in `pi-taskflow` ships **through `pi-taskflow`.**
|
|
591
|
+
|
|
592
|
+
Our `self-improve` flow is a 10-phase DAG — it audits the codebase, patches defects, verifies correctness, gates on quality, and surfaces the report — all declaratively. It's saved as `/tf:self-improve` and run before every release. No other agent orchestrator in the Pi ecosystem builds itself with itself.
|
|
593
|
+
|
|
594
|
+
| Campaign | Scale | Phases | Outcome |
|
|
595
|
+
|----------|-------|--------|---------|
|
|
596
|
+
| [v0.0.8 dogfood](./docs/dogfooding-v0.0.8-report.md) | Full codebase audit → triage → fix → verify | 10 phases, 234 tests | 13 fixes, all pass |
|
|
597
|
+
| [v0.0.6 self-audit](./docs/self-audit-report.md) | inventory → map audit → gate → approval → map fix → reduce | 9 phases | 11 critical defects fixed |
|
|
598
|
+
| [Cross-run cache dogfood](./docs/rfc-cross-run-memoization.md) | Real runtime + on-disk store | Dedicated test harness | Cache correctness under adversarial fingerprints |
|
|
599
|
+
| [Adversarial cross-review](./docs/brainstorm-adversarial-review-report.md) | Multi-agent adversarial review | `tournament` + `gate` | P0 cache-key fix shipped |
|
|
600
|
+
| [Init redesign review](./docs/issue-necessity-review-report.md) | Necessity audit → parallel checks → verdict | 7 phases | Full redesign plan validated |
|
|
601
|
+
|
|
602
|
+
> **Meta:** we used `pi-taskflow`'s `map` fan-out, `gate` verdicts, `approval` human-in-the-loop, `tournament` best-of-N, `loop` until-done, and `cross-run` cache — to build `pi-taskflow`.
|
|
587
603
|
|
|
588
604
|
## Status & limits
|
|
589
605
|
|
package/extensions/agents.ts
CHANGED
|
@@ -208,3 +208,50 @@ export function readSubagentSettings(): SubagentSettings {
|
|
|
208
208
|
return {};
|
|
209
209
|
}
|
|
210
210
|
}
|
|
211
|
+
|
|
212
|
+
/**
|
|
213
|
+
* Copy the 18 built-in agents from extensions/agents/*.md into the project's
|
|
214
|
+
* .pi/agents/ directory so Pi's native subagent tool (and any other extension)
|
|
215
|
+
* can discover them. taskflow's own discoverAgents() already reads from this
|
|
216
|
+
* directory with lower priority than built-in, so the copy is a no-op for
|
|
217
|
+
* taskflow phases — it only matters for Pi's native agent discovery.
|
|
218
|
+
*
|
|
219
|
+
* Idempotent: only copies agents whose built-in source is newer than the
|
|
220
|
+
* project copy (or that don't exist yet).
|
|
221
|
+
*/
|
|
222
|
+
export function syncBuiltinAgentsToProject(cwd: string): void {
|
|
223
|
+
const builtInDir = path.resolve(import.meta.dirname, "agents");
|
|
224
|
+
if (!fs.existsSync(builtInDir)) return;
|
|
225
|
+
|
|
226
|
+
const projectAgentsDir = path.join(cwd, ".pi", "agents");
|
|
227
|
+
fs.mkdirSync(projectAgentsDir, { recursive: true });
|
|
228
|
+
|
|
229
|
+
let entries: fs.Dirent[];
|
|
230
|
+
try {
|
|
231
|
+
entries = fs.readdirSync(builtInDir, { withFileTypes: true });
|
|
232
|
+
} catch {
|
|
233
|
+
return;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
for (const entry of entries) {
|
|
237
|
+
if (!entry.isFile() || !entry.name.endsWith(".md")) continue;
|
|
238
|
+
const src = path.join(builtInDir, entry.name);
|
|
239
|
+
const dst = path.join(projectAgentsDir, entry.name);
|
|
240
|
+
|
|
241
|
+
let srcMtime = 0;
|
|
242
|
+
try { srcMtime = fs.statSync(src).mtimeMs; } catch { continue; }
|
|
243
|
+
|
|
244
|
+
let dstMtime = 0;
|
|
245
|
+
try { dstMtime = fs.statSync(dst).mtimeMs; } catch { /* dst doesn't exist yet */ }
|
|
246
|
+
|
|
247
|
+
// Only copy when the source is newer (or the destination is missing).
|
|
248
|
+
if (srcMtime <= dstMtime) continue;
|
|
249
|
+
|
|
250
|
+
try {
|
|
251
|
+
const content = fs.readFileSync(src, "utf-8");
|
|
252
|
+
fs.writeFileSync(dst, content, "utf-8");
|
|
253
|
+
} catch {
|
|
254
|
+
// Best-effort: a locked file must not block the sync.
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
package/extensions/index.ts
CHANGED
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
runInteractiveInit,
|
|
25
25
|
} from "./init.ts";
|
|
26
26
|
import { Type } from "typebox";
|
|
27
|
-
import { type AgentScope, discoverAgents, readSubagentSettings } from "./agents.ts";
|
|
27
|
+
import { type AgentScope, discoverAgents, readSubagentSettings, syncBuiltinAgentsToProject } from "./agents.ts";
|
|
28
28
|
import { renderRunResult, summarizeRun } from "./render.ts";
|
|
29
29
|
import { RunHistoryComponent, type RunHistoryResult } from "./runs-view.ts";
|
|
30
30
|
import { executeTaskflow, type ApprovalDecision, type ApprovalRequest, type RuntimeResult } from "./runtime.ts";
|
|
@@ -60,7 +60,7 @@ const ShorthandStep = Type.Object(
|
|
|
60
60
|
);
|
|
61
61
|
|
|
62
62
|
const TaskflowParams = Type.Object({
|
|
63
|
-
action: StringEnum(["run", "save", "resume", "list", "agents", "init", "cache-clear"] as const, {
|
|
63
|
+
action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "cache-clear"] as const, {
|
|
64
64
|
// Keep this list in sync with the action enum above (which includes "verify").
description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, statically verify a flow definition, or clear the cross-run memoization cache",
|
|
65
65
|
default: "run",
|
|
66
66
|
}),
|
|
@@ -255,6 +255,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
255
255
|
pi.on("session_start", async (_e, ctx) => {
|
|
256
256
|
registerSavedFlowCommands(ctx);
|
|
257
257
|
|
|
258
|
+
// Sync built-in agents into .pi/agents/ so Pi's native subagent tool
|
|
259
|
+
// (and any other extension) can discover them — taskflow's
|
|
260
|
+
// extensions/agents/ directory is invisible to the rest of Pi.
|
|
261
|
+
try {
|
|
262
|
+
syncBuiltinAgentsToProject(ctx.cwd);
|
|
263
|
+
} catch {
|
|
264
|
+
// Best-effort: a locked or readonly .pi/ directory must not block
|
|
265
|
+
// session startup.
|
|
266
|
+
}
|
|
267
|
+
|
|
258
268
|
// Hint: prompt to configure model roles if not set
|
|
259
269
|
try {
|
|
260
270
|
const settings = readSubagentSettings();
|
|
@@ -272,20 +282,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
272
282
|
name: "taskflow",
|
|
273
283
|
label: "Taskflow",
|
|
274
284
|
description: [
|
|
275
|
-
"Orchestrate
|
|
276
|
-
"
|
|
277
|
-
"
|
|
278
|
-
"
|
|
279
|
-
"Use action=
|
|
280
|
-
"
|
|
281
|
-
"Phase types: agent (one subagent), parallel (static branches), map (dynamic fan-out over an array), gate (VERDICT: PASS/BLOCK quality gate), reduce (aggregate from N phases), approval (human-in-the-loop pause), flow (run a saved sub-flow), loop (re-run a task until 'until' is truthy / converged / maxIterations; body reads {loop.iteration} and {loop.lastOutput}), tournament (spawn N variants of 'task' — or distinct 'branches' — then a judge picks the best / aggregates; mode:'best'|'aggregate'). join:'any' is an OR-join; when is a conditional guard; retry adds backoff; budget caps run cost.",
|
|
285
|
+
"Orchestrate subagents — the ONLY delegation tool. Fully replaces the built-in subagent tool.",
|
|
286
|
+
"Shorthand (same API as subagent): pass `task` (+optional `agent`) for one task, `tasks:[{task,agent?}]` for parallel, or `chain:[{task,agent?}]` for sequential (use {previous.output}).",
|
|
287
|
+
"DSL: use action=run with an inline `define` (you write the DAG) or a saved `name`. Phases (agent, parallel, map, gate, reduce, approval, flow, loop, tournament) form a DAG; intermediate outputs stay out of your context — only the final phase output is returned.",
|
|
288
|
+
"Every delegation is tracked (runId), resumable across sessions, and saveable as /tf:<name> via action=save.",
|
|
289
|
+
"Use action=agents to list the 18 built-in agents (executor, scout, planner, analyst, critic, reviewer, risk-reviewer, security-reviewer, plan-arbiter, final-arbiter, test-engineer, doc-writer, executor-code, executor-fast, executor-ui, recover, verifier, visual-explorer). Do NOT invent agent names.",
|
|
290
|
+
"Phase types: agent, parallel (static branches), map (dynamic fan-out over array), gate (VERDICT: PASS/BLOCK), reduce (aggregate from N), approval (human-in-the-loop), flow (run saved sub-flow), loop (iterate until condition/convergence/cap), tournament (N variants, judge picks best/aggregate).",
|
|
282
291
|
"Interpolation: {args.X}, {steps.ID.output}, {steps.ID.json}, {item} (map), {previous.output}.",
|
|
283
292
|
].join(" "),
|
|
284
293
|
parameters: TaskflowParams,
|
|
285
|
-
promptSnippet: "Orchestrate
|
|
294
|
+
promptSnippet: "Orchestrate subagents — single, parallel, chain, or DAG — with tracking, resume, and context isolation. Replaces the subagent tool.",
|
|
286
295
|
promptGuidelines: [
|
|
287
|
-
"
|
|
288
|
-
"
|
|
296
|
+
"Use taskflow for ALL delegation — single tasks, parallel, chain, or full DAG orchestration. It fully replaces the subagent tool: every delegation is tracked with a runId, resumable across sessions, context-isolated (only final output returns), and saveable as /tf:<name>. Do NOT call the subagent tool directly; use taskflow shorthand (task/tasks/chain) for simple cases instead.",
|
|
297
|
+
// Map-phase guidance is its own entry (next item); do not repeat it here or it is sent to the model twice.
"For complex multi-phase work (explore / 审计 / analyze the project, auditing endpoints, reviewing or migrating many files/modules, cross-checked research), use the full DSL with phases.",
|
|
289
298
|
"For taskflow map phases, have the upstream phase emit a JSON array and set output:'json'.",
|
|
290
299
|
],
|
|
291
300
|
|
|
@@ -394,6 +403,53 @@ export default function (pi: ExtensionAPI) {
|
|
|
394
403
|
return { content: [{ type: "text", text }], details: { action } satisfies TaskflowDetails };
|
|
395
404
|
}
|
|
396
405
|
|
|
406
|
+
if (action === "verify") {
|
|
407
|
+
const { verifyTaskflow } = await import("./verify.ts");
|
|
408
|
+
// Load definition: inline define takes priority, then saved name
|
|
409
|
+
let def: Taskflow | undefined;
|
|
410
|
+
if (params.define) {
|
|
411
|
+
const d = params.define as Record<string, unknown>;
|
|
412
|
+
if (typeof d === "object" && d !== null && Array.isArray(d.phases)) {
|
|
413
|
+
def = d as unknown as Taskflow;
|
|
414
|
+
} else if (isShorthand(params.define)) {
|
|
415
|
+
const r = validateTaskflow(params.define);
|
|
416
|
+
if (r.ok) def = params.define as unknown as Taskflow;
|
|
417
|
+
}
|
|
418
|
+
} else if (params.name) {
|
|
419
|
+
const saved = getFlow(ctx.cwd, params.name);
|
|
420
|
+
if (saved) def = saved.def;
|
|
421
|
+
}
|
|
422
|
+
if (!def) {
|
|
423
|
+
return errorResult(action, "Provide 'define' (DSL) or 'name' (saved flow) to verify.");
|
|
424
|
+
}
|
|
425
|
+
// Schema validation first
|
|
426
|
+
const vr = validateTaskflow(def, { cwd: ctx.cwd ? String(ctx.cwd) : undefined });
|
|
427
|
+
if (!vr.ok) {
|
|
428
|
+
return errorResult(action, `Schema validation failed:\n${vr.errors.join("\n")}`);
|
|
429
|
+
}
|
|
430
|
+
const result = verifyTaskflow({ name: def.name!, phases: def.phases!, budget: def.budget, concurrency: def.concurrency });
|
|
431
|
+
const lines: string[] = [];
|
|
432
|
+
lines.push(`# Verification of "${def.name}"`);
|
|
433
|
+
lines.push("");
|
|
434
|
+
if (result.issues.length === 0) {
|
|
435
|
+
lines.push("✅ No issues found.");
|
|
436
|
+
} else {
|
|
437
|
+
const errors = result.issues.filter((i) => i.severity === "error");
|
|
438
|
+
const warnings = result.issues.filter((i) => i.severity === "warning");
|
|
439
|
+
if (errors.length) {
|
|
440
|
+
lines.push(`## Errors (${errors.length})`);
|
|
441
|
+
for (const e of errors) lines.push(`- **${e.category}**${e.phaseId ? ` [${e.phaseId}]` : ""}: ${e.message}`);
|
|
442
|
+
}
|
|
443
|
+
if (warnings.length) {
|
|
444
|
+
lines.push(`## Warnings (${warnings.length})`);
|
|
445
|
+
for (const w of warnings) lines.push(`- ${w.category}${w.phaseId ? ` [${w.phaseId}]` : ""}: ${w.message}`);
|
|
446
|
+
}
|
|
447
|
+
lines.push("");
|
|
448
|
+
lines.push(result.ok ? "Status: PASS (no errors)" : "Status: FAIL (errors found)");
|
|
449
|
+
}
|
|
450
|
+
return { content: [{ type: "text", text: lines.join("\n") }], details: { action } satisfies TaskflowDetails };
|
|
451
|
+
}
|
|
452
|
+
|
|
397
453
|
if (action === "cache-clear") {
|
|
398
454
|
const removed = new CacheStore(ctx.cwd).clear();
|
|
399
455
|
return {
|
package/extensions/runtime.ts
CHANGED
|
@@ -286,6 +286,7 @@ async function executePhase(
|
|
|
286
286
|
deps: RuntimeDeps,
|
|
287
287
|
prior: PhaseState | undefined,
|
|
288
288
|
emitProgress: () => void,
|
|
289
|
+
_retryDepth = 0,
|
|
289
290
|
): Promise<PhaseState> {
|
|
290
291
|
const type = phase.type ?? "agent";
|
|
291
292
|
const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
|
|
@@ -454,6 +455,47 @@ async function executePhase(
|
|
|
454
455
|
// interpolated task. gate additionally parses a verdict; reduce simply pulls
|
|
455
456
|
// its inputs from `from` phases (already exposed via interpolation).
|
|
456
457
|
if (type === "agent" || type === "gate" || type === "reduce") {
|
|
458
|
+
// Eval gate: zero-token machine checks before the LLM gate.
|
|
459
|
+
if (type === "gate" && Array.isArray(phase.eval) && phase.eval.length > 0) {
|
|
460
|
+
const evalCtx = buildInterpolationContext(state, previousOutput);
|
|
461
|
+
let allPassed = true;
|
|
462
|
+
for (const check of phase.eval) {
|
|
463
|
+
let expr = check;
|
|
464
|
+
// Pre-process `contains` expressions: "{steps.x.output} contains PASS"
|
|
465
|
+
// Convert to: interpolate LHS, check RHS substring inclusion.
|
|
466
|
+
const containsIdx = expr.indexOf(" contains ");
|
|
467
|
+
if (containsIdx > 0) {
|
|
468
|
+
const lhs = expr.slice(0, containsIdx).trim();
|
|
469
|
+
const rhs = expr.slice(containsIdx + " contains ".length).trim();
|
|
470
|
+
const lhsVal = interpolate(lhs, evalCtx);
|
|
471
|
+
const lhsStr = lhsVal.text;
|
|
472
|
+
if (!lhsStr.includes(rhs)) {
|
|
473
|
+
allPassed = false;
|
|
474
|
+
break;
|
|
475
|
+
}
|
|
476
|
+
continue;
|
|
477
|
+
}
|
|
478
|
+
if (!evaluateCondition(expr, evalCtx)) {
|
|
479
|
+
allPassed = false;
|
|
480
|
+
break;
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
if (allPassed) {
|
|
484
|
+
// All evals passed — skip the LLM gate, return an auto-pass.
|
|
485
|
+
const inputHash = cacheKey(cc, [phase.id, "eval-skip"]);
|
|
486
|
+
const ps: PhaseState = {
|
|
487
|
+
id: phase.id,
|
|
488
|
+
status: "done",
|
|
489
|
+
output: "PASS (eval checks passed — no LLM call)",
|
|
490
|
+
gate: { verdict: "pass" },
|
|
491
|
+
usage: emptyUsage(),
|
|
492
|
+
inputHash,
|
|
493
|
+
endedAt: Date.now(),
|
|
494
|
+
};
|
|
495
|
+
recordCache(cc, ps);
|
|
496
|
+
return ps;
|
|
497
|
+
}
|
|
498
|
+
}
|
|
457
499
|
const { text } = interpolate(phase.task ?? "", ctx);
|
|
458
500
|
const fullTask = preRead + text;
|
|
459
501
|
const agentName = resolveAgent(phase.agent, deps, state);
|
|
@@ -464,6 +506,40 @@ async function executePhase(
|
|
|
464
506
|
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
|
|
465
507
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
466
508
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
509
|
+
|
|
510
|
+
// onBlock:retry — re-execute upstream + gate until pass or max attempts.
|
|
511
|
+
if (type === "gate" && ps.gate?.verdict === "block") {
|
|
512
|
+
const onBlockV: string = phase.onBlock ?? "halt";
|
|
513
|
+
const MAX_RETRY_DEPTH = 3;
|
|
514
|
+
let attempt = 0;
|
|
515
|
+
let gatePs = ps;
|
|
516
|
+
while (onBlockV === "retry" && attempt < (phase.retry?.max ?? 1)) {
|
|
517
|
+
// H1: guard against unbounded spend and user abort
|
|
518
|
+
if (deps.signal?.aborted || overBudget(state).over) break;
|
|
519
|
+
attempt++;
|
|
520
|
+
// H2: cap nested retry depth to prevent exponential re-execution
|
|
521
|
+
// when a gate's upstream dependency is itself a gate with onBlock:retry
|
|
522
|
+
if (_retryDepth < MAX_RETRY_DEPTH) {
|
|
523
|
+
for (const depId of phase.dependsOn ?? []) {
|
|
524
|
+
const d = state.def.phases.find((p) => p.id === depId);
|
|
525
|
+
if (!d) continue;
|
|
526
|
+
const dPs = await executePhase(d, state, deps, prior, emitProgress, _retryDepth + 1);
|
|
527
|
+
state.phases[depId] = dPs;
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
const retryCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
|
|
531
|
+
const retryText = interpolate(phase.task ?? "", retryCtx).text;
|
|
532
|
+
const retryTask = preRead + retryText;
|
|
533
|
+
const retryIH = cacheKey(cc, [phase.id, agentName, phase.model ?? "", retryTask]);
|
|
534
|
+
const retryR = await runOne(agentName, retryTask, liveSink(state, phase.id, emitProgress));
|
|
535
|
+
gatePs = resultToPhaseState(phase.id, retryR, retryIH, parseJson);
|
|
536
|
+
if (gatePs.status === "done") gatePs.gate = parseGateVerdict(retryR.output);
|
|
537
|
+
if (gatePs.gate?.verdict !== "block" || overBudget(state).over) break;
|
|
538
|
+
}
|
|
539
|
+
gatePs.attempts = (ps.attempts ?? 0) + attempt;
|
|
540
|
+
recordCache(cc, gatePs);
|
|
541
|
+
return gatePs;
|
|
542
|
+
}
|
|
467
543
|
recordCache(cc, ps);
|
|
468
544
|
return ps;
|
|
469
545
|
}
|
package/extensions/schema.ts
CHANGED
|
@@ -206,6 +206,19 @@ const PhaseSchema = Type.Object(
|
|
|
206
206
|
default: 8000,
|
|
207
207
|
}),
|
|
208
208
|
),
|
|
209
|
+
onBlock: Type.Optional(
|
|
210
|
+
StringEnum(["halt", "retry"] as const, {
|
|
211
|
+
description:
|
|
212
|
+
"[gate] What to do when the gate blocks: 'halt' (default, stop the flow) or 'retry' (re-run upstream phases then re-evaluate the gate). Limited by 'retry.max'.",
|
|
213
|
+
default: "halt",
|
|
214
|
+
}),
|
|
215
|
+
),
|
|
216
|
+
eval: Type.Optional(
|
|
217
|
+
Type.Array(Type.String(), {
|
|
218
|
+
description:
|
|
219
|
+
"[gate] Zero-token machine checks that run BEFORE the LLM gate. If ALL pass, the gate is skipped (PASS). If ANY fail, the LLM gate runs as normal. Each entry is a condition expression like '{steps.x.output} contains PASS' or '{steps.x.json.score} >= 0.8'. Supports same operators as 'when' plus 'contains' for substring checks.",
|
|
220
|
+
}),
|
|
221
|
+
),
|
|
209
222
|
cache: Type.Optional(CacheSchema),
|
|
210
223
|
},
|
|
211
224
|
{ additionalProperties: false },
|
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Static DAG verification — zero-token structural analysis.
|
|
3
|
+
*
|
|
4
|
+
* Runs *before* any agent is spawned. Catches dead-end phases, unreachable
|
|
5
|
+
* paths, gate exhaustion, budget overflow, and reference integrity issues
|
|
6
|
+
* purely through graph algorithms on the DAG — no LLM required.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { Phase } from "./schema.ts";
|
|
10
|
+
import { LOOP_DEFAULT_MAX_ITERATIONS } from "./schema.ts";
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Types
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
export type IssueCategory =
|
|
17
|
+
| "dead-end"
|
|
18
|
+
| "unreachable"
|
|
19
|
+
| "gate-exhaustion"
|
|
20
|
+
| "budget-overflow"
|
|
21
|
+
| "concurrency"
|
|
22
|
+
| "ref-integrity"
|
|
23
|
+
| "guard-contradiction";
|
|
24
|
+
|
|
25
|
+
export interface VerificationIssue {
|
|
26
|
+
/** Affected phase id, if applicable. */
|
|
27
|
+
phaseId?: string;
|
|
28
|
+
message: string;
|
|
29
|
+
severity: "error" | "warning";
|
|
30
|
+
category: IssueCategory;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export interface VerificationResult {
|
|
34
|
+
ok: boolean;
|
|
35
|
+
issues: VerificationIssue[];
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/** A lightweight Taskflow shape for verification (accepts parsed Phase[] + name). */
|
|
39
|
+
export interface VerifiableFlow {
|
|
40
|
+
name: string;
|
|
41
|
+
phases: Phase[];
|
|
42
|
+
budget?: { maxUSD?: number; maxTokens?: number };
|
|
43
|
+
concurrency?: number;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// Graph helpers
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
function successors(phases: Phase[]): Map<string, string[]> {
|
|
51
|
+
const m = new Map<string, string[]>();
|
|
52
|
+
for (const p of phases) m.set(p.id, []);
|
|
53
|
+
for (const p of phases) {
|
|
54
|
+
for (const d of p.dependsOn ?? []) {
|
|
55
|
+
const s = m.get(d);
|
|
56
|
+
if (s) s.push(p.id);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
return m;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
function descendants(phaseId: string, succ: Map<string, string[]>): Set<string> {
|
|
63
|
+
const visited = new Set<string>();
|
|
64
|
+
const queue = [phaseId];
|
|
65
|
+
while (queue.length) {
|
|
66
|
+
const id = queue.shift()!;
|
|
67
|
+
if (visited.has(id)) continue;
|
|
68
|
+
visited.add(id);
|
|
69
|
+
for (const s of succ.get(id) ?? []) queue.push(s);
|
|
70
|
+
}
|
|
71
|
+
return visited;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/** Phases with NO `dependsOn` — the DAG entry points. */
|
|
75
|
+
function entryPhases(phases: Phase[]): Phase[] {
|
|
76
|
+
return phases.filter((p) => !p.dependsOn || p.dependsOn.length === 0);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/** Phases with NO dependents (no one waits for them). */
|
|
80
|
+
function terminalPhases(phases: Phase[], succ: Map<string, string[]>): string[] {
|
|
81
|
+
const hasDependents = new Set<string>();
|
|
82
|
+
for (const p of phases) {
|
|
83
|
+
for (const d of p.dependsOn ?? []) hasDependents.add(d);
|
|
84
|
+
}
|
|
85
|
+
return phases.filter((p) => !hasDependents.has(p.id)).map((p) => p.id);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
// Analyzers
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
/** #1 Dead-end: a phase with no dependents that is not marked `final` and is not the last phase of a flow that declares no `final` phase. */
|
|
93
|
+
function detectDeadEnds(phases: Phase[], succ: Map<string, string[]>): VerificationIssue[] {
|
|
94
|
+
const issues: VerificationIssue[] = [];
|
|
95
|
+
const terminal = new Set(terminalPhases(phases, succ));
|
|
96
|
+
const hasFinal = phases.some((p) => p.final);
|
|
97
|
+
const lastId = phases[phases.length - 1]?.id;
|
|
98
|
+
|
|
99
|
+
for (const p of phases) {
|
|
100
|
+
if (!terminal.has(p.id)) continue;
|
|
101
|
+
if (p.final) continue;
|
|
102
|
+
if (!hasFinal && p.id === lastId) continue;
|
|
103
|
+
|
|
104
|
+
issues.push({
|
|
105
|
+
phaseId: p.id,
|
|
106
|
+
message:
|
|
107
|
+
`Phase '${p.id}' is a terminal phase (no dependents) but not marked as 'final'. ` +
|
|
108
|
+
`Its output will be discarded. Add "final": true or a downstream phase that depends on it.`,
|
|
109
|
+
severity: "warning",
|
|
110
|
+
category: "dead-end",
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
return issues;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/** #2 Unreachable: phases outside the largest connected component (treating `dependsOn` as undirected edges); standalone phases with no edges are exempt. */
|
|
117
|
+
function detectUnreachable(phases: Phase[], succ: Map<string, string[]>): VerificationIssue[] {
|
|
118
|
+
const issues: VerificationIssue[] = [];
|
|
119
|
+
|
|
120
|
+
// Build undirected adjacency (dependsOn edges are bidirectional for
|
|
121
|
+
// connectivity analysis).
|
|
122
|
+
const adj = new Map<string, Set<string>>();
|
|
123
|
+
for (const p of phases) adj.set(p.id, new Set());
|
|
124
|
+
for (const p of phases) {
|
|
125
|
+
for (const d of p.dependsOn ?? []) {
|
|
126
|
+
if (!adj.has(d)) continue; // ref to non-existent phase (schema catches)
|
|
127
|
+
adj.get(p.id)!.add(d);
|
|
128
|
+
adj.get(d)!.add(p.id);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
// Find connected components via BFS.
|
|
133
|
+
const visited = new Set<string>();
|
|
134
|
+
const components: Set<string>[] = [];
|
|
135
|
+
for (const p of phases) {
|
|
136
|
+
if (visited.has(p.id)) continue;
|
|
137
|
+
const comp = new Set<string>();
|
|
138
|
+
const queue = [p.id];
|
|
139
|
+
while (queue.length) {
|
|
140
|
+
const id = queue.shift()!;
|
|
141
|
+
if (visited.has(id)) continue;
|
|
142
|
+
visited.add(id);
|
|
143
|
+
comp.add(id);
|
|
144
|
+
for (const nb of adj.get(id) ?? []) {
|
|
145
|
+
if (!visited.has(nb)) queue.push(nb);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
components.push(comp);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (components.length <= 1) return issues;
|
|
152
|
+
|
|
153
|
+
// The largest component is the main DAG; flag the rest — but only if they
|
|
154
|
+
// have edges (dependsOn or successors). A standalone phase with no edges is
|
|
155
|
+
// a valid independent entry, not unreachable.
|
|
156
|
+
const succMap2 = successors(phases);
|
|
157
|
+
const largest = components.reduce((a, b) => (a.size >= b.size ? a : b));
|
|
158
|
+
for (const comp of components) {
|
|
159
|
+
if (comp === largest) continue;
|
|
160
|
+
for (const id of comp) {
|
|
161
|
+
const p = phases.find((ph) => ph.id === id);
|
|
162
|
+
const hasEdges = (p && (p.dependsOn?.length || 0) > 0) || (succMap2.get(id)?.length || 0) > 0;
|
|
163
|
+
if (!hasEdges) continue; // standalone entry — valid
|
|
164
|
+
issues.push({
|
|
165
|
+
phaseId: id,
|
|
166
|
+
message:
|
|
167
|
+
`Phase '${id}' is disconnected from the main DAG. ` +
|
|
168
|
+
`Add a 'dependsOn' edge to connect it, or remove it.`,
|
|
169
|
+
severity: "error",
|
|
170
|
+
category: "unreachable",
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
return issues;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
/**
 * Reports whether `dst` can be reached from `src` along successor edges
 * without passing through `avoidId`.
 *
 * A search that starts ON the avoided node is never a bypass: every path out
 * of it necessarily passes through it. The only exception is the degenerate
 * case `src === dst`, which is treated as already reached.
 *
 * @param src - Phase id the search starts from.
 * @param dst - Phase id to reach.
 * @param avoidId - Phase id that must not appear on the path.
 * @param succ - Successor map (phase id → ids of phases that depend on it).
 * @param visited - Ids already explored; mutated by the search. Pass a fresh
 *   set for each top-level query.
 * @returns `true` when a path from `src` to `dst` avoiding `avoidId` exists.
 */
function hasBypassPath(
  src: string,
  dst: string,
  avoidId: string,
  succ: Map<string, string[]>,
  visited: Set<string>,
): boolean {
  if (src === dst) return true;
  // Starting on the avoided node means the path goes through it by definition.
  if (src === avoidId) return false;
  if (visited.has(src)) return false;
  visited.add(src);
  for (const next of succ.get(src) ?? []) {
    if (next === avoidId) continue;
    if (hasBypassPath(next, dst, avoidId, succ, visited)) return true;
  }
  return false;
}
|
|
194
|
+
|
|
195
|
+
/** #3 Gate exhaustion: detect `gate` and `approval` phases that are the sole path to at least one downstream `final` phase. */
|
|
196
|
+
function detectGateExhaustion(phases: Phase[], succ: Map<string, string[]>): VerificationIssue[] {
|
|
197
|
+
const issues: VerificationIssue[] = [];
|
|
198
|
+
const gates = phases.filter((p) => p.type === "gate" || p.type === "approval");
|
|
199
|
+
const fp = phases.filter((p) => p.final);
|
|
200
|
+
|
|
201
|
+
for (const g of gates) {
|
|
202
|
+
const desc = descendants(g.id, succ);
|
|
203
|
+
const finalsDownstream = fp.filter((p) => desc.has(p.id));
|
|
204
|
+
if (finalsDownstream.length === 0) continue;
|
|
205
|
+
|
|
206
|
+
// Check: is there at least ONE path from an entry to each final
|
|
207
|
+
// that BYPASSES this gate?
|
|
208
|
+
let allBypassable = true;
|
|
209
|
+
for (const f of finalsDownstream) {
|
|
210
|
+
const bypassable = entryPhases(phases).some((entry) => {
|
|
211
|
+
const entryDesc = descendants(entry.id, succ);
|
|
212
|
+
if (!entryDesc.has(f.id)) return false;
|
|
213
|
+
return hasBypassPath(entry.id, f.id, g.id, succ, new Set());
|
|
214
|
+
});
|
|
215
|
+
if (!bypassable) {
|
|
216
|
+
allBypassable = false;
|
|
217
|
+
break;
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
if (!allBypassable) {
|
|
222
|
+
issues.push({
|
|
223
|
+
phaseId: g.id,
|
|
224
|
+
message:
|
|
225
|
+
`Gate '${g.id}' is the sole path to final phase(s) ` +
|
|
226
|
+
`${finalsDownstream.map((p) => "'" + p.id + "'").join(", ")}. ` +
|
|
227
|
+
`A block here halts the entire flow with no alternative route. ` +
|
|
228
|
+
`Consider adding a bypass or marking the flow's structure as intentional.`,
|
|
229
|
+
severity: "warning",
|
|
230
|
+
category: "gate-exhaustion",
|
|
231
|
+
});
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
return issues;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
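/** #4 Budget overflow: a rough lower bound (one unit per phase; loops count `maxIterations` and tournaments `variants + 1`, each capped at 10) exceeds `budget.maxTokens`. `budget.maxUSD` is not checked. */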
|
|
238
|
+
function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
|
|
239
|
+
const issues: VerificationIssue[] = [];
|
|
240
|
+
const budget = flow.budget;
|
|
241
|
+
if (!budget) return issues;
|
|
242
|
+
|
|
243
|
+
let minTokens = 0;
|
|
244
|
+
for (const p of flow.phases) {
|
|
245
|
+
if (p.type === "loop") {
|
|
246
|
+
const iters = p.maxIterations ?? LOOP_DEFAULT_MAX_ITERATIONS;
|
|
247
|
+
minTokens += Math.min(iters, 10);
|
|
248
|
+
} else if (p.type === "tournament") {
|
|
249
|
+
const variants = p.variants ?? 3;
|
|
250
|
+
minTokens += Math.min(variants + 1, 10);
|
|
251
|
+
} else {
|
|
252
|
+
minTokens += 1;
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
if (budget.maxTokens !== undefined && budget.maxTokens > 0 && minTokens > budget.maxTokens) {
|
|
257
|
+
issues.push({
|
|
258
|
+
message:
|
|
259
|
+
`Budget cap (${budget.maxTokens} tokens) is below the estimated minimum of ~${minTokens} tokens ` +
|
|
260
|
+
`for ${flow.phases.length} phase(s). The flow will likely be truncated before completion. ` +
|
|
261
|
+
`Increase maxTokens or reduce the number of phases.`,
|
|
262
|
+
severity: "warning",
|
|
263
|
+
category: "budget-overflow",
|
|
264
|
+
});
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
return issues;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
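/** #5 Concurrency warnings (parallel phases with more branches than the flow concurrency and no per-phase cap), plus self-dependency errors reported under `ref-integrity`. */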
|
|
271
|
+
function detectConcurrencyWarnings(flow: VerifiableFlow, _succ: Map<string, string[]>): VerificationIssue[] {
|
|
272
|
+
const issues: VerificationIssue[] = [];
|
|
273
|
+
|
|
274
|
+
for (const p of flow.phases) {
|
|
275
|
+
if (p.type === "parallel" && p.branches && p.branches.length > (flow.concurrency ?? 8)) {
|
|
276
|
+
if (!p.concurrency) {
|
|
277
|
+
issues.push({
|
|
278
|
+
phaseId: p.id,
|
|
279
|
+
message:
|
|
280
|
+
`Parallel phase '${p.id}' has ${p.branches.length} branches but the flow concurrency ` +
|
|
281
|
+
`is ${flow.concurrency ?? 8}. Consider adding a per-phase 'concurrency' cap.`,
|
|
282
|
+
severity: "warning",
|
|
283
|
+
category: "concurrency",
|
|
284
|
+
});
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
// Self-dependency
|
|
290
|
+
for (const p of flow.phases) {
|
|
291
|
+
if ((p.dependsOn ?? []).includes(p.id)) {
|
|
292
|
+
issues.push({
|
|
293
|
+
phaseId: p.id,
|
|
294
|
+
message: `Phase '${p.id}' depends on itself — remove self-reference from 'dependsOn'.`,
|
|
295
|
+
severity: "error",
|
|
296
|
+
category: "ref-integrity",
|
|
297
|
+
});
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
return issues;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
/**
 * #6 Guard contradictions (simple static analysis of `when` conditions).
 *
 * Phases that carry a `when` guard are grouped by their dependency set
 * (order-insensitive). A group of two or more phases is reported when every
 * guard references exactly the same `{...}` placeholders and the group mixes
 * an equality (`==`) guard with an inequality (`!=`) guard.
 *
 * The check is purely textual: it does not parse or evaluate the conditions.
 * The input phases are not modified.
 *
 * @param phases - Phases of the flow under verification.
 * @returns One `guard-contradiction` warning per offending group.
 */
function detectGuardContradictions(phases: Phase[]): VerificationIssue[] {
  const issues: VerificationIssue[] = [];
  const placeholderPattern = /\{([^}]+)\}/g;
  // `==` that is not the tail of `!==`; a plain substring test would count a
  // strict-inequality guard as an equality guard as well.
  const equalityPattern = /(?:^|[^!])==/;

  const groups = new Map<string, Phase[]>();
  for (const p of phases) {
    if (!p.when) continue;
    // Sort a copy: Array.prototype.sort reorders in place and would otherwise
    // mutate the caller's `dependsOn` array.
    const key = [...(p.dependsOn ?? [])].sort().join(",");
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(p);
    } else {
      groups.set(key, [p]);
    }
  }

  for (const group of groups.values()) {
    if (group.length < 2) continue;

    // Every guard must reference placeholders, and all guards must reference
    // the same ones, before their operators are compared.
    const refs = group.map((p) => (p.when ?? "").match(placeholderPattern)?.join(",") ?? "");
    const [firstRef] = refs;
    if (!firstRef || refs.some((r) => r !== firstRef)) continue;

    const hasEq = group.some((p) => equalityPattern.test(p.when ?? ""));
    const hasNeq = group.some((p) => (p.when ?? "").includes("!="));
    if (!hasEq || !hasNeq) continue;

    issues.push({
      message:
        `Phases ${group.map((p) => `'${p.id}'`).join(", ")} have ` +
        `the same dependency set and opposing 'when' conditions. ` +
        `One branch will always be skipped. Verify this is intentional.`,
      severity: "warning",
      category: "guard-contradiction",
    });
  }

  return issues;
}
|
|
342
|
+
|
|
343
|
+
// ---------------------------------------------------------------------------
|
|
344
|
+
// Entry point
|
|
345
|
+
// ---------------------------------------------------------------------------
|
|
346
|
+
|
|
347
|
+
/**
 * Statically verify a parsed taskflow by running every detection pass.
 *
 * The passes run in a fixed order — dead ends, unreachable phases, gate
 * exhaustion, budget overflow, concurrency warnings, guard contradictions —
 * and their issues are concatenated in that order.
 *
 * Pure function: no I/O, no LLM, zero tokens.
 *
 * @param flow - Parsed taskflow to check.
 * @returns All issues found; `ok` is `true` when none of them has severity
 *   `"error"` (warnings alone do not fail verification).
 */
export function verifyTaskflow(flow: VerifiableFlow): VerificationResult {
  const { phases } = flow;
  const succ = successors(phases);

  const issues: VerificationIssue[] = [
    ...detectDeadEnds(phases, succ),
    ...detectUnreachable(phases, succ),
    ...detectGateExhaustion(phases, succ),
    ...detectBudgetOverflow(flow),
    ...detectConcurrencyWarnings(flow, succ),
    ...detectGuardContradictions(phases),
  ];

  return {
    ok: issues.every((issue) => issue.severity !== "error"),
    issues,
  };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-taskflow",
|
|
3
|
-
"version": "0.0.13",
|
|
3
|
+
"version": "0.0.15",
|
|
4
4
|
"description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
],
|
|
37
37
|
"scripts": {
|
|
38
38
|
"typecheck": "tsc --noEmit",
|
|
39
|
-
"test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts",
|
|
39
|
+
"test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts",
|
|
40
40
|
"test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
|
|
41
41
|
"test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
|
|
42
42
|
},
|