pi-taskflow 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,117 +1,133 @@
1
1
  <div align="center">
2
2
 
3
- <img src="./assets/hero.png" alt="pi-taskflow — declarative, multi-phase subagent workflows" width="880">
3
+ <img src="./assets/hero.png" alt="pi-taskflow — declarative DAG orchestration for Pi subagents: stateful, resumable, context-isolated" width="900">
4
4
 
5
5
  <p>
6
6
  <a href="https://www.npmjs.com/package/pi-taskflow"><img src="https://img.shields.io/npm/v/pi-taskflow?style=flat-square&color=B692FF&label=npm" alt="npm version"></a>
7
+ <a href="https://www.npmjs.com/package/pi-taskflow"><img src="https://img.shields.io/npm/dm/pi-taskflow?style=flat-square&color=6E8BFF&label=downloads" alt="npm downloads"></a>
7
8
  <a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-43D9AD?style=flat-square" alt="MIT license"></a>
8
- <a href="https://pi.dev"><img src="https://img.shields.io/badge/for-Pi%20coding%20agent-6E8BFF?style=flat-square" alt="for the Pi coding agent"></a>
9
+ <a href="#whats-inside"><img src="https://img.shields.io/badge/runtime%20deps-0-43D9AD?style=flat-square" alt="zero runtime dependencies"></a>
10
+ <a href="#whats-inside"><img src="https://img.shields.io/badge/tests-265-6E8BFF?style=flat-square" alt="265 tests"></a>
11
+ <a href="https://pi.dev"><img src="https://img.shields.io/badge/for-Pi%20coding%20agent-B692FF?style=flat-square" alt="for the Pi coding agent"></a>
9
12
  </p>
10
13
 
14
+ <p><strong>Declarative DAG orchestration for <a href="https://pi.dev">Pi</a> subagents.</strong><br/>
15
+ Fan out · gate · resume · save as a command — intermediate results stay out of your context.</p>
16
+
17
+ ```bash
18
+ pi install npm:pi-taskflow
19
+ ```
20
+
11
21
  </div>
12
22
 
13
- > Lightweight workflow orchestration for the [Pi coding agent](https://pi.dev).
23
+ ---
14
24
 
15
- **Orchestrate your Pi subagents. Not by prompting — by declaring.**
25
+ **Subagents are fire-and-forget. Taskflows fire, fan out, pause, gate, resume, and save themselves as a command.**
16
26
 
17
- If you've used the built-in subagent tool's `task` / `tasks` / `chain`, you
18
- already know the shorthand — your runs just get tracked, resumable, and
19
- saveable as a one-word `/tf:<name>` command.
27
+ You already know the built-in subagent tool's `task` / `tasks` / `chain`. `pi-taskflow` speaks the *same* shorthand — so your existing delegations instantly become **tracked, resumable, and saveable as a one-word `/tf:<name>` command**. When you outgrow the shorthand, the full DSL gives you a real DAG: dynamic fan-out over dozens of items, conditional routing, quality gates, human approvals, retries, and a hard spend ceiling.
20
28
 
21
- ```bash
22
- pi install npm:pi-taskflow
23
- ```
29
+ And the whole time, **only the final phase reaches your conversation.** Every intermediate transcript stays in the runtime, never your context window.
24
30
 
25
- Fan out one subagent per item, route on results, retry the flaky ones, pause for
26
- human approval, cap the spend, and gate the output with an adversarial review —
27
- all from one declarative definition. Only the final report reaches your
28
- conversation; every intermediate transcript stays in the runtime.
31
+ ## Why this exists
29
32
 
30
- ## Why
33
+ Here's the wall you hit with raw subagents: you describe a multi-step plan in prose, the model re-derives it every single run, the intermediate transcripts flood your context, and the moment one model call fails you start over from zero. There's no reuse, no recovery, no structure.
31
34
 
32
- The built-in subagent tool is great for a single delegated task. But when a job
33
- needs many coordinated steps, fan-out over dozens of items, cross-checked review,
34
- or a repeatable pipeline, you want orchestration — without the intermediate
35
- transcripts eating your context window.
35
+ `pi-taskflow` moves the plan **out of the prompt and into a declarative definition.** The runtime owns the DAG, the loops, the retries, and the intermediate state. You declare a pipeline once and run it a hundred times — by name.
36
36
 
37
- `pi-taskflow` moves the plan into a small declarative definition. The runtime
38
- holds the DAG, the loops, and the intermediate results; your context receives
39
- only the final phase's output.
37
+ > When a job needs twelve steps with branching fan-out and a review gate, you want orchestration — not lucky prompting.
40
38
 
41
- | | `subagent` tool | `pi-taskflow` |
39
+ | | subagent (built-in) | **pi-taskflow** |
42
40
  |---|---|---|
43
- | Who drives | the model, turn by turn | the runtime, from a definition |
44
- | Intermediate results | in your context window | in the runtime (not your context) |
45
- | Reusable | re-described each time | saved as `/tf:<name>` |
46
- | Scale | a few tasks | dynamic `map` fan-out |
47
- | Resumable | no | yes (cross-session, cached phases skip) |
48
- | Quality gates | no | `gate` phases with `VERDICT: BLOCK / PASS` |
49
- | Conditional routing | no | `when` guards + `join: any` OR-joins |
50
- | Fault tolerance | no | per-phase `retry` with backoff |
51
- | Human-in-the-loop | no | `approval` phases (approve / reject / edit) |
52
- | Cost control | no | run-wide `budget` (USD / token caps) |
53
- | Composition | no | `flow` phases run saved sub-flows |
54
- | Progress visibility | opaque while running | live DAG render with timing + cost |
55
- | Ergonomics | inline JSON each time | shorthand (`task`/`tasks`/`chain`) or DSL |
41
+ | **Who drives** | the model, turn by turn | the runtime, from a definition |
42
+ | **Topology** | chain / flat parallel | **DAG with layered concurrency + routing** |
43
+ | **Intermediate results** | in your context window | **in the runtime — not your context** |
44
+ | **Scale** | a handful of tasks | **dynamic `map` fan-out over dozens of items** |
45
+ | **Reusable** | re-described every time | **saved as `/tf:<name>`** |
46
+ | **Resumable** | | **✓ cross-session — cached phases auto-skip** |
47
+ | **Quality gates** | | **`gate` phases that halt on `VERDICT: BLOCK`** |
48
+ | **Conditional routing** | | **`when` guards + `join: any` OR-joins** |
49
+ | **Fault tolerance** | | **per-phase `retry` + auto-retry on transient errors** |
50
+ | **Human-in-the-loop** | | **`approval` phases (approve / reject / edit)** |
51
+ | **Cost control** | | **run-wide `budget` (USD / token caps)** |
52
+ | **Composition** | | **`flow` phases run saved sub-flows** |
53
+ | **Live progress** | opaque while running | **live DAG render with timing + cost** |
54
+ | **Ergonomics** | inline JSON each time | **shorthand (`task`/`tasks`/`chain`) *or* DSL** |
55
+
56
+ It doesn't replace the subagent tool. It gives your subagents a DAG, a memory, and a name.
57
+
58
+ ## 30-second start
59
+
60
+ **1. Install** — one command:
56
61
 
57
- ## Show me
62
+ ```bash
63
+ pi install npm:pi-taskflow
64
+ ```
58
65
 
59
- Describe a pipeline once, then run it from a pi session by name:
66
+ **2. Run** — just ask the model in a Pi session:
60
67
 
61
- > `/tf:summarize-files dir=src`
68
+ > *Run a chain: first explore the auth flow, then summarize the findings.*
62
69
 
63
- The runtime fans out one subagent per file, merges the summaries in a `reduce`
64
- phase, and returns only the final overview. Every intermediate transcript stays
65
- in the runtime — never in your context window. (Full definition in
66
- [Quickstart](#then-go-declarative) below.)
70
+ The model calls the `taskflow` tool automatically. You get live progress, per-step timing, token cost, and a saved run record — **same effort as the built-in tool, now tracked and resumable.**
67
71
 
68
- ## Quickstart
72
+ **3. Save** — say *"save it"* and you have `/tf:<name>` forever.
69
73
 
70
- ### Shorthand: same effort as `subagent`, but tracked & resumable
74
+ That's it. You can be running your first workflow before your coffee cools — without writing a single phase definition.
71
75
 
72
- **Single task** — one agent, one job:
76
+ ### The shorthand (same shape as the built-in tool)
73
77
 
74
78
  ```jsonc
79
+ // Single — one agent, one job
75
80
  { "task": "Summarize the architecture of src/", "agent": "explorer" }
76
- ```
77
-
78
- **Parallel tasks** — fire several at once, outputs merge:
79
81
 
80
- ```jsonc
82
+ // Parallel — fire several at once, outputs merge
81
83
  { "tasks": [
82
- { "task": "Audit auth in src/api", "agent": "analyst" },
84
+ { "task": "Audit auth in src/api", "agent": "analyst" },
83
85
  { "task": "Audit input validation in src/api", "agent": "analyst" }
84
86
  ] }
85
- ```
86
87
 
87
- **Chain** — sequential, each step sees the previous one's output:
88
-
89
- ```jsonc
88
+ // Chain — sequential; each step sees the previous output
90
89
  { "chain": [
91
90
  { "task": "List the public API of src/lib", "agent": "scout" },
92
91
  { "task": "Write docs for:\n{previous.output}", "agent": "writer" }
93
92
  ] }
94
93
  ```
95
94
 
96
- `agent` is optional (defaults to the first available agent). Add `name` to label
97
- the run and enable saving it as a reusable command.
95
+ `agent` is optional (defaults to the first discovered agent). Add a `name` to label the run and unlock saving it as a command.
98
96
 
99
- Try it inline — tell the model something like:
97
+ ## Watch it run
98
+
99
+ This is not a mockup. **This is stdout from a real run** — the `self-improve` flow that writes and verifies its own test suites, caught mid-flight by a quality gate:
100
+
101
+ ```
102
+ ⊗ taskflow self-improve 6/7 · blocked · $0.095
103
+ ✓ discover agent deepseek-v4-flash 10t ↑38k ↓6.7k $0.011
104
+ ┌ ✓ write-runner-tests agent claude-sonnet-4-6 10t ↑13 ↓6.6k $0.020
105
+ ├ ✓ write-store-tests agent claude-sonnet-4-6 10t ↑11 ↓10k $0.018
106
+ ├ ✓ write-agents-tests agent claude-sonnet-4-6 10t ↑28 ↓13k $0.030
107
+ └ ✓ fix-stability agent claude-sonnet-4-6 10t ↑13 ↓3.9k $0.012
108
+ ✓ verify gate BLOCK 3 type errors in test files deepseek-v4-flash
109
+ ⊘ report reduce skipped · Gate blocked ↳ fix-stability
110
+ ```
111
+
112
+ **The layout *is* the DAG.** No dashboard, no logs to grep — you read the progress bar and you understand the whole pipeline:
100
113
 
101
- > Run a chain: first explore the auth flow, then summarize findings.
114
+ - **Header** — `⊗` = blocked (a gate halted it); `6/7` phases processed; aggregate cost `$0.095`.
115
+ - **Status icons** — `✓` done · `◐` running · `✗` failed · `⊘` skipped · `○` pending.
116
+ - **Rail `┌ ├ └`** — phases in the same DAG layer, running concurrently. The four `write-*`/`fix-stability` tasks fan out from `discover`. A blank gutter = a single-phase layer.
117
+ - **`↳`** — a long, layer-skipping dependency. `report` depends on the adjacent `verify` *and* on `fix-stability` two layers back, so only that skip edge is annotated.
118
+ - **Gate** — `verify` emitted `VERDICT: BLOCK`, so the runtime skipped `report` and ended the run as `blocked`, surfacing the reason inline.
119
+ - **Detail** — per phase: model, token counts (`↑`in `↓`out), cost, timing. Fan-out phases also show sub-task progress (`3/15 2✗ 8▸`).
102
120
 
103
- The model calls the `taskflow` tool; you get live progress, per-step timing,
104
- token cost, and a run record. Ask to `save` it and you get `/tf:<name>`.
121
+ ## Go declarative
105
122
 
106
- ### Then go declarative
123
+ The shorthand is your onramp. The DSL is where `pi-taskflow` earns its keep — dynamic fan-out, structured routing, and quality gates.
107
124
 
108
- When your pipeline outgrows the shorthand — when you need dynamic fan-out,
109
- intermediate JSON routing, or quality gates — graduate to the full DSL:
125
+ ### Fan out and reduce
110
126
 
111
127
  ```jsonc
112
128
  {
113
129
  "name": "summarize-files",
114
- "description": "Discover files, summarize each, produce a report",
130
+ "description": "Discover files, summarize each, produce one report",
115
131
  "args": { "dir": { "default": "." } },
116
132
  "concurrency": 8,
117
133
  "phases": [
@@ -119,34 +135,23 @@ intermediate JSON routing, or quality gates — graduate to the full DSL:
119
135
  "task": "List source files under {args.dir} (non-recursive).\nOutput ONLY a JSON array [{\"file\":\"\"}]. No prose.",
120
136
  "output": "json" },
121
137
  { "id": "summarize", "type": "map",
122
- "over": "{steps.discover.json}", "as": "item",
123
- "agent": "scout",
138
+ "over": "{steps.discover.json}", "as": "item", "agent": "scout",
124
139
  "task": "Read {item.file} and give a one-sentence summary.",
125
140
  "dependsOn": ["discover"] },
126
- { "id": "report", "type": "reduce", "from": ["summarize"],
127
- "agent": "writer",
141
+ { "id": "report", "type": "reduce", "from": ["summarize"], "agent": "writer",
128
142
  "task": "Combine into a short overview:\n{steps.summarize.output}",
129
143
  "dependsOn": ["summarize"], "final": true }
130
144
  ]
131
145
  }
132
146
  ```
133
147
 
134
- What this does:
135
-
136
- 1. **`discover`** — an agent lists every file in the directory and outputs a JSON array.
137
- 2. **`summarize`** — a `map` fans out, spawning one subagent per file in parallel
138
- (throttled to 8 concurrent). Each gets `{item.file}` bound to its file path.
139
- 3. **`report`** — a `reduce` merges all summaries into one clean overview.
140
-
141
- Intermediate outputs never enter your context. The runtime owns them. You get
142
- only the final report back.
148
+ 1. **`discover`** lists every file and emits a JSON array.
149
+ 2. **`summarize`** is a `map` — it fans out one subagent per file, throttled to 8 concurrent, with `{item.file}` bound to each path.
150
+ 3. **`report`** is a `reduce` — it merges every summary into one clean overview.
143
151
 
144
- Save it once → `/tf:summarize-files` forever.
152
+ The intermediate summaries never enter your context. The runtime owns them; you get the report. **Save it once → `/tf:summarize-files dir=src` forever.**
145
153
 
146
- ### Route, gate, and guard
147
-
148
- Phases also **branch, retry, pause for a human, and respect a budget** — still
149
- declaratively, no scripting:
154
+ ### Route, gate, retry, approve, and cap the spend
150
155
 
151
156
  ```jsonc
152
157
  {
@@ -156,10 +161,10 @@ declaratively, no scripting:
156
161
  { "id": "triage", "type": "agent", "agent": "analyst", "output": "json",
157
162
  "task": "Classify the bug. Output ONLY {\"severity\":\"high\"} or {\"severity\":\"low\"}." },
158
163
  { "id": "deep", "when": "{steps.triage.json.severity} == high", "dependsOn": ["triage"],
159
- "agent": "executor_code", "task": "Root-cause and patch it.",
164
+ "agent": "executor-code", "task": "Root-cause and patch it.",
160
165
  "retry": { "max": 2, "backoffMs": 500 } },
161
166
  { "id": "quick", "when": "{steps.triage.json.severity} == low", "dependsOn": ["triage"],
162
- "agent": "executor_fast", "task": "Apply the quick fix." },
167
+ "agent": "executor-fast", "task": "Apply the quick fix." },
163
168
  { "id": "approve", "type": "approval", "join": "any", "dependsOn": ["deep", "quick"],
164
169
  "task": "Review the fix before it ships." },
165
170
  { "id": "ship", "type": "agent", "dependsOn": ["approve"],
@@ -168,59 +173,28 @@ declaratively, no scripting:
168
173
  }
169
174
  ```
170
175
 
171
- - **`when`** routes to `deep` *or* `quick` from the triage JSON; the other branch is skipped.
172
- - **`join: "any"`** lets `approve` run as soon as whichever branch fired completes.
176
+ - **`when`** routes to `deep` *or* `quick` from the triage JSON; the other branch is skipped.
177
+ - **`join: "any"`** lets `approve` fire the moment whichever branch ran completes (an OR-join).
173
178
  - **`retry`** re-runs a flaky patch with backoff; **`budget`** halts the whole run if it gets too expensive.
174
179
  - **`approval`** pauses for a human (approve / reject / edit) before the final `ship`.
175
180
 
176
- ## Watch it run
177
-
178
- This is the live progress render for a real run — the `self-improve` flow that
179
- writes and verifies its own test suites, caught here mid-block by a quality gate:
180
-
181
- ```
182
- ⊗ taskflow self-improve 6/7 · blocked · $0.095
183
- ✓ discover agent deepseek-v4-flash 10t ↑38k ↓6.7k $0.011
184
- ┌ ✓ write-runner-tests agent claude-sonnet-4-6 10t ↑13 ↓6.6k $0.020
185
- ├ ✓ write-store-tests agent claude-sonnet-4-6 10t ↑11 ↓10k $0.018
186
- ├ ✓ write-agents-tests agent claude-sonnet-4-6 10t ↑28 ↓13k $0.030
187
- └ ✓ fix-stability agent claude-sonnet-4-6 10t ↑13 ↓3.9k $0.012
188
- ✓ verify gate BLOCK 3 type errors in test files deepseek-v4-flash
189
- ⊘ report reduce skipped · Gate blocked ↳ fix-stability
190
- ```
191
-
192
- **How to read it — the layout *is* the DAG:**
193
-
194
- - **Header** — `⊗` means the flow is blocked (a gate halted it); `6/7` phases
195
- processed, aggregate cost `$0.095`.
196
- - **Status icons** — `✓` done, `◐` running, `✗` failed, `⊘` skipped, `○` pending.
197
- - **Rail `┌ ├ └`** — phases in the same DAG layer, running concurrently. The four
198
- `write-*`/`fix-stability` tasks all fan out from `discover`. A blank gutter is
199
- a single-phase layer.
200
- - **`↳`** — a long (layer-skipping) dependency. `report` depends on `verify` (the
201
- adjacent layer, implied by position) *and* `fix-stability` two layers back, so
202
- only that skip edge is annotated.
203
- - **Gate** — `verify` emitted `VERDICT: BLOCK`, so the runtime skipped `report`
204
- and ended the run as `blocked`, surfacing the reason.
205
- - **Detail** — per phase: model, token counts (`↑`in `↓`out), cost, and timing.
206
- Fan-out phases also show sub-task progress.
181
+ No scripting. No `eval`. Just data the runtime executes — safe enough to run LLM-generated definitions directly.
207
182
 
208
183
  ## Phase types
209
184
 
210
- | type | meaning | required fields |
211
- |------|---------|-----------------|
185
+ | type | what it does | required fields |
186
+ |------|--------------|-----------------|
212
187
  | `agent` | one subagent runs a single task | `task` |
213
188
  | `parallel` | run `branches[]` concurrently | `branches` (array of `{task, agent?}`) |
214
- | `map` | fan out over an array — one subagent per item, `{item}` bound | `over`, `task` |
189
+ | `map` | **fan out** over an array — one subagent per item, `{item}` bound | `over`, `task` |
215
190
  | `gate` | quality/review step that can **halt the flow** | `task` |
216
191
  | `reduce` | aggregate `from[]` phase outputs into one | `from`, `task` |
217
- | `approval` | **human-in-the-loop** pause — approve / reject / edit before continuing | — |
218
- | `flow` | run a **saved sub-flow** as one phase (composition/reuse) | `use` |
192
+ | `approval` | **human-in-the-loop** pause — approve / reject / edit | — |
193
+ | `flow` | run a **saved sub-flow** as one phase (composition) | `use` |
219
194
 
220
195
  ### Common phase fields
221
196
 
222
- Every phase needs a unique `id` and a `type` (defaults to `agent`). On top of the
223
- per-type fields above:
197
+ Every phase needs a unique `id` and a `type` (defaults to `agent`). On top of the per-type fields:
224
198
 
225
199
  | Field | Meaning |
226
200
  |---|---|
@@ -237,62 +211,35 @@ per-type fields above:
237
211
  | `optional` | A failure here does **not** abort the run |
238
212
  | `use` / `with` | (`flow`) saved sub-flow name + its args |
239
213
 
240
- Flow-level keys: `name`, `description`, `args`, `concurrency` (default 8),
241
- `agentScope`, and `budget: { maxUSD?, maxTokens? }`.
214
+ Flow-level keys: `name`, `description`, `args`, `concurrency` (default 8), `agentScope`, and `budget: { maxUSD?, maxTokens? }`.
242
215
 
243
216
  ### Control flow & reliability
244
217
 
245
- - **`when`** — skip a phase unless an expression is truthy. Supports `{refs}`,
246
- `== != < > <= >=`, `&& || !`, parentheses, and quoted strings/numbers, e.g.
247
- `"when": "{steps.triage.json.route} == deep"`. Pair with `join: "any"` on the
248
- merge phase to build real if/else routing. Parse errors **fail open**.
249
- - **`join: "any"`** — an OR-join: the phase runs as soon as *one* dependency
250
- completes (default `"all"` waits for every dep).
251
- - **`retry`** — `{ "max": 2, "backoffMs": 500, "factor": 2 }` retries a failing
252
- subagent with fixed (`factor:1`) or exponential backoff; usage is summed and
253
- the attempt count shows as `↻N` in the TUI.
254
- - **`approval`** — pause for a human (`select`: Approve / Reject / Edit). Reject
255
- halts the flow; Edit injects the typed note as the phase output for downstream
256
- steps. Non-interactive runs auto-approve.
257
- - **`flow`** — `{ "type": "flow", "use": "deep-research", "with": { "topic": "{item}" } }`
258
- runs a saved flow as a phase (recursion is detected and rejected).
259
- - **`budget`** — a run-wide `{maxUSD, maxTokens}` ceiling; once exceeded, pending
260
- phases are skipped (and in-flight fan-out stops spawning) and the run is
261
- `blocked`.
262
-
263
- ### `output` format
264
-
265
- - `output: "text"` (default) — the raw subagent output.
266
- - `output: "json"` — the subagent output is parsed as JSON and exposed via
267
- `{steps.ID.json}` / `{steps.ID.json.field}`. Set this on phases whose output
268
- a downstream `map` or `reduce` needs to consume as structured data.
269
-
270
- There is no `output: "file"`. For file-based output, have the agent write to
271
- disk with a `write` tool call.
218
+ - **`when`** — skip a phase unless an expression is truthy. Supports `{refs}`, `== != < > <= >=`, `&& || !`, parentheses, and quoted strings/numbers. Pair with `join: "any"` on the merge phase for real if/else routing. Parse errors **fail open**.
219
+ - **`join: "any"`** — an OR-join: the phase runs as soon as *one* dependency completes (default `"all"` waits for all).
220
+ - **`retry`** — `{ "max": 2, "backoffMs": 500, "factor": 2 }` retries a failing subagent with fixed or exponential backoff; usage is summed and the attempt count shows as `↻N` in the TUI. Transient provider errors (rate-limit / 5xx / timeout) **auto-retry even without an explicit policy**; hard errors don't.
221
+ - **`approval`** — pause for a human (Approve / Reject / Edit). Reject halts the flow; Edit injects the typed note as the phase output for downstream steps. Non-interactive runs auto-approve.
222
+ - **`flow`** — `{ "type": "flow", "use": "deep-research", "with": { "topic": "{item}" } }` runs a saved flow as a phase (recursion is detected and rejected).
223
+ - **`budget`** — a run-wide `{maxUSD, maxTokens}` ceiling; once exceeded, pending phases skip and in-flight fan-out stops spawning, ending the run as `blocked`.
224
+ - **idle watchdog** — a subagent that goes silent for 5 minutes is treated as wedged and killed (SIGTERM → SIGKILL), so one hung child can never freeze the whole flow.
272
225
 
273
226
  ### Gate phases (quality control)
274
227
 
275
- A `gate` runs an agent to review upstream output and can **block the rest
276
- of the workflow**. End the gate task's instructions by asking the agent to
277
- emit a verdict the runtime can read:
228
+ A `gate` runs an agent to review upstream output and can **block the rest of the workflow.** End the gate task by asking for a verdict the runtime can read:
278
229
 
279
- - a final line `VERDICT: PASS` or `VERDICT: BLOCK` (also accepts `OK`, `FAIL`,
280
- `STOP`, `REJECT`, `HALT` — last occurrence wins), or
281
- - JSON like `{"continue": false, "reason": "missing auth checks"}` /
282
- `{"verdict": "block", "reason": "..."}`.
230
+ - a final line `VERDICT: PASS` or `VERDICT: BLOCK` (also accepts `OK`, `FAIL`, `STOP`, `REJECT`, `HALT` — last occurrence wins), or
231
+ - JSON like `{"continue": false, "reason": "missing auth checks"}` / `{"verdict": "block", "reason": "..."}`.
283
232
 
284
- On **BLOCK**, downstream phases are skipped and the run ends as `blocked` with
285
- the reason surfaced. **Ambiguous output fails open** (treated as PASS) — a gate
286
- never halts the flow by accident.
233
+ On **BLOCK**, downstream phases skip and the run ends as `blocked` with the reason surfaced. **Ambiguous output fails open** (treated as PASS) — a gate never halts your flow by accident.
287
234
 
288
235
  ```
289
- Review the audit results below. If any endpoint is missing auth, end with
236
+ Review the audit below. If any endpoint is missing auth, end with
290
237
  "VERDICT: BLOCK" and a one-line reason; otherwise end with "VERDICT: PASS".
291
238
 
292
239
  {steps.audit.output}
293
240
  ```
294
241
 
295
- ## Interpolation
242
+ ## Interpolation & expressions
296
243
 
297
244
  | placeholder | resolves to |
298
245
  |---|---|
@@ -302,9 +249,13 @@ Review the audit results below. If any endpoint is missing auth, end with
302
249
  | `{item}` / `{item.field}` | current item inside a `map` phase |
303
250
  | `{previous.output}` | the immediately-upstream phase output |
304
251
 
252
+ Condition grammar (for `when`): `== != < > <= >=`, `&& || !`, parentheses, quoted strings/numbers, and any `{...}` reference — e.g. `"when": "{steps.triage.json.route} == deep && {args.force} != true"`.
253
+
254
+ > Referencing `{steps.X}` that isn't declared in `dependsOn` is a **hard validation error** — the runtime catches the most common pipeline bug before a single agent runs.
255
+
305
256
  ## Commands
306
257
 
307
- Saved flows become CLI shortcuts. All commands work in the pi session:
258
+ Saved flows become CLI shortcuts. All commands run in the Pi session:
308
259
 
309
260
  | Command | What it does |
310
261
  |---|---|
@@ -315,18 +266,19 @@ Saved flows become CLI shortcuts. All commands work in the pi session:
315
266
  | `/tf resume <runId>` | Continue a paused/failed run — cached phases skip automatically |
316
267
  | `/tf:<name> [args]` | Shortcut — runs the flow in one tap |
317
268
 
318
- Tool actions (used by the model): `run` (inline `define` or saved `name`),
319
- `save`, `resume`, `list`.
269
+ Tool actions (used by the model): `run` (inline `define` or saved `name`), `save`, `resume`, `list`.
320
270
 
321
271
  ## Storage
322
272
 
323
273
  ```
324
274
  .pi/taskflows/<name>.json # project-scoped definitions (commit to share)
325
275
  ~/.pi/agent/taskflows/<name>.json # user-scoped definitions
326
- .pi/taskflows/runs/<runId>.json # run state (resume); gitignore this
276
+ .pi/taskflows/runs/<runId>.json # run state for resume (gitignore this)
327
277
  ```
328
278
 
329
- Agent discovery scope (set via `agentScope` in the flow definition):
279
+ > Commit `.pi/taskflows/` and your whole team shares the pipelines — no config sync, no onboarding doc. Run state is written atomically and guarded by a zero-dependency file lock, so concurrent runs never corrupt the index.
280
+
281
+ Agent discovery scope (via `agentScope` in the flow definition):
330
282
 
331
283
  | value | discovers agents from |
332
284
  |---|---|
@@ -336,20 +288,11 @@ Agent discovery scope (set via `agentScope` in the flow definition):
336
288
 
337
289
  ## Agents
338
290
 
339
- Taskflow reuses your existing pi agent files (`~/.pi/agent/agents/*.md`,
340
- `.pi/agents/*.md`). Reference agents by `name` in a phase or shorthand.
341
-
342
- When running a phase, the runtime extracts the agent's `systemPrompt` from its
343
- `.md` frontmatter and passes it via `--append-system-prompt` (written to a temp
344
- file). Phase-level overrides for `model`, `thinking`, and `tools` are passed as
345
- `--model` / `--thinking` / `--tools` flags to the subagent invocation.
346
-
347
- Settings from `~/.pi/agent/settings.json` (the `subagents.agentOverrides` map)
348
- are honored, letting you tweak model, thinking, or tools per agent across all flows.
291
+ Taskflow reuses your existing Pi agent files (`~/.pi/agent/agents/*.md`, `.pi/agents/*.md`) — reference them by `name` in any phase or shorthand. The runtime extracts each agent's `systemPrompt` from its `.md` frontmatter and passes it via `--append-system-prompt`; phase-level `model` / `thinking` / `tools` overrides map to the matching subagent flags. Settings from `~/.pi/agent/settings.json` (`subagents.agentOverrides`) are honored across all flows.
349
292
 
350
293
  ## Examples
351
294
 
352
- Ready-to-read definitions live in [`examples/`](./examples):
295
+ Ready-to-read definitions in [`examples/`](./examples):
353
296
 
354
297
  | File | Demonstrates |
355
298
  |---|---|
@@ -357,37 +300,33 @@ Ready-to-read definitions live in [`examples/`](./examples):
357
300
  | [`conditional-research.json`](./examples/conditional-research.json) | `when` routing + `join: any` + `gate` + `budget` |
358
301
  | [`guarded-refactor.json`](./examples/guarded-refactor.json) | `approval` (human-in-the-loop) + `retry` + `gate` |
359
302
 
360
- To use one, copy it into `.pi/taskflows/<name>.json` (or
361
- `~/.pi/agent/taskflows/`) and it registers as `/tf:<name>` — or just point the
362
- model at the definition.
303
+ Copy one into `.pi/taskflows/<name>.json` (or `~/.pi/agent/taskflows/`) and it registers as `/tf:<name>` — or just point the model at it.
304
+
305
+ ## What's inside
306
+
307
+ <div align="center">
308
+
309
+ **0 runtime dependencies** · **265 tests** · **7 phase types** · **cross-session resume** · **~4.4k LOC runtime**
310
+
311
+ </div>
312
+
313
+ - **Zero runtime dependencies.** No `dependencies` field — the runtime is built entirely on Node built-ins (`fs` / `path` / `os` / `child_process` / `crypto`). The file lock is `fs.openSync("wx")`, not a third-party library.
314
+ - **265 tests across 11 suites** covering concurrency, atomic file locking (8-process race regressions), path-traversal hardening, cross-session resume, gate verdicts, budget caps, retry/backoff, approval flows, sub-flow composition, callback isolation, and the idle watchdog — plus a live end-to-end test that spawns real subagents.
315
+ - **Hardened by design.** Path-traversal defense (lexical + `realpath`), runId validation, HTML/error sanitization, atomic writes, stale-lock stealing via `rename`, and an idle watchdog that kills wedged subagents.
316
+ - **Dogfooded.** Every new feature has to survive the project's own `self-improve` taskflow before it ships.
317
+
318
+ If this saves you a context window, **drop a ⭐ on [GitHub](https://github.com/heggria/pi-taskflow)** — it genuinely helps.
363
319
 
364
320
  ## Status & limits
365
321
 
366
- - **v0.0.6** — control flow & reliability: conditional `when` guards, `join: any`
367
- OR-joins, declarative `retry`/backoff, `approval` (human-in-the-loop) phases,
368
- `flow` (saved sub-flow composition), and run-wide `budget` caps on top of the
369
- DSL + DAG runtime (`agent`/`parallel`/`map`/`gate`/`reduce`),
370
- inline + saved flows, cross-session resume, live progress, isolated context.
371
- Default `concurrency` is 8 (set on the flow; per-phase `concurrency` overrides
372
- for that phase).
373
- - A run executes as one streaming tool call (live progress while it runs).
374
- - `map` requires the upstream phase to emit a JSON array (`output: "json"`).
375
- - Gate verdicts are **fail-open**: if the agent output contains no recognizable
376
- verdict marker (`VERDICT: BLOCK/PASS/OK/FAIL/STOP/REJECT/HALT` or
377
- `{continue: false}` / `{verdict: "block"}`), the gate passes. This prevents
378
- an accidental missing verdict from blocking your workflow.
379
-
380
- ### What it doesn't do (yet)
381
-
382
- - **No detached background execution.** A run needs the pi session to stay open.
383
- True background execution (and event/cron triggers on top of it) is on the
384
- roadmap.
385
- - **No `output: "file"`.** Outputs are text/JSON only. Write files via agent
386
- tool calls if needed.
387
- - **`map` requires a JSON array.** The `over` field must resolve to
388
- `{steps.ID.json}` where the upstream phase emitted `output: "json"`. If the
389
- source is a plain text list, wrap it in a single-agent phase that outputs JSON.
390
- - **Cycles are rejected at validation.** The DAG must be acyclic.
322
+ **v0.0.10** — full control-flow & reliability layer (`when` guards, `join: any`, `retry`/backoff, `approval`, `flow` composition, `budget` caps) on top of the DSL + DAG runtime (`agent`/`parallel`/`map`/`gate`/`reduce`), inline + saved flows, cross-session resume, live progress, and isolated context. A run executes as one streaming tool call.
323
+
324
+ Known boundaries (tracked, bounded — no surprises mid-flow):
325
+
326
+ - **No detached background execution.** A run needs the Pi session open. True background execution (and event/cron triggers on top of it) is on the roadmap.
327
+ - **No `output: "file"`.** Outputs are text/JSON only — write files via an agent's `write` tool call.
328
+ - **`map` requires a JSON array.** The `over` field must resolve to a `{steps.ID.json}` array. Wrap a text list in a single-agent `output: "json"` phase first.
329
+ - **The DAG must be acyclic.** Cycles are rejected at validation.
391
330
 
392
331
  ## Development
393
332
 
@@ -395,16 +334,14 @@ model at the definition.
395
334
  npm install
396
335
  npm run typecheck
397
336
  npm test # unit tests — no network, no process spawning
398
-
399
- # real end-to-end (spawns live subagents; needs model access)
400
- npm run test:e2e
337
+ npm run test:e2e # real end-to-end (spawns live subagents; needs model access)
401
338
  ```
402
339
 
340
+ Runtime lives in `extensions/`, tests in `test/`, runnable examples in `examples/`, and the full design rationale in [`DESIGN.md`](./DESIGN.md).
341
+
403
342
  ## Contributing
404
343
 
405
- Contributions welcome! This is a young project — open an issue or PR on
406
- [GitHub](https://github.com/heggria/pi-taskflow). Tests live in `test/`, the
407
- runtime in `extensions/`.
344
+ Contributions welcome — this is a young, fast-moving project. Open an issue or PR on [GitHub](https://github.com/heggria/pi-taskflow). Good first contributions: new example flows, phase-type ideas, and TUI polish.
408
345
 
409
346
  ## License
410
347
 
@@ -25,7 +25,7 @@
25
25
  {
26
26
  "id": "implement",
27
27
  "type": "agent",
28
- "agent": "executor_code",
28
+ "agent": "executor-code",
29
29
  "dependsOn": ["approve", "plan"],
30
30
  "task": "Implement the approved plan for {args.target}.\nPlan:\n{steps.plan.output}\nExtra human guidance (if any):\n{steps.approve.output}",
31
31
  "retry": { "max": 1, "backoffMs": 1000 }
@@ -169,6 +169,14 @@ async function runFlow(
169
169
  const scope: AgentScope = def.agentScope ?? "user";
170
170
  const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
171
171
 
172
+ // Pre-flight: warn if any phase references an agent not in the registry
173
+ const agentNames = new Set(agents.map(a => a.name));
174
+ for (const p of def.phases ?? []) {
175
+ if (p.agent && !agentNames.has(p.agent)) {
176
+ console.warn(`[taskflow] Warning: phase '${p.id}' references agent '${p.agent}' which was not found. Available: ${[...agentNames].join(", ")}`);
177
+ }
178
+ }
179
+
172
180
  const result = await executeTaskflow(state, {
173
181
  cwd: ctx.cwd,
174
182
  agents,
@@ -53,7 +53,12 @@ function elapsed(ms: number): string {
53
53
 
54
54
  function phaseElapsed(ps: PhaseState): number {
55
55
  if (!ps.startedAt) return 0;
56
- return (ps.endedAt ?? Date.now()) - ps.startedAt;
56
+ // Guard against a stale/clock-skewed endedAt that precedes startedAt (e.g. a
57
+ // resumed phase that still carries a previous attempt's endedAt): treat such
58
+ // an end time as absent and fall back to now. Finally clamp to >= 0 so the
59
+ // TUI never shows a negative (and frozen) elapsed time.
60
+ const end = ps.endedAt && ps.endedAt >= ps.startedAt ? ps.endedAt : Date.now();
61
+ return Math.max(0, end - ps.startedAt);
57
62
  }
58
63
 
59
64
  function miniBar(done: number, total: number, theme: Theme, width = 8): string {
@@ -91,7 +96,7 @@ function runElapsed(state: RunState): number {
91
96
  const min = Math.min(...starts);
92
97
  const ends = Object.values(state.phases).map((p) => p.endedAt ?? Date.now());
93
98
  const max = ends.length ? Math.max(...ends) : Date.now();
94
- return max - min;
99
+ return Math.max(0, max - min);
95
100
  }
96
101
 
97
102
  export function summarizeRun(state: RunState): string {