pi-taskflow 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +153 -216
- package/examples/guarded-refactor.json +1 -1
- package/extensions/index.ts +8 -0
- package/extensions/render.ts +7 -2
- package/extensions/runner.ts +68 -1
- package/extensions/runtime.ts +41 -48
- package/extensions/schema.ts +19 -6
- package/extensions/store.ts +544 -55
- package/package.json +1 -1
- package/skills/taskflow/SKILL.md +1 -1
package/README.md
CHANGED
|
@@ -1,117 +1,133 @@
|
|
|
1
1
|
<div align="center">
|
|
2
2
|
|
|
3
|
-
<img src="./assets/hero.png" alt="pi-taskflow — declarative,
|
|
3
|
+
<img src="./assets/hero.png" alt="pi-taskflow — declarative DAG orchestration for Pi subagents: stateful, resumable, context-isolated" width="900">
|
|
4
4
|
|
|
5
5
|
<p>
|
|
6
6
|
<a href="https://www.npmjs.com/package/pi-taskflow"><img src="https://img.shields.io/npm/v/pi-taskflow?style=flat-square&color=B692FF&label=npm" alt="npm version"></a>
|
|
7
|
+
<a href="https://www.npmjs.com/package/pi-taskflow"><img src="https://img.shields.io/npm/dm/pi-taskflow?style=flat-square&color=6E8BFF&label=downloads" alt="npm downloads"></a>
|
|
7
8
|
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-43D9AD?style=flat-square" alt="MIT license"></a>
|
|
8
|
-
<a href="
|
|
9
|
+
<a href="#whats-inside"><img src="https://img.shields.io/badge/runtime%20deps-0-43D9AD?style=flat-square" alt="zero runtime dependencies"></a>
|
|
10
|
+
<a href="#whats-inside"><img src="https://img.shields.io/badge/tests-265-6E8BFF?style=flat-square" alt="265 tests"></a>
|
|
11
|
+
<a href="https://pi.dev"><img src="https://img.shields.io/badge/for-Pi%20coding%20agent-B692FF?style=flat-square" alt="for the Pi coding agent"></a>
|
|
9
12
|
</p>
|
|
10
13
|
|
|
14
|
+
<p><strong>Declarative DAG orchestration for <a href="https://pi.dev">Pi</a> subagents.</strong><br/>
|
|
15
|
+
Fan out · gate · resume · save as a command — intermediate results stay out of your context.</p>
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pi install npm:pi-taskflow
|
|
19
|
+
```
|
|
20
|
+
|
|
11
21
|
</div>
|
|
12
22
|
|
|
13
|
-
|
|
23
|
+
---
|
|
14
24
|
|
|
15
|
-
**
|
|
25
|
+
**Subagents are fire-and-forget. Taskflows fire, fan out, pause, gate, resume, and save themselves as a command.**
|
|
16
26
|
|
|
17
|
-
|
|
18
|
-
already know the shorthand — your runs just get tracked, resumable, and
|
|
19
|
-
saveable as a one-word `/tf:<name>` command.
|
|
27
|
+
You already know the built-in subagent tool's `task` / `tasks` / `chain`. `pi-taskflow` speaks the *same* shorthand — so your existing delegations instantly become **tracked, resumable, and saveable as a one-word `/tf:<name>` command**. When you outgrow the shorthand, the full DSL gives you a real DAG: dynamic fan-out over dozens of items, conditional routing, quality gates, human approvals, retries, and a hard spend ceiling.
|
|
20
28
|
|
|
21
|
-
|
|
22
|
-
pi install npm:pi-taskflow
|
|
23
|
-
```
|
|
29
|
+
And the whole time, **only the final phase reaches your conversation.** Every intermediate transcript stays in the runtime, never your context window.
|
|
24
30
|
|
|
25
|
-
|
|
26
|
-
human approval, cap the spend, and gate the output with an adversarial review —
|
|
27
|
-
all from one declarative definition. Only the final report reaches your
|
|
28
|
-
conversation; every intermediate transcript stays in the runtime.
|
|
31
|
+
## Why this exists
|
|
29
32
|
|
|
30
|
-
|
|
33
|
+
Here's the wall you hit with raw subagents: you describe a multi-step plan in prose, the model re-derives it every single run, the intermediate transcripts flood your context, and the moment one model call fails you start over from zero. There's no reuse, no recovery, no structure.
|
|
31
34
|
|
|
32
|
-
|
|
33
|
-
needs many coordinated steps, fan-out over dozens of items, cross-checked review,
|
|
34
|
-
or a repeatable pipeline, you want orchestration — without the intermediate
|
|
35
|
-
transcripts eating your context window.
|
|
35
|
+
`pi-taskflow` moves the plan **out of the prompt and into a declarative definition.** The runtime owns the DAG, the loops, the retries, and the intermediate state. You declare a pipeline once and run it a hundred times — by name.
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
holds the DAG, the loops, and the intermediate results; your context receives
|
|
39
|
-
only the final phase's output.
|
|
37
|
+
> When a job needs twelve steps with branching fan-out and a review gate, you want orchestration — not lucky prompting.
|
|
40
38
|
|
|
41
|
-
| |
|
|
39
|
+
| | subagent (built-in) | **pi-taskflow** |
|
|
42
40
|
|---|---|---|
|
|
43
|
-
| Who drives | the model, turn by turn | the runtime, from a definition |
|
|
44
|
-
|
|
|
45
|
-
|
|
|
46
|
-
| Scale | a
|
|
47
|
-
|
|
|
48
|
-
|
|
|
49
|
-
|
|
|
50
|
-
|
|
|
51
|
-
|
|
|
52
|
-
|
|
|
53
|
-
|
|
|
54
|
-
|
|
|
55
|
-
|
|
|
41
|
+
| **Who drives** | the model, turn by turn | the runtime, from a definition |
|
|
42
|
+
| **Topology** | chain / flat parallel | **DAG with layered concurrency + routing** |
|
|
43
|
+
| **Intermediate results** | in your context window | **in the runtime — not your context** |
|
|
44
|
+
| **Scale** | a handful of tasks | **dynamic `map` fan-out over dozens of items** |
|
|
45
|
+
| **Reusable** | re-described every time | **saved as `/tf:<name>`** |
|
|
46
|
+
| **Resumable** | ✗ | **✓ cross-session — cached phases auto-skip** |
|
|
47
|
+
| **Quality gates** | ✗ | **`gate` phases that halt on `VERDICT: BLOCK`** |
|
|
48
|
+
| **Conditional routing** | ✗ | **`when` guards + `join: any` OR-joins** |
|
|
49
|
+
| **Fault tolerance** | ✗ | **per-phase `retry` + auto-retry on transient errors** |
|
|
50
|
+
| **Human-in-the-loop** | ✗ | **`approval` phases (approve / reject / edit)** |
|
|
51
|
+
| **Cost control** | ✗ | **run-wide `budget` (USD / token caps)** |
|
|
52
|
+
| **Composition** | ✗ | **`flow` phases run saved sub-flows** |
|
|
53
|
+
| **Live progress** | opaque while running | **live DAG render with timing + cost** |
|
|
54
|
+
| **Ergonomics** | inline JSON each time | **shorthand (`task`/`tasks`/`chain`) *or* DSL** |
|
|
55
|
+
|
|
56
|
+
It doesn't replace the subagent tool. It gives your subagents a DAG, a memory, and a name.
|
|
57
|
+
|
|
58
|
+
## 30-second start
|
|
59
|
+
|
|
60
|
+
**1. Install** — one command:
|
|
56
61
|
|
|
57
|
-
|
|
62
|
+
```bash
|
|
63
|
+
pi install npm:pi-taskflow
|
|
64
|
+
```
|
|
58
65
|
|
|
59
|
-
|
|
66
|
+
**2. Run** — just ask the model in a Pi session:
|
|
60
67
|
|
|
61
|
-
>
|
|
68
|
+
> *Run a chain: first explore the auth flow, then summarize the findings.*
|
|
62
69
|
|
|
63
|
-
The
|
|
64
|
-
phase, and returns only the final overview. Every intermediate transcript stays
|
|
65
|
-
in the runtime — never in your context window. (Full definition in
|
|
66
|
-
[Quickstart](#then-go-declarative) below.)
|
|
70
|
+
The model calls the `taskflow` tool automatically. You get live progress, per-step timing, token cost, and a saved run record — **same effort as the built-in tool, now tracked and resumable.**
|
|
67
71
|
|
|
68
|
-
|
|
72
|
+
**3. Save** — say *"save it"* and you have `/tf:<name>` forever.
|
|
69
73
|
|
|
70
|
-
|
|
74
|
+
That's it. You can be running your first workflow before your coffee cools — without writing a single phase definition.
|
|
71
75
|
|
|
72
|
-
|
|
76
|
+
### The shorthand (same shape as the built-in tool)
|
|
73
77
|
|
|
74
78
|
```jsonc
|
|
79
|
+
// Single — one agent, one job
|
|
75
80
|
{ "task": "Summarize the architecture of src/", "agent": "explorer" }
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
**Parallel tasks** — fire several at once, outputs merge:
|
|
79
81
|
|
|
80
|
-
|
|
82
|
+
// Parallel — fire several at once, outputs merge
|
|
81
83
|
{ "tasks": [
|
|
82
|
-
{ "task": "Audit auth in src/api",
|
|
84
|
+
{ "task": "Audit auth in src/api", "agent": "analyst" },
|
|
83
85
|
{ "task": "Audit input validation in src/api", "agent": "analyst" }
|
|
84
86
|
] }
|
|
85
|
-
```
|
|
86
87
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
```jsonc
|
|
88
|
+
// Chain — sequential; each step sees the previous output
|
|
90
89
|
{ "chain": [
|
|
91
90
|
{ "task": "List the public API of src/lib", "agent": "scout" },
|
|
92
91
|
{ "task": "Write docs for:\n{previous.output}", "agent": "writer" }
|
|
93
92
|
] }
|
|
94
93
|
```
|
|
95
94
|
|
|
96
|
-
`agent` is optional (defaults to the first
|
|
97
|
-
the run and enable saving it as a reusable command.
|
|
95
|
+
`agent` is optional (defaults to the first discovered agent). Add a `name` to label the run and unlock saving it as a command.
|
|
98
96
|
|
|
99
|
-
|
|
97
|
+
## Watch it run
|
|
98
|
+
|
|
99
|
+
This is not a mockup. **This is stdout from a real run** — the `self-improve` flow that writes and verifies its own test suites, caught mid-flight by a quality gate:
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
⊗ taskflow self-improve 6/7 · blocked · $0.095
|
|
103
|
+
✓ discover agent deepseek-v4-flash 10t ↑38k ↓6.7k $0.011
|
|
104
|
+
┌ ✓ write-runner-tests agent claude-sonnet-4-6 10t ↑13 ↓6.6k $0.020
|
|
105
|
+
├ ✓ write-store-tests agent claude-sonnet-4-6 10t ↑11 ↓10k $0.018
|
|
106
|
+
├ ✓ write-agents-tests agent claude-sonnet-4-6 10t ↑28 ↓13k $0.030
|
|
107
|
+
└ ✓ fix-stability agent claude-sonnet-4-6 10t ↑13 ↓3.9k $0.012
|
|
108
|
+
✓ verify gate BLOCK 3 type errors in test files deepseek-v4-flash
|
|
109
|
+
⊘ report reduce skipped · Gate blocked ↳ fix-stability
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
**The layout *is* the DAG.** No dashboard, no logs to grep — you read the progress bar and you understand the whole pipeline:
|
|
100
113
|
|
|
101
|
-
|
|
114
|
+
- **Header** — `⊗` = blocked (a gate halted it); `6/7` phases processed; aggregate cost `$0.095`.
|
|
115
|
+
- **Status icons** — `✓` done · `◐` running · `✗` failed · `⊘` skipped · `○` pending.
|
|
116
|
+
- **Rail `┌ ├ └`** — phases in the same DAG layer, running concurrently. The four `write-*`/`fix-stability` tasks fan out from `discover`. A blank gutter = a single-phase layer.
|
|
117
|
+
- **`↳`** — a long, layer-skipping dependency. `report` depends on the adjacent `verify` *and* on `fix-stability` two layers back, so only that skip edge is annotated.
|
|
118
|
+
- **Gate** — `verify` emitted `VERDICT: BLOCK`, so the runtime skipped `report` and ended the run as `blocked`, surfacing the reason inline.
|
|
119
|
+
- **Detail** — per phase: model, token counts (`↑` in, `↓` out), cost, timing. Fan-out phases also show sub-task progress (`3/15 2✗ 8▸`).
|
|
102
120
|
|
|
103
|
-
|
|
104
|
-
token cost, and a run record. Ask to `save` it and you get `/tf:<name>`.
|
|
121
|
+
## Go declarative
|
|
105
122
|
|
|
106
|
-
|
|
123
|
+
The shorthand is your onramp. The DSL is where `pi-taskflow` earns its keep — dynamic fan-out, structured routing, and quality gates.
|
|
107
124
|
|
|
108
|
-
|
|
109
|
-
intermediate JSON routing, or quality gates — graduate to the full DSL:
|
|
125
|
+
### Fan out and reduce
|
|
110
126
|
|
|
111
127
|
```jsonc
|
|
112
128
|
{
|
|
113
129
|
"name": "summarize-files",
|
|
114
|
-
"description": "Discover files, summarize each, produce
|
|
130
|
+
"description": "Discover files, summarize each, produce one report",
|
|
115
131
|
"args": { "dir": { "default": "." } },
|
|
116
132
|
"concurrency": 8,
|
|
117
133
|
"phases": [
|
|
@@ -119,34 +135,23 @@ intermediate JSON routing, or quality gates — graduate to the full DSL:
|
|
|
119
135
|
"task": "List source files under {args.dir} (non-recursive).\nOutput ONLY a JSON array [{\"file\":\"\"}]. No prose.",
|
|
120
136
|
"output": "json" },
|
|
121
137
|
{ "id": "summarize", "type": "map",
|
|
122
|
-
"over": "{steps.discover.json}", "as": "item",
|
|
123
|
-
"agent": "scout",
|
|
138
|
+
"over": "{steps.discover.json}", "as": "item", "agent": "scout",
|
|
124
139
|
"task": "Read {item.file} and give a one-sentence summary.",
|
|
125
140
|
"dependsOn": ["discover"] },
|
|
126
|
-
{ "id": "report", "type": "reduce", "from": ["summarize"],
|
|
127
|
-
"agent": "writer",
|
|
141
|
+
{ "id": "report", "type": "reduce", "from": ["summarize"], "agent": "writer",
|
|
128
142
|
"task": "Combine into a short overview:\n{steps.summarize.output}",
|
|
129
143
|
"dependsOn": ["summarize"], "final": true }
|
|
130
144
|
]
|
|
131
145
|
}
|
|
132
146
|
```
|
|
133
147
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
2. **`summarize`** — a `map` fans out, spawning one subagent per file in parallel
|
|
138
|
-
(throttled to 8 concurrent). Each gets `{item.file}` bound to its file path.
|
|
139
|
-
3. **`report`** — a `reduce` merges all summaries into one clean overview.
|
|
140
|
-
|
|
141
|
-
Intermediate outputs never enter your context. The runtime owns them. You get
|
|
142
|
-
only the final report back.
|
|
148
|
+
1. **`discover`** lists every file and emits a JSON array.
|
|
149
|
+
2. **`summarize`** is a `map` — it fans out one subagent per file, throttled to 8 concurrent, with `{item.file}` bound to each path.
|
|
150
|
+
3. **`report`** is a `reduce` — it merges every summary into one clean overview.
|
|
143
151
|
|
|
144
|
-
Save it once → `/tf:summarize-files` forever
|
|
152
|
+
The intermediate summaries never enter your context. The runtime owns them; you get the report. **Save it once → `/tf:summarize-files dir=src` forever.**
|
|
145
153
|
|
|
146
|
-
### Route, gate, and
|
|
147
|
-
|
|
148
|
-
Phases also **branch, retry, pause for a human, and respect a budget** — still
|
|
149
|
-
declaratively, no scripting:
|
|
154
|
+
### Route, gate, retry, approve, and cap the spend
|
|
150
155
|
|
|
151
156
|
```jsonc
|
|
152
157
|
{
|
|
@@ -156,10 +161,10 @@ declaratively, no scripting:
|
|
|
156
161
|
{ "id": "triage", "type": "agent", "agent": "analyst", "output": "json",
|
|
157
162
|
"task": "Classify the bug. Output ONLY {\"severity\":\"high\"} or {\"severity\":\"low\"}." },
|
|
158
163
|
{ "id": "deep", "when": "{steps.triage.json.severity} == high", "dependsOn": ["triage"],
|
|
159
|
-
"agent": "
|
|
164
|
+
"agent": "executor-code", "task": "Root-cause and patch it.",
|
|
160
165
|
"retry": { "max": 2, "backoffMs": 500 } },
|
|
161
166
|
{ "id": "quick", "when": "{steps.triage.json.severity} == low", "dependsOn": ["triage"],
|
|
162
|
-
"agent": "
|
|
167
|
+
"agent": "executor-fast", "task": "Apply the quick fix." },
|
|
163
168
|
{ "id": "approve", "type": "approval", "join": "any", "dependsOn": ["deep", "quick"],
|
|
164
169
|
"task": "Review the fix before it ships." },
|
|
165
170
|
{ "id": "ship", "type": "agent", "dependsOn": ["approve"],
|
|
@@ -168,59 +173,28 @@ declaratively, no scripting:
|
|
|
168
173
|
}
|
|
169
174
|
```
|
|
170
175
|
|
|
171
|
-
- **`when`** routes to `deep` *or* `quick` from the triage JSON
|
|
172
|
-
- **`join: "any"`** lets `approve`
|
|
176
|
+
- **`when`** routes to `deep` *or* `quick` from the triage JSON — the other branch is skipped.
|
|
177
|
+
- **`join: "any"`** lets `approve` fire the moment whichever branch ran completes (an OR-join).
|
|
173
178
|
- **`retry`** re-runs a flaky patch with backoff; **`budget`** halts the whole run if it gets too expensive.
|
|
174
179
|
- **`approval`** pauses for a human (approve / reject / edit) before the final `ship`.
|
|
175
180
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
This is the live progress render for a real run — the `self-improve` flow that
|
|
179
|
-
writes and verifies its own test suites, caught here mid-block by a quality gate:
|
|
180
|
-
|
|
181
|
-
```
|
|
182
|
-
⊗ taskflow self-improve 6/7 · blocked · $0.095
|
|
183
|
-
✓ discover agent deepseek-v4-flash 10t ↑38k ↓6.7k $0.011
|
|
184
|
-
┌ ✓ write-runner-tests agent claude-sonnet-4-6 10t ↑13 ↓6.6k $0.020
|
|
185
|
-
├ ✓ write-store-tests agent claude-sonnet-4-6 10t ↑11 ↓10k $0.018
|
|
186
|
-
├ ✓ write-agents-tests agent claude-sonnet-4-6 10t ↑28 ↓13k $0.030
|
|
187
|
-
└ ✓ fix-stability agent claude-sonnet-4-6 10t ↑13 ↓3.9k $0.012
|
|
188
|
-
✓ verify gate BLOCK 3 type errors in test files deepseek-v4-flash
|
|
189
|
-
⊘ report reduce skipped · Gate blocked ↳ fix-stability
|
|
190
|
-
```
|
|
191
|
-
|
|
192
|
-
**How to read it — the layout *is* the DAG:**
|
|
193
|
-
|
|
194
|
-
- **Header** — `⊗` means the flow is blocked (a gate halted it); `6/7` phases
|
|
195
|
-
processed, aggregate cost `$0.095`.
|
|
196
|
-
- **Status icons** — `✓` done, `◐` running, `✗` failed, `⊘` skipped, `○` pending.
|
|
197
|
-
- **Rail `┌ ├ └`** — phases in the same DAG layer, running concurrently. The four
|
|
198
|
-
`write-*`/`fix-stability` tasks all fan out from `discover`. A blank gutter is
|
|
199
|
-
a single-phase layer.
|
|
200
|
-
- **`↳`** — a long (layer-skipping) dependency. `report` depends on `verify` (the
|
|
201
|
-
adjacent layer, implied by position) *and* `fix-stability` two layers back, so
|
|
202
|
-
only that skip edge is annotated.
|
|
203
|
-
- **Gate** — `verify` emitted `VERDICT: BLOCK`, so the runtime skipped `report`
|
|
204
|
-
and ended the run as `blocked`, surfacing the reason.
|
|
205
|
-
- **Detail** — per phase: model, token counts (`↑`in `↓`out), cost, and timing.
|
|
206
|
-
Fan-out phases also show sub-task progress.
|
|
181
|
+
No scripting. No `eval`. Just data the runtime executes — safe enough to run LLM-generated definitions directly.
|
|
207
182
|
|
|
208
183
|
## Phase types
|
|
209
184
|
|
|
210
|
-
| type |
|
|
211
|
-
|
|
185
|
+
| type | what it does | required fields |
|
|
186
|
+
|------|--------------|-----------------|
|
|
212
187
|
| `agent` | one subagent runs a single task | `task` |
|
|
213
188
|
| `parallel` | run `branches[]` concurrently | `branches` (array of `{task, agent?}`) |
|
|
214
|
-
| `map` | fan out over an array — one subagent per item, `{item}` bound | `over`, `task` |
|
|
189
|
+
| `map` | **fan out** over an array — one subagent per item, `{item}` bound | `over`, `task` |
|
|
215
190
|
| `gate` | quality/review step that can **halt the flow** | `task` |
|
|
216
191
|
| `reduce` | aggregate `from[]` phase outputs into one | `from`, `task` |
|
|
217
|
-
| `approval` | **human-in-the-loop** pause — approve / reject / edit
|
|
218
|
-
| `flow` | run a **saved sub-flow** as one phase (composition
|
|
192
|
+
| `approval` | **human-in-the-loop** pause — approve / reject / edit | — |
|
|
193
|
+
| `flow` | run a **saved sub-flow** as one phase (composition) | `use` |
|
|
219
194
|
|
|
220
195
|
### Common phase fields
|
|
221
196
|
|
|
222
|
-
Every phase needs a unique `id` and a `type` (defaults to `agent`). On top of the
|
|
223
|
-
per-type fields above:
|
|
197
|
+
Every phase needs a unique `id` and a `type` (defaults to `agent`). On top of the per-type fields:
|
|
224
198
|
|
|
225
199
|
| Field | Meaning |
|
|
226
200
|
|---|---|
|
|
@@ -237,62 +211,35 @@ per-type fields above:
|
|
|
237
211
|
| `optional` | A failure here does **not** abort the run |
|
|
238
212
|
| `use` / `with` | (`flow`) saved sub-flow name + its args |
|
|
239
213
|
|
|
240
|
-
Flow-level keys: `name`, `description`, `args`, `concurrency` (default 8),
|
|
241
|
-
`agentScope`, and `budget: { maxUSD?, maxTokens? }`.
|
|
214
|
+
Flow-level keys: `name`, `description`, `args`, `concurrency` (default 8), `agentScope`, and `budget: { maxUSD?, maxTokens? }`.
|
|
242
215
|
|
|
243
216
|
### Control flow & reliability
|
|
244
217
|
|
|
245
|
-
- **`when`** — skip a phase unless an expression is truthy. Supports `{refs}`,
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
- **`
|
|
250
|
-
|
|
251
|
-
-
|
|
252
|
-
subagent with fixed (`factor:1`) or exponential backoff; usage is summed and
|
|
253
|
-
the attempt count shows as `↻N` in the TUI.
|
|
254
|
-
- **`approval`** — pause for a human (`select`: Approve / Reject / Edit). Reject
|
|
255
|
-
halts the flow; Edit injects the typed note as the phase output for downstream
|
|
256
|
-
steps. Non-interactive runs auto-approve.
|
|
257
|
-
- **`flow`** — `{ "type": "flow", "use": "deep-research", "with": { "topic": "{item}" } }`
|
|
258
|
-
runs a saved flow as a phase (recursion is detected and rejected).
|
|
259
|
-
- **`budget`** — a run-wide `{maxUSD, maxTokens}` ceiling; once exceeded, pending
|
|
260
|
-
phases are skipped (and in-flight fan-out stops spawning) and the run is
|
|
261
|
-
`blocked`.
|
|
262
|
-
|
|
263
|
-
### `output` format
|
|
264
|
-
|
|
265
|
-
- `output: "text"` (default) — the raw subagent output.
|
|
266
|
-
- `output: "json"` — the subagent output is parsed as JSON and exposed via
|
|
267
|
-
`{steps.ID.json}` / `{steps.ID.json.field}`. Set this on phases whose output
|
|
268
|
-
a downstream `map` or `reduce` needs to consume as structured data.
|
|
269
|
-
|
|
270
|
-
There is no `output: "file"`. For file-based output, have the agent write to
|
|
271
|
-
disk with a `write` tool call.
|
|
218
|
+
- **`when`** — skip a phase unless an expression is truthy. Supports `{refs}`, `== != < > <= >=`, `&& || !`, parentheses, and quoted strings/numbers. Pair with `join: "any"` on the merge phase for real if/else routing. Parse errors **fail open**.
|
|
219
|
+
- **`join: "any"`** — an OR-join: the phase runs as soon as *one* dependency completes (default `"all"` waits for all).
|
|
220
|
+
- **`retry`** — `{ "max": 2, "backoffMs": 500, "factor": 2 }` retries a failing subagent with fixed or exponential backoff; usage is summed and the attempt count shows as `↻N` in the TUI. Transient provider errors (rate-limit / 5xx / timeout) **auto-retry even without an explicit policy**; hard errors don't.
|
|
221
|
+
- **`approval`** — pause for a human (Approve / Reject / Edit). Reject halts the flow; Edit injects the typed note as the phase output for downstream steps. Non-interactive runs auto-approve.
|
|
222
|
+
- **`flow`** — `{ "type": "flow", "use": "deep-research", "with": { "topic": "{item}" } }` runs a saved flow as a phase (recursion is detected and rejected).
|
|
223
|
+
- **`budget`** — a run-wide `{maxUSD, maxTokens}` ceiling; once exceeded, pending phases skip and in-flight fan-out stops spawning, ending the run as `blocked`.
|
|
224
|
+
- **idle watchdog** — a subagent that goes silent for 5 minutes is treated as wedged and killed (SIGTERM → SIGKILL), so one hung child can never freeze the whole flow.
|
|
272
225
|
|
|
273
226
|
### Gate phases (quality control)
|
|
274
227
|
|
|
275
|
-
A `gate` runs an agent to review upstream output and can **block the rest
|
|
276
|
-
of the workflow**. End the gate task's instructions by asking the agent to
|
|
277
|
-
emit a verdict the runtime can read:
|
|
228
|
+
A `gate` runs an agent to review upstream output and can **block the rest of the workflow.** End the gate task by asking for a verdict the runtime can read:
|
|
278
229
|
|
|
279
|
-
- a final line `VERDICT: PASS` or `VERDICT: BLOCK` (also accepts `OK`, `FAIL`,
|
|
280
|
-
|
|
281
|
-
- JSON like `{"continue": false, "reason": "missing auth checks"}` /
|
|
282
|
-
`{"verdict": "block", "reason": "..."}`.
|
|
230
|
+
- a final line `VERDICT: PASS` or `VERDICT: BLOCK` (also accepts `OK`, `FAIL`, `STOP`, `REJECT`, `HALT` — last occurrence wins), or
|
|
231
|
+
- JSON like `{"continue": false, "reason": "missing auth checks"}` / `{"verdict": "block", "reason": "..."}`.
|
|
283
232
|
|
|
284
|
-
On **BLOCK**, downstream phases
|
|
285
|
-
the reason surfaced. **Ambiguous output fails open** (treated as PASS) — a gate
|
|
286
|
-
never halts the flow by accident.
|
|
233
|
+
On **BLOCK**, downstream phases skip and the run ends as `blocked` with the reason surfaced. **Ambiguous output fails open** (treated as PASS) — a gate never halts your flow by accident.
|
|
287
234
|
|
|
288
235
|
```
|
|
289
|
-
Review the audit
|
|
236
|
+
Review the audit below. If any endpoint is missing auth, end with
|
|
290
237
|
"VERDICT: BLOCK" and a one-line reason; otherwise end with "VERDICT: PASS".
|
|
291
238
|
|
|
292
239
|
{steps.audit.output}
|
|
293
240
|
```
|
|
294
241
|
|
|
295
|
-
## Interpolation
|
|
242
|
+
## Interpolation & expressions
|
|
296
243
|
|
|
297
244
|
| placeholder | resolves to |
|
|
298
245
|
|---|---|
|
|
@@ -302,9 +249,13 @@ Review the audit results below. If any endpoint is missing auth, end with
|
|
|
302
249
|
| `{item}` / `{item.field}` | current item inside a `map` phase |
|
|
303
250
|
| `{previous.output}` | the immediately-upstream phase output |
|
|
304
251
|
|
|
252
|
+
Condition grammar (for `when`): `== != < > <= >=`, `&& || !`, parentheses, quoted strings/numbers, and any `{...}` reference — e.g. `"when": "{steps.triage.json.route} == deep && {args.force} != true"`.
|
|
253
|
+
|
|
254
|
+
> Referencing `{steps.X}` from a phase that doesn't declare `X` in its `dependsOn` is a **hard validation error** — the runtime catches the most common pipeline bug before a single agent runs.
|
|
255
|
+
|
|
305
256
|
## Commands
|
|
306
257
|
|
|
307
|
-
Saved flows become CLI shortcuts. All commands
|
|
258
|
+
Saved flows become CLI shortcuts. All commands run in the Pi session:
|
|
308
259
|
|
|
309
260
|
| Command | What it does |
|
|
310
261
|
|---|---|
|
|
@@ -315,18 +266,19 @@ Saved flows become CLI shortcuts. All commands work in the pi session:
|
|
|
315
266
|
| `/tf resume <runId>` | Continue a paused/failed run — cached phases skip automatically |
|
|
316
267
|
| `/tf:<name> [args]` | Shortcut — runs the flow in one tap |
|
|
317
268
|
|
|
318
|
-
Tool actions (used by the model): `run` (inline `define` or saved `name`),
|
|
319
|
-
`save`, `resume`, `list`.
|
|
269
|
+
Tool actions (used by the model): `run` (inline `define` or saved `name`), `save`, `resume`, `list`.
|
|
320
270
|
|
|
321
271
|
## Storage
|
|
322
272
|
|
|
323
273
|
```
|
|
324
274
|
.pi/taskflows/<name>.json # project-scoped definitions (commit to share)
|
|
325
275
|
~/.pi/agent/taskflows/<name>.json # user-scoped definitions
|
|
326
|
-
.pi/taskflows/runs/<runId>.json # run state
|
|
276
|
+
.pi/taskflows/runs/<runId>.json # run state for resume (gitignore this)
|
|
327
277
|
```
|
|
328
278
|
|
|
329
|
-
|
|
279
|
+
> Commit the definitions in `.pi/taskflows/` (but gitignore `.pi/taskflows/runs/`) and your whole team shares the pipelines — no config sync, no onboarding doc. Run state is written atomically and guarded by a zero-dependency file lock, so concurrent runs never corrupt the index.
|
|
280
|
+
|
|
281
|
+
Agent discovery scope (via `agentScope` in the flow definition):
|
|
330
282
|
|
|
331
283
|
| value | discovers agents from |
|
|
332
284
|
|---|---|
|
|
@@ -336,20 +288,11 @@ Agent discovery scope (set via `agentScope` in the flow definition):
|
|
|
336
288
|
|
|
337
289
|
## Agents
|
|
338
290
|
|
|
339
|
-
Taskflow reuses your existing
|
|
340
|
-
`.pi/agents/*.md`). Reference agents by `name` in a phase or shorthand.
|
|
341
|
-
|
|
342
|
-
When running a phase, the runtime extracts the agent's `systemPrompt` from its
|
|
343
|
-
`.md` frontmatter and passes it via `--append-system-prompt` (written to a temp
|
|
344
|
-
file). Phase-level overrides for `model`, `thinking`, and `tools` are passed as
|
|
345
|
-
`--model` / `--thinking` / `--tools` flags to the subagent invocation.
|
|
346
|
-
|
|
347
|
-
Settings from `~/.pi/agent/settings.json` (the `subagents.agentOverrides` map)
|
|
348
|
-
are honored, letting you tweak model, thinking, or tools per agent across all flows.
|
|
291
|
+
Taskflow reuses your existing Pi agent files (`~/.pi/agent/agents/*.md`, `.pi/agents/*.md`) — reference them by `name` in any phase or shorthand. The runtime extracts each agent's `systemPrompt` from its `.md` frontmatter and passes it via `--append-system-prompt`; phase-level `model` / `thinking` / `tools` overrides map to the matching subagent flags. Settings from `~/.pi/agent/settings.json` (`subagents.agentOverrides`) are honored across all flows.
|
|
349
292
|
|
|
350
293
|
## Examples
|
|
351
294
|
|
|
352
|
-
Ready-to-read definitions
|
|
295
|
+
Ready-to-read definitions in [`examples/`](./examples):
|
|
353
296
|
|
|
354
297
|
| File | Demonstrates |
|
|
355
298
|
|---|---|
|
|
@@ -357,37 +300,33 @@ Ready-to-read definitions live in [`examples/`](./examples):
|
|
|
357
300
|
| [`conditional-research.json`](./examples/conditional-research.json) | `when` routing + `join: any` + `gate` + `budget` |
|
|
358
301
|
| [`guarded-refactor.json`](./examples/guarded-refactor.json) | `approval` (human-in-the-loop) + `retry` + `gate` |
|
|
359
302
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
303
|
+
Copy one into `.pi/taskflows/<name>.json` (or `~/.pi/agent/taskflows/`) and it registers as `/tf:<name>` — or just point the model at it.
|
|
304
|
+
|
|
305
|
+
## What's inside
|
|
306
|
+
|
|
307
|
+
<div align="center">
|
|
308
|
+
|
|
309
|
+
**0 runtime dependencies** · **265 tests** · **7 phase types** · **cross-session resume** · **~4.4k LOC runtime**
|
|
310
|
+
|
|
311
|
+
</div>
|
|
312
|
+
|
|
313
|
+
- **Zero runtime dependencies.** No `dependencies` field — the runtime is built entirely on Node built-ins (`fs` / `path` / `os` / `child_process` / `crypto`). The file lock is an exclusive-create `fs.openSync(path, "wx")`, not a third-party library.
|
|
314
|
+
- **265 tests across 11 suites** covering concurrency, atomic file locking (8-process race regressions), path-traversal hardening, cross-session resume, gate verdicts, budget caps, retry/backoff, approval flows, sub-flow composition, callback isolation, and the idle watchdog — plus a live end-to-end test that spawns real subagents.
|
|
315
|
+
- **Hardened by design.** Path-traversal defense (lexical + `realpath`), runId validation, HTML/error sanitization, atomic writes, stale-lock stealing via `rename`, and an idle watchdog that kills wedged subagents.
|
|
316
|
+
- **Dogfooded.** Every new feature has to survive the project's own `self-improve` taskflow before it ships.
|
|
317
|
+
|
|
318
|
+
If this saves you a context window, **drop a ⭐ on [GitHub](https://github.com/heggria/pi-taskflow)** — it genuinely helps.
|
|
363
319
|
|
|
364
320
|
## Status & limits
|
|
365
321
|
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
-
|
|
374
|
-
- `map` requires the upstream phase to emit a JSON array (`output: "json"`).
|
|
375
|
-
- Gate verdicts are **fail-open**: if the agent output contains no recognizable
|
|
376
|
-
verdict marker (`VERDICT: BLOCK/PASS/OK/FAIL/STOP/REJECT/HALT` or
|
|
377
|
-
`{continue: false}` / `{verdict: "block"}`), the gate passes. This prevents
|
|
378
|
-
an accidental missing verdict from blocking your workflow.
|
|
379
|
-
|
|
380
|
-
### What it doesn't do (yet)
|
|
381
|
-
|
|
382
|
-
- **No detached background execution.** A run needs the pi session to stay open.
|
|
383
|
-
True background execution (and event/cron triggers on top of it) is on the
|
|
384
|
-
roadmap.
|
|
385
|
-
- **No `output: "file"`.** Outputs are text/JSON only. Write files via agent
|
|
386
|
-
tool calls if needed.
|
|
387
|
-
- **`map` requires a JSON array.** The `over` field must resolve to
|
|
388
|
-
`{steps.ID.json}` where the upstream phase emitted `output: "json"`. If the
|
|
389
|
-
source is a plain text list, wrap it in a single-agent phase that outputs JSON.
|
|
390
|
-
- **Cycles are rejected at validation.** The DAG must be acyclic.
|
|
322
|
+
**v0.0.10** — full control-flow & reliability layer (`when` guards, `join: any`, `retry`/backoff, `approval`, `flow` composition, `budget` caps) on top of the DSL + DAG runtime (`agent`/`parallel`/`map`/`gate`/`reduce`), inline + saved flows, cross-session resume, live progress, and isolated context. A run executes as one streaming tool call.
|
|
323
|
+
|
|
324
|
+
Known boundaries (tracked, bounded — no surprises mid-flow):
|
|
325
|
+
|
|
326
|
+
- **No detached background execution.** A run needs the Pi session open. True background execution (and event/cron triggers on top of it) is on the roadmap.
|
|
327
|
+
- **No `output: "file"`.** Outputs are text/JSON only — write files via an agent's `write` tool call.
|
|
328
|
+
- **`map` requires a JSON array.** The `over` field must resolve to a `{steps.ID.json}` array. If the source is a plain-text list, first convert it with a single `agent` phase that sets `output: "json"`.
|
|
329
|
+
- **The DAG must be acyclic.** Cycles are rejected at validation.
|
|
391
330
|
|
|
392
331
|
## Development
|
|
393
332
|
|
|
@@ -395,16 +334,14 @@ model at the definition.
|
|
|
395
334
|
npm install
|
|
396
335
|
npm run typecheck
|
|
397
336
|
npm test # unit tests — no network, no process spawning
|
|
398
|
-
|
|
399
|
-
# real end-to-end (spawns live subagents; needs model access)
|
|
400
|
-
npm run test:e2e
|
|
337
|
+
npm run test:e2e # real end-to-end (spawns live subagents; needs model access)
|
|
401
338
|
```
|
|
402
339
|
|
|
340
|
+
Runtime lives in `extensions/`, tests in `test/`, runnable examples in `examples/`, and the full design rationale in [`DESIGN.md`](./DESIGN.md).
|
|
341
|
+
|
|
403
342
|
## Contributing
|
|
404
343
|
|
|
405
|
-
Contributions welcome — open an issue or PR on
|
|
406
|
-
[GitHub](https://github.com/heggria/pi-taskflow). Tests live in `test/`, the
|
|
407
|
-
runtime in `extensions/`.
|
|
344
|
+
Contributions welcome — this is a young, fast-moving project. Open an issue or PR on [GitHub](https://github.com/heggria/pi-taskflow). Good first contributions: new example flows, phase-type ideas, and TUI polish.
|
|
408
345
|
|
|
409
346
|
## License
|
|
410
347
|
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
{
|
|
26
26
|
"id": "implement",
|
|
27
27
|
"type": "agent",
|
|
28
|
-
"agent": "
|
|
28
|
+
"agent": "executor-code",
|
|
29
29
|
"dependsOn": ["approve", "plan"],
|
|
30
30
|
"task": "Implement the approved plan for {args.target}.\nPlan:\n{steps.plan.output}\nExtra human guidance (if any):\n{steps.approve.output}",
|
|
31
31
|
"retry": { "max": 1, "backoffMs": 1000 }
|
package/extensions/index.ts
CHANGED
|
@@ -169,6 +169,14 @@ async function runFlow(
|
|
|
169
169
|
const scope: AgentScope = def.agentScope ?? "user";
|
|
170
170
|
const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
|
|
171
171
|
|
|
172
|
+
// Pre-flight: warn if any phase references an agent not in the registry
|
|
173
|
+
const agentNames = new Set(agents.map(a => a.name));
|
|
174
|
+
for (const p of def.phases ?? []) {
|
|
175
|
+
if (p.agent && !agentNames.has(p.agent)) {
|
|
176
|
+
console.warn(`[taskflow] Warning: phase '${p.id}' references agent '${p.agent}' which was not found. Available: ${[...agentNames].join(", ")}`);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
172
180
|
const result = await executeTaskflow(state, {
|
|
173
181
|
cwd: ctx.cwd,
|
|
174
182
|
agents,
|
package/extensions/render.ts
CHANGED
|
@@ -53,7 +53,12 @@ function elapsed(ms: number): string {
|
|
|
53
53
|
|
|
54
54
|
function phaseElapsed(ps: PhaseState): number {
|
|
55
55
|
if (!ps.startedAt) return 0;
|
|
56
|
-
|
|
56
|
+
// Guard against a stale/clock-skewed endedAt that precedes startedAt (e.g. a
|
|
57
|
+
// resumed phase that still carries a previous attempt's endedAt): treat such
|
|
58
|
+
// an end time as absent and fall back to now. Finally clamp to >= 0 so the
|
|
59
|
+
// TUI never shows a negative (and frozen) elapsed time.
|
|
60
|
+
const end = ps.endedAt && ps.endedAt >= ps.startedAt ? ps.endedAt : Date.now();
|
|
61
|
+
return Math.max(0, end - ps.startedAt);
|
|
57
62
|
}
|
|
58
63
|
|
|
59
64
|
function miniBar(done: number, total: number, theme: Theme, width = 8): string {
|
|
@@ -91,7 +96,7 @@ function runElapsed(state: RunState): number {
|
|
|
91
96
|
const min = Math.min(...starts);
|
|
92
97
|
const ends = Object.values(state.phases).map((p) => p.endedAt ?? Date.now());
|
|
93
98
|
const max = ends.length ? Math.max(...ends) : Date.now();
|
|
94
|
-
return max - min;
|
|
99
|
+
return Math.max(0, max - min);
|
|
95
100
|
}
|
|
96
101
|
|
|
97
102
|
export function summarizeRun(state: RunState): string {
|