pi-taskflow 0.0.12 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -23
- package/extensions/cache.ts +263 -0
- package/extensions/index.ts +147 -118
- package/extensions/init.ts +607 -0
- package/extensions/render.ts +39 -0
- package/extensions/runtime.ts +342 -17
- package/extensions/schema.ts +166 -1
- package/extensions/store.ts +16 -2
- package/package.json +4 -3
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
<a href="https://www.npmjs.com/package/pi-taskflow"><img src="https://img.shields.io/npm/dm/pi-taskflow?style=flat-square&color=6E8BFF&label=downloads" alt="npm downloads"></a>
|
|
8
8
|
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-43D9AD?style=flat-square" alt="MIT license"></a>
|
|
9
9
|
<a href="#whats-inside"><img src="https://img.shields.io/badge/runtime%20deps-0-43D9AD?style=flat-square" alt="zero runtime dependencies"></a>
|
|
10
|
-
<a href="#whats-inside"><img src="https://img.shields.io/badge/tests-
|
|
10
|
+
<a href="#whats-inside"><img src="https://img.shields.io/badge/tests-371-6E8BFF?style=flat-square" alt="371 tests"></a>
|
|
11
11
|
<a href="https://pi.dev"><img src="https://img.shields.io/badge/for-Pi%20coding%20agent-B692FF?style=flat-square" alt="for the Pi coding agent"></a>
|
|
12
12
|
</p>
|
|
13
13
|
|
|
@@ -61,26 +61,33 @@ It doesn't replace the subagent tool. It gives your subagents a DAG, a memory, a
|
|
|
61
61
|
|
|
62
62
|
## Compared to other Pi extensions
|
|
63
63
|
|
|
64
|
-
The Pi ecosystem has
|
|
64
|
+
The Pi ecosystem now has **20+ delegation, workflow, and orchestration extensions** — each great at what it's for. Here's an honest map of where `pi-taskflow` sits (verified against each package's latest npm release, June 2026). For the full breakdown — every package, strengths *and* weaknesses — see [`PI-ECOSYSTEM.md`](./PI-ECOSYSTEM.md). For the broader, non-Pi landscape (LangGraph, Temporal, CrewAI, Mastra…) see [`COMPETITORS.md`](./COMPETITORS.md).
|
|
65
65
|
|
|
66
66
|
| Extension | Model | Custom DSL | DAG | Dynamic fan-out | Cross-session resume | Quality gate | Human approval | Save as command | Zero deps |
|
|
67
67
|
|---|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
68
|
-
| **pi-taskflow** | **declarative multi-phase taskflows** | **✓** | **✓** | **✓ `map`** |
|
|
69
|
-
| [
|
|
70
|
-
| [`pi-
|
|
71
|
-
| [`pi
|
|
72
|
-
| [
|
|
73
|
-
| [
|
|
74
|
-
| [`pi-
|
|
68
|
+
| **pi-taskflow** | **declarative multi-phase taskflows** | **✓** | **✓** | **✓ `map`** | **✓ phase-hash** | **✓** | **✓** | **✓ `/tf:<name>`** | **✓** |
|
|
69
|
+
| [`@pi-agents/orchid`](https://www.npmjs.com/package/@pi-agents/orchid) | opinionated 9-phase pipeline + Ralph loop | fixed | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✕ (2) |
|
|
70
|
+
| [`pi-crew`](https://www.npmjs.com/package/pi-crew) | role teams + git worktrees + async | partial | ✓ | ✓ | ✓ | ✓ | ✓ | – | ✕ (7) |
|
|
71
|
+
| [`ultimate-pi`](https://www.npmjs.com/package/ultimate-pi) | governed plan→execute→review harness | YAML contracts | ✓ (plan-time) | ✕ | ✓ | ✓ (3-tier) | ✓ | ✓ | ✕ (16) |
|
|
72
|
+
| [`@zhushanwen/pi-workflow`](https://www.npmjs.com/package/@zhushanwen/pi-workflow) | JS scripts (`agent`/`parallel`/`pipeline`) | yes (JS) | ✕ (linear) | ✓ | ✓ | ✕ | ✕ | ✓ (call cache) | ✓ |
|
|
73
|
+
| [`@fiale-plus/pi-rogue-orchestration`](https://www.npmjs.com/package/@fiale-plus/pi-rogue-orchestration) | timer loop + goal resolution | ✕ | ✕ | ✕ | ✓ | ✓ (goal-check) | ✕ | ✕ | ✓ |
|
|
74
|
+
| [`pi-subagents`](https://www.npmjs.com/package/pi-subagents) | single / parallel / chain delegation | ✕ | ✕ | static | – | ✕ | clarify | named workflows | ✕ (3) |
|
|
75
|
+
| [`@gotgenes/pi-subagents`](https://www.npmjs.com/package/@gotgenes/pi-subagents) | Claude-Code-style subagents + worktrees | ✕ | ✕ | ✕ | ✓ (by id) | ✕ | per-agent | ✕ | ✕ (1) |
|
|
76
|
+
| [`pi-pipeline`](https://www.npmjs.com/package/pi-pipeline) | fixed SPEC→PLAN→TASKS→VERIFY | ✕ | fixed | ✕ | session planning | ✓ | clarify | ✕ | ✕ (2) |
|
|
77
|
+
| [`pi-agent-flow`](https://www.npmjs.com/package/pi-agent-flow) | one-shot parallel specialist `fork` | yes | ✕ | ✕ | – | ✕ | ✕ | – | ✕ (2) |
|
|
78
|
+
|
|
79
|
+
*(Representative slice of the 20+ — see [`PI-ECOSYSTEM.md`](./PI-ECOSYSTEM.md) for all of them, plus `@0xkobold/pi-orchestration`, `@melihmucuk/pi-crew`, `@mediadatafusion/pi-workflow-suite`, `gentle-pi`, `@dreki-gg/pi-subagent`, and more.)*
|
|
75
80
|
|
|
76
81
|
**How to choose:**
|
|
77
82
|
|
|
78
|
-
-
|
|
79
|
-
- **`pi-
|
|
80
|
-
-
|
|
81
|
-
-
|
|
83
|
+
- **`@pi-agents/orchid`** is the most feature-complete orchestrator in the ecosystem (DAG + worktrees + Ralph loop + agent mailbox) — but its DSL is a *fixed* 9-phase pipeline, it carries runtime deps + jiti, and it's beta. Reach for `pi-taskflow` when you want to **define your own graph** (not adopt an opinionated one) with **zero dependencies** and a one-command install.
|
|
84
|
+
- **`pi-crew` / `ultimate-pi`** go heavier — worktree isolation, durable async teams, multi-tier governance. If you want lightweight, declarative, and zero-dependency, that's this project.
|
|
85
|
+
- **`@zhushanwen/pi-workflow`** is the closest in spirit and also zero-dep, but you author workflows as **JavaScript scripts**. `pi-taskflow`'s **declarative JSON DSL** is safer and more auditable, and its **phase-level input-hash resume** is more granular than call-cache dedup.
|
|
86
|
+
- **`@fiale-plus/pi-rogue-orchestration`** is built around a timer-driven **loop-until-done** with goal resolution. `pi-taskflow` now covers bounded iteration with its own [`loop` phase](#loop-until-done-loop), but if your whole job is "keep going until the goal is met," it's worth a look; `pi-taskflow` is for *structured, branching* pipelines instead.
|
|
87
|
+
- **`pi-subagents` / `@gotgenes/pi-subagents`** are the mature picks for ad-hoc "use reviewer on this diff" delegation and background jobs. `pi-taskflow` is for when those delegations need to become a *repeatable, resumable pipeline*.
|
|
88
|
+
- **`pi-pipeline` / `pi-agent-flow`** ship *opinionated, fixed* flows. `pi-taskflow` ships an *empty canvas*: you (or the model) declare the graph that fits the job.
|
|
82
89
|
|
|
83
|
-
> The honest one-liner:
|
|
90
|
+
> The honest one-liner: **`pi-taskflow` is the only Pi extension that gives you a declarative, resumable, DAG-shaped subagent pipeline you save as a one-word command — with zero runtime dependencies and context isolation by design.** The known gaps it's closing next: worktree isolation and non-blocking background runs (see [`STRATEGY.md`](./STRATEGY.md)).
|
|
84
91
|
|
|
85
92
|
## 30-second start
|
|
86
93
|
|
|
@@ -90,6 +97,10 @@ The Pi ecosystem has a healthy crowd of delegation and orchestration extensions
|
|
|
90
97
|
pi install npm:pi-taskflow
|
|
91
98
|
```
|
|
92
99
|
|
|
100
|
+
> **Optional:** run `/tf init` once to map the 18 built-in agents' model roles
|
|
101
|
+
> (`fast`, `strong`, `thinker`, …) to your own models — an interactive picker.
|
|
102
|
+
> Skip it and agents just use Pi's default model. See [Model roles](#model-roles).
|
|
103
|
+
|
|
93
104
|
**2. Run** — just ask the model in a Pi session:
|
|
94
105
|
|
|
95
106
|
> *Run a chain: first explore the auth flow, then summarize the findings.*
|
|
@@ -218,6 +229,8 @@ No scripting. No `eval`. Just data the runtime executes — safe enough to run L
|
|
|
218
229
|
| `reduce` | aggregate `from[]` phase outputs into one | `from`, `task` |
|
|
219
230
|
| `approval` | **human-in-the-loop** pause — approve / reject / edit | — |
|
|
220
231
|
| `flow` | run a **saved sub-flow** as one phase (composition) | `use` |
|
|
232
|
+
| `loop` | **iterate a task until done** — re-run a body until a condition, convergence, or a cap | `task`, `until` |
|
|
233
|
+
| `tournament` | **N variants compete**, a judge picks the best (or aggregates) | `task` \| `branches` |
|
|
221
234
|
|
|
222
235
|
### Common phase fields
|
|
223
236
|
|
|
@@ -237,6 +250,7 @@ Every phase needs a unique `id` and a `type` (defaults to `agent`). On top of th
|
|
|
237
250
|
| `final` | Marks the result-bearing phase (else the last phase wins) |
|
|
238
251
|
| `optional` | A failure here does **not** abort the run |
|
|
239
252
|
| `use` / `with` | (`flow`) saved sub-flow name + its args |
|
|
253
|
+
| `cache` | `{ scope, ttl?, fingerprint? }` — cross-run memoization (see below) |
|
|
240
254
|
|
|
241
255
|
Flow-level keys: `name`, `description`, `args`, `concurrency` (default 8), `agentScope`, and `budget: { maxUSD?, maxTokens? }`.
|
|
242
256
|
|
|
@@ -247,9 +261,73 @@ Flow-level keys: `name`, `description`, `args`, `concurrency` (default 8), `agen
|
|
|
247
261
|
- **`retry`** — `{ "max": 2, "backoffMs": 500, "factor": 2 }` retries a failing subagent with fixed or exponential backoff; usage is summed and the attempt count shows as `↻N` in the TUI. Transient provider errors (rate-limit / 5xx / timeout) **auto-retry even without an explicit policy**; hard errors don't.
|
|
248
262
|
- **`approval`** — pause for a human (Approve / Reject / Edit). Reject halts the flow; Edit injects the typed note as the phase output for downstream steps. Non-interactive runs auto-approve.
|
|
249
263
|
- **`flow`** — `{ "type": "flow", "use": "deep-research", "with": { "topic": "{item}" } }` runs a saved flow as a phase (recursion is detected and rejected).
|
|
264
|
+
|
|
265
|
+
### Loop-until-done (`loop`)
|
|
266
|
+
|
|
267
|
+
Some work is inherently iterative — refine a draft until a reviewer is satisfied, retry-and-improve until tests pass, converge on an answer. A `loop` phase re-runs one task body until a stop condition holds:
|
|
268
|
+
|
|
269
|
+
```jsonc
|
|
270
|
+
{
|
|
271
|
+
"id": "refine",
|
|
272
|
+
"type": "loop",
|
|
273
|
+
"task": "Improve this draft (iteration {loop.iteration}). Previous attempt:\n{loop.lastOutput}\n\nReturn JSON {\"draft\":\"…\",\"done\":true|false}.",
|
|
274
|
+
"until": "{steps.refine.json.done} == true", // the iteration's own output is exposed here
|
|
275
|
+
"output": "json",
|
|
276
|
+
"maxIterations": 6, // default 10, hard cap 100 — the loop ALWAYS terminates
|
|
277
|
+
"convergence": true // default: stop early if an iteration's output is identical to the last
|
|
278
|
+
}
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
- **Body locals** — the task can read `{loop.iteration}` (1-based), `{loop.lastOutput}` (the prior iteration's output), and `{loop.maxIterations}` to build on its own previous work; all three are also available to the `until` condition.
|
|
282
|
+
- **`until`** — evaluated after each iteration with the iteration's output exposed as `{steps.<thisId>.output}` / `.json`. Same operators as `when`. The loop stops the moment it's truthy.
|
|
283
|
+
- **Always terminates.** Four independent stops: `until` truthy, **convergence** (a fixed point — output identical to the previous iteration), **`maxIterations`** (hard-capped at 100), or a **failing iteration** (the phase fails with the partial output preserved). A malformed `until` **stops** the loop rather than spinning forever (fail-safe) and surfaces a warning on the phase.
|
|
284
|
+
- The TUI shows `↻N` with the stop reason (`done` / `converged` / `max` / `failed`); usage is summed across iterations. Like `gate`/`approval`, `loop` is **excluded from `cross-run` cache** (each run must iterate fresh).
|
|
285
|
+
|
|
286
|
+
### Tournament (`tournament`)
|
|
287
|
+
|
|
288
|
+
For open-ended work, the best result often comes from generating several candidates and picking the strongest — best-of-N with a judge, in one declarative phase:
|
|
289
|
+
|
|
290
|
+
```jsonc
|
|
291
|
+
{
|
|
292
|
+
"id": "headline",
|
|
293
|
+
"type": "tournament",
|
|
294
|
+
"task": "Write a punchy headline for this launch post.",
|
|
295
|
+
"variants": 4, // spawn 4 competitors of the SAME task (default 3, max 20)
|
|
296
|
+
"judge": "Pick the headline with the strongest hook and clearest promise.",
|
|
297
|
+
"judgeAgent": "reviewer", // optional; defaults to the phase agent
|
|
298
|
+
"mode": "best" // "best" (default) | "aggregate"
|
|
299
|
+
}
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
- **Competitors** — either `variants: N` copies of one `task` (diversity comes from model nondeterminism), or distinct `branches: [{task, agent?}, …]` when you want to pit *different approaches* against each other.
|
|
303
|
+
- **Judge** — after the fan-out, one judge agent sees every variant (numbered) plus your `judge` rubric and picks a winner via a `WINNER: <n>` line or `{"winner": n}`. An unreadable verdict **fails open** to variant 1; a failed judge falls back too — the work is never lost.
|
|
304
|
+
- **`mode`** — `best` returns the winning variant **verbatim**; `aggregate` returns the judge's **synthesized** answer combining the strongest parts.
|
|
305
|
+
- **Short-circuits:** if only one competitor survives, it wins with no judge call; if all fail, the phase fails. The TUI shows `⚑ N→#k`; usage sums variants + judge. Like `gate`, it's **excluded from `cross-run` cache**.
|
|
250
306
|
- **`budget`** — a run-wide `{maxUSD, maxTokens}` ceiling; once exceeded, pending phases skip and in-flight fan-out stops spawning, ending the run as `blocked`.
|
|
251
307
|
- **idle watchdog** — a subagent that goes silent for 5 minutes is treated as wedged and killed (SIGTERM → SIGKILL), so one hung child can never freeze the whole flow.
|
|
252
308
|
|
|
309
|
+
### Cross-run memoization (`cache`)
|
|
310
|
+
|
|
311
|
+
Every phase is already content-addressed: within a single run's **resume**, a phase whose resolved inputs are unchanged is skipped. `cache` extends that reuse **across independent runs** — if any prior run computed a phase with an identical input hash, its result is reused for **$0.00**.
|
|
312
|
+
|
|
313
|
+
```jsonc
|
|
314
|
+
{
|
|
315
|
+
"id": "analyze-auth",
|
|
316
|
+
"task": "Summarize how the auth module works.",
|
|
317
|
+
"context": ["src/auth/**/*.ts"],
|
|
318
|
+
"cache": {
|
|
319
|
+
"scope": "cross-run", // "run-only" (default) | "cross-run" | "off"
|
|
320
|
+
"ttl": "6h", // optional max age before a hit is treated as a miss
|
|
321
|
+
"fingerprint": ["git:HEAD", "glob:src/auth/**/*.ts"] // fold world-state into the key
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
- **`scope`** — `"run-only"` (default) is exactly the historical behavior (within-run resume only). `"cross-run"` opts the phase into the persistent store. `"off"` disables reuse entirely (even within a run), for debugging.
|
|
327
|
+
- **Freshness is the whole game.** The cache key already includes the prompt, the `over` items, and any `context` files (pre-read into the task). `fingerprint` folds *implicit* inputs into the key so "the world changed" becomes a cache miss: `git:HEAD`, `glob:<pat>` (size+mtime), `glob!:<pat>` (content hash), `file:<path>`, `env:<NAME>`. `ttl` (`30m`/`6h`/`7d`) is a time backstop.
|
|
328
|
+
- **Honest limit:** a subagent that reads a file it didn't declare in `context`/`fingerprint` can still serve a stale `cross-run` hit. That's why the default is `run-only` and why `gate`/`approval` phases are **forbidden** from `cross-run` (they must produce a fresh result each run). Opt in only for phases whose output is a function of declared inputs.
|
|
329
|
+
- Cache lives in `.pi/taskflows/cache/` (gitignored). Clear it with `action: "cache-clear"`. Full rationale: [`docs/rfc-cross-run-memoization.md`](./docs/rfc-cross-run-memoization.md).
|
|
330
|
+
|
|
253
331
|
### Gate phases (quality control)
|
|
254
332
|
|
|
255
333
|
A `gate` runs an agent to review upstream output and can **block the rest of the workflow.** End the gate task by asking for a verdict the runtime can read:
|
|
@@ -291,10 +369,10 @@ Saved flows become CLI shortcuts. All commands run in the Pi session:
|
|
|
291
369
|
| `/tf show <name>` | Print a flow's definition |
|
|
292
370
|
| `/tf runs` | Browse recent run history (interactive TUI) |
|
|
293
371
|
| `/tf resume <runId>` | Continue a paused/failed run — cached phases skip automatically |
|
|
294
|
-
| `/tf init` |
|
|
372
|
+
| `/tf init` | **Interactively map model roles** to your enabled models (writes `~/.pi/agent/settings.json`) |
|
|
295
373
|
| `/tf:<name> [args]` | Shortcut — runs the flow in one tap |
|
|
296
374
|
|
|
297
|
-
Tool actions (used by the model): `run` (inline `define` or saved `name`), `save`, `resume`, `list`.
|
|
375
|
+
Tool actions (used by the model): `run` (inline `define` or saved `name`), `save`, `resume`, `list`, `init`, `cache-clear`.
|
|
298
376
|
|
|
299
377
|
## Resume across sessions
|
|
300
378
|
|
|
@@ -366,14 +444,57 @@ Each built-in agent's `model` field uses a **role placeholder** (e.g. `{{fast}}`
|
|
|
366
444
|
| `{{vision}}` | Multimodal — UI work, design reading | MiniMax M3 |
|
|
367
445
|
| `{{reasoner}}` | Cautious reasoning — security, risk | GLM 5.1 |
|
|
368
446
|
|
|
369
|
-
Without configuration, agents fall back to Pi's default model. To
|
|
447
|
+
Without configuration, agents fall back to Pi's default model. To map roles to real models, run the interactive setup:
|
|
370
448
|
|
|
371
449
|
```bash
|
|
372
|
-
# Auto-generate ~/.pi/agent/settings.json with default role mappings
|
|
373
450
|
/tf init
|
|
374
451
|
```
|
|
375
452
|
|
|
376
|
-
|
|
453
|
+
`/tf init` starts with an **action menu**. First-time users get a 2-option shortcut ("Use recommended defaults" / "Configure each role"). Returning users see the full 5-option menu:
|
|
454
|
+
|
|
455
|
+
```
|
|
456
|
+
? What do you want to do with model roles?
|
|
457
|
+
❯ Use recommended defaults
|
|
458
|
+
Configure each role
|
|
459
|
+
Edit one role
|
|
460
|
+
Show current roles
|
|
461
|
+
Cancel
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
The picker shows model **display names** with capability flags and current/recommended markers:
|
|
465
|
+
|
|
466
|
+
```
|
|
467
|
+
? Model for 'vision' — Multimodal (executor-ui, visual-explorer)
|
|
468
|
+
Current: openrouter/anthropic/claude-sonnet-4-6
|
|
469
|
+
Recommended: minimax/MiniMax-M3
|
|
470
|
+
───────────────
|
|
471
|
+
❯ MiniMax M3 (minimax/MiniMax-M3) · image ✓ · reasoning ✓ · (recommended)
|
|
472
|
+
Claude Sonnet 4.6 (openrouter/anthropic/...) · image ✓ · reasoning ✓ · (current)
|
|
473
|
+
GPT-5 (openrouter/openai/gpt-5) · image ✓
|
|
474
|
+
DeepSeek V4 Flash (openrouter/deepseek/v4-flash)
|
|
475
|
+
───────────────
|
|
476
|
+
Custom (type your own)
|
|
477
|
+
Keep current
|
|
478
|
+
Back to action menu
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
Before saving, a **preview screen** shows the diff of your changes:
|
|
482
|
+
|
|
483
|
+
```
|
|
484
|
+
? Review changes:
|
|
485
|
+
fast openrouter/deepseek/deepseek-v4-flash (unchanged)
|
|
486
|
+
strong openrouter/xiaomi/mimo-v2.5-pro (unchanged)
|
|
487
|
+
thinker openrouter/qwen/qwen3.7-max (changed ← was: openrouter/deepseek/v4-pro)
|
|
488
|
+
arbiter openrouter/qwen/qwen3.7-max (unchanged)
|
|
489
|
+
vision minimax/MiniMax-M3 (unchanged)
|
|
490
|
+
reasoner z-ai/glm-5.1 (unchanged)
|
|
491
|
+
───────────────
|
|
492
|
+
❯ Save these changes
|
|
493
|
+
Edit a role
|
|
494
|
+
Cancel
|
|
495
|
+
```
|
|
496
|
+
|
|
497
|
+
Your choices are written to `~/.pi/agent/settings.json`:
|
|
377
498
|
|
|
378
499
|
```json
|
|
379
500
|
{
|
|
@@ -388,7 +509,7 @@ This writes:
|
|
|
388
509
|
}
|
|
389
510
|
```
|
|
390
511
|
|
|
391
|
-
Edit the values
|
|
512
|
+
Edit the values manually any time, or just re-run `/tf init`. You can also override individual agents via `subagents.agentOverrides` in the same file:
|
|
392
513
|
|
|
393
514
|
```json
|
|
394
515
|
{
|
|
@@ -402,6 +523,18 @@ Edit the values to match your available providers. You can also override individ
|
|
|
402
523
|
}
|
|
403
524
|
```
|
|
404
525
|
|
|
526
|
+
### Tool path (`action="init"`)
|
|
527
|
+
|
|
528
|
+
The model can also configure roles via the `taskflow` tool:
|
|
529
|
+
|
|
530
|
+
| Mode | Behavior |
|
|
531
|
+
|---|---|
|
|
532
|
+
| `mode: "show"` (default) | Read-only report of current `modelRoles`. Never overwrites. |
|
|
533
|
+
| `mode: "apply-defaults"` + `force: true` | Writes `RECOMMENDED_DEFAULTS` to `settings.json`, preserving stale keys. |
|
|
534
|
+
| `mode: "interactive"` | Launches the full action menu + picker flow (requires a UI session). |
|
|
535
|
+
|
|
536
|
+
> **v0.0.13 deprecation note:** If `mode` is omitted, the tool falls back to v0.0.12 behavior when `modelRoles` is empty (auto-writes defaults) with a `console.warn` deprecation notice. If `modelRoles` already exists, it behaves as `mode: "show"`. This bridge will be removed in v0.0.14.
|
|
537
|
+
|
|
405
538
|
### Custom agents
|
|
406
539
|
|
|
407
540
|
Drop a `.md` file into `~/.pi/agent/agents/` (user-level) or `.pi/agents/` (project-level, commit it) to add your own:
|
|
@@ -441,12 +574,12 @@ Copy one into `.pi/taskflows/<name>.json` (or `~/.pi/agent/taskflows/`) and it r
|
|
|
441
574
|
|
|
442
575
|
<div align="center">
|
|
443
576
|
|
|
444
|
-
**0 runtime dependencies** · **
|
|
577
|
+
**0 runtime dependencies** · **371 tests** · **10 phase types** · **cross-session resume** · **cross-run memoization** · **~4.9k LOC runtime**
|
|
445
578
|
|
|
446
579
|
</div>
|
|
447
580
|
|
|
448
581
|
- **Zero runtime dependencies.** No `dependencies` field — the runtime is built entirely on Node built-ins (`fs` / `path` / `os` / `child_process` / `crypto`). The file lock is `fs.openSync("wx")`, not a third-party library.
|
|
449
|
-
- **
|
|
582
|
+
- **371 tests across 14 suites** covering concurrency, atomic file locking (8-process race regressions), path-traversal hardening, cross-session resume, cross-run cache freshness (flow/thinking/tools key isolation, fingerprint invalidation, TTL/LRU eviction), gate verdicts, budget caps, retry/backoff, approval flows, loop termination, tournament judging, sub-flow composition, callback isolation, the idle watchdog, model-role init config, and parseModelFromLabel with parenthesized-model-name regression — plus a live end-to-end test that spawns real subagents and a cross-run cache dogfood.
|
|
450
583
|
- **Hardened by design.** Path-traversal defense (lexical + `realpath`), runId validation, HTML/error sanitization, atomic writes, stale-lock stealing via `rename`, and an idle watchdog that kills wedged subagents.
|
|
451
584
|
- **Dogfooded.** Every new feature has to survive the project's own `self-improve` taskflow before it ships.
|
|
452
585
|
|
|
@@ -454,7 +587,7 @@ If this saves you a context window, **drop a ⭐ on [GitHub](https://github.com/
|
|
|
454
587
|
|
|
455
588
|
## Status & limits
|
|
456
589
|
|
|
457
|
-
**v0.0.
|
|
590
|
+
**v0.0.13** — loop-until-done (`loop` phase: iterate to a condition, convergence, or cap), tournament (best-of-N with a judge), cross-run memoization (content-addressed cache with git/file/glob/env fingerprints and TTL), interactive `/tf init` with role-aware model pickers + diff preview + atomic merge-write, 18 built-in agents with 6 model roles. Full control-flow & reliability layer (`when` guards, `join: any`, `retry`/backoff, `approval`, `flow` composition, `budget` caps, idle watchdog) on top of the DSL + DAG runtime (`agent`/`parallel`/`map`/`gate`/`reduce`). Inline + saved flows, cross-session resume, live progress, and isolated context. A run executes as one streaming tool call.
|
|
458
591
|
|
|
459
592
|
Known boundaries (tracked, bounded — no surprises mid-flow):
|
|
460
593
|
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-run memoization: fingerprint resolver + persistent phase-result cache.
|
|
3
|
+
*
|
|
4
|
+
* See docs/rfc-cross-run-memoization.md. The cache lets a phase reuse the result
|
|
5
|
+
* of an identical-input phase from ANY prior run (scope: "cross-run"), for $0.00.
|
|
6
|
+
* Freshness is guarded by:
|
|
7
|
+
* - the existing content-addressed inputHash (declared inputs)
|
|
8
|
+
* - optional `fingerprint` entries folded into the key (git/glob/file/env)
|
|
9
|
+
* - optional TTL
|
|
10
|
+
* - default `run-only` scope (this module is only consulted for cross-run)
|
|
11
|
+
*
|
|
12
|
+
* Zero runtime dependencies: Node built-ins only (fs.globSync requires Node >=22,
|
|
13
|
+
* which the project already targets).
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { execFileSync } from "node:child_process";
|
|
17
|
+
import * as crypto from "node:crypto";
|
|
18
|
+
import * as fs from "node:fs";
|
|
19
|
+
import * as path from "node:path";
|
|
20
|
+
import { cacheDir, withLock, writeFileAtomic } from "./store.ts";
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Fingerprint resolution
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
/** Per-file byte cap when content-hashing (mirrors store/context limits). */
const FINGERPRINT_MAX_FILE_BYTES = 10 * 1024 * 1024; // 10 MB
/** Cap on glob match count folded into a single fingerprint (defensive). */
const FINGERPRINT_MAX_GLOB_MATCHES = 5000;

/** First 16 hex characters of the SHA-256 digest of `data`. */
function fingerprintDigest(data: crypto.BinaryLike): string {
  return crypto.createHash("sha256").update(data).digest("hex").slice(0, 16);
}

/**
 * Resolve a `glob:` (size + mtime) or `glob!:` (content hash) entry to
 * `<entry>=<digest>`. Matches are sorted so the digest does not depend on
 * directory enumeration order.
 */
function fingerprintGlob(entry: string, cwd: string, maxGlobMatches: number): string {
  const contentMode = entry.startsWith("glob!:");
  const pattern = entry.slice(contentMode ? "glob!:".length : "glob:".length);

  let matches: string[];
  try {
    // fs.globSync (Node >=22) — resolved relative to `cwd`.
    matches = [...(fs.globSync(pattern, { cwd }) as string[])].sort();
  } catch {
    return `${entry}=<glob-error>`;
  }

  const total = matches.length;
  if (total > maxGlobMatches) matches = matches.slice(0, maxGlobMatches);

  const parts: string[] = [];
  for (const rel of matches) {
    const abs = path.resolve(cwd, rel);
    try {
      const st = fs.statSync(abs);
      if (!contentMode) {
        parts.push(`${rel}:${st.size}:${Math.floor(st.mtimeMs)}`);
      } else if (st.isFile() && st.size <= FINGERPRINT_MAX_FILE_BYTES) {
        parts.push(`${rel}:${fingerprintDigest(fs.readFileSync(abs))}`);
      } else {
        // Directories and oversized files are not content-hashed.
        parts.push(`${rel}:<skip>`);
      }
    } catch {
      parts.push(`${rel}:<stat-error>`);
    }
  }

  // When the match list was truncated, fold the total count into the digest so
  // files appearing or disappearing beyond the cap still change the key.
  // Untruncated globs produce exactly the same digest as before.
  if (total > matches.length) parts.push(`<truncated:${total}>`);

  return `${entry}=${fingerprintDigest(parts.join("\u0000"))}`;
}

/**
 * Resolve a single fingerprint entry to a deterministic string. Never throws:
 * missing files / non-git repos / unreadable paths resolve to a stable sentinel
 * so the key stays deterministic (and a later appearance of the resource simply
 * changes the key → cache miss, which is the safe direction).
 *
 * Supported prefixes: `git:<ref>` (empty ref means HEAD), `glob:<pattern>`,
 * `glob!:<pattern>`, `file:<path>`, `env:<NAME>`. Anything else resolves to
 * `<entry>=<unknown>`.
 *
 * @param entry          The fingerprint entry as written in the flow definition.
 * @param cwd            Directory that git, globs and relative paths resolve against.
 * @param maxGlobMatches Maximum number of glob matches hashed individually;
 *                       defaults to FINGERPRINT_MAX_GLOB_MATCHES.
 * @returns A string of the form `<entry-ish>=<value or sentinel>`.
 */
function resolveOne(
  entry: string,
  cwd: string,
  maxGlobMatches: number = FINGERPRINT_MAX_GLOB_MATCHES,
): string {
  try {
    if (entry.startsWith("git:")) {
      const ref = entry.slice("git:".length) || "HEAD";
      // Reject refs that could be interpreted as git options (e.g. "--exec=...").
      // The ref comes from a taskflow definition; refuse flag-like values so a
      // crafted definition can't smuggle arguments into git.
      if (ref.startsWith("-")) return `git:${ref}=<invalid-ref>`;
      try {
        const sha = execFileSync("git", ["rev-parse", ref], {
          cwd,
          encoding: "utf-8",
          stdio: ["ignore", "pipe", "ignore"],
        }).trim();
        return `git:${ref}=${sha}`;
      } catch {
        return `git:${ref}=<no-git>`;
      }
    }

    if (entry.startsWith("glob:") || entry.startsWith("glob!:")) {
      return fingerprintGlob(entry, cwd, maxGlobMatches);
    }

    if (entry.startsWith("file:")) {
      const rel = entry.slice("file:".length);
      const abs = path.resolve(cwd, rel);
      try {
        const st = fs.statSync(abs);
        if (!st.isFile() || st.size > FINGERPRINT_MAX_FILE_BYTES) return `file:${rel}=<skip>`;
        return `file:${rel}=${fingerprintDigest(fs.readFileSync(abs))}`;
      } catch {
        return `file:${rel}=<missing>`;
      }
    }

    if (entry.startsWith("env:")) {
      const name = entry.slice("env:".length);
      // An unset variable and an empty one resolve identically.
      return `env:${name}=${process.env[name] ?? ""}`;
    }
  } catch {
    // Fall through to sentinel below.
  }
  // Unknown prefixes are rejected at validation time; defensively encode.
  return `${entry}=<unknown>`;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Collapse a phase's `fingerprint` entries into one short hex string that is
 * folded into the cache key.
 *
 * Each entry is resolved with `resolveOne` in the order the author declared
 * (order is part of the key), the results are joined with NUL and hashed with
 * SHA-256, and the first 16 hex characters are returned. With no entries the
 * result is "" so phases without a fingerprint keep their existing key.
 *
 * @param entries Declared fingerprint entries, if any.
 * @param cwd     Directory the entries are resolved against.
 * @returns 16 hex characters, or "" when there is nothing to resolve.
 */
export function resolveFingerprint(entries: string[] | undefined, cwd: string): string {
  const count = entries ? entries.length : 0;
  if (count === 0) return "";

  const resolvedParts: string[] = [];
  for (const declared of entries as string[]) {
    resolvedParts.push(resolveOne(declared, cwd));
  }

  const hasher = crypto.createHash("sha256");
  hasher.update(resolvedParts.join("\u0000"));
  return hasher.digest("hex").slice(0, 16);
}
|
|
129
|
+
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
// Cross-run cache store
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
/** One persisted phase result in the cross-run cache (stored as a JSON file). */
export interface CacheEntry {
  /** Full cache key: the phase inputHash, fingerprint included. */
  key: string;
  /** Creation time in epoch milliseconds; compared against `Date.now()` for age checks. */
  createdAt: number;

  // Trimmed result surface that downstream phases consume.
  /** Text output of the phase. */
  output?: string;
  /** Parsed JSON output of the phase. */
  json?: unknown;
  /** Model recorded for the phase. */
  model?: string;

  // Provenance, kept for audit and cleanup.
  /** Name of the flow that produced the entry. */
  flowName?: string;
  /** Id of the phase that produced the entry. */
  phaseId?: string;
  /** Id of the run that produced the entry. */
  runId?: string;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Upper bound on stored cache entries. When exceeded, the entries with the
 * oldest `createdAt` are evicted first.
 */
const DEFAULT_MAX_ENTRIES = 1000;
/** Hard age backstop: entries older than 90 days are dropped regardless of TTL. */
const DEFAULT_MAX_AGE_MS = 90 * 24 * 3600 * 1000;
|
|
152
|
+
|
|
153
|
+
/** Accepted key shape: 8 to 64 lowercase hexadecimal characters, nothing else. */
const CACHE_KEY_PATTERN = /^[0-9a-f]{8,64}$/;

/**
 * Check that a cache key is a plain lowercase-hex string of 8–64 characters.
 * Keys become file names, so restricting them to hex prevents path traversal.
 */
function isValidKey(key: string): boolean {
  const looksLikeHexHash = CACHE_KEY_PATTERN.test(key);
  return looksLikeHexHash;
}
|
|
157
|
+
|
|
158
|
+
/** Path of the JSON file that stores the entry for `key` inside `dir`. */
function entryPath(dir: string, key: string): string {
  const fileName = key + ".json";
  return path.join(dir, fileName);
}
|
|
161
|
+
|
|
162
|
+
/**
 * The cross-run cache, scoped to a working directory. Cheap to construct; all IO
 * is lazy and failure-tolerant (a broken cache must never break a run).
 *
 * Each entry is one `<key>.json` file inside the directory returned by
 * `cacheDir(cwd)`.
 */
export class CacheStore {
  private readonly dir: string;

  /** @param cwd Working directory whose cache directory this store uses. */
  constructor(cwd: string) {
    this.dir = cacheDir(cwd);
  }

  /**
   * Read and minimally validate one entry file.
   * Returns null when the file is missing, unreadable, not valid JSON, not an
   * object, or lacks a numeric `createdAt`.
   */
  private static readEntry(file: string): CacheEntry | null {
    let parsed: unknown;
    try {
      parsed = JSON.parse(fs.readFileSync(file, "utf-8"));
    } catch {
      return null;
    }
    if (typeof parsed !== "object" || parsed === null) return null;
    const candidate = parsed as Partial<CacheEntry>;
    return typeof candidate.createdAt === "number" ? (candidate as CacheEntry) : null;
  }

  /**
   * Look up a fresh entry.
   *
   * @param key   Cache key; must pass `isValidKey`.
   * @param ttlMs Optional maximum age in milliseconds for this lookup.
   * @returns The entry, or null on miss, malformed key, malformed file, a file
   *          whose recorded key differs from `key`, or expiry (TTL or the hard
   *          age backstop).
   */
  get(key: string, ttlMs?: number): CacheEntry | null {
    if (!isValidKey(key)) return null;
    const entry = CacheStore.readEntry(entryPath(this.dir, key));
    if (entry === null) return null;
    // The file name and the recorded key must agree; a copied or renamed file
    // must not be served as a hit for a different input hash.
    if (entry.key !== key) return null;
    const age = Date.now() - entry.createdAt;
    if (age > DEFAULT_MAX_AGE_MS) return null;
    if (ttlMs !== undefined && age > ttlMs) return null;
    return entry;
  }

  /**
   * Store an entry (best-effort; never throws into the run). Entries with an
   * invalid key are ignored. The write happens under a per-entry lock file and
   * is followed by an opportunistic cleanup pass.
   */
  put(entry: CacheEntry): void {
    if (!isValidKey(entry.key)) return;
    try {
      fs.mkdirSync(this.dir, { recursive: true });
      const lock = path.join(this.dir, `${entry.key}.json.lock`);
      withLock(lock, () => {
        writeFileAtomic(entryPath(this.dir, entry.key), JSON.stringify(entry, null, 2));
      });
      this.cleanup();
    } catch {
      /* cache write failures are non-fatal */
    }
  }

  /**
   * Remove all cache entries (every `.json` file in the cache directory).
   * @returns The number of files removed.
   */
  clear(): number {
    let removed = 0;
    try {
      for (const name of fs.readdirSync(this.dir)) {
        if (!name.endsWith(".json")) continue;
        try {
          fs.unlinkSync(path.join(this.dir, name));
          removed++;
        } catch {
          /* ignore */
        }
      }
    } catch {
      /* no dir → nothing to clear */
    }
    return removed;
  }

  /**
   * Opportunistic eviction, best-effort. Deletes entry files that are
   * unreadable, malformed, or older than DEFAULT_MAX_AGE_MS; if more than
   * DEFAULT_MAX_ENTRIES remain, deletes the ones with the oldest `createdAt`.
   */
  private cleanup(): void {
    let files: string[];
    try {
      files = fs.readdirSync(this.dir).filter((f) => f.endsWith(".json"));
    } catch {
      return;
    }

    const now = Date.now();
    const live: Array<{ file: string; createdAt: number }> = [];
    for (const f of files) {
      const abs = path.join(this.dir, f);
      const entry = CacheStore.readEntry(abs);
      if (entry === null || now - entry.createdAt > DEFAULT_MAX_AGE_MS) {
        try {
          fs.unlinkSync(abs);
        } catch {
          /* ignore */
        }
        continue;
      }
      live.push({ file: abs, createdAt: entry.createdAt });
    }

    if (live.length > DEFAULT_MAX_ENTRIES) {
      live.sort((a, b) => a.createdAt - b.createdAt); // oldest first
      for (const victim of live.slice(0, live.length - DEFAULT_MAX_ENTRIES)) {
        try {
          fs.unlinkSync(victim.file);
        } catch {
          /* ignore */
        }
      }
    }
  }
}
|