zidane 5.1.17 → 5.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/ARCHITECTURE.md +581 -0
- package/docs/CHAT.md +1222 -0
- package/docs/INTERACTIONS.md +197 -0
- package/docs/SKILL.md +716 -0
- package/docs/TUI.md +487 -0
- package/package.json +3 -2
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
# Zidane Architecture
|
|
2
|
+
|
|
3
|
+
## Agent Lifecycle
|
|
4
|
+
|
|
5
|
+
```mermaid
|
|
6
|
+
flowchart TB
|
|
7
|
+
subgraph Creation ["createAgent()"]
|
|
8
|
+
C1[Create hooks] --> C2[Merge config]
|
|
9
|
+
C2 --> C3["Provider + Tools + Options"]
|
|
10
|
+
C3 --> C4["eager: true?"]
|
|
11
|
+
C4 -->|yes| C4B["Kick off warmup()\nin background"]
|
|
12
|
+
C4 -->|no| C5[Return Agent]
|
|
13
|
+
C4B --> C5
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
subgraph FirstRun ["First run() — lazy init"]
|
|
17
|
+
L1["executionContext.spawn()"] --> L2["warmup()\nawait in-flight OR\nconnect MCP servers\nin parallel (Promise.all)"]
|
|
18
|
+
L2 --> L3["resolveSkills()"]
|
|
19
|
+
L3 --> L4["buildCatalog() → system prompt"]
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
subgraph RunSetup ["run() setup"]
|
|
23
|
+
R1["Validate prompt\n(required unless session has turns)"] --> R2["session.startRun()"]
|
|
24
|
+
R2 --> R3["Resolve behavior\n(maxTurns, maxTokens, thinkingBudget, cache)"]
|
|
25
|
+
R3 --> R4["Build system prompt\n(agent system + skills catalog)"]
|
|
26
|
+
R4 --> R5["Build tools\n(agent tools + MCP)"]
|
|
27
|
+
R5 --> R6["Build turns\n(resume session + system + prompt?)"]
|
|
28
|
+
R6 --> R7["runLoop()"]
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
subgraph PostRun ["Post-loop"]
|
|
32
|
+
P1{"Aborted?"} -->|yes| P2["session.abortRun()"]
|
|
33
|
+
P1 -->|no| P3["session.completeRun()"]
|
|
34
|
+
P2 --> P4["agent:done hook"]
|
|
35
|
+
P3 --> P4
|
|
36
|
+
P4 --> P5["Return AgentStats"]
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
Creation --> FirstRun
|
|
40
|
+
FirstRun --> RunSetup
|
|
41
|
+
RunSetup --> PostRun
|
|
42
|
+
|
|
43
|
+
style Creation fill:#1a1a2e,stroke:#e94560,color:#fff
|
|
44
|
+
style FirstRun fill:#16213e,stroke:#0f3460,color:#fff
|
|
45
|
+
style RunSetup fill:#1a1a2e,stroke:#e94560,color:#fff
|
|
46
|
+
style PostRun fill:#16213e,stroke:#0f3460,color:#fff
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Turn Loop
|
|
50
|
+
|
|
51
|
+
```mermaid
|
|
52
|
+
flowchart TB
|
|
53
|
+
START((runLoop)) --> ABORT1{Aborted?}
|
|
54
|
+
ABORT1 -->|yes| DONE["agent:done hook\nReturn stats"]
|
|
55
|
+
ABORT1 -->|no| TURN["executeTurn()"]
|
|
56
|
+
|
|
57
|
+
TURN --> STATS["Accumulate usage"]
|
|
58
|
+
STATS --> ABORT2{Aborted?}
|
|
59
|
+
ABORT2 -->|yes| DONE
|
|
60
|
+
|
|
61
|
+
ABORT2 -->|no| STEER{Steering\nqueued?}
|
|
62
|
+
STEER -->|yes| INJECT_STEER["Inject steering\nas user turn"] --> CONTINUE
|
|
63
|
+
STEER -->|no| ENDED{LLM said\ndone?}
|
|
64
|
+
|
|
65
|
+
ENDED -->|no| CONTINUE((Next turn))
|
|
66
|
+
CONTINUE --> ABORT1
|
|
67
|
+
|
|
68
|
+
ENDED -->|yes| FOLLOWUP{Follow-up\nqueued?}
|
|
69
|
+
FOLLOWUP -->|yes| INJECT_FU["Inject follow-up\nas user turn"] --> CONTINUE
|
|
70
|
+
FOLLOWUP -->|no| DONE
|
|
71
|
+
|
|
72
|
+
style DONE fill:#0f3460,stroke:#e94560,color:#fff
|
|
73
|
+
style TURN fill:#1a1a2e,stroke:#533483,color:#fff
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Single Turn
|
|
77
|
+
|
|
78
|
+
```mermaid
|
|
79
|
+
flowchart TB
|
|
80
|
+
T1["Generate turnId"] --> T2["turnsToMessages()"]
|
|
81
|
+
T2 --> T2B["sanitizeStoredToolResults()\n(flatten images for non-vision providers)"]
|
|
82
|
+
T2B --> T3["context:transform hook"]
|
|
83
|
+
T3 --> T3B["system:transform hook\n(per-request system mutation)"]
|
|
84
|
+
T3B --> T3C["applyThinkingDecay()\n(per-turn budget taper)"]
|
|
85
|
+
T3C --> T4["turn:before hook"]
|
|
86
|
+
T4 --> STREAM
|
|
87
|
+
|
|
88
|
+
subgraph STREAM ["provider.stream()"]
|
|
89
|
+
S1["Send system + messages + tools to LLM"] --> S2["Stream text chunks"]
|
|
90
|
+
S2 --> S3["stream:text hook (per chunk)"]
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
STREAM -->|error| CATCH["turn:after hook (zero usage)\nRethrow"]
|
|
94
|
+
STREAM -->|ok| S4{"Has text?"}
|
|
95
|
+
S4 -->|yes| S5["stream:end hook"]
|
|
96
|
+
S4 -->|no| AFTER
|
|
97
|
+
S5 --> AFTER["turn:after hook (with usage)"]
|
|
98
|
+
|
|
99
|
+
AFTER --> DONE_CHECK{LLM done?\nNo tool calls}
|
|
100
|
+
|
|
101
|
+
DONE_CHECK -->|yes| SCHEMA{"behavior\n.schema?"}
|
|
102
|
+
SCHEMA -->|yes| ENFORCE["Force __output__ tool call\nExtract structured output\nFire output hook"]
|
|
103
|
+
SCHEMA -->|no| PUSH_FINAL["Return ended: true"]
|
|
104
|
+
ENFORCE --> PUSH_FINAL
|
|
105
|
+
DONE_CHECK -->|no| TOOLS
|
|
106
|
+
|
|
107
|
+
subgraph TOOLS ["Tool Execution"]
|
|
108
|
+
direction TB
|
|
109
|
+
TE1["Push assistant turn\n(with tool calls)"] --> TE2{Parallel\nmode?}
|
|
110
|
+
TE2 -->|sequential| SEQ["For each tool:\n1. Check abort + steering queue\n (on hit: synthesize Aborted/Skipped\n tool_results for remaining calls)\n2. executeSingleTool()"]
|
|
111
|
+
TE2 -->|parallel| PAR["Promise.allSettled(\nexecuteSingleTool)"]
|
|
112
|
+
SEQ --> TE3["Push tool results turn\n+ fire tool-results:after"]
|
|
113
|
+
PAR --> TE3
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
TOOLS --> RET["Return ended: false"]
|
|
117
|
+
|
|
118
|
+
style STREAM fill:#16213e,stroke:#0f3460,color:#fff
|
|
119
|
+
style TOOLS fill:#1a1a2e,stroke:#533483,color:#fff
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Tool Execution
|
|
123
|
+
|
|
124
|
+
```mermaid
|
|
125
|
+
flowchart TB
|
|
126
|
+
T1["tool:gate hook\n(block | result | runToolCounts)"] --> BLOCKED{"block?\n(wins over result)"}
|
|
127
|
+
BLOCKED -->|yes| T2["Return 'Blocked: reason'\n(does NOT increment runToolCounts)"]
|
|
128
|
+
BLOCKED -->|no| RES{result?}
|
|
129
|
+
RES -->|yes| RES1["increment runToolCounts\nfire tool:transform → tool:after\n(skip validate / tool:before / execute)"]
|
|
130
|
+
RES1 --> RES2["Return substitute"]
|
|
131
|
+
RES -->|no| INC["increment runToolCounts"]
|
|
132
|
+
INC --> T3{Tool\nexists?}
|
|
133
|
+
T3 -->|no| TU["tool:unknown hook\n(can substitute result\nor suppress error)"]
|
|
134
|
+
TU --> TUE{suppressError?}
|
|
135
|
+
TUE -->|no| T4["tool:error hook\n(can substitute result)"]
|
|
136
|
+
TUE -->|yes| T4B["(skip tool:error)"]
|
|
137
|
+
T4 --> TUR["Return ctx.result\n(or default 'Unknown tool: …')"]
|
|
138
|
+
T4B --> TUR
|
|
139
|
+
T3 -->|yes| T5["validateToolArgs()\n(auto-coerce types)"]
|
|
140
|
+
T5 --> VALID{Valid?}
|
|
141
|
+
VALID -->|no| VR["validation:reject hook\n(observational)"]
|
|
142
|
+
VR --> T6["Return 'Validation error: …'"]
|
|
143
|
+
VALID -->|yes| VC{"coercions\n.length > 0?"}
|
|
144
|
+
VC -->|yes| VCH["validation:coerce hook\n(observational)"]
|
|
145
|
+
VC -->|no| T7
|
|
146
|
+
VCH --> T7["tool:before hook\n(ctx.coercions when present, runToolCounts)"]
|
|
147
|
+
T7 --> T8["toolDef.execute(coercedInput, ctx)"]
|
|
148
|
+
T8 -->|error| T9["tool:error hook\n(can substitute result)"]
|
|
149
|
+
T8 -->|ok| T10["tool:transform hook\n(can mutate result;\nctx.outputBytes pre-mutation)"]
|
|
150
|
+
T9 --> T10
|
|
151
|
+
T10 --> T10B["stripImagesForNonVision()\n(replace image blocks with marker\nwhen provider.meta.capabilities.vision === false)"]
|
|
152
|
+
T10B --> T11["tool:after hook\n(ctx.outputBytes post-mutation, runToolCounts)"]
|
|
153
|
+
T11 --> T12["Return { id, content: string | ToolResultContent[] }"]
|
|
154
|
+
|
|
155
|
+
style T8 fill:#1a1a2e,stroke:#e94560,color:#fff
|
|
156
|
+
style TU fill:#1a1a2e,stroke:#0f3460,color:#fff
|
|
157
|
+
style VR fill:#1a1a2e,stroke:#0f3460,color:#fff
|
|
158
|
+
style RES1 fill:#1a1a2e,stroke:#533483,color:#fff
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**`tool:gate` mutations.** Set `block` to refuse (`Blocked: reason`), `result` to substitute and skip execute, or neither to run normally. `block` wins over `result` so a policy gate always beats a consumer cache. The substitute path skips `tool:before` + validate + execute but fires `tool:transform` + `tool:after` — budgets and telemetry stay consistent with executed calls.
|
|
162
|
+
|
|
163
|
+
**`runToolCounts`** is a frozen pre-call snapshot, scoped to `runId`. Includes dedup + gate-`result` substitutes; excludes blocked calls. Resumed sessions reset the counter. In parallel mode, consumer hooks see the pre-batch snapshot — built-in budget/dedup middleware uses its own gate-time reservation counter so `behavior.toolBudgets` stays atomic mid-batch.
|
|
164
|
+
|
|
165
|
+
**Output shape.** `string | ToolResultContent[]`. Text tools return strings; multimodal tools (MCP browsers, screenshots) return `[{ type: 'text' }, { type: 'image', mediaType, data }]`. Providers with `capabilities.imageInToolResult: true` (Anthropic, OpenAI Codex) route arrays natively; OpenAI-compat emits a companion `user` message with `image_url` parts. For non-vision providers (`capabilities.vision === false`), image blocks are swapped for a text marker after `tool:transform` and before `tool:after` fires.
|
|
166
|
+
|
|
167
|
+
**`outputBytes`** is populated on the `tool:after`, `tool:transform`, `mcp:tool:after`, and `mcp:tool:transform` contexts — UTF-8 byte length for text, base64 char length for images. This is a **wire-payload-size proxy**, not a token count (vision encoders tokenize decoded pixels, so token cost depends on image geometry). Use it for byte-budget heuristics; defer to provider-side context management for token-accurate accounting. Reproduce via `toolOutputByteLength(content)`.
|
|
168
|
+
|
|
169
|
+
## Tool ergonomics defaults
|
|
170
|
+
|
|
171
|
+
Built-in tools are opinionated about output sizes — drop your v2 `tool:transform` truncation polyfills.
|
|
172
|
+
|
|
173
|
+
| Tool | Default behavior |
|
|
174
|
+
|---|---|
|
|
175
|
+
| `read_file` | Line-paginates: `offset=1`, `limit=2000`, `maxBytes=262144` (256 KiB). Footer documents paging (`re-read with offset=N+1`). Binary files (NUL byte / `0xFFFD`-heavy) return a marker. `limit: 0` / `maxBytes: 0` disable. With `behavior.readLineNumbers` (default on), each line gets a `<N>\t` prefix; `edit` strips it on match so the model can paste verbatim. |
|
|
176
|
+
| `shell` | Tail-priority truncation at `maxOutputBytes=32768` (32 KiB, combined stdout+stderr). Head trim marker: `…(N bytes truncated from head)…`. `0` disables. UTF-8 never splits mid-codepoint. Appends `(exit N, Nms)` footer + surfaces non-empty stderr by default (`metadata: false` opts out). |
|
|
177
|
+
| `write_file` | Reads existing content; returns `Created` / `Updated` / `No change needed: …` so the model detects no-ops without a separate read. Race window in shared docker/sandbox contexts documented and accepted. |
|
|
178
|
+
| `edit` | Fails clearly on non-unique `old_string` (unless `replace_all: true`). On not-found, includes a nearest-match preview so the model recovers without re-reading. |
|
|
179
|
+
| `multi_edit` | Sequential edits to one file. **Ungated** (no `_outcomes` in input — the SDK default): atomic, first failure aborts. **Gated** (chat layer injects `_outcomes` per-hunk via `injectOutcomesIntoInput`): each hunk reported independently into a structured `Edited <path>: N/M applied · K denied · …` body — `parseEditOutcomesFromResult` re-parses for transcript replay. See CHAT.md → Per-edit approval. |
|
|
180
|
+
| `grep` | Wraps `rg` when present (with explicit `.` path to avoid stdin hangs). Bun.Glob fallback otherwise. `head_limit=250`, `offset` paginates. |
|
|
181
|
+
|
|
182
|
+
## Tool argument auto-coercion
|
|
183
|
+
|
|
184
|
+
`validateToolArgs` runs between `tool:gate` and `tool:before`:
|
|
185
|
+
|
|
186
|
+
1. **Required-field presence** — missing/null required fields fail outright.
|
|
187
|
+
2. **Type-aware coercion** on top-level properties:
|
|
188
|
+
- `"true"` / `"yes"` / `"1"` → `true`; `"false"` / `"no"` / `"0"` → `false`.
|
|
189
|
+
- Numeric strings → numbers/integers.
|
|
190
|
+
- JSON-encoded strings → arrays/objects.
|
|
191
|
+
- Numbers/booleans → strings (when schema declares `string`).
|
|
192
|
+
3. Reject only when no coercion succeeds; fires `validation:reject` so consumers count broken-call attempts separately from runtime errors.
|
|
193
|
+
|
|
194
|
+
`toolDef.execute` always receives the coerced input.
|
|
195
|
+
|
|
196
|
+
## Data Flow
|
|
197
|
+
|
|
198
|
+
```mermaid
|
|
199
|
+
flowchart LR
|
|
200
|
+
subgraph Input
|
|
201
|
+
USER[User prompt] --> TURNS[SessionTurn array]
|
|
202
|
+
SESSION[(Session store)] -.->|resume| TURNS
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
subgraph Agent
|
|
206
|
+
TURNS --> MSG["turnsToMessages()"]
|
|
207
|
+
MSG --> PROVIDER["provider.stream()"]
|
|
208
|
+
PROVIDER --> RESULT{Tool calls?}
|
|
209
|
+
RESULT -->|text only| DONE[Push final turn]
|
|
210
|
+
RESULT -->|tool calls| EXEC["Execute tools"]
|
|
211
|
+
EXEC --> PUSH["Push tool results"]
|
|
212
|
+
PUSH --> MSG
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
subgraph Output
|
|
216
|
+
DONE --> STATS["AgentStats"]
|
|
217
|
+
DONE -.->|incremental| SESSION
|
|
218
|
+
PUSH -.->|incremental| SESSION
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
style Input fill:#16213e,stroke:#0f3460,color:#fff
|
|
222
|
+
style Agent fill:#1a1a2e,stroke:#533483,color:#fff
|
|
223
|
+
style Output fill:#16213e,stroke:#0f3460,color:#fff
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## Typed errors
|
|
227
|
+
|
|
228
|
+
Provider failures are wrapped before leaving `agent.run()`:
|
|
229
|
+
|
|
230
|
+
```
|
|
231
|
+
native error
|
|
232
|
+
→ provider.classifyError(err) returns ClassifiedError | null
|
|
233
|
+
kind: 'context_exceeded' → AgentContextExceededError
|
|
234
|
+
kind: 'provider_error' → AgentProviderError
|
|
235
|
+
kind: 'aborted' → AgentAbortedError
|
|
236
|
+
null → AgentProviderError (fallback)
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
`agent.abort()` and an aborted `AbortSignal` always produce `AgentAbortedError`. Match on `instanceof`, not on `err.message`.
|
|
240
|
+
|
|
241
|
+
## Prompt caching
|
|
242
|
+
|
|
243
|
+
When `behavior.cache: true` (default), zidane injects `cache_control: { type: 'ephemeral' }` breakpoints on three stable prefixes:
|
|
244
|
+
|
|
245
|
+
1. **System prompt** — last text block.
|
|
246
|
+
2. **Tool definitions** — last tool in the array.
|
|
247
|
+
3. **Conversation** — last content block of the last message, skipping trailing `thinking` / `redacted_thinking` blocks that can't carry `cache_control`.
|
|
248
|
+
|
|
249
|
+
This uses three of Anthropic's four allowed breakpoints, leaving headroom for a future thinking-block breakpoint. On the next turn, the longest matching prefix is served from cache; only the new suffix is billed at the full input rate.
|
|
250
|
+
|
|
251
|
+
Provider semantics:
|
|
252
|
+
|
|
253
|
+
| Provider | Behavior |
|
|
254
|
+
|---|---|
|
|
255
|
+
| `anthropic` | Breakpoints honored natively. |
|
|
256
|
+
| `openrouter` | Breakpoints forwarded; Anthropic + Gemini routes honor them, others ignore. |
|
|
257
|
+
| `openaiCompat` | Factory-gated via `cacheBreakpoints: boolean` (default **off**). `openrouter` flips it on; bare `openaiCompat` stays off so OpenAI-direct doesn't reject unknown fields. |
|
|
258
|
+
| `cerebras` | Off (factory doesn't enable breakpoints). |
|
|
259
|
+
| `openai` (Codex) | Not affected — separate wire format (pi-ai). |
|
|
260
|
+
|
|
261
|
+
Cache-read + cache-write counts surface on `TurnUsage.cacheRead` / `TurnUsage.cacheCreation`,
|
|
262
|
+
populated from Anthropic's native fields and from OpenRouter's `prompt_tokens_details`.
|
|
263
|
+
|
|
264
|
+
## Tool aliasing (wire vs. canonical)
|
|
265
|
+
|
|
266
|
+
Aliasing lives at the LLM boundary only:
|
|
267
|
+
|
|
268
|
+
```
|
|
269
|
+
AgentOptions.tools keyed by canonical name ← agent dispatch
|
|
270
|
+
│
|
|
271
|
+
▼
|
|
272
|
+
Loop.aliasMaps (canonical ↔ wire)
|
|
273
|
+
│
|
|
274
|
+
outbound: canonical → wire inbound: wire → canonical
|
|
275
|
+
│ │
|
|
276
|
+
▼ ▼
|
|
277
|
+
ToolSpec sent to provider result.toolCalls / assistantMessage.content[tool_call]
|
|
278
|
+
(model calls by wire name) (immediately rewritten to canonical)
|
|
279
|
+
│
|
|
280
|
+
▼
|
|
281
|
+
session.turns persists canonical names
|
|
282
|
+
ToolHookContext.name = canonical, .displayName = wire
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
`session.turns` can't desync on alias-map changes; hooks match on canonical names; UI adapters read `displayName` for labels.
|
|
286
|
+
|
|
287
|
+
## Skills activation lifecycle
|
|
288
|
+
|
|
289
|
+
Zidane implements the [Agent Skills spec](https://agentskills.io/specification) via dedicated-tool activation. Three tools auto-inject when the catalog is non-empty: `skills_use` / `skills_read` / `skills_run_script`.
|
|
290
|
+
|
|
291
|
+
```
|
|
292
|
+
createAgent({ skills }) → skillActivationState({ maxActive })
|
|
293
|
+
|
|
294
|
+
ensureSkillsResolved() [idempotent, shared promise across callers]:
|
|
295
|
+
resolveSkills(config) → skills:resolve
|
|
296
|
+
buildCatalog(skills, …) → skills:catalog
|
|
297
|
+
|
|
298
|
+
triggered by (first-caller-wins):
|
|
299
|
+
agent.warmup() ← explicit pre-warm (or eager: true)
|
|
300
|
+
agent.run() ← lazy bootstrap on first run
|
|
301
|
+
agent.activateSkill(name) ← host-driven activation (e.g. TUI slash-cmd)
|
|
302
|
+
|
|
303
|
+
first run():
|
|
304
|
+
rehydrate from session turns → skills:activate (via: 'resume')
|
|
305
|
+
|
|
306
|
+
per run():
|
|
307
|
+
installAllowedToolsGate, installToolBudgetsGate, installDedupToolsGate
|
|
308
|
+
auto-inject skills_use / skills_read / skills_run_script
|
|
309
|
+
runLoop():
|
|
310
|
+
skills_use({ name }) → state.activate(skill, 'model')
|
|
311
|
+
→ skills:activate (via: 'model')
|
|
312
|
+
→ <skill_content> wrapper
|
|
313
|
+
skills_read / skills_run_script → gated on state.isActive + path sandbox
|
|
314
|
+
run end: deactivateAllSkills() → skills:deactivate (reason: 'run-end')
|
|
315
|
+
|
|
316
|
+
agent.activateSkill(name) → skills:activate (via: 'explicit')
|
|
317
|
+
agent.deactivateSkill(name) → skills:deactivate (reason: 'explicit')
|
|
318
|
+
agent.reset() → skills:deactivate (reason: 'reset')
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
**allowed-tools.** When any active skill declares `allowed-tools`, a `tool:gate` handler blocks calls outside the union (the three skills tools are always implicitly allowed).
|
|
322
|
+
|
|
323
|
+
**Body-only delivery.** `skills_use` returns the body (frontmatter stripped) wrapped in `<skill_content>` for host-SDK context protection. Body includes shell-interpolated instructions (`` !`cmd` ``, fresh per activation), the skill directory, the resource listing (not eagerly loaded), and compatibility + allowed-tools when present.
|
|
324
|
+
|
|
325
|
+
**Source precedence.** `SkillConfig.source: 'project' | 'user' | 'inline' | 'builtin'`. Project beats user; first-found wins within scan order. `trustProjectSkills: false` drops project-scoped skills.
|
|
326
|
+
|
|
327
|
+
## Progressive tool disclosure
|
|
328
|
+
|
|
329
|
+
`behavior.toolDisclosure: 'lazy'` (default `'eager'`) replaces upfront MCP schemas with a name-only catalog and auto-injects a `tool_search` native tool. Native + skill tools always stay eager — only MCP is eligible. Per-server `McpServerConfig.disclosure: 'eager' | 'lazy'` overrides the agent-wide default.
|
|
330
|
+
|
|
331
|
+
```
|
|
332
|
+
per run() when toolDisclosure === 'lazy' and ≥1 MCP tool is lazy:
|
|
333
|
+
|
|
334
|
+
partitionToolDisclosure(toolsBySpecName, mcpToolNames, servers, mode, aliases)
|
|
335
|
+
→ lazyEntries (wire name + canonicalName + description + inputSchema + server?)
|
|
336
|
+
→ eagerCanonicalNames
|
|
337
|
+
|
|
338
|
+
unlocked = new Set(eagerCanonicalNames)
|
|
339
|
+
if (lazyEntries && toolSearch.tool !== false && !host-defined tool_search):
|
|
340
|
+
tools.tool_search = createToolSearchTool({ catalog: lazyEntries, unlocked, defaultLimit })
|
|
341
|
+
unlocked.add('tool_search')
|
|
342
|
+
|
|
343
|
+
system += buildSearchableCatalog(lazyEntries) // wire names, XML-escaped, no inputSchema
|
|
344
|
+
installLazyDisclosureGate // tool:gate refuses lazy ∉ unlocked
|
|
345
|
+
|
|
346
|
+
per turn:
|
|
347
|
+
buildFormattedTools() → filter ctx.tools by unlocked, alias through aliasMaps
|
|
348
|
+
|
|
349
|
+
tool_search({ names | query | server, limit? }):
|
|
350
|
+
match wire-keyed byName/byServer indexes
|
|
351
|
+
unlocked.add(canonicalName) per match
|
|
352
|
+
return <tool_search_results>…<input_schema>{json}</input_schema>…
|
|
353
|
+
(inputSchema JSON inlined with `<` → `\u003c` so a hostile schema can't break the envelope)
|
|
354
|
+
|
|
355
|
+
run end: uninstallLazyDisclosureGate; unlocked GC-eligible on next run()
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
**Aliasing.** `LazyToolEntry.name` is the wire name (the only one the provider accepts). `LazyToolEntry.canonicalName` keys `unlocked` and dispatches `ctx.tools[name]`. `session.turns`, `ToolHookContext.name`, and `ctx.tools` keys stay canonical throughout.
|
|
359
|
+
|
|
360
|
+
**Hard gate vs. provider validation.** Production providers (Anthropic, OpenAI) refuse `tool_use` blocks for tools absent from the request — the gate is mostly redundant there. It exists for custom / mock / lenient providers and makes the lazy boundary observable via `tool:gate`. Refusal points at the discovery tool's wire name.
|
|
361
|
+
|
|
362
|
+
**Cache cost.** Each `tool_search` advances the tool-list cache breakpoint — one miss per discovery wave; subsequent same-unlocked turns hit cache. Still beats eager (every schema, every turn) when lazy tools are many and discovery waves few. The system-prompt catalog is byte-stable (alphabetical) so it rides the system-prompt cache cleanly.
|
|
363
|
+
|
|
364
|
+
**Host-defined `tool_search`.** A host tool named `tool_search` (or aliased to that wire name) shadows the auto-injection. The host implementation must call `createToolSearchTool({ catalog, unlocked })` to drive the unlock flow — bare implementations leave the gate refusing every lazy call. For custom flows, prefer `toolSearch.tool: false`.
|
|
365
|
+
|
|
366
|
+
## Hook Firing Order
|
|
367
|
+
|
|
368
|
+
### Skills
|
|
369
|
+
|
|
370
|
+
```
|
|
371
|
+
skills:resolve ← once per agent, after discovery (warmup / run / activateSkill — first wins)
|
|
372
|
+
skills:catalog [mutable: catalog] ← once per agent, after system-prompt catalog built (same trigger)
|
|
373
|
+
skills:activate ← per activation; { skill, via: 'model' | 'explicit' | 'resume' }
|
|
374
|
+
skills:deactivate ← per deactivation; { skill, reason: 'run-end' | 'explicit' | 'reset' }
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
Every run ends with an implicit deactivate-all pass (`reason: 'run-end'`); activation state never leaks across runs unless re-asserted via `agent.activateSkill()`.
|
|
378
|
+
|
|
379
|
+
### Per turn
|
|
380
|
+
|
|
381
|
+
All tool hooks carry `turnId` + `callId`. Typed via `ToolHookContext` / `McpToolHookContext`. Both expose `name` (canonical) and `displayName` (wire / aliased; defaults to `name`).
|
|
382
|
+
|
|
383
|
+
Hooks marked **[mutable]** have ctx fields that affect downstream behavior when handlers mutate them. Last-writer wins.
|
|
384
|
+
|
|
385
|
+
> **Ordering note:** `context:transform` fires **before** `turn:before` — "transform the context that will be sent". For per-turn observation, use `turn:before`. For editing messages just before the provider call, use `context:transform`.
|
|
386
|
+
|
|
387
|
+
```
|
|
388
|
+
context:transform [mutable: messages] ← edit messages before LLM call
|
|
389
|
+
system:transform [mutable: system] ← per-request system mutation
|
|
390
|
+
turn:before ← turn starts
|
|
391
|
+
oauth:refresh ← (when provider supports it)
|
|
392
|
+
stream:thinking (n) / stream:text (n) ← each streamed chunk
|
|
393
|
+
stream:end ← text complete (if text present)
|
|
394
|
+
turn:after ← always fires (incl. errors); SessionTurn + toolCounts
|
|
395
|
+
tool:gate [mutable: block, reason, result?] ← refuse / substitute / run
|
|
396
|
+
tool:unknown [mutable: result?, suppressError] ← no toolDef registered
|
|
397
|
+
validation:reject / validation:coerce ← arg validation outcomes
|
|
398
|
+
tool:before ← (input coerced; coercions? when present; priorContent? for write_file)
|
|
399
|
+
mcp:tool:gate [mutable: block, reason, result?]
|
|
400
|
+
mcp:tool:before
|
|
401
|
+
mcp:tool:transform [mutable: result] ← + outputBytes pre-mutation
|
|
402
|
+
mcp:tool:after ← + outputBytes post-mutation
|
|
403
|
+
tool:transform [mutable: result, isError] ← + outputBytes pre-mutation
|
|
404
|
+
tool:after ← + outputBytes post-mutation
|
|
405
|
+
tool:error [mutable: result?] ← on execute throw (fires before tool:transform) or unsuppressed unknown tool; ctx.result substitutes
|
|
406
|
+
tool-results:after ← after the tool-results user turn is pushed (persistence seam)
|
|
407
|
+
usage ← running totals
|
|
408
|
+
output ← when behavior.schema set
|
|
409
|
+
budget:exceeded / tool-budget:exceeded ← byte / per-tool caps tripped
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
**`tool-results:after`** is the durable-persistence seam. The assistant turn carrying `tool_use` blocks and the user turn carrying matching `tool_result` blocks must be persisted as a pair — otherwise a process death between turns leaves an orphan `tool_use` that Anthropic rejects on resume. Subscribe here to write the pair atomically. The loop also synthesizes `Aborted: …` / `Skipped: …` tool_results for any remaining calls when abort / steering interrupts a sequential batch (`synthesizeMissingToolResults` in `src/agent.ts`), so the assistant turn's `tool_use` ids always stay matched.
|
|
413
|
+
|
|
414
|
+
`tool:unknown` replaces `tool:before` when no `toolDef` exists; `validation:reject` fires after coercion attempts fail. The model receives a result string in both cases (a substituted result or the default `Unknown tool: …` for unknown tools; `Validation error: …` for rejected arguments) so it can recover.
|
|
415
|
+
|
|
416
|
+
### Per run
|
|
417
|
+
|
|
418
|
+
```
|
|
419
|
+
session:start ← run begins
|
|
420
|
+
session:turns ← seeded user turn persisted (fires before the first turn:before
|
|
421
|
+
when run({ prompt }) seeds a user turn; skipped on promptless resume)
|
|
422
|
+
system:before ← per-run only when run({ system }) is set. Observational —
|
|
423
|
+
the loop reads options.system directly; ctx mutations are dropped.
|
|
424
|
+
Use system:transform for actual mutation.
|
|
425
|
+
[turn hooks above, repeated]
|
|
426
|
+
session:turns ← after turns persisted (includes SessionTurn[] of appended turns)
|
|
427
|
+
steer:inject ← if steering/follow-up message injected
|
|
428
|
+
agent:abort ← agent.abort() called mid-run
|
|
429
|
+
agent:done ← run finished (all exit paths), includes stats.output
|
|
430
|
+
session:end ← run finished (completed | aborted | error)
|
|
431
|
+
includes turnRange [start, end] for cleanup
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
### Once (lazy on first `warmup()` / `run()` / `activateSkill()` — or eager when `eager: true`)
|
|
435
|
+
|
|
436
|
+
```
|
|
437
|
+
mcp:bootstrap:start / mcp:bootstrap:end ← per server, always fires (durationMs + ok/toolCount | error)
|
|
438
|
+
mcp:connect ← per server bootstrapped OK
|
|
439
|
+
mcp:error ← per server that failed
|
|
440
|
+
skills:resolve ← after discovery
|
|
441
|
+
skills:catalog [mutable: catalog] ← after catalog build
|
|
442
|
+
skills:activate ← per activation during a run (skills_use / activateSkill() / resume) — not once per agent
|
|
443
|
+
```
|
|
444
|
+
|
|
445
|
+
`warmup()` fans MCP connect + skills resolution out in parallel; both sides are idempotent and share an in-flight promise across concurrent callers. Hooks registered after `warmup()` resolves may miss `skills:resolve` / `skills:catalog` — register before any `warmup()` / `run()` / `activateSkill()` call (or before `createAgent` returns when using `eager: true`). Use `mcp:bootstrap:end` for cold-start attribution — not wall-clock diffs against `mcp:connect`, which is skipped on failure.
|
|
446
|
+
|
|
447
|
+
### Spawn lifecycle
|
|
448
|
+
|
|
449
|
+
Fires when a parent invokes the `spawn` tool. Children inherit the parent's preset (tools, system, aliases, mcpServers, skills, behavior) by default — see SKILL.md "Sub-agent Spawning" to restrict.
|
|
450
|
+
|
|
451
|
+
```
|
|
452
|
+
spawn:before ← child agent created, before run
|
|
453
|
+
spawn:complete ← child finished successfully, includes AgentStats
|
|
454
|
+
spawn:error ← child run threw
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
The child's lifecycle also bubbles to the parent hook surface with `childId` + `depth`, so nested UI renders without subscribing on the child:
|
|
458
|
+
|
|
459
|
+
```
|
|
460
|
+
child:stream:text / child:stream:thinking / child:stream:end
|
|
461
|
+
child:tool:gate / child:mcp:tool:gate ← share the child's ctx — parent mutations propagate
|
|
462
|
+
child:tool:before / child:tool:after / child:tool:error
|
|
463
|
+
child:turn:after
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
The two `child:*:gate` events are special: the bubbled ctx is the same reference the child loop awaits on, so a parent listener can refuse / substitute a subagent's tool call without registering on the child agent.
|
|
467
|
+
|
|
468
|
+
## Dependency Graph
|
|
469
|
+
|
|
470
|
+
```mermaid
|
|
471
|
+
flowchart TB
|
|
472
|
+
AGENT[agent.ts] --> LOOP[loop.ts]
|
|
473
|
+
AGENT --> TOOLS[tools/]
|
|
474
|
+
AGENT --> PROVIDER[providers/]
|
|
475
|
+
AGENT --> SESSION[session/]
|
|
476
|
+
AGENT --> SKILLS[skills/]
|
|
477
|
+
AGENT --> MCP[mcp/]
|
|
478
|
+
AGENT --> CONTEXTS[contexts/]
|
|
479
|
+
AGENT --> PROMPT[prompt.ts]
|
|
480
|
+
AGENT --> ALIASING[aliasing.ts]
|
|
481
|
+
|
|
482
|
+
LOOP --> PROVIDER
|
|
483
|
+
LOOP --> TOOLS
|
|
484
|
+
LOOP --> ALIASING
|
|
485
|
+
LOOP --> ERRORS[errors.ts]
|
|
486
|
+
|
|
487
|
+
PRESETS[presets/] --> TOOLS
|
|
488
|
+
TOOLS --> CONTEXTS
|
|
489
|
+
TOOLS -->|spawn| AGENT
|
|
490
|
+
|
|
491
|
+
PROVIDER --> TYPES[types.ts]
|
|
492
|
+
PROVIDER --> ERRORS
|
|
493
|
+
LOOP --> TYPES
|
|
494
|
+
SESSION --> TYPES
|
|
495
|
+
AGENT --> TYPES
|
|
496
|
+
|
|
497
|
+
TRACING[tracing.ts] -.->|installs on| AGENT
|
|
498
|
+
|
|
499
|
+
style AGENT fill:#e94560,stroke:#fff,color:#fff
|
|
500
|
+
style LOOP fill:#533483,stroke:#fff,color:#fff
|
|
501
|
+
style TYPES fill:#0f3460,stroke:#fff,color:#fff
|
|
502
|
+
style ERRORS fill:#16213e,stroke:#e94560,color:#fff
|
|
503
|
+
style ALIASING fill:#16213e,stroke:#e94560,color:#fff
|
|
504
|
+
style PROMPT fill:#16213e,stroke:#e94560,color:#fff
|
|
505
|
+
style TRACING fill:#1a1a2e,stroke:#533483,color:#fff
|
|
506
|
+
```
|
|
507
|
+
|
|
508
|
+
## Presets
|
|
509
|
+
|
|
510
|
+
A `Preset` is `Omit<Partial<AgentOptions>, 'provider' | 'execution' | 'session' | 'mcpConnector'>` — a reusable slice of agent config that consumers spread into `createAgent`:
|
|
511
|
+
|
|
512
|
+
```ts
|
|
513
|
+
createAgent({ ...basic, provider })
|
|
514
|
+
```
|
|
515
|
+
|
|
516
|
+
Excluded fields are either ambient runtime (`provider`, `session`, `execution`) or test seams (`mcpConnector`). Everything else — `name`, `system`, `tools`, `toolAliases`, `behavior`, `mcpServers`, `skills`, `eager`, **`hooks`** — is preset-shareable.
|
|
517
|
+
|
|
518
|
+
### `hooks` field
|
|
519
|
+
|
|
520
|
+
`AgentOptions.hooks` (typed `AgentHookMap`) is registered for the **lifetime of the agent**: identical in effect to calling `agent.hooks.hook(event, fn)` immediately after `createAgent` returns. Handlers fire across every `run()`. Unknown event names throw at construction (typo guard). For per-run handlers that auto-detach at run end, use `AgentRunOptions.hooks` instead — same shape, different scope.
|
|
521
|
+
|
|
522
|
+
Each entry accepts a single handler or an array. The array form is what `composePresets()` emits when multiple presets target the same event.
|
|
523
|
+
|
|
524
|
+
### `composePresets(...presets)` — field-aware merge
|
|
525
|
+
|
|
526
|
+
Bare `...spread` is shallow: `{ ...a, ...b }` overwrites every key `b` defines, including `hooks`. Two presets that both register a `turn:before` handler would silently lose `a`'s handler. `composePresets` is the explicit composition primitive:
|
|
527
|
+
|
|
528
|
+
```ts
|
|
529
|
+
createAgent({ ...composePresets(basic, telemetry, mine), provider })
|
|
530
|
+
```
|
|
531
|
+
|
|
532
|
+
Merge rules:
|
|
533
|
+
|
|
534
|
+
| field | rule |
|
|
535
|
+
| ---------------------------------------- | ----------------------------------------------------- |
|
|
536
|
+
| `name`, `system`, `eager`, `skills` | last-defined wins |
|
|
537
|
+
| `tools`, `toolAliases`, `behavior` | shallow-merge (later keys override per field) |
|
|
538
|
+
| `mcpServers` | concat with last-wins on `name` collision (a later preset can override an earlier preset's `github` server) |
|
|
539
|
+
| `hooks` | per-event concat — every handler fires, in preset order |
|
|
540
|
+
|
|
541
|
+
`hooks` always emerges as `event → handler[]` so `createAgent` sees a uniform shape regardless of whether the source was a single handler or an array. Handler order within an event follows preset order: earlier presets register first.
|
|
542
|
+
|
|
543
|
+
### When to reach for what
|
|
544
|
+
|
|
545
|
+
- **One preset, no overlap with consumer code** → spread directly: `createAgent({ ...preset, provider })`.
|
|
546
|
+
- **Multiple presets** → `composePresets(...)` first. Spread alone is last-wins per top-level key, so a later preset's `hooks` or `behavior` replaces the earlier one wholesale — overrides you probably didn't intend.
|
|
547
|
+
- **Dynamic registration after creation** → `agent.hooks.hook(event, fn)` (returns an `unregister` fn). Preset hooks are "baked in"; runtime hooks are dynamic.
|
|
548
|
+
- **Per-run, auto-detached** → `agent.run({ hooks: { … } })`. Same `event → fn | fn[]` shape.
|
|
549
|
+
|
|
550
|
+
## Option Precedence
|
|
551
|
+
|
|
552
|
+
`run.behavior` > `agent.behavior` > defaults (field-by-field merge).
|
|
553
|
+
|
|
554
|
+
Defaults: unlimited turns (set `maxTurns` as a safety net), `maxTokens: 16384`, level-based thinking, `cache: true`, `dedupReads: true`, `requireReadBeforeEdit: false`, `compactStrategy: 'off'`, `compactThreshold: 128 KiB`, `compactKeepTurns: 4`, `toolDisclosure: 'eager'`, `readLineNumbers: true`, `elideStaleReads: false`. All other knobs (`toolOutputBudget`, `dedupTools`, `toolBudgets`, `thinkingDecay`, `toolSearch`, `persistThreshold`, `persistDir`, `persistExcludeTools`) are `undefined`. The chat-layer profiles (`BUILD_AGENT` / `PLAN_AGENT`) override several of these — see CHAT.md → Agents.
|
|
555
|
+
|
|
556
|
+
**`toolOutputBudget`** (off by default). The loop sums every tool result's `outputBytes` after `tool:transform`, so consumer truncation counts. On overshoot, a synthetic user message is appended:
|
|
557
|
+
|
|
558
|
+
> `[Tool output budget exceeded: N bytes returned in this turn (cap: M). Summarize the salient findings before calling more tools.]`
|
|
559
|
+
|
|
560
|
+
`budget:exceeded` fires with `{ turn, turnId, bytes, budget }`. The next turn picks up the message as latest input — the model self-compacts instead of being cut off.
|
|
561
|
+
|
|
562
|
+
**`compactStrategy: 'tail'`** — client-side companion for non-Anthropic providers. After `sanitizeStoredToolResults` and before `context:transform`, the loop sums tool-result bytes; on overshoot, `tool_result` blocks older than `compactKeepTurns` are replaced with a stub. Text and image blocks pass through; `session.turns` is untouched. Anthropic users should prefer the server-side `context-management-2025-06-27` beta (token-accurate) via `anthropic({ extraBetas, contextManagement })`.
|
|
563
|
+
|
|
564
|
+
**`dedupReads` + `requireReadBeforeEdit`** share a per-session `WeakMap<Session, Map<path, { contentHash, offset, limit, maxBytes, mtimeMs }>>` (keyed `${handle.cwd}::${path}`). `read_file` returns a stub on identical (hash + slice) re-reads. `edit` / `multi_edit` reject when no entry exists or the on-disk hash has drifted. Edits update the hash on success so chained edits don't need re-reads. GC'd with the session.
|
|
565
|
+
|
|
566
|
+
**`dedupTools`** generalizes the same pattern via a consumer hasher per tool. Parallel `WeakMap<Session, Map<toolName, { hash, result }>>` keeps only the most recent hash. On hit, the prior result replays verbatim — substitute path fires `tool:transform` + `tool:after` so budgets stay coherent. Tools with side effects or non-deterministic outputs MUST NOT be listed.
|
|
567
|
+
|
|
568
|
+
**`toolBudgets`** caps per-tool calls per run. When `runToolCounts[name] >= max`, fires `onExceed`: `'block'` refuses with `Blocked: <message>`, `'steer'` enqueues a synthetic user message (one per tool per run, deduped). Function form returns `{ mode, message }` dynamically. Fires `tool-budget:exceeded` (distinct from byte-level `budget:exceeded`). Counts include dedup substitutes by design.
|
|
569
|
+
|
|
570
|
+
**`thinkingDecay`** tapers `thinkingBudget` per run-relative turn. `applyThinkingDecay(base, decay, turn)` runs at `runTurn` start. Struct form `{ afterTurn, factor, floor }` does geometric decay; function form `(turn, base) => number` accepts arbitrary curves. Result clamped to `[0, base]` so buggy curves can't exceed the caller's opt-in.
|
|
571
|
+
|
|
572
|
+
**Gate precedence**. Consumer hooks register first (agent-lifetime then per-run), framework gates install after — `allowed-tools → tool-budgets → dedup → lazy-disclosure`. Each framework gate short-circuits on existing `ctx.block` or `ctx.result`, so the firing order is consumer → framework with the framework gates acting as an early-exit chain. The net effect: a policy gate always beats a consumer cache substitute, budgets enforce before dedup replays a cached call against an exhausted cap, and lazy-disclosure runs last so skill refusal wins over "load schema first" and dedup substitutes only hit on previously-recorded successful calls (which already passed lazy-disclosure once).
|
|
573
|
+
|
|
574
|
+
**Tool-result persistence** (`behavior.persistThreshold` + `persistDir` + `persistExcludeTools`). When set, the loop replaces oversize `tool_result` outputs with a `<persisted-output tool="…" bytes="…" path="…">` stub carrying a 2 KiB preview. Substitution happens just after `tool:transform`; the stub flows into `session.turns` directly so the prompt-cache prefix stays stable across turns. `persistDir` is required to enable the feature — without a target dir the framework silently skips persistence. `persistExcludeTools` (default empty at the SDK; the chat layer ships `DEFAULT_PERSIST_EXCLUDE_TOOLS` — see CHAT.md) lists canonical tool names that bypass persistence regardless of size.
|
|
575
|
+
|
|
576
|
+
## Related
|
|
577
|
+
|
|
578
|
+
- `docs/SKILL.md` — public API surface (createAgent, providers, presets, hooks, sessions, skills, MCP).
|
|
579
|
+
- `docs/CHAT.md` — renderer-agnostic chat engine (`zidane/chat`).
|
|
580
|
+
- `docs/TUI.md` — OpenTUI terminal shell (`zidane/tui`).
|
|
581
|
+
- `docs/INTERACTIONS.md` — interactive tools (`present_plan`, `ask_user`) protocol.
|