zidane 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,10 +15,10 @@ Built to be embedded.
15
15
  A small, hookable core with sensible defaults so most consumers don't write a single hook. Built around three principles: **token discipline by default** (cache, dedup, compaction, byte-accounting), **self-healing on the fault paths** (auto-coerce args, hallucinated-tool fallback, error rewriting), and **provider parity** (server-side features on Anthropic, client-side equivalents everywhere else).
16
16
 
17
17
  - 🧠 **Multi-provider, multi-auth** — Anthropic, OpenAI Codex, OpenRouter, Cerebras, plus a generic `openaiCompat` factory (Baseten, Fireworks, Groq, local servers). OAuth + API key, auto-refreshing tokens. Anthropic accepts opt-in `extraBetas` and `contextManagement` for first-party features.
18
- - 🪝 **Streaming, hookable turn loop** — text/thinking deltas, tool calls, MCP, sessions, skills, spawn, OAuth, validation, budgets — all observable (and most mutatable) via typed hook events.
19
- - 🛠 **Tools first-class** — `shell`, `read_file`, `write_file`, `edit`, `multi_edit`, `glob`, `grep`, `spawn`, human-in-the-loop, plus any [MCP](https://modelcontextprotocol.io) server. Sequential or parallel, per-call gates (`tool:gate`), validation auto-coerce (`"true"` → `true`), hallucinated-tool fallback (`tool:unknown`), error rewriting (`tool:error` → `result`).
20
- - ✂️ **Token-aware ergonomics** — paginated reads with a "how to page" footer, 8 KB tail-truncated `shell`, idempotent `write_file`; `outputBytes` surfaced on every tool/MCP hook. `behavior.toolOutputBudget` injects a "summarize" nudge when a turn's outputs exceed the cap.
21
- - 🗜 **Context discipline** — auto-injected `cache_control` breakpoints (Anthropic + OpenRouter); server-side compaction via `context-management-2025-06-27` on Anthropic, `behavior.compactStrategy: 'tail'` on everyone else. Per-session `read_file` dedup + opt-in `requireReadBeforeEdit` guard kill stale-content edits.
18
+ - 🪝 **Streaming, hookable turn loop** — text/thinking deltas, tool calls, MCP, sessions, skills, spawn, OAuth, validation, budgets — all observable (and most mutatable) via typed hook events. Per-request `system:transform` hook for runtime-derived prompt sections.
19
+ - 🛠 **Tools first-class** — `shell`, `read_file`, `write_file`, `edit`, `multi_edit`, `glob`, `grep`, `spawn`, human-in-the-loop, plus any [MCP](https://modelcontextprotocol.io) server. Sequential or parallel, per-call gates (`tool:gate` with writable `block` / `result` / `runToolCounts`), validation auto-coerce (`"true"` → `true`), hallucinated-tool fallback (`tool:unknown`), error rewriting (`tool:error` → `result`).
20
+ - ✂️ **Token-aware ergonomics** — paginated reads with a "how to page" footer, 8 KB tail-truncated `shell`, idempotent `write_file`; `outputBytes` surfaced on every tool/MCP hook. `behavior.toolOutputBudget` injects a "summarize" nudge when a turn's outputs exceed the cap; `behavior.toolBudgets` caps per-tool call counts (`'steer'` or `'block'`); `behavior.thinkingDecay` tapers reasoning budget per turn.
21
+ - 🗜 **Context discipline** — auto-injected `cache_control` breakpoints (Anthropic + OpenRouter); server-side compaction via `context-management-2025-06-27` on Anthropic, `behavior.compactStrategy: 'tail'` on everyone else. Per-session `read_file` dedup + opt-in `requireReadBeforeEdit` guard kill stale-content edits; `behavior.dedupTools` generalizes the same pattern to arbitrary tools (`todowrite`, `execute_sql`, …).
22
22
  - 🎯 **Reasoning + structured output** — thinking levels (`off` / `minimal` / `low` / `medium` / `high` / `adaptive`) with optional exact budgets; force the final answer to a JSON Schema (Zod v4 interop), no brittle parsing.
23
23
  - 💾 **Sessions, skills, multimodal** — pluggable session stores (memory / SQLite / remote / file-map), incremental persistence; [Agent Skills](https://agentskills.io/specification) spec-aligned with `allowed-tools` enforcement + resume rehydration; images + documents via `PromptPart[]`, tools can return image blocks routed natively on vision providers or via companion messages elsewhere.
24
24
  - 🧵 **Sub-agents + execution contexts** — delegate to child agents with inherited or overridden preset (child events bubble to the parent); run tools in-process, Docker, or any `SandboxProvider` (E2B / Rivet / custom). Parallel MCP bootstrap with `agent.warmup()` + `eager: true` to hide cold starts.
@@ -67,10 +67,13 @@ createAgent({
67
67
  maxTurns: 50, // max loop iterations
68
68
  maxTokens: 16384, // max tokens per LLM response
69
69
  thinkingBudget: 10240, // exact thinking token budget
70
+ thinkingDecay: { afterTurn: 5, factor: 0.5, floor: 1024 }, // taper budget per run-relative turn
70
71
  cache: true, // prompt-cache breakpoints on supported providers (default: true)
71
72
  toolOutputBudget: 32768, // soft per-turn cap on tool-output bytes (off by default)
72
73
  dedupReads: true, // dedup identical re-reads of the same file in `read_file` (default: true)
74
+ dedupTools: { todowrite: i => JSON.stringify(i.todos) }, // generic per-tool argument dedup
73
75
  requireReadBeforeEdit: false, // refuse `edit` / `multi_edit` against unread or stale files (default: false)
76
+ toolBudgets: { todowrite: { max: 6, onExceed: 'steer' } }, // per-tool soft call caps
74
77
  compactStrategy: 'off', // client-side tail compaction for non-Anthropic providers — 'off' | 'tail' (default: 'off')
75
78
  compactThreshold: 131_072, // bytes threshold that triggers tail compaction (default: 128 KiB)
76
79
  compactKeepTurns: 4, // trailing turns left intact during compaction (default: 4)
@@ -162,6 +165,18 @@ Fallback: `params.apiKey` > `params.access` > `ANTHROPIC_API_KEY` env > `.creden
162
165
 
163
166
  `extraBetas` are merged with the OAuth defaults (`claude-code-20250219`, `oauth-2025-04-20`) and de-duped. `contextManagement` is sent on the request body as `context_management`; pair it with the `context-management-2025-06-27` beta. For non-Anthropic providers, see `behavior.compactStrategy: 'tail'` for the client-side fallback.
164
167
 
168
+ `extraBodyParams` is a generic forward-compat pass-through for untyped Messages API fields. It is spread into the request before the typed core, so explicit factory options always win on collision. Use it when Anthropic ships a new beta before zidane has a dedicated knob:
169
+
170
+ ```ts
171
+ anthropic({
172
+ apiKey: '...',
173
+ extraBetas: ['some-future-beta'],
174
+ extraBodyParams: { future_field: { /* ... */ } },
175
+ })
176
+ ```
177
+
178
+ `openaiCompat` accepts the same `extraBodyParams` for OpenAI-style endpoints (e.g. `reasoning_effort`, `metadata`, OpenRouter `provider` routing).
179
+
165
180
  ### OpenRouter
166
181
 
167
182
  ```ts
@@ -367,15 +382,19 @@ All tool hooks include `turnId` and `callId` for correlation. Typed via `ToolHoo
367
382
 
368
383
  ```ts
369
384
  agent.hooks.hook('tool:gate', (ctx) => {
370
- // ctx.turnId, ctx.callId, ctx.name, ctx.input
385
+ // ctx.turnId, ctx.callId, ctx.name, ctx.input, ctx.runToolCounts
371
386
  if (ctx.name === 'shell' && String(ctx.input.command).includes('rm -rf')) {
372
387
  ctx.block = true
373
388
  ctx.reason = 'dangerous command'
374
389
  }
390
+ // Substitute a successful result without running the tool — mirrors
391
+ // tool:unknown / tool:error. When both are set, `block` wins.
392
+ if (ctx.name === 'todowrite' && (ctx.runToolCounts.todowrite ?? 0) > 0)
393
+ ctx.result = 'Already recorded; no-op.'
375
394
  })
376
395
 
377
- agent.hooks.hook('tool:before', (ctx) => { /* ctx.turnId, ctx.callId, ctx.name, ctx.input, ctx.coercions? */ })
378
- agent.hooks.hook('tool:after', (ctx) => { /* + ctx.result, ctx.outputBytes, ctx.coercions? */ })
396
+ agent.hooks.hook('tool:before', (ctx) => { /* ctx.turnId, ctx.callId, ctx.name, ctx.input, ctx.runToolCounts, ctx.coercions? */ })
397
+ agent.hooks.hook('tool:after', (ctx) => { /* + ctx.result, ctx.outputBytes, ctx.runToolCounts, ctx.coercions? */ })
379
398
  agent.hooks.hook('tool:error', (ctx) => {
380
399
  // + ctx.error. Mutate ctx.result to substitute the payload sent back to the
381
400
  // model in place of the default `Tool error: <msg>` — useful for OSS-model
@@ -434,6 +453,20 @@ agent.hooks.hook('context:transform', (ctx) => {
434
453
  })
435
454
  ```
436
455
 
456
+ ### System transform
457
+
458
+ Mutate the system prompt per request — useful for runtime-derived sections (files already read in the session, live tool budgets, skill activation reminders). Fires after `context:transform`, before the request goes out. `messages` is read-only here.
459
+
460
+ ```ts
461
+ agent.hooks.hook('system:transform', (ctx) => {
462
+ // ctx.system, ctx.messages (readonly), ctx.turn, ctx.turnId, ctx.session?
463
+ if (ctx.session && ctx.turn > 1)
464
+ ctx.system += `\n\n## Reminder: keep responses concise after turn ${ctx.turn}.`
465
+ })
466
+ ```
467
+
468
+ Cache breakpoints are applied inside the provider after this hook, so repeated turns with the same derived system text still hit the cache.
469
+
437
470
  ### Hook recipes
438
471
 
439
472
  Three patterns that don't have a built-in default. Copy-paste and tune.
@@ -484,6 +517,12 @@ const agent = createAgent({
484
517
  agent.hooks.hook('budget:exceeded', (ctx) => {
485
518
  console.warn(`turn ${ctx.turn}: ${ctx.bytes} > ${ctx.budget} bytes`)
486
519
  })
520
+
521
+ agent.hooks.hook('tool-budget:exceeded', (ctx) => {
522
+ // Per-tool counterpart, fires when `behavior.toolBudgets[ctx.tool]` trips.
523
+ // ctx.tool, ctx.count, ctx.max, ctx.turnId, ctx.mode ('steer' | 'block')
524
+ console.warn(`tool ${ctx.tool} hit cap (${ctx.count}/${ctx.max}, mode=${ctx.mode})`)
525
+ })
487
526
  ```
488
527
 
489
528
  ### Client-side context compaction (non-Anthropic)
@@ -510,6 +549,70 @@ Anthropic users should prefer the server-side `context-management-2025-06-27` be
510
549
 
511
550
  `behavior.requireReadBeforeEdit` (off by default) — `edit` and `multi_edit` reject when the file hasn't been read in the session, or when its on-disk content has drifted since the last read. Eliminates the silent-corruption case where a model edits against bytes it "remembers" but no longer reflect reality. Recommended on for stricter eval-grade runs.
512
551
 
552
+ ### Generic per-tool dedup
553
+
554
+ `behavior.dedupTools` extends the read-file pattern to arbitrary tools. Provide a hasher per tool keyed by canonical name; identical inputs replay the prior result without re-running the tool. Requires a session.
555
+
556
+ The hasher contract has **three return values, three meanings** — pick deliberately:
557
+
558
+ | Return | Meaning |
559
+ |---|---|
560
+ | non-empty string | Cache key for this call. Equal keys replay the prior result. |
561
+ | `undefined` | **Skip dedup for this call.** Tool runs normally; nothing recorded. |
562
+ | `''` or non-string | Treated as `undefined` (defensive). |
563
+
564
+ ```ts
565
+ behavior: {
566
+ dedupTools: {
567
+ // Always cache by full input — every identical re-call dedups.
568
+ todowrite: input => JSON.stringify(input),
569
+
570
+ // Cache by a normalized subset; non-cacheable shapes opt out via `undefined`.
571
+ execute_sql: (input) => {
572
+ const q = typeof input.query === 'string' ? input.query.trim().toLowerCase() : undefined
573
+ if (!q || q.includes('now()') || q.includes('random()')) return undefined
574
+ return q
575
+ },
576
+ },
577
+ }
578
+ ```
579
+
580
+ The `undefined` opt-out is **not** the same as `JSON.stringify(input)` — that would dedup against the verbatim input. Use `undefined` to mean "this specific call is not cacheable" (timestamps baked in, randomness, debug flags).
581
+
582
+ Tools with side effects or non-deterministic outputs (network, time, randomness) **must not** be listed — there is no safety net beyond the consumer's hasher. For MCP tools, key by the namespaced wire name (`mcp_<server>_<tool>`).
583
+
584
+ ### Per-tool call budgets
585
+
586
+ `behavior.toolBudgets` caps per-tool calls per run. Two reactions:
587
+
588
+ - `'steer'` — let the call run, but emit a synthetic user message after the turn nudging the model to commit and finish. Fires once per tool per run.
589
+ - `'block'` — refuse subsequent calls with `Blocked: <reason>`.
590
+
591
+ ```ts
592
+ behavior: {
593
+ toolBudgets: {
594
+ todowrite: { max: 6, onExceed: 'steer' },
595
+ execute_sql: { max: 3, onExceed: 'block' },
596
+ },
597
+ }
598
+ ```
599
+
600
+ Pass a function for custom messages: `onExceed: ctx => ({ mode: 'steer', message: '...' })`. Subscribe to `tool-budget:exceeded` for telemetry. Counts include dedup hits — by design, since real dispatches and dedup replays both count against agent-loop sanity.
601
+
602
+ ### Adaptive thinking budget
603
+
604
+ `behavior.thinkingDecay` tapers the thinking budget across turns. Late turns are usually checkpoint / cleanup work where reasoning rarely pays for itself.
605
+
606
+ ```ts
607
+ behavior: {
608
+ thinkingBudget: 8192,
609
+ thinkingDecay: { afterTurn: 5, factor: 0.5, floor: 1024 },
610
+ // turn 1-5 → 8192, turn 6 → 4096, turn 7 → 2048, turn 8+ → 1024
611
+ }
612
+ ```
613
+
614
+ Pass a function for arbitrary curves: `thinkingDecay: (turn, base) => base / Math.sqrt(turn)`. No-op when `thinkingBudget` is unset. Honored by every provider that respects `thinkingBudget`.
615
+
513
616
  ## Steering and Follow-up
514
617
 
515
618
  ### Steering
@@ -229,6 +229,126 @@ interface AgentBehavior {
229
229
  * Default: `true`.
230
230
  */
231
231
  dedupReads?: boolean;
232
+ /**
233
+ * Taper the thinking budget over the course of a run. Late turns are
234
+ * usually checkpoint / cleanup work where reasoning rarely pays for
235
+ * itself; early turns benefit most. Two forms:
236
+ *
237
+ * - **Struct** — geometric decay starting after `afterTurn`, multiplying by
238
+ * `factor` each subsequent turn, clamped to `floor`. Example
239
+ * `{ afterTurn: 5, factor: 0.5, floor: 1024 }` with a base budget of 8192:
240
+ * turns 1-5 = 8192, turn 6 = 4096, turn 7 = 2048, turn 8+ = 1024.
241
+ * - **Function** — `(runTurn, baseBudget) => number`. Arbitrary curves;
242
+ * `runTurn` is 1-indexed, run-relative (resumed sessions reset).
243
+ *
244
+ * No-op when `thinkingBudget` is unset. Honored by every provider that
245
+ * respects `thinkingBudget` (anthropic legacy enabled+budget path,
246
+ * adaptive `maxTokensCap`, openai-compat `max_tokens` padding).
247
+ *
248
+ * Default: `undefined` (no decay).
249
+ */
250
+ thinkingDecay?: {
251
+ afterTurn: number;
252
+ factor: number;
253
+ floor: number;
254
+ } | ((runTurn: number, baseBudget: number) => number);
255
+ /**
256
+ * Per-tool soft call budget for this run. Keyed by **canonical** tool name.
257
+ * On the first call after the run-cumulative dispatched count for that tool
258
+ * reaches `max`, the framework fires `onExceed`:
259
+ *
260
+ * - `'steer'` (default) — let the call execute, but emit a synthetic user
261
+ * message after the turn that nudges the model away from re-calling the
262
+ * tool. Reuses the existing post-turn steer pathway used by
263
+ * `toolOutputBudget`. Fires `tool-budget:exceeded` with `mode: 'steer'`.
264
+ * - `'block'` — refuse the call via `tool:gate` `block`. The model sees a
265
+ * `Blocked: <reason>` tool result. Fires `tool-budget:exceeded` with
266
+ * `mode: 'block'`.
267
+ * - **Function** — `(ctx) => { mode, message }`. The consumer supplies the
268
+ * steering / refusal text and chooses the mode dynamically.
269
+ *
270
+ * Counts include both real dispatches and dedup substitutes (Z19 hits).
271
+ * Excludes calls already blocked by an earlier gate (skill allow-list,
272
+ * consumer hook). Tools dispatched by spawned subagents have their own per-run
273
+ * counter — child counts never charge the parent.
274
+ *
275
+ * For MCP tools, key by the namespaced wire name (`mcp_<server>_<tool>`).
276
+ *
277
+ * Atomic in parallel mode: the middleware tracks its own per-tool
278
+ * approval counter, incremented synchronously at gate-time. A
279
+ * 4-call parallel batch against `max: 2` will let the first 2 through
280
+ * and refuse the rest, even though the loop's `runToolCounts` only
281
+ * propagates between calls (not within a single batch's gate fan-out).
282
+ *
283
+ * Default: `undefined` (no budget enforcement).
284
+ */
285
+ toolBudgets?: Record<string, {
286
+ max: number;
287
+ onExceed?: 'steer' | 'block' | ((ctx: {
288
+ tool: string;
289
+ count: number;
290
+ max: number;
291
+ }) => {
292
+ mode: 'steer' | 'block';
293
+ message: string;
294
+ });
295
+ }>;
296
+ /**
297
+ * Generic per-tool argument deduplication. Keyed by the tool's **canonical**
298
+ * name (alias-stable). Each entry is a hasher: `(input) => string | undefined`.
299
+ *
300
+ * **Hasher contract** — three return values, three meanings:
301
+ *
302
+ * | Return | Meaning |
303
+ * |-------------------------|------------------------------------------------------------------------|
304
+ * | a non-empty string | Cache key for this call. Equal keys (most-recent-only, this session) |
305
+ * | | replay the prior recorded result without re-dispatching the tool. |
306
+ * | `undefined` | **Skip dedup for this call.** The tool runs normally; nothing recorded.|
307
+ * | `''` / non-string | Treated identically to `undefined` (defensive: no dedup, no error). |
308
+ *
309
+ * The `undefined` opt-out is the way to say *"this specific call is not
310
+ * cacheable"* (timestamps in input, randomness baked in, debug flags). It
311
+ * is **not** the same as `JSON.stringify(input)` — that would dedup against
312
+ * the verbatim input. Pick one explicitly:
313
+ *
314
+ * ```ts
315
+ * // Always cache by full input — every identical re-call dedups.
316
+ * dedupTools: { todowrite: input => JSON.stringify(input) }
317
+ *
318
+ * // Cache by a normalized subset; non-cacheable shapes opt out.
319
+ * dedupTools: {
320
+ * execute_sql: (input) => {
321
+ * const q = typeof input.query === 'string' ? input.query.trim().toLowerCase() : undefined
322
+ * if (!q || q.includes('now()') || q.includes('random()')) return undefined
323
+ * return q
324
+ * },
325
+ * }
326
+ * ```
327
+ *
328
+ * On a hit, the previously-recorded result is replayed as the tool_result
329
+ * without dispatching the tool. The substitution flows through `tool:gate`
330
+ * `result` (Z20), so `tool:after` and `tool:transform` still fire.
331
+ *
332
+ * Requires a session (`createSession()`); without one, the map is a silent
333
+ * no-op since per-session state has nowhere to live. Tools with side
334
+ * effects or non-deterministic outputs (network, time, randomness) MUST
335
+ * NOT be listed — there is no safety net beyond the consumer's hasher.
336
+ *
337
+ * For MCP tools, key by the namespaced wire name (`mcp_<server>_<tool>`).
338
+ * Parallel mode (`toolExecution: 'parallel'`, the default) sees calls in
339
+ * the SAME assistant turn race against each other — none can dedup against
340
+ * a sibling that started in the same batch. Sequential mode honors order
341
+ * within a turn.
342
+ *
343
+ * **Cache policy**: only the most recent `(hash, result)` per tool is
344
+ * retained. Interleaved patterns (input A, input B, input A) miss on the
345
+ * second A because B overwrote it. Sufficient for the common spam-the-
346
+ * same-call loop; consumers needing a richer cache should hook
347
+ * `tool:gate` directly.
348
+ *
349
+ * Default: `undefined` (no per-tool dedup).
350
+ */
351
+ dedupTools?: Record<string, (input: Record<string, unknown>) => string | undefined>;
232
352
  /**
233
353
  * Require `read_file` before `edit` / `multi_edit` on the same path, and
234
354
  * reject edits when the file has changed on disk since the last read in
@@ -674,6 +794,18 @@ interface AnthropicParams {
674
794
  * ```
675
795
  */
676
796
  contextManagement?: AnthropicContextManagement;
797
+ /**
798
+ * Generic pass-through for fields on the Messages API request body that the
799
+ * SDK does not yet type. Spread into the request before the typed fields,
800
+ * so explicit options ({@link AnthropicParams.contextManagement} and the
801
+ * built-in fields like `model` / `tools` / `messages`) win on collision.
802
+ *
803
+ * Forward-compat escape hatch for new Anthropic betas — when a future flag
804
+ * ships before zidane has a dedicated typed knob, set it here without
805
+ * waiting on a release. Most fields will still need the matching beta in
806
+ * {@link AnthropicParams.extraBetas}.
807
+ */
808
+ extraBodyParams?: Record<string, unknown>;
677
809
  }
678
810
  declare function anthropic(anthropicParams?: AnthropicParams): Provider;
679
811
 
@@ -796,6 +928,17 @@ interface OpenAICompatParams {
796
928
  * Default: `false`. The `openrouter` wrapper sets this to `true`.
797
929
  */
798
930
  cacheBreakpoints?: boolean;
931
+ /**
932
+ * Generic pass-through for fields on the Chat Completions request body that
933
+ * zidane does not yet type. Spread into the request before the typed core
934
+ * (model / messages / tools / max_tokens / stream / tool_choice), so
935
+ * explicit options always win on collision.
936
+ *
937
+ * Forward-compat escape hatch for endpoints that ship one-off fields ahead
938
+ * of zidane (e.g. OpenAI `reasoning_effort`, OpenRouter `provider` routing,
939
+ * vendor-specific `safety_level` knobs).
940
+ */
941
+ extraBodyParams?: Record<string, unknown>;
799
942
  }
800
943
  /**
801
944
  * Factory for any OpenAI-compatible HTTP endpoint.
@@ -1557,11 +1700,34 @@ interface AgentHooks {
1557
1700
  turnId: string;
1558
1701
  options: StreamOptions;
1559
1702
  }) => void;
1703
+ /**
1704
+ * Fires after each assistant turn (before its tool-result follow-up
1705
+ * dispatches; the loop iterates back to a fresh `turn:before` once the
1706
+ * tool results are produced).
1707
+ *
1708
+ * `toolCounts.turn` — calls **emitted** by the model in this assistant
1709
+ * turn, keyed by canonical tool name. Reflects what the model asked for,
1710
+ * regardless of downstream gate outcome. Most useful for spotting per-turn
1711
+ * spikes ("the model called todowrite 4 times in one turn").
1712
+ *
1713
+ * `toolCounts.run` — cumulative running counter of **dispatched** calls
1714
+ * scoped to this `runId`, captured at fire time. Excludes calls that were
1715
+ * `block`ed by `tool:gate` handlers. Includes calls short-circuited via
1716
+ * `tool:gate` `result` substitution (the model still asked, the framework
1717
+ * just answered without the tool running). Resumed sessions start a fresh
1718
+ * run with empty counts.
1719
+ *
1720
+ * Both fields are frozen snapshots, so handlers cannot mutate them.
1721
+ */
1560
1722
  'turn:after': (ctx: {
1561
1723
  turn: number;
1562
1724
  turnId: string;
1563
1725
  usage: TurnUsage;
1564
1726
  message: SessionTurn;
1727
+ toolCounts: {
1728
+ turn: Readonly<Record<string, number>>;
1729
+ run: Readonly<Record<string, number>>;
1730
+ };
1565
1731
  }) => void;
1566
1732
  'stream:text': (ctx: StreamHookContext & {
1567
1733
  delta: string;
@@ -1575,17 +1741,53 @@ interface AgentHooks {
1575
1741
  thinking: string;
1576
1742
  }) => void;
1577
1743
  'oauth:refresh': (ctx: OAuthRefreshHookContext) => void;
1744
+ /**
1745
+ * Fires before validation, `tool:before`, and `execute`. Two ways to
1746
+ * intercept:
1747
+ *
1748
+ * - Set `block = true` (with a `reason`) to refuse the call. The model
1749
+ * sees a `Blocked: <reason>` tool result; `tool:before` / `tool:after`
1750
+ * do **not** fire.
1751
+ * - Set `result` to substitute a successful tool_result and skip
1752
+ * execution. The model sees the substitute as a normal tool_result;
1753
+ * `tool:before` does not fire, but `tool:after` and `tool:transform`
1754
+ * do — so byte budgets, telemetry, and post-mutation hooks see the
1755
+ * substitute. Useful for cache hits, dedup, idempotency guards,
1756
+ * plan-mode synthetic acks.
1757
+ *
1758
+ * If multiple handlers along the chain set both `block` and `result`,
1759
+ * `block` wins — refusal beats substitution, so a policy gate
1760
+ * (skills allow-list, custom security) can always override an upstream
1761
+ * consumer's cache substitute. Mirrors the writable-`result` shape on
1762
+ * `tool:unknown` and `tool:error` so consumers learn one pattern.
1763
+ *
1764
+ * `runToolCounts` — frozen pre-call snapshot of per-tool dispatched
1765
+ * counts in this run. Use it to self-throttle, drive observability, or
1766
+ * implement budget guards. Counts every call that passed gate, including
1767
+ * dedup substitutes (Z19); excludes `block`ed calls.
1768
+ *
1769
+ * **Parallel mode** (`toolExecution: 'parallel'`, the default): the
1770
+ * snapshot is taken before any dispatches in the batch, so consumer
1771
+ * hooks reading `runToolCounts` see the pre-batch view. Built-in
1772
+ * budget / dedup middleware uses internal per-call reservation, so
1773
+ * `behavior.toolBudgets` enforces atomically even within a parallel
1774
+ * batch.
1775
+ */
1578
1776
  'tool:gate': (ctx: ToolHookContext & {
1579
1777
  block: boolean;
1580
1778
  reason: string;
1779
+ result?: string | ToolResultContent[];
1780
+ runToolCounts: Readonly<Record<string, number>>;
1581
1781
  }) => void;
1582
1782
  'tool:before': (ctx: ToolHookContext & {
1583
1783
  coercions?: readonly string[];
1784
+ runToolCounts: Readonly<Record<string, number>>;
1584
1785
  }) => void;
1585
1786
  'tool:after': (ctx: ToolHookContext & {
1586
1787
  result: string | ToolResultContent[];
1587
1788
  outputBytes: number;
1588
1789
  coercions?: readonly string[];
1790
+ runToolCounts: Readonly<Record<string, number>>;
1589
1791
  }) => void;
1590
1792
  /**
1591
1793
  * Fires when a tool throws during execution. Mutate `result` to substitute a
@@ -1646,6 +1848,27 @@ interface AgentHooks {
1646
1848
  'context:transform': (ctx: {
1647
1849
  messages: SessionMessage[];
1648
1850
  }) => void;
1851
+ /**
1852
+ * Fires per request, after `context:transform` and before the request goes
1853
+ * out. Mutating `ctx.system` updates the system prompt the provider sends
1854
+ * for this turn — useful for runtime-derived sections (e.g. listing files
1855
+ * already read in the session, surfacing live tool budgets, injecting
1856
+ * skill activation reminders).
1857
+ *
1858
+ * Cache breakpoints are applied inside the provider after this hook, so
1859
+ * mutations land in the cache key naturally — repeated turns with the
1860
+ * same derived system text still hit the cache.
1861
+ *
1862
+ * `messages` is read-only here; use `context:transform` for message
1863
+ * surgery. `session` is `undefined` when the run is sessionless.
1864
+ */
1865
+ 'system:transform': (ctx: {
1866
+ system: string;
1867
+ messages: readonly SessionMessage[];
1868
+ turn: number;
1869
+ turnId: string;
1870
+ session?: Session;
1871
+ }) => void;
1649
1872
  'steer:inject': (ctx: {
1650
1873
  message: string;
1651
1874
  }) => void;
@@ -1673,6 +1896,7 @@ interface AgentHooks {
1673
1896
  }) => void;
1674
1897
  'child:tool:before': (ctx: ToolHookContext & {
1675
1898
  coercions?: readonly string[];
1899
+ runToolCounts: Readonly<Record<string, number>>;
1676
1900
  childId: string;
1677
1901
  depth: number;
1678
1902
  }) => void;
@@ -1680,6 +1904,7 @@ interface AgentHooks {
1680
1904
  result: string | ToolResultContent[];
1681
1905
  outputBytes: number;
1682
1906
  coercions?: readonly string[];
1907
+ runToolCounts: Readonly<Record<string, number>>;
1683
1908
  childId: string;
1684
1909
  depth: number;
1685
1910
  }) => void;
@@ -1693,6 +1918,10 @@ interface AgentHooks {
1693
1918
  turnId: string;
1694
1919
  usage: TurnUsage;
1695
1920
  message: SessionTurn;
1921
+ toolCounts: {
1922
+ turn: Readonly<Record<string, number>>;
1923
+ run: Readonly<Record<string, number>>;
1924
+ };
1696
1925
  childId: string;
1697
1926
  depth: number;
1698
1927
  }) => void;
@@ -1732,9 +1961,22 @@ interface AgentHooks {
1732
1961
  ok: false;
1733
1962
  error: Error;
1734
1963
  })) => void;
1964
+ /**
1965
+ * MCP-side counterpart of `tool:gate`. Same shape: set `block` to refuse,
1966
+ * set `result` to substitute a successful payload and skip the upstream
1967
+ * MCP `callTool`. When both are set across the handler chain, `block` wins.
1968
+ *
1969
+ * Fires INSIDE the MCP wrapper's `execute`, after the loop's `tool:gate`
1970
+ * already ran. Does **not** carry `runToolCounts` — those are loop-level
1971
+ * and already exposed on `tool:gate` for MCP tools (which are registered
1972
+ * as agent tools under their namespaced name `mcp_<server>_<tool>`). Use
1973
+ * `tool:gate` for budget / dedup logic; reserve `mcp:tool:gate` for
1974
+ * MCP-specific concerns (per-server routing, transport-aware refusals).
1975
+ */
1735
1976
  'mcp:tool:gate': (ctx: McpToolHookContext & {
1736
1977
  block: boolean;
1737
1978
  reason: string;
1979
+ result?: string | ToolResultContent[];
1738
1980
  }) => void;
1739
1981
  'mcp:tool:before': (ctx: McpToolHookContext) => void;
1740
1982
  'mcp:tool:after': (ctx: McpToolHookContext & {
@@ -1785,6 +2027,24 @@ interface AgentHooks {
1785
2027
  bytes: number;
1786
2028
  budget: number;
1787
2029
  }) => void;
2030
+ /**
2031
+ * Fires when a per-tool budget configured via `behavior.toolBudgets` is
2032
+ * exceeded for a specific tool. `mode` reflects how the framework reacted:
2033
+ * `'steer'` lets the call run and queues a post-turn nudge; `'block'`
2034
+ * refuses the call outright with `Blocked: <message>`.
2035
+ *
2036
+ * `count` is the run-cumulative dispatched count just before this call.
2037
+ * Use `turnId` to correlate with `turn:after` if you need the integer turn
2038
+ * index. Distinct from `budget:exceeded` (byte-level) so consumers can
2039
+ * subscribe specifically; both can fire in the same turn.
2040
+ */
2041
+ 'tool-budget:exceeded': (ctx: {
2042
+ tool: string;
2043
+ count: number;
2044
+ max: number;
2045
+ turnId: string;
2046
+ mode: 'steer' | 'block';
2047
+ }) => void;
1788
2048
  'agent:abort': (ctx: object) => void;
1789
2049
  'agent:done': (ctx: AgentStats) => void;
1790
2050
  'session:start': (ctx: SessionHookContext & {
@@ -1832,7 +2092,14 @@ interface AgentOptions {
1832
2092
  session?: Session;
1833
2093
  /** Skills configuration */
1834
2094
  skills?: SkillsConfig;
1835
- /** @internal */
2095
+ /**
2096
+ * Test seam — replaces the default MCP connector with a custom
2097
+ * implementation. Bypasses the `mcpServers` normalization layer entirely
2098
+ * and is **not** part of the supported public API. Subject to change or
2099
+ * removal in any release.
2100
+ *
2101
+ * @internal
2102
+ */
1836
2103
  mcpConnector?: (configs: McpServerConfig[]) => Promise<McpConnection>;
1837
2104
  /**
1838
2105
  * Pre-connect MCP servers in the background as soon as `createAgent` returns,
@@ -1896,7 +2163,13 @@ interface Agent {
1896
2163
  readonly session: Session | null;
1897
2164
  /** Snapshot of currently active skills. */
1898
2165
  readonly activeSkills: readonly ActiveSkill[];
1899
- meta: Record<string, unknown>;
2166
+ /**
2167
+ * Frozen view of the underlying `provider.meta`. Read-only to prevent
2168
+ * accidental cross-agent contamination — writes are rejected at runtime
2169
+ * (via `Object.freeze`) and at compile time (via `Readonly`). To override
2170
+ * model / capability defaults, construct a new provider.
2171
+ */
2172
+ readonly meta: Readonly<Record<string, unknown>>;
1900
2173
  }
1901
2174
  declare function createAgent({ provider, name: agentName, system: agentSystem, tools: agentTools, toolAliases, behavior: agentBehavior, execution, mcpServers, session, skills: agentSkills, mcpConnector, eager }: AgentOptions): Agent;
1902
2175