zidane 5.7.7 → 5.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { D as SkillConfig, Gt as AgentStats, Kt as ChildRunStats, N as Session, Zt as McpServerConfig, bn as TurnUsage, dn as SessionTurn, ft as StreamOptions, gn as ToolResultContent, i as AgentOptions, l as SkillActivationState, lt as Provider, mn as ThinkingLevel, r as AgentHooks, s as ActiveSkill, v as ToolContext, y as ToolDef } from "./agent-BSPhByzT.js";
1
+ import { D as SkillConfig, Gt as AgentStats, I as SessionStore, Kt as ChildRunStats, N as Session, Zt as McpServerConfig, bn as TurnUsage, dn as SessionTurn, ft as StreamOptions, gn as ToolResultContent, i as AgentOptions, l as SkillActivationState, lt as Provider, mn as ThinkingLevel, r as AgentHooks, rn as PromptPart, s as ActiveSkill, v as ToolContext, y as ToolDef, yn as TurnFinishReason } from "./agent-BSPhByzT.js";
2
2
  import { a as ExecutionContext, o as ExecutionHandle } from "./types-CEAMIUXw.js";
3
3
  import { Hookable } from "hookable";
4
4
  import { OAuthClientProvider, OAuthDiscoveryState } from "@modelcontextprotocol/sdk/client/auth.js";
@@ -618,1348 +618,1495 @@ declare const BYTES_PER_TOKEN = 4;
618
618
  */
619
619
  declare function estimateTokens(text: string): number;
620
620
  //#endregion
621
- //#region src/logger.d.ts
622
- type LogLevel = 'debug' | 'info' | 'warn' | 'error';
623
- interface LogRecord {
624
- level: LogLevel;
625
- /** Unix ms — set by `Logger` at emit time. */
626
- timestamp: number;
627
- /** Free-form message. Sinks render this as the human-facing line. */
621
+ //#region src/run-summary.d.ts
622
+ interface RunSummaryTokens {
623
+ input: number;
624
+ output: number;
625
+ cacheRead: number;
626
+ cacheCreation: number;
627
+ cost?: number;
628
+ /** First observable byte from the provider, ms from run start. */
629
+ ttftMs?: number;
630
+ }
631
+ interface RunSummaryByModel {
632
+ modelId: string;
633
+ input: number;
634
+ output: number;
635
+ cacheRead: number;
636
+ cacheCreation: number;
637
+ cost: number;
638
+ turns: number;
639
+ }
640
+ interface RunSummaryError {
641
+ kind: 'stream' | 'tool' | 'mcp-tool' | 'mcp' | 'spawn';
628
642
  message: string;
629
- /** Structured fields. Correlation ids land here automatically. */
630
- attrs: Record<string, unknown>;
643
+ errorType?: string;
644
+ turnId?: string;
645
+ callId?: string;
646
+ server?: string;
647
+ toolName?: string;
648
+ childId?: string;
649
+ statusCode?: number;
650
+ requestId?: string;
631
651
  }
632
- interface LogSink {
633
- emit: (record: LogRecord) => void;
652
+ interface RunSummaryBlock {
653
+ callId: string;
654
+ toolName: string;
655
+ outcome: 'gate-block' | 'unknown' | 'invalid-input';
656
+ reason?: string;
634
657
  }
635
- interface Logger {
636
- debug: (message: string, attrs?: Record<string, unknown>) => void;
637
- info: (message: string, attrs?: Record<string, unknown>) => void;
638
- warn: (message: string, attrs?: Record<string, unknown>) => void;
639
- error: (message: string, attrs?: Record<string, unknown>) => void;
640
- /**
641
- * Returns a child logger that prepends the given attributes onto every
642
- * subsequent emit. Equivalent to `pino.child` / `winston.child`. The
643
- * parent and child share the same sink — children are zero-cost.
644
- */
645
- with: (extra: Record<string, unknown>) => Logger;
646
- /**
647
- * Inspectable baseline attributes — handy for tests and for hook
648
- * handlers that want to clone-with-extra without recursing.
649
- */
650
- readonly baseAttributes: Readonly<Record<string, unknown>>;
658
+ interface RunSummaryValidation {
659
+ callId: string;
660
+ toolName: string;
661
+ reason: string;
651
662
  }
652
- /**
653
- * Build a Logger from a sink. Stateless and cheap; create one per agent
654
- * (or per app) and use `.with()` to attach correlation ids per-call.
655
- */
656
- declare function createLogger(sink: LogSink, baseAttributes?: Readonly<Record<string, unknown>>): Logger;
657
- interface ConsoleSinkOptions {
658
- /**
659
- * Minimum level to emit. Defaults to `'info'` — `debug` is dropped so
660
- * the harness's lifecycle logging is not noisy by default. Set to
661
- * `'debug'` to see every event.
662
- */
663
- minLevel?: LogLevel;
664
- /** Custom output stream. Defaults to `process.stderr` so logs don't pollute stdout. */
665
- stream?: {
666
- write: (chunk: string) => void;
667
- };
663
+ interface RunSummaryBudget {
664
+ kind: 'bytes' | 'tool-count';
665
+ /** Tool name (for `'tool-count'`); absent for byte budgets. */
666
+ toolName?: string;
667
+ /** `mode` for `'tool-count'`; absent for byte budgets. */
668
+ mode?: 'steer' | 'block';
669
+ observed: number;
670
+ limit: number;
671
+ turnId?: string;
668
672
  }
669
673
  /**
670
- * Human-readable terminal sink. Renders each record as
671
- * `<ISO timestamp> <LEVEL> <message> <attrs as kv pairs>`.
672
- *
673
- * Honors `process.stderr` by default so log lines don't interleave with
674
- * the agent's stdout-bound output (chat responses, JSON results).
675
- */
676
- declare function consoleSink(options?: ConsoleSinkOptions): LogSink;
677
- /**
678
- * One-JSON-object-per-line sink. Suitable for piping into log aggregators
679
- * (Datadog Agent, Fluent Bit, Loki, Vector) that expect JSONL.
674
+ * Postmortem snapshot of one `agent.run()`. Strictly serializable — every
675
+ * field round-trips through `JSON.stringify` / `JSON.parse` without loss
676
+ * so a log aggregator can ingest it as-is.
680
677
  */
681
- declare function jsonSink(options?: ConsoleSinkOptions): LogSink;
682
- interface LoggingHooksOptions {
683
- logger: Logger;
678
+ interface RunSummary {
679
+ runId?: string;
680
+ parentRunId?: string;
681
+ depth: number;
682
+ agentName?: string;
683
+ startedAt: number;
684
+ endedAt: number;
685
+ durationMs: number;
686
+ status: 'completed' | 'aborted';
687
+ turns: number;
688
+ totals: RunSummaryTokens;
689
+ byModel: RunSummaryByModel[];
690
+ errors: RunSummaryError[];
691
+ blocks: RunSummaryBlock[];
692
+ validationRejects: RunSummaryValidation[];
693
+ budgetEvents: RunSummaryBudget[];
694
+ /** Counts of pairing repairs, keyed by repair mode. */
695
+ pairingRepairs: Record<string, number>;
684
696
  /**
685
- * Minimum interesting level for harness-emitted lines. Default `'info'`.
686
- * Set to `'debug'` to see every tool dispatch / stream event. Set to
687
- * `'warn'` to mute the chatty ones and only see failures + budgets.
697
+ * Postmortem snapshots of child runs that bubbled their stats up via
698
+ * `spawn:complete`. Only present when the run actually spawned.
688
699
  */
689
- level?: LogLevel;
700
+ children?: RunSummary[];
701
+ }
702
+ interface RunSummaryCollectorOptions {
690
703
  /**
691
- * When true (default), lifecycle events (`agent:start`, `turn:before`,
692
- * `tool:before`, `mcp:bootstrap:start`) emit at `debug` level so they
693
- * stay quiet by default. Set false to mute them entirely regardless of
694
- * the configured minimum level — useful when piping into a tracer
695
- * that already captures lifecycle.
704
+ * Called with the assembled {@link RunSummary} on every `agent:done`.
705
+ * Synchronous heavy I/O should be deferred (e.g. via `setImmediate`).
696
706
  */
697
- includeLifecycle?: boolean;
707
+ onSummary?: (summary: RunSummary) => void;
698
708
  }
699
- interface LoggingHookSet {
709
+ interface RunSummaryCollector {
710
+ /** Install the collector's hook handlers. Returns an uninstall fn. */
700
711
  install: (hooks: Hookable<AgentHooks>) => () => void;
712
+ /** Most-recent summary; `undefined` until the first `agent:done` fires. */
713
+ latest: () => RunSummary | undefined;
701
714
  }
702
715
  /**
703
- * Install a bundle of hook handlers that emit a structured line per
704
- * relevant lifecycle event, automatically attaching correlation ids
705
- * (`runId`, `turnId`, `callId`, `childId`, `depth`, `agentName`).
716
+ * Build a run-summary collector. State is created fresh inside each
717
+ * `install()` call, so a single collector instance can be installed
718
+ * across multiple agents without attribution cross-talk. `latest()`
719
+ * returns the most-recent summary across **any** install — install
720
+ * per-agent collectors if you need separate post-run snapshots.
706
721
  *
707
722
  * @example
708
723
  * ```ts
709
- * const logger = createLogger(consoleSink({ minLevel: 'debug' }), { service: 'tui' })
710
- * const lh = createLoggingHooks({ logger })
711
- * const uninstall = lh.install(agent.hooks)
724
+ * const collector = createRunSummaryCollector({
725
+ * onSummary: s => console.log(JSON.stringify(s)),
726
+ * })
727
+ * const uninstall = collector.install(agent.hooks)
712
728
  * try { await agent.run({ prompt }) }
713
729
  * finally { uninstall() }
714
730
  * ```
715
731
  */
716
- declare function createLoggingHooks(options: LoggingHooksOptions): LoggingHookSet;
732
+ declare function createRunSummaryCollector(options?: RunSummaryCollectorOptions): RunSummaryCollector;
717
733
  //#endregion
718
- //#region src/loop.d.ts
719
- /**
720
- * Canonical tool_result text emitted when a tool call is interrupted by the
721
- * user mid-flight (Esc / Ctrl-C / external `AbortSignal`). Mirrors Claude
722
- * Code's `INTERRUPT_MESSAGE_FOR_TOOL_USE` so downstream consumers can pattern
723
- * match a single string across both harnesses. Always paired with
724
- * `isError: true` on the wire — the model treats it as a failed call rather
725
- * than a successful tool response.
726
- */
727
- declare const INTERRUPT_MESSAGE_FOR_TOOL_USE = "[Request interrupted by user for tool use]";
728
- /**
729
- * Canonical tool_result text emitted when a tool call is skipped because a
730
- * steering message arrived between dispatches inside
731
- * {@link executeToolBatch}. Distinguished from
732
- * {@link INTERRUPT_MESSAGE_FOR_TOOL_USE} so consumers can split "user
733
- * cancelled" from "framework superseded".
734
- */
735
- declare const TOOL_USE_SKIPPED_MESSAGE = "[Tool use skipped \u2014 superseded by user message]";
734
+ //#region src/tools/edit.d.ts
736
735
  /**
737
- * Canonical tool_result text emitted when a single tool call is cancelled
738
- * mid-flight via `agent.cancelTool(callId)` (typically the TUI's
739
- * "cancel this tool" affordance). Distinguished from
740
- * {@link INTERRUPT_MESSAGE_FOR_TOOL_USE} (run-wide user abort) and
741
- * {@link TOOL_USE_SKIPPED_MESSAGE} (steered) so the model — and downstream
742
- * consumers — can tell the three apart by string match.
736
+ * Surgical edit — replace `old_string` with `new_string` in a single file.
743
737
  *
744
- * Always paired with `isError: true` on the wire so the model treats the
745
- * call as failed rather than as a successful response. The remaining tool
746
- * calls in the batch continue running, in contrast with a full-run abort.
747
- */
748
- declare const TOOL_USE_CANCELLED_MESSAGE = "[Tool call cancelled by user]";
749
- /**
750
- * Canonical `tool_result.content` text emitted to siblings that were
751
- * cancelled by a `shell` error in the same batch. Distinct from
752
- * {@link INTERRUPT_MESSAGE_FOR_TOOL_USE} (user-issued abort) and
753
- * {@link TOOL_USE_SKIPPED_MESSAGE} (steered) so consumers can split
754
- * the three causes by string-match.
738
+ * Mirrors Claude Code's `Edit` semantics so models post-trained on Anthropic's
739
+ * tool surface need no relearning. Fails clearly when `old_string` isn't unique
740
+ * (unless `replace_all: true`) and when not found, with a nearest-match preview
741
+ * so the model can recover without a separate `read_file` round-trip.
755
742
  */
756
- declare const SHELL_CASCADE_CANCEL_MESSAGE = "Cancelled: a sibling `shell` call in the same batch errored; re-run independently if still needed.";
743
+ declare const edit: ToolDef;
757
744
  //#endregion
758
- //#region src/loop-persistence.d.ts
759
- /**
760
- * Bytes of head content included in the inline preview block. 2 KiB matches
761
- * Claude Code's `PREVIEW_SIZE_BYTES` — enough for the model to identify the
762
- * content class (error output / structured data / log shape) and decide
763
- * whether to call `read_file` on the persisted path for the full payload.
764
- *
765
- * Tail-priority preview (matching `shell`'s truncation strategy) was
766
- * considered but rejected: most "what is this?" decisions get made from
767
- * the head, and the path is in the stub for the rare case where the tail
768
- * matters.
769
- */
770
- declare const PERSISTENCE_PREVIEW_BYTES: number;
745
+ //#region src/tools/glob.d.ts
746
+ declare const glob: ToolDef;
747
+ //#endregion
748
+ //#region src/tools/grep.d.ts
749
+ declare const grep: ToolDef;
750
+ //#endregion
751
+ //#region src/tools/interaction.d.ts
752
+ interface InteractionToolOptions {
753
+ /** JSON Schema for the request payload the model sends */
754
+ schema: Record<string, unknown>;
755
+ /** Tool name (default: 'interaction') */
756
+ name?: string;
757
+ /** Tool description shown to the model */
758
+ description?: string;
759
+ /** Called when the model invokes this tool. Receives the validated payload and tool context, returns data for the model. */
760
+ onRequest: (payload: Record<string, unknown>, ctx: ToolContext) => Promise<Record<string, unknown> | string>;
761
+ }
771
762
  /**
772
- * Byte-stable prefix every {@link buildPersistedStub} output starts with.
773
- * Exported so wire-level passes (tail compaction, future stale-output
774
- * elision) can recognize a persisted stub and preserve its path attribute
775
- * rather than replacing the stub with their own — losing the pointer to
776
- * the on-disk blob.
763
+ * Create an interaction tool that lets the agent request structured input.
777
764
  *
778
- * Bound to the literal opening of the XML tag; changing the stub format
779
- * requires updating this constant in lockstep (and shipping a migration
780
- * for in-flight sessions).
765
+ * The model calls this tool with a payload matching the schema.
766
+ * `onRequest` is called with the payload and should return the response
767
+ * (string or object) that gets sent back to the model as the tool result.
781
768
  */
782
- declare const PERSISTED_STUB_PREFIX = "<persisted-output tool=\"";
783
- /**
784
- * Resolve the per-session persistence directory under `<userDir>/tool-results/<sessionId>/`.
785
- *
786
- * The chat layer calls this at session activation and forwards the result
787
- * via `behavior.persistDir`. Exposed as a public helper so SDK consumers
788
- * pick the same layout — single source of truth for "where do blobs live".
789
- */
790
- declare function resolvePersistDir(opts: {
791
- userDir: string;
792
- sessionId: string;
793
- }): string;
794
- /**
795
- * Resolve the per-session background-tasks directory under
796
- * `<userDir>/<sessionId>/tasks/`.
797
- *
798
- * The chat layer calls this at session activation and forwards the result
799
- * via `behavior.tasksDir`. Same shape as {@link resolvePersistDir}: hosts
800
- * get a single source of truth for "where do task log files live".
801
- * Created on first write; cleanup is the session-delete path's job.
802
- */
803
- declare function resolveTasksDir(opts: {
804
- userDir: string;
805
- sessionId: string;
806
- }): string;
807
- /**
808
- * Inputs to {@link maybePersistToolResult}. Kept as a struct so the loop's
809
- * call site stays readable and additional optional knobs (compression,
810
- * mime detection, …) land without re-threading every call site.
811
- */
812
- interface PersistInput {
813
- /** Canonical tool name — checked against `excludeTools`. */
814
- toolName: string;
815
- /** `tool_use` id from the assistant turn. Used as the filename. */
816
- callId: string;
817
- /** Result returned by the tool (post-`tool:transform`). */
818
- output: string | ToolResultContent[];
819
- /** Byte threshold; outputs at or below stay inline. */
820
- threshold: number;
821
- /** Canonical tool names that bypass persistence. */
822
- excludeTools?: readonly string[];
823
- /** Persistence root directory. Created on first write. */
824
- persistDir: string;
769
+ declare function createInteractionTool(options: InteractionToolOptions): ToolDef;
770
+ //#endregion
771
+ //#region src/tools/list-files.d.ts
772
+ declare const listFiles: ToolDef;
773
+ //#endregion
774
+ //#region src/tools/multi-edit.d.ts
775
+ declare const multiEdit: ToolDef;
776
+ //#endregion
777
+ //#region src/tools/read-file.d.ts
778
+ declare const readFile: ToolDef;
779
+ //#endregion
780
+ //#region src/tools/shell.d.ts
781
+ interface CreateShellToolOptions {
825
782
  /**
826
- * Optional cap on the total bytes of persisted blobs under `persistDir`.
827
- * When set (and > 0), after a successful write the helper sweeps the
828
- * directory and removes the oldest `*.txt` blobs (by mtime) until the
829
- * sum of remaining sizes is at or below the cap.
783
+ * Whether to expose the `run_in_background` flag in the input schema +
784
+ * the background-mode paragraphs in the description. When `false`, the
785
+ * model never sees the flag and won't try to use it. The execute path
786
+ * still has a defensive fallback: an explicit `run_in_background: true`
787
+ * call (e.g. from a hand-crafted message) returns a clean error rather
788
+ * than silently running foreground.
830
789
  *
831
- * Bound to the **current session** because `persistDir` is per-session
832
- * (see {@link resolvePersistDir}); eviction never crosses session
833
- * boundaries. The new blob is always preserved — its mtime is the
834
- * latest, so the LRU sort guarantees older blobs go first.
790
+ * Default: `true`.
791
+ */
792
+ allowBackground?: boolean;
793
+ /**
794
+ * Canonical names of tools registered alongside `shell` on the same
795
+ * agent. When non-empty, the description gains a "prefer the dedicated
796
+ * tool" block for each known sibling (`read_file`, `glob`, `grep`,
797
+ * `list_files`, `edit`, `write_file`) — useful against the
798
+ * `ls`/`cat`-to-re-verify loop some models fall into when both a
799
+ * dedicated tool AND `shell` are visible. Unknown / unrecognized names
800
+ * are ignored.
835
801
  *
836
- * Skipped when the value isn't a positive finite number. Eviction
837
- * failures (permissions, races) are surfaced through `ZIDANE_DEBUG`
838
- * but never block the calling tool result; an over-cap dir is a
839
- * housekeeping concern, not a correctness one.
802
+ * Set by `createAgent` per-run from the tool registry; hosts that
803
+ * construct a `shell` directly can pass it explicitly. Omit to suppress
804
+ * the block entirely (no nudge for shell-only agents, no nudge for
805
+ * hosts that prefer to author their own anti-loop prose).
840
806
  */
841
- maxBytes?: number;
842
- }
843
- type PersistOutcome = {
844
- kind: 'skip';
845
- reason: 'disabled' | 'excluded' | 'under-threshold' | 'unsupported-shape' | 'unsafe-call-id' | 'invalid-persist-dir';
846
- } | {
847
- kind: 'persisted';
848
- output: string;
849
- originalBytes: number;
850
- persistedPath: string;
851
- evicted?: {
852
- files: number;
853
- bytes: number;
854
- };
855
- } | {
856
- kind: 'error';
857
- reason: 'write-failed';
858
- error: Error;
859
- };
860
- /**
861
- * Decide-and-persist for a single tool result. Pure decision + filesystem
862
- * side-effect; returns the new wire-level `output` string when substitution
863
- * happened, otherwise tells the caller to leave the result alone.
864
- *
865
- * Atomicity: writes go through `<path>.tmp` + `rename` so a concurrent
866
- * read (or a crash mid-write) never sees a half-written blob.
867
- *
868
- * `ToolResultContent[]` results (images, structured blocks) currently bypass
869
- * persistence — the inline image bytes are the point of the call, and a
870
- * mixed text/image array isn't representable as a single `.txt` file. We
871
- * may revisit if a tool starts returning very large text-only arrays.
872
- */
873
- declare function maybePersistToolResult(input: PersistInput): Promise<PersistOutcome>;
874
- interface BuildStubInput {
875
- toolName: string;
876
- originalBytes: number;
877
- persistedPath: string;
878
- output: string;
807
+ registeredCanonicals?: ReadonlySet<string>;
808
+ /**
809
+ * The agent's `toolAliases` map, used to render the wire-level name of
810
+ * each sibling in the swap block. Without this, the block always prints
811
+ * canonical names — fine for the default preset, wrong for hosts that
812
+ * alias-rename (the model would be told to call a name it doesn't see
813
+ * in the tool spec).
814
+ */
815
+ toolAliases?: Record<string, string>;
879
816
  }
880
817
  /**
881
- * Render the byte-stable `<persisted-output>` stub the model sees in place
882
- * of the original `tool_result`.
883
- *
884
- * Format choices:
885
- * - XML wrapper because models reliably parse it as structural.
886
- * - Byte count + path in attributes so the model can decide whether to
887
- * `read_file` the persisted blob without scanning the preview.
888
- * - Preview always shows the head — `shell`'s tail-priority truncation is
889
- * irrelevant here because the model has the full path if it needs the
890
- * tail.
891
- * - No timestamps, no random UUIDs inside the stub: every byte must be
892
- * reproducible from the inputs, otherwise re-emission on subsequent
893
- * turns would bust the prompt cache.
818
+ * Factory for the `shell` tool. The default exported `shell` is
819
+ * equivalent to `createShellTool({ allowBackground: true })`. The
820
+ * factory is the entry point hosts use when they want to override the
821
+ * default — e.g. to ship a preset that always disables background mode
822
+ * regardless of `behavior.tasksDir`.
894
823
  *
895
- * Exported for tests (asserting the byte-stable contract) and for SDK
896
- * consumers wiring their own persistence middleware against the same
897
- * surface.
824
+ * Hosts that use the framework's `createAgent` typically don't need to
825
+ * call this directly: when `behavior.tasksDir` is unset or
826
+ * `behavior.disableBackgroundTasks: true` is set, the agent
827
+ * automatically rewrites the registered `shell` (if it's the
828
+ * framework's built-in) using this factory.
898
829
  */
899
- declare function buildPersistedStub(input: BuildStubInput): string;
830
+ declare function createShellTool(opts?: CreateShellToolOptions): ToolDef;
900
831
  /**
901
- * Remove every persisted blob belonging to a session. Called by the chat
902
- * layer from its session-delete path so closing a session frees the disk
903
- * footprint alongside the SQLite row.
832
+ * Default `shell` tool with background mode enabled.
904
833
  *
905
- * Idempotent — missing directory (session never persisted anything) is a
906
- * no-op, not an error. Wraps the `rm -rf` so a permissions blip on one
907
- * blob doesn't propagate to the caller; the chat layer can't usefully
908
- * recover from "couldn't unlink a result file" mid-delete.
834
+ * Most hosts use this directly via `basicTools`. When the agent's
835
+ * `behavior.tasksDir` is unset OR `behavior.disableBackgroundTasks:
836
+ * true` is set, `createAgent` auto-rewrites this identity to a
837
+ * `createShellTool({ allowBackground: false })` variant so the model
838
+ * never sees a flag it can't use. Hosts who want to bypass that
839
+ * auto-rewrite can register a `createShellTool({ allowBackground })`
840
+ * directly — the rewrite only fires on identity-equal references to
841
+ * this constant.
909
842
  */
910
- declare function cleanupPersistedSession(persistRoot: string): Promise<void>;
843
+ declare const shell: ToolDef;
911
844
  //#endregion
912
- //#region src/mcp/oauth-provider.d.ts
913
- /**
914
- * Per-server persisted state. Subfields are optional so a partial save
915
- * (e.g. `saveCodeVerifier` arriving before `saveTokens`) doesn't blow away
916
- * earlier subfields — the provider always patches, never replaces.
917
- */
918
- interface McpCredentialEntry {
919
- tokens?: OAuthTokens;
920
- clientInformation?: OAuthClientInformationMixed;
921
- discoveryState?: OAuthDiscoveryState;
845
+ //#region src/tools/shell-kill.d.ts
846
+ declare const shellKill: ToolDef;
847
+ //#endregion
848
+ //#region src/tools/skills-read.d.ts
849
+ interface SkillsReadToolOptions {
850
+ catalog: readonly SkillConfig[];
851
+ state: SkillActivationState;
922
852
  }
923
- interface McpCredentialStore {
924
- load: (name: string) => McpCredentialEntry | undefined;
925
- save: (name: string, entry: McpCredentialEntry) => void;
926
- delete: (name: string) => void;
853
+ declare function createSkillsReadTool(options: SkillsReadToolOptions): ToolDef;
854
+ //#endregion
855
+ //#region src/tools/skills-run-script.d.ts
856
+ interface SkillsRunScriptToolOptions {
857
+ catalog: readonly SkillConfig[];
858
+ state: SkillActivationState;
859
+ /** Script timeout in milliseconds. Default 60000. */
860
+ scriptTimeoutMs?: number;
861
+ }
862
+ declare function createSkillsRunScriptTool(options: SkillsRunScriptToolOptions): ToolDef;
863
+ //#endregion
864
+ //#region src/tools/skills-use.d.ts
865
+ interface SkillsUseToolOptions {
866
+ /** Resolved skills catalog for this run. */
867
+ catalog: readonly SkillConfig[];
868
+ /** Per-agent activation state the tool mutates. */
869
+ state: SkillActivationState;
870
+ /** Agent hooks — used to fire `skills:activate` on first activation. */
871
+ hooks: Hookable<AgentHooks>;
927
872
  }
928
873
  /**
929
- * In-memory store primarily for tests, but valid as a no-persistence option
930
- * (tokens evaporate on process exit, the user re-auths every cold start).
874
+ * Factory for `skills_use`. Auto-injected into the agent's tool set by the
875
+ * agent runtime when a non-empty skills catalog is available (unless
876
+ * `SkillsConfig.tool === false`).
877
+ *
878
+ * The tool schema's `name` property is `enum`-constrained to the resolved
879
+ * catalog so the LLM cannot hallucinate a skill that doesn't exist.
931
880
  */
932
- declare function createMemoryMcpCredentialStore(seed?: Record<string, McpCredentialEntry>): McpCredentialStore;
933
- interface McpOAuthProviderOptions {
934
- /** Server name — used as the storage key. */
935
- name: string;
936
- /** Persistence backend. */
937
- store: McpCredentialStore;
881
+ declare function createSkillsUseTool(options: SkillsUseToolOptions): ToolDef;
882
+ //#endregion
883
+ //#region src/tools/spawn.d.ts
884
+ interface ChildAgent {
885
+ id: string;
886
+ task: string;
887
+ startedAt: number;
888
+ /** Subagent depth — 1 for a direct child of a top-level agent. */
889
+ depth: number;
890
+ }
891
+ interface SpawnToolState {
892
+ /** Currently running children. */
893
+ readonly children: ReadonlyMap<string, ChildAgent>;
938
894
  /**
939
- * Loopback callback URI. Pass `undefined` for bootstrap (non-interactive
940
- * mode — stored tokens + refresh only, never opens a browser).
895
+ * Cumulative stats across every completed direct child of this spawn-tool
896
+ * instance (returns a copy). Each child's contribution is the cumulative
897
+ * `AgentStats` returned by its `agent.run()` — so
898
+ * `totalIn`/`totalOut`/`totalCacheRead`/`totalCacheCreation` cover the
899
+ * entire subtree (children + grandchildren + …), while `turns` and
900
+ * `elapsed` stay parent-loop-only per child and are summed across direct
901
+ * children. `elapsed` over-counts when children ran in parallel.
902
+ *
903
+ * Lives across multiple parent runs that share this instance.
941
904
  */
942
- redirectUri?: string;
943
- /**
944
- * Invoked when the SDK wants the user agent to navigate to the authorization
945
- * URL. Typically the host opens the browser AND emits a hook so the TUI can
946
- * render the URL in a status row. No-op in non-interactive mode (the SDK
947
- * still calls this before throwing `UnauthorizedError` from connect).
905
+ readonly totalChildStats: Readonly<AgentStats>;
906
+ }
907
+ interface SpawnToolOptions {
908
+ /** Maximum concurrent sub-agents (default: 3). */
909
+ maxConcurrent?: number;
910
+ /**
911
+ * Maximum subagent depth. 0 disables spawning entirely; 1 allows top-level
912
+ * spawns but forbids grandchildren; 3 (default) allows three levels of
913
+ * recursion — enough for most orchestration patterns, a sharp ceiling
914
+ * against runaway loops.
948
915
  */
949
- onAuthorizationUrl?: (url: URL) => void | Promise<void>;
916
+ maxDepth?: number;
917
+ /** Child model override. */
918
+ model?: string;
919
+ /** Child system prompt override. Per-spawn `input.system` takes precedence. */
920
+ system?: string;
921
+ /** Child thinking level. */
922
+ thinking?: 'off' | 'minimal' | 'low' | 'medium' | 'high';
923
+ /** Preset override for children. Shallow-merged over the parent's preset (parent fields still win for anything left unset). */
924
+ preset?: Preset;
950
925
  /**
951
- * `client_name` used in dynamic client registration. Defaults to `'zidane'`.
952
- * Some servers display this string to the user on the consent screen.
926
+ * Per-child timeout, in milliseconds. When the child exceeds it the spawn
927
+ * tool returns a timeout marker, fires `spawn:error`, and destroys the
928
+ * child agent. Default: none.
953
929
  */
954
- clientName?: string;
930
+ timeoutMs?: number;
955
931
  /**
956
- * Override the requested OAuth scope. Default: unset (the SDK negotiates
957
- * via the server's metadata).
932
+ * When `true` and the parent has a session, the child reuses the parent's
933
+ * session — child turns are appended with the child's own `runId`, and the
934
+ * resulting `SessionRun` carries `parentRunId` so the tree is
935
+ * reconstructible. Default: `false` (child is in-memory only).
936
+ *
937
+ * **Read-state isolation.** Sharing the session also shares the
938
+ * `read_file` / `requireReadBeforeEdit` tracking map (it's keyed
939
+ * by `Session`). With `persist: false` the child gets no session,
940
+ * so reads inside the subagent populate nothing the parent can see —
941
+ * a follow-up `edit` / `multi_edit` in the parent will trip the
942
+ * gate with `"has not been read"` even though the model just
943
+ * read the file in the child. Use {@link shareReadState} when
944
+ * you want the parent's gate to honor the child's reads WITHOUT
945
+ * also persisting child turns to the parent's session.
958
946
  */
959
- scope?: string;
960
- }
961
- declare class McpOAuthProvider implements OAuthClientProvider {
962
- private readonly name;
963
- private readonly store;
964
- private readonly _redirectUri?;
965
- private readonly onAuthorizationUrl?;
966
- private readonly clientName;
967
- private readonly _scope?;
968
- private codeVerifierValue;
969
- constructor(opts: McpOAuthProviderOptions);
970
- get redirectUrl(): string | URL | undefined;
971
- get clientMetadata(): OAuthClientMetadata;
972
- tokens(): OAuthTokens | undefined;
973
- saveTokens(tokens: OAuthTokens): void;
974
- clientInformation(): OAuthClientInformationMixed | undefined;
975
- saveClientInformation(info: OAuthClientInformationMixed): void;
976
- discoveryState(): OAuthDiscoveryState | undefined;
977
- saveDiscoveryState(state: OAuthDiscoveryState): void;
978
- saveCodeVerifier(verifier: string): void;
979
- codeVerifier(): string;
980
- redirectToAuthorization(url: URL): Promise<void>;
947
+ persist?: boolean;
981
948
  /**
982
- * Wipe stored credentials when the server reports the cached state is no
983
- * longer valid. The SDK calls this with a scope hint:
984
- * - `'tokens'` → access/refresh revoked, keep client registration
985
- * - `'client'` → client registration invalidated, reset everything
986
- * - `'verifier'`→ PKCE state stale (e.g. mismatched state param)
987
- * - `'discovery'` → discovery metadata stale (servers re-keyed)
988
- * - `'all'` → full reset
949
+ * Forward the parent's read-state map to the child agent so the
950
+ * `requireReadBeforeEdit` gate and `dedupReads` cache see reads
951
+ * across the parent/child boundary. Orthogonal to {@link persist} —
952
+ * use this when you want shared read tracking without sharing the
953
+ * session's turn history. Default: `false`.
954
+ *
955
+ * Has no effect when the parent has no read-state to share (no
956
+ * session and no explicit `readState` on the parent agent's
957
+ * options). Implementation: passes the parent's resolved
958
+ * `ReadStateMap` to the child via `AgentOptions.readState`, which
959
+ * tools resolve via `ctx.readState ?? getReadState(ctx.session)`.
989
960
  */
990
- invalidateCredentials(scope: 'all' | 'client' | 'tokens' | 'verifier' | 'discovery'): Promise<void>;
991
- private patch;
961
+ shareReadState?: boolean;
962
+ /**
963
+ * Forward a curated subset of child hook events (`stream:*`, `tool:*`,
964
+ * `turn:after`) onto the parent's hook bus as `child:*` events. Default:
965
+ * `true`. Grandchildren bubble through their child transparently.
966
+ */
967
+ forwardHooks?: boolean;
968
+ /** Called when a child agent starts. */
969
+ onSpawn?: (child: ChildAgent) => void;
970
+ /** Called when a child agent completes (success, abort, timeout, or error). */
971
+ onComplete?: (child: ChildAgent, stats: AgentStats, status: NonNullable<ChildRunStats['status']>) => void;
972
+ /**
973
+ * Named subagent presets the model can select via the `subagent_type`
974
+ * input field. Mirrors the Claude Code SDK's surface — models trained
975
+ * on it routinely emit `subagent_type: 'Explore' | 'Plan' |
976
+ * 'Verification' | 'general-purpose'`, and without a registry the
977
+ * field is silently dropped, so hosts wanting type-specialized
978
+ * subagents have to invent their own dispatch layer.
979
+ *
980
+ * Each entry overlays the base spawn config for that particular
981
+ * dispatch. Per-call `input.system` still wins over `subagents[type].system`
982
+ * so the model can always specialize further.
983
+ *
984
+ * When the registry is non-empty:
985
+ * - `subagent_type` appears in the spawn input schema as a `string`
986
+ * enum of the registered keys (plus the always-available
987
+ * `'general-purpose'` fallback).
988
+ * - Models that pass an unregistered type are routed to
989
+ * `'general-purpose'` (no error — degrade gracefully so trained
990
+ * models keep working even on hosts that haven't wired every
991
+ * type Claude Code uses).
992
+ *
993
+ * When the registry is empty / unset, the field is omitted entirely
994
+ * (preserves the historical schema for hosts that never use this).
995
+ *
996
+ * Default: `undefined` (no subagent types; `subagent_type` schema
997
+ * field hidden).
998
+ */
999
+ subagents?: SubagentRegistry;
992
1000
  }
993
1001
  /**
994
- * True when an HTTP transport's auth headers already include an explicit
995
- * Authorization. Used by the bootstrap escape-hatch: a user who provided
996
- * their own bearer token shouldn't be auto-promoted to OAuth on a 401.
997
- *
998
- * Case-insensitive — Node normalizes outgoing headers to lowercase but
999
- * users hand-write `Authorization` in configs.
1002
+ * Per-type subagent override applied when the model calls
1003
+ * `spawn({ subagent_type: '…' })`. All fields are optional; absent
1004
+ * fields fall back to the parent's resolved configuration (see
1005
+ * {@link SpawnToolOptions} comments for the merge order).
1000
1006
  */
1001
- declare function hasAuthorizationHeader(headers: Record<string, string> | undefined): boolean;
1002
- //#endregion
1003
- //#region src/mcp/login.d.ts
1004
- interface LoginMcpServerOptions {
1005
- /** Persistence — same store the bootstrap path reads from. */
1006
- store: McpCredentialStore;
1007
+ interface SubagentDef {
1007
1008
  /**
1008
- * Invoked with the authorization URL once it's ready. Hosts typically
1009
- * (a) emit `mcp:auth:url` for the TUI, and (b) call `tryOpenBrowser`.
1010
- * The URL is identical to the one passed to the `mcp:auth:url` hook
1011
- * fired automatically — this callback is a synchronous hook for callers
1012
- * that don't want to wire the agent hook machinery.
1009
+ * System prompt override for this subagent type. Per-call
1010
+ * `input.system` still wins — model-supplied specialization beats
1011
+ * preset defaults.
1013
1012
  */
1014
- onAuthorizationUrl?: (url: URL) => void | Promise<void>;
1015
- /** Cancels the flow (esc / close modal / SIGINT). */
1016
- signal?: AbortSignal;
1017
- /** Agent hooks. The flow emits `mcp:auth:url`/`success`/`error` when wired. */
1018
- hooks?: Hookable<AgentHooks>;
1019
- /** Override `client_name` shown on consent screens. Default: 'zidane'. */
1020
- clientName?: string;
1021
- /** Override the requested OAuth scope. */
1022
- scope?: string;
1013
+ system?: string;
1023
1014
  /**
1024
- * Override the loopback callback path. Default: `/callback`. Useful only
1025
- * for servers that pinned a different path during registration.
1015
+ * Restrict the child agent's tool registry to this list of canonical
1016
+ * tool names. Operates as a filter over the parent's tools (the
1017
+ * parent's selection is the upper bound — a subagent can never gain
1018
+ * tools the parent doesn't have). When unset, the child inherits the
1019
+ * parent's full tool list.
1020
+ *
1021
+ * Tool names are canonical (registry-key) not wire/alias names — the
1022
+ * filter runs before aliases are applied. Names that don't match a
1023
+ * parent tool are silently dropped (matches the lenient behaviour of
1024
+ * `enabledTools` on MCP configs).
1026
1025
  */
1027
- callbackPath?: string;
1026
+ tools?: readonly string[];
1028
1027
  /**
1029
- * Maximum time to wait for the user to complete the browser flow, in ms.
1030
- * The user can also cancel via `signal`. Default: 5 minutes.
1028
+ * Mark this subagent as read-only — equivalent to listing only
1029
+ * obviously-non-mutating tools (`read_file`, `grep`, `glob`,
1030
+ * `list_files`) in {@link SubagentDef.tools}. Convenience for the
1031
+ * common "Plan" / "Explore" subagent shape Claude Code ships.
1032
+ *
1033
+ * When both `readonly: true` and `tools` are set, `tools` wins
1034
+ * (explicit beats implicit).
1031
1035
  */
1032
- timeoutMs?: number;
1033
- }
1034
- interface LoginMcpServerResult {
1035
- /** Stored OAuth tokens after a successful exchange. */
1036
- tokens: NonNullable<ReturnType<McpOAuthProvider['tokens']>>;
1036
+ readonly?: boolean;
1037
1037
  /**
1038
- * Upstream tool descriptors discovered after re-connecting with the new
1039
- * tokens. Already filtered by the server's `enabledTools` / `disabledTools`
1040
- * is NOT applied here — that's a bootstrap concern. Hosts that want filtering
1041
- * should pass the result through `connectMcpServers` rebuild on the next
1042
- * session activation rather than reusing this list verbatim.
1038
+ * Short description rendered into the spawn tool's schema (the
1039
+ * `subagent_type` field's description) so the model can pick a type
1040
+ * that matches the task without round-tripping through docs.
1043
1041
  */
1044
- tools: Array<{
1045
- name: string;
1046
- description?: string | null;
1047
- inputSchema?: unknown;
1048
- }>;
1042
+ description?: string;
1049
1043
  }
1050
1044
  /**
1051
- * Run the full interactive OAuth flow for `config`. Only supports `sse` and
1052
- * `streamable-http` transports — `stdio` MCP servers don't speak OAuth.
1053
- *
1054
- * Throws on:
1055
- * - Wrong transport.
1056
- * - Abort signal.
1057
- * - Browser-side error (user denied, server rejected, etc.).
1058
- * - Code exchange failure.
1059
- * - Post-exchange connect failure.
1060
- *
1061
- * Always closes the loopback callback server before returning, success or
1062
- * failure.
1045
+ * Map of subagent-type key → preset. Keys are case-sensitive and
1046
+ * appear verbatim in the spawn input schema's enum so the model emits
1047
+ * them with the same casing. Common conventions: `'Explore'`,
1048
+ * `'Plan'`, `'Verification'`, `'general-purpose'` (Claude Code SDK's
1049
+ * built-in set).
1063
1050
  */
1064
- declare function loginMcpServer(config: McpServerConfig, options: LoginMcpServerOptions): Promise<LoginMcpServerResult>;
1065
- //#endregion
1066
- //#region src/mcp/oauth-callback.d.ts
1051
+ type SubagentRegistry = Record<string, SubagentDef>;
1067
1052
  /**
1068
- * Local loopback HTTP callback for OAuth 2.0 authorization code flows.
1069
- *
1070
- * Stands up a one-shot server on `127.0.0.1:<random>` that captures the
1071
- * `?code=...` redirect from a browser-driven OAuth flow and resolves a
1072
- * promise with the code. Used as the `redirectUrl` half of the MCP SDK's
1073
- * `OAuthClientProvider` (the persistence half lives separately).
1053
+ * Create a configured spawn tool.
1074
1054
  *
1075
- * Design:
1076
- * - Loopback-only (`127.0.0.1`) — the OAuth spec treats `http://127.0.0.1:<port>`
1077
- * as a public-client redirect URI per RFC 8252 §7.3. Browsers do NOT block it,
1078
- * and Anthropic / OpenAI / Linear / GitHub all accept it.
1079
- * - Random port (`port = 0`) — the OS picks an unused one. We read the actual
1080
- * port back from `server.address()` after `listen()`.
1081
- * - Single-shot — the first GET to `path` with a `code` (or `error`) wins;
1082
- * subsequent requests get 404. The server keeps listening (in case the user
1083
- * hits "back" and re-authorizes), so callers must `close()` once they have
1084
- * the code or have given up.
1085
- * - Abort-aware — wiring an external `AbortSignal` rejects the promise and
1086
- * closes the server immediately. Required for the TUI's "esc cancels login"
1087
- * UX.
1088
- * - No HTML framework — a single inline `<html>` string keeps this isolated
1089
- * from any UI dependency.
1055
+ * State (`children`, `totalChildStats`, counters, active count) is scoped to
1056
+ * the returned instance. Multiple parent agents using the same instance will
1057
+ * share counters + stats + concurrency slots — call `createSpawnTool()` per
1058
+ * agent (or use the stateless default `spawn`) to keep them isolated.
1090
1059
  */
1091
- /**
1092
- * Result of a successful callback. `state` is forwarded verbatim from the
1093
- * query string — callers verify it against their pre-flight value to defend
1094
- * against CSRF (the MCP SDK does this internally when it controls `state`).
1095
- */
1096
- interface OAuthCallbackResult {
1097
- code: string;
1098
- state?: string;
1099
- }
1100
- interface OAuthCallbackHandle {
1101
- /**
1102
- * Full URI to register with the authorization server, e.g.
1103
- * `http://127.0.0.1:51823/callback`. Stable for the lifetime of the
1104
- * handle.
1105
- */
1106
- redirectUri: string;
1060
+ declare function createSpawnTool(options?: SpawnToolOptions): ToolDef & SpawnToolState;
1061
+ //#endregion
1062
+ //#region src/tools/tool-search.d.ts
1063
+ interface LazyToolEntry {
1107
1064
  /**
1108
- * Resolves with `{ code, state }` on a successful callback. Rejects with:
1109
- * - The OAuth-spec `error` field (`access_denied`, `server_error`, ...)
1110
- * when the authorization server redirects with `?error=...`.
1111
- * - `'OAuth callback aborted'` when the external `AbortSignal` fires.
1112
- * - `'OAuth callback server closed'` when `close()` is called before any
1113
- * callback arrives.
1114
- *
1115
- * Single-shot — only the first matching request resolves the promise.
1065
+ * Wire name (after `toolAliases` rewrite). What the model sees in the
1066
+ * catalog, what `tool_search` matches against, and what the provider's
1067
+ * tool list will carry once the entry is unlocked.
1116
1068
  */
1117
- promise: Promise<OAuthCallbackResult>;
1069
+ name: string;
1118
1070
  /**
1119
- * Idempotent shutdown. Safe to call from a `finally` block whether the
1120
- * flow succeeded, failed, or was aborted. Resolves once the server stops
1121
- * accepting connections.
1071
+ * Canonical (registry-key) name used for unlock-set membership and for the
1072
+ * loop's `ctx.tools[name]` dispatch lookup. Equal to `name` when no alias
1073
+ * is configured for this tool.
1122
1074
  */
1123
- close: () => Promise<void>;
1075
+ canonicalName: string;
1076
+ description: string;
1077
+ inputSchema: Record<string, unknown>;
1078
+ /** Source MCP server, when applicable. Used for `server`-bulk unlock. */
1079
+ server?: string;
1124
1080
  }
1125
- interface OAuthCallbackOptions {
1126
- /** Cancels the flow — rejects `promise` and closes the server. */
1127
- signal?: AbortSignal;
1081
+ interface ToolSearchToolOptions {
1128
1082
  /**
1129
- * Path component the authorization server should redirect to. Defaults
1130
- * to `/callback`. Useful when matching a pre-registered URI that uses a
1131
- * different path.
1083
+ * Snapshot of every lazy tool the model can discover. Built once per run by
1084
+ * the agent — the tool closes over this array and never mutates it.
1132
1085
  */
1133
- path?: string;
1086
+ catalog: readonly LazyToolEntry[];
1134
1087
  /**
1135
- * Override the loopback host. Defaults to `127.0.0.1`. Don't bind to
1136
- * `0.0.0.0` here — the OAuth code is a one-time secret and the server
1137
- * would otherwise accept it from any host on the LAN.
1088
+ * Mutable per-run set of unlocked **canonical** tool names. The tool adds
1089
+ * matches in place; the loop reads the set when rebuilding the wire-level
1090
+ * tool list. Keyed by canonical (not wire) so dispatch lookups stay
1091
+ * alias-stable.
1092
+ *
1093
+ * Prefer `addUnlock` for cache-stable wire-tool ordering: writes through a
1094
+ * Set lose unlock order, so the wire-level rebuild that filters by `unlocked`
1095
+ * has to fall back to registry iteration order — which moves entries every
1096
+ * time a lazy tool earlier in the registry is unlocked, breaking provider
1097
+ * prompt-cache breakpoints. The agent passes both when it owns the unlock
1098
+ * tracker, with `addUnlock` mirroring writes into an ordered log.
1138
1099
  */
1139
- host?: string;
1100
+ unlocked: Set<string>;
1140
1101
  /**
1141
- * Override the port. Defaults to `0` (OS-assigned). Pin to a fixed port
1142
- * only when the authorization server requires a pre-registered redirect
1143
- * URI; the random-port path is preferred so concurrent flows don't clash.
1102
+ * Optional callback fired for every canonical name the tool unlocks. When
1103
+ * set, the agent uses this to maintain an append-only `dynamicUnlockOrder`
1104
+ * so the wire-level tool list emits new unlocks at the tail and keeps the
1105
+ * provider prefix cache warm. Idempotent on repeat unlocks of the same
1106
+ * name — callers may dedupe internally.
1107
+ *
1108
+ * Invoked **in addition to** the `unlocked.add` (which still happens for
1109
+ * back-compat with callers that only watch the Set).
1144
1110
  */
1145
- port?: number;
1111
+ addUnlock?: (canonical: string) => void;
1112
+ /** Default cap on returned matches when the model omits `limit`. */
1113
+ defaultLimit?: number;
1146
1114
  }
1147
1115
  /**
1148
- * Start a one-shot OAuth callback server. The returned handle's `redirectUri`
1149
- * should be passed to the authorization server as the `redirect_uri` query
1150
- * parameter; `promise` resolves once the user finishes the browser flow.
1151
- *
1152
- * Always `await handle.close()` in a `finally` block — even on success, the
1153
- * server stays open until told to shut down (so it can serve the
1154
- * "you can close this tab" page).
1116
+ * Factory for `tool_search`. Auto-injected by the agent when
1117
+ * `behavior.toolDisclosure === 'lazy'` and at least one MCP tool is in the
1118
+ * registry. Opt out via `behavior.toolSearch.tool === false`.
1155
1119
  */
1156
- declare function startOAuthCallback(opts?: OAuthCallbackOptions): Promise<OAuthCallbackHandle>;
1120
+ declare function createToolSearchTool(options: ToolSearchToolOptions): ToolDef;
1157
1121
  //#endregion
1158
- //#region src/metrics.d.ts
1159
- type MetricAttributes = Record<string, string | number | boolean | undefined>;
1160
- interface Counter {
1161
- add: (value: number, attributes?: MetricAttributes) => void;
1162
- }
1163
- interface Histogram {
1164
- record: (value: number, attributes?: MetricAttributes) => void;
1165
- }
1166
- interface UpDownCounter {
1167
- add: (value: number, attributes?: MetricAttributes) => void;
1168
- }
1169
- interface InstrumentOptions {
1170
- description?: string;
1171
- unit?: string;
1172
- }
1122
+ //#region src/tools/validation.d.ts
1173
1123
  /**
1174
- * Minimal Meter interface — structurally identical to OTel's `Meter`.
1175
- * Hosts passing `metrics.getMeter(name)` (from `@opentelemetry/api`)
1176
- * satisfy this without adaptation.
1124
+ * Tool argument validation against JSON Schema-style inputSchema.
1125
+ *
1126
+ * Two passes:
1127
+ * 1. Required-field presence. Missing or null/undefined required fields fail.
1128
+ * 2. Per-property type checks with **best-effort coercion**. Small/OSS models
1129
+ * routinely send `"true"` for a `boolean` field or `"42"` for a `number`,
1130
+ * and rejecting outright forces a confusing retry. Instead, we auto-heal —
1131
+ * coerce when the conversion is unambiguous, fail only when the value
1132
+ * cannot be reasonably normalized to any of the declared types.
1133
+ *
1134
+ * Recursion: when a property declares `type: 'array'` with an `items` schema,
1135
+ * each item is validated against `items`. Object items are walked one level
1136
+ * deep (their declared `properties` get the same coercion + enum checks the
1137
+ * top level does). Items that can't be coerced are dropped rather than
1138
+ * rejecting the whole call — the model rarely benefits from an
1139
+ * all-or-nothing failure on a 20-item list because one entry was malformed.
1140
+ * Dropped items are reported back via `droppedItems` so the tool's `execute`
1141
+ * can surface a hint to the model if it wants to.
1177
1142
  */
1178
- interface Meter {
1179
- createCounter: (name: string, options?: InstrumentOptions) => Counter;
1180
- createHistogram: (name: string, options?: InstrumentOptions) => Histogram;
1181
- createUpDownCounter: (name: string, options?: InstrumentOptions) => UpDownCounter;
1182
- }
1183
- interface MetricsHooksOptions {
1184
- meter: Meter;
1143
+ interface ValidationResult {
1144
+ valid: boolean;
1145
+ /** Human-readable reason. Present on failure only. */
1146
+ error?: string;
1185
1147
  /**
1186
- * Optional prefix prepended to every instrument name. Default: no prefix
1187
- * (instrument names follow OTel Gen AI semantic conventions verbatim,
1188
- * which is the most-portable shape). Set to e.g. `'zidane.'` to
1189
- * namespace inside a shared meter registry.
1148
+ * Possibly-coerced input. Present iff `valid: true`. Tools should call
1149
+ * `execute(coercedInput, ctx)` so auto-healed values reach the tool body.
1150
+ * When no coercion was applied, this is reference-equal to the input.
1190
1151
  */
1191
- namespace?: string;
1152
+ coercedInput?: Record<string, unknown>;
1192
1153
  /**
1193
- * Optional baseline attributes applied to every measurement. Typical
1194
- * use: `{ service: 'tui', env: 'prod' }`. Per-event attributes win on
1195
- * key collision.
1154
+ * Names of fields whose values were coerced. Empty when nothing changed.
1155
+ * Useful for telemetry (`validation:reject` on failure already carries the
1156
+ * reason; this is the success-path equivalent).
1196
1157
  */
1197
- baseAttributes?: MetricAttributes;
1158
+ coercions?: readonly string[];
1198
1159
  /**
1199
- * Error sink for meter failures. The helper still swallows the throw
1200
- * so a broken backend can't crash a run; this callback surfaces the
1201
- * failure for ops dashboards.
1160
+ * Indexes of array items dropped during recursive validation, keyed by
1161
+ * the property name. Empty / absent when nothing was dropped. Tools that
1162
+ * care about the discrepancy (e.g. `todowrite` wanting to surface
1163
+ * "ignored 2 malformed items") can inspect this.
1202
1164
  */
1203
- onError?: (kind: string, err: unknown) => void;
1204
- }
1205
- interface MetricsHookSet {
1206
- install: (hooks: Hookable<AgentHooks>) => () => void;
1165
+ droppedItems?: Readonly<Record<string, readonly number[]>>;
1207
1166
  }
1167
+ declare function validateToolArgs(input: Record<string, unknown>, schema: Record<string, unknown>): ValidationResult;
1168
+ //#endregion
1169
+ //#region src/tools/write-file.d.ts
1208
1170
  /**
1209
- * Build a set of metrics hook handlers that can be installed on an agent.
1171
+ * Write a file, with an idempotency signal when the content is unchanged.
1210
1172
  *
1211
- * @example OpenTelemetry
1212
- * ```ts
1213
- * import { metrics } from '@opentelemetry/api'
1214
- * const meter = metrics.getMeter('zidane')
1215
- * const m = createMetricsHooks({ meter, baseAttributes: { service: 'tui' } })
1216
- * const uninstall = m.install(agent.hooks)
1217
- * try { await agent.run({ prompt }) }
1218
- * finally { uninstall() }
1219
- * ```
1173
+ * Three return shapes — chosen so the model can recognize a no-op without a
1174
+ * separate read:
1175
+ * - `Created path (N bytes)` — file did not exist
1176
+ * - `Updated path (N bytes)` — content differed from on-disk
1177
+ * - `No change needed: path already at target state (N bytes)` — equal
1178
+ *
1179
+ * Race window: in non-process execution contexts (docker, sandbox) shared by
1180
+ * multiple agents, another writer can mutate the file between our read and
1181
+ * our write. Local process context is single-writer per agent so the race is
1182
+ * a non-issue there. Documented rather than locked because the cost of
1183
+ * cross-context locking outweighs the cost of a stale "No change" message.
1220
1184
  */
1221
- declare function createMetricsHooks(options: MetricsHooksOptions): MetricsHookSet;
1185
+ declare const writeFile: ToolDef;
1222
1186
  //#endregion
1223
- //#region src/run-summary.d.ts
1224
- interface RunSummaryTokens {
1187
+ //#region src/headless.d.ts
1188
+ type HeadlessStatus = 'completed' | 'aborted' | 'error' | 'timeout';
1189
+ interface HeadlessUsage {
1225
1190
  input: number;
1226
1191
  output: number;
1227
1192
  cacheRead: number;
1228
1193
  cacheCreation: number;
1194
+ /** Cumulative USD cost when the provider/registry could price the run. */
1229
1195
  cost?: number;
1230
- /** First observable byte from the provider, ms from run start. */
1231
- ttftMs?: number;
1232
- }
1233
- interface RunSummaryByModel {
1234
- modelId: string;
1235
- input: number;
1236
- output: number;
1237
- cacheRead: number;
1238
- cacheCreation: number;
1239
- cost: number;
1240
- turns: number;
1241
1196
  }
1242
- interface RunSummaryError {
1243
- kind: 'stream' | 'tool' | 'mcp-tool' | 'mcp' | 'spawn';
1197
+ interface HeadlessErrorInfo {
1244
1198
  message: string;
1245
- errorType?: string;
1246
- turnId?: string;
1247
- callId?: string;
1248
- server?: string;
1249
- toolName?: string;
1250
- childId?: string;
1251
- statusCode?: number;
1252
- requestId?: string;
1253
- }
1254
- interface RunSummaryBlock {
1255
- callId: string;
1256
- toolName: string;
1257
- outcome: 'gate-block' | 'unknown' | 'invalid-input';
1258
- reason?: string;
1259
- }
1260
- interface RunSummaryValidation {
1261
- callId: string;
1262
- toolName: string;
1263
- reason: string;
1199
+ /** Typed-error class name (`AgentAbortedError`, `AgentContextExceededError`, …). */
1200
+ type: string;
1264
1201
  }
1265
- interface RunSummaryBudget {
1266
- kind: 'bytes' | 'tool-count';
1267
- /** Tool name (for `'tool-count'`); absent for byte budgets. */
1268
- toolName?: string;
1269
- /** `mode` for `'tool-count'`; absent for byte budgets. */
1270
- mode?: 'steer' | 'block';
1271
- observed: number;
1272
- limit: number;
1273
- turnId?: string;
1202
+ /**
1203
+ * Strictly JSON-serializable postmortem of one headless run. Everything an RL
1204
+ * reward function needs: the final answer (`finalText`), the verifiable
1205
+ * structured output (`output`, present iff a `schema` was set), usage/turns,
1206
+ * and the lossless `transcript` (the SFT training data).
1207
+ */
1208
+ interface HeadlessResult {
1209
+ status: HeadlessStatus;
1210
+ /** Concatenated text of the last assistant turn that produced any text. */
1211
+ finalText: string;
1212
+ /** Schema-enforced structured output (only when `opts.schema` is set). */
1213
+ output?: Record<string, unknown>;
1214
+ usage: HeadlessUsage;
1215
+ turns: number;
1216
+ durationMs: number;
1217
+ /** Total `tool_call` blocks across the whole transcript. */
1218
+ numToolCalls: number;
1219
+ /** Finish reason of the final turn that reported one. */
1220
+ finishReason?: TurnFinishReason;
1221
+ error?: HeadlessErrorInfo;
1222
+ sessionId: string;
1223
+ /** Incident postmortem (errors, gate blocks, budget events) via run-summary. */
1224
+ summary?: RunSummary;
1225
+ /** Lossless transcript — raw `session.turns`. Thinking stripped when `includeThinking: false`. */
1226
+ transcript: SessionTurn[];
1274
1227
  }
1275
1228
  /**
1276
- * Postmortem snapshot of one `agent.run()`. Strictly serializable — every
1277
- * field round-trips through `JSON.stringify` / `JSON.parse` without loss
1278
- * so a log aggregator can ingest it as-is.
1229
+ * Live event union — the in-process equivalent of a `stream-json` line. Every
1230
+ * member is JSON-serializable; render to JSONL with {@link headlessEventToJsonl}.
1279
1231
  */
1280
- interface RunSummary {
1281
- runId?: string;
1282
- parentRunId?: string;
1283
- depth: number;
1284
- agentName?: string;
1285
- startedAt: number;
1286
- endedAt: number;
1287
- durationMs: number;
1288
- status: 'completed' | 'aborted';
1289
- turns: number;
1290
- totals: RunSummaryTokens;
1291
- byModel: RunSummaryByModel[];
1292
- errors: RunSummaryError[];
1293
- blocks: RunSummaryBlock[];
1294
- validationRejects: RunSummaryValidation[];
1295
- budgetEvents: RunSummaryBudget[];
1296
- /** Counts of pairing repairs, keyed by repair mode. */
1297
- pairingRepairs: Record<string, number>;
1232
+ type HeadlessEvent = {
1233
+ type: 'start';
1234
+ runId: string;
1235
+ provider?: string;
1236
+ } | {
1237
+ type: 'text';
1238
+ delta: string;
1239
+ } | {
1240
+ type: 'thinking';
1241
+ delta: string;
1242
+ } | {
1243
+ type: 'tool_call';
1244
+ callId: string;
1245
+ name: string;
1246
+ input: Record<string, unknown>;
1247
+ } | {
1248
+ type: 'tool_result';
1249
+ callId: string;
1250
+ name: string;
1251
+ output: string;
1252
+ isError: boolean;
1253
+ } | {
1254
+ type: 'turn';
1255
+ index: number;
1256
+ usage: TurnUsage;
1257
+ } | {
1258
+ type: 'spawn';
1259
+ event: 'before' | 'complete' | 'error';
1260
+ id: string;
1261
+ info?: Record<string, unknown>;
1262
+ } | {
1263
+ type: 'error';
1264
+ message: string;
1265
+ errorType?: string;
1266
+ } | {
1267
+ type: 'result';
1268
+ result: HeadlessResult;
1269
+ };
1270
+ /** Serialize one event as a newline-terminated JSON line (stream-json). */
1271
+ declare function headlessEventToJsonl(event: HeadlessEvent): string;
1272
+ interface HeadlessOptions {
1273
+ /** User prompt — plain string or multimodal `PromptPart[]`. */
1274
+ prompt: string | PromptPart[];
1275
+ /** Built provider (e.g. `local()`, `openaiCompat(...)`, `anthropic(...)`). */
1276
+ provider: Provider;
1277
+ model?: string;
1278
+ /** Override the preset system prompt for this run. */
1279
+ system?: string;
1280
+ thinking?: ThinkingLevel;
1281
+ maxTurns?: number;
1282
+ maxTokens?: number;
1283
+ /** Wall-clock cap; on expiry the run is aborted and `status` becomes `'timeout'`. */
1284
+ timeoutMs?: number;
1285
+ /** External abort signal — chained with the internal timeout controller. */
1286
+ signal?: AbortSignal;
1287
+ /** JSON Schema for structured-output enforcement → populates `result.output`. */
1288
+ schema?: Record<string, unknown>;
1289
+ /** Tool overrides. Omit to use the basic preset's tools. */
1290
+ tools?: Record<string, ToolDef>;
1291
+ mcpServers?: McpServerConfig[];
1292
+ skills?: AgentOptions['skills'];
1293
+ /** Execution context. Defaults to a process context rooted at `cwd`. */
1294
+ execution?: ExecutionContext;
1295
+ /** Working directory for the default process context (ignored if `execution` is set). */
1296
+ cwd?: string;
1297
+ /** Reuse / resume an existing session. */
1298
+ session?: Session;
1299
+ /** Session store for a fresh session (defaults to an in-memory store). */
1300
+ store?: SessionStore;
1301
+ /** Keep `thinking` blocks in `result.transcript` (default true). */
1302
+ includeThinking?: boolean;
1303
+ /** Live event callback — the in-process stream-json equivalent. */
1304
+ onEvent?: (event: HeadlessEvent) => void;
1305
+ }
1306
+ /**
1307
+ * Run a prompt to completion, headless, and return a single serializable
1308
+ * {@link HeadlessResult}. Safe to call concurrently for parallel rollouts —
1309
+ * each call builds its own agent + session and tears them down in `finally`.
1310
+ */
1311
+ declare function runHeadless(opts: HeadlessOptions): Promise<HeadlessResult>;
1312
+ interface OpenAIChatMessage {
1313
+ role: 'system' | 'user' | 'assistant' | 'tool';
1314
+ content: string | null;
1315
+ tool_calls?: Array<{
1316
+ id: string;
1317
+ type: 'function';
1318
+ function: {
1319
+ name: string;
1320
+ arguments: string;
1321
+ };
1322
+ }>;
1323
+ tool_call_id?: string;
1324
+ }
1325
+ /**
1326
+ * Convert raw `session.turns` into standard OpenAI chat-completion messages:
1327
+ * assistant turns carry `tool_calls`, and each `tool_result` becomes its own
1328
+ * `role: 'tool'` message. This is the drop-in shape for an SFT renderer —
1329
+ * unlike `toOpenAI` (session/messages.ts), which emits an internal `_tag`
1330
+ * envelope meant for re-sending to a provider, not for training data.
1331
+ */
1332
+ declare function transcriptToOpenAIMessages(turns: SessionTurn[]): OpenAIChatMessage[];
1333
+ //#endregion
1334
+ //#region src/logger.d.ts
1335
+ type LogLevel = 'debug' | 'info' | 'warn' | 'error';
1336
+ interface LogRecord {
1337
+ level: LogLevel;
1338
+ /** Unix ms — set by `Logger` at emit time. */
1339
+ timestamp: number;
1340
+ /** Free-form message. Sinks render this as the human-facing line. */
1341
+ message: string;
1342
+ /** Structured fields. Correlation ids land here automatically. */
1343
+ attrs: Record<string, unknown>;
1344
+ }
1345
+ interface LogSink {
1346
+ emit: (record: LogRecord) => void;
1347
+ }
1348
+ interface Logger {
1349
+ debug: (message: string, attrs?: Record<string, unknown>) => void;
1350
+ info: (message: string, attrs?: Record<string, unknown>) => void;
1351
+ warn: (message: string, attrs?: Record<string, unknown>) => void;
1352
+ error: (message: string, attrs?: Record<string, unknown>) => void;
1298
1353
  /**
1299
- * Postmortem snapshots of child runs that bubbled their stats up via
1300
- * `spawn:complete`. Only present when the run actually spawned.
1354
+ * Returns a child logger that prepends the given attributes onto every
1355
+ * subsequent emit. Equivalent to `pino.child` / `winston.child`. The
1356
+ * parent and child share the same sink — children are zero-cost.
1301
1357
  */
1302
- children?: RunSummary[];
1358
+ with: (extra: Record<string, unknown>) => Logger;
1359
+ /**
1360
+ * Inspectable baseline attributes — handy for tests and for hook
1361
+ * handlers that want to clone-with-extra without recursing.
1362
+ */
1363
+ readonly baseAttributes: Readonly<Record<string, unknown>>;
1303
1364
  }
1304
- interface RunSummaryCollectorOptions {
1365
+ /**
1366
+ * Build a Logger from a sink. Stateless and cheap; create one per agent
1367
+ * (or per app) and use `.with()` to attach correlation ids per-call.
1368
+ */
1369
+ declare function createLogger(sink: LogSink, baseAttributes?: Readonly<Record<string, unknown>>): Logger;
1370
+ interface ConsoleSinkOptions {
1305
1371
  /**
1306
- * Called with the assembled {@link RunSummary} on every `agent:done`.
1307
- * Synchronous heavy I/O should be deferred (e.g. via `setImmediate`).
1372
+ * Minimum level to emit. Defaults to `'info'` — `debug` is dropped so
1373
+ * the harness's lifecycle logging is not noisy by default. Set to
1374
+ * `'debug'` to see every event.
1308
1375
  */
1309
- onSummary?: (summary: RunSummary) => void;
1376
+ minLevel?: LogLevel;
1377
+ /** Custom output stream. Defaults to `process.stderr` so logs don't pollute stdout. */
1378
+ stream?: {
1379
+ write: (chunk: string) => void;
1380
+ };
1310
1381
  }
1311
- interface RunSummaryCollector {
1312
- /** Install the collector's hook handlers. Returns an uninstall fn. */
1382
+ /**
1383
+ * Human-readable terminal sink. Renders each record as
1384
+ * `<ISO timestamp> <LEVEL> <message> <attrs as kv pairs>`.
1385
+ *
1386
+ * Honors `process.stderr` by default so log lines don't interleave with
1387
+ * the agent's stdout-bound output (chat responses, JSON results).
1388
+ */
1389
+ declare function consoleSink(options?: ConsoleSinkOptions): LogSink;
1390
+ /**
1391
+ * One-JSON-object-per-line sink. Suitable for piping into log aggregators
1392
+ * (Datadog Agent, Fluent Bit, Loki, Vector) that expect JSONL.
1393
+ */
1394
+ declare function jsonSink(options?: ConsoleSinkOptions): LogSink;
1395
+ interface LoggingHooksOptions {
1396
+ logger: Logger;
1397
+ /**
1398
+ * Minimum interesting level for harness-emitted lines. Default `'info'`.
1399
+ * Set to `'debug'` to see every tool dispatch / stream event. Set to
1400
+ * `'warn'` to mute the chatty ones and only see failures + budgets.
1401
+ */
1402
+ level?: LogLevel;
1403
+ /**
1404
+ * When true (default), lifecycle events (`agent:start`, `turn:before`,
1405
+ * `tool:before`, `mcp:bootstrap:start`) emit at `debug` level so they
1406
+ * stay quiet by default. Set false to mute them entirely regardless of
1407
+ * the configured minimum level — useful when piping into a tracer
1408
+ * that already captures lifecycle.
1409
+ */
1410
+ includeLifecycle?: boolean;
1411
+ }
1412
+ interface LoggingHookSet {
1313
1413
  install: (hooks: Hookable<AgentHooks>) => () => void;
1314
- /** Most-recent summary; `undefined` until the first `agent:done` fires. */
1315
- latest: () => RunSummary | undefined;
1316
1414
  }
1317
1415
  /**
1318
- * Build a run-summary collector. State is created fresh inside each
1319
- * `install()` call, so a single collector instance can be installed
1320
- * across multiple agents without attribution cross-talk. `latest()`
1321
- * returns the most-recent summary across **any** install — install
1322
- * per-agent collectors if you need separate post-run snapshots.
1416
+ * Install a bundle of hook handlers that emit a structured line per
1417
+ * relevant lifecycle event, automatically attaching correlation ids
1418
+ * (`runId`, `turnId`, `callId`, `childId`, `depth`, `agentName`).
1323
1419
  *
1324
1420
  * @example
1325
1421
  * ```ts
1326
- * const collector = createRunSummaryCollector({
1327
- * onSummary: s => console.log(JSON.stringify(s)),
1328
- * })
1329
- * const uninstall = collector.install(agent.hooks)
1422
+ * const logger = createLogger(consoleSink({ minLevel: 'debug' }), { service: 'tui' })
1423
+ * const lh = createLoggingHooks({ logger })
1424
+ * const uninstall = lh.install(agent.hooks)
1330
1425
  * try { await agent.run({ prompt }) }
1331
1426
  * finally { uninstall() }
1332
1427
  * ```
1333
1428
  */
1334
- declare function createRunSummaryCollector(options?: RunSummaryCollectorOptions): RunSummaryCollector;
1429
+ declare function createLoggingHooks(options: LoggingHooksOptions): LoggingHookSet;
1335
1430
  //#endregion
1336
- //#region src/stats.d.ts
1431
+ //#region src/loop.d.ts
1337
1432
  /**
1338
- * Per-model usage rollup produced by {@link statsByModel}.
1339
- *
1340
- * `turns` counts the number of `TurnUsage` entries attributed to the model
1341
- * across the whole tree (parent loop + every recursively-spawned child).
1342
- * Cache and cost numbers are summed from the same set of turns.
1433
+ * Canonical tool_result text emitted when a tool call is interrupted by the
1434
+ * user mid-flight (Esc / Ctrl-C / external `AbortSignal`). Mirrors Claude
1435
+ * Code's `INTERRUPT_MESSAGE_FOR_TOOL_USE` so downstream consumers can pattern
1436
+ * match a single string across both harnesses. Always paired with
1437
+ * `isError: true` on the wire — the model treats it as a failed call rather
1438
+ * than a successful tool response.
1343
1439
  */
1344
- interface ModelUsage {
1345
- input: number;
1346
- output: number;
1347
- cost: number;
1348
- cacheRead: number;
1349
- cacheCreation: number;
1350
- turns: number;
1351
- }
1440
+ declare const INTERRUPT_MESSAGE_FOR_TOOL_USE = "[Request interrupted by user for tool use]";
1352
1441
  /**
1353
- * Depth-first walk over the stats tree, returning every `TurnUsage` entry —
1354
- * parent loop first, then each child subtree in completion order.
1355
- *
1356
- * Closes the cache-token aggregation gap: `TurnUsage.cacheRead` /
1357
- * `cacheCreation` live only on per-turn entries, and the top-level
1358
- * `AgentStats` deliberately doesn't carry cumulative forms (one source of
1359
- * truth, no risk of drift). Anything that needs a tree-wide sum walks
1360
- * through this.
1442
+ * Canonical tool_result text emitted when a tool call is skipped because a
1443
+ * steering message arrived between dispatches inside
1444
+ * {@link executeToolBatch}. Distinguished from
1445
+ * {@link INTERRUPT_MESSAGE_FOR_TOOL_USE} so consumers can split "user
1446
+ * cancelled" from "framework superseded".
1361
1447
  */
1362
- declare function flattenTurns(stats: AgentStats): TurnUsage[];
1448
+ declare const TOOL_USE_SKIPPED_MESSAGE = "[Tool use skipped \u2014 superseded by user message]";
1363
1449
  /**
1364
- * Group cumulative usage by `TurnUsage.modelId`. Each entry sums the input,
1365
- * output, cache, cost, and turn-count across every turn the tree attributed
1366
- * to that model — naturally handling cross-model runs (vision-fallback,
1367
- * model-shifted subagents, mixed-provider workflows).
1450
+ * Canonical tool_result text emitted when a single tool call is cancelled
1451
+ * mid-flight via `agent.cancelTool(callId)` (typically the TUI's
1452
+ * "cancel this tool" affordance). Distinguished from
1453
+ * {@link INTERRUPT_MESSAGE_FOR_TOOL_USE} (run-wide user abort) and
1454
+ * {@link TOOL_USE_SKIPPED_MESSAGE} (steered) so the model — and downstream
1455
+ * consumers — can tell the three apart by string match.
1368
1456
  *
1369
- * Turns missing `modelId` (mock providers, providers that don't echo a model
1370
- * id) are bucketed under the literal string `'(unknown)'`.
1457
+ * Always paired with `isError: true` on the wire so the model treats the
1458
+ * call as failed rather than as a successful response. The remaining tool
1459
+ * calls in the batch continue running, in contrast with a full-run abort.
1371
1460
  */
1372
- declare function statsByModel(stats: AgentStats): Map<string, ModelUsage>;
1373
- //#endregion
1374
- //#region src/system-prompt.d.ts
1461
+ declare const TOOL_USE_CANCELLED_MESSAGE = "[Tool call cancelled by user]";
1375
1462
  /**
1376
- * System-prompt boundary marker — splits a system prompt into a stable static
1377
- * prefix (cached) and a per-turn dynamic suffix (NOT cached).
1378
- *
1379
- * Why this exists: providers attach `cache_control` markers on the last block
1380
- * of the system prompt, so the cached prefix covers the entire system text.
1381
- * Any byte change anywhere — including a per-turn `<env>` rewrite — busts the
1382
- * cache for the doctrine that sits below. A literal marker in the system
1383
- * string lets providers split it into:
1384
- *
1385
- * ┌──────────────┐ cache_control: ephemeral
1386
- * │ STATIC half │ — doctrine, skills catalog, tool catalog,
1387
- * │ │ user instructions
1388
- * ├──────────────┤ ← SYSTEM_PROMPT_BOUNDARY
1389
- * │ DYNAMIC half │ — env, cwd, mtimes, anything per-turn
1390
- * └──────────────┘ (no cache_control)
1391
- *
1392
- * The static prefix rides the prompt cache across turns/sessions; the dynamic
1393
- * suffix re-bills per turn. Net effect: a cwd change between turns no longer
1394
- * invalidates 4 KB of doctrine.
1395
- *
1396
- * Wire contract:
1397
- *
1398
- * - `splitSystemPrompt(s)` is pure; missing marker ⇒ entire string is static
1399
- * (current behavior — no caller is forced to opt in).
1400
- * - `renderSystemForWire(s)` strips the marker so it never reaches the model;
1401
- * used by every provider before the bytes hit the wire, including the
1402
- * cache-disabled path on providers that DO support `cache_control`.
1403
- * - The marker uses underscores rather than XML/punctuation so it's
1404
- * unambiguous when scanning prompts manually and unlikely to collide with
1405
- * model-written content.
1406
- * - Providers handle the split internally (Anthropic emits a 2-block array;
1407
- * OpenAI-compat splits the leading `system` message into multi-part text).
1408
- * Callers always pass a single `string` — no API break.
1463
+ * Canonical `tool_result.content` text emitted to siblings that were
1464
+ * cancelled by a `shell` error in the same batch. Distinct from
1465
+ * {@link INTERRUPT_MESSAGE_FOR_TOOL_USE} (user-issued abort) and
1466
+ * {@link TOOL_USE_SKIPPED_MESSAGE} (steered) so consumers can split
1467
+ * the three causes by string-match.
1409
1468
  */
1469
+ declare const SHELL_CASCADE_CANCEL_MESSAGE = "Cancelled: a sibling `shell` call in the same batch errored; re-run independently if still needed.";
1470
+ //#endregion
1471
+ //#region src/loop-persistence.d.ts
1410
1472
  /**
1411
- * Literal marker inserted in a system prompt to separate cacheable doctrine
1412
- * from per-turn dynamic content. Providers split on this token.
1473
+ * Bytes of head content included in the inline preview block. 2 KiB matches
1474
+ * Claude Code's `PREVIEW_SIZE_BYTES` — enough for the model to identify the
1475
+ * content class (error output / structured data / log shape) and decide
1476
+ * whether to call `read_file` on the persisted path for the full payload.
1413
1477
  *
1414
- * Underscored on both sides for visual distinctiveness — a stray instance in
1415
- * model-written prose is implausible. Don't change the value without shipping
1416
- * a migration; existing sessions carry the old marker in their cached
1417
- * prompts.
1478
+ * Tail-priority preview (matching `shell`'s truncation strategy) was
1479
+ * considered but rejected: most "what is this?" decisions get made from
1480
+ * the head, and the path is in the stub for the rare case where the tail
1481
+ * matters.
1418
1482
  */
1419
- declare const SYSTEM_PROMPT_BOUNDARY = "__ZIDANE_SYSTEM_PROMPT_BOUNDARY__";
1420
- /** Result of {@link splitSystemPrompt} — both halves stripped of the marker. */
1421
- interface SystemPromptParts {
1422
- /** Bytes BEFORE the marker (or the entire string when no marker present). Cacheable. */
1423
- static: string;
1424
- /** Bytes AFTER the marker. Empty when no marker present. NOT cached. */
1425
- dynamic: string;
1426
- }
1483
+ declare const PERSISTENCE_PREVIEW_BYTES: number;
1427
1484
  /**
1428
- * Split a system prompt around the first {@link SYSTEM_PROMPT_BOUNDARY}.
1429
- *
1430
- * Splits on the FIRST occurrence — subsequent markers are folded into the
1431
- * dynamic half. This way callers can append additional `<env>` style blocks
1432
- * with extra markers without each one creating a new cache layer (Anthropic
1433
- * caps breakpoints; we use the budget elsewhere). The marker itself is
1434
- * stripped from both sides — providers attach `cache_control` directly to
1435
- * the static half's text content.
1436
- *
1437
- * A single blank line (`\n\n`) immediately adjacent to the marker on each
1438
- * side is trimmed — callers conventionally write
1439
- * `<doctrine>\n\n<MARKER>\n\n<env>` and expect the rendered wire bytes to
1440
- * read as one logical paragraph. Blank lines beyond that immediate pair
1441
- * (e.g. `\n\n\n<env>`) are preserved verbatim so callers composing their
1442
- * own spacing don't lose intentional gaps.
1485
+ * Byte-stable prefix every {@link buildPersistedStub} output starts with.
1486
+ * Exported so wire-level passes (tail compaction, future stale-output
1487
+ * elision) can recognize a persisted stub and preserve its path attribute
1488
+ * rather than replacing the stub with their own — losing the pointer to
1489
+ * the on-disk blob.
1443
1490
  *
1444
- * Pure / no I/O / no allocation when the marker is absent (returns the input
1445
- * verbatim on the static side and the empty string on the dynamic side).
1491
+ * Bound to the literal opening of the XML tag; changing the stub format
1492
+ * requires updating this constant in lockstep (and shipping a migration
1493
+ * for in-flight sessions).
1446
1494
  */
1447
- declare function splitSystemPrompt(system: string): SystemPromptParts;
1495
+ declare const PERSISTED_STUB_PREFIX = "<persisted-output tool=\"";
1448
1496
  /**
1449
- * Compose a system prompt from a static prefix and an optional dynamic
1450
- * suffix. Inserts {@link SYSTEM_PROMPT_BOUNDARY} between them only when the
1451
- * dynamic side is non-empty — single-block prompts stay marker-free, so
1452
- * callers that never opt in pay zero overhead and providers fall back to the
1453
- * existing whole-string caching path.
1497
+ * Resolve the per-session persistence directory under `<userDir>/tool-results/<sessionId>/`.
1454
1498
  *
1455
- * Spacing is `\n\n` on both sides of the marker so doctrine fragments,
1456
- * which conventionally separate sections with a blank line, read cleanly
1457
- * around the boundary.
1499
+ * The chat layer calls this at session activation and forwards the result
1500
+ * via `behavior.persistDir`. Exposed as a public helper so SDK consumers
1501
+ * pick the same layout — single source of truth for "where do blobs live".
1458
1502
  */
1459
- declare function joinSystemPrompt(staticPart: string, dynamicPart: string): string;
1503
+ declare function resolvePersistDir(opts: {
1504
+ userDir: string;
1505
+ sessionId: string;
1506
+ }): string;
1460
1507
  /**
1461
- * Append `extra` to the STATIC half of a system prompt, preserving any
1462
- * existing dynamic suffix.
1463
- *
1464
- * Used by the agent to fold in run-stable content (skills catalog, lazy tool
1465
- * catalog) without bumping it into the dynamic half — both catalogs are
1466
- * built once per run and remain byte-stable for the duration, so they
1467
- * belong in the cached prefix.
1508
+ * Resolve the per-session background-tasks directory under
1509
+ * `<userDir>/<sessionId>/tasks/`.
1468
1510
  *
1469
- * Returns a new string; the input is not mutated. When `extra` is empty,
1470
- * returns the input verbatim.
1511
+ * The chat layer calls this at session activation and forwards the result
1512
+ * via `behavior.tasksDir`. Same shape as {@link resolvePersistDir}: hosts
1513
+ * get a single source of truth for "where do task log files live".
1514
+ * Created on first write; cleanup is the session-delete path's job.
1471
1515
  */
1472
- declare function appendStaticSection(system: string, extra: string): string;
1516
+ declare function resolveTasksDir(opts: {
1517
+ userDir: string;
1518
+ sessionId: string;
1519
+ }): string;
1473
1520
  /**
1474
- * Append `extra` to the DYNAMIC half of a system prompt. Inserts the boundary
1475
- * marker if the input didn't already carry one.
1476
- *
1477
- * Used by hosts (typically the TUI) to inject per-turn state — current cwd,
1478
- * IDE selection, project root — without forcing every caller to know the
1479
- * marker format. The host's `system:transform` hook rewrites the dynamic
1480
- * half each turn; the static doctrine above stays byte-stable and rides the
1481
- * cache.
1482
- *
1483
- * Returns a new string; the input is not mutated. When `extra` is empty,
1484
- * returns the input verbatim.
1521
+ * Inputs to {@link maybePersistToolResult}. Kept as a struct so the loop's
1522
+ * call site stays readable and additional optional knobs (compression,
1523
+ * mime detection, …) land without re-threading every call site.
1485
1524
  */
1486
- declare function appendDynamicSection(system: string, extra: string): string;
1525
+ interface PersistInput {
1526
+ /** Canonical tool name — checked against `excludeTools`. */
1527
+ toolName: string;
1528
+ /** `tool_use` id from the assistant turn. Used as the filename. */
1529
+ callId: string;
1530
+ /** Result returned by the tool (post-`tool:transform`). */
1531
+ output: string | ToolResultContent[];
1532
+ /** Byte threshold; outputs at or below stay inline. */
1533
+ threshold: number;
1534
+ /** Canonical tool names that bypass persistence. */
1535
+ excludeTools?: readonly string[];
1536
+ /** Persistence root directory. Created on first write. */
1537
+ persistDir: string;
1538
+ /**
1539
+ * Optional cap on the total bytes of persisted blobs under `persistDir`.
1540
+ * When set (and > 0), after a successful write the helper sweeps the
1541
+ * directory and removes the oldest `*.txt` blobs (by mtime) until the
1542
+ * sum of remaining sizes is at or below the cap.
1543
+ *
1544
+ * Bound to the **current session** because `persistDir` is per-session
1545
+ * (see {@link resolvePersistDir}); eviction never crosses session
1546
+ * boundaries. The new blob is always preserved — its mtime is the
1547
+ * latest, so the LRU sort guarantees older blobs go first.
1548
+ *
1549
+ * Skipped when the value isn't a positive finite number. Eviction
1550
+ * failures (permissions, races) are surfaced through `ZIDANE_DEBUG`
1551
+ * but never block the calling tool result; an over-cap dir is a
1552
+ * housekeeping concern, not a correctness one.
1553
+ */
1554
+ maxBytes?: number;
1555
+ }
1556
+ type PersistOutcome = {
1557
+ kind: 'skip';
1558
+ reason: 'disabled' | 'excluded' | 'under-threshold' | 'unsupported-shape' | 'unsafe-call-id' | 'invalid-persist-dir';
1559
+ } | {
1560
+ kind: 'persisted';
1561
+ output: string;
1562
+ originalBytes: number;
1563
+ persistedPath: string;
1564
+ evicted?: {
1565
+ files: number;
1566
+ bytes: number;
1567
+ };
1568
+ } | {
1569
+ kind: 'error';
1570
+ reason: 'write-failed';
1571
+ error: Error;
1572
+ };
1487
1573
  /**
1488
- * Replace the entire dynamic half of a system prompt with `next`. Used by
1489
- * the TUI's `<env>` rewriter where the entire dynamic section is regenerated
1490
- * each turn rather than appended to.
1574
+ * Decide-and-persist for a single tool result. Pure decision + filesystem
1575
+ * side-effect; returns the new wire-level `output` string when substitution
1576
+ * happened, otherwise tells the caller to leave the result alone.
1491
1577
  *
1492
- * When `next` is empty, drops the dynamic half (and the marker) entirely.
1578
+ * Atomicity: writes go through `<path>.tmp` + `rename` so a concurrent
1579
+ * read (or a crash mid-write) never sees a half-written blob.
1580
+ *
1581
+ * `ToolResultContent[]` results (images, structured blocks) currently bypass
1582
+ * persistence — the inline image bytes are the point of the call, and a
1583
+ * mixed text/image array isn't representable as a single `.txt` file. We
1584
+ * may revisit if a tool starts returning very large text-only arrays.
1493
1585
  */
1494
- declare function replaceDynamicSection(system: string, next: string): string;
1586
+ declare function maybePersistToolResult(input: PersistInput): Promise<PersistOutcome>;
1587
+ interface BuildStubInput {
1588
+ toolName: string;
1589
+ originalBytes: number;
1590
+ persistedPath: string;
1591
+ output: string;
1592
+ }
1495
1593
  /**
1496
- * Strip the boundary marker so it never reaches the wire — collapses
1497
- * `<static>${BOUNDARY}<dynamic>` into `<static>\n\n<dynamic>` for providers
1498
- * that can't honor `cache_control` (vanilla OpenAI Chat Completions, Codex,
1499
- * Cerebras, ...) or for the cache-disabled path on any provider.
1594
+ * Render the byte-stable `<persisted-output>` stub the model sees in place
1595
+ * of the original `tool_result`.
1500
1596
  *
1501
- * Cache-aware providers re-derive the split from the original (un-rendered)
1502
- * system string via `splitSystemPrompt` — `renderSystemForWire` is the
1503
- * marker-free counterpart used to build the actual wire bytes.
1597
+ * Format choices:
1598
+ * - XML wrapper because models reliably parse it as structural.
1599
+ * - Byte count + path in attributes so the model can decide whether to
1600
+ * `read_file` the persisted blob without scanning the preview.
1601
+ * - Preview always shows the head — `shell`'s tail-priority truncation is
1602
+ * irrelevant here because the model has the full path if it needs the
1603
+ * tail.
1604
+ * - No timestamps, no random UUIDs inside the stub: every byte must be
1605
+ * reproducible from the inputs, otherwise re-emission on subsequent
1606
+ * turns would bust the prompt cache.
1504
1607
  *
1505
- * No-op when the input has no marker. Pure / no I/O.
1608
+ * Exported for tests (asserting the byte-stable contract) and for SDK
1609
+ * consumers wiring their own persistence middleware against the same
1610
+ * surface.
1506
1611
  */
1507
- declare function renderSystemForWire(system: string): string;
1508
- /** True when `system` contains the boundary marker. */
1509
- declare function hasSystemPromptBoundary(system: string): boolean;
1510
- //#endregion
1511
- //#region src/tools/edit.d.ts
1612
+ declare function buildPersistedStub(input: BuildStubInput): string;
1512
1613
  /**
1513
- * Surgical edit — replace `old_string` with `new_string` in a single file.
1614
+ * Remove every persisted blob belonging to a session. Called by the chat
1615
+ * layer from its session-delete path so closing a session frees the disk
1616
+ * footprint alongside the SQLite row.
1514
1617
  *
1515
- * Mirrors Claude Code's `Edit` semantics so models post-trained on Anthropic's
1516
- * tool surface need no relearning. Fails clearly when `old_string` isn't unique
1517
- * (unless `replace_all: true`) and when not found, with a nearest-match preview
1518
- * so the model can recover without a separate `read_file` round-trip.
1618
+ * Idempotent — missing directory (session never persisted anything) is a
1619
+ * no-op, not an error. Wraps the `rm -rf` so a permissions blip on one
1620
+ * blob doesn't propagate to the caller; the chat layer can't usefully
1621
+ * recover from "couldn't unlink a result file" mid-delete.
1519
1622
  */
1520
- declare const edit: ToolDef;
1521
- //#endregion
1522
- //#region src/tools/glob.d.ts
1523
- declare const glob: ToolDef;
1524
- //#endregion
1525
- //#region src/tools/grep.d.ts
1526
- declare const grep: ToolDef;
1623
+ declare function cleanupPersistedSession(persistRoot: string): Promise<void>;
1527
1624
  //#endregion
1528
- //#region src/tools/interaction.d.ts
1529
- interface InteractionToolOptions {
1530
- /** JSON Schema for the request payload the model sends */
1531
- schema: Record<string, unknown>;
1532
- /** Tool name (default: 'interaction') */
1533
- name?: string;
1534
- /** Tool description shown to the model */
1535
- description?: string;
1536
- /** Called when the model invokes this tool. Receives the validated payload and tool context, returns data for the model. */
1537
- onRequest: (payload: Record<string, unknown>, ctx: ToolContext) => Promise<Record<string, unknown> | string>;
1625
+ //#region src/mcp/oauth-provider.d.ts
1626
+ /**
1627
+ * Per-server persisted state. Subfields are optional so a partial save
1628
+ * (e.g. `saveCodeVerifier` arriving before `saveTokens`) doesn't blow away
1629
+ * earlier subfields — the provider always patches, never replaces.
1630
+ */
1631
+ interface McpCredentialEntry {
1632
+ tokens?: OAuthTokens;
1633
+ clientInformation?: OAuthClientInformationMixed;
1634
+ discoveryState?: OAuthDiscoveryState;
1635
+ }
1636
+ interface McpCredentialStore {
1637
+ load: (name: string) => McpCredentialEntry | undefined;
1638
+ save: (name: string, entry: McpCredentialEntry) => void;
1639
+ delete: (name: string) => void;
1538
1640
  }
1539
1641
  /**
1540
- * Create an interaction tool that lets the agent request structured input.
1541
- *
1542
- * The model calls this tool with a payload matching the schema.
1543
- * `onRequest` is called with the payload and should return the response
1544
- * (string or object) that gets sent back to the model as the tool result.
1642
+ * In-memory store primarily for tests, but valid as a no-persistence option
1643
+ * (tokens evaporate on process exit, the user re-auths every cold start).
1545
1644
  */
1546
- declare function createInteractionTool(options: InteractionToolOptions): ToolDef;
1547
- //#endregion
1548
- //#region src/tools/list-files.d.ts
1549
- declare const listFiles: ToolDef;
1550
- //#endregion
1551
- //#region src/tools/multi-edit.d.ts
1552
- declare const multiEdit: ToolDef;
1553
- //#endregion
1554
- //#region src/tools/read-file.d.ts
1555
- declare const readFile: ToolDef;
1556
- //#endregion
1557
- //#region src/tools/shell.d.ts
1558
- interface CreateShellToolOptions {
1559
- /**
1560
- * Whether to expose the `run_in_background` flag in the input schema +
1561
- * the background-mode paragraphs in the description. When `false`, the
1562
- * model never sees the flag and won't try to use it. The execute path
1563
- * still has a defensive fallback: an explicit `run_in_background: true`
1564
- * call (e.g. from a hand-crafted message) returns a clean error rather
1565
- * than silently running foreground.
1566
- *
1567
- * Default: `true`.
1645
+ declare function createMemoryMcpCredentialStore(seed?: Record<string, McpCredentialEntry>): McpCredentialStore;
1646
+ interface McpOAuthProviderOptions {
1647
+ /** Server name — used as the storage key. */
1648
+ name: string;
1649
+ /** Persistence backend. */
1650
+ store: McpCredentialStore;
1651
+ /**
1652
+ * Loopback callback URI. Pass `undefined` for bootstrap (non-interactive
1653
+ * mode — stored tokens + refresh only, never opens a browser).
1568
1654
  */
1569
- allowBackground?: boolean;
1655
+ redirectUri?: string;
1570
1656
  /**
1571
- * Canonical names of tools registered alongside `shell` on the same
1572
- * agent. When non-empty, the description gains a "prefer the dedicated
1573
- * tool" block for each known sibling (`read_file`, `glob`, `grep`,
1574
- * `list_files`, `edit`, `write_file`) — useful against the
1575
- * `ls`/`cat`-to-re-verify loop some models fall into when both a
1576
- * dedicated tool AND `shell` are visible. Unknown / unrecognized names
1577
- * are ignored.
1578
- *
1579
- * Set by `createAgent` per-run from the tool registry; hosts that
1580
- * construct a `shell` directly can pass it explicitly. Omit to suppress
1581
- * the block entirely (no nudge for shell-only agents, no nudge for
1582
- * hosts that prefer to author their own anti-loop prose).
1657
+ * Invoked when the SDK wants the user agent to navigate to the authorization
1658
+ * URL. Typically the host opens the browser AND emits a hook so the TUI can
1659
+ * render the URL in a status row. No-op in non-interactive mode (the SDK
1660
+ * still calls this before throwing `UnauthorizedError` from connect).
1583
1661
  */
1584
- registeredCanonicals?: ReadonlySet<string>;
1662
+ onAuthorizationUrl?: (url: URL) => void | Promise<void>;
1585
1663
  /**
1586
- * The agent's `toolAliases` map, used to render the wire-level name of
1587
- * each sibling in the swap block. Without this, the block always prints
1588
- * canonical names — fine for the default preset, wrong for hosts that
1589
- * alias-rename (the model would be told to call a name it doesn't see
1590
- * in the tool spec).
1664
+ * `client_name` used in dynamic client registration. Defaults to `'zidane'`.
1665
+ * Some servers display this string to the user on the consent screen.
1591
1666
  */
1592
- toolAliases?: Record<string, string>;
1667
+ clientName?: string;
1668
+ /**
1669
+ * Override the requested OAuth scope. Default: unset (the SDK negotiates
1670
+ * via the server's metadata).
1671
+ */
1672
+ scope?: string;
1673
+ }
1674
+ declare class McpOAuthProvider implements OAuthClientProvider {
1675
+ private readonly name;
1676
+ private readonly store;
1677
+ private readonly _redirectUri?;
1678
+ private readonly onAuthorizationUrl?;
1679
+ private readonly clientName;
1680
+ private readonly _scope?;
1681
+ private codeVerifierValue;
1682
+ constructor(opts: McpOAuthProviderOptions);
1683
+ get redirectUrl(): string | URL | undefined;
1684
+ get clientMetadata(): OAuthClientMetadata;
1685
+ tokens(): OAuthTokens | undefined;
1686
+ saveTokens(tokens: OAuthTokens): void;
1687
+ clientInformation(): OAuthClientInformationMixed | undefined;
1688
+ saveClientInformation(info: OAuthClientInformationMixed): void;
1689
+ discoveryState(): OAuthDiscoveryState | undefined;
1690
+ saveDiscoveryState(state: OAuthDiscoveryState): void;
1691
+ saveCodeVerifier(verifier: string): void;
1692
+ codeVerifier(): string;
1693
+ redirectToAuthorization(url: URL): Promise<void>;
1694
+ /**
1695
+ * Wipe stored credentials when the server reports the cached state is no
1696
+ * longer valid. The SDK calls this with a scope hint:
1697
+ * - `'tokens'` → access/refresh revoked, keep client registration
1698
+ * - `'client'` → client registration invalidated, reset everything
1699
+ * - `'verifier'`→ PKCE state stale (e.g. mismatched state param)
1700
+ * - `'discovery'` → discovery metadata stale (servers re-keyed)
1701
+ * - `'all'` → full reset
1702
+ */
1703
+ invalidateCredentials(scope: 'all' | 'client' | 'tokens' | 'verifier' | 'discovery'): Promise<void>;
1704
+ private patch;
1593
1705
  }
1594
1706
  /**
1595
- * Factory for the `shell` tool. The default exported `shell` is
1596
- * equivalent to `createShellTool({ allowBackground: true })`. The
1597
- * factory is the entry point hosts use when they want to override the
1598
- * default — e.g. to ship a preset that always disables background mode
1599
- * regardless of `behavior.tasksDir`.
1600
- *
1601
- * Hosts that use the framework's `createAgent` typically don't need to
1602
- * call this directly: when `behavior.tasksDir` is unset or
1603
- * `behavior.disableBackgroundTasks: true` is set, the agent
1604
- * automatically rewrites the registered `shell` (if it's the
1605
- * framework's built-in) using this factory.
1606
- */
1607
- declare function createShellTool(opts?: CreateShellToolOptions): ToolDef;
1608
- /**
1609
- * Default `shell` tool with background mode enabled.
1707
+ * True when an HTTP transport's auth headers already include an explicit
1708
+ * Authorization. Used by the bootstrap escape-hatch: a user who provided
1709
+ * their own bearer token shouldn't be auto-promoted to OAuth on a 401.
1610
1710
  *
1611
- * Most hosts use this directly via `basicTools`. When the agent's
1612
- * `behavior.tasksDir` is unset OR `behavior.disableBackgroundTasks:
1613
- * true` is set, `createAgent` auto-rewrites this identity to a
1614
- * `createShellTool({ allowBackground: false })` variant so the model
1615
- * never sees a flag it can't use. Hosts who want to bypass that
1616
- * auto-rewrite can register a `createShellTool({ allowBackground })`
1617
- * directly — the rewrite only fires on identity-equal references to
1618
- * this constant.
1711
+ * Case-insensitive — Node normalizes outgoing headers to lowercase but
1712
+ * users hand-write `Authorization` in configs.
1619
1713
  */
1620
- declare const shell: ToolDef;
1621
- //#endregion
1622
- //#region src/tools/shell-kill.d.ts
1623
- declare const shellKill: ToolDef;
1624
- //#endregion
1625
- //#region src/tools/skills-read.d.ts
1626
- interface SkillsReadToolOptions {
1627
- catalog: readonly SkillConfig[];
1628
- state: SkillActivationState;
1629
- }
1630
- declare function createSkillsReadTool(options: SkillsReadToolOptions): ToolDef;
1714
+ declare function hasAuthorizationHeader(headers: Record<string, string> | undefined): boolean;
1631
1715
  //#endregion
1632
- //#region src/tools/skills-run-script.d.ts
1633
- interface SkillsRunScriptToolOptions {
1634
- catalog: readonly SkillConfig[];
1635
- state: SkillActivationState;
1636
- /** Script timeout in milliseconds. Default 60000. */
1637
- scriptTimeoutMs?: number;
1716
+ //#region src/mcp/login.d.ts
1717
+ interface LoginMcpServerOptions {
1718
+ /** Persistence — same store the bootstrap path reads from. */
1719
+ store: McpCredentialStore;
1720
+ /**
1721
+ * Invoked with the authorization URL once it's ready. Hosts typically
1722
+ * (a) emit `mcp:auth:url` for the TUI, and (b) call `tryOpenBrowser`.
1723
+ * The URL is identical to the one passed to the `mcp:auth:url` hook
1724
+ * fired automatically — this callback is a synchronous hook for callers
1725
+ * that don't want to wire the agent hook machinery.
1726
+ */
1727
+ onAuthorizationUrl?: (url: URL) => void | Promise<void>;
1728
+ /** Cancels the flow (esc / close modal / SIGINT). */
1729
+ signal?: AbortSignal;
1730
+ /** Agent hooks. The flow emits `mcp:auth:url`/`success`/`error` when wired. */
1731
+ hooks?: Hookable<AgentHooks>;
1732
+ /** Override `client_name` shown on consent screens. Default: 'zidane'. */
1733
+ clientName?: string;
1734
+ /** Override the requested OAuth scope. */
1735
+ scope?: string;
1736
+ /**
1737
+ * Override the loopback callback path. Default: `/callback`. Useful only
1738
+ * for servers that pinned a different path during registration.
1739
+ */
1740
+ callbackPath?: string;
1741
+ /**
1742
+ * Maximum time to wait for the user to complete the browser flow, in ms.
1743
+ * The user can also cancel via `signal`. Default: 5 minutes.
1744
+ */
1745
+ timeoutMs?: number;
1638
1746
  }
1639
- declare function createSkillsRunScriptTool(options: SkillsRunScriptToolOptions): ToolDef;
1640
- //#endregion
1641
- //#region src/tools/skills-use.d.ts
1642
- interface SkillsUseToolOptions {
1643
- /** Resolved skills catalog for this run. */
1644
- catalog: readonly SkillConfig[];
1645
- /** Per-agent activation state the tool mutates. */
1646
- state: SkillActivationState;
1647
- /** Agent hooks used to fire `skills:activate` on first activation. */
1648
- hooks: Hookable<AgentHooks>;
1747
+ interface LoginMcpServerResult {
1748
+ /** Stored OAuth tokens after a successful exchange. */
1749
+ tokens: NonNullable<ReturnType<McpOAuthProvider['tokens']>>;
1750
+ /**
1751
+ * Upstream tool descriptors discovered after re-connecting with the new
1752
+ * tokens. Already filtered by the server's `enabledTools` / `disabledTools`
1753
+ * is NOT applied here that's a bootstrap concern. Hosts that want filtering
1754
+ * should pass the result through `connectMcpServers` rebuild on the next
1755
+ * session activation rather than reusing this list verbatim.
1756
+ */
1757
+ tools: Array<{
1758
+ name: string;
1759
+ description?: string | null;
1760
+ inputSchema?: unknown;
1761
+ }>;
1649
1762
  }
1650
1763
  /**
1651
- * Factory for `skills_use`. Auto-injected into the agent's tool set by the
1652
- * agent runtime when a non-empty skills catalog is available (unless
1653
- * `SkillsConfig.tool === false`).
1764
+ * Run the full interactive OAuth flow for `config`. Only supports `sse` and
1765
+ * `streamable-http` transports `stdio` MCP servers don't speak OAuth.
1654
1766
  *
1655
- * The tool schema's `name` property is `enum`-constrained to the resolved
1656
- * catalog so the LLM cannot hallucinate a skill that doesn't exist.
1767
+ * Throws on:
1768
+ * - Wrong transport.
1769
+ * - Abort signal.
1770
+ * - Browser-side error (user denied, server rejected, etc.).
1771
+ * - Code exchange failure.
1772
+ * - Post-exchange connect failure.
1773
+ *
1774
+ * Always closes the loopback callback server before returning, success or
1775
+ * failure.
1657
1776
  */
1658
- declare function createSkillsUseTool(options: SkillsUseToolOptions): ToolDef;
1777
+ declare function loginMcpServer(config: McpServerConfig, options: LoginMcpServerOptions): Promise<LoginMcpServerResult>;
1659
1778
  //#endregion
1660
- //#region src/tools/spawn.d.ts
1661
- interface ChildAgent {
1662
- id: string;
1663
- task: string;
1664
- startedAt: number;
1665
- /** Subagent depth — 1 for a direct child of a top-level agent. */
1666
- depth: number;
1779
+ //#region src/mcp/oauth-callback.d.ts
1780
+ /**
1781
+ * Local loopback HTTP callback for OAuth 2.0 authorization code flows.
1782
+ *
1783
+ * Stands up a one-shot server on `127.0.0.1:<random>` that captures the
1784
+ * `?code=...` redirect from a browser-driven OAuth flow and resolves a
1785
+ * promise with the code. Used as the `redirectUrl` half of the MCP SDK's
1786
+ * `OAuthClientProvider` (the persistence half lives separately).
1787
+ *
1788
+ * Design:
1789
+ * - Loopback-only (`127.0.0.1`) — the OAuth spec treats `http://127.0.0.1:<port>`
1790
+ * as a public-client redirect URI per RFC 8252 §7.3. Browsers do NOT block it,
1791
+ * and Anthropic / OpenAI / Linear / GitHub all accept it.
1792
+ * - Random port (`port = 0`) — the OS picks an unused one. We read the actual
1793
+ * port back from `server.address()` after `listen()`.
1794
+ * - Single-shot — the first GET to `path` with a `code` (or `error`) wins;
1795
+ * subsequent requests get 404. The server keeps listening (in case the user
1796
+ * hits "back" and re-authorizes), so callers must `close()` once they have
1797
+ * the code or have given up.
1798
+ * - Abort-aware — wiring an external `AbortSignal` rejects the promise and
1799
+ * closes the server immediately. Required for the TUI's "esc cancels login"
1800
+ * UX.
1801
+ * - No HTML framework — a single inline `<html>` string keeps this isolated
1802
+ * from any UI dependency.
1803
+ */
1804
+ /**
1805
+ * Result of a successful callback. `state` is forwarded verbatim from the
1806
+ * query string — callers verify it against their pre-flight value to defend
1807
+ * against CSRF (the MCP SDK does this internally when it controls `state`).
1808
+ */
1809
+ interface OAuthCallbackResult {
1810
+ code: string;
1811
+ state?: string;
1667
1812
  }
1668
- interface SpawnToolState {
1669
- /** Currently running children. */
1670
- readonly children: ReadonlyMap<string, ChildAgent>;
1813
+ interface OAuthCallbackHandle {
1671
1814
  /**
1672
- * Cumulative stats across every completed direct child of this spawn-tool
1673
- * instance (returns a copy). Each child's contribution is the cumulative
1674
- * `AgentStats` returned by its `agent.run()` — so
1675
- * `totalIn`/`totalOut`/`totalCacheRead`/`totalCacheCreation` cover the
1676
- * entire subtree (children + grandchildren + …), while `turns` and
1677
- * `elapsed` stay parent-loop-only per child and are summed across direct
1678
- * children. `elapsed` over-counts when children ran in parallel.
1815
+ * Full URI to register with the authorization server, e.g.
1816
+ * `http://127.0.0.1:51823/callback`. Stable for the lifetime of the
1817
+ * handle.
1818
+ */
1819
+ redirectUri: string;
1820
+ /**
1821
+ * Resolves with `{ code, state }` on a successful callback. Rejects with:
1822
+ * - The OAuth-spec `error` field (`access_denied`, `server_error`, ...)
1823
+ * when the authorization server redirects with `?error=...`.
1824
+ * - `'OAuth callback aborted'` when the external `AbortSignal` fires.
1825
+ * - `'OAuth callback server closed'` when `close()` is called before any
1826
+ * callback arrives.
1679
1827
  *
1680
- * Lives across multiple parent runs that share this instance.
1828
+ * Single-shot only the first matching request resolves the promise.
1681
1829
  */
1682
- readonly totalChildStats: Readonly<AgentStats>;
1830
+ promise: Promise<OAuthCallbackResult>;
1831
+ /**
1832
+ * Idempotent shutdown. Safe to call from a `finally` block whether the
1833
+ * flow succeeded, failed, or was aborted. Resolves once the server stops
1834
+ * accepting connections.
1835
+ */
1836
+ close: () => Promise<void>;
1683
1837
  }
1684
- interface SpawnToolOptions {
1685
- /** Maximum concurrent sub-agents (default: 3). */
1686
- maxConcurrent?: number;
1838
+ interface OAuthCallbackOptions {
1839
+ /** Cancels the flow rejects `promise` and closes the server. */
1840
+ signal?: AbortSignal;
1687
1841
  /**
1688
- * Maximum subagent depth. 0 disables spawning entirely; 1 allows top-level
1689
- * spawns but forbids grandchildren; 3 (default) allows three levels of
1690
- * recursion — enough for most orchestration patterns, a sharp ceiling
1691
- * against runaway loops.
1842
+ * Path component the authorization server should redirect to. Defaults
1843
+ * to `/callback`. Useful when matching a pre-registered URI that uses a
1844
+ * different path.
1692
1845
  */
1693
- maxDepth?: number;
1694
- /** Child model override. */
1695
- model?: string;
1696
- /** Child system prompt override. Per-spawn `input.system` takes precedence. */
1697
- system?: string;
1698
- /** Child thinking level. */
1699
- thinking?: 'off' | 'minimal' | 'low' | 'medium' | 'high';
1700
- /** Preset override for children. Shallow-merged over the parent's preset (parent fields still win for anything left unset). */
1701
- preset?: Preset;
1846
+ path?: string;
1702
1847
  /**
1703
- * Per-child timeout, in milliseconds. When the child exceeds it the spawn
1704
- * tool returns a timeout marker, fires `spawn:error`, and destroys the
1705
- * child agent. Default: none.
1848
+ * Override the loopback host. Defaults to `127.0.0.1`. Don't bind to
1849
+ * `0.0.0.0` here — the OAuth code is a one-time secret and the server
1850
+ * would otherwise accept it from any host on the LAN.
1706
1851
  */
1707
- timeoutMs?: number;
1852
+ host?: string;
1708
1853
  /**
1709
- * When `true` and the parent has a session, the child reuses the parent's
1710
- * session — child turns are appended with the child's own `runId`, and the
1711
- * resulting `SessionRun` carries `parentRunId` so the tree is
1712
- * reconstructible. Default: `false` (child is in-memory only).
1713
- *
1714
- * **Read-state isolation.** Sharing the session also shares the
1715
- * `read_file` / `requireReadBeforeEdit` tracking map (it's keyed
1716
- * by `Session`). With `persist: false` the child gets no session,
1717
- * so reads inside the subagent populate nothing the parent can see —
1718
- * a follow-up `edit` / `multi_edit` in the parent will trip the
1719
- * gate with `"has not been read"` even though the model just
1720
- * read the file in the child. Use {@link shareReadState} when
1721
- * you want the parent's gate to honor the child's reads WITHOUT
1722
- * also persisting child turns to the parent's session.
1854
+ * Override the port. Defaults to `0` (OS-assigned). Pin to a fixed port
1855
+ * only when the authorization server requires a pre-registered redirect
1856
+ * URI; the random-port path is preferred so concurrent flows don't clash.
1723
1857
  */
1724
- persist?: boolean;
1858
+ port?: number;
1859
+ }
1860
+ /**
1861
+ * Start a one-shot OAuth callback server. The returned handle's `redirectUri`
1862
+ * should be passed to the authorization server as the `redirect_uri` query
1863
+ * parameter; `promise` resolves once the user finishes the browser flow.
1864
+ *
1865
+ * Always `await handle.close()` in a `finally` block — even on success, the
1866
+ * server stays open until told to shut down (so it can serve the
1867
+ * "you can close this tab" page).
1868
+ */
1869
+ declare function startOAuthCallback(opts?: OAuthCallbackOptions): Promise<OAuthCallbackHandle>;
1870
+ //#endregion
1871
+ //#region src/metrics.d.ts
1872
+ type MetricAttributes = Record<string, string | number | boolean | undefined>;
1873
+ interface Counter {
1874
+ add: (value: number, attributes?: MetricAttributes) => void;
1875
+ }
1876
+ interface Histogram {
1877
+ record: (value: number, attributes?: MetricAttributes) => void;
1878
+ }
1879
+ interface UpDownCounter {
1880
+ add: (value: number, attributes?: MetricAttributes) => void;
1881
+ }
1882
+ interface InstrumentOptions {
1883
+ description?: string;
1884
+ unit?: string;
1885
+ }
1886
+ /**
1887
+ * Minimal Meter interface — structurally identical to OTel's `Meter`.
1888
+ * Hosts passing `metrics.getMeter(name)` (from `@opentelemetry/api`)
1889
+ * satisfy this without adaptation.
1890
+ */
1891
+ interface Meter {
1892
+ createCounter: (name: string, options?: InstrumentOptions) => Counter;
1893
+ createHistogram: (name: string, options?: InstrumentOptions) => Histogram;
1894
+ createUpDownCounter: (name: string, options?: InstrumentOptions) => UpDownCounter;
1895
+ }
1896
+ interface MetricsHooksOptions {
1897
+ meter: Meter;
1725
1898
  /**
1726
- * Forward the parent's read-state map to the child agent so the
1727
- * `requireReadBeforeEdit` gate and `dedupReads` cache see reads
1728
- * across the parent/child boundary. Orthogonal to {@link persist} —
1729
- * use this when you want shared read tracking without sharing the
1730
- * session's turn history. Default: `false`.
1731
- *
1732
- * Has no effect when the parent has no read-state to share (no
1733
- * session and no explicit `readState` on the parent agent's
1734
- * options). Implementation: passes the parent's resolved
1735
- * `ReadStateMap` to the child via `AgentOptions.readState`, which
1736
- * tools resolve via `ctx.readState ?? getReadState(ctx.session)`.
1899
+ * Optional prefix prepended to every instrument name. Default: no prefix
1900
+ * (instrument names follow OTel Gen AI semantic conventions verbatim,
1901
+ * which is the most-portable shape). Set to e.g. `'zidane.'` to
1902
+ * namespace inside a shared meter registry.
1737
1903
  */
1738
- shareReadState?: boolean;
1904
+ namespace?: string;
1739
1905
  /**
1740
- * Forward a curated subset of child hook events (`stream:*`, `tool:*`,
1741
- * `turn:after`) onto the parent's hook bus as `child:*` events. Default:
1742
- * `true`. Grandchildren bubble through their child transparently.
1906
+ * Optional baseline attributes applied to every measurement. Typical
1907
+ * use: `{ service: 'tui', env: 'prod' }`. Per-event attributes win on
1908
+ * key collision.
1743
1909
  */
1744
- forwardHooks?: boolean;
1745
- /** Called when a child agent starts. */
1746
- onSpawn?: (child: ChildAgent) => void;
1747
- /** Called when a child agent completes (success, abort, timeout, or error). */
1748
- onComplete?: (child: ChildAgent, stats: AgentStats, status: NonNullable<ChildRunStats['status']>) => void;
1910
+ baseAttributes?: MetricAttributes;
1749
1911
  /**
1750
- * Named subagent presets the model can select via the `subagent_type`
1751
- * input field. Mirrors the Claude Code SDK's surface — models trained
1752
- * on it routinely emit `subagent_type: 'Explore' | 'Plan' |
1753
- * 'Verification' | 'general-purpose'`, and without a registry the
1754
- * field is silently dropped, so hosts wanting type-specialized
1755
- * subagents have to invent their own dispatch layer.
1756
- *
1757
- * Each entry overlays the base spawn config for that particular
1758
- * dispatch. Per-call `input.system` still wins over `subagents[type].system`
1759
- * so the model can always specialize further.
1760
- *
1761
- * When the registry is non-empty:
1762
- * - `subagent_type` appears in the spawn input schema as a `string`
1763
- * enum of the registered keys (plus the always-available
1764
- * `'general-purpose'` fallback).
1765
- * - Models that pass an unregistered type are routed to
1766
- * `'general-purpose'` (no error — degrade gracefully so trained
1767
- * models keep working even on hosts that haven't wired every
1768
- * type Claude Code uses).
1769
- *
1770
- * When the registry is empty / unset, the field is omitted entirely
1771
- * (preserves the historical schema for hosts that never use this).
1772
- *
1773
- * Default: `undefined` (no subagent types; `subagent_type` schema
1774
- * field hidden).
1912
+ * Error sink for meter failures. The helper still swallows the throw
1913
+ * so a broken backend can't crash a run; this callback surfaces the
1914
+ * failure for ops dashboards.
1775
1915
  */
1776
- subagents?: SubagentRegistry;
1916
+ onError?: (kind: string, err: unknown) => void;
1917
+ }
1918
+ interface MetricsHookSet {
1919
+ install: (hooks: Hookable<AgentHooks>) => () => void;
1920
+ }
1921
+ /**
1922
+ * Build a set of metrics hook handlers that can be installed on an agent.
1923
+ *
1924
+ * @example OpenTelemetry
1925
+ * ```ts
1926
+ * import { metrics } from '@opentelemetry/api'
1927
+ * const meter = metrics.getMeter('zidane')
1928
+ * const m = createMetricsHooks({ meter, baseAttributes: { service: 'tui' } })
1929
+ * const uninstall = m.install(agent.hooks)
1930
+ * try { await agent.run({ prompt }) }
1931
+ * finally { uninstall() }
1932
+ * ```
1933
+ */
1934
+ declare function createMetricsHooks(options: MetricsHooksOptions): MetricsHookSet;
1935
+ //#endregion
1936
+ //#region src/stats.d.ts
1937
+ /**
1938
+ * Per-model usage rollup produced by {@link statsByModel}.
1939
+ *
1940
+ * `turns` counts the number of `TurnUsage` entries attributed to the model
1941
+ * across the whole tree (parent loop + every recursively-spawned child).
1942
+ * Cache and cost numbers are summed from the same set of turns.
1943
+ */
1944
+ interface ModelUsage {
1945
+ input: number;
1946
+ output: number;
1947
+ cost: number;
1948
+ cacheRead: number;
1949
+ cacheCreation: number;
1950
+ turns: number;
1777
1951
  }
1778
1952
  /**
1779
- * Per-type subagent override applied when the model calls
1780
- * `spawn({ subagent_type: '…' })`. All fields are optional; absent
1781
- * fields fall back to the parent's resolved configuration (see
1782
- * {@link SpawnToolOptions} comments for the merge order).
1953
+ * Depth-first walk over the stats tree, returning every `TurnUsage` entry
1954
+ * parent loop first, then each child subtree in completion order.
1955
+ *
1956
+ * Closes the cache-token aggregation gap: `TurnUsage.cacheRead` /
1957
+ * `cacheCreation` live only on per-turn entries, and the top-level
1958
+ * `AgentStats` deliberately doesn't carry cumulative forms (one source of
1959
+ * truth, no risk of drift). Anything that needs a tree-wide sum walks
1960
+ * through this.
1961
+ */
1962
+ declare function flattenTurns(stats: AgentStats): TurnUsage[];
1963
+ /**
1964
+ * Group cumulative usage by `TurnUsage.modelId`. Each entry sums the input,
1965
+ * output, cache, cost, and turn-count across every turn the tree attributed
1966
+ * to that model — naturally handling cross-model runs (vision-fallback,
1967
+ * model-shifted subagents, mixed-provider workflows).
1968
+ *
1969
+ * Turns missing `modelId` (mock providers, providers that don't echo a model
1970
+ * id) are bucketed under the literal string `'(unknown)'`.
1971
+ */
1972
+ declare function statsByModel(stats: AgentStats): Map<string, ModelUsage>;
1973
+ //#endregion
1974
+ //#region src/system-prompt.d.ts
1975
+ /**
1976
+ * System-prompt boundary marker — splits a system prompt into a stable static
1977
+ * prefix (cached) and a per-turn dynamic suffix (NOT cached).
1978
+ *
1979
+ * Why this exists: providers attach `cache_control` markers on the last block
1980
+ * of the system prompt, so the cached prefix covers the entire system text.
1981
+ * Any byte change anywhere — including a per-turn `<env>` rewrite — busts the
1982
+ * cache for the doctrine that sits below. A literal marker in the system
1983
+ * string lets providers split it into:
1984
+ *
1985
+ * ┌──────────────┐ cache_control: ephemeral
1986
+ * │ STATIC half │ — doctrine, skills catalog, tool catalog,
1987
+ * │ │ user instructions
1988
+ * ├──────────────┤ ← SYSTEM_PROMPT_BOUNDARY
1989
+ * │ DYNAMIC half │ — env, cwd, mtimes, anything per-turn
1990
+ * └──────────────┘ (no cache_control)
1991
+ *
1992
+ * The static prefix rides the prompt cache across turns/sessions; the dynamic
1993
+ * suffix re-bills per turn. Net effect: a cwd change between turns no longer
1994
+ * invalidates 4 KB of doctrine.
1995
+ *
1996
+ * Wire contract:
1997
+ *
1998
+ * - `splitSystemPrompt(s)` is pure; missing marker ⇒ entire string is static
1999
+ * (current behavior — no caller is forced to opt in).
2000
+ * - `renderSystemForWire(s)` strips the marker so it never reaches the model;
2001
+ * used by every provider before the bytes hit the wire, including the
2002
+ * cache-disabled path on providers that DO support `cache_control`.
2003
+ * - The marker uses underscores rather than XML/punctuation so it's
2004
+ * unambiguous when scanning prompts manually and unlikely to collide with
2005
+ * model-written content.
2006
+ * - Providers handle the split internally (Anthropic emits a 2-block array;
2007
+ * OpenAI-compat splits the leading `system` message into multi-part text).
2008
+ * Callers always pass a single `string` — no API break.
2009
+ */
2010
+ /**
2011
+ * Literal marker inserted in a system prompt to separate cacheable doctrine
2012
+ * from per-turn dynamic content. Providers split on this token.
2013
+ *
2014
+ * Underscored on both sides for visual distinctiveness — a stray instance in
2015
+ * model-written prose is implausible. Don't change the value without shipping
2016
+ * a migration; existing sessions carry the old marker in their cached
2017
+ * prompts.
1783
2018
  */
1784
- interface SubagentDef {
1785
- /**
1786
- * System prompt override for this subagent type. Per-call
1787
- * `input.system` still wins — model-supplied specialization beats
1788
- * preset defaults.
1789
- */
1790
- system?: string;
1791
- /**
1792
- * Restrict the child agent's tool registry to this list of canonical
1793
- * tool names. Operates as a filter over the parent's tools (the
1794
- * parent's selection is the upper bound — a subagent can never gain
1795
- * tools the parent doesn't have). When unset, the child inherits the
1796
- * parent's full tool list.
1797
- *
1798
- * Tool names are canonical (registry-key) not wire/alias names — the
1799
- * filter runs before aliases are applied. Names that don't match a
1800
- * parent tool are silently dropped (matches the lenient behaviour of
1801
- * `enabledTools` on MCP configs).
1802
- */
1803
- tools?: readonly string[];
1804
- /**
1805
- * Mark this subagent as read-only — equivalent to listing only
1806
- * obviously-non-mutating tools (`read_file`, `grep`, `glob`,
1807
- * `list_files`) in {@link SubagentDef.tools}. Convenience for the
1808
- * common "Plan" / "Explore" subagent shape Claude Code ships.
1809
- *
1810
- * When both `readonly: true` and `tools` are set, `tools` wins
1811
- * (explicit beats implicit).
1812
- */
1813
- readonly?: boolean;
1814
- /**
1815
- * Short description rendered into the spawn tool's schema (the
1816
- * `subagent_type` field's description) so the model can pick a type
1817
- * that matches the task without round-tripping through docs.
1818
- */
1819
- description?: string;
2019
+ declare const SYSTEM_PROMPT_BOUNDARY = "__ZIDANE_SYSTEM_PROMPT_BOUNDARY__";
2020
+ /** Result of {@link splitSystemPrompt} — both halves stripped of the marker. */
2021
+ interface SystemPromptParts {
2022
+ /** Bytes BEFORE the marker (or the entire string when no marker present). Cacheable. */
2023
+ static: string;
2024
+ /** Bytes AFTER the marker. Empty when no marker present. NOT cached. */
2025
+ dynamic: string;
1820
2026
  }
1821
2027
  /**
1822
- * Map of subagent-type key → preset. Keys are case-sensitive and
1823
- * appear verbatim in the spawn input schema's enum so the model emits
1824
- * them with the same casing. Common conventions: `'Explore'`,
1825
- * `'Plan'`, `'Verification'`, `'general-purpose'` (Claude Code SDK's
1826
- * built-in set).
2028
+ * Split a system prompt around the first {@link SYSTEM_PROMPT_BOUNDARY}.
2029
+ *
2030
+ * Splits on the FIRST occurrence — subsequent markers are folded into the
2031
+ * dynamic half. This way callers can append additional `<env>` style blocks
2032
+ * with extra markers without each one creating a new cache layer (Anthropic
2033
+ * caps breakpoints; we use the budget elsewhere). The marker itself is
2034
+ * stripped from both sides — providers attach `cache_control` directly to
2035
+ * the static half's text content.
2036
+ *
2037
+ * A single blank line (`\n\n`) immediately adjacent to the marker on each
2038
+ * side is trimmed — callers conventionally write
2039
+ * `<doctrine>\n\n<MARKER>\n\n<env>` and expect the rendered wire bytes to
2040
+ * read as one logical paragraph. Blank lines beyond that immediate pair
2041
+ * (e.g. `\n\n\n<env>`) are preserved verbatim so callers composing their
2042
+ * own spacing don't lose intentional gaps.
2043
+ *
2044
+ * Pure / no I/O / no allocation when the marker is absent (returns the input
2045
+ * verbatim on the static side and the empty string on the dynamic side).
1827
2046
  */
1828
- type SubagentRegistry = Record<string, SubagentDef>;
2047
+ declare function splitSystemPrompt(system: string): SystemPromptParts;
1829
2048
  /**
1830
- * Create a configured spawn tool.
2049
+ * Compose a system prompt from a static prefix and an optional dynamic
2050
+ * suffix. Inserts {@link SYSTEM_PROMPT_BOUNDARY} between them only when the
2051
+ * dynamic side is non-empty — single-block prompts stay marker-free, so
2052
+ * callers that never opt in pay zero overhead and providers fall back to the
2053
+ * existing whole-string caching path.
1831
2054
  *
1832
- * State (`children`, `totalChildStats`, counters, active count) is scoped to
1833
- * the returned instance. Multiple parent agents using the same instance will
1834
- * share counters + stats + concurrency slots — call `createSpawnTool()` per
1835
- * agent (or use the stateless default `spawn`) to keep them isolated.
2055
+ * Spacing is `\n\n` on both sides of the marker so doctrine fragments,
2056
+ * which conventionally separate sections with a blank line, read cleanly
2057
+ * around the boundary.
1836
2058
  */
1837
- declare function createSpawnTool(options?: SpawnToolOptions): ToolDef & SpawnToolState;
1838
- //#endregion
1839
- //#region src/tools/tool-search.d.ts
1840
- interface LazyToolEntry {
1841
- /**
1842
- * Wire name (after `toolAliases` rewrite). What the model sees in the
1843
- * catalog, what `tool_search` matches against, and what the provider's
1844
- * tool list will carry once the entry is unlocked.
1845
- */
1846
- name: string;
1847
- /**
1848
- * Canonical (registry-key) name used for unlock-set membership and for the
1849
- * loop's `ctx.tools[name]` dispatch lookup. Equal to `name` when no alias
1850
- * is configured for this tool.
1851
- */
1852
- canonicalName: string;
1853
- description: string;
1854
- inputSchema: Record<string, unknown>;
1855
- /** Source MCP server, when applicable. Used for `server`-bulk unlock. */
1856
- server?: string;
1857
- }
1858
- interface ToolSearchToolOptions {
1859
- /**
1860
- * Snapshot of every lazy tool the model can discover. Built once per run by
1861
- * the agent — the tool closes over this array and never mutates it.
1862
- */
1863
- catalog: readonly LazyToolEntry[];
1864
- /**
1865
- * Mutable per-run set of unlocked **canonical** tool names. The tool adds
1866
- * matches in place; the loop reads the set when rebuilding the wire-level
1867
- * tool list. Keyed by canonical (not wire) so dispatch lookups stay
1868
- * alias-stable.
1869
- *
1870
- * Prefer `addUnlock` for cache-stable wire-tool ordering: writes through a
1871
- * Set lose unlock order, so the wire-level rebuild that filters by `unlocked`
1872
- * has to fall back to registry iteration order — which moves entries every
1873
- * time a lazy tool earlier in the registry is unlocked, breaking provider
1874
- * prompt-cache breakpoints. The agent passes both when it owns the unlock
1875
- * tracker, with `addUnlock` mirroring writes into an ordered log.
1876
- */
1877
- unlocked: Set<string>;
1878
- /**
1879
- * Optional callback fired for every canonical name the tool unlocks. When
1880
- * set, the agent uses this to maintain an append-only `dynamicUnlockOrder`
1881
- * so the wire-level tool list emits new unlocks at the tail and keeps the
1882
- * provider prefix cache warm. Idempotent on repeat unlocks of the same
1883
- * name — callers may dedupe internally.
1884
- *
1885
- * Invoked **in addition to** the `unlocked.add` (which still happens for
1886
- * back-compat with callers that only watch the Set).
1887
- */
1888
- addUnlock?: (canonical: string) => void;
1889
- /** Default cap on returned matches when the model omits `limit`. */
1890
- defaultLimit?: number;
1891
- }
2059
+ declare function joinSystemPrompt(staticPart: string, dynamicPart: string): string;
1892
2060
  /**
1893
- * Factory for `tool_search`. Auto-injected by the agent when
1894
- * `behavior.toolDisclosure === 'lazy'` and at least one MCP tool is in the
1895
- * registry. Opt out via `behavior.toolSearch.tool === false`.
2061
+ * Append `extra` to the STATIC half of a system prompt, preserving any
2062
+ * existing dynamic suffix.
2063
+ *
2064
+ * Used by the agent to fold in run-stable content (skills catalog, lazy tool
2065
+ * catalog) without bumping it into the dynamic half — both catalogs are
2066
+ * built once per run and remain byte-stable for the duration, so they
2067
+ * belong in the cached prefix.
2068
+ *
2069
+ * Returns a new string; the input is not mutated. When `extra` is empty,
2070
+ * returns the input verbatim.
1896
2071
  */
1897
- declare function createToolSearchTool(options: ToolSearchToolOptions): ToolDef;
1898
- //#endregion
1899
- //#region src/tools/validation.d.ts
2072
+ declare function appendStaticSection(system: string, extra: string): string;
1900
2073
  /**
1901
- * Tool argument validation against JSON Schema-style inputSchema.
2074
+ * Append `extra` to the DYNAMIC half of a system prompt. Inserts the boundary
2075
+ * marker if the input didn't already carry one.
1902
2076
  *
1903
- * Two passes:
1904
- * 1. Required-field presence. Missing or null/undefined required fields fail.
1905
- * 2. Per-property type checks with **best-effort coercion**. Small/OSS models
1906
- * routinely send `"true"` for a `boolean` field or `"42"` for a `number`,
1907
- * and rejecting outright forces a confusing retry. Instead, we auto-heal —
1908
- * coerce when the conversion is unambiguous, fail only when the value
1909
- * cannot be reasonably normalized to any of the declared types.
2077
+ * Used by hosts (typically the TUI) to inject per-turn state — current cwd,
2078
+ * IDE selection, project root — without forcing every caller to know the
2079
+ * marker format. The host's `system:transform` hook rewrites the dynamic
2080
+ * half each turn; the static doctrine above stays byte-stable and rides the
2081
+ * cache.
1910
2082
  *
1911
- * Recursion: when a property declares `type: 'array'` with an `items` schema,
1912
- * each item is validated against `items`. Object items are walked one level
1913
- * deep (their declared `properties` get the same coercion + enum checks the
1914
- * top level does). Items that can't be coerced are dropped rather than
1915
- * rejecting the whole call — the model rarely benefits from an
1916
- * all-or-nothing failure on a 20-item list because one entry was malformed.
1917
- * Dropped items are reported back via `droppedItems` so the tool's `execute`
1918
- * can surface a hint to the model if it wants to.
2083
+ * Returns a new string; the input is not mutated. When `extra` is empty,
2084
+ * returns the input verbatim.
1919
2085
  */
1920
- interface ValidationResult {
1921
- valid: boolean;
1922
- /** Human-readable reason. Present on failure only. */
1923
- error?: string;
1924
- /**
1925
- * Possibly-coerced input. Present iff `valid: true`. Tools should call
1926
- * `execute(coercedInput, ctx)` so auto-healed values reach the tool body.
1927
- * When no coercion was applied, this is reference-equal to the input.
1928
- */
1929
- coercedInput?: Record<string, unknown>;
1930
- /**
1931
- * Names of fields whose values were coerced. Empty when nothing changed.
1932
- * Useful for telemetry (`validation:reject` on failure already carries the
1933
- * reason; this is the success-path equivalent).
1934
- */
1935
- coercions?: readonly string[];
1936
- /**
1937
- * Indexes of array items dropped during recursive validation, keyed by
1938
- * the property name. Empty / absent when nothing was dropped. Tools that
1939
- * care about the discrepancy (e.g. `todowrite` wanting to surface
1940
- * "ignored 2 malformed items") can inspect this.
1941
- */
1942
- droppedItems?: Readonly<Record<string, readonly number[]>>;
1943
- }
1944
- declare function validateToolArgs(input: Record<string, unknown>, schema: Record<string, unknown>): ValidationResult;
1945
- //#endregion
1946
- //#region src/tools/write-file.d.ts
2086
+ declare function appendDynamicSection(system: string, extra: string): string;
1947
2087
  /**
1948
- * Write a file, with an idempotency signal when the content is unchanged.
2088
+ * Replace the entire dynamic half of a system prompt with `next`. Used by
2089
+ * the TUI's `<env>` rewriter where the entire dynamic section is regenerated
2090
+ * each turn rather than appended to.
1949
2091
  *
1950
- * Three return shapes chosen so the model can recognize a no-op without a
1951
- * separate read:
1952
- * - `Created path (N bytes)` — file did not exist
1953
- * - `Updated path (N bytes)` — content differed from on-disk
1954
- * - `No change needed: path already at target state (N bytes)` — equal
2092
+ * When `next` is empty, drops the dynamic half (and the marker) entirely.
2093
+ */
2094
+ declare function replaceDynamicSection(system: string, next: string): string;
2095
+ /**
2096
+ * Strip the boundary marker so it never reaches the wire — collapses
2097
+ * `<static>${BOUNDARY}<dynamic>` into `<static>\n\n<dynamic>` for providers
2098
+ * that can't honor `cache_control` (vanilla OpenAI Chat Completions, Codex,
2099
+ * Cerebras, ...) or for the cache-disabled path on any provider.
1955
2100
  *
1956
- * Race window: in non-process execution contexts (docker, sandbox) shared by
1957
- * multiple agents, another writer can mutate the file between our read and
1958
- * our write. Local process context is single-writer per agent so the race is
1959
- * a non-issue there. Documented rather than locked because the cost of
1960
- * cross-context locking outweighs the cost of a stale "No change" message.
2101
+ * Cache-aware providers re-derive the split from the original (un-rendered)
2102
+ * system string via `splitSystemPrompt` `renderSystemForWire` is the
2103
+ * marker-free counterpart used to build the actual wire bytes.
2104
+ *
2105
+ * No-op when the input has no marker. Pure / no I/O.
1961
2106
  */
1962
- declare const writeFile: ToolDef;
2107
+ declare function renderSystemForWire(system: string): string;
2108
+ /** True when `system` contains the boundary marker. */
2109
+ declare function hasSystemPromptBoundary(system: string): boolean;
1963
2110
  //#endregion
1964
2111
  //#region src/tracing.d.ts
1965
2112
  /** Minimal span shape — any tracer that provides these methods is compatible. */
@@ -2281,5 +2428,5 @@ declare function definePreset(config: Preset): Preset;
2281
2428
  */
2282
2429
  declare function composePresets(...presets: Preset[]): Preset;
2283
2430
  //#endregion
2284
- export { ModelUsage as $, LoggingHooksOptions as $t, SkillsReadToolOptions as A, ANCHOR_PREVIEW_MAX_CHARS as An, McpOAuthProvider as At, createInteractionTool as B, CacheDimensionDiff as Bn, maybePersistToolResult as Bt, SubagentDef as C, NO_TOOLS_PREAMBLE as Cn, OAuthCallbackResult as Ct, createSkillsUseTool as D, buildFullCompactPrompt as Dn, loginMcpServer as Dt, SkillsUseToolOptions as E, buildFromCompactPrompt as En, LoginMcpServerResult as Et, shell as F, sliceForCompaction as Fn, PERSISTENCE_PREVIEW_BYTES as Ft, SystemPromptParts as G, installCacheBreakLogger as Gn, TOOL_USE_CANCELLED_MESSAGE as Gt, glob as H, CacheDimensionSnapshot as Hn, resolveTasksDir as Ht, readFile as I, stripImagesFromTurns as In, PersistInput as It, hasSystemPromptBoundary as J, LogLevel as Jt, appendDynamicSection as K, snapshotCacheDimensions as Kn, TOOL_USE_SKIPPED_MESSAGE as Kt, multiEdit as L, summaryToTurn as Ln, PersistOutcome as Lt, shellKill as M, CompactionSlice as Mn, createMemoryMcpCredentialStore as Mt, CreateShellToolOptions as N, SummaryToTurnInput as Nn, hasAuthorizationHeader as Nt, SkillsRunScriptToolOptions as O, buildTailCompactPrompt as On, McpCredentialEntry as Ot, createShellTool as P, anchorPreviewFor as Pn, PERSISTED_STUB_PREFIX as Pt, splitSystemPrompt as Q, LoggingHookSet as Qt, listFiles as R, truncateHeadForPtlRetry as Rn, buildPersistedStub as Rt, SpawnToolState as S, CompactPromptOptions as Sn, OAuthCallbackOptions as St, createSpawnTool as T, buildCompactPrompt as Tn, LoginMcpServerOptions as Tt, edit as U, diffCacheDimensions as Un, INTERRUPT_MESSAGE_FOR_TOOL_USE as Ut, grep as V, CacheDimensionName as Vn, resolvePersistDir as Vt, SYSTEM_PROMPT_BOUNDARY as W, fnv1a32 as Wn, SHELL_CASCADE_CANCEL_MESSAGE as Wt, renderSystemForWire as X, LogSink as Xt, joinSystemPrompt as Y, LogRecord as Yt, replaceDynamicSection as Z, Logger as Zt, LazyToolEntry as _, CompactInvalidInputError as _n, MetricsHookSet as _t, basicTools as a, estimateTokens as an, 
RunSummaryByModel as at, ChildAgent as b, CompactDirection as bn, createMetricsHooks as bt, Span as c, PostCompactRestoreOptions as cn, RunSummaryError as ct, TracingHookSet as d, selectFilesFromReadState as dn, createRunSummaryCollector as dt, consoleSink as en, flattenTurns as et, TracingHooksOptions as f, selectFilesFromSession as fn, Counter as ft, validateToolArgs as g, compactConversation as gn, MetricAttributes as gt, ValidationResult as h, CompactResult as hn, Meter as ht, _default as i, BYTES_PER_TOKEN as in, RunSummaryBudget as it, createSkillsReadTool as j, CompactScope as jn, McpOAuthProviderOptions as jt, createSkillsRunScriptTool as k, buildUpToCompactPrompt as kn, McpCredentialStore as kt, StartSpan as l, RecentFile as ln, RunSummaryTokens as lt, writeFile as m, CompactOptions as mn, InstrumentOptions as mt, composePresets as n, createLoggingHooks as nn, RunSummary as nt, zodToJsonSchema as o, utf8ByteLength as on, RunSummaryCollector as ot, createTracingHooks as p, selectRecentFiles as pn, Histogram as pt, appendStaticSection as q, ConsoleSinkOptions as qt, definePreset as r, jsonSink as rn, RunSummaryBlock as rt, GEN_AI_ATTRIBUTES as s, PostCompactAttachments as sn, RunSummaryCollectorOptions as st, Preset as t, createLogger as tn, statsByModel as tt, TracingConventions as u, buildPostCompactAttachments as un, RunSummaryValidation as ut, ToolSearchToolOptions as v, CompactPromptTooLongError as vn, MetricsHooksOptions as vt, SubagentRegistry as w, TRAILER as wn, startOAuthCallback as wt, SpawnToolOptions as x, CompactPromptBuilder as xn, OAuthCallbackHandle as xt, createToolSearchTool as y, BASE_INSTRUCTIONS as yn, UpDownCounter as yt, InteractionToolOptions as z, CacheBreakLoggerOptions as zn, cleanupPersistedSession as zt };
2285
- //# sourceMappingURL=index-B6h9C_JE.d.ts.map
2431
+ export { cleanupPersistedSession as $, diffCacheDimensions as $n, InteractionToolOptions as $t, MetricAttributes as A, CompactDirection as An, validateToolArgs as At, LoginMcpServerResult as B, ANCHOR_PREVIEW_MAX_CHARS as Bn, SkillsUseToolOptions as Bt, ModelUsage as C, selectRecentFiles as Cn, HeadlessUsage as Ct, Histogram as D, CompactInvalidInputError as Dn, transcriptToOpenAIMessages as Dt, Counter as E, compactConversation as En, runHeadless as Et, OAuthCallbackHandle as F, buildCompactPrompt as Fn, SpawnToolOptions as Ft, McpOAuthProviderOptions as G, sliceForCompaction as Gn, createSkillsReadTool as Gt, McpCredentialEntry as H, CompactionSlice as Hn, SkillsRunScriptToolOptions as Ht, OAuthCallbackOptions as I, buildFromCompactPrompt as In, SpawnToolState as It, PERSISTED_STUB_PREFIX as J, truncateHeadForPtlRetry as Jn, createShellTool as Jt, createMemoryMcpCredentialStore as K, stripImagesFromTurns as Kn, shellKill as Kt, OAuthCallbackResult as L, buildFullCompactPrompt as Ln, SubagentDef as Lt, MetricsHooksOptions as M, CompactPromptOptions as Mn, ToolSearchToolOptions as Mt, UpDownCounter as N, NO_TOOLS_PREAMBLE as Nn, createToolSearchTool as Nt, InstrumentOptions as O, CompactPromptTooLongError as On, writeFile as Ot, createMetricsHooks as P, TRAILER as Pn, ChildAgent as Pt, buildPersistedStub as Q, CacheDimensionSnapshot as Qn, listFiles as Qt, startOAuthCallback as R, buildTailCompactPrompt as Rn, SubagentRegistry as Rt, splitSystemPrompt as S, selectFilesFromSession as Sn, HeadlessStatus as St, statsByModel as T, CompactResult as Tn, headlessEventToJsonl as Tt, McpCredentialStore as U, SummaryToTurnInput as Un, createSkillsRunScriptTool as Ut, loginMcpServer as V, CompactScope as Vn, createSkillsUseTool as Vt, McpOAuthProvider as W, anchorPreviewFor as Wn, SkillsReadToolOptions as Wt, PersistInput as X, CacheDimensionDiff as Xn, readFile as Xt, PERSISTENCE_PREVIEW_BYTES as Y, CacheBreakLoggerOptions as Yn, shell as Yt, PersistOutcome as Z, 
CacheDimensionName as Zn, multiEdit as Zt, appendStaticSection as _, PostCompactAttachments as _n, jsonSink as _t, basicTools as a, RunSummaryBlock as an, TOOL_USE_CANCELLED_MESSAGE as at, renderSystemForWire as b, buildPostCompactAttachments as bn, HeadlessOptions as bt, Span as c, RunSummaryCollector as cn, LogLevel as ct, TracingHookSet as d, RunSummaryTokens as dn, Logger as dt, createInteractionTool as en, fnv1a32 as er, maybePersistToolResult as et, TracingHooksOptions as f, RunSummaryValidation as fn, LoggingHookSet as ft, appendDynamicSection as g, utf8ByteLength as gn, createLoggingHooks as gt, SystemPromptParts as h, estimateTokens as hn, createLogger as ht, _default as i, RunSummary as in, SHELL_CASCADE_CANCEL_MESSAGE as it, MetricsHookSet as j, CompactPromptBuilder as jn, LazyToolEntry as jt, Meter as k, BASE_INSTRUCTIONS as kn, ValidationResult as kt, StartSpan as l, RunSummaryCollectorOptions as ln, LogRecord as lt, SYSTEM_PROMPT_BOUNDARY as m, BYTES_PER_TOKEN as mn, consoleSink as mt, composePresets as n, glob as nn, snapshotCacheDimensions as nr, resolveTasksDir as nt, zodToJsonSchema as o, RunSummaryBudget as on, TOOL_USE_SKIPPED_MESSAGE as ot, createTracingHooks as p, createRunSummaryCollector as pn, LoggingHooksOptions as pt, hasAuthorizationHeader as q, summaryToTurn as qn, CreateShellToolOptions as qt, definePreset as r, edit as rn, INTERRUPT_MESSAGE_FOR_TOOL_USE as rt, GEN_AI_ATTRIBUTES as s, RunSummaryByModel as sn, ConsoleSinkOptions as st, Preset as t, grep as tn, installCacheBreakLogger as tr, resolvePersistDir as tt, TracingConventions as u, RunSummaryError as un, LogSink as ut, hasSystemPromptBoundary as v, PostCompactRestoreOptions as vn, HeadlessErrorInfo as vt, flattenTurns as w, CompactOptions as wn, OpenAIChatMessage as wt, replaceDynamicSection as x, selectFilesFromReadState as xn, HeadlessResult as xt, joinSystemPrompt as y, RecentFile as yn, HeadlessEvent as yt, LoginMcpServerOptions as z, buildUpToCompactPrompt as zn, 
createSpawnTool as zt };
2432
+ //# sourceMappingURL=index-DEqGfnZr.d.ts.map