llm-cli-gateway 1.13.2 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,13 @@ export interface AsyncJobFlightRecorderEntry {
16
16
  sessionId?: string;
17
17
  stablePrefixHash?: string;
18
18
  stablePrefixTokens?: number;
19
+ /**
20
+ * Slice κ: count of caller-supplied prompt-parts content blocks the
21
+ * gateway emitted with explicit Anthropic `cache_control` markers
22
+ * (ttl='1h'). Only set for Claude requests that opt into κ; left
23
+ * undefined elsewhere so legacy rows stay NULL.
24
+ */
25
+ cacheControlBlocks?: number;
19
26
  }
20
27
  /**
21
28
  * Slice 1.5 usage-extraction callback. Closures MUST be constructed from
@@ -66,6 +73,13 @@ export interface StartJobOptions {
66
73
  * therefore do NOT collide on dedup.
67
74
  */
68
75
  env?: Record<string, string>;
76
+ /**
77
+ * Slice κ: optional UTF-8 payload to pipe into the child's stdin.
78
+ * Participates in the dedup key — two requests with identical argv
79
+ * but different stdin do NOT collide. When set, stdio[0] is "pipe";
80
+ * when unset, stdio[0] stays "ignore" (regression-protected).
81
+ */
82
+ stdin?: string;
69
83
  /**
70
84
  * Optional hook fired exactly once when the job reaches a terminal state.
71
85
  * Used by callers that own per-request resources (outputSchema temp files,
@@ -168,7 +182,7 @@ export declare class AsyncJobManager {
168
182
  * Existing callers keep working unchanged; forceRefresh is exposed as a trailing
169
183
  * optional param for the dedup-aware path.
170
184
  */
171
- startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void, flightRecorderEntry?: AsyncJobFlightRecorderEntry, extractUsage?: AsyncJobUsageExtractor, writeFlightStart?: boolean): AsyncJobSnapshot;
185
+ startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void, flightRecorderEntry?: AsyncJobFlightRecorderEntry, extractUsage?: AsyncJobUsageExtractor, writeFlightStart?: boolean, stdin?: string): AsyncJobSnapshot;
172
186
  /**
173
187
  * Start a job, with optional dedup against recent identical requests.
174
188
  * Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
@@ -207,8 +207,22 @@ export class AsyncJobManager {
207
207
  * (sorted keys → JSON-stringified). This prevents two Mistral requests with the
208
208
  * same argv but different `VIBE_ACTIVE_MODEL` from deduping onto each other.
209
209
  */
210
- buildRequestKey(cli, args, env) {
211
- return computeRequestKey(cli, args, canonicaliseEnvForKey(env));
210
+ buildRequestKey(cli, args, env, stdin, cwd) {
211
+ // Slice κ: stdin participates in the dedup key. Two Claude requests
212
+ // with identical argv but different cache_control content blocks
213
+ // would otherwise collide on dedup and the second caller would get
214
+ // the wrong response. The legacy "no stdin" code path passes
215
+ // stdin=undefined, which serialises to the same empty marker the
216
+ // previous version emitted — non-κ dedup is unchanged.
217
+ // Slice λ: cwd participates similarly. Two requests with identical
218
+ // argv but different worktrees would otherwise collide on dedup and
219
+ // the second caller would receive a response executed in the wrong
220
+ // worktree. cwd=undefined preserves the pre-λ key shape — non-λ
221
+ // dedup is unchanged.
222
+ const extraEnv = canonicaliseEnvForKey(env);
223
+ const withStdin = stdin === undefined ? extraEnv : `${extraEnv}|stdin:${stdin}`;
224
+ const extra = cwd === undefined ? withStdin : `${withStdin}|cwd:${cwd}`;
225
+ return computeRequestKey(cli, args, extra);
212
226
  }
213
227
  fireOnComplete(job) {
214
228
  if (job.onCompleteFired)
@@ -417,13 +431,14 @@ export class AsyncJobManager {
417
431
  * Existing callers keep working unchanged; forceRefresh is exposed as a trailing
418
432
  * optional param for the dedup-aware path.
419
433
  */
420
- startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete, flightRecorderEntry, extractUsage, writeFlightStart) {
434
+ startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, stdin) {
421
435
  return this.startJobWithDedup(cli, args, correlationId, {
422
436
  cwd,
423
437
  idleTimeoutMs,
424
438
  outputFormat,
425
439
  forceRefresh,
426
440
  env,
441
+ stdin,
427
442
  onComplete,
428
443
  flightRecorderEntry,
429
444
  extractUsage,
@@ -439,8 +454,8 @@ export class AsyncJobManager {
439
454
  * is returned without spawning a new process. forceRefresh skips dedup entirely.
440
455
  */
441
456
  startJobWithDedup(cli, args, correlationId, opts = {}) {
442
- const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, } = opts;
443
- const requestKey = this.buildRequestKey(cli, args, extraEnv);
457
+ const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, stdin, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, } = opts;
458
+ const requestKey = this.buildRequestKey(cli, args, extraEnv, stdin, cwd);
444
459
  if (!forceRefresh && this.store) {
445
460
  try {
446
461
  const existing = this.store.findByRequestKey(requestKey);
@@ -489,9 +504,18 @@ export class AsyncJobManager {
489
504
  const baseEnv = envWithExtendedPath(process.env, getExtendedPath());
490
505
  const child = spawnCliProcess(command, args, {
491
506
  cwd,
492
- stdio: ["ignore", "pipe", "pipe"],
507
+ stdio: stdin === undefined ? ["ignore", "pipe", "pipe"] : ["pipe", "pipe", "pipe"],
493
508
  env: { ...baseEnv, ...(extraEnv ?? {}) },
494
509
  });
510
+ if (stdin !== undefined && child.stdin) {
511
+ try {
512
+ child.stdin.write(stdin);
513
+ }
514
+ catch (err) {
515
+ this.logger.error(`Job ${id} failed to write stdin payload`, err);
516
+ }
517
+ child.stdin.end();
518
+ }
495
519
  // Single cleanup flag to prevent double-unregister
496
520
  let groupCleaned = false;
497
521
  const cleanupGroup = () => {
@@ -560,6 +584,7 @@ export class AsyncJobManager {
560
584
  asyncJobId: id,
561
585
  stablePrefixHash: flightRecorderEntry.stablePrefixHash,
562
586
  stablePrefixTokens: flightRecorderEntry.stablePrefixTokens,
587
+ cacheControlBlocks: flightRecorderEntry.cacheControlBlocks,
563
588
  });
564
589
  }
565
590
  catch (err) {
@@ -76,6 +76,32 @@ export interface GlobalCacheStats {
76
76
  estimatedSavingsUsd: number;
77
77
  }>;
78
78
  estimatedSavingsUsd: number;
79
+ /**
80
+ * Rec #3 (slice κ): derived metrics that distinguish gateway-driven
81
+ * κ-explicit `cache_control` breakpoints from Claude Code's
82
+ * own baseline cache reads.
83
+ *
84
+ * - explicitCacheControlRows: rows where the gateway emitted at
85
+ * least one `cache_control` marker (`cache_control_blocks > 0`).
86
+ * - explicitCacheControlHits: those rows whose `cache_read_tokens
87
+ * > 0` — closest signal we have to "the caller's marked block
88
+ * actually hit Anthropic's cache" (still includes Claude Code's
89
+ * baseline cache reads on top, which is unavoidable without
90
+ * per-block token accounting from Anthropic).
91
+ * - explicitCacheControlHitRate: ratio explicit hits / explicit rows.
92
+ * - stablePrefixReuseCount: distinct `stable_prefix_hash` values
93
+ * that appear in >1 row in-window (i.e. real reuse opportunities).
94
+ * - avgCacheCreationAfterFirstCall: mean cache_creation_tokens over
95
+ * all rows AFTER the first-by-datetime in each stable-prefix reuse
96
+ * group, pooled across groups; null when no prefix is reused. Drops sharply when caller
97
+ * blocks are reused; stays high when Claude Code's session-wrap
98
+ * floor dominates.
99
+ */
100
+ explicitCacheControlRows: number;
101
+ explicitCacheControlHits: number;
102
+ explicitCacheControlHitRate: number;
103
+ stablePrefixReuseCount: number;
104
+ avgCacheCreationAfterFirstCall: number | null;
79
105
  }
80
106
  export declare function computeSessionCacheStats(db: FlightRecorderQuery, sessionId: string): SessionCacheStats;
81
107
  export interface TtlPolicy {
@@ -159,14 +159,16 @@ export function computeGlobalCacheStats(db, opts = {}) {
159
159
  COALESCE(cache_read_tokens, 0) AS cache_read_tokens,
160
160
  COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
161
161
  stable_prefix_hash,
162
- datetime_utc
162
+ datetime_utc,
163
+ cache_control_blocks
163
164
  FROM requests
164
165
  WHERE datetime_utc >= ?`
165
166
  : `SELECT cli, model,
166
167
  COALESCE(cache_read_tokens, 0) AS cache_read_tokens,
167
168
  COALESCE(cache_creation_tokens, 0) AS cache_creation_tokens,
168
169
  stable_prefix_hash,
169
- datetime_utc
170
+ datetime_utc,
171
+ cache_control_blocks
170
172
  FROM requests`;
171
173
  const rows = sinceIso ? db.queryRequests(sql, sinceIso) : db.queryRequests(sql);
172
174
  const perCliMap = new Map();
@@ -175,6 +177,17 @@ export function computeGlobalCacheStats(db, opts = {}) {
175
177
  let totalRead = 0;
176
178
  let totalCreation = 0;
177
179
  let totalSavings = 0;
180
+ // Rec #3: κ-explicit metrics. A row is "κ-explicit" iff it has
181
+ // `cache_control_blocks > 0` — i.e. the gateway emitted at least one
182
+ // caller-supplied `cache_control` marker. Rows with NULL or 0 are
183
+ // either pre-v4 or non-κ Claude / non-Claude requests.
184
+ let explicitRows = 0;
185
+ let explicitHits = 0;
186
+ // Per-prefix reuse tracking: collect cache_creation_tokens for every
187
+ // row keyed by stable_prefix_hash, ordered ascending by datetime_utc.
188
+ // For each group with >1 row, drop the first (the cache-write call)
189
+ // and average the rest (the cache-read calls).
190
+ const perPrefix = new Map();
178
191
  for (const row of rows) {
179
192
  totalRequests += 1;
180
193
  const reads = safeNum(row.cache_read_tokens);
@@ -183,6 +196,17 @@ export function computeGlobalCacheStats(db, opts = {}) {
183
196
  totalCreation += creation;
184
197
  if (reads > 0)
185
198
  totalHits += 1;
199
+ const ccBlocks = safeNum(row.cache_control_blocks);
200
+ if (ccBlocks > 0) {
201
+ explicitRows += 1;
202
+ if (reads > 0)
203
+ explicitHits += 1;
204
+ }
205
+ if (row.stable_prefix_hash) {
206
+ const arr = perPrefix.get(row.stable_prefix_hash) ?? [];
207
+ arr.push({ datetime_utc: row.datetime_utc, cache_creation_tokens: creation });
208
+ perPrefix.set(row.stable_prefix_hash, arr);
209
+ }
186
210
  if (!isCacheStatsCli(row.cli))
187
211
  continue;
188
212
  const cli = row.cli;
@@ -203,6 +227,20 @@ export function computeGlobalCacheStats(db, opts = {}) {
203
227
  agg.estimatedSavingsUsd += savings;
204
228
  perCliMap.set(cli, agg);
205
229
  }
230
+ let stablePrefixReuseCount = 0;
231
+ let creationAfterFirstSum = 0;
232
+ let creationAfterFirstCount = 0;
233
+ for (const arr of perPrefix.values()) {
234
+ if (arr.length <= 1)
235
+ continue;
236
+ stablePrefixReuseCount += 1;
237
+ arr.sort((a, b) => a.datetime_utc < b.datetime_utc ? -1 : a.datetime_utc > b.datetime_utc ? 1 : 0);
238
+ for (let i = 1; i < arr.length; i++) {
239
+ creationAfterFirstSum += arr[i].cache_creation_tokens;
240
+ creationAfterFirstCount += 1;
241
+ }
242
+ }
243
+ const avgCacheCreationAfterFirstCall = creationAfterFirstCount > 0 ? creationAfterFirstSum / creationAfterFirstCount : null;
206
244
  const perCli = Array.from(perCliMap.entries()).map(([cli, agg]) => ({
207
245
  cli,
208
246
  requestCount: agg.requestCount,
@@ -221,5 +259,10 @@ export function computeGlobalCacheStats(db, opts = {}) {
221
259
  totalCacheCreationTokens: totalCreation,
222
260
  perCli,
223
261
  estimatedSavingsUsd: totalSavings,
262
+ explicitCacheControlRows: explicitRows,
263
+ explicitCacheControlHits: explicitHits,
264
+ explicitCacheControlHitRate: explicitRows > 0 ? explicitHits / explicitRows : 0,
265
+ stablePrefixReuseCount,
266
+ avgCacheCreationAfterFirstCall,
224
267
  };
225
268
  }
@@ -7,6 +7,14 @@ export interface ExecuteOptions {
7
7
  logger?: Logger;
8
8
  /** Extra environment variables to inject; merged after PATH. */
9
9
  env?: NodeJS.ProcessEnv;
10
+ /**
11
+ * Slice κ: optional UTF-8 payload to write to the child's stdin
12
+ * immediately after spawn. When provided, stdio for stdin switches
13
+ * from "ignore" to "pipe" so the CLI can read the payload (used by
14
+ * `claude --input-format stream-json`). Undefined preserves the
15
+ * legacy stdio:["ignore","pipe","pipe"] shape.
16
+ */
17
+ stdin?: string;
10
18
  }
11
19
  export interface ExecuteResult {
12
20
  stdout: string;
package/dist/executor.js CHANGED
@@ -296,16 +296,21 @@ export function spawnCliProcess(command, args, options) {
296
296
  return proc;
297
297
  }
298
298
  export async function executeCli(command, args, options = {}) {
299
- const { timeout, idleTimeout, cwd, env: extraEnv } = options;
299
+ const { timeout, idleTimeout, cwd, env: extraEnv, stdin } = options;
300
300
  const extendedPath = getExtendedPath();
301
301
  const baseEnv = envWithExtendedPath(process.env, extendedPath);
302
302
  const circuitBreaker = getCircuitBreaker(command);
303
303
  const runOnce = () => new Promise((resolve, reject) => {
304
+ const stdio = stdin === undefined ? ["ignore", "pipe", "pipe"] : ["pipe", "pipe", "pipe"];
304
305
  const proc = spawnCliProcess(command, args, {
305
306
  cwd,
306
- stdio: ["ignore", "pipe", "pipe"],
307
+ stdio,
307
308
  env: { ...baseEnv, ...(extraEnv ?? {}) },
308
309
  });
310
+ if (stdin !== undefined && proc.stdin) {
311
+ proc.stdin.write(stdin);
312
+ proc.stdin.end();
313
+ }
309
314
  let stdout = "";
310
315
  let stderr = "";
311
316
  let timedOut = false;
@@ -8,6 +8,13 @@ export interface FlightLogStart {
8
8
  asyncJobId?: string;
9
9
  stablePrefixHash?: string;
10
10
  stablePrefixTokens?: number;
11
+ /**
12
+ * Slice κ: number of caller-supplied prompt-parts content blocks
13
+ * that the gateway emitted with an explicit `cache_control`
14
+ * breakpoint on this request. Leave undefined for non-κ requests; the
15
+ * column is then stored as NULL, as it is for pre-κ rows after a v4 migration of a legacy DB.
16
+ */
17
+ cacheControlBlocks?: number;
11
18
  }
12
19
  export interface FlightLogResult {
13
20
  response: string;
@@ -55,6 +55,20 @@ function ensureStablePrefixColumns(db) {
55
55
  }
56
56
  db.exec("CREATE INDEX IF NOT EXISTS idx_requests_stable_hash ON requests(stable_prefix_hash)");
57
57
  }
58
+ /**
59
+ * Idempotent v4 migration (slice κ): add `cache_control_blocks` column
60
+ * to the `requests` table, skipping the ALTER when `PRAGMA table_info`
61
+ * already lists it. The column counts the caller-supplied content blocks
62
+ * the gateway emitted with an explicit Anthropic `cache_control` marker.
63
+ * Pre-κ rows keep NULL; only κ-opt-in callers set a non-NULL integer.
64
+ */
65
+ function ensureCacheControlBlocksColumn(db) {
66
+ const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
67
+ const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
68
+ if (!names.has("cache_control_blocks")) {
69
+ db.exec("ALTER TABLE requests ADD COLUMN cache_control_blocks INTEGER");
70
+ }
71
+ }
58
72
  export function resolveFlightRecorderDbPath() {
59
73
  const configured = process.env.LLM_GATEWAY_LOGS_DB;
60
74
  if (configured !== undefined) {
@@ -176,6 +190,14 @@ export class FlightRecorder {
176
190
  this.db
177
191
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(3, ?)")
178
192
  .run(new Date().toISOString());
193
+ // Migration v4: cache_control_blocks (slice κ). Pre-κ rows keep NULL;
194
+ // only κ-opt-in writes populate this. Aggregates in cache-stats /
195
+ // MCP resources can use this to separate explicit κ hits from
196
+ // implicit prefix-cache hits.
197
+ ensureCacheControlBlocksColumn(this.db);
198
+ this.db
199
+ .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(4, ?)")
200
+ .run(new Date().toISOString());
179
201
  if (process.platform !== "win32") {
180
202
  try {
181
203
  chmodSync(dbPath, 0o600);
@@ -186,9 +208,11 @@ export class FlightRecorder {
186
208
  }
187
209
  const insertRequest = this.db.prepare(`
188
210
  INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc,
189
- stable_prefix_hash, stable_prefix_tokens)
211
+ stable_prefix_hash, stable_prefix_tokens,
212
+ cache_control_blocks)
190
213
  VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc,
191
- @stable_prefix_hash, @stable_prefix_tokens)
214
+ @stable_prefix_hash, @stable_prefix_tokens,
215
+ @cache_control_blocks)
192
216
  `);
193
217
  const insertMetadata = this.db.prepare(`
194
218
  INSERT INTO gateway_metadata (request_id, async_job_id, status)
@@ -205,6 +229,7 @@ export class FlightRecorder {
205
229
  datetime_utc: new Date().toISOString(),
206
230
  stable_prefix_hash: entry.stablePrefixHash ?? null,
207
231
  stable_prefix_tokens: entry.stablePrefixTokens ?? null,
232
+ cache_control_blocks: entry.cacheControlBlocks ?? null,
208
233
  });
209
234
  insertMetadata.run({
210
235
  request_id: entry.correlationId,
package/dist/index.d.ts CHANGED
@@ -67,6 +67,36 @@ type GatewayLogger = typeof logger;
67
67
  */
68
68
  export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
69
69
  export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
70
+ /**
71
+ * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
72
+ * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
73
+ * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
74
+ * name and/or git ref (default ref: HEAD).
75
+ *
76
+ * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
77
+ * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
78
+ * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
79
+ * the request carries a sessionId and the session already has a worktree,
80
+ * that worktree is reused. On session_delete or TTL eviction the gateway
81
+ * runs `git worktree remove --force`.
82
+ *
83
+ * Tool response: when a worktree was used, the successful response stdout
84
+ * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
85
+ * parse/use the path without a schema change (slice λ §1.d).
86
+ *
87
+ * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
88
+ * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
89
+ */
90
+ export declare const WORKTREE_SCHEMA: z.ZodUnion<[z.ZodBoolean, z.ZodObject<{
91
+ name: z.ZodOptional<z.ZodString>;
92
+ ref: z.ZodOptional<z.ZodString>;
93
+ }, "strict", z.ZodTypeAny, {
94
+ name?: string | undefined;
95
+ ref?: string | undefined;
96
+ }, {
97
+ name?: string | undefined;
98
+ ref?: string | undefined;
99
+ }>]>;
70
100
  export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral"];
71
101
  export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral"]>;
72
102
  export type SessionProvider = (typeof SESSION_PROVIDER_VALUES)[number];
@@ -82,7 +112,7 @@ export interface GatewayServerDeps {
82
112
  persistence?: PersistenceConfig;
83
113
  cacheAwareness?: CacheAwarenessConfig;
84
114
  }
85
- interface GatewayServerRuntime {
115
+ export interface GatewayServerRuntime {
86
116
  sessionManager: ISessionManager;
87
117
  resourceProvider: ResourceProvider;
88
118
  db: DatabaseConnection | null;
@@ -94,6 +124,60 @@ interface GatewayServerRuntime {
94
124
  persistence: PersistenceConfig;
95
125
  cacheAwareness: CacheAwarenessConfig;
96
126
  }
127
+ export declare function resolveGatewayServerRuntime(deps?: GatewayServerDeps, options?: {
128
+ isolateState?: boolean;
129
+ }): GatewayServerRuntime;
130
+ /**
131
+ * Slice λ: shape returned by `resolveWorktreeForRequest`. `cwd` is what
132
+ * the spawn helpers (`executeCli`, `startJobWithDedup`) consume;
133
+ * `worktreePath` is what the tool handler embeds in the response prefix
134
+ * so callers can discover the path.
135
+ */
136
+ export interface ResolvedWorktree {
137
+ cwd?: string;
138
+ worktreePath?: string;
139
+ }
140
+ /**
141
+ * Slice λ: resolve a request's worktree directive into a spawn cwd.
142
+ *
143
+ * - `worktreeOpt` is the Zod-validated input value (boolean |
144
+ * `{ name?, ref? }` | undefined).
145
+ * - When the request has a session AND the session already has a
146
+ * `metadata.worktreePath`, that path is reused (resume semantics).
147
+ * The reused path is returned without touching git; if the directory
148
+ * was externally removed between requests, the next CLI invocation
149
+ * will surface the error naturally.
150
+ * - When no reusable worktree exists, `createWorktree` runs; on success
151
+ * the new path is written to `session.metadata` (only when a session
152
+ * exists — request-scoped worktrees do NOT persist).
153
+ * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
154
+ * pre-λ behaviour at non-worktree call sites).
155
+ * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
156
+ * in a `createErrorResponse` envelope. Do NOT swallow.
157
+ *
158
+ * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
159
+ * verify" §5.
160
+ */
161
+ export declare function resolveWorktreeForRequest(worktreeOpt: boolean | {
162
+ name?: string;
163
+ ref?: string;
164
+ } | undefined, sessionId: string | undefined, runtime: GatewayServerRuntime): Promise<ResolvedWorktree>;
165
+ /**
166
+ * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
167
+ *
168
+ * We surface the worktree path inline as a stdout prefix
169
+ * (`[gateway] worktree=<absolute-path>\n`) rather than as a
170
+ * structuredContent field or JSON wrapper. Rationale:
171
+ * - zero schema change across all 10 tools and their downstream parsers
172
+ * - matches how other slice features (session warnings, cache_state
173
+ * aggregates) surface side-channel metadata today
174
+ * - callers that want the path can split on the first newline; callers
175
+ * that don't care see a single ignorable header line
176
+ *
177
+ * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
178
+ * the moment a successful response is constructed.
179
+ */
180
+ export declare function formatWorktreePrefix(worktreePath?: string): string;
97
181
  export declare function extractUsageAndCost(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", output: string, outputFormat?: string,
98
182
  /**
99
183
  * Optional context for off-stdout telemetry sources. Today only Mistral
@@ -129,6 +213,27 @@ interface CliRequestPrep {
129
213
  stablePrefixHash: string | null;
130
214
  /** Heuristic token count (bytes/4) of the same stable prefix. */
131
215
  stablePrefixTokens: number | null;
216
+ /**
217
+ * Slice κ (Claude only): JSON stream-json payload to feed on stdin
218
+ * when the gateway emits `-p --input-format stream-json`. Undefined
219
+ * when the caller did not opt into Anthropic `cache_control`
220
+ * breakpoints. Non-κ providers always leave this undefined.
221
+ */
222
+ stdinPayload?: string;
223
+ /**
224
+ * Slice κ (Claude only): number of caller-supplied content blocks
225
+ * that carry an explicit `cache_control` marker. Threaded into the
226
+ * flight recorder so `cache_state` aggregates can distinguish
227
+ * κ-explicit breakpoints from implicit prefix-cache hits.
228
+ */
229
+ cacheControlBlocks?: number;
230
+ /**
231
+ * Rec #4: structured warnings produced during prep (e.g. cacheable
232
+ * stable prefix without cacheControl). Handlers merge these with any
233
+ * other warnings (cache_ttl_expiring_soon, etc.) before returning to
234
+ * the caller.
235
+ */
236
+ warnings?: WarningEntry[];
132
237
  }
133
238
  export declare function prepareClaudeRequest(params: {
134
239
  prompt?: string;
@@ -360,6 +465,11 @@ export interface GeminiRequestParams {
360
465
  attachments?: string[];
361
466
  /** Phase 4 slice γ: emit `--skip-trust` for fresh-workspace headless runs. */
362
467
  skipTrust?: boolean;
468
+ /** Slice λ: run this request inside a gateway-owned git worktree. */
469
+ worktree?: boolean | {
470
+ name?: string;
471
+ ref?: string;
472
+ };
363
473
  }
364
474
  export interface HandlerDeps {
365
475
  sessionManager: ISessionManager;
@@ -412,6 +522,11 @@ export interface GrokRequestParams {
412
522
  allow?: string[];
413
523
  /** Phase 4 slice θ: Grok `--deny <RULE>` (repeatable; one entry per --deny instance). */
414
524
  deny?: string[];
525
+ /** Slice λ: run this request inside a gateway-owned git worktree. */
526
+ worktree?: boolean | {
527
+ name?: string;
528
+ ref?: string;
529
+ };
415
530
  }
416
531
  export declare function handleGrokRequest(deps: HandlerDeps, params: GrokRequestParams): Promise<ExtendedToolResponse>;
417
532
  export declare function handleGrokRequestAsync(deps: AsyncHandlerDeps, params: Omit<GrokRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
@@ -446,6 +561,11 @@ export interface MistralRequestParams {
446
561
  workingDir?: string;
447
562
  /** Phase 4 slice ζ: Vibe `--add-dir <DIR>` repeatable add-dir parity. */
448
563
  addDir?: string[];
564
+ /** Slice λ: run this request inside a gateway-owned git worktree. */
565
+ worktree?: boolean | {
566
+ name?: string;
567
+ ref?: string;
568
+ };
449
569
  }
450
570
  export declare function handleMistralRequest(deps: HandlerDeps, params: MistralRequestParams): Promise<ExtendedToolResponse>;
451
571
  export declare function handleMistralRequestAsync(deps: AsyncHandlerDeps, params: Omit<MistralRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
@@ -480,6 +600,11 @@ export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params:
480
600
  ignoreRules?: boolean;
481
601
  workingDir?: string;
482
602
  addDir?: string[];
603
+ /** Slice λ: run this request inside a gateway-owned git worktree. */
604
+ worktree?: boolean | {
605
+ name?: string;
606
+ ref?: string;
607
+ };
483
608
  }): Promise<ExtendedToolResponse>;
484
609
  export declare function createGatewayServer(deps?: GatewayServerDeps): McpServer;
485
610
  export {};