llm-cli-gateway 1.17.3 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/CHANGELOG.md +45 -0
  2. package/README.md +1 -1
  3. package/dist/approval-manager.js +0 -8
  4. package/dist/async-job-manager.d.ts +0 -113
  5. package/dist/async-job-manager.js +6 -124
  6. package/dist/cache-stats.d.ts +0 -89
  7. package/dist/cache-stats.js +0 -62
  8. package/dist/claude-mcp-config.js +0 -1
  9. package/dist/cli-updater.d.ts +0 -8
  10. package/dist/cli-updater.js +0 -12
  11. package/dist/codex-json-parser.d.ts +0 -20
  12. package/dist/codex-json-parser.js +0 -21
  13. package/dist/config.d.ts +0 -31
  14. package/dist/config.js +2 -72
  15. package/dist/db.d.ts +0 -18
  16. package/dist/db.js +0 -22
  17. package/dist/doctor.d.ts +0 -49
  18. package/dist/doctor.js +0 -47
  19. package/dist/endpoint-exposure.js +0 -1
  20. package/dist/executor.d.ts +0 -19
  21. package/dist/executor.js +3 -38
  22. package/dist/flight-recorder.d.ts +0 -26
  23. package/dist/flight-recorder.js +1 -70
  24. package/dist/gemini-json-parser.d.ts +0 -25
  25. package/dist/gemini-json-parser.js +0 -28
  26. package/dist/health.d.ts +0 -3
  27. package/dist/health.js +0 -3
  28. package/dist/index.d.ts +12 -208
  29. package/dist/index.js +116 -588
  30. package/dist/job-store.d.ts +0 -74
  31. package/dist/job-store.js +1 -73
  32. package/dist/logger.d.ts +0 -7
  33. package/dist/logger.js +0 -6
  34. package/dist/migrate-sessions.d.ts +0 -3
  35. package/dist/migrate-sessions.js +0 -16
  36. package/dist/migrate.js +1 -18
  37. package/dist/mistral-meta-json-parser.js +0 -67
  38. package/dist/model-registry.js +0 -13
  39. package/dist/pricing.d.ts +0 -46
  40. package/dist/pricing.js +0 -47
  41. package/dist/process-monitor.d.ts +0 -15
  42. package/dist/process-monitor.js +2 -31
  43. package/dist/prompt-parts.d.ts +6 -31
  44. package/dist/prompt-parts.js +0 -11
  45. package/dist/provider-status.d.ts +0 -8
  46. package/dist/provider-status.js +0 -11
  47. package/dist/request-helpers.d.ts +4 -316
  48. package/dist/request-helpers.js +13 -231
  49. package/dist/resources.d.ts +0 -20
  50. package/dist/resources.js +1 -34
  51. package/dist/retry.d.ts +0 -45
  52. package/dist/retry.js +3 -40
  53. package/dist/session-manager-pg.d.ts +0 -32
  54. package/dist/session-manager-pg.js +0 -32
  55. package/dist/session-manager.d.ts +0 -21
  56. package/dist/session-manager.js +1 -15
  57. package/dist/stream-json-parser.d.ts +0 -18
  58. package/dist/stream-json-parser.js +0 -22
  59. package/dist/upstream-contracts.d.ts +0 -55
  60. package/dist/upstream-contracts.js +86 -64
  61. package/dist/validation-orchestrator.js +0 -3
  62. package/dist/worktree-manager.d.ts +0 -9
  63. package/dist/worktree-manager.js +0 -21
  64. package/package.json +1 -1
@@ -1,30 +1,8 @@
1
- /**
2
- * Flight recorder: SQLite-backed request log.
3
- *
4
- * Read access for cache-stats / MCP resources / doctor goes through the
5
- * `queryRequests<T>(sql, ...params)` method exposed on both `FlightRecorder`
6
- * and `NoopFlightRecorder` (the `FlightRecorderQuery` interface, see bottom
7
- * of file). This is Option A from
8
- * docs/plans/cache-awareness.dag.toml#expose-flight-recorder-read-access —
9
- * a single read-only query surface on the existing class, NOT a sibling
10
- * read-only SQLite connection. better-sqlite3 in WAL mode handles
11
- * concurrent readers inside a single process safely, so the additional
12
- * connection isn't needed and would have to be threaded through
13
- * GatewayServerRuntime as a separate field.
14
- *
15
- * Callers MUST pass parameterised SQL — string-interpolation of untrusted
16
- * values is unsafe even on a "read-only" query.
17
- */
18
1
  import { chmodSync, existsSync, mkdirSync } from "fs";
19
2
  import os from "os";
20
3
  import path from "path";
21
4
  import { createRequire } from "module";
22
5
  const MAX_THINKING_BYTES = 1_000_000;
23
- /**
24
- * Idempotent migration: add `cache_read_tokens` / `cache_creation_tokens`
25
- * columns to the `requests` table if a pre-U23 logs.db is opened. Existing
26
- * rows keep NULL for the new columns; that is intentional.
27
- */
28
6
  function ensureRequestsCacheColumns(db) {
29
7
  const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
30
8
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
@@ -35,15 +13,6 @@ function ensureRequestsCacheColumns(db) {
35
13
  db.exec("ALTER TABLE requests ADD COLUMN cache_creation_tokens INTEGER");
36
14
  }
37
15
  }
38
- /**
39
- * Idempotent v3 migration: add `stable_prefix_hash` / `stable_prefix_tokens`
40
- * columns plus their index. Populated only for new rows that carry a
41
- * promptParts structure (slice 1); legacy rows keep NULL forever.
42
- *
43
- * Read access for cache-stats / MCP resources / doctor goes through the
44
- * read-only `queryRequests()` method on FlightRecorder (no separate read
45
- * connection — better-sqlite3 in WAL mode handles concurrent readers).
46
- */
47
16
  function ensureStablePrefixColumns(db) {
48
17
  const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
49
18
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
@@ -55,13 +24,6 @@ function ensureStablePrefixColumns(db) {
55
24
  }
56
25
  db.exec("CREATE INDEX IF NOT EXISTS idx_requests_stable_hash ON requests(stable_prefix_hash)");
57
26
  }
58
- /**
59
- * Idempotent v4 migration (slice κ): add `cache_control_blocks` column
60
- * to the `requests` table. Counts the caller-supplied content blocks
61
- * the gateway emitted with an explicit Anthropic `cache_control`
62
- * marker. Pre-κ rows keep NULL; only κ-opt-in callers ever set the
63
- * column to a non-NULL integer.
64
- */
65
27
  function ensureCacheControlBlocksColumn(db) {
66
28
  const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
67
29
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
@@ -92,10 +54,8 @@ function truncateThinkingBlocks(blocks) {
92
54
  used += bytes;
93
55
  continue;
94
56
  }
95
- // Reserve space for the suffix so total stays within budget
96
57
  const budget = Math.max(0, MAX_THINKING_BYTES - used - TRUNCATION_SUFFIX_BYTES);
97
58
  if (budget > 0) {
98
- // Truncate on code point boundaries by using string iteration
99
59
  let charBytes = 0;
100
60
  let safeEnd = 0;
101
61
  for (const char of block) {
@@ -103,7 +63,7 @@ function truncateThinkingBlocks(blocks) {
103
63
  if (charBytes + charSize > budget)
104
64
  break;
105
65
  charBytes += charSize;
106
- safeEnd += char.length; // char.length handles surrogate pairs
66
+ safeEnd += char.length;
107
67
  }
108
68
  const sliced = block.slice(0, safeEnd);
109
69
  result.push(sliced ? `${sliced}${TRUNCATION_SUFFIX}` : TRUNCATION_SUFFIX);
@@ -174,26 +134,14 @@ export class FlightRecorder {
174
134
  this.db
175
135
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(1, ?)")
176
136
  .run(new Date().toISOString());
177
- // Migration v2: cache_read_tokens / cache_creation_tokens columns on
178
- // pre-U23 logs.db files. ALTER TABLE ADD COLUMN is idempotent only via
179
- // a prior PRAGMA table_info() check; better-sqlite3 has no native
180
- // "IF NOT EXISTS" for ADD COLUMN.
181
137
  ensureRequestsCacheColumns(this.db);
182
138
  this.db
183
139
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(2, ?)")
184
140
  .run(new Date().toISOString());
185
- // Migration v3: stable_prefix_hash / stable_prefix_tokens columns plus
186
- // their index. Populated only for new rows whose request carried a
187
- // promptParts structure (slice 1 of cache-awareness); legacy rows keep
188
- // NULL intentionally.
189
141
  ensureStablePrefixColumns(this.db);
190
142
  this.db
191
143
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(3, ?)")
192
144
  .run(new Date().toISOString());
193
- // Migration v4: cache_control_blocks (slice κ). Pre-κ rows keep NULL;
194
- // only κ-opt-in writes populate this. Aggregates in cache-stats /
195
- // MCP resources can use this to separate explicit κ hits from
196
- // implicit prefix-cache hits.
197
145
  ensureCacheControlBlocksColumn(this.db);
198
146
  this.db
199
147
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(4, ?)")
@@ -203,7 +151,6 @@ export class FlightRecorder {
203
151
  chmodSync(dbPath, 0o600);
204
152
  }
205
153
  catch {
206
- // Best effort permissions hardening.
207
154
  }
208
155
  }
209
156
  const insertRequest = this.db.prepare(`
@@ -292,21 +239,6 @@ export class FlightRecorder {
292
239
  logComplete(correlationId, result) {
293
240
  this.updateCompleteTxn(correlationId, result);
294
241
  }
295
- /**
296
- * Read-only query over the requests + gateway_metadata tables. Used by
297
- * cache-stats / MCP resources / doctor without exposing a second SQLite
298
- * connection. better-sqlite3 in WAL mode handles concurrent readers
299
- * inside a single process safely.
300
- *
301
- * Safety:
302
- * - Caller MUST pass parameterised SQL — direct string interpolation of
303
- * untrusted values is unsafe.
304
- * - The compiled statement's `.readonly` flag is checked at runtime;
305
- * anything that can mutate rows (INSERT/UPDATE/DELETE, including the
306
- * `RETURNING` forms that better-sqlite3 surfaces via `.all()`) throws.
307
- * This blocks the writer-disguised-as-reader vector codex-r1/F3
308
- * flagged, even when the caller is internal gateway code.
309
- */
310
242
  queryRequests(sql, ...params) {
311
243
  const stmt = this.db.prepare(sql);
312
244
  if (stmt.readonly === false) {
@@ -318,7 +250,6 @@ export class FlightRecorder {
318
250
  return stmt.all(...params);
319
251
  }
320
252
  flush() {
321
- // No-op: better-sqlite3 writes synchronously.
322
253
  }
323
254
  close() {
324
255
  this.db.close();
@@ -1,23 +1,3 @@
1
- /**
2
- * Parsers for Gemini CLI `-o json` (single object) and `-o stream-json`
3
- * (NDJSON event stream) output.
4
- *
5
- * `-o json` emits a single JSON object with:
6
- * - `response`: string final model output
7
- * - `usageMetadata`: { promptTokenCount, candidatesTokenCount,
8
- * cachedContentTokenCount?, totalTokenCount }
9
- *
10
- * `-o stream-json` emits one JSON object per line:
11
- * - `{ "type": "init", "session_id": "...", "model": "..." }`
12
- * - `{ "type": "message", "role": "user", "content": "..." }`
13
- * - `{ "type": "message", "role": "assistant", "content": "...", "delta": true }` (repeated)
14
- * - `{ "type": "result", "status": "success", "stats": { "input_tokens": N,
15
- * "output_tokens": N, "cached": N, ... } }`
16
- *
17
- * Both parsers return null when stdout is unparseable. Both populate the same
18
- * `GeminiJsonParseResult` shape so `extractUsageAndCost` can branch on
19
- * outputFormat without further dispatch.
20
- */
21
1
  export interface GeminiUsage {
22
2
  input_tokens: number;
23
3
  output_tokens: number;
@@ -28,9 +8,4 @@ export interface GeminiJsonParseResult {
28
8
  response?: string;
29
9
  }
30
10
  export declare function parseGeminiJson(stdout: string): GeminiJsonParseResult | null;
31
- /**
32
- * Parse Gemini `-o stream-json` NDJSON output. Concatenates assistant `delta`
33
- * message content into `response`, extracts the terminal `result.stats` payload
34
- * into `usage`. Returns null when stdout contains no parseable JSON line.
35
- */
36
11
  export declare function parseGeminiStreamJson(stdout: string): GeminiJsonParseResult | null;
@@ -1,23 +1,3 @@
1
- /**
2
- * Parsers for Gemini CLI `-o json` (single object) and `-o stream-json`
3
- * (NDJSON event stream) output.
4
- *
5
- * `-o json` emits a single JSON object with:
6
- * - `response`: string final model output
7
- * - `usageMetadata`: { promptTokenCount, candidatesTokenCount,
8
- * cachedContentTokenCount?, totalTokenCount }
9
- *
10
- * `-o stream-json` emits one JSON object per line:
11
- * - `{ "type": "init", "session_id": "...", "model": "..." }`
12
- * - `{ "type": "message", "role": "user", "content": "..." }`
13
- * - `{ "type": "message", "role": "assistant", "content": "...", "delta": true }` (repeated)
14
- * - `{ "type": "result", "status": "success", "stats": { "input_tokens": N,
15
- * "output_tokens": N, "cached": N, ... } }`
16
- *
17
- * Both parsers return null when stdout is unparseable. Both populate the same
18
- * `GeminiJsonParseResult` shape so `extractUsageAndCost` can branch on
19
- * outputFormat without further dispatch.
20
- */
21
1
  export function parseGeminiJson(stdout) {
22
2
  const trimmed = stdout.trim();
23
3
  if (!trimmed) {
@@ -54,11 +34,6 @@ export function parseGeminiJson(stdout) {
54
34
  }
55
35
  return result;
56
36
  }
57
- /**
58
- * Parse Gemini `-o stream-json` NDJSON output. Concatenates assistant `delta`
59
- * message content into `response`, extracts the terminal `result.stats` payload
60
- * into `usage`. Returns null when stdout contains no parseable JSON line.
61
- */
62
37
  export function parseGeminiStreamJson(stdout) {
63
38
  if (!stdout) {
64
39
  return null;
@@ -71,9 +46,6 @@ export function parseGeminiStreamJson(stdout) {
71
46
  const trimmed = line.trim();
72
47
  if (!trimmed)
73
48
  continue;
74
- // Gemini stream-json lines are individual JSON objects; non-JSON
75
- // chatter (warnings, "Ripgrep not available", etc.) is silently
76
- // ignored so a stray banner line doesn't poison usage extraction.
77
49
  let event;
78
50
  try {
79
51
  event = JSON.parse(trimmed);
package/dist/health.d.ts CHANGED
@@ -13,8 +13,5 @@ export interface ProviderRuntimeHealth {
13
13
  providers: Record<string, Pick<ProviderRuntimeStatus, "installed" | "version" | "loginStatus" | "loginCheck">>;
14
14
  timestamp: string;
15
15
  }
16
- /**
17
- * Check health status of PostgreSQL.
18
- */
19
16
  export declare function checkHealth(db: DatabaseConnection): Promise<HealthStatus>;
20
17
  export declare function checkProviderRuntimeHealth(): ProviderRuntimeHealth;
package/dist/health.js CHANGED
@@ -1,7 +1,4 @@
1
1
  import { listProviderRuntimeStatuses } from "./provider-status.js";
2
- /**
3
- * Check health status of PostgreSQL.
4
- */
5
2
  export async function checkHealth(db) {
6
3
  const result = await db.healthCheck();
7
4
  const health = {
package/dist/index.d.ts CHANGED
@@ -13,18 +13,9 @@ import { ClaudeMcpConfigResult, ClaudeMcpServerName } from "./claude-mcp-config.
13
13
  import { type MistralAgentMode, type ClaudePermissionMode, type CodexSandboxMode, type CodexAskForApproval, type ClaudeEffortLevel } from "./request-helpers.js";
14
14
  import { FlightRecorderLike } from "./flight-recorder.js";
15
15
  import { type PromptParts } from "./prompt-parts.js";
16
- /**
17
- * Slice 3: structured warning entries attached to tool responses.
18
- * Distinct from review-integrity warnings (which are text-appended to
19
- * the user-visible response). These are programmatic signals for caller
20
- * agents to react to.
21
- */
22
16
  export interface WarningEntry {
23
- /** Stable machine-readable code, e.g. "cache_ttl_expiring_soon". */
24
17
  code: string;
25
- /** Optional human-readable message for surfaces that render text. */
26
18
  message?: string;
27
- /** Code-specific payload — left open for future warning types. */
28
19
  ttlRemainingMs?: number;
29
20
  [key: string]: unknown;
30
21
  }
@@ -44,7 +35,6 @@ type ExtendedToolResponse = {
44
35
  missing?: ClaudeMcpServerName[];
45
36
  };
46
37
  reviewIntegrity?: ReviewIntegrityResult;
47
- /** Slice 3: structured warnings (e.g. cache_ttl_expiring_soon). */
48
38
  warnings?: WarningEntry[];
49
39
  };
50
40
  declare const logger: {
@@ -54,40 +44,9 @@ declare const logger: {
54
44
  debug: (message: string, ...args: any[]) => void;
55
45
  };
56
46
  type GatewayLogger = typeof logger;
57
- /**
58
- * Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
59
- *
60
- * Both flags reach the upstream CLIs as decimal-formatted argv strings via
61
- * `String(N)`. `z.number().int().positive()` alone lets values past
62
- * `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
63
- * scientific notation that Grok and Vibe both reject. The bounds below
64
- * (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
65
- * for price) guarantee a lossless decimal stringification AND a sane
66
- * upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
67
- */
68
47
  export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
69
48
  export declare const MAX_TOKENS_SCHEMA: z.ZodNumber;
70
49
  export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
71
- /**
72
- * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
73
- * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
74
- * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
75
- * name and/or git ref (default ref: HEAD).
76
- *
77
- * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
78
- * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
79
- * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
80
- * the request carries a sessionId and the session already has a worktree,
81
- * that worktree is reused. On session_delete or TTL eviction the gateway
82
- * runs `git worktree remove --force`.
83
- *
84
- * Tool response: when a worktree was used, the successful response stdout
85
- * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
86
- * parse/use the path without a schema change (slice λ §1.d).
87
- *
88
- * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
89
- * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
90
- */
91
50
  export declare const WORKTREE_SCHEMA: z.ZodUnion<[z.ZodBoolean, z.ZodObject<{
92
51
  name: z.ZodOptional<z.ZodString>;
93
52
  ref: z.ZodOptional<z.ZodString>;
@@ -128,65 +87,16 @@ export interface GatewayServerRuntime {
128
87
  export declare function resolveGatewayServerRuntime(deps?: GatewayServerDeps, options?: {
129
88
  isolateState?: boolean;
130
89
  }): GatewayServerRuntime;
131
- /**
132
- * Slice λ: shape returned by `resolveWorktreeForRequest`. `cwd` is what
133
- * the spawn helpers (`executeCli`, `startJobWithDedup`) consume;
134
- * `worktreePath` is what the tool handler embeds in the response prefix
135
- * so callers can discover the path.
136
- */
137
90
  export interface ResolvedWorktree {
138
91
  cwd?: string;
139
92
  worktreePath?: string;
140
93
  }
141
- /**
142
- * Slice λ: resolve a request's worktree directive into a spawn cwd.
143
- *
144
- * - `worktreeOpt` is the Zod-validated input value (boolean |
145
- * `{ name?, ref? }` | undefined).
146
- * - When the request has a session AND the session already has a
147
- * `metadata.worktreePath`, that path is reused (resume semantics).
148
- * The reused path is returned without touching git; if the directory
149
- * was externally removed between requests, the next CLI invocation
150
- * will surface the error naturally.
151
- * - When no reusable worktree exists, `createWorktree` runs; on success
152
- * the new path is written to `session.metadata` (only when a session
153
- * exists — request-scoped worktrees do NOT persist).
154
- * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
155
- * pre-λ behaviour at non-worktree call sites).
156
- * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
157
- * in a `createErrorResponse` envelope. Do NOT swallow.
158
- *
159
- * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
160
- * verify" §5.
161
- */
162
94
  export declare function resolveWorktreeForRequest(worktreeOpt: boolean | {
163
95
  name?: string;
164
96
  ref?: string;
165
97
  } | undefined, sessionId: string | undefined, runtime: GatewayServerRuntime): Promise<ResolvedWorktree>;
166
- /**
167
- * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
168
- *
169
- * We surface the worktree path inline as a stdout prefix
170
- * (`[gateway] worktree=<absolute-path>\n`) rather than as a
171
- * structuredContent field or JSON wrapper. Rationale:
172
- * - zero schema change across all 10 tools and their downstream parsers
173
- * - matches how other slice features (session warnings, cache_state
174
- * aggregates) surface side-channel metadata today
175
- * - callers that want the path can split on the first newline; callers
176
- * that don't care see a single ignorable header line
177
- *
178
- * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
179
- * the moment a successful response is constructed.
180
- */
181
98
  export declare function formatWorktreePrefix(worktreePath?: string): string;
182
- export declare function extractUsageAndCost(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", output: string, outputFormat?: string,
183
- /**
184
- * Optional context for off-stdout telemetry sources. Today only Mistral
185
- * uses this — its meta.json lives on disk keyed by sessionId. Threading
186
- * this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
187
- * primitives-only (no `params`/`prep` retention on AsyncJobRecord).
188
- */
189
- ctx?: {
99
+ export declare function extractUsageAndCost(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", output: string, outputFormat?: string, ctx?: {
190
100
  sessionId?: string;
191
101
  home?: string;
192
102
  }): {
@@ -205,35 +115,10 @@ interface CliRequestPrep {
205
115
  approvalDecision: ApprovalRecord | null;
206
116
  reviewIntegrity?: ReviewIntegrityResult;
207
117
  args: string[];
208
- /**
209
- * Sha256 of the assembled prompt's stable prefix bytes when the caller
210
- * supplied `promptParts`. Null when the legacy `prompt` field was used.
211
- * Populated by `resolvePromptOrPartsForPrep` and threaded into the
212
- * flight-recorder row by the caller's safeFlightStart entry.
213
- */
214
118
  stablePrefixHash: string | null;
215
- /** Heuristic token count (bytes/4) of the same stable prefix. */
216
119
  stablePrefixTokens: number | null;
217
- /**
218
- * Slice κ (Claude only): JSON stream-json payload to feed on stdin
219
- * when the gateway emits `-p --input-format stream-json`. Undefined
220
- * when the caller did not opt into Anthropic `cache_control`
221
- * breakpoints. Non-κ providers always leave this undefined.
222
- */
223
120
  stdinPayload?: string;
224
- /**
225
- * Slice κ (Claude only): number of caller-supplied content blocks
226
- * that carry an explicit `cache_control` marker. Threaded into the
227
- * flight recorder so `cache_state` aggregates can distinguish
228
- * κ-explicit breakpoints from implicit prefix-cache hits.
229
- */
230
121
  cacheControlBlocks?: number;
231
- /**
232
- * Rec #4: structured warnings produced during prep (e.g. cacheable
233
- * stable prefix without cacheControl). Handlers merge these with any
234
- * other warnings (cache_ttl_expiring_soon, etc.) before returning to
235
- * the caller.
236
- */
237
122
  warnings?: WarningEntry[];
238
123
  }
239
124
  export declare function prepareClaudeRequest(params: {
@@ -264,14 +149,12 @@ export declare function prepareClaudeRequest(params: {
264
149
  fallbackModel?: string;
265
150
  jsonSchema?: string | Record<string, unknown>;
266
151
  addDir?: string[];
152
+ noSessionPersistence?: boolean;
153
+ settingSources?: string;
154
+ settings?: string;
155
+ tools?: string[];
267
156
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
268
157
  export interface CodexRequestPrep extends CliRequestPrep {
269
- /**
270
- * U26: Cleanup hook for any `outputSchema` temp file written during prep.
271
- * Callers MUST invoke this in a `finally` block (regardless of whether the
272
- * spawn succeeded, failed, or never ran) to avoid leaking the 0o600 temp
273
- * file into `os.tmpdir()`.
274
- */
275
158
  cleanup?: () => void;
276
159
  }
277
160
  export declare function prepareCodexRequest(params: {
@@ -292,11 +175,6 @@ export declare function prepareCodexRequest(params: {
292
175
  correlationId?: string;
293
176
  optimizePrompt: boolean;
294
177
  operation: string;
295
- /**
296
- * U23: output format. When set to "json", emits `--json` so Codex streams
297
- * the JSONL event format that `parseCodexJsonStream` (and downstream
298
- * `extractUsageAndCost`) can consume. Defaults to "text".
299
- */
300
178
  outputFormat?: "text" | "json";
301
179
  outputSchema?: string | Record<string, unknown>;
302
180
  search?: boolean;
@@ -322,24 +200,13 @@ export declare function prepareGeminiRequest(params: {
322
200
  correlationId?: string;
323
201
  optimizePrompt: boolean;
324
202
  operation: string;
325
- /**
326
- * U23 + Phase 4 slice ε: output format. `json` emits `-o json` (single
327
- * JSON object with usageMetadata). `stream-json` emits `-o stream-json`
328
- * (NDJSON event stream — `init` / `message` / `result` lines). Both
329
- * route through `extractUsageAndCost` so usage tokens reach the flight
330
- * recorder. Defaults to "text".
331
- */
332
203
  outputFormat?: "text" | "json" | "stream-json";
333
204
  sandbox?: boolean;
334
205
  policyFiles?: string[];
335
206
  adminPolicyFiles?: string[];
336
207
  attachments?: string[];
337
- /**
338
- * Phase 4 slice γ: emit `--skip-trust` so first-run workspaces don't
339
- * block headless invocations on the interactive trust prompt. Default
340
- * is undefined (preserves current prompt behaviour for legacy callers).
341
- */
342
208
  skipTrust?: boolean;
209
+ yolo?: boolean;
343
210
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
344
211
  export declare function prepareGrokRequest(params: {
345
212
  prompt?: string;
@@ -358,36 +225,15 @@ export declare function prepareGrokRequest(params: {
358
225
  correlationId?: string;
359
226
  optimizePrompt: boolean;
360
227
  operation: string;
361
- /**
362
- * Phase 4 slice δ: emit `--max-turns N` so callers can cap agent-loop
363
- * iterations for cost / latency control. Mirrors Claude's wiring.
364
- */
365
228
  maxTurns?: number;
366
- /**
367
- * Phase 4 slice ζ: emit `--cwd <DIR>` so headless callers can set Grok's
368
- * working directory without depending on the gateway process's cwd.
369
- */
370
229
  workingDir?: string;
371
- /**
372
- * Phase 4 slice θ — Grok HIGH parity. All five are passthrough flags:
373
- *
374
- * - `sandbox` → `--sandbox <PROFILE>` (freeform; Grok 0.1.210 --help
375
- * shows no enum constraint, unlike --effort / --permission-mode /
376
- * --output-format which all show `[possible values: …]`).
377
- * - `rules` → `--rules <RULES>`. Supports `@file` prefix; gateway
378
- * passes the value verbatim and lets Grok parse it.
379
- * - `systemPromptOverride` → `--system-prompt-override <PROMPT>`.
380
- * Distinct from Claude's --system-prompt / --append-system-prompt
381
- * (Grok has only one override flag).
382
- * - `allow` / `deny` → repeatable `--allow <RULE>` / `--deny <RULE>`
383
- * per --help ("Repeat to add multiple rules"). One argv pair per
384
- * entry — NOT comma-joined like --tools / --disallowed-tools.
385
- */
386
230
  sandbox?: string;
387
231
  rules?: string;
388
232
  systemPromptOverride?: string;
389
233
  allow?: string[];
390
234
  deny?: string[];
235
+ compactionMode?: string;
236
+ compactionDetail?: string;
391
237
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
392
238
  export declare function prepareMistralRequest(params: {
393
239
  prompt?: string;
@@ -395,8 +241,6 @@ export declare function prepareMistralRequest(params: {
395
241
  model?: string;
396
242
  outputFormat?: string;
397
243
  permissionMode?: MistralAgentMode;
398
- effort?: string;
399
- reasoningEffort?: string;
400
244
  allowedTools?: string[];
401
245
  disallowedTools?: string[];
402
246
  approvalStrategy: "legacy" | "mcp_managed";
@@ -405,34 +249,16 @@ export declare function prepareMistralRequest(params: {
405
249
  correlationId?: string;
406
250
  optimizePrompt: boolean;
407
251
  operation: string;
408
- /**
409
- * Phase 4 slice γ: emit `--trust` to bypass Vibe's interactive trust
410
- * prompt for this invocation only (not persisted). Default undefined.
411
- */
412
252
  trust?: boolean;
413
- /** Phase 4 slice δ: Vibe `--max-turns N` cap on agent-loop iterations. */
414
253
  maxTurns?: number;
415
- /** Phase 4 slice δ: Vibe `--max-price DOLLARS` cumulative-cost cap. */
416
254
  maxPrice?: number;
417
- /** Vibe 2.x: `--max-tokens N` cumulative prompt + completion token cap. */
418
255
  maxTokens?: number;
419
- /** Phase 4 slice ζ: Vibe `--workdir <DIR>` working-directory parity. */
420
256
  workingDir?: string;
421
- /** Phase 4 slice ζ: Vibe `--add-dir <DIR>` repeatable add-dir parity. */
422
257
  addDir?: string[];
423
258
  }, runtime?: GatewayServerRuntime): (CliRequestPrep & {
424
259
  mistralEnv: Record<string, string>;
425
260
  }) | ExtendedToolResponse;
426
- /**
427
- * Phase 4 slice δ post-review: pure helper extracted from
428
- * `handleMistralRequest` so the retry-path arg-preservation invariants
429
- * (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
430
- * without mocking awaitJobOrDefer. Any param the wrapper threads into
431
- * the FIRST `buildMistralCliInvocation` call MUST also be threaded
432
- * through here, or a fresh-workspace / budgeted run can degrade on
433
- * the second attempt.
434
- */
435
- export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams, "outputFormat" | "permissionMode" | "effort" | "reasoningEffort" | "allowedTools" | "disallowedTools" | "approvalStrategy" | "trust" | "maxTurns" | "maxPrice" | "maxTokens" | "workingDir" | "addDir"> & {
261
+ export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams, "outputFormat" | "permissionMode" | "allowedTools" | "disallowedTools" | "approvalStrategy" | "trust" | "maxTurns" | "maxPrice" | "maxTokens" | "workingDir" | "addDir"> & {
436
262
  effectivePrompt: string;
437
263
  }, recoveryModel: string): {
438
264
  args: string[];
@@ -457,18 +283,13 @@ export interface GeminiRequestParams {
457
283
  optimizeResponse?: boolean;
458
284
  idleTimeoutMs?: number;
459
285
  forceRefresh?: boolean;
460
- /**
461
- * U23 + Phase 4 slice ε: "json" emits `-o json`; "stream-json" emits
462
- * `-o stream-json` (NDJSON event stream). Both are usage-extracted.
463
- */
464
286
  outputFormat?: "text" | "json" | "stream-json";
465
287
  sandbox?: boolean;
466
288
  policyFiles?: string[];
467
289
  adminPolicyFiles?: string[];
468
290
  attachments?: string[];
469
- /** Phase 4 slice γ: emit `--skip-trust` for fresh-workspace headless runs. */
470
291
  skipTrust?: boolean;
471
- /** Slice λ: run this request inside a gateway-owned git worktree. */
292
+ yolo?: boolean;
472
293
  worktree?: boolean | {
473
294
  name?: string;
474
295
  ref?: string;
@@ -511,21 +332,15 @@ export interface GrokRequestParams {
511
332
  optimizeResponse?: boolean;
512
333
  idleTimeoutMs?: number;
513
334
  forceRefresh?: boolean;
514
- /** Phase 4 slice δ: cap agent-loop iterations via `--max-turns N`. */
515
335
  maxTurns?: number;
516
- /** Phase 4 slice ζ: emit `--cwd <DIR>` so the CLI uses the specified working directory. */
517
336
  workingDir?: string;
518
- /** Phase 4 slice θ: Grok `--sandbox <PROFILE>` (freeform passthrough). */
519
337
  sandbox?: string;
520
- /** Phase 4 slice θ: Grok `--rules <RULES>` (supports `@file` prefix; verbatim passthrough). */
521
338
  rules?: string;
522
- /** Phase 4 slice θ: Grok `--system-prompt-override <PROMPT>`. */
523
339
  systemPromptOverride?: string;
524
- /** Phase 4 slice θ: Grok `--allow <RULE>` (repeatable; one entry per --allow instance). */
525
340
  allow?: string[];
526
- /** Phase 4 slice θ: Grok `--deny <RULE>` (repeatable; one entry per --deny instance). */
527
341
  deny?: string[];
528
- /** Slice λ: run this request inside a gateway-owned git worktree. */
342
+ compactionMode?: string;
343
+ compactionDetail?: string;
529
344
  worktree?: boolean | {
530
345
  name?: string;
531
346
  ref?: string;
@@ -542,8 +357,6 @@ export interface MistralRequestParams {
542
357
  resumeLatest: boolean;
543
358
  createNewSession: boolean;
544
359
  permissionMode?: MistralAgentMode;
545
- effort?: string;
546
- reasoningEffort?: string;
547
360
  approvalStrategy: "legacy" | "mcp_managed";
548
361
  approvalPolicy?: string;
549
362
  mcpServers?: ClaudeMcpServerName[];
@@ -554,19 +367,12 @@ export interface MistralRequestParams {
554
367
  optimizeResponse?: boolean;
555
368
  idleTimeoutMs?: number;
556
369
  forceRefresh?: boolean;
557
- /** Phase 4 slice γ: emit `--trust` for fresh-workspace headless runs. */
558
370
  trust?: boolean;
559
- /** Phase 4 slice δ: Vibe `--max-turns N` cap on agent-loop iterations. */
560
371
  maxTurns?: number;
561
- /** Phase 4 slice δ: Vibe `--max-price DOLLARS` cumulative-cost cap. */
562
372
  maxPrice?: number;
563
- /** Vibe 2.x: `--max-tokens N` cumulative prompt + completion token cap. */
564
373
  maxTokens?: number;
565
- /** Phase 4 slice ζ: Vibe `--workdir <DIR>` working-directory parity. */
566
374
  workingDir?: string;
567
- /** Phase 4 slice ζ: Vibe `--add-dir <DIR>` repeatable add-dir parity. */
568
375
  addDir?: string[];
569
- /** Slice λ: run this request inside a gateway-owned git worktree. */
570
376
  worktree?: boolean | {
571
377
  name?: string;
572
378
  ref?: string;
@@ -593,7 +399,6 @@ export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params:
593
399
  optimizePrompt: boolean;
594
400
  idleTimeoutMs?: number;
595
401
  forceRefresh?: boolean;
596
- /** U23: when "json", emits Codex `--json` so the parser is reachable. */
597
402
  outputFormat?: "text" | "json";
598
403
  outputSchema?: string | Record<string, unknown>;
599
404
  search?: boolean;
@@ -605,7 +410,6 @@ export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params:
605
410
  ignoreRules?: boolean;
606
411
  workingDir?: string;
607
412
  addDir?: string[];
608
- /** Slice λ: run this request inside a gateway-owned git worktree. */
609
413
  worktree?: boolean | {
610
414
  name?: string;
611
415
  ref?: string;