llm-cli-gateway 1.17.4 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +1 -1
  3. package/dist/approval-manager.js +0 -8
  4. package/dist/async-job-manager.d.ts +0 -113
  5. package/dist/async-job-manager.js +6 -124
  6. package/dist/cache-stats.d.ts +0 -89
  7. package/dist/cache-stats.js +0 -62
  8. package/dist/claude-mcp-config.js +0 -1
  9. package/dist/cli-updater.d.ts +0 -8
  10. package/dist/cli-updater.js +0 -12
  11. package/dist/codex-json-parser.d.ts +0 -20
  12. package/dist/codex-json-parser.js +0 -21
  13. package/dist/config.d.ts +0 -31
  14. package/dist/config.js +2 -72
  15. package/dist/db.d.ts +0 -18
  16. package/dist/db.js +0 -22
  17. package/dist/doctor.d.ts +0 -49
  18. package/dist/doctor.js +0 -47
  19. package/dist/endpoint-exposure.js +0 -1
  20. package/dist/executor.d.ts +0 -19
  21. package/dist/executor.js +3 -38
  22. package/dist/flight-recorder.d.ts +0 -26
  23. package/dist/flight-recorder.js +1 -70
  24. package/dist/gemini-json-parser.d.ts +0 -25
  25. package/dist/gemini-json-parser.js +0 -28
  26. package/dist/health.d.ts +0 -3
  27. package/dist/health.js +0 -3
  28. package/dist/index.d.ts +1 -221
  29. package/dist/index.js +14 -563
  30. package/dist/job-store.d.ts +0 -74
  31. package/dist/job-store.js +1 -73
  32. package/dist/logger.d.ts +0 -7
  33. package/dist/logger.js +0 -6
  34. package/dist/migrate-sessions.d.ts +0 -3
  35. package/dist/migrate-sessions.js +0 -16
  36. package/dist/migrate.js +1 -18
  37. package/dist/mistral-meta-json-parser.js +0 -67
  38. package/dist/model-registry.js +0 -13
  39. package/dist/pricing.d.ts +0 -46
  40. package/dist/pricing.js +0 -47
  41. package/dist/process-monitor.d.ts +0 -15
  42. package/dist/process-monitor.js +2 -31
  43. package/dist/prompt-parts.d.ts +0 -25
  44. package/dist/prompt-parts.js +0 -11
  45. package/dist/provider-status.d.ts +0 -8
  46. package/dist/provider-status.js +0 -11
  47. package/dist/request-helpers.d.ts +0 -334
  48. package/dist/request-helpers.js +1 -229
  49. package/dist/resources.d.ts +0 -20
  50. package/dist/resources.js +1 -34
  51. package/dist/retry.d.ts +0 -45
  52. package/dist/retry.js +3 -40
  53. package/dist/session-manager-pg.d.ts +0 -32
  54. package/dist/session-manager-pg.js +0 -32
  55. package/dist/session-manager.d.ts +0 -21
  56. package/dist/session-manager.js +1 -15
  57. package/dist/stream-json-parser.d.ts +0 -18
  58. package/dist/stream-json-parser.js +0 -22
  59. package/dist/upstream-contracts.d.ts +0 -55
  60. package/dist/upstream-contracts.js +0 -77
  61. package/dist/validation-orchestrator.js +0 -3
  62. package/dist/worktree-manager.d.ts +0 -9
  63. package/dist/worktree-manager.js +0 -21
  64. package/package.json +1 -1
@@ -1,30 +1,8 @@
1
- /**
2
- * Flight recorder: SQLite-backed request log.
3
- *
4
- * Read access for cache-stats / MCP resources / doctor goes through the
5
- * `queryRequests<T>(sql, ...params)` method exposed on both `FlightRecorder`
6
- * and `NoopFlightRecorder` (the `FlightRecorderQuery` interface, see bottom
7
- * of file). This is Option A from
8
- * docs/plans/cache-awareness.dag.toml#expose-flight-recorder-read-access —
9
- * a single read-only query surface on the existing class, NOT a sibling
10
- * read-only SQLite connection. better-sqlite3 in WAL mode handles
11
- * concurrent readers inside a single process safely, so the additional
12
- * connection isn't needed and would have to be threaded through
13
- * GatewayServerRuntime as a separate field.
14
- *
15
- * Callers MUST pass parameterised SQL — string-interpolation of untrusted
16
- * values is unsafe even on a "read-only" query.
17
- */
18
1
  import { chmodSync, existsSync, mkdirSync } from "fs";
19
2
  import os from "os";
20
3
  import path from "path";
21
4
  import { createRequire } from "module";
22
5
  const MAX_THINKING_BYTES = 1_000_000;
23
- /**
24
- * Idempotent migration: add `cache_read_tokens` / `cache_creation_tokens`
25
- * columns to the `requests` table if a pre-U23 logs.db is opened. Existing
26
- * rows keep NULL for the new columns; that is intentional.
27
- */
28
6
  function ensureRequestsCacheColumns(db) {
29
7
  const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
30
8
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
@@ -35,15 +13,6 @@ function ensureRequestsCacheColumns(db) {
35
13
  db.exec("ALTER TABLE requests ADD COLUMN cache_creation_tokens INTEGER");
36
14
  }
37
15
  }
38
- /**
39
- * Idempotent v3 migration: add `stable_prefix_hash` / `stable_prefix_tokens`
40
- * columns plus their index. Populated only for new rows that carry a
41
- * promptParts structure (slice 1); legacy rows keep NULL forever.
42
- *
43
- * Read access for cache-stats / MCP resources / doctor goes through the
44
- * read-only `queryRequests()` method on FlightRecorder (no separate read
45
- * connection — better-sqlite3 in WAL mode handles concurrent readers).
46
- */
47
16
  function ensureStablePrefixColumns(db) {
48
17
  const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
49
18
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
@@ -55,13 +24,6 @@ function ensureStablePrefixColumns(db) {
55
24
  }
56
25
  db.exec("CREATE INDEX IF NOT EXISTS idx_requests_stable_hash ON requests(stable_prefix_hash)");
57
26
  }
58
- /**
59
- * Idempotent v4 migration (slice κ): add `cache_control_blocks` column
60
- * to the `requests` table. Counts the caller-supplied content blocks
61
- * the gateway emitted with an explicit Anthropic `cache_control`
62
- * marker. Pre-κ rows keep NULL; only κ-opt-in callers ever set the
63
- * column to a non-NULL integer.
64
- */
65
27
  function ensureCacheControlBlocksColumn(db) {
66
28
  const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
67
29
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
@@ -92,10 +54,8 @@ function truncateThinkingBlocks(blocks) {
92
54
  used += bytes;
93
55
  continue;
94
56
  }
95
- // Reserve space for the suffix so total stays within budget
96
57
  const budget = Math.max(0, MAX_THINKING_BYTES - used - TRUNCATION_SUFFIX_BYTES);
97
58
  if (budget > 0) {
98
- // Truncate on code point boundaries by using string iteration
99
59
  let charBytes = 0;
100
60
  let safeEnd = 0;
101
61
  for (const char of block) {
@@ -103,7 +63,7 @@ function truncateThinkingBlocks(blocks) {
103
63
  if (charBytes + charSize > budget)
104
64
  break;
105
65
  charBytes += charSize;
106
- safeEnd += char.length; // char.length handles surrogate pairs
66
+ safeEnd += char.length;
107
67
  }
108
68
  const sliced = block.slice(0, safeEnd);
109
69
  result.push(sliced ? `${sliced}${TRUNCATION_SUFFIX}` : TRUNCATION_SUFFIX);
@@ -174,26 +134,14 @@ export class FlightRecorder {
174
134
  this.db
175
135
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(1, ?)")
176
136
  .run(new Date().toISOString());
177
- // Migration v2: cache_read_tokens / cache_creation_tokens columns on
178
- // pre-U23 logs.db files. ALTER TABLE ADD COLUMN is idempotent only via
179
- // a prior PRAGMA table_info() check; better-sqlite3 has no native
180
- // "IF NOT EXISTS" for ADD COLUMN.
181
137
  ensureRequestsCacheColumns(this.db);
182
138
  this.db
183
139
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(2, ?)")
184
140
  .run(new Date().toISOString());
185
- // Migration v3: stable_prefix_hash / stable_prefix_tokens columns plus
186
- // their index. Populated only for new rows whose request carried a
187
- // promptParts structure (slice 1 of cache-awareness); legacy rows keep
188
- // NULL intentionally.
189
141
  ensureStablePrefixColumns(this.db);
190
142
  this.db
191
143
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(3, ?)")
192
144
  .run(new Date().toISOString());
193
- // Migration v4: cache_control_blocks (slice κ). Pre-κ rows keep NULL;
194
- // only κ-opt-in writes populate this. Aggregates in cache-stats /
195
- // MCP resources can use this to separate explicit κ hits from
196
- // implicit prefix-cache hits.
197
145
  ensureCacheControlBlocksColumn(this.db);
198
146
  this.db
199
147
  .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(4, ?)")
@@ -203,7 +151,6 @@ export class FlightRecorder {
203
151
  chmodSync(dbPath, 0o600);
204
152
  }
205
153
  catch {
206
- // Best effort permissions hardening.
207
154
  }
208
155
  }
209
156
  const insertRequest = this.db.prepare(`
@@ -292,21 +239,6 @@ export class FlightRecorder {
292
239
  logComplete(correlationId, result) {
293
240
  this.updateCompleteTxn(correlationId, result);
294
241
  }
295
- /**
296
- * Read-only query over the requests + gateway_metadata tables. Used by
297
- * cache-stats / MCP resources / doctor without exposing a second SQLite
298
- * connection. better-sqlite3 in WAL mode handles concurrent readers
299
- * inside a single process safely.
300
- *
301
- * Safety:
302
- * - Caller MUST pass parameterised SQL — direct string interpolation of
303
- * untrusted values is unsafe.
304
- * - The compiled statement's `.readonly` flag is checked at runtime;
305
- * anything that can mutate rows (INSERT/UPDATE/DELETE, including the
306
- * `RETURNING` forms that better-sqlite3 surfaces via `.all()`) throws.
307
- * This blocks the writer-disguised-as-reader vector codex-r1/F3
308
- * flagged, even when the caller is internal gateway code.
309
- */
310
242
  queryRequests(sql, ...params) {
311
243
  const stmt = this.db.prepare(sql);
312
244
  if (stmt.readonly === false) {
@@ -318,7 +250,6 @@ export class FlightRecorder {
318
250
  return stmt.all(...params);
319
251
  }
320
252
  flush() {
321
- // No-op: better-sqlite3 writes synchronously.
322
253
  }
323
254
  close() {
324
255
  this.db.close();
@@ -1,23 +1,3 @@
1
- /**
2
- * Parsers for Gemini CLI `-o json` (single object) and `-o stream-json`
3
- * (NDJSON event stream) output.
4
- *
5
- * `-o json` emits a single JSON object with:
6
- * - `response`: string final model output
7
- * - `usageMetadata`: { promptTokenCount, candidatesTokenCount,
8
- * cachedContentTokenCount?, totalTokenCount }
9
- *
10
- * `-o stream-json` emits one JSON object per line:
11
- * - `{ "type": "init", "session_id": "...", "model": "..." }`
12
- * - `{ "type": "message", "role": "user", "content": "..." }`
13
- * - `{ "type": "message", "role": "assistant", "content": "...", "delta": true }` (repeated)
14
- * - `{ "type": "result", "status": "success", "stats": { "input_tokens": N,
15
- * "output_tokens": N, "cached": N, ... } }`
16
- *
17
- * Both parsers return null when stdout is unparseable. Both populate the same
18
- * `GeminiJsonParseResult` shape so `extractUsageAndCost` can branch on
19
- * outputFormat without further dispatch.
20
- */
21
1
  export interface GeminiUsage {
22
2
  input_tokens: number;
23
3
  output_tokens: number;
@@ -28,9 +8,4 @@ export interface GeminiJsonParseResult {
28
8
  response?: string;
29
9
  }
30
10
  export declare function parseGeminiJson(stdout: string): GeminiJsonParseResult | null;
31
- /**
32
- * Parse Gemini `-o stream-json` NDJSON output. Concatenates assistant `delta`
33
- * message content into `response`, extracts the terminal `result.stats` payload
34
- * into `usage`. Returns null when stdout contains no parseable JSON line.
35
- */
36
11
  export declare function parseGeminiStreamJson(stdout: string): GeminiJsonParseResult | null;
@@ -1,23 +1,3 @@
1
- /**
2
- * Parsers for Gemini CLI `-o json` (single object) and `-o stream-json`
3
- * (NDJSON event stream) output.
4
- *
5
- * `-o json` emits a single JSON object with:
6
- * - `response`: string final model output
7
- * - `usageMetadata`: { promptTokenCount, candidatesTokenCount,
8
- * cachedContentTokenCount?, totalTokenCount }
9
- *
10
- * `-o stream-json` emits one JSON object per line:
11
- * - `{ "type": "init", "session_id": "...", "model": "..." }`
12
- * - `{ "type": "message", "role": "user", "content": "..." }`
13
- * - `{ "type": "message", "role": "assistant", "content": "...", "delta": true }` (repeated)
14
- * - `{ "type": "result", "status": "success", "stats": { "input_tokens": N,
15
- * "output_tokens": N, "cached": N, ... } }`
16
- *
17
- * Both parsers return null when stdout is unparseable. Both populate the same
18
- * `GeminiJsonParseResult` shape so `extractUsageAndCost` can branch on
19
- * outputFormat without further dispatch.
20
- */
21
1
  export function parseGeminiJson(stdout) {
22
2
  const trimmed = stdout.trim();
23
3
  if (!trimmed) {
@@ -54,11 +34,6 @@ export function parseGeminiJson(stdout) {
54
34
  }
55
35
  return result;
56
36
  }
57
- /**
58
- * Parse Gemini `-o stream-json` NDJSON output. Concatenates assistant `delta`
59
- * message content into `response`, extracts the terminal `result.stats` payload
60
- * into `usage`. Returns null when stdout contains no parseable JSON line.
61
- */
62
37
  export function parseGeminiStreamJson(stdout) {
63
38
  if (!stdout) {
64
39
  return null;
@@ -71,9 +46,6 @@ export function parseGeminiStreamJson(stdout) {
71
46
  const trimmed = line.trim();
72
47
  if (!trimmed)
73
48
  continue;
74
- // Gemini stream-json lines are individual JSON objects; non-JSON
75
- // chatter (warnings, "Ripgrep not available", etc.) is silently
76
- // ignored so a stray banner line doesn't poison usage extraction.
77
49
  let event;
78
50
  try {
79
51
  event = JSON.parse(trimmed);
package/dist/health.d.ts CHANGED
@@ -13,8 +13,5 @@ export interface ProviderRuntimeHealth {
13
13
  providers: Record<string, Pick<ProviderRuntimeStatus, "installed" | "version" | "loginStatus" | "loginCheck">>;
14
14
  timestamp: string;
15
15
  }
16
- /**
17
- * Check health status of PostgreSQL.
18
- */
19
16
  export declare function checkHealth(db: DatabaseConnection): Promise<HealthStatus>;
20
17
  export declare function checkProviderRuntimeHealth(): ProviderRuntimeHealth;
package/dist/health.js CHANGED
@@ -1,7 +1,4 @@
1
1
  import { listProviderRuntimeStatuses } from "./provider-status.js";
2
- /**
3
- * Check health status of PostgreSQL.
4
- */
5
2
  export async function checkHealth(db) {
6
3
  const result = await db.healthCheck();
7
4
  const health = {
package/dist/index.d.ts CHANGED
@@ -13,18 +13,9 @@ import { ClaudeMcpConfigResult, ClaudeMcpServerName } from "./claude-mcp-config.
13
13
  import { type MistralAgentMode, type ClaudePermissionMode, type CodexSandboxMode, type CodexAskForApproval, type ClaudeEffortLevel } from "./request-helpers.js";
14
14
  import { FlightRecorderLike } from "./flight-recorder.js";
15
15
  import { type PromptParts } from "./prompt-parts.js";
16
- /**
17
- * Slice 3: structured warning entries attached to tool responses.
18
- * Distinct from review-integrity warnings (which are text-appended to
19
- * the user-visible response). These are programmatic signals for caller
20
- * agents to react to.
21
- */
22
16
  export interface WarningEntry {
23
- /** Stable machine-readable code, e.g. "cache_ttl_expiring_soon". */
24
17
  code: string;
25
- /** Optional human-readable message for surfaces that render text. */
26
18
  message?: string;
27
- /** Code-specific payload — left open for future warning types. */
28
19
  ttlRemainingMs?: number;
29
20
  [key: string]: unknown;
30
21
  }
@@ -44,7 +35,6 @@ type ExtendedToolResponse = {
44
35
  missing?: ClaudeMcpServerName[];
45
36
  };
46
37
  reviewIntegrity?: ReviewIntegrityResult;
47
- /** Slice 3: structured warnings (e.g. cache_ttl_expiring_soon). */
48
38
  warnings?: WarningEntry[];
49
39
  };
50
40
  declare const logger: {
@@ -54,40 +44,9 @@ declare const logger: {
54
44
  debug: (message: string, ...args: any[]) => void;
55
45
  };
56
46
  type GatewayLogger = typeof logger;
57
- /**
58
- * Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
59
- *
60
- * Both flags reach the upstream CLIs as decimal-formatted argv strings via
61
- * `String(N)`. `z.number().int().positive()` alone lets values past
62
- * `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
63
- * scientific notation that Grok and Vibe both reject. The bounds below
64
- * (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
65
- * for price) guarantee a lossless decimal stringification AND a sane
66
- * upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
67
- */
68
47
  export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
69
48
  export declare const MAX_TOKENS_SCHEMA: z.ZodNumber;
70
49
  export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
71
- /**
72
- * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
73
- * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
74
- * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
75
- * name and/or git ref (default ref: HEAD).
76
- *
77
- * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
78
- * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
79
- * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
80
- * the request carries a sessionId and the session already has a worktree,
81
- * that worktree is reused. On session_delete or TTL eviction the gateway
82
- * runs `git worktree remove --force`.
83
- *
84
- * Tool response: when a worktree was used, the successful response stdout
85
- * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
86
- * parse/use the path without a schema change (slice λ §1.d).
87
- *
88
- * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
89
- * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
90
- */
91
50
  export declare const WORKTREE_SCHEMA: z.ZodUnion<[z.ZodBoolean, z.ZodObject<{
92
51
  name: z.ZodOptional<z.ZodString>;
93
52
  ref: z.ZodOptional<z.ZodString>;
@@ -128,65 +87,16 @@ export interface GatewayServerRuntime {
128
87
  export declare function resolveGatewayServerRuntime(deps?: GatewayServerDeps, options?: {
129
88
  isolateState?: boolean;
130
89
  }): GatewayServerRuntime;
131
- /**
132
- * Slice λ: shape returned by `resolveWorktreeForRequest`. `cwd` is what
133
- * the spawn helpers (`executeCli`, `startJobWithDedup`) consume;
134
- * `worktreePath` is what the tool handler embeds in the response prefix
135
- * so callers can discover the path.
136
- */
137
90
  export interface ResolvedWorktree {
138
91
  cwd?: string;
139
92
  worktreePath?: string;
140
93
  }
141
- /**
142
- * Slice λ: resolve a request's worktree directive into a spawn cwd.
143
- *
144
- * - `worktreeOpt` is the Zod-validated input value (boolean |
145
- * `{ name?, ref? }` | undefined).
146
- * - When the request has a session AND the session already has a
147
- * `metadata.worktreePath`, that path is reused (resume semantics).
148
- * The reused path is returned without touching git; if the directory
149
- * was externally removed between requests, the next CLI invocation
150
- * will surface the error naturally.
151
- * - When no reusable worktree exists, `createWorktree` runs; on success
152
- * the new path is written to `session.metadata` (only when a session
153
- * exists — request-scoped worktrees do NOT persist).
154
- * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
155
- * pre-λ behaviour at non-worktree call sites).
156
- * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
157
- * in a `createErrorResponse` envelope. Do NOT swallow.
158
- *
159
- * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
160
- * verify" §5.
161
- */
162
94
  export declare function resolveWorktreeForRequest(worktreeOpt: boolean | {
163
95
  name?: string;
164
96
  ref?: string;
165
97
  } | undefined, sessionId: string | undefined, runtime: GatewayServerRuntime): Promise<ResolvedWorktree>;
166
- /**
167
- * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
168
- *
169
- * We surface the worktree path inline as a stdout prefix
170
- * (`[gateway] worktree=<absolute-path>\n`) rather than as a
171
- * structuredContent field or JSON wrapper. Rationale:
172
- * - zero schema change across all 10 tools and their downstream parsers
173
- * - matches how other slice features (session warnings, cache_state
174
- * aggregates) surface side-channel metadata today
175
- * - callers that want the path can split on the first newline; callers
176
- * that don't care see a single ignorable header line
177
- *
178
- * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
179
- * the moment a successful response is constructed.
180
- */
181
98
  export declare function formatWorktreePrefix(worktreePath?: string): string;
182
- export declare function extractUsageAndCost(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", output: string, outputFormat?: string,
183
- /**
184
- * Optional context for off-stdout telemetry sources. Today only Mistral
185
- * uses this — its meta.json lives on disk keyed by sessionId. Threading
186
- * this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
187
- * primitives-only (no `params`/`prep` retention on AsyncJobRecord).
188
- */
189
- ctx?: {
99
+ export declare function extractUsageAndCost(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", output: string, outputFormat?: string, ctx?: {
190
100
  sessionId?: string;
191
101
  home?: string;
192
102
  }): {
@@ -205,35 +115,10 @@ interface CliRequestPrep {
205
115
  approvalDecision: ApprovalRecord | null;
206
116
  reviewIntegrity?: ReviewIntegrityResult;
207
117
  args: string[];
208
- /**
209
- * Sha256 of the assembled prompt's stable prefix bytes when the caller
210
- * supplied `promptParts`. Null when the legacy `prompt` field was used.
211
- * Populated by `resolvePromptOrPartsForPrep` and threaded into the
212
- * flight-recorder row by the caller's safeFlightStart entry.
213
- */
214
118
  stablePrefixHash: string | null;
215
- /** Heuristic token count (bytes/4) of the same stable prefix. */
216
119
  stablePrefixTokens: number | null;
217
- /**
218
- * Slice κ (Claude only): JSON stream-json payload to feed on stdin
219
- * when the gateway emits `-p --input-format stream-json`. Undefined
220
- * when the caller did not opt into Anthropic `cache_control`
221
- * breakpoints. Non-κ providers always leave this undefined.
222
- */
223
120
  stdinPayload?: string;
224
- /**
225
- * Slice κ (Claude only): number of caller-supplied content blocks
226
- * that carry an explicit `cache_control` marker. Threaded into the
227
- * flight recorder so `cache_state` aggregates can distinguish
228
- * κ-explicit breakpoints from implicit prefix-cache hits.
229
- */
230
121
  cacheControlBlocks?: number;
231
- /**
232
- * Rec #4: structured warnings produced during prep (e.g. cacheable
233
- * stable prefix without cacheControl). Handlers merge these with any
234
- * other warnings (cache_ttl_expiring_soon, etc.) before returning to
235
- * the caller.
236
- */
237
122
  warnings?: WarningEntry[];
238
123
  }
239
124
  export declare function prepareClaudeRequest(params: {
@@ -270,12 +155,6 @@ export declare function prepareClaudeRequest(params: {
270
155
  tools?: string[];
271
156
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
272
157
  export interface CodexRequestPrep extends CliRequestPrep {
273
- /**
274
- * U26: Cleanup hook for any `outputSchema` temp file written during prep.
275
- * Callers MUST invoke this in a `finally` block (regardless of whether the
276
- * spawn succeeded, failed, or never ran) to avoid leaking the 0o600 temp
277
- * file into `os.tmpdir()`.
278
- */
279
158
  cleanup?: () => void;
280
159
  }
281
160
  export declare function prepareCodexRequest(params: {
@@ -296,11 +175,6 @@ export declare function prepareCodexRequest(params: {
296
175
  correlationId?: string;
297
176
  optimizePrompt: boolean;
298
177
  operation: string;
299
- /**
300
- * U23: output format. When set to "json", emits `--json` so Codex streams
301
- * the JSONL event format that `parseCodexJsonStream` (and downstream
302
- * `extractUsageAndCost`) can consume. Defaults to "text".
303
- */
304
178
  outputFormat?: "text" | "json";
305
179
  outputSchema?: string | Record<string, unknown>;
306
180
  search?: boolean;
@@ -326,31 +200,12 @@ export declare function prepareGeminiRequest(params: {
326
200
  correlationId?: string;
327
201
  optimizePrompt: boolean;
328
202
  operation: string;
329
- /**
330
- * U23 + Phase 4 slice ε: output format. `json` emits `-o json` (single
331
- * JSON object with usageMetadata). `stream-json` emits `-o stream-json`
332
- * (NDJSON event stream — `init` / `message` / `result` lines). Both
333
- * route through `extractUsageAndCost` so usage tokens reach the flight
334
- * recorder. Defaults to "text".
335
- */
336
203
  outputFormat?: "text" | "json" | "stream-json";
337
204
  sandbox?: boolean;
338
205
  policyFiles?: string[];
339
206
  adminPolicyFiles?: string[];
340
207
  attachments?: string[];
341
- /**
342
- * Phase 4 slice γ: emit `--skip-trust` so first-run workspaces don't
343
- * block headless invocations on the interactive trust prompt. Default
344
- * is undefined (preserves current prompt behaviour for legacy callers).
345
- */
346
208
  skipTrust?: boolean;
347
- /**
348
- * Emit `--yolo` (auto-approve all actions). Equivalent in effect to
349
- * `approvalMode: "yolo"`; provided for CLI ergonomic parity. Routed
350
- * through the same approval gate (sets `bypassRequested`), and never
351
- * emitted alongside `--approval-mode yolo` so there is a single
352
- * auto-approve path. Default undefined.
353
- */
354
209
  yolo?: boolean;
355
210
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
356
211
  export declare function prepareGrokRequest(params: {
@@ -370,44 +225,13 @@ export declare function prepareGrokRequest(params: {
370
225
  correlationId?: string;
371
226
  optimizePrompt: boolean;
372
227
  operation: string;
373
- /**
374
- * Phase 4 slice δ: emit `--max-turns N` so callers can cap agent-loop
375
- * iterations for cost / latency control. Mirrors Claude's wiring.
376
- */
377
228
  maxTurns?: number;
378
- /**
379
- * Phase 4 slice ζ: emit `--cwd <DIR>` so headless callers can set Grok's
380
- * working directory without depending on the gateway process's cwd.
381
- */
382
229
  workingDir?: string;
383
- /**
384
- * Phase 4 slice θ — Grok HIGH parity. All five are passthrough flags:
385
- *
386
- * - `sandbox` → `--sandbox <PROFILE>` (freeform; Grok 0.1.210 --help
387
- * shows no enum constraint, unlike --effort / --permission-mode /
388
- * --output-format which all show `[possible values: …]`).
389
- * - `rules` → `--rules <RULES>`. Supports `@file` prefix; gateway
390
- * passes the value verbatim and lets Grok parse it.
391
- * - `systemPromptOverride` → `--system-prompt-override <PROMPT>`.
392
- * Distinct from Claude's --system-prompt / --append-system-prompt
393
- * (Grok has only one override flag).
394
- * - `allow` / `deny` → repeatable `--allow <RULE>` / `--deny <RULE>`
395
- * per --help ("Repeat to add multiple rules"). One argv pair per
396
- * entry — NOT comma-joined like --tools / --disallowed-tools.
397
- */
398
230
  sandbox?: string;
399
231
  rules?: string;
400
232
  systemPromptOverride?: string;
401
233
  allow?: string[];
402
234
  deny?: string[];
403
- /**
404
- * Grok 0.2.x context/compaction controls (both enum passthrough flags):
405
- * - `compactionMode` → `--compaction-mode <summary|transcript|segments>`
406
- * (default summary; sets GROK_COMPACTION_MODE).
407
- * - `compactionDetail` → `--compaction-detail <none|minimal|balanced|verbose>`
408
- * (default verbose; only affects `--compaction-mode segments`; sets
409
- * GROK_COMPACTION_DETAIL).
410
- */
411
235
  compactionMode?: string;
412
236
  compactionDetail?: string;
413
237
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
@@ -425,33 +249,15 @@ export declare function prepareMistralRequest(params: {
425
249
  correlationId?: string;
426
250
  optimizePrompt: boolean;
427
251
  operation: string;
428
- /**
429
- * Phase 4 slice γ: emit `--trust` to bypass Vibe's interactive trust
430
- * prompt for this invocation only (not persisted). Default undefined.
431
- */
432
252
  trust?: boolean;
433
- /** Phase 4 slice δ: Vibe `--max-turns N` cap on agent-loop iterations. */
434
253
  maxTurns?: number;
435
- /** Phase 4 slice δ: Vibe `--max-price DOLLARS` cumulative-cost cap. */
436
254
  maxPrice?: number;
437
- /** Vibe 2.x: `--max-tokens N` cumulative prompt + completion token cap. */
438
255
  maxTokens?: number;
439
- /** Phase 4 slice ζ: Vibe `--workdir <DIR>` working-directory parity. */
440
256
  workingDir?: string;
441
- /** Phase 4 slice ζ: Vibe `--add-dir <DIR>` repeatable add-dir parity. */
442
257
  addDir?: string[];
443
258
  }, runtime?: GatewayServerRuntime): (CliRequestPrep & {
444
259
  mistralEnv: Record<string, string>;
445
260
  }) | ExtendedToolResponse;
446
- /**
447
- * Phase 4 slice δ post-review: pure helper extracted from
448
- * `handleMistralRequest` so the retry-path arg-preservation invariants
449
- * (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
450
- * without mocking awaitJobOrDefer. Any param the wrapper threads into
451
- * the FIRST `buildMistralCliInvocation` call MUST also be threaded
452
- * through here, or a fresh-workspace / budgeted run can degrade on
453
- * the second attempt.
454
- */
455
261
  export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams, "outputFormat" | "permissionMode" | "allowedTools" | "disallowedTools" | "approvalStrategy" | "trust" | "maxTurns" | "maxPrice" | "maxTokens" | "workingDir" | "addDir"> & {
456
262
  effectivePrompt: string;
457
263
  }, recoveryModel: string): {
@@ -477,20 +283,13 @@ export interface GeminiRequestParams {
477
283
  optimizeResponse?: boolean;
478
284
  idleTimeoutMs?: number;
479
285
  forceRefresh?: boolean;
480
- /**
481
- * U23 + Phase 4 slice ε: "json" emits `-o json`; "stream-json" emits
482
- * `-o stream-json` (NDJSON event stream). Both are usage-extracted.
483
- */
484
286
  outputFormat?: "text" | "json" | "stream-json";
485
287
  sandbox?: boolean;
486
288
  policyFiles?: string[];
487
289
  adminPolicyFiles?: string[];
488
290
  attachments?: string[];
489
- /** Phase 4 slice γ: emit `--skip-trust` for fresh-workspace headless runs. */
490
291
  skipTrust?: boolean;
491
- /** Emit `--yolo` (auto-approve all). Equivalent to approvalMode "yolo"; gated identically. */
492
292
  yolo?: boolean;
493
- /** Slice λ: run this request inside a gateway-owned git worktree. */
494
293
  worktree?: boolean | {
495
294
  name?: string;
496
295
  ref?: string;
@@ -533,25 +332,15 @@ export interface GrokRequestParams {
533
332
  optimizeResponse?: boolean;
534
333
  idleTimeoutMs?: number;
535
334
  forceRefresh?: boolean;
536
- /** Phase 4 slice δ: cap agent-loop iterations via `--max-turns N`. */
537
335
  maxTurns?: number;
538
- /** Phase 4 slice ζ: emit `--cwd <DIR>` so the CLI uses the specified working directory. */
539
336
  workingDir?: string;
540
- /** Phase 4 slice θ: Grok `--sandbox <PROFILE>` (freeform passthrough). */
541
337
  sandbox?: string;
542
- /** Phase 4 slice θ: Grok `--rules <RULES>` (supports `@file` prefix; verbatim passthrough). */
543
338
  rules?: string;
544
- /** Phase 4 slice θ: Grok `--system-prompt-override <PROMPT>`. */
545
339
  systemPromptOverride?: string;
546
- /** Phase 4 slice θ: Grok `--allow <RULE>` (repeatable; one entry per --allow instance). */
547
340
  allow?: string[];
548
- /** Phase 4 slice θ: Grok `--deny <RULE>` (repeatable; one entry per --deny instance). */
549
341
  deny?: string[];
550
- /** Grok 0.2.x: `--compaction-mode <summary|transcript|segments>` context control. */
551
342
  compactionMode?: string;
552
- /** Grok 0.2.x: `--compaction-detail <none|minimal|balanced|verbose>`; only affects segments mode. */
553
343
  compactionDetail?: string;
554
- /** Slice λ: run this request inside a gateway-owned git worktree. */
555
344
  worktree?: boolean | {
556
345
  name?: string;
557
346
  ref?: string;
@@ -578,19 +367,12 @@ export interface MistralRequestParams {
578
367
  optimizeResponse?: boolean;
579
368
  idleTimeoutMs?: number;
580
369
  forceRefresh?: boolean;
581
- /** Phase 4 slice γ: emit `--trust` for fresh-workspace headless runs. */
582
370
  trust?: boolean;
583
- /** Phase 4 slice δ: Vibe `--max-turns N` cap on agent-loop iterations. */
584
371
  maxTurns?: number;
585
- /** Phase 4 slice δ: Vibe `--max-price DOLLARS` cumulative-cost cap. */
586
372
  maxPrice?: number;
587
- /** Vibe 2.x: `--max-tokens N` cumulative prompt + completion token cap. */
588
373
  maxTokens?: number;
589
- /** Phase 4 slice ζ: Vibe `--workdir <DIR>` working-directory parity. */
590
374
  workingDir?: string;
591
- /** Phase 4 slice ζ: Vibe `--add-dir <DIR>` repeatable add-dir parity. */
592
375
  addDir?: string[];
593
- /** Slice λ: run this request inside a gateway-owned git worktree. */
594
376
  worktree?: boolean | {
595
377
  name?: string;
596
378
  ref?: string;
@@ -617,7 +399,6 @@ export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params:
617
399
  optimizePrompt: boolean;
618
400
  idleTimeoutMs?: number;
619
401
  forceRefresh?: boolean;
620
- /** U23: when "json", emits Codex `--json` so the parser is reachable. */
621
402
  outputFormat?: "text" | "json";
622
403
  outputSchema?: string | Record<string, unknown>;
623
404
  search?: boolean;
@@ -629,7 +410,6 @@ export declare function handleCodexRequestAsync(deps: AsyncHandlerDeps, params:
629
410
  ignoreRules?: boolean;
630
411
  workingDir?: string;
631
412
  addDir?: string[];
632
- /** Slice λ: run this request inside a gateway-owned git worktree. */
633
413
  worktree?: boolean | {
634
414
  name?: string;
635
415
  ref?: string;