llm-cli-gateway 1.17.3 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +45 -0
  2. package/README.md +1 -1
  3. package/dist/approval-manager.js +0 -8
  4. package/dist/async-job-manager.d.ts +0 -113
  5. package/dist/async-job-manager.js +6 -124
  6. package/dist/cache-stats.d.ts +0 -89
  7. package/dist/cache-stats.js +0 -62
  8. package/dist/claude-mcp-config.js +0 -1
  9. package/dist/cli-updater.d.ts +0 -8
  10. package/dist/cli-updater.js +0 -12
  11. package/dist/codex-json-parser.d.ts +0 -20
  12. package/dist/codex-json-parser.js +0 -21
  13. package/dist/config.d.ts +0 -31
  14. package/dist/config.js +2 -72
  15. package/dist/db.d.ts +0 -18
  16. package/dist/db.js +0 -22
  17. package/dist/doctor.d.ts +0 -49
  18. package/dist/doctor.js +0 -47
  19. package/dist/endpoint-exposure.js +0 -1
  20. package/dist/executor.d.ts +0 -19
  21. package/dist/executor.js +3 -38
  22. package/dist/flight-recorder.d.ts +0 -26
  23. package/dist/flight-recorder.js +1 -70
  24. package/dist/gemini-json-parser.d.ts +0 -25
  25. package/dist/gemini-json-parser.js +0 -28
  26. package/dist/health.d.ts +0 -3
  27. package/dist/health.js +0 -3
  28. package/dist/index.d.ts +12 -208
  29. package/dist/index.js +116 -588
  30. package/dist/job-store.d.ts +0 -74
  31. package/dist/job-store.js +1 -73
  32. package/dist/logger.d.ts +0 -7
  33. package/dist/logger.js +0 -6
  34. package/dist/migrate-sessions.d.ts +0 -3
  35. package/dist/migrate-sessions.js +0 -16
  36. package/dist/migrate.js +1 -18
  37. package/dist/mistral-meta-json-parser.js +0 -67
  38. package/dist/model-registry.js +0 -13
  39. package/dist/pricing.d.ts +0 -46
  40. package/dist/pricing.js +0 -47
  41. package/dist/process-monitor.d.ts +0 -15
  42. package/dist/process-monitor.js +2 -31
  43. package/dist/prompt-parts.d.ts +6 -31
  44. package/dist/prompt-parts.js +0 -11
  45. package/dist/provider-status.d.ts +0 -8
  46. package/dist/provider-status.js +0 -11
  47. package/dist/request-helpers.d.ts +4 -316
  48. package/dist/request-helpers.js +13 -231
  49. package/dist/resources.d.ts +0 -20
  50. package/dist/resources.js +1 -34
  51. package/dist/retry.d.ts +0 -45
  52. package/dist/retry.js +3 -40
  53. package/dist/session-manager-pg.d.ts +0 -32
  54. package/dist/session-manager-pg.js +0 -32
  55. package/dist/session-manager.d.ts +0 -21
  56. package/dist/session-manager.js +1 -15
  57. package/dist/stream-json-parser.d.ts +0 -18
  58. package/dist/stream-json-parser.js +0 -22
  59. package/dist/upstream-contracts.d.ts +0 -55
  60. package/dist/upstream-contracts.js +86 -64
  61. package/dist/validation-orchestrator.js +0 -3
  62. package/dist/worktree-manager.d.ts +0 -9
  63. package/dist/worktree-manager.js +0 -21
  64. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -35,7 +35,6 @@ import { printDoctorJson } from "./doctor.js";
35
35
  import { registerValidationTools } from "./validation-tools.js";
36
36
  import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
37
37
  import { entrypointFileURL } from "./entrypoint-url.js";
38
- // Simple logger that writes to stderr (stdout is used for MCP protocol)
39
38
  const logger = {
40
39
  info: (message, ...args) => {
41
40
  console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
@@ -94,10 +93,6 @@ function logOptimizationTokens(kind, correlationId, original, optimized) {
94
93
  const reduction = originalTokens === 0 ? 0 : ((originalTokens - optimizedTokens) / originalTokens) * 100;
95
94
  logger.info(`[${correlationId}] ${kind} tokens ${originalTokens} → ${optimizedTokens} (${reduction.toFixed(1)}% reduction)`);
96
95
  }
97
- // Sync-to-async deadline: if a sync tool's CLI call hasn't finished within this
98
- // window, the tool returns a deferred async job reference instead of blocking
99
- // until the MCP client's tool-call timeout fires (~60s in many runtimes).
100
- // Configurable via SYNC_DEADLINE_MS env var. Set to 0 to disable (pure sync).
101
96
  const SYNC_DEADLINE_MS = (() => {
102
97
  const env = process.env.SYNC_DEADLINE_MS;
103
98
  if (env !== undefined) {
@@ -105,11 +100,8 @@ const SYNC_DEADLINE_MS = (() => {
105
100
  if (Number.isFinite(parsed) && parsed >= 0)
106
101
  return parsed;
107
102
  }
108
- return 45_000; // 45s default — safely under the 60s MCP client cap
103
+ return 45_000;
109
104
  })();
110
- //──────────────────────────────────────────────────────────────────────────────
111
- // Skills loader — reads .agents/skills/*/SKILL.md at startup
112
- //──────────────────────────────────────────────────────────────────────────────
113
105
  const __filename = fileURLToPath(import.meta.url);
114
106
  const __dirname = dirname(__filename);
115
107
  const SKILLS_DIR = join(__dirname, "..", ".agents", "skills");
@@ -124,7 +116,6 @@ function packageVersion() {
124
116
  return parsed.version || "unknown";
125
117
  }
126
118
  catch {
127
- // Try next candidate.
128
119
  }
129
120
  }
130
121
  return "unknown";
@@ -137,24 +128,19 @@ function loadSkills() {
137
128
  const skillPath = join(SKILLS_DIR, dir.name, "SKILL.md");
138
129
  try {
139
130
  const content = readFileSync(skillPath, "utf-8");
140
- // Extract description from YAML frontmatter
141
131
  const descMatch = content.match(/^---[\s\S]*?description:\s*(.+?)$/m);
142
132
  const description = descMatch?.[1]?.trim() || dir.name;
143
133
  skills.push({ name: dir.name, content, description });
144
134
  }
145
135
  catch {
146
- // Skill file missing or unreadable — skip silently
147
136
  }
148
137
  }
149
138
  }
150
139
  catch {
151
- // Skills directory missing — not fatal
152
140
  }
153
141
  return skills;
154
142
  }
155
143
  const loadedSkills = loadSkills();
156
- // L1: Compact server instructions (~200 tokens) — injected into every client's
157
- // system prompt at connection time. Covers key patterns + pointers to L2 resources.
158
144
  const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
159
145
 
160
146
  Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
@@ -175,17 +161,11 @@ ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}
175
161
  function newGatewayMcpServer() {
176
162
  return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
177
163
  }
178
- // Global state (initialized asynchronously)
179
164
  let sessionManager;
180
165
  let db = null;
181
166
  const performanceMetrics = new PerformanceMetrics();
182
167
  let resourceProvider;
183
168
  let flightRecorder = null;
184
- // Resolved persistence config — single source of truth for the async-job backend.
185
- // Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
186
- // When backend = "none", the JobStore is null AND *_request_async tools are not
187
- // registered (see createGatewayServer), making silent in-memory loss
188
- // structurally impossible.
189
169
  let persistenceConfig = null;
190
170
  let cacheAwarenessConfig = null;
191
171
  let jobStore = null;
@@ -231,47 +211,9 @@ function getApprovalManager(runtimeLogger = logger) {
231
211
  return approvalManager;
232
212
  }
233
213
  const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
234
- /**
235
- * Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
236
- *
237
- * Both flags reach the upstream CLIs as decimal-formatted argv strings via
238
- * `String(N)`. `z.number().int().positive()` alone lets values past
239
- * `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
240
- * scientific notation that Grok and Vibe both reject. The bounds below
241
- * (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
242
- * for price) guarantee a lossless decimal stringification AND a sane
243
- * upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
244
- */
245
214
  export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
246
- // Token budgets can legitimately exceed the agent-turn cap by orders of
247
- // magnitude. Keep a finite operational guardrail while avoiding the 10k turn
248
- // ceiling that would make large-context Vibe sessions unusable.
249
215
  export const MAX_TOKENS_SCHEMA = z.number().int().positive().safe().max(100_000_000);
250
- // `.min(1e-6)` keeps the value in JS's decimal-stringify range:
251
- // String(1e-6) === "0.000001" but String(1e-7) === "1e-7", which both
252
- // upstream CLIs would reject. 1µUSD per request is fine-grained enough
253
- // for any plausible budget-cap use.
254
216
  export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
255
- /**
256
- * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
257
- * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
258
- * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
259
- * name and/or git ref (default ref: HEAD).
260
- *
261
- * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
262
- * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
263
- * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
264
- * the request carries a sessionId and the session already has a worktree,
265
- * that worktree is reused. On session_delete or TTL eviction the gateway
266
- * runs `git worktree remove --force`.
267
- *
268
- * Tool response: when a worktree was used, the successful response stdout
269
- * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
270
- * parse/use the path without a schema change (slice λ §1.d).
271
- *
272
- * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
273
- * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
274
- */
275
217
  export const WORKTREE_SCHEMA = z
276
218
  .union([
277
219
  z.boolean(),
@@ -296,9 +238,6 @@ export const WORKTREE_SCHEMA = z
296
238
  "path. NOTE: callers should `.gitignore` the `.worktrees/` " +
297
239
  "directory in their repo (the gateway does NOT auto-gitignore — " +
298
240
  "see slice λ spec Q4).");
299
- // U22: Session-provider enum extended to five providers. The storage layer's
300
- // CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
301
- // session_create / session_list / session_clear_all accept the fifth provider.
302
241
  export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
303
242
  export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
304
243
  let activeServer = null;
@@ -308,13 +247,10 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
308
247
  const runtimeSessionManager = deps.sessionManager ?? sessionManager;
309
248
  const runtimePerformanceMetrics = deps.performanceMetrics ??
310
249
  (options.isolateState ? new PerformanceMetrics() : performanceMetrics);
311
- // Resolve flight recorder BEFORE async manager so isolateState managers
312
- // can be wired with the same recorder instance the runtime exposes.
313
250
  const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
314
251
  const runtimeAsyncJobManager = deps.asyncJobManager ??
315
252
  (options.isolateState
316
- ? // Factory-created test/HTTP session servers must not mark another instance's
317
- // durable jobs orphaned. Stdio startup injects the process-global manager.
253
+ ?
318
254
  newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null, runtimeFlightRecorder)
319
255
  : getAsyncJobManager(runtimeLogger));
320
256
  const runtimeApprovalManager = deps.approvalManager ??
@@ -337,15 +273,12 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
337
273
  cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
338
274
  };
339
275
  }
340
- // Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
341
- // Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
342
- // In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
343
276
  const CLI_IDLE_TIMEOUTS = {
344
- claude: 600_000, // 10 minutes — only used when outputFormat=stream-json
345
- codex: 600_000, // 10 minutes — Codex streams stderr progress
346
- gemini: 600_000, // 10 minutes — Gemini streams stdout in real-time
347
- grok: 600_000, // 10 minutes — Grok streams stderr/stdout activity in headless mode
348
- mistral: 600_000, // 10 minutes — Vibe streams stdout/stderr in headless mode
277
+ claude: 600_000,
278
+ codex: 600_000,
279
+ gemini: 600_000,
280
+ grok: 600_000,
281
+ mistral: 600_000,
349
282
  };
350
283
  function resolveIdleTimeout(cli, override) {
351
284
  if (override !== undefined)
@@ -353,41 +286,7 @@ function resolveIdleTimeout(cli, override) {
353
286
  return CLI_IDLE_TIMEOUTS[cli];
354
287
  }
355
288
  const SYNC_POLL_INTERVAL_MS = 1_000;
356
- /**
357
- * Start an async job and poll until completion or deadline.
358
- * Returns the job result if it finishes in time, or a deferral marker.
359
- */
360
- async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete,
361
- /**
362
- * Slice 1.5: when the sync handler has already written a logStart row
363
- * keyed on `corrId`, pass these so the manager can write logComplete
364
- * (with usage extraction) when the underlying async job terminates —
365
- * even if the sync handler returned a deferred response.
366
- * `writeFlightStart` is NEVER true on this path: the sync handler is
367
- * always the upstream logStart writer.
368
- */
369
- flightRecorderEntry, extractUsage,
370
- /**
371
- * Slice κ: optional stdin payload piped to the child CLI. Currently
372
- * only Claude's `--input-format stream-json` path sets this. Threaded
373
- * through both the direct-execute fallback (SYNC_DEADLINE_MS===0) and
374
- * the AsyncJobManager spawn path, and participates in the dedup key.
375
- */
376
- stdin,
377
- /**
378
- * Slice λ: optional working directory for the spawned child process,
379
- * derived from a gateway-owned git worktree. Threaded to both the
380
- * direct-execute fallback (`executeCli({ cwd })`) and the
381
- * AsyncJobManager dedup-aware spawn path
382
- * (`startJobWithDedup({ cwd })`). `cwd` also participates in the
383
- * dedup key (see async-job-manager.buildRequestKey) so two requests
384
- * with identical argv in different worktrees do not collide.
385
- */
386
- cwd) {
387
- // U26 fix: ownership of onComplete is a contract. Once this function returns
388
- // OR throws, the caller MUST consider onComplete consumed — i.e. it has
389
- // either been run, or the AsyncJobManager has taken ownership of it. The
390
- // caller never needs to reclaim.
289
+ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete, flightRecorderEntry, extractUsage, stdin, cwd) {
391
290
  let onCompleteOwnedByCaller = onComplete !== undefined;
392
291
  const consumeOnComplete = () => {
393
292
  if (!onCompleteOwnedByCaller || !onComplete)
@@ -409,8 +308,6 @@ cwd) {
409
308
  throw err;
410
309
  }
411
310
  if (SYNC_DEADLINE_MS === 0) {
412
- // Disabled — fall through to direct execution.
413
- // Note: direct execution bypasses dedup. forceRefresh is implied.
414
311
  const command = cli === "mistral" ? "vibe" : cli;
415
312
  try {
416
313
  return await executeCli(command, args, {
@@ -422,8 +319,6 @@ cwd) {
422
319
  });
423
320
  }
424
321
  finally {
425
- // Direct-execution path completes inline; release per-request resources
426
- // (e.g. outputSchema temp files) here.
427
322
  consumeOnComplete();
428
323
  }
429
324
  }
@@ -437,22 +332,12 @@ cwd) {
437
332
  env,
438
333
  stdin,
439
334
  onComplete,
440
- // Sync-deferred path: the upstream sync handler already wrote
441
- // logStart for this corrId, so writeFlightStart stays false. The
442
- // manager still writes logComplete on terminal state (which UPDATEs
443
- // the sync handler's row), closing the previously-orphaned
444
- // sync-deferred case.
445
335
  flightRecorderEntry,
446
336
  extractUsage,
447
337
  });
448
- // Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
449
- // fireOnComplete on terminal status, or run inline immediately for dedup).
450
338
  onCompleteOwnedByCaller = false;
451
339
  }
452
340
  catch (err) {
453
- // Spawn or pre-spawn failure inside AsyncJobManager. The record was never
454
- // registered, so onComplete will never be called by the manager. Reclaim
455
- // here so the temp file is not leaked.
456
341
  consumeOnComplete();
457
342
  throw err;
458
343
  }
@@ -464,7 +349,6 @@ cwd) {
464
349
  while (Date.now() < deadline) {
465
350
  const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
466
351
  if (snapshot && snapshot.status !== "running") {
467
- // Job finished within deadline — extract result
468
352
  const result = runtime.asyncJobManager.getJobResult(job.id);
469
353
  if (!result) {
470
354
  return { stdout: "", stderr: "Job result unavailable", code: 1 };
@@ -477,13 +361,6 @@ cwd) {
477
361
  }
478
362
  await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
479
363
  }
480
- // Deadline exceeded — return deferral.
481
- // R2 Codex-Unit-B F1: hand FR-complete ownership to the manager. Until
482
- // this call, the manager skips writeFlightComplete on terminal so the
483
- // sync handler's safeFlightComplete (with rich approvalDecision /
484
- // optimizationApplied metadata) wins for sync-inline completions. From
485
- // here on the sync handler returns deferred and will NOT write
486
- // safeFlightComplete, so the manager must.
487
364
  runtime.asyncJobManager.armFlightCompleteForDeferral(job.id);
488
365
  runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
489
366
  return {
@@ -517,27 +394,6 @@ function buildDeferredToolResponse(deferred, sessionId) {
517
394
  ],
518
395
  };
519
396
  }
520
- /**
521
- * Slice λ: resolve a request's worktree directive into a spawn cwd.
522
- *
523
- * - `worktreeOpt` is the Zod-validated input value (boolean |
524
- * `{ name?, ref? }` | undefined).
525
- * - When the request has a session AND the session already has a
526
- * `metadata.worktreePath`, that path is reused (resume semantics).
527
- * The reused path is returned without touching git; if the directory
528
- * was externally removed between requests, the next CLI invocation
529
- * will surface the error naturally.
530
- * - When no reusable worktree exists, `createWorktree` runs; on success
531
- * the new path is written to `session.metadata` (only when a session
532
- * exists — request-scoped worktrees do NOT persist).
533
- * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
534
- * pre-λ behaviour at non-worktree call sites).
535
- * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
536
- * in a `createErrorResponse` envelope. Do NOT swallow.
537
- *
538
- * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
539
- * verify" §5.
540
- */
541
397
  export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime) {
542
398
  if (!worktreeOpt)
543
399
  return {};
@@ -566,30 +422,13 @@ export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime)
566
422
  }
567
423
  return { cwd: handle.path, worktreePath: handle.path };
568
424
  }
569
- /**
570
- * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
571
- *
572
- * We surface the worktree path inline as a stdout prefix
573
- * (`[gateway] worktree=<absolute-path>\n`) rather than as a
574
- * structuredContent field or JSON wrapper. Rationale:
575
- * - zero schema change across all 10 tools and their downstream parsers
576
- * - matches how other slice features (session warnings, cache_state
577
- * aggregates) surface side-channel metadata today
578
- * - callers that want the path can split on the first newline; callers
579
- * that don't care see a single ignorable header line
580
- *
581
- * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
582
- * the moment a successful response is constructed.
583
- */
584
425
  export function formatWorktreePrefix(worktreePath) {
585
426
  return worktreePath ? `[gateway] worktree=${worktreePath}\n` : "";
586
427
  }
587
- // Helper function for standardized error responses
588
428
  function createErrorResponse(cli, code, stderr, correlationId, error) {
589
429
  let errorMessage = `Error executing ${cli} CLI`;
590
430
  const isLaunchExit = code === 127 || code === -4058;
591
431
  if (error) {
592
- // Command not found or spawn error
593
432
  errorMessage += `:\n${error.message}`;
594
433
  if (error.message.includes("ENOENT")) {
595
434
  errorMessage += `\n\nThe '${cli}' command was not found. Please ensure ${cli} CLI is installed and in your PATH.`;
@@ -597,12 +436,10 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
597
436
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI execution failed:`, error.message);
598
437
  }
599
438
  else if (code === 124) {
600
- // Wall-clock timeout
601
439
  errorMessage += `: Command timed out\n${stderr}`;
602
440
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI timed out`);
603
441
  }
604
442
  else if (code === 125) {
605
- // Idle timeout (stuck process)
606
443
  errorMessage += `: Process killed due to inactivity\n${stderr}`;
607
444
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI killed due to inactivity`);
608
445
  }
@@ -611,7 +448,6 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
611
448
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed to launch`);
612
449
  }
613
450
  else if (code !== 0) {
614
- // Other non-zero exit code
615
451
  errorMessage += ` (exit code ${code}):\n${stderr}`;
616
452
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed with exit code ${code}`);
617
453
  }
@@ -634,14 +470,7 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
634
470
  },
635
471
  };
636
472
  }
637
- export function extractUsageAndCost(cli, output, outputFormat,
638
- /**
639
- * Optional context for off-stdout telemetry sources. Today only Mistral
640
- * uses this — its meta.json lives on disk keyed by sessionId. Threading
641
- * this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
642
- * primitives-only (no `params`/`prep` retention on AsyncJobRecord).
643
- */
644
- ctx) {
473
+ export function extractUsageAndCost(cli, output, outputFormat, ctx) {
645
474
  if (cli === "claude" && outputFormat === "stream-json") {
646
475
  const parsed = parseStreamJson(output);
647
476
  if (!parsed.usage) {
@@ -679,29 +508,12 @@ ctx) {
679
508
  cacheReadTokens: parsed.usage.cache_read_tokens,
680
509
  };
681
510
  }
682
- // Mistral/Vibe: usage/cost live on disk in `~/.vibe/logs/session/<id>/meta.json`
683
- // (Phase 4 slice β). Best-effort: if we don't know the sessionId (fresh
684
- // session whose Vibe-assigned UUID we never observed) or the file is
685
- // missing/malformed, the parser returns `{}` and the FR row simply lacks
686
- // usage data — matching pre-slice behaviour. No stdout fallback exists.
687
511
  if (cli === "mistral") {
688
512
  return parseVibeMetaJson(ctx?.home ?? homedir(), ctx?.sessionId);
689
513
  }
690
514
  return {};
691
515
  }
692
- /**
693
- * Slice 1.5: build the async-job-manager's FR payload from a prep object
694
- * (which every prepare*Request returns), plus the bound CLI and output
695
- * format primitives needed by extractUsageAndCost. Returning the closure
696
- * separately means it captures `cliName` and `fmt` ONLY — never `params`
697
- * or `prep` — so retention on AsyncJobRecord is O(constant).
698
- */
699
516
  function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat) {
700
- // Extract primitives BEFORE building the closure — capturing `prep` or
701
- // `params` directly would pin large attachments / promptParts on the
702
- // AsyncJobRecord for JOB_TTL_MS. Phase 4 slice β: `sid` and `home` are
703
- // primitives too, threaded through so the Mistral branch of
704
- // extractUsageAndCost can read `~/.vibe/logs/session/<id>/meta.json`.
705
517
  const cli = cliName;
706
518
  const fmt = outputFormat;
707
519
  const sid = sessionId;
@@ -795,11 +607,7 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
795
607
  }
796
608
  return { config: mcpConfig };
797
609
  }
798
- //──────────────────────────────────────────────────────────────────────────────
799
- // MCP Resources
800
- //──────────────────────────────────────────────────────────────────────────────
801
610
  function registerBaseResources(server, runtime) {
802
- // Register skill resources (L2: full docs, read on demand)
803
611
  for (const skill of loadedSkills) {
804
612
  server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
805
613
  title: skill.name,
@@ -816,7 +624,6 @@ function registerBaseResources(server, runtime) {
816
624
  }));
817
625
  }
818
626
  runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
819
- // Register all sessions resource
820
627
  server.registerResource("all-sessions", "sessions://all", {
821
628
  title: "📋 All Sessions",
822
629
  description: "All conversation sessions across CLIs",
@@ -826,7 +633,6 @@ function registerBaseResources(server, runtime) {
826
633
  const contents = await runtime.resourceProvider.readResource(uri.href);
827
634
  return { contents: contents ? [contents] : [] };
828
635
  });
829
- // Register Claude sessions resource
830
636
  server.registerResource("claude-sessions", "sessions://claude", {
831
637
  title: "🤖 Claude Sessions",
832
638
  description: "Claude conversation sessions",
@@ -836,7 +642,6 @@ function registerBaseResources(server, runtime) {
836
642
  const contents = await runtime.resourceProvider.readResource(uri.href);
837
643
  return { contents: contents ? [contents] : [] };
838
644
  });
839
- // Register Codex sessions resource
840
645
  server.registerResource("codex-sessions", "sessions://codex", {
841
646
  title: "💻 Codex Sessions",
842
647
  description: "Codex conversation sessions",
@@ -846,7 +651,6 @@ function registerBaseResources(server, runtime) {
846
651
  const contents = await runtime.resourceProvider.readResource(uri.href);
847
652
  return { contents: contents ? [contents] : [] };
848
653
  });
849
- // Register Gemini sessions resource
850
654
  server.registerResource("gemini-sessions", "sessions://gemini", {
851
655
  title: "✨ Gemini Sessions",
852
656
  description: "Gemini conversation sessions",
@@ -856,7 +660,6 @@ function registerBaseResources(server, runtime) {
856
660
  const contents = await runtime.resourceProvider.readResource(uri.href);
857
661
  return { contents: contents ? [contents] : [] };
858
662
  });
859
- // Register Grok sessions resource
860
663
  server.registerResource("grok-sessions", "sessions://grok", {
861
664
  title: "⚡ Grok Sessions",
862
665
  description: "Grok conversation sessions",
@@ -866,7 +669,6 @@ function registerBaseResources(server, runtime) {
866
669
  const contents = await runtime.resourceProvider.readResource(uri.href);
867
670
  return { contents: contents ? [contents] : [] };
868
671
  });
869
- // Register Mistral sessions resource
870
672
  server.registerResource("mistral-sessions", "sessions://mistral", {
871
673
  title: "🌬 Mistral Sessions",
872
674
  description: "Mistral Vibe conversation sessions",
@@ -876,7 +678,6 @@ function registerBaseResources(server, runtime) {
876
678
  const contents = await runtime.resourceProvider.readResource(uri.href);
877
679
  return { contents: contents ? [contents] : [] };
878
680
  });
879
- // Register Claude models resource
880
681
  server.registerResource("claude-models", "models://claude", {
881
682
  title: "🧠 Claude Models",
882
683
  description: "Claude models and capabilities",
@@ -886,7 +687,6 @@ function registerBaseResources(server, runtime) {
886
687
  const contents = await runtime.resourceProvider.readResource(uri.href);
887
688
  return { contents: contents ? [contents] : [] };
888
689
  });
889
- // Register Codex models resource
890
690
  server.registerResource("codex-models", "models://codex", {
891
691
  title: "🔧 Codex Models",
892
692
  description: "Codex models and capabilities",
@@ -896,7 +696,6 @@ function registerBaseResources(server, runtime) {
896
696
  const contents = await runtime.resourceProvider.readResource(uri.href);
897
697
  return { contents: contents ? [contents] : [] };
898
698
  });
899
- // Register Gemini models resource
900
699
  server.registerResource("gemini-models", "models://gemini", {
901
700
  title: "🌟 Gemini Models",
902
701
  description: "Gemini models and capabilities",
@@ -906,7 +705,6 @@ function registerBaseResources(server, runtime) {
906
705
  const contents = await runtime.resourceProvider.readResource(uri.href);
907
706
  return { contents: contents ? [contents] : [] };
908
707
  });
909
- // Register Grok models resource
910
708
  server.registerResource("grok-models", "models://grok", {
911
709
  title: "⚡ Grok Models",
912
710
  description: "Grok models and capabilities",
@@ -916,7 +714,6 @@ function registerBaseResources(server, runtime) {
916
714
  const contents = await runtime.resourceProvider.readResource(uri.href);
917
715
  return { contents: contents ? [contents] : [] };
918
716
  });
919
- // Register Mistral models resource
920
717
  server.registerResource("mistral-models", "models://mistral", {
921
718
  title: "🌬 Mistral Models",
922
719
  description: "Mistral Vibe models and capabilities",
@@ -926,7 +723,6 @@ function registerBaseResources(server, runtime) {
926
723
  const contents = await runtime.resourceProvider.readResource(uri.href);
927
724
  return { contents: contents ? [contents] : [] };
928
725
  });
929
- // Register performance metrics resource
930
726
  server.registerResource("performance-metrics", "metrics://performance", {
931
727
  title: "📈 Performance Metrics",
932
728
  description: "Request counts, latency, success/failure rates",
@@ -936,11 +732,6 @@ function registerBaseResources(server, runtime) {
936
732
  const contents = await runtime.resourceProvider.readResource(uri.href);
937
733
  return { contents: contents ? [contents] : [] };
938
734
  });
939
- // Cache-state resources (slice 2). Static URI for global, templated for
940
- // session/{id} and prefix/{hash}. All three return tokens/hashes/aggregates
941
- // ONLY — never raw prompt or response text. The structural guarantee is in
942
- // the SessionCacheStats / PrefixCacheStats / GlobalCacheStats types
943
- // themselves: those shapes have no prompt/response/system/task fields.
944
735
  server.registerResource("cache-state-global", "cache_state://global", {
945
736
  title: "💾 Cache State (Global)",
946
737
  description: "Aggregate cache hit/miss/savings across all CLIs in the flight recorder. Tokens/hashes only — no prompt text.",
@@ -999,11 +790,6 @@ function registerBaseResources(server, runtime) {
999
790
  };
1000
791
  });
1001
792
  }
1002
- /**
1003
- * Slice 1: validate the prompt / promptParts mutex at the prep boundary and
1004
- * return either an error response or the resolved input. The exact error
1005
- * messages are part of the public contract — tests assert them verbatim.
1006
- */
1007
793
  function resolvePromptOrPartsForPrep(args) {
1008
794
  const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
1009
795
  const hasParts = args.promptParts !== undefined;
@@ -1045,7 +831,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1045
831
  const assembledPrompt = inputResolution.assembledPrompt;
1046
832
  const stablePrefixHash = inputResolution.stablePrefixHash;
1047
833
  const stablePrefixTokens = inputResolution.stablePrefixTokens;
1048
- // Review integrity check on raw prompt (before optimization)
1049
834
  const reviewIntegrity = checkReviewIntegrity({
1050
835
  prompt: assembledPrompt,
1051
836
  allowedTools: params.allowedTools,
@@ -1058,13 +843,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1058
843
  score: reviewIntegrity.totalScore,
1059
844
  });
1060
845
  }
1061
- // Rec #5 (slice κ): refuse the optimizePrompt + cacheControl combo
1062
- // before running optimization. Optimization rewrites the assembled
1063
- // prompt text the flight-recorder logs, but the κ stdin payload is
1064
- // built from raw `promptParts` content blocks — letting both run
1065
- // produces a FR row whose `prompt` no longer matches what Claude
1066
- // actually received, AND any optimisation-driven text change would
1067
- // silently break Anthropic prefix-cache reuse on the next call.
1068
846
  const ccEarly = params.promptParts?.cacheControl;
1069
847
  const cacheControlRequestedEarly = !!(ccEarly &&
1070
848
  (ccEarly.system || ccEarly.tools || ccEarly.context));
@@ -1088,7 +866,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1088
866
  approvalDecision = runtime.approvalManager.decide({
1089
867
  cli: "claude",
1090
868
  operation: params.operation,
1091
- prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
869
+ prompt: assembledPrompt,
1092
870
  bypassRequested: params.dangerouslySkipPermissions,
1093
871
  fullAuto: false,
1094
872
  requestedMcpServers,
@@ -1102,18 +880,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1102
880
  return createApprovalDeniedResponse(params.operation, approvalDecision);
1103
881
  }
1104
882
  }
1105
- // Rec #2 (slice κ): auto-emit `cache_control` when the caller passes
1106
- // `promptParts` whose stable prefix exceeds the per-model minimum,
1107
- // the caller has NOT explicitly set `cacheControl`, the gateway
1108
- // config has opted in (`[cache_awareness].emit_anthropic_cache_control`),
1109
- // and outputFormat is stream-json. Auto-emit marks the LAST non-empty
1110
- // stable block (context → tools → system priority — the rightmost
1111
- // stable block covers the widest prefix). Skipped when optimizePrompt
1112
- // is on (same rec #5 desync risk).
1113
- //
1114
- // The 1h ttl is forced regardless of `anthropic_ttl_seconds`: 5m
1115
- // breakpoints from caller content are rejected by Anthropic once
1116
- // Claude Code's own 1h-marked session-wrap blocks land ahead of them.
1117
883
  let autoEmittedCacheControlBlock = null;
1118
884
  if (!cacheControlRequestedEarly &&
1119
885
  runtime.cacheAwareness.emitAnthropicCacheControl &&
@@ -1124,9 +890,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1124
890
  const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
1125
891
  if (stablePrefixTokens >= threshold) {
1126
892
  const pp = params.promptParts;
1127
- // Rightmost non-empty stable block — its cache_control breakpoint
1128
- // covers everything above it in the message (the API matches
1129
- // breakpoints in order).
1130
893
  if (pp.context && pp.context.length > 0)
1131
894
  autoEmittedCacheControlBlock = "context";
1132
895
  else if (pp.tools && pp.tools.length > 0)
@@ -1141,12 +904,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1141
904
  }
1142
905
  }
1143
906
  }
1144
- // Rec #4: warn when promptParts has a cacheable stable prefix but no
1145
- // cache_control breakpoint is being emitted (neither explicit nor
1146
- // auto). Either the caller forgot to set `cacheControl` or
1147
- // `[cache_awareness].emit_anthropic_cache_control` is off — both
1148
- // leave the stable prefix bytes unreused across calls, defeating the
1149
- // point of using `promptParts`.
1150
907
  const warnings = [];
1151
908
  if (!cacheControlRequestedEarly &&
1152
909
  autoEmittedCacheControlBlock === null &&
@@ -1168,13 +925,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1168
925
  });
1169
926
  }
1170
927
  }
1171
- // Slice κ: switch from the legacy positional `-p <prompt>` emission
1172
- // to `claude -p --input-format stream-json` and feed a JSON
1173
- // content-blocks payload via stdin. Non-κ callers (no cacheControl,
1174
- // or cacheControl with all flags false) take the existing positional
1175
- // path bit-for-bit. The κ path activates on EITHER an explicit caller
1176
- // opt-in (`cacheControlRequestedEarly`) OR a gateway-driven auto-emit
1177
- // (`autoEmittedCacheControlBlock`).
1178
928
  const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
1179
929
  let stdinPayload;
1180
930
  let cacheControlBlocks;
@@ -1182,9 +932,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1182
932
  if (params.outputFormat !== "stream-json") {
1183
933
  return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
1184
934
  }
1185
- // promptParts is non-null whenever cacheControlRequested is true
1186
- // (explicit opt-in lives in PromptParts; auto-emit guard requires
1187
- // promptParts to be defined).
1188
935
  const effectiveParts = autoEmittedCacheControlBlock !== null
1189
936
  ? {
1190
937
  ...params.promptParts,
@@ -1216,11 +963,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1216
963
  args.push("--output-format", "json");
1217
964
  }
1218
965
  else if (params.outputFormat === "stream-json") {
1219
- // Claude CLI 2.x rejects `--print --output-format stream-json` without
1220
- // `--verbose`: "When using --print, --output-format=stream-json requires
1221
- // --verbose". --verbose only affects what claude logs to stderr; the
1222
- // stream-json stdout payload is unchanged, so the gateway's NDJSON
1223
- // parser is unaffected.
1224
966
  args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
1225
967
  }
1226
968
  }
@@ -1251,7 +993,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1251
993
  args.push("--strict-mcp-config");
1252
994
  }
1253
995
  }
1254
- // U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
1255
996
  let validatedAgents;
1256
997
  if (params.agents && Object.keys(params.agents).length > 0) {
1257
998
  const result = validateClaudeAgentsMap(params.agents);
@@ -1273,6 +1014,10 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1273
1014
  fallbackModel: params.fallbackModel,
1274
1015
  jsonSchema: params.jsonSchema,
1275
1016
  addDir: params.addDir,
1017
+ noSessionPersistence: params.noSessionPersistence,
1018
+ settingSources: params.settingSources,
1019
+ settings: params.settings,
1020
+ tools: params.tools,
1276
1021
  }));
1277
1022
  return {
1278
1023
  corrId,
@@ -1305,7 +1050,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1305
1050
  const assembledPrompt = inputResolution.assembledPrompt;
1306
1051
  const stablePrefixHash = inputResolution.stablePrefixHash;
1307
1052
  const stablePrefixTokens = inputResolution.stablePrefixTokens;
1308
- // Review integrity check on raw prompt (before optimization)
1309
1053
  const reviewIntegrity = checkReviewIntegrity({ prompt: assembledPrompt });
1310
1054
  if (reviewIntegrity.violations.length > 0) {
1311
1055
  runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
@@ -1326,7 +1070,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1326
1070
  approvalDecision = runtime.approvalManager.decide({
1327
1071
  cli: "codex",
1328
1072
  operation: params.operation,
1329
- prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
1073
+ prompt: assembledPrompt,
1330
1074
  bypassRequested: params.dangerouslyBypassApprovalsAndSandbox,
1331
1075
  fullAuto: params.fullAuto,
1332
1076
  requestedMcpServers,
@@ -1338,9 +1082,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1338
1082
  return createApprovalDeniedResponse(params.operation, approvalDecision);
1339
1083
  }
1340
1084
  }
1341
- // Resume mode: codex exec resume <SESSION_ID|--last> [flags] PROMPT
1342
- // Note: `codex exec resume` does NOT accept sandbox policy flags; the original
1343
- // session's approval policy is inherited. We silently drop fullAuto on resume.
1344
1085
  let sessionPlan;
1345
1086
  try {
1346
1087
  sessionPlan = resolveCodexSessionArgs({
@@ -1361,9 +1102,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1361
1102
  }
1362
1103
  if (resolvedModel)
1363
1104
  args.push("--model", resolvedModel);
1364
- // Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
1365
- // `codex exec resume` rejects all of these (the original session's policy is
1366
- // inherited), so we only emit them when starting a NEW session.
1367
1105
  const sandboxFlags = resolveCodexSandboxFlags({
1368
1106
  sandboxMode: params.sandboxMode,
1369
1107
  askForApproval: params.askForApproval,
@@ -1379,26 +1117,12 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1379
1117
  if (params.dangerouslyBypassApprovalsAndSandbox) {
1380
1118
  args.push("--dangerously-bypass-approvals-and-sandbox");
1381
1119
  }
1382
- // U23 fix: emit `--json` when the caller asked for JSON output so the
1383
- // codex-json-parser actually receives JSONL events. This is what makes
1384
- // extractUsageAndCost() reachable from the tool surface; without it, the
1385
- // U23 parser is dead code.
1386
1120
  if (params.outputFormat === "json") {
1387
1121
  args.push("--json");
1388
1122
  }
1389
1123
  args.push("--skip-git-repo-check");
1390
- // U26: High-impact feature flags. `--search` is retained as a compatibility
1391
- // input but current `codex exec` no longer accepts it, so the helper warns
1392
- // and emits no argv. `--profile` is accepted for new sessions only. The other
1393
- // flags here are accepted on resume per `codex exec resume --help` and are
1394
- // emitted in both branches.
1395
1124
  let highImpactCleanup;
1396
1125
  if (sessionPlan.mode === "new") {
1397
- // Phase 4 slice ζ: emit working-dir and add-dir on new sessions only.
1398
- // Both flags are listed in CODEX_RESUME_FILTERED_FLAGS — resume inherits
1399
- // the original session's cwd and writable-dir policy, so emitting them
1400
- // on resume would be silently stripped (wasteful + misleading on argv
1401
- // logs). Gating here mirrors `--search` / `--sandbox`.
1402
1126
  if (params.workingDir) {
1403
1127
  args.push("-C", params.workingDir);
1404
1128
  }
@@ -1481,7 +1205,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1481
1205
  const assembledPrompt = inputResolution.assembledPrompt;
1482
1206
  const stablePrefixHash = inputResolution.stablePrefixHash;
1483
1207
  const stablePrefixTokens = inputResolution.stablePrefixTokens;
1484
- // Review integrity check on raw prompt (before optimization)
1485
1208
  const reviewIntegrity = checkReviewIntegrity({
1486
1209
  prompt: assembledPrompt,
1487
1210
  allowedTools: params.allowedTools,
@@ -1505,8 +1228,8 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1505
1228
  approvalDecision = runtime.approvalManager.decide({
1506
1229
  cli: "gemini",
1507
1230
  operation: params.operation,
1508
- prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
1509
- bypassRequested: params.approvalMode === "yolo",
1231
+ prompt: assembledPrompt,
1232
+ bypassRequested: params.approvalMode === "yolo" || params.yolo === true,
1510
1233
  fullAuto: false,
1511
1234
  requestedMcpServers,
1512
1235
  allowedTools: params.allowedTools,
@@ -1519,8 +1242,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1519
1242
  }
1520
1243
  }
1521
1244
  const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
1522
- // U27: Validate high-impact policy paths and prepend attachment tokens
1523
- // BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
1524
1245
  const highImpact = prepareGeminiHighImpactFlags({
1525
1246
  sandbox: params.sandbox,
1526
1247
  policyFiles: params.policyFiles,
@@ -1537,15 +1258,14 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1537
1258
  return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
1538
1259
  }
1539
1260
  }
1540
- // U21: Emit the prompt via -p/--prompt rather than as a positional argument.
1541
- // Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
1542
- // the documented non-interactive flag and is robust against future CLI mode
1543
- // changes.
1544
1261
  const args = ["-p", effectivePrompt];
1545
1262
  if (resolvedModel)
1546
1263
  args.push("--model", resolvedModel);
1547
1264
  if (effectiveApprovalMode)
1548
1265
  args.push("--approval-mode", effectiveApprovalMode);
1266
+ if (params.yolo && effectiveApprovalMode !== "yolo") {
1267
+ args.push("--yolo");
1268
+ }
1549
1269
  if (params.allowedTools && params.allowedTools.length > 0) {
1550
1270
  sanitizeCliArgValues(params.allowedTools, "allowedTools");
1551
1271
  params.allowedTools.forEach(tool => args.push("--allowed-tools", tool));
@@ -1558,26 +1278,13 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1558
1278
  sanitizeCliArgValues(params.includeDirs, "includeDirs");
1559
1279
  params.includeDirs.forEach(dir => args.push("--include-directories", dir));
1560
1280
  }
1561
- // U27 high-impact flags (-s / --policy / --admin-policy) appended after the
1562
- // existing flag set so positional ordering relative to `-p` is preserved.
1563
1281
  args.push(...highImpact.args);
1564
- // U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
1565
- // JSON parser is otherwise unreachable from the tool surface and the
1566
- // structured usageMetadata is silently dropped.
1567
- //
1568
- // Phase 4 slice ε: same wiring for `-o stream-json` (NDJSON event stream).
1569
- // Gemini already streams stdout in real-time so the existing 10-minute
1570
- // idle timeout (CLI_IDLE_TIMEOUTS.gemini) covers both modes without
1571
- // adjustment — unlike Claude, no `--include-partial-messages` companion
1572
- // flag is required because Gemini emits assistant `delta` events as part
1573
- // of the default stream-json shape.
1574
1282
  if (params.outputFormat === "json") {
1575
1283
  args.push("-o", "json");
1576
1284
  }
1577
1285
  else if (params.outputFormat === "stream-json") {
1578
1286
  args.push("-o", "stream-json");
1579
1287
  }
1580
- // Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
1581
1288
  if (params.skipTrust) {
1582
1289
  args.push("--skip-trust");
1583
1290
  }
@@ -1608,7 +1315,6 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1608
1315
  const assembledPrompt = inputResolution.assembledPrompt;
1609
1316
  const stablePrefixHash = inputResolution.stablePrefixHash;
1610
1317
  const stablePrefixTokens = inputResolution.stablePrefixTokens;
1611
- // Review integrity check on raw prompt (before optimization)
1612
1318
  const reviewIntegrity = checkReviewIntegrity({
1613
1319
  prompt: assembledPrompt,
1614
1320
  allowedTools: params.allowedTools,
@@ -1633,7 +1339,7 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1633
1339
  approvalDecision = runtime.approvalManager.decide({
1634
1340
  cli: "grok",
1635
1341
  operation: params.operation,
1636
- prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
1342
+ prompt: assembledPrompt,
1637
1343
  bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
1638
1344
  fullAuto: false,
1639
1345
  requestedMcpServers,
@@ -1694,6 +1400,12 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1694
1400
  args.push("--deny", rule);
1695
1401
  }
1696
1402
  }
1403
+ if (params.compactionMode) {
1404
+ args.push("--compaction-mode", params.compactionMode);
1405
+ }
1406
+ if (params.compactionDetail) {
1407
+ args.push("--compaction-detail", params.compactionDetail);
1408
+ }
1697
1409
  return {
1698
1410
  corrId,
1699
1411
  effectivePrompt,
@@ -1762,9 +1474,6 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
1762
1474
  return createApprovalDeniedResponse(params.operation, approvalDecision);
1763
1475
  }
1764
1476
  }
1765
- // Under mcp_managed, force --agent auto-approve so the approval gate's
1766
- // verdict carries through to the CLI invocation (mirrors Grok's --always-approve
1767
- // forcing under mcp_managed).
1768
1477
  const effectivePermissionMode = params.approvalStrategy === "mcp_managed"
1769
1478
  ? "auto-approve"
1770
1479
  : (params.permissionMode ?? "auto-approve");
@@ -1773,8 +1482,6 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
1773
1482
  resolvedModel,
1774
1483
  outputFormat: params.outputFormat,
1775
1484
  permissionMode: effectivePermissionMode,
1776
- effort: params.effort,
1777
- reasoningEffort: params.reasoningEffort,
1778
1485
  allowedTools: params.allowedTools,
1779
1486
  disallowedTools: params.disallowedTools,
1780
1487
  trust: params.trust,
@@ -1813,15 +1520,6 @@ function selectMistralRecoveryModel(failedModel) {
1813
1520
  ].filter((model) => Boolean(model && model !== failedModel));
1814
1521
  return candidates.find(model => model !== "local");
1815
1522
  }
1816
- /**
1817
- * Phase 4 slice δ post-review: pure helper extracted from
1818
- * `handleMistralRequest` so the retry-path arg-preservation invariants
1819
- * (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
1820
- * without mocking awaitJobOrDefer. Any param the wrapper threads into
1821
- * the FIRST `buildMistralCliInvocation` call MUST also be threaded
1822
- * through here, or a fresh-workspace / budgeted run can degrade on
1823
- * the second attempt.
1824
- */
1825
1523
  export function buildMistralRetryPrep(params, recoveryModel) {
1826
1524
  return buildMistralCliInvocation({
1827
1525
  prompt: params.effectivePrompt,
@@ -1830,8 +1528,6 @@ export function buildMistralRetryPrep(params, recoveryModel) {
1830
1528
  permissionMode: params.approvalStrategy === "mcp_managed"
1831
1529
  ? "auto-approve"
1832
1530
  : (params.permissionMode ?? "auto-approve"),
1833
- effort: params.effort,
1834
- reasoningEffort: params.reasoningEffort,
1835
1531
  allowedTools: params.allowedTools,
1836
1532
  disallowedTools: params.disallowedTools,
1837
1533
  trust: params.trust,
@@ -1844,13 +1540,11 @@ export function buildMistralRetryPrep(params, recoveryModel) {
1844
1540
  }
1845
1541
  function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
1846
1542
  let finalStdout = stdout;
1847
- // Skip response optimization for JSON output to prevent corrupting structured data
1848
1543
  if (optimizeResponse && outputFormat !== "json") {
1849
1544
  const optimized = optimizeResponseText(finalStdout);
1850
1545
  logOptimizationTokens("response", corrId, finalStdout, optimized);
1851
1546
  finalStdout = optimized;
1852
1547
  }
1853
- // Append review integrity warnings to response text (skip for JSON output to avoid corruption)
1854
1548
  if (prep.reviewIntegrity &&
1855
1549
  prep.reviewIntegrity.violations.length > 0 &&
1856
1550
  outputFormat !== "json") {
@@ -1867,9 +1561,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
1867
1561
  correlationId: corrId,
1868
1562
  sessionId: sessionId || null,
1869
1563
  durationMs,
1870
- // Phase 4 slice β: thread sessionId + home so the Mistral branch of
1871
- // extractUsageAndCost can read `~/.vibe/logs/session/<dir>/meta.json`.
1872
- // Other CLIs ignore the ctx (their usage source is stdout).
1873
1564
  ...extractUsageAndCost(cli, stdout, outputFormat, { sessionId, home: homedir() }),
1874
1565
  exitCode: 0,
1875
1566
  retryCount: 0,
@@ -1899,12 +1590,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
1899
1590
  }
1900
1591
  return response;
1901
1592
  }
1902
- /**
1903
- * Slice 3 helper: compute the cache_ttl_expiring_soon warning for a
1904
- * claude session, if the feature is enabled, the session has prior cache
1905
- * writes, and ttlRemainingMs is below the threshold (30s by default).
1906
- * Returns null when no warning applies.
1907
- */
1908
1593
  function maybeBuildCacheTtlWarning(args) {
1909
1594
  if (args.cli !== "claude")
1910
1595
  return null;
@@ -1933,7 +1618,6 @@ function resolveHandlerRuntime(deps) {
1933
1618
  if (deps.runtime)
1934
1619
  return deps.runtime;
1935
1620
  const asyncDeps = deps;
1936
- // Older HandlerDeps callers may not provide `warn`; default-route to `info`.
1937
1621
  const depLogger = deps.logger;
1938
1622
  const normalizedLogger = {
1939
1623
  info: depLogger.info,
@@ -1969,6 +1653,7 @@ export async function handleGeminiRequest(deps, params) {
1969
1653
  adminPolicyFiles: params.adminPolicyFiles,
1970
1654
  attachments: params.attachments,
1971
1655
  skipTrust: params.skipTrust,
1656
+ yolo: params.yolo,
1972
1657
  }, runtime);
1973
1658
  if (!("args" in prep))
1974
1659
  return prep;
@@ -1986,8 +1671,6 @@ export async function handleGeminiRequest(deps, params) {
1986
1671
  }, runtime);
1987
1672
  deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${prep.effectivePrompt.length}`);
1988
1673
  try {
1989
- // Gemini CLI 0.43 supports `--resume`, but not a supported fresh
1990
- // `--session-id` flag. Fresh sessions emit no session flag.
1991
1674
  const sessionPlan = resolveGeminiSessionPlan({
1992
1675
  sessionId: params.sessionId,
1993
1676
  resumeLatest: params.resumeLatest,
@@ -2005,7 +1688,6 @@ export async function handleGeminiRequest(deps, params) {
2005
1688
  }
2006
1689
  const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
2007
1690
  const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage, worktreeResolution.cwd);
2008
- // Deferred — job still running, return async reference
2009
1691
  if (isDeferredResponse(result)) {
2010
1692
  return buildDeferredToolResponse(result, effectiveSessionIdHint);
2011
1693
  }
@@ -2026,9 +1708,6 @@ export async function handleGeminiRequest(deps, params) {
2026
1708
  return createErrorResponse("gemini", code, stderr, corrId);
2027
1709
  }
2028
1710
  wasSuccessful = true;
2029
- // Post-success session I/O for explicit resume flows. Fresh Gemini sessions
2030
- // are owned by the CLI because the current CLI has no supported fresh
2031
- // session-id flag the gateway can inject.
2032
1711
  let effectiveSessionId = effectiveSessionIdHint;
2033
1712
  if (effectiveSessionId) {
2034
1713
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2111,19 +1790,18 @@ export async function handleGeminiRequestAsync(deps, params) {
2111
1790
  adminPolicyFiles: params.adminPolicyFiles,
2112
1791
  attachments: params.attachments,
2113
1792
  skipTrust: params.skipTrust,
1793
+ yolo: params.yolo,
2114
1794
  }, runtime);
2115
1795
  if (!("args" in prep))
2116
1796
  return prep;
2117
1797
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
2118
1798
  try {
2119
- // Gemini CLI 0.43 supports `--resume`, but fresh sessions emit no session flag.
2120
1799
  const sessionPlan = resolveGeminiSessionPlan({
2121
1800
  sessionId: params.sessionId,
2122
1801
  resumeLatest: params.resumeLatest,
2123
1802
  createNewSession: params.createNewSession,
2124
1803
  });
2125
1804
  args.push(...sessionPlan.args);
2126
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
2127
1805
  let effectiveSessionId = sessionPlan.resumed ? params.sessionId : undefined;
2128
1806
  if (effectiveSessionId) {
2129
1807
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2146,13 +1824,8 @@ export async function handleGeminiRequestAsync(deps, params) {
2146
1824
  catch (err) {
2147
1825
  return createErrorResponse("gemini_request_async", 1, "", corrId, err);
2148
1826
  }
2149
- // Start job only after all session I/O succeeds. U23: forward outputFormat
2150
- // so AsyncJobManager records it in the durable store (the manager also
2151
- // surfaces it in the snapshot).
2152
1827
  assertUpstreamCliArgs("gemini", args);
2153
1828
  assertUpstreamCliEnv("gemini", undefined);
2154
- // Slice 1.5: pure async path — no upstream safeFlightStart, so the
2155
- // manager owns both logStart and logComplete for this corrId.
2156
1829
  const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
2157
1830
  const job = deps.asyncJobManager.startJob("gemini", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
2158
1831
  deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
@@ -2210,6 +1883,8 @@ export async function handleGrokRequest(deps, params) {
2210
1883
  systemPromptOverride: params.systemPromptOverride,
2211
1884
  allow: params.allow,
2212
1885
  deny: params.deny,
1886
+ compactionMode: params.compactionMode,
1887
+ compactionDetail: params.compactionDetail,
2213
1888
  }, runtime);
2214
1889
  if (!("args" in prep))
2215
1890
  return prep;
@@ -2227,7 +1902,6 @@ export async function handleGrokRequest(deps, params) {
2227
1902
  }, runtime);
2228
1903
  deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${prep.effectivePrompt.length}`);
2229
1904
  try {
2230
- // Session arg planning (pure, no I/O)
2231
1905
  const sessionResult = resolveGrokSessionArgs({
2232
1906
  sessionId: params.sessionId,
2233
1907
  resumeLatest: params.resumeLatest,
@@ -2243,7 +1917,6 @@ export async function handleGrokRequest(deps, params) {
2243
1917
  }
2244
1918
  const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
2245
1919
  const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
2246
- // Deferred — job still running, return async reference
2247
1920
  if (isDeferredResponse(result)) {
2248
1921
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
2249
1922
  }
@@ -2264,7 +1937,6 @@ export async function handleGrokRequest(deps, params) {
2264
1937
  return createErrorResponse("grok", code, stderr, corrId);
2265
1938
  }
2266
1939
  wasSuccessful = true;
2267
- // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
2268
1940
  let effectiveSessionId = sessionResult.effectiveSessionId;
2269
1941
  if (sessionResult.userProvidedSession && effectiveSessionId) {
2270
1942
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2350,19 +2022,19 @@ export async function handleGrokRequestAsync(deps, params) {
2350
2022
  systemPromptOverride: params.systemPromptOverride,
2351
2023
  allow: params.allow,
2352
2024
  deny: params.deny,
2025
+ compactionMode: params.compactionMode,
2026
+ compactionDetail: params.compactionDetail,
2353
2027
  }, runtime);
2354
2028
  if (!("args" in prep))
2355
2029
  return prep;
2356
2030
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
2357
2031
  try {
2358
- // Session arg planning (pure, no I/O)
2359
2032
  const sessionResult = resolveGrokSessionArgs({
2360
2033
  sessionId: params.sessionId,
2361
2034
  resumeLatest: params.resumeLatest,
2362
2035
  createNewSession: params.createNewSession,
2363
2036
  });
2364
2037
  args.push(...sessionResult.resumeArgs);
2365
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
2366
2038
  let effectiveSessionId = sessionResult.effectiveSessionId;
2367
2039
  if (sessionResult.userProvidedSession && effectiveSessionId) {
2368
2040
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2389,7 +2061,6 @@ export async function handleGrokRequestAsync(deps, params) {
2389
2061
  catch (err) {
2390
2062
  return createErrorResponse("grok_request_async", 1, "", corrId, err);
2391
2063
  }
2392
- // Start job only after all session I/O succeeds
2393
2064
  assertUpstreamCliArgs("grok", args);
2394
2065
  assertUpstreamCliEnv("grok", undefined);
2395
2066
  const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
@@ -2431,8 +2102,6 @@ export async function handleMistralRequest(deps, params) {
2431
2102
  model: params.model,
2432
2103
  outputFormat: params.outputFormat,
2433
2104
  permissionMode: params.permissionMode,
2434
- effort: params.effort,
2435
- reasoningEffort: params.reasoningEffort,
2436
2105
  allowedTools: params.allowedTools,
2437
2106
  disallowedTools: params.disallowedTools,
2438
2107
  approvalStrategy: params.approvalStrategy,
@@ -2488,8 +2157,6 @@ export async function handleMistralRequest(deps, params) {
2488
2157
  deps.logger.info(`[${corrId}] mistral_request detected stale Vibe model selection; retrying once with ${recoveryModel}`);
2489
2158
  const retryPrep = buildMistralRetryPrep({ ...params, effectivePrompt: prep.effectivePrompt }, recoveryModel);
2490
2159
  const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
2491
- // Reuse the FR handoff built above — the retry preserves corrId,
2492
- // so the manager's logComplete still updates the original row.
2493
2160
  result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
2494
2161
  if (isDeferredResponse(result)) {
2495
2162
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -2582,8 +2249,6 @@ export async function handleMistralRequestAsync(deps, params) {
2582
2249
  model: params.model,
2583
2250
  outputFormat: params.outputFormat,
2584
2251
  permissionMode: params.permissionMode,
2585
- effort: params.effort,
2586
- reasoningEffort: params.reasoningEffort,
2587
2252
  allowedTools: params.allowedTools,
2588
2253
  disallowedTools: params.disallowedTools,
2589
2254
  approvalStrategy: params.approvalStrategy,
@@ -2702,11 +2367,6 @@ export async function handleCodexRequestAsync(deps, params) {
2702
2367
  if (!("args" in prep))
2703
2368
  return prep;
2704
2369
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
2705
- // U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
2706
- // exactly one place at a time: this scope until startJob succeeds, then
2707
- // AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
2708
- // the job is registered. Any code path that fails to hand it off MUST run
2709
- // it locally.
2710
2370
  const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
2711
2371
  let prepCleanupOwnedHere = prepCleanup !== undefined;
2712
2372
  const runPrepCleanupLocally = () => {
@@ -2721,7 +2381,6 @@ export async function handleCodexRequestAsync(deps, params) {
2721
2381
  }
2722
2382
  };
2723
2383
  try {
2724
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
2725
2384
  let effectiveSessionId = params.sessionId;
2726
2385
  if (!params.createNewSession && !params.sessionId) {
2727
2386
  const activeSession = await deps.sessionManager.getActiveSession("codex");
@@ -2740,9 +2399,6 @@ export async function handleCodexRequestAsync(deps, params) {
2740
2399
  const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
2741
2400
  effectiveSessionId = newSession.id;
2742
2401
  }
2743
- // Slice λ: resolve worktree directive after session I/O so resume reuse
2744
- // can read metadata.worktreePath. A pre-startJob failure here means
2745
- // prepCleanup is still owned locally; run it before returning.
2746
2402
  let worktreeResolution = {};
2747
2403
  try {
2748
2404
  worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
@@ -2751,22 +2407,15 @@ export async function handleCodexRequestAsync(deps, params) {
2751
2407
  runPrepCleanupLocally();
2752
2408
  return createErrorResponse("codex_request_async", 1, "", corrId, err);
2753
2409
  }
2754
- // Start job only after all session I/O succeeds. If startJob throws before
2755
- // registering the record, ownership stays here and we run it in the catch.
2756
2410
  assertUpstreamCliArgs("codex", args);
2757
2411
  assertUpstreamCliEnv("codex", undefined);
2758
2412
  const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
2759
2413
  let job;
2760
2414
  try {
2761
2415
  job = deps.asyncJobManager.startJob("codex", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
2762
- // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
2763
- // status. Release our local ownership claim so the catch path doesn't
2764
- // double-fire.
2765
2416
  prepCleanupOwnedHere = false;
2766
2417
  }
2767
2418
  catch (startErr) {
2768
- // startJob never stored the record → manager won't call onComplete. We
2769
- // still own the cleanup; let the outer catch run it.
2770
2419
  throw startErr;
2771
2420
  }
2772
2421
  deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
@@ -2793,42 +2442,15 @@ export async function handleCodexRequestAsync(deps, params) {
2793
2442
  };
2794
2443
  }
2795
2444
  catch (error) {
2796
- // Pre-start failure: either session I/O threw, or startJob threw before
2797
- // registering the record. In either case the manager will NOT fire
2798
- // prepCleanup, so we must run it here.
2799
2445
  runPrepCleanupLocally();
2800
2446
  return createErrorResponse("codex_request_async", 1, "", corrId, error);
2801
2447
  }
2802
2448
  }
2803
- //──────────────────────────────────────────────────────────────────────────────
2804
- // Claude Code Tool
2805
- //──────────────────────────────────────────────────────────────────────────────
2806
2449
  export function createGatewayServer(deps = {}) {
2807
2450
  const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
2808
2451
  const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, } = runtime;
2809
- // `flightRecorder` is destructured into closure scope so the session_get
2810
- // handler (see ~line 5590) has the FlightRecorderQuery read capability
2811
- // available without re-resolving runtime. Slice 2 will populate the
2812
- // `cacheState` field of session_get's response from this read surface.
2813
- // `cacheAwareness` is the loaded [cache_awareness] block (config.ts).
2814
2452
  void flightRecorder;
2815
2453
  void cacheAwareness;
2816
- // Structural invariant: tools register iff ALL THREE conditions hold:
2817
- // (1) persistence.backend !== "none" — the operator/config has not
2818
- // explicitly disabled durable persistence;
2819
- // (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
2820
- // agrees (loadPersistenceConfig sets this iff backend is one of
2821
- // sqlite/postgres/memory);
2822
- // (3) asyncJobManager.hasStore() === true — the runtime manager
2823
- // actually has a store attached (isolate-mode runtimes use null).
2824
- //
2825
- // Each guard closes a distinct re-entry path for the silent-loss footgun:
2826
- // - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
2827
- // and re-advertise the async tools while reporting backend='none' in
2828
- // llm_process_health — exactly contradicting SPEC CLAIM 4f.
2829
- // - Without (2), config that opts out is ignored.
2830
- // - Without (3), a null-store manager (isolate-mode / HTTP per-session)
2831
- // accepts registrations that have nowhere to persist results.
2832
2454
  const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
2833
2455
  const server = newGatewayMcpServer();
2834
2456
  registerBaseResources(server, runtime);
@@ -2865,7 +2487,6 @@ export function createGatewayServer(deps = {}) {
2865
2487
  .enum(CLAUDE_PERMISSION_MODES)
2866
2488
  .optional()
2867
2489
  .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
2868
- // U25 — Claude high-impact features
2869
2490
  agent: z
2870
2491
  .string()
2871
2492
  .optional()
@@ -2905,7 +2526,6 @@ export function createGatewayServer(deps = {}) {
2905
2526
  .boolean()
2906
2527
  .optional()
2907
2528
  .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
2908
- // Phase 4 slice η — Claude reliability + structured-output parity
2909
2529
  fallbackModel: z
2910
2530
  .string()
2911
2531
  .min(1)
@@ -2915,11 +2535,28 @@ export function createGatewayServer(deps = {}) {
2915
2535
  .union([z.string(), z.record(z.string(), z.unknown())])
2916
2536
  .optional()
2917
2537
  .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
2918
- // Phase 4 slice ζ — Claude additional-workspace-dirs parity
2919
2538
  addDir: z
2920
2539
  .array(z.string())
2921
2540
  .optional()
2922
2541
  .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
2542
+ noSessionPersistence: z
2543
+ .boolean()
2544
+ .optional()
2545
+ .describe("Claude --no-session-persistence: do not write this session to disk (ephemeral one-shot runs; mirrors codex --ephemeral)."),
2546
+ settingSources: z
2547
+ .string()
2548
+ .min(1)
2549
+ .optional()
2550
+ .describe("Claude --setting-sources: comma-separated setting sources to load (user|project|local) for reproducible/isolated headless runs."),
2551
+ settings: z
2552
+ .string()
2553
+ .min(1)
2554
+ .optional()
2555
+ .describe("Claude --settings: path to a settings JSON file or a JSON literal of additional settings. Powerful: settings can define hooks/permissions/model; passed verbatim."),
2556
+ tools: z
2557
+ .array(z.string())
2558
+ .optional()
2559
+ .describe('Claude --tools: restrict the available built-in tool set (distinct from allowedTools permission gating). Pass [""] to disable all tools.'),
2923
2560
  worktree: WORKTREE_SCHEMA.optional(),
2924
2561
  approvalStrategy: z
2925
2562
  .enum(["legacy", "mcp_managed"])
@@ -2951,7 +2588,7 @@ export function createGatewayServer(deps = {}) {
2951
2588
  .boolean()
2952
2589
  .default(false)
2953
2590
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
2954
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2591
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
2955
2592
  const startTime = Date.now();
2956
2593
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
2957
2594
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -2984,24 +2621,18 @@ export function createGatewayServer(deps = {}) {
2984
2621
  fallbackModel,
2985
2622
  jsonSchema,
2986
2623
  addDir,
2624
+ noSessionPersistence,
2625
+ settingSources,
2626
+ settings,
2627
+ tools,
2987
2628
  }, runtime);
2988
2629
  if (!("args" in prep))
2989
2630
  return prep;
2990
2631
  const { corrId, args } = prep;
2991
2632
  let durationMs = 0;
2992
2633
  let wasSuccessful = false;
2993
- // Session resolution happens BEFORE safeFlightStart so that:
2994
- // (1) the TTL warning reads the PRIOR session's lastWriteAt
2995
- // rather than the row about to be inserted (codex-r1/F1).
2996
- // (2) the flight-recorder row is tagged with effectiveSessionId
2997
- // (the session the CLI will actually resume), not the raw
2998
- // user-provided sessionId.
2999
2634
  let effectiveSessionId = sessionId;
3000
2635
  let useContinue = continueSession;
3001
- // Guard the active-session lookup: in some test harnesses the
3002
- // sessionManager is undefined; the original try-catch wrapped this
3003
- // block, so we replicate that tolerance here. Failure leaves
3004
- // effectiveSessionId as the user-provided sessionId.
3005
2636
  let activeSession = null;
3006
2637
  try {
3007
2638
  activeSession = await sessionManager.getActiveSession("claude");
@@ -3016,16 +2647,11 @@ export function createGatewayServer(deps = {}) {
3016
2647
  if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
3017
2648
  useContinue = true;
3018
2649
  }
3019
- // Slice 3: if the resolved session has a near-expiry Anthropic
3020
- // cache breakpoint, attach a structured warning (NOT a hard error)
3021
- // to the response. Computed BEFORE safeFlightStart so the current
3022
- // row does not skew lastRequestAt.
3023
2650
  const ttlWarning = maybeBuildCacheTtlWarning({
3024
2651
  runtime,
3025
2652
  sessionId: effectiveSessionId,
3026
2653
  cli: "claude",
3027
2654
  });
3028
- // Rec #4: include any prep-time warnings (e.g. cacheable_prefix_uncached).
3029
2655
  const warnings = [
3030
2656
  ...(ttlWarning ? [ttlWarning] : []),
3031
2657
  ...(prep.warnings ?? []),
@@ -3049,8 +2675,6 @@ export function createGatewayServer(deps = {}) {
3049
2675
  args.push("--session-id", effectiveSessionId);
3050
2676
  await sessionManager.updateSessionUsage(effectiveSessionId);
3051
2677
  }
3052
- // Slice λ: resolve worktree directive into spawn cwd. Done after
3053
- // session resolution so resume reuse can read metadata.worktreePath.
3054
2678
  let worktreeResolution = {};
3055
2679
  try {
3056
2680
  worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
@@ -3058,11 +2682,9 @@ export function createGatewayServer(deps = {}) {
3058
2682
  catch (err) {
3059
2683
  return createErrorResponse("claude_request", 1, "", corrId, err);
3060
2684
  }
3061
- // Idle timeout only for stream-json (text/json produce no output until done)
3062
2685
  const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
3063
2686
  const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
3064
2687
  const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage, prep.stdinPayload, worktreeResolution.cwd);
3065
- // Deferred — job still running, return async reference
3066
2688
  if (isDeferredResponse(result)) {
3067
2689
  return buildDeferredToolResponse(result, effectiveSessionId);
3068
2690
  }
@@ -3080,9 +2702,6 @@ export function createGatewayServer(deps = {}) {
3080
2702
  errorMessage: stderr || `Exit code ${code}`,
3081
2703
  status: "failed",
3082
2704
  }, runtime);
3083
- // Slice 3: attach any computed warnings to the error response so
3084
- // the caller still sees cache_ttl_expiring_soon when the CLI
3085
- // happens to fail for an unrelated reason.
3086
2705
  const errResp = createErrorResponse("claude", code, stderr, corrId);
3087
2706
  if (warnings.length > 0) {
3088
2707
  errResp.warnings = warnings;
@@ -3090,7 +2709,6 @@ export function createGatewayServer(deps = {}) {
3090
2709
  return errResp;
3091
2710
  }
3092
2711
  wasSuccessful = true;
3093
- // If we used a session ID and it's not tracked yet, create a session record
3094
2712
  if (effectiveSessionId) {
3095
2713
  const existingSession = await sessionManager.getSession(effectiveSessionId);
3096
2714
  if (!existingSession) {
@@ -3098,7 +2716,6 @@ export function createGatewayServer(deps = {}) {
3098
2716
  }
3099
2717
  }
3100
2718
  logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
3101
- // Parse stream-json NDJSON output to extract result text
3102
2719
  if (outputFormat === "stream-json") {
3103
2720
  const parsed = parseStreamJson(stdout);
3104
2721
  if (parsed.costUsd !== null) {
@@ -3165,9 +2782,6 @@ export function createGatewayServer(deps = {}) {
3165
2782
  performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
3166
2783
  }
3167
2784
  });
3168
- //──────────────────────────────────────────────────────────────────────────────
3169
- // Codex Tool
3170
- //──────────────────────────────────────────────────────────────────────────────
3171
2785
  server.tool("codex_request", {
3172
2786
  prompt: z
3173
2787
  .string()
@@ -3232,14 +2846,10 @@ export function createGatewayServer(deps = {}) {
3232
2846
  .boolean()
3233
2847
  .default(false)
3234
2848
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3235
- // U23: emit `--json` so the codex-json-parser surfaces input/output/cache
3236
- // tokens (and any cost) through extractUsageAndCost. Without "json", the
3237
- // parser is unreachable and Codex usage is never reported.
3238
2849
  outputFormat: z
3239
2850
  .enum(["text", "json"])
3240
2851
  .default("text")
3241
2852
  .describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
3242
- // U26: high-impact feature flags. All optional.
3243
2853
  outputSchema: z
3244
2854
  .union([z.string(), z.record(z.string(), z.unknown())])
3245
2855
  .optional()
@@ -3269,7 +2879,6 @@ export function createGatewayServer(deps = {}) {
3269
2879
  .boolean()
3270
2880
  .optional()
3271
2881
  .describe("Codex --ignore-rules: skip project rule files for this run."),
3272
- // Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
3273
2882
  workingDir: z
3274
2883
  .string()
3275
2884
  .min(1)
@@ -3327,15 +2936,7 @@ export function createGatewayServer(deps = {}) {
3327
2936
  stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
3328
2937
  }, runtime);
3329
2938
  logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prep.effectivePrompt.length}`);
3330
- // U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
3331
- // guarantees the cleanup runs exactly once — inline for direct
3332
- // execution, on terminal status for the job-backed path (sync
3333
- // completion or deferred). The outer finally MUST NOT clean again.
3334
2939
  const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
3335
- // Slice λ: resolve worktree directive into spawn cwd. Codex has no
3336
- // in-handler session resolution prior to spawn (session lookup is
3337
- // lazy via `codex exec resume`), so the user-supplied sessionId is
3338
- // the only reuse key.
3339
2940
  let worktreeResolution = {};
3340
2941
  try {
3341
2942
  worktreeResolution = await resolveWorktreeForRequest(worktree, sessionId, runtime);
@@ -3346,8 +2947,6 @@ export function createGatewayServer(deps = {}) {
3346
2947
  try {
3347
2948
  const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
3348
2949
  const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
3349
- // Deferred — job still running, return async reference. Cleanup
3350
- // ownership belongs to AsyncJobManager via onComplete.
3351
2950
  if (isDeferredResponse(result)) {
3352
2951
  return buildDeferredToolResponse(result, sessionId);
3353
2952
  }
@@ -3368,7 +2967,6 @@ export function createGatewayServer(deps = {}) {
3368
2967
  return createErrorResponse("codex", code, stderr, corrId);
3369
2968
  }
3370
2969
  wasSuccessful = true;
3371
- // Track session usage
3372
2970
  let effectiveSessionId = sessionId;
3373
2971
  if (!createNewSession && !sessionId) {
3374
2972
  const activeSession = await sessionManager.getActiveSession("codex");
@@ -3430,12 +3028,8 @@ export function createGatewayServer(deps = {}) {
3430
3028
  finally {
3431
3029
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
3432
3030
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3433
- // Cleanup is owned by awaitJobOrDefer's contract; nothing to do here.
3434
3031
  }
3435
3032
  });
3436
- //──────────────────────────────────────────────────────────────────────────────
3437
- // U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
3438
- //──────────────────────────────────────────────────────────────────────────────
3439
3033
  server.tool("codex_fork_session", {
3440
3034
  prompt: z
3441
3035
  .string()
@@ -3472,8 +3066,6 @@ export function createGatewayServer(deps = {}) {
3472
3066
  const startTime = Date.now();
3473
3067
  let durationMs = 0;
3474
3068
  let wasSuccessful = false;
3475
- // Enforce mutual exclusion at tool boundary (Zod records the params but
3476
- // the SDK's `.tool(...)` does not accept top-level refines).
3477
3069
  if (sessionId && forkLast) {
3478
3070
  return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("sessionId and forkLast are mutually exclusive"));
3479
3071
  }
@@ -3489,11 +3081,6 @@ export function createGatewayServer(deps = {}) {
3489
3081
  }
3490
3082
  const cliInfo = getCliInfo();
3491
3083
  const resolvedModel = resolveModelAlias("codex", model, cliInfo);
3492
- // Compose argv: forkArgs already starts with `fork`. Inject model and
3493
- // sandbox/approval flags BEFORE the positional <sessionId|--last> +
3494
- // prompt to keep them as flags rather than positionals. forkArgs layout
3495
- // is either ["fork", "--last", prompt] or ["fork", sessionId, prompt];
3496
- // we splice flags right after "fork".
3497
3084
  const flagSegment = [];
3498
3085
  if (resolvedModel)
3499
3086
  flagSegment.push("--model", resolvedModel);
@@ -3530,9 +3117,6 @@ export function createGatewayServer(deps = {}) {
3530
3117
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3531
3118
  }
3532
3119
  });
3533
- //──────────────────────────────────────────────────────────────────────────────
3534
- // Gemini Tool
3535
- //──────────────────────────────────────────────────────────────────────────────
3536
3120
  server.tool("gemini_request", {
3537
3121
  prompt: z
3538
3122
  .string()
@@ -3583,11 +3167,6 @@ export function createGatewayServer(deps = {}) {
3583
3167
  .boolean()
3584
3168
  .default(false)
3585
3169
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3586
- // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
3587
- // remains text so existing callers see no behavior change. Phase 4 slice
3588
- // ε adds `stream-json` (NDJSON event stream parsed by
3589
- // parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
3590
- // semantics covered by Gemini's existing real-time stdout streaming).
3591
3170
  outputFormat: z
3592
3171
  .enum(["text", "json", "stream-json"])
3593
3172
  .default("text")
@@ -3600,8 +3179,12 @@ export function createGatewayServer(deps = {}) {
3600
3179
  .boolean()
3601
3180
  .default(false)
3602
3181
  .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
3182
+ yolo: z
3183
+ .boolean()
3184
+ .optional()
3185
+ .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
3603
3186
  worktree: WORKTREE_SCHEMA.optional(),
3604
- }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
3187
+ }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
3605
3188
  return handleGeminiRequest({ sessionManager, logger, runtime }, {
3606
3189
  prompt,
3607
3190
  promptParts,
@@ -3626,12 +3209,10 @@ export function createGatewayServer(deps = {}) {
3626
3209
  adminPolicyFiles,
3627
3210
  attachments,
3628
3211
  skipTrust,
3212
+ yolo,
3629
3213
  worktree,
3630
3214
  });
3631
3215
  });
3632
- //──────────────────────────────────────────────────────────────────────────────
3633
- // Grok Tool
3634
- //──────────────────────────────────────────────────────────────────────────────
3635
3216
  server.tool("grok_request", {
3636
3217
  prompt: z
3637
3218
  .string()
@@ -3702,13 +3283,11 @@ export function createGatewayServer(deps = {}) {
3702
3283
  .default(false)
3703
3284
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3704
3285
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
3705
- // Phase 4 slice ζ — Grok working-directory parity.
3706
3286
  workingDir: z
3707
3287
  .string()
3708
3288
  .min(1)
3709
3289
  .optional()
3710
3290
  .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
3711
- // Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
3712
3291
  sandbox: z
3713
3292
  .string()
3714
3293
  .min(1)
@@ -3732,8 +3311,16 @@ export function createGatewayServer(deps = {}) {
3732
3311
  .array(z.string())
3733
3312
  .optional()
3734
3313
  .describe('Grok --deny <RULE>: permission deny rules. Each entry is emitted as its own --deny instance (per `grok --help`: "Repeat to add multiple rules").'),
3314
+ compactionMode: z
3315
+ .enum(["summary", "transcript", "segments"])
3316
+ .optional()
3317
+ .describe("Grok --compaction-mode: summary (default; no pointer) | transcript (points at the raw transcript) | segments (persists per-segment markdown to grep). Sets GROK_COMPACTION_MODE."),
3318
+ compactionDetail: z
3319
+ .enum(["none", "minimal", "balanced", "verbose"])
3320
+ .optional()
3321
+ .describe("Grok --compaction-detail: verbatim segment detail (none|minimal|balanced|verbose, default verbose). Only affects `--compaction-mode segments`. Sets GROK_COMPACTION_DETAIL."),
3735
3322
  worktree: WORKTREE_SCHEMA.optional(),
3736
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
3323
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, worktree, }) => {
3737
3324
  return handleGrokRequest({ sessionManager, logger, runtime }, {
3738
3325
  prompt,
3739
3326
  promptParts,
@@ -3763,12 +3350,11 @@ export function createGatewayServer(deps = {}) {
3763
3350
  systemPromptOverride,
3764
3351
  allow,
3765
3352
  deny,
3353
+ compactionMode,
3354
+ compactionDetail,
3766
3355
  worktree,
3767
3356
  });
3768
3357
  });
3769
- //──────────────────────────────────────────────────────────────────────────────
3770
- // Mistral Vibe Tool
3771
- //──────────────────────────────────────────────────────────────────────────────
3772
3358
  server.tool("mistral_request", {
3773
3359
  prompt: z
3774
3360
  .string()
@@ -3798,11 +3384,6 @@ export function createGatewayServer(deps = {}) {
3798
3384
  .enum(MISTRAL_AGENT_MODES)
3799
3385
  .optional()
3800
3386
  .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
3801
- effort: z
3802
- .enum(["low", "medium", "high", "xhigh", "max"])
3803
- .optional()
3804
- .describe("Vibe effort level"),
3805
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
3806
3387
  approvalStrategy: z
3807
3388
  .enum(["legacy", "mcp_managed"])
3808
3389
  .default("legacy")
@@ -3844,7 +3425,6 @@ export function createGatewayServer(deps = {}) {
3844
3425
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
3845
3426
  maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
3846
3427
  maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
3847
- // Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
3848
3428
  workingDir: z
3849
3429
  .string()
3850
3430
  .min(1)
@@ -3855,7 +3435,7 @@ export function createGatewayServer(deps = {}) {
3855
3435
  .optional()
3856
3436
  .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
3857
3437
  worktree: WORKTREE_SCHEMA.optional(),
3858
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
3438
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
3859
3439
  return handleMistralRequest({ sessionManager, logger, runtime }, {
3860
3440
  prompt,
3861
3441
  promptParts,
@@ -3865,8 +3445,6 @@ export function createGatewayServer(deps = {}) {
3865
3445
  resumeLatest,
3866
3446
  createNewSession,
3867
3447
  permissionMode,
3868
- effort,
3869
- reasoningEffort,
3870
3448
  approvalStrategy,
3871
3449
  approvalPolicy,
3872
3450
  mcpServers,
@@ -3886,16 +3464,6 @@ export function createGatewayServer(deps = {}) {
3886
3464
  worktree,
3887
3465
  });
3888
3466
  });
3889
- //──────────────────────────────────────────────────────────────────────────────
3890
- // Async Long-Running Job Tools (No Time-Bound LLM Execution)
3891
- //
3892
- // STRUCTURAL INVARIANT: these tools are only registered when a real job
3893
- // store is attached (`persistence.asyncJobsEnabled === true`). When the
3894
- // operator has configured `[persistence].backend = "none"`, none of the
3895
- // *_request_async / llm_job_* tools exist in the MCP tool list at all —
3896
- // orchestrating agents get a clean "tool not found" signal at connect
3897
- // time instead of silent in-memory loss after the 1-hour TTL.
3898
- //──────────────────────────────────────────────────────────────────────────────
3899
3467
  if (asyncJobsEnabled) {
3900
3468
  server.tool("claude_request_async", {
3901
3469
  prompt: z
@@ -3929,7 +3497,6 @@ export function createGatewayServer(deps = {}) {
3929
3497
  .enum(CLAUDE_PERMISSION_MODES)
3930
3498
  .optional()
3931
3499
  .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
3932
- // U25 — Claude high-impact features
3933
3500
  agent: z
3934
3501
  .string()
3935
3502
  .optional()
@@ -3969,7 +3536,6 @@ export function createGatewayServer(deps = {}) {
3969
3536
  .boolean()
3970
3537
  .optional()
3971
3538
  .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
3972
- // Phase 4 slice η — Claude reliability + structured-output parity
3973
3539
  fallbackModel: z
3974
3540
  .string()
3975
3541
  .min(1)
@@ -3979,11 +3545,28 @@ export function createGatewayServer(deps = {}) {
3979
3545
  .union([z.string(), z.record(z.string(), z.unknown())])
3980
3546
  .optional()
3981
3547
  .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
3982
- // Phase 4 slice ζ — Claude additional-workspace-dirs parity
3983
3548
  addDir: z
3984
3549
  .array(z.string())
3985
3550
  .optional()
3986
3551
  .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
3552
+ noSessionPersistence: z
3553
+ .boolean()
3554
+ .optional()
3555
+ .describe("Claude --no-session-persistence: do not write this session to disk (ephemeral one-shot runs; mirrors codex --ephemeral)."),
3556
+ settingSources: z
3557
+ .string()
3558
+ .min(1)
3559
+ .optional()
3560
+ .describe("Claude --setting-sources: comma-separated setting sources to load (user|project|local) for reproducible/isolated headless runs."),
3561
+ settings: z
3562
+ .string()
3563
+ .min(1)
3564
+ .optional()
3565
+ .describe("Claude --settings: path to a settings JSON file or a JSON literal of additional settings. Powerful: settings can define hooks/permissions/model; passed verbatim."),
3566
+ tools: z
3567
+ .array(z.string())
3568
+ .optional()
3569
+ .describe('Claude --tools: restrict the available built-in tool set (distinct from allowedTools permission gating). Pass [""] to disable all tools.'),
3987
3570
  worktree: WORKTREE_SCHEMA.optional(),
3988
3571
  approvalStrategy: z
3989
3572
  .enum(["legacy", "mcp_managed"])
@@ -4014,7 +3597,7 @@ export function createGatewayServer(deps = {}) {
4014
3597
  .boolean()
4015
3598
  .default(false)
4016
3599
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4017
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
3600
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
4018
3601
  if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
4019
3602
  return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
4020
3603
  }
@@ -4046,12 +3629,15 @@ export function createGatewayServer(deps = {}) {
4046
3629
  fallbackModel,
4047
3630
  jsonSchema,
4048
3631
  addDir,
3632
+ noSessionPersistence,
3633
+ settingSources,
3634
+ settings,
3635
+ tools,
4049
3636
  }, runtime);
4050
3637
  if (!("args" in prep))
4051
3638
  return prep;
4052
3639
  const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
4053
3640
  try {
4054
- // Session management (before job start for async)
4055
3641
  let effectiveSessionId = sessionId;
4056
3642
  let useContinue = continueSession;
4057
3643
  const activeSession = await sessionManager.getActiveSession("claude");
@@ -4075,14 +3661,11 @@ export function createGatewayServer(deps = {}) {
4075
3661
  await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
4076
3662
  }
4077
3663
  }
4078
- // Slice 3: TTL warning on resume (async path too).
4079
3664
  const ttlWarning = maybeBuildCacheTtlWarning({
4080
3665
  runtime,
4081
3666
  sessionId: effectiveSessionId,
4082
3667
  cli: "claude",
4083
3668
  });
4084
- // Slice λ: resolve worktree directive after session metadata is
4085
- // settled so resume reuse can read metadata.worktreePath.
4086
3669
  let worktreeResolution = {};
4087
3670
  try {
4088
3671
  worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
@@ -4090,7 +3673,6 @@ export function createGatewayServer(deps = {}) {
4090
3673
  catch (err) {
4091
3674
  return createErrorResponse("claude_request_async", 1, "", corrId, err);
4092
3675
  }
4093
- // Idle timeout only for stream-json (text/json produce no output until done)
4094
3676
  const effectiveIdleTimeout = outputFormat === "stream-json"
4095
3677
  ? resolveIdleTimeout("claude", idleTimeoutMs)
4096
3678
  : undefined;
@@ -4116,8 +3698,6 @@ export function createGatewayServer(deps = {}) {
4116
3698
  if (worktreeResolution.worktreePath) {
4117
3699
  asyncResponse.worktreePath = worktreeResolution.worktreePath;
4118
3700
  }
4119
- // Rec #4: include any prep-time warnings (e.g.
4120
- // cacheable_prefix_uncached) alongside ttlWarning.
4121
3701
  const mergedWarnings = [
4122
3702
  ...(ttlWarning ? [ttlWarning] : []),
4123
3703
  ...(prep.warnings ?? []),
@@ -4201,12 +3781,10 @@ export function createGatewayServer(deps = {}) {
4201
3781
  .boolean()
4202
3782
  .default(false)
4203
3783
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4204
- // U23: emit `--json` to enable JSONL event-stream parsing for token usage.
4205
3784
  outputFormat: z
4206
3785
  .enum(["text", "json"])
4207
3786
  .default("text")
4208
3787
  .describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
4209
- // U26: high-impact feature flags. All optional.
4210
3788
  outputSchema: z
4211
3789
  .union([z.string(), z.record(z.string(), z.unknown())])
4212
3790
  .optional()
@@ -4221,7 +3799,6 @@ export function createGatewayServer(deps = {}) {
4221
3799
  images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
4222
3800
  ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
4223
3801
  ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
4224
- // Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
4225
3802
  workingDir: z
4226
3803
  .string()
4227
3804
  .min(1)
@@ -4318,11 +3895,6 @@ export function createGatewayServer(deps = {}) {
4318
3895
  .boolean()
4319
3896
  .default(false)
4320
3897
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4321
- // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
4322
- // remains text so existing callers see no behavior change. Phase 4 slice
4323
- // ε adds `stream-json` (NDJSON event stream parsed by
4324
- // parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
4325
- // semantics covered by Gemini's existing real-time stdout streaming).
4326
3898
  outputFormat: z
4327
3899
  .enum(["text", "json", "stream-json"])
4328
3900
  .default("text")
@@ -4335,8 +3907,12 @@ export function createGatewayServer(deps = {}) {
4335
3907
  .boolean()
4336
3908
  .default(false)
4337
3909
  .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
3910
+ yolo: z
3911
+ .boolean()
3912
+ .optional()
3913
+ .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
4338
3914
  worktree: WORKTREE_SCHEMA.optional(),
4339
- }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
3915
+ }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
4340
3916
  return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4341
3917
  prompt,
4342
3918
  promptParts,
@@ -4360,6 +3936,7 @@ export function createGatewayServer(deps = {}) {
4360
3936
  adminPolicyFiles,
4361
3937
  attachments,
4362
3938
  skipTrust,
3939
+ yolo,
4363
3940
  worktree,
4364
3941
  });
4365
3942
  });
@@ -4432,13 +4009,11 @@ export function createGatewayServer(deps = {}) {
4432
4009
  .default(false)
4433
4010
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4434
4011
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
4435
- // Phase 4 slice ζ — Grok working-directory parity.
4436
4012
  workingDir: z
4437
4013
  .string()
4438
4014
  .min(1)
4439
4015
  .optional()
4440
4016
  .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
4441
- // Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
4442
4017
  sandbox: z
4443
4018
  .string()
4444
4019
  .min(1)
@@ -4462,8 +4037,16 @@ export function createGatewayServer(deps = {}) {
4462
4037
  .array(z.string())
4463
4038
  .optional()
4464
4039
  .describe("Grok --deny <RULE>: permission deny rules. Each entry → its own --deny instance."),
4040
+ compactionMode: z
4041
+ .enum(["summary", "transcript", "segments"])
4042
+ .optional()
4043
+ .describe("Grok --compaction-mode: summary (default) | transcript | segments. Sets GROK_COMPACTION_MODE."),
4044
+ compactionDetail: z
4045
+ .enum(["none", "minimal", "balanced", "verbose"])
4046
+ .optional()
4047
+ .describe("Grok --compaction-detail: segment verbatim detail (none|minimal|balanced|verbose, default verbose). Only affects segments mode. Sets GROK_COMPACTION_DETAIL."),
4465
4048
  worktree: WORKTREE_SCHEMA.optional(),
4466
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
4049
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, worktree, }) => {
4467
4050
  return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4468
4051
  prompt,
4469
4052
  promptParts,
@@ -4492,6 +4075,8 @@ export function createGatewayServer(deps = {}) {
4492
4075
  systemPromptOverride,
4493
4076
  allow,
4494
4077
  deny,
4078
+ compactionMode,
4079
+ compactionDetail,
4495
4080
  worktree,
4496
4081
  });
4497
4082
  });
@@ -4524,11 +4109,6 @@ export function createGatewayServer(deps = {}) {
4524
4109
  .enum(MISTRAL_AGENT_MODES)
4525
4110
  .optional()
4526
4111
  .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
4527
- effort: z
4528
- .enum(["low", "medium", "high", "xhigh", "max"])
4529
- .optional()
4530
- .describe("Vibe effort level"),
4531
- reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
4532
4112
  approvalStrategy: z
4533
4113
  .enum(["legacy", "mcp_managed"])
4534
4114
  .default("legacy")
@@ -4569,7 +4149,6 @@ export function createGatewayServer(deps = {}) {
4569
4149
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
4570
4150
  maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
4571
4151
  maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
4572
- // Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
4573
4152
  workingDir: z
4574
4153
  .string()
4575
4154
  .min(1)
@@ -4580,7 +4159,7 @@ export function createGatewayServer(deps = {}) {
4580
4159
  .optional()
4581
4160
  .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
4582
4161
  worktree: WORKTREE_SCHEMA.optional(),
4583
- }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
4162
+ }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
4584
4163
  return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
4585
4164
  prompt,
4586
4165
  promptParts,
@@ -4590,8 +4169,6 @@ export function createGatewayServer(deps = {}) {
4590
4169
  resumeLatest,
4591
4170
  createNewSession,
4592
4171
  permissionMode,
4593
- effort,
4594
- reasoningEffort,
4595
4172
  approvalStrategy,
4596
4173
  approvalPolicy,
4597
4174
  mcpServers,
@@ -4667,7 +4244,6 @@ export function createGatewayServer(deps = {}) {
4667
4244
  isError: true,
4668
4245
  };
4669
4246
  }
4670
- // Parse stream-json output for Claude async jobs
4671
4247
  const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
4672
4248
  let parsed;
4673
4249
  if (outputFormat === "stream-json" && result.stdout) {
@@ -4727,14 +4303,7 @@ export function createGatewayServer(deps = {}) {
4727
4303
  ],
4728
4304
  };
4729
4305
  });
4730
- } // end if (asyncJobsEnabled)
4731
- // Read back any persisted request (sync OR async) by its correlation id.
4732
- // Registered unconditionally — it reads the flight recorder, which is
4733
- // independent of async-job persistence. Every sync/async response echoes
4734
- // its id in `structuredContent.correlationId`; pass that id here to recover
4735
- // the persisted prompt/response after the inline result is gone. With flight
4736
- // recording disabled (LLM_GATEWAY_LOGS_DB=none → NoopFlightRecorder) the
4737
- // query yields no rows and this returns the "not found" shape.
4306
+ }
4738
4307
  server.tool("llm_request_result", {
4739
4308
  correlationId: z
4740
4309
  .string()
@@ -4805,9 +4374,6 @@ export function createGatewayServer(deps = {}) {
4805
4374
  ],
4806
4375
  };
4807
4376
  });
4808
- //──────────────────────────────────────────────────────────────────────────────
4809
- // Approval Audit Tools
4810
- //──────────────────────────────────────────────────────────────────────────────
4811
4377
  server.tool("approval_list", {
4812
4378
  limit: z
4813
4379
  .number()
@@ -4835,9 +4401,6 @@ export function createGatewayServer(deps = {}) {
4835
4401
  ],
4836
4402
  };
4837
4403
  });
4838
- //──────────────────────────────────────────────────────────────────────────────
4839
- // List Models Tool
4840
- //──────────────────────────────────────────────────────────────────────────────
4841
4404
  server.tool("list_models", {
4842
4405
  cli: z
4843
4406
  .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
@@ -4916,9 +4479,6 @@ export function createGatewayServer(deps = {}) {
4916
4479
  };
4917
4480
  }
4918
4481
  });
4919
- //──────────────────────────────────────────────────────────────────────────────
4920
- // Session Management Tools
4921
- //──────────────────────────────────────────────────────────────────────────────
4922
4482
  server.tool("session_create", {
4923
4483
  cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
4924
4484
  description: z.string().optional().describe("Session description"),
@@ -5094,15 +4654,6 @@ export function createGatewayServer(deps = {}) {
5094
4654
  };
5095
4655
  }
5096
4656
  const activeSession = await sessionManager.getActiveSession(session.cli);
5097
- // Slice 2: project a compact cacheState view from the flight
5098
- // recorder at read time. NOT persisted on the Session interface
5099
- // (sessions.json stays content-free per the project invariant).
5100
- // The field is OMITTED entirely (not null, not empty object) when
5101
- // the session has zero rows in the flight recorder so the response
5102
- // stays compact for fresh sessions.
5103
- //
5104
- // Slice 3: include ttlRemainingMs derived from the gateway's
5105
- // configured TTL policy. Null for non-claude sessions.
5106
4657
  let cacheState;
5107
4658
  try {
5108
4659
  const stats = computeSessionCacheStats(flightRecorder, session.id);
@@ -5171,16 +4722,8 @@ export function createGatewayServer(deps = {}) {
5171
4722
  });
5172
4723
  return server;
5173
4724
  }
5174
- //──────────────────────────────────────────────────────────────────────────────
5175
- // Async Initialization
5176
- //──────────────────────────────────────────────────────────────────────────────
5177
4725
  async function initializeSessionManager() {
5178
4726
  const config = loadConfig();
5179
- // Slice λ: file-backed sessions get a cleanup hook that tears down any
5180
- // git worktrees recorded on session.metadata.worktreePath. PG-backed
5181
- // sessions skip the hook (multi-tenant deployments don't necessarily
5182
- // own a single filesystem); revisit if/when worktree support extends
5183
- // there.
5184
4727
  const worktreeCleanupHook = createWorktreeSessionCleanupHook(logger);
5185
4728
  if (config.database) {
5186
4729
  logger.info("Initializing PostgreSQL session manager");
@@ -5198,9 +4741,6 @@ async function initializeSessionManager() {
5198
4741
  }
5199
4742
  resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));
5200
4743
  }
5201
- //──────────────────────────────────────────────────────────────────────────────
5202
- // Health Check Resource (only if using PostgreSQL)
5203
- //──────────────────────────────────────────────────────────────────────────────
5204
4744
  function registerHealthResource(server) {
5205
4745
  if (db) {
5206
4746
  server.registerResource("health", "health://status", {
@@ -5221,7 +4761,6 @@ function registerHealthResource(server) {
5221
4761
  });
5222
4762
  logger.info("Health check resource registered");
5223
4763
  }
5224
- // Process health resource (always available, not dependent on DB)
5225
4764
  server.registerResource("process-health", "metrics://process-health", {
5226
4765
  title: "Process Health",
5227
4766
  description: "Async job health (CPU, memory, zombie detection)",
@@ -5240,13 +4779,9 @@ function registerHealthResource(server) {
5240
4779
  });
5241
4780
  logger.info("Process health resource registered");
5242
4781
  }
5243
- //──────────────────────────────────────────────────────────────────────────────
5244
- // Graceful Shutdown
5245
- //──────────────────────────────────────────────────────────────────────────────
5246
4782
  async function shutdown(signal) {
5247
4783
  logger.info(`Received ${signal}, shutting down gracefully...`);
5248
4784
  try {
5249
- // Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
5250
4785
  await killAllProcessGroups();
5251
4786
  logger.info("All process groups terminated");
5252
4787
  if (activeHttpGateway) {
@@ -5276,9 +4811,6 @@ async function shutdown(signal) {
5276
4811
  }
5277
4812
  process.on("SIGTERM", () => shutdown("SIGTERM"));
5278
4813
  process.on("SIGINT", () => shutdown("SIGINT"));
5279
- //──────────────────────────────────────────────────────────────────────────────
5280
- // Server Startup
5281
- //──────────────────────────────────────────────────────────────────────────────
5282
4814
  async function main() {
5283
4815
  startWindowsBootstrapperSelfHeal();
5284
4816
  const args = process.argv.slice(2);
@@ -5342,7 +4874,6 @@ async function main() {
5342
4874
  process.env.MCP_TRANSPORT ||
5343
4875
  "stdio";
5344
4876
  logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
5345
- // Initialize session manager first
5346
4877
  await initializeSessionManager();
5347
4878
  const serverDeps = {
5348
4879
  sessionManager,
@@ -5369,14 +4900,11 @@ async function main() {
5369
4900
  activeServer = createGatewayServer({
5370
4901
  ...serverDeps,
5371
4902
  });
5372
- // Register health check resource if using PostgreSQL
5373
4903
  registerHealthResource(activeServer);
5374
4904
  const transport = new StdioServerTransport();
5375
4905
  await activeServer.connect(transport);
5376
4906
  logger.info("llm-cli-gateway MCP server connected and ready");
5377
4907
  }
5378
- // Guard: only auto-start when run directly (not imported for testing)
5379
- // Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
5380
4908
  const __entryUrl = entrypointFileURL(process.argv[1]);
5381
4909
  if (__entryUrl === import.meta.url) {
5382
4910
  main().catch(error => {