llm-cli-gateway 1.17.4 → 1.17.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +1 -1
  3. package/dist/approval-manager.js +0 -8
  4. package/dist/async-job-manager.d.ts +0 -113
  5. package/dist/async-job-manager.js +6 -124
  6. package/dist/cache-stats.d.ts +0 -89
  7. package/dist/cache-stats.js +0 -62
  8. package/dist/claude-mcp-config.js +0 -1
  9. package/dist/cli-updater.d.ts +0 -8
  10. package/dist/cli-updater.js +0 -12
  11. package/dist/codex-json-parser.d.ts +0 -20
  12. package/dist/codex-json-parser.js +0 -21
  13. package/dist/config.d.ts +0 -31
  14. package/dist/config.js +2 -72
  15. package/dist/db.d.ts +0 -18
  16. package/dist/db.js +0 -22
  17. package/dist/doctor.d.ts +0 -49
  18. package/dist/doctor.js +0 -47
  19. package/dist/endpoint-exposure.js +0 -1
  20. package/dist/executor.d.ts +0 -19
  21. package/dist/executor.js +3 -38
  22. package/dist/flight-recorder.d.ts +0 -26
  23. package/dist/flight-recorder.js +1 -70
  24. package/dist/gemini-json-parser.d.ts +0 -25
  25. package/dist/gemini-json-parser.js +0 -28
  26. package/dist/health.d.ts +0 -3
  27. package/dist/health.js +0 -3
  28. package/dist/index.d.ts +1 -221
  29. package/dist/index.js +14 -563
  30. package/dist/job-store.d.ts +0 -74
  31. package/dist/job-store.js +1 -73
  32. package/dist/logger.d.ts +0 -7
  33. package/dist/logger.js +0 -6
  34. package/dist/migrate-sessions.d.ts +0 -3
  35. package/dist/migrate-sessions.js +0 -16
  36. package/dist/migrate.js +1 -18
  37. package/dist/mistral-meta-json-parser.js +0 -67
  38. package/dist/model-registry.js +0 -13
  39. package/dist/pricing.d.ts +0 -46
  40. package/dist/pricing.js +0 -47
  41. package/dist/process-monitor.d.ts +0 -15
  42. package/dist/process-monitor.js +2 -31
  43. package/dist/prompt-parts.d.ts +0 -25
  44. package/dist/prompt-parts.js +0 -11
  45. package/dist/provider-status.d.ts +0 -8
  46. package/dist/provider-status.js +0 -11
  47. package/dist/request-helpers.d.ts +0 -334
  48. package/dist/request-helpers.js +1 -229
  49. package/dist/resources.d.ts +0 -20
  50. package/dist/resources.js +1 -34
  51. package/dist/retry.d.ts +0 -45
  52. package/dist/retry.js +3 -40
  53. package/dist/session-manager-pg.d.ts +0 -32
  54. package/dist/session-manager-pg.js +0 -32
  55. package/dist/session-manager.d.ts +0 -21
  56. package/dist/session-manager.js +1 -15
  57. package/dist/stream-json-parser.d.ts +0 -18
  58. package/dist/stream-json-parser.js +0 -22
  59. package/dist/upstream-contracts.d.ts +0 -55
  60. package/dist/upstream-contracts.js +0 -77
  61. package/dist/validation-orchestrator.js +0 -3
  62. package/dist/worktree-manager.d.ts +0 -9
  63. package/dist/worktree-manager.js +0 -21
  64. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -35,7 +35,6 @@ import { printDoctorJson } from "./doctor.js";
35
35
  import { registerValidationTools } from "./validation-tools.js";
36
36
  import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
37
37
  import { entrypointFileURL } from "./entrypoint-url.js";
38
- // Simple logger that writes to stderr (stdout is used for MCP protocol)
39
38
  const logger = {
40
39
  info: (message, ...args) => {
41
40
  console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
@@ -94,10 +93,6 @@ function logOptimizationTokens(kind, correlationId, original, optimized) {
94
93
  const reduction = originalTokens === 0 ? 0 : ((originalTokens - optimizedTokens) / originalTokens) * 100;
95
94
  logger.info(`[${correlationId}] ${kind} tokens ${originalTokens} → ${optimizedTokens} (${reduction.toFixed(1)}% reduction)`);
96
95
  }
97
- // Sync-to-async deadline: if a sync tool's CLI call hasn't finished within this
98
- // window, the tool returns a deferred async job reference instead of blocking
99
- // until the MCP client's tool-call timeout fires (~60s in many runtimes).
100
- // Configurable via SYNC_DEADLINE_MS env var. Set to 0 to disable (pure sync).
101
96
  const SYNC_DEADLINE_MS = (() => {
102
97
  const env = process.env.SYNC_DEADLINE_MS;
103
98
  if (env !== undefined) {
@@ -105,11 +100,8 @@ const SYNC_DEADLINE_MS = (() => {
105
100
  if (Number.isFinite(parsed) && parsed >= 0)
106
101
  return parsed;
107
102
  }
108
- return 45_000; // 45s default — safely under the 60s MCP client cap
103
+ return 45_000;
109
104
  })();
110
- //──────────────────────────────────────────────────────────────────────────────
111
- // Skills loader — reads .agents/skills/*/SKILL.md at startup
112
- //──────────────────────────────────────────────────────────────────────────────
113
105
  const __filename = fileURLToPath(import.meta.url);
114
106
  const __dirname = dirname(__filename);
115
107
  const SKILLS_DIR = join(__dirname, "..", ".agents", "skills");
@@ -124,7 +116,6 @@ function packageVersion() {
124
116
  return parsed.version || "unknown";
125
117
  }
126
118
  catch {
127
- // Try next candidate.
128
119
  }
129
120
  }
130
121
  return "unknown";
@@ -137,24 +128,19 @@ function loadSkills() {
137
128
  const skillPath = join(SKILLS_DIR, dir.name, "SKILL.md");
138
129
  try {
139
130
  const content = readFileSync(skillPath, "utf-8");
140
- // Extract description from YAML frontmatter
141
131
  const descMatch = content.match(/^---[\s\S]*?description:\s*(.+?)$/m);
142
132
  const description = descMatch?.[1]?.trim() || dir.name;
143
133
  skills.push({ name: dir.name, content, description });
144
134
  }
145
135
  catch {
146
- // Skill file missing or unreadable — skip silently
147
136
  }
148
137
  }
149
138
  }
150
139
  catch {
151
- // Skills directory missing — not fatal
152
140
  }
153
141
  return skills;
154
142
  }
155
143
  const loadedSkills = loadSkills();
156
- // L1: Compact server instructions (~200 tokens) — injected into every client's
157
- // system prompt at connection time. Covers key patterns + pointers to L2 resources.
158
144
  const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
159
145
 
160
146
  Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
@@ -175,17 +161,11 @@ ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}
175
161
  function newGatewayMcpServer() {
176
162
  return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
177
163
  }
178
- // Global state (initialized asynchronously)
179
164
  let sessionManager;
180
165
  let db = null;
181
166
  const performanceMetrics = new PerformanceMetrics();
182
167
  let resourceProvider;
183
168
  let flightRecorder = null;
184
- // Resolved persistence config — single source of truth for the async-job backend.
185
- // Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
186
- // When backend = "none", the JobStore is null AND *_request_async tools are not
187
- // registered (see createGatewayServer), making silent in-memory loss
188
- // structurally impossible.
189
169
  let persistenceConfig = null;
190
170
  let cacheAwarenessConfig = null;
191
171
  let jobStore = null;
@@ -231,47 +211,9 @@ function getApprovalManager(runtimeLogger = logger) {
231
211
  return approvalManager;
232
212
  }
233
213
  const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
234
- /**
235
- * Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
236
- *
237
- * Both flags reach the upstream CLIs as decimal-formatted argv strings via
238
- * `String(N)`. `z.number().int().positive()` alone lets values past
239
- * `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
240
- * scientific notation that Grok and Vibe both reject. The bounds below
241
- * (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
242
- * for price) guarantee a lossless decimal stringification AND a sane
243
- * upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
244
- */
245
214
  export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
246
- // Token budgets can legitimately exceed the agent-turn cap by orders of
247
- // magnitude. Keep a finite operational guardrail while avoiding the 10k turn
248
- // ceiling that would make large-context Vibe sessions unusable.
249
215
  export const MAX_TOKENS_SCHEMA = z.number().int().positive().safe().max(100_000_000);
250
- // `.min(1e-6)` keeps the value in JS's decimal-stringify range:
251
- // String(1e-6) === "0.000001" but String(1e-7) === "1e-7", which both
252
- // upstream CLIs would reject. 1µUSD per request is fine-grained enough
253
- // for any plausible budget-cap use.
254
216
  export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
255
- /**
256
- * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
257
- * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
258
- * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
259
- * name and/or git ref (default ref: HEAD).
260
- *
261
- * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
262
- * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
263
- * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
264
- * the request carries a sessionId and the session already has a worktree,
265
- * that worktree is reused. On session_delete or TTL eviction the gateway
266
- * runs `git worktree remove --force`.
267
- *
268
- * Tool response: when a worktree was used, the successful response stdout
269
- * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
270
- * parse/use the path without a schema change (slice λ §1.d).
271
- *
272
- * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
273
- * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
274
- */
275
217
  export const WORKTREE_SCHEMA = z
276
218
  .union([
277
219
  z.boolean(),
@@ -296,9 +238,6 @@ export const WORKTREE_SCHEMA = z
296
238
  "path. NOTE: callers should `.gitignore` the `.worktrees/` " +
297
239
  "directory in their repo (the gateway does NOT auto-gitignore — " +
298
240
  "see slice λ spec Q4).");
299
- // U22: Session-provider enum extended to five providers. The storage layer's
300
- // CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
301
- // session_create / session_list / session_clear_all accept the fifth provider.
302
241
  export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
303
242
  export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
304
243
  let activeServer = null;
@@ -308,13 +247,10 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
308
247
  const runtimeSessionManager = deps.sessionManager ?? sessionManager;
309
248
  const runtimePerformanceMetrics = deps.performanceMetrics ??
310
249
  (options.isolateState ? new PerformanceMetrics() : performanceMetrics);
311
- // Resolve flight recorder BEFORE async manager so isolateState managers
312
- // can be wired with the same recorder instance the runtime exposes.
313
250
  const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
314
251
  const runtimeAsyncJobManager = deps.asyncJobManager ??
315
252
  (options.isolateState
316
- ? // Factory-created test/HTTP session servers must not mark another instance's
317
- // durable jobs orphaned. Stdio startup injects the process-global manager.
253
+ ?
318
254
  newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null, runtimeFlightRecorder)
319
255
  : getAsyncJobManager(runtimeLogger));
320
256
  const runtimeApprovalManager = deps.approvalManager ??
@@ -337,15 +273,12 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
337
273
  cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
338
274
  };
339
275
  }
340
- // Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
341
- // Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
342
- // In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
343
276
  const CLI_IDLE_TIMEOUTS = {
344
- claude: 600_000, // 10 minutes — only used when outputFormat=stream-json
345
- codex: 600_000, // 10 minutes — Codex streams stderr progress
346
- gemini: 600_000, // 10 minutes — Gemini streams stdout in real-time
347
- grok: 600_000, // 10 minutes — Grok streams stderr/stdout activity in headless mode
348
- mistral: 600_000, // 10 minutes — Vibe streams stdout/stderr in headless mode
277
+ claude: 600_000,
278
+ codex: 600_000,
279
+ gemini: 600_000,
280
+ grok: 600_000,
281
+ mistral: 600_000,
349
282
  };
350
283
  function resolveIdleTimeout(cli, override) {
351
284
  if (override !== undefined)
@@ -353,41 +286,7 @@ function resolveIdleTimeout(cli, override) {
353
286
  return CLI_IDLE_TIMEOUTS[cli];
354
287
  }
355
288
  const SYNC_POLL_INTERVAL_MS = 1_000;
356
- /**
357
- * Start an async job and poll until completion or deadline.
358
- * Returns the job result if it finishes in time, or a deferral marker.
359
- */
360
- async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete,
361
- /**
362
- * Slice 1.5: when the sync handler has already written a logStart row
363
- * keyed on `corrId`, pass these so the manager can write logComplete
364
- * (with usage extraction) when the underlying async job terminates —
365
- * even if the sync handler returned a deferred response.
366
- * `writeFlightStart` is NEVER true on this path: the sync handler is
367
- * always the upstream logStart writer.
368
- */
369
- flightRecorderEntry, extractUsage,
370
- /**
371
- * Slice κ: optional stdin payload piped to the child CLI. Currently
372
- * only Claude's `--input-format stream-json` path sets this. Threaded
373
- * through both the direct-execute fallback (SYNC_DEADLINE_MS===0) and
374
- * the AsyncJobManager spawn path, and participates in the dedup key.
375
- */
376
- stdin,
377
- /**
378
- * Slice λ: optional working directory for the spawned child process,
379
- * derived from a gateway-owned git worktree. Threaded to both the
380
- * direct-execute fallback (`executeCli({ cwd })`) and the
381
- * AsyncJobManager dedup-aware spawn path
382
- * (`startJobWithDedup({ cwd })`). `cwd` also participates in the
383
- * dedup key (see async-job-manager.buildRequestKey) so two requests
384
- * with identical argv in different worktrees do not collide.
385
- */
386
- cwd) {
387
- // U26 fix: ownership of onComplete is a contract. Once this function returns
388
- // OR throws, the caller MUST consider onComplete consumed — i.e. it has
389
- // either been run, or the AsyncJobManager has taken ownership of it. The
390
- // caller never needs to reclaim.
289
+ async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete, flightRecorderEntry, extractUsage, stdin, cwd) {
391
290
  let onCompleteOwnedByCaller = onComplete !== undefined;
392
291
  const consumeOnComplete = () => {
393
292
  if (!onCompleteOwnedByCaller || !onComplete)
@@ -409,8 +308,6 @@ cwd) {
409
308
  throw err;
410
309
  }
411
310
  if (SYNC_DEADLINE_MS === 0) {
412
- // Disabled — fall through to direct execution.
413
- // Note: direct execution bypasses dedup. forceRefresh is implied.
414
311
  const command = cli === "mistral" ? "vibe" : cli;
415
312
  try {
416
313
  return await executeCli(command, args, {
@@ -422,8 +319,6 @@ cwd) {
422
319
  });
423
320
  }
424
321
  finally {
425
- // Direct-execution path completes inline; release per-request resources
426
- // (e.g. outputSchema temp files) here.
427
322
  consumeOnComplete();
428
323
  }
429
324
  }
@@ -437,22 +332,12 @@ cwd) {
437
332
  env,
438
333
  stdin,
439
334
  onComplete,
440
- // Sync-deferred path: the upstream sync handler already wrote
441
- // logStart for this corrId, so writeFlightStart stays false. The
442
- // manager still writes logComplete on terminal state (which UPDATEs
443
- // the sync handler's row), closing the previously-orphaned
444
- // sync-deferred case.
445
335
  flightRecorderEntry,
446
336
  extractUsage,
447
337
  });
448
- // Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
449
- // fireOnComplete on terminal status, or run inline immediately for dedup).
450
338
  onCompleteOwnedByCaller = false;
451
339
  }
452
340
  catch (err) {
453
- // Spawn or pre-spawn failure inside AsyncJobManager. The record was never
454
- // registered, so onComplete will never be called by the manager. Reclaim
455
- // here so the temp file is not leaked.
456
341
  consumeOnComplete();
457
342
  throw err;
458
343
  }
@@ -464,7 +349,6 @@ cwd) {
464
349
  while (Date.now() < deadline) {
465
350
  const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
466
351
  if (snapshot && snapshot.status !== "running") {
467
- // Job finished within deadline — extract result
468
352
  const result = runtime.asyncJobManager.getJobResult(job.id);
469
353
  if (!result) {
470
354
  return { stdout: "", stderr: "Job result unavailable", code: 1 };
@@ -477,13 +361,6 @@ cwd) {
477
361
  }
478
362
  await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
479
363
  }
480
- // Deadline exceeded — return deferral.
481
- // R2 Codex-Unit-B F1: hand FR-complete ownership to the manager. Until
482
- // this call, the manager skips writeFlightComplete on terminal so the
483
- // sync handler's safeFlightComplete (with rich approvalDecision /
484
- // optimizationApplied metadata) wins for sync-inline completions. From
485
- // here on the sync handler returns deferred and will NOT write
486
- // safeFlightComplete, so the manager must.
487
364
  runtime.asyncJobManager.armFlightCompleteForDeferral(job.id);
488
365
  runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
489
366
  return {
@@ -517,27 +394,6 @@ function buildDeferredToolResponse(deferred, sessionId) {
517
394
  ],
518
395
  };
519
396
  }
520
- /**
521
- * Slice λ: resolve a request's worktree directive into a spawn cwd.
522
- *
523
- * - `worktreeOpt` is the Zod-validated input value (boolean |
524
- * `{ name?, ref? }` | undefined).
525
- * - When the request has a session AND the session already has a
526
- * `metadata.worktreePath`, that path is reused (resume semantics).
527
- * The reused path is returned without touching git; if the directory
528
- * was externally removed between requests, the next CLI invocation
529
- * will surface the error naturally.
530
- * - When no reusable worktree exists, `createWorktree` runs; on success
531
- * the new path is written to `session.metadata` (only when a session
532
- * exists — request-scoped worktrees do NOT persist).
533
- * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
534
- * pre-λ behaviour at non-worktree call sites).
535
- * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
536
- * in a `createErrorResponse` envelope. Do NOT swallow.
537
- *
538
- * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
539
- * verify" §5.
540
- */
541
397
  export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime) {
542
398
  if (!worktreeOpt)
543
399
  return {};
@@ -566,30 +422,13 @@ export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime)
566
422
  }
567
423
  return { cwd: handle.path, worktreePath: handle.path };
568
424
  }
569
- /**
570
- * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
571
- *
572
- * We surface the worktree path inline as a stdout prefix
573
- * (`[gateway] worktree=<absolute-path>\n`) rather than as a
574
- * structuredContent field or JSON wrapper. Rationale:
575
- * - zero schema change across all 10 tools and their downstream parsers
576
- * - matches how other slice features (session warnings, cache_state
577
- * aggregates) surface side-channel metadata today
578
- * - callers that want the path can split on the first newline; callers
579
- * that don't care see a single ignorable header line
580
- *
581
- * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
582
- * the moment a successful response is constructed.
583
- */
584
425
  export function formatWorktreePrefix(worktreePath) {
585
426
  return worktreePath ? `[gateway] worktree=${worktreePath}\n` : "";
586
427
  }
587
- // Helper function for standardized error responses
588
428
  function createErrorResponse(cli, code, stderr, correlationId, error) {
589
429
  let errorMessage = `Error executing ${cli} CLI`;
590
430
  const isLaunchExit = code === 127 || code === -4058;
591
431
  if (error) {
592
- // Command not found or spawn error
593
432
  errorMessage += `:\n${error.message}`;
594
433
  if (error.message.includes("ENOENT")) {
595
434
  errorMessage += `\n\nThe '${cli}' command was not found. Please ensure ${cli} CLI is installed and in your PATH.`;
@@ -597,12 +436,10 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
597
436
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI execution failed:`, error.message);
598
437
  }
599
438
  else if (code === 124) {
600
- // Wall-clock timeout
601
439
  errorMessage += `: Command timed out\n${stderr}`;
602
440
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI timed out`);
603
441
  }
604
442
  else if (code === 125) {
605
- // Idle timeout (stuck process)
606
443
  errorMessage += `: Process killed due to inactivity\n${stderr}`;
607
444
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI killed due to inactivity`);
608
445
  }
@@ -611,7 +448,6 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
611
448
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed to launch`);
612
449
  }
613
450
  else if (code !== 0) {
614
- // Other non-zero exit code
615
451
  errorMessage += ` (exit code ${code}):\n${stderr}`;
616
452
  logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed with exit code ${code}`);
617
453
  }
@@ -634,14 +470,7 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
634
470
  },
635
471
  };
636
472
  }
637
- export function extractUsageAndCost(cli, output, outputFormat,
638
- /**
639
- * Optional context for off-stdout telemetry sources. Today only Mistral
640
- * uses this — its meta.json lives on disk keyed by sessionId. Threading
641
- * this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
642
- * primitives-only (no `params`/`prep` retention on AsyncJobRecord).
643
- */
644
- ctx) {
473
+ export function extractUsageAndCost(cli, output, outputFormat, ctx) {
645
474
  if (cli === "claude" && outputFormat === "stream-json") {
646
475
  const parsed = parseStreamJson(output);
647
476
  if (!parsed.usage) {
@@ -679,29 +508,12 @@ ctx) {
679
508
  cacheReadTokens: parsed.usage.cache_read_tokens,
680
509
  };
681
510
  }
682
- // Mistral/Vibe: usage/cost live on disk in `~/.vibe/logs/session/<id>/meta.json`
683
- // (Phase 4 slice β). Best-effort: if we don't know the sessionId (fresh
684
- // session whose Vibe-assigned UUID we never observed) or the file is
685
- // missing/malformed, the parser returns `{}` and the FR row simply lacks
686
- // usage data — matching pre-slice behaviour. No stdout fallback exists.
687
511
  if (cli === "mistral") {
688
512
  return parseVibeMetaJson(ctx?.home ?? homedir(), ctx?.sessionId);
689
513
  }
690
514
  return {};
691
515
  }
692
- /**
693
- * Slice 1.5: build the async-job-manager's FR payload from a prep object
694
- * (which every prepare*Request returns), plus the bound CLI and output
695
- * format primitives needed by extractUsageAndCost. Returning the closure
696
- * separately means it captures `cliName` and `fmt` ONLY — never `params`
697
- * or `prep` — so retention on AsyncJobRecord is O(constant).
698
- */
699
516
  function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat) {
700
- // Extract primitives BEFORE building the closure — capturing `prep` or
701
- // `params` directly would pin large attachments / promptParts on the
702
- // AsyncJobRecord for JOB_TTL_MS. Phase 4 slice β: `sid` and `home` are
703
- // primitives too, threaded through so the Mistral branch of
704
- // extractUsageAndCost can read `~/.vibe/logs/session/<id>/meta.json`.
705
517
  const cli = cliName;
706
518
  const fmt = outputFormat;
707
519
  const sid = sessionId;
@@ -795,11 +607,7 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
795
607
  }
796
608
  return { config: mcpConfig };
797
609
  }
798
- //──────────────────────────────────────────────────────────────────────────────
799
- // MCP Resources
800
- //──────────────────────────────────────────────────────────────────────────────
801
610
  function registerBaseResources(server, runtime) {
802
- // Register skill resources (L2: full docs, read on demand)
803
611
  for (const skill of loadedSkills) {
804
612
  server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
805
613
  title: skill.name,
@@ -816,7 +624,6 @@ function registerBaseResources(server, runtime) {
816
624
  }));
817
625
  }
818
626
  runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
819
- // Register all sessions resource
820
627
  server.registerResource("all-sessions", "sessions://all", {
821
628
  title: "📋 All Sessions",
822
629
  description: "All conversation sessions across CLIs",
@@ -826,7 +633,6 @@ function registerBaseResources(server, runtime) {
826
633
  const contents = await runtime.resourceProvider.readResource(uri.href);
827
634
  return { contents: contents ? [contents] : [] };
828
635
  });
829
- // Register Claude sessions resource
830
636
  server.registerResource("claude-sessions", "sessions://claude", {
831
637
  title: "🤖 Claude Sessions",
832
638
  description: "Claude conversation sessions",
@@ -836,7 +642,6 @@ function registerBaseResources(server, runtime) {
836
642
  const contents = await runtime.resourceProvider.readResource(uri.href);
837
643
  return { contents: contents ? [contents] : [] };
838
644
  });
839
- // Register Codex sessions resource
840
645
  server.registerResource("codex-sessions", "sessions://codex", {
841
646
  title: "💻 Codex Sessions",
842
647
  description: "Codex conversation sessions",
@@ -846,7 +651,6 @@ function registerBaseResources(server, runtime) {
846
651
  const contents = await runtime.resourceProvider.readResource(uri.href);
847
652
  return { contents: contents ? [contents] : [] };
848
653
  });
849
- // Register Gemini sessions resource
850
654
  server.registerResource("gemini-sessions", "sessions://gemini", {
851
655
  title: "✨ Gemini Sessions",
852
656
  description: "Gemini conversation sessions",
@@ -856,7 +660,6 @@ function registerBaseResources(server, runtime) {
856
660
  const contents = await runtime.resourceProvider.readResource(uri.href);
857
661
  return { contents: contents ? [contents] : [] };
858
662
  });
859
- // Register Grok sessions resource
860
663
  server.registerResource("grok-sessions", "sessions://grok", {
861
664
  title: "⚡ Grok Sessions",
862
665
  description: "Grok conversation sessions",
@@ -866,7 +669,6 @@ function registerBaseResources(server, runtime) {
866
669
  const contents = await runtime.resourceProvider.readResource(uri.href);
867
670
  return { contents: contents ? [contents] : [] };
868
671
  });
869
- // Register Mistral sessions resource
870
672
  server.registerResource("mistral-sessions", "sessions://mistral", {
871
673
  title: "🌬 Mistral Sessions",
872
674
  description: "Mistral Vibe conversation sessions",
@@ -876,7 +678,6 @@ function registerBaseResources(server, runtime) {
876
678
  const contents = await runtime.resourceProvider.readResource(uri.href);
877
679
  return { contents: contents ? [contents] : [] };
878
680
  });
879
- // Register Claude models resource
880
681
  server.registerResource("claude-models", "models://claude", {
881
682
  title: "🧠 Claude Models",
882
683
  description: "Claude models and capabilities",
@@ -886,7 +687,6 @@ function registerBaseResources(server, runtime) {
886
687
  const contents = await runtime.resourceProvider.readResource(uri.href);
887
688
  return { contents: contents ? [contents] : [] };
888
689
  });
889
- // Register Codex models resource
890
690
  server.registerResource("codex-models", "models://codex", {
891
691
  title: "🔧 Codex Models",
892
692
  description: "Codex models and capabilities",
@@ -896,7 +696,6 @@ function registerBaseResources(server, runtime) {
896
696
  const contents = await runtime.resourceProvider.readResource(uri.href);
897
697
  return { contents: contents ? [contents] : [] };
898
698
  });
899
- // Register Gemini models resource
900
699
  server.registerResource("gemini-models", "models://gemini", {
901
700
  title: "🌟 Gemini Models",
902
701
  description: "Gemini models and capabilities",
@@ -906,7 +705,6 @@ function registerBaseResources(server, runtime) {
906
705
  const contents = await runtime.resourceProvider.readResource(uri.href);
907
706
  return { contents: contents ? [contents] : [] };
908
707
  });
909
- // Register Grok models resource
910
708
  server.registerResource("grok-models", "models://grok", {
911
709
  title: "⚡ Grok Models",
912
710
  description: "Grok models and capabilities",
@@ -916,7 +714,6 @@ function registerBaseResources(server, runtime) {
916
714
  const contents = await runtime.resourceProvider.readResource(uri.href);
917
715
  return { contents: contents ? [contents] : [] };
918
716
  });
919
- // Register Mistral models resource
920
717
  server.registerResource("mistral-models", "models://mistral", {
921
718
  title: "🌬 Mistral Models",
922
719
  description: "Mistral Vibe models and capabilities",
@@ -926,7 +723,6 @@ function registerBaseResources(server, runtime) {
926
723
  const contents = await runtime.resourceProvider.readResource(uri.href);
927
724
  return { contents: contents ? [contents] : [] };
928
725
  });
929
- // Register performance metrics resource
930
726
  server.registerResource("performance-metrics", "metrics://performance", {
931
727
  title: "📈 Performance Metrics",
932
728
  description: "Request counts, latency, success/failure rates",
@@ -936,11 +732,6 @@ function registerBaseResources(server, runtime) {
936
732
  const contents = await runtime.resourceProvider.readResource(uri.href);
937
733
  return { contents: contents ? [contents] : [] };
938
734
  });
939
- // Cache-state resources (slice 2). Static URI for global, templated for
940
- // session/{id} and prefix/{hash}. All three return tokens/hashes/aggregates
941
- // ONLY — never raw prompt or response text. The structural guarantee is in
942
- // the SessionCacheStats / PrefixCacheStats / GlobalCacheStats types
943
- // themselves: those shapes have no prompt/response/system/task fields.
944
735
  server.registerResource("cache-state-global", "cache_state://global", {
945
736
  title: "💾 Cache State (Global)",
946
737
  description: "Aggregate cache hit/miss/savings across all CLIs in the flight recorder. Tokens/hashes only — no prompt text.",
@@ -999,11 +790,6 @@ function registerBaseResources(server, runtime) {
999
790
  };
1000
791
  });
1001
792
  }
1002
- /**
1003
- * Slice 1: validate the prompt / promptParts mutex at the prep boundary and
1004
- * return either an error response or the resolved input. The exact error
1005
- * messages are part of the public contract — tests assert them verbatim.
1006
- */
1007
793
  function resolvePromptOrPartsForPrep(args) {
1008
794
  const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
1009
795
  const hasParts = args.promptParts !== undefined;
@@ -1045,7 +831,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1045
831
  const assembledPrompt = inputResolution.assembledPrompt;
1046
832
  const stablePrefixHash = inputResolution.stablePrefixHash;
1047
833
  const stablePrefixTokens = inputResolution.stablePrefixTokens;
1048
- // Review integrity check on raw prompt (before optimization)
1049
834
  const reviewIntegrity = checkReviewIntegrity({
1050
835
  prompt: assembledPrompt,
1051
836
  allowedTools: params.allowedTools,
@@ -1058,13 +843,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1058
843
  score: reviewIntegrity.totalScore,
1059
844
  });
1060
845
  }
1061
- // Rec #5 (slice κ): refuse the optimizePrompt + cacheControl combo
1062
- // before running optimization. Optimization rewrites the assembled
1063
- // prompt text the flight-recorder logs, but the κ stdin payload is
1064
- // built from raw `promptParts` content blocks — letting both run
1065
- // produces a FR row whose `prompt` no longer matches what Claude
1066
- // actually received, AND any optimisation-driven text change would
1067
- // silently break Anthropic prefix-cache reuse on the next call.
1068
846
  const ccEarly = params.promptParts?.cacheControl;
1069
847
  const cacheControlRequestedEarly = !!(ccEarly &&
1070
848
  (ccEarly.system || ccEarly.tools || ccEarly.context));
@@ -1088,7 +866,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1088
866
  approvalDecision = runtime.approvalManager.decide({
1089
867
  cli: "claude",
1090
868
  operation: params.operation,
1091
- prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
869
+ prompt: assembledPrompt,
1092
870
  bypassRequested: params.dangerouslySkipPermissions,
1093
871
  fullAuto: false,
1094
872
  requestedMcpServers,
@@ -1102,18 +880,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1102
880
  return createApprovalDeniedResponse(params.operation, approvalDecision);
1103
881
  }
1104
882
  }
1105
- // Rec #2 (slice κ): auto-emit `cache_control` when the caller passes
1106
- // `promptParts` whose stable prefix exceeds the per-model minimum,
1107
- // the caller has NOT explicitly set `cacheControl`, the gateway
1108
- // config has opted in (`[cache_awareness].emit_anthropic_cache_control`),
1109
- // and outputFormat is stream-json. Auto-emit marks the LAST non-empty
1110
- // stable block (context → tools → system priority — the rightmost
1111
- // stable block covers the widest prefix). Skipped when optimizePrompt
1112
- // is on (same rec #5 desync risk).
1113
- //
1114
- // The 1h ttl is forced regardless of `anthropic_ttl_seconds`: 5m
1115
- // breakpoints from caller content are rejected by Anthropic once
1116
- // Claude Code's own 1h-marked session-wrap blocks land ahead of them.
1117
883
  let autoEmittedCacheControlBlock = null;
1118
884
  if (!cacheControlRequestedEarly &&
1119
885
  runtime.cacheAwareness.emitAnthropicCacheControl &&
@@ -1124,9 +890,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1124
890
  const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
1125
891
  if (stablePrefixTokens >= threshold) {
1126
892
  const pp = params.promptParts;
1127
- // Rightmost non-empty stable block — its cache_control breakpoint
1128
- // covers everything above it in the message (the API matches
1129
- // breakpoints in order).
1130
893
  if (pp.context && pp.context.length > 0)
1131
894
  autoEmittedCacheControlBlock = "context";
1132
895
  else if (pp.tools && pp.tools.length > 0)
@@ -1141,12 +904,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1141
904
  }
1142
905
  }
1143
906
  }
1144
- // Rec #4: warn when promptParts has a cacheable stable prefix but no
1145
- // cache_control breakpoint is being emitted (neither explicit nor
1146
- // auto). Either the caller forgot to set `cacheControl` or
1147
- // `[cache_awareness].emit_anthropic_cache_control` is off — both
1148
- // leave the stable prefix bytes unreused across calls, defeating the
1149
- // point of using `promptParts`.
1150
907
  const warnings = [];
1151
908
  if (!cacheControlRequestedEarly &&
1152
909
  autoEmittedCacheControlBlock === null &&
@@ -1168,13 +925,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1168
925
  });
1169
926
  }
1170
927
  }
1171
- // Slice κ: switch from the legacy positional `-p <prompt>` emission
1172
- // to `claude -p --input-format stream-json` and feed a JSON
1173
- // content-blocks payload via stdin. Non-κ callers (no cacheControl,
1174
- // or cacheControl with all flags false) take the existing positional
1175
- // path bit-for-bit. The κ path activates on EITHER an explicit caller
1176
- // opt-in (`cacheControlRequestedEarly`) OR a gateway-driven auto-emit
1177
- // (`autoEmittedCacheControlBlock`).
1178
928
  const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
1179
929
  let stdinPayload;
1180
930
  let cacheControlBlocks;
@@ -1182,9 +932,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1182
932
  if (params.outputFormat !== "stream-json") {
1183
933
  return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
1184
934
  }
1185
- // promptParts is non-null whenever cacheControlRequested is true
1186
- // (explicit opt-in lives in PromptParts; auto-emit guard requires
1187
- // promptParts to be defined).
1188
935
  const effectiveParts = autoEmittedCacheControlBlock !== null
1189
936
  ? {
1190
937
  ...params.promptParts,
@@ -1216,11 +963,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1216
963
  args.push("--output-format", "json");
1217
964
  }
1218
965
  else if (params.outputFormat === "stream-json") {
1219
- // Claude CLI 2.x rejects `--print --output-format stream-json` without
1220
- // `--verbose`: "When using --print, --output-format=stream-json requires
1221
- // --verbose". --verbose only affects what claude logs to stderr; the
1222
- // stream-json stdout payload is unchanged, so the gateway's NDJSON
1223
- // parser is unaffected.
1224
966
  args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
1225
967
  }
1226
968
  }
@@ -1251,7 +993,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
1251
993
  args.push("--strict-mcp-config");
1252
994
  }
1253
995
  }
1254
- // U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
1255
996
  let validatedAgents;
1256
997
  if (params.agents && Object.keys(params.agents).length > 0) {
1257
998
  const result = validateClaudeAgentsMap(params.agents);
@@ -1309,7 +1050,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1309
1050
  const assembledPrompt = inputResolution.assembledPrompt;
1310
1051
  const stablePrefixHash = inputResolution.stablePrefixHash;
1311
1052
  const stablePrefixTokens = inputResolution.stablePrefixTokens;
1312
- // Review integrity check on raw prompt (before optimization)
1313
1053
  const reviewIntegrity = checkReviewIntegrity({ prompt: assembledPrompt });
1314
1054
  if (reviewIntegrity.violations.length > 0) {
1315
1055
  runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
@@ -1330,7 +1070,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1330
1070
  approvalDecision = runtime.approvalManager.decide({
1331
1071
  cli: "codex",
1332
1072
  operation: params.operation,
1333
- prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
1073
+ prompt: assembledPrompt,
1334
1074
  bypassRequested: params.dangerouslyBypassApprovalsAndSandbox,
1335
1075
  fullAuto: params.fullAuto,
1336
1076
  requestedMcpServers,
@@ -1342,9 +1082,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1342
1082
  return createApprovalDeniedResponse(params.operation, approvalDecision);
1343
1083
  }
1344
1084
  }
1345
- // Resume mode: codex exec resume <SESSION_ID|--last> [flags] PROMPT
1346
- // Note: `codex exec resume` does NOT accept sandbox policy flags; the original
1347
- // session's approval policy is inherited. We silently drop fullAuto on resume.
1348
1085
  let sessionPlan;
1349
1086
  try {
1350
1087
  sessionPlan = resolveCodexSessionArgs({
@@ -1365,9 +1102,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1365
1102
  }
1366
1103
  if (resolvedModel)
1367
1104
  args.push("--model", resolvedModel);
1368
- // Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
1369
- // `codex exec resume` rejects all of these (the original session's policy is
1370
- // inherited), so we only emit them when starting a NEW session.
1371
1105
  const sandboxFlags = resolveCodexSandboxFlags({
1372
1106
  sandboxMode: params.sandboxMode,
1373
1107
  askForApproval: params.askForApproval,
@@ -1383,26 +1117,12 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
1383
1117
  if (params.dangerouslyBypassApprovalsAndSandbox) {
1384
1118
  args.push("--dangerously-bypass-approvals-and-sandbox");
1385
1119
  }
1386
- // U23 fix: emit `--json` when the caller asked for JSON output so the
1387
- // codex-json-parser actually receives JSONL events. This is what makes
1388
- // extractUsageAndCost() reachable from the tool surface; without it, the
1389
- // U23 parser is dead code.
1390
1120
  if (params.outputFormat === "json") {
1391
1121
  args.push("--json");
1392
1122
  }
1393
1123
  args.push("--skip-git-repo-check");
1394
- // U26: High-impact feature flags. `--search` is retained as a compatibility
1395
- // input but current `codex exec` no longer accepts it, so the helper warns
1396
- // and emits no argv. `--profile` is accepted for new sessions only. The other
1397
- // flags here are accepted on resume per `codex exec resume --help` and are
1398
- // emitted in both branches.
1399
1124
  let highImpactCleanup;
1400
1125
  if (sessionPlan.mode === "new") {
1401
- // Phase 4 slice ζ: emit working-dir and add-dir on new sessions only.
1402
- // Both flags are listed in CODEX_RESUME_FILTERED_FLAGS — resume inherits
1403
- // the original session's cwd and writable-dir policy, so emitting them
1404
- // on resume would be silently stripped (wasteful + misleading on argv
1405
- // logs). Gating here mirrors `--search` / `--sandbox`.
1406
1126
  if (params.workingDir) {
1407
1127
  args.push("-C", params.workingDir);
1408
1128
  }
@@ -1485,7 +1205,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1485
1205
  const assembledPrompt = inputResolution.assembledPrompt;
1486
1206
  const stablePrefixHash = inputResolution.stablePrefixHash;
1487
1207
  const stablePrefixTokens = inputResolution.stablePrefixTokens;
1488
- // Review integrity check on raw prompt (before optimization)
1489
1208
  const reviewIntegrity = checkReviewIntegrity({
1490
1209
  prompt: assembledPrompt,
1491
1210
  allowedTools: params.allowedTools,
@@ -1509,7 +1228,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1509
1228
  approvalDecision = runtime.approvalManager.decide({
1510
1229
  cli: "gemini",
1511
1230
  operation: params.operation,
1512
- prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
1231
+ prompt: assembledPrompt,
1513
1232
  bypassRequested: params.approvalMode === "yolo" || params.yolo === true,
1514
1233
  fullAuto: false,
1515
1234
  requestedMcpServers,
@@ -1523,8 +1242,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1523
1242
  }
1524
1243
  }
1525
1244
  const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
1526
- // U27: Validate high-impact policy paths and prepend attachment tokens
1527
- // BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
1528
1245
  const highImpact = prepareGeminiHighImpactFlags({
1529
1246
  sandbox: params.sandbox,
1530
1247
  policyFiles: params.policyFiles,
@@ -1541,19 +1258,11 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1541
1258
  return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
1542
1259
  }
1543
1260
  }
1544
- // U21: Emit the prompt via -p/--prompt rather than as a positional argument.
1545
- // Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
1546
- // the documented non-interactive flag and is robust against future CLI mode
1547
- // changes.
1548
1261
  const args = ["-p", effectivePrompt];
1549
1262
  if (resolvedModel)
1550
1263
  args.push("--model", resolvedModel);
1551
1264
  if (effectiveApprovalMode)
1552
1265
  args.push("--approval-mode", effectiveApprovalMode);
1553
- // `--yolo` is functionally identical to `--approval-mode yolo`; emit it only
1554
- // when the caller asked for yolo AND we are not already emitting
1555
- // `--approval-mode yolo` (under mcp_managed the gate forces that mode), so
1556
- // there is never a redundant double auto-approve flag.
1557
1266
  if (params.yolo && effectiveApprovalMode !== "yolo") {
1558
1267
  args.push("--yolo");
1559
1268
  }
@@ -1569,26 +1278,13 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
1569
1278
  sanitizeCliArgValues(params.includeDirs, "includeDirs");
1570
1279
  params.includeDirs.forEach(dir => args.push("--include-directories", dir));
1571
1280
  }
1572
- // U27 high-impact flags (-s / --policy / --admin-policy) appended after the
1573
- // existing flag set so positional ordering relative to `-p` is preserved.
1574
1281
  args.push(...highImpact.args);
1575
- // U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
1576
- // JSON parser is otherwise unreachable from the tool surface and the
1577
- // structured usageMetadata is silently dropped.
1578
- //
1579
- // Phase 4 slice ε: same wiring for `-o stream-json` (NDJSON event stream).
1580
- // Gemini already streams stdout in real-time so the existing 10-minute
1581
- // idle timeout (CLI_IDLE_TIMEOUTS.gemini) covers both modes without
1582
- // adjustment — unlike Claude, no `--include-partial-messages` companion
1583
- // flag is required because Gemini emits assistant `delta` events as part
1584
- // of the default stream-json shape.
1585
1282
  if (params.outputFormat === "json") {
1586
1283
  args.push("-o", "json");
1587
1284
  }
1588
1285
  else if (params.outputFormat === "stream-json") {
1589
1286
  args.push("-o", "stream-json");
1590
1287
  }
1591
- // Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
1592
1288
  if (params.skipTrust) {
1593
1289
  args.push("--skip-trust");
1594
1290
  }
@@ -1619,7 +1315,6 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1619
1315
  const assembledPrompt = inputResolution.assembledPrompt;
1620
1316
  const stablePrefixHash = inputResolution.stablePrefixHash;
1621
1317
  const stablePrefixTokens = inputResolution.stablePrefixTokens;
1622
- // Review integrity check on raw prompt (before optimization)
1623
1318
  const reviewIntegrity = checkReviewIntegrity({
1624
1319
  prompt: assembledPrompt,
1625
1320
  allowedTools: params.allowedTools,
@@ -1644,7 +1339,7 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
1644
1339
  approvalDecision = runtime.approvalManager.decide({
1645
1340
  cli: "grok",
1646
1341
  operation: params.operation,
1647
- prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
1342
+ prompt: assembledPrompt,
1648
1343
  bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
1649
1344
  fullAuto: false,
1650
1345
  requestedMcpServers,
@@ -1779,9 +1474,6 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
1779
1474
  return createApprovalDeniedResponse(params.operation, approvalDecision);
1780
1475
  }
1781
1476
  }
1782
- // Under mcp_managed, force --agent auto-approve so the approval gate's
1783
- // verdict carries through to the CLI invocation (mirrors Grok's --always-approve
1784
- // forcing under mcp_managed).
1785
1477
  const effectivePermissionMode = params.approvalStrategy === "mcp_managed"
1786
1478
  ? "auto-approve"
1787
1479
  : (params.permissionMode ?? "auto-approve");
@@ -1828,15 +1520,6 @@ function selectMistralRecoveryModel(failedModel) {
1828
1520
  ].filter((model) => Boolean(model && model !== failedModel));
1829
1521
  return candidates.find(model => model !== "local");
1830
1522
  }
1831
- /**
1832
- * Phase 4 slice δ post-review: pure helper extracted from
1833
- * `handleMistralRequest` so the retry-path arg-preservation invariants
1834
- * (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
1835
- * without mocking awaitJobOrDefer. Any param the wrapper threads into
1836
- * the FIRST `buildMistralCliInvocation` call MUST also be threaded
1837
- * through here, or a fresh-workspace / budgeted run can degrade on
1838
- * the second attempt.
1839
- */
1840
1523
  export function buildMistralRetryPrep(params, recoveryModel) {
1841
1524
  return buildMistralCliInvocation({
1842
1525
  prompt: params.effectivePrompt,
@@ -1857,13 +1540,11 @@ export function buildMistralRetryPrep(params, recoveryModel) {
1857
1540
  }
1858
1541
  function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
1859
1542
  let finalStdout = stdout;
1860
- // Skip response optimization for JSON output to prevent corrupting structured data
1861
1543
  if (optimizeResponse && outputFormat !== "json") {
1862
1544
  const optimized = optimizeResponseText(finalStdout);
1863
1545
  logOptimizationTokens("response", corrId, finalStdout, optimized);
1864
1546
  finalStdout = optimized;
1865
1547
  }
1866
- // Append review integrity warnings to response text (skip for JSON output to avoid corruption)
1867
1548
  if (prep.reviewIntegrity &&
1868
1549
  prep.reviewIntegrity.violations.length > 0 &&
1869
1550
  outputFormat !== "json") {
@@ -1880,9 +1561,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
1880
1561
  correlationId: corrId,
1881
1562
  sessionId: sessionId || null,
1882
1563
  durationMs,
1883
- // Phase 4 slice β: thread sessionId + home so the Mistral branch of
1884
- // extractUsageAndCost can read `~/.vibe/logs/session/<dir>/meta.json`.
1885
- // Other CLIs ignore the ctx (their usage source is stdout).
1886
1564
  ...extractUsageAndCost(cli, stdout, outputFormat, { sessionId, home: homedir() }),
1887
1565
  exitCode: 0,
1888
1566
  retryCount: 0,
@@ -1912,12 +1590,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
1912
1590
  }
1913
1591
  return response;
1914
1592
  }
1915
- /**
1916
- * Slice 3 helper: compute the cache_ttl_expiring_soon warning for a
1917
- * claude session, if the feature is enabled, the session has prior cache
1918
- * writes, and ttlRemainingMs is below the threshold (30s by default).
1919
- * Returns null when no warning applies.
1920
- */
1921
1593
  function maybeBuildCacheTtlWarning(args) {
1922
1594
  if (args.cli !== "claude")
1923
1595
  return null;
@@ -1946,7 +1618,6 @@ function resolveHandlerRuntime(deps) {
1946
1618
  if (deps.runtime)
1947
1619
  return deps.runtime;
1948
1620
  const asyncDeps = deps;
1949
- // Older HandlerDeps callers may not provide `warn`; default-route to `info`.
1950
1621
  const depLogger = deps.logger;
1951
1622
  const normalizedLogger = {
1952
1623
  info: depLogger.info,
@@ -2000,8 +1671,6 @@ export async function handleGeminiRequest(deps, params) {
2000
1671
  }, runtime);
2001
1672
  deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${prep.effectivePrompt.length}`);
2002
1673
  try {
2003
- // Gemini CLI 0.43 supports `--resume`, but not a supported fresh
2004
- // `--session-id` flag. Fresh sessions emit no session flag.
2005
1674
  const sessionPlan = resolveGeminiSessionPlan({
2006
1675
  sessionId: params.sessionId,
2007
1676
  resumeLatest: params.resumeLatest,
@@ -2019,7 +1688,6 @@ export async function handleGeminiRequest(deps, params) {
2019
1688
  }
2020
1689
  const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
2021
1690
  const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage, worktreeResolution.cwd);
2022
- // Deferred — job still running, return async reference
2023
1691
  if (isDeferredResponse(result)) {
2024
1692
  return buildDeferredToolResponse(result, effectiveSessionIdHint);
2025
1693
  }
@@ -2040,9 +1708,6 @@ export async function handleGeminiRequest(deps, params) {
2040
1708
  return createErrorResponse("gemini", code, stderr, corrId);
2041
1709
  }
2042
1710
  wasSuccessful = true;
2043
- // Post-success session I/O for explicit resume flows. Fresh Gemini sessions
2044
- // are owned by the CLI because the current CLI has no supported fresh
2045
- // session-id flag the gateway can inject.
2046
1711
  let effectiveSessionId = effectiveSessionIdHint;
2047
1712
  if (effectiveSessionId) {
2048
1713
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2131,14 +1796,12 @@ export async function handleGeminiRequestAsync(deps, params) {
2131
1796
  return prep;
2132
1797
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
2133
1798
  try {
2134
- // Gemini CLI 0.43 supports `--resume`, but fresh sessions emit no session flag.
2135
1799
  const sessionPlan = resolveGeminiSessionPlan({
2136
1800
  sessionId: params.sessionId,
2137
1801
  resumeLatest: params.resumeLatest,
2138
1802
  createNewSession: params.createNewSession,
2139
1803
  });
2140
1804
  args.push(...sessionPlan.args);
2141
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
2142
1805
  let effectiveSessionId = sessionPlan.resumed ? params.sessionId : undefined;
2143
1806
  if (effectiveSessionId) {
2144
1807
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2161,13 +1824,8 @@ export async function handleGeminiRequestAsync(deps, params) {
2161
1824
  catch (err) {
2162
1825
  return createErrorResponse("gemini_request_async", 1, "", corrId, err);
2163
1826
  }
2164
- // Start job only after all session I/O succeeds. U23: forward outputFormat
2165
- // so AsyncJobManager records it in the durable store (the manager also
2166
- // surfaces it in the snapshot).
2167
1827
  assertUpstreamCliArgs("gemini", args);
2168
1828
  assertUpstreamCliEnv("gemini", undefined);
2169
- // Slice 1.5: pure async path — no upstream safeFlightStart, so the
2170
- // manager owns both logStart and logComplete for this corrId.
2171
1829
  const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
2172
1830
  const job = deps.asyncJobManager.startJob("gemini", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
2173
1831
  deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
@@ -2244,7 +1902,6 @@ export async function handleGrokRequest(deps, params) {
2244
1902
  }, runtime);
2245
1903
  deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${prep.effectivePrompt.length}`);
2246
1904
  try {
2247
- // Session arg planning (pure, no I/O)
2248
1905
  const sessionResult = resolveGrokSessionArgs({
2249
1906
  sessionId: params.sessionId,
2250
1907
  resumeLatest: params.resumeLatest,
@@ -2260,7 +1917,6 @@ export async function handleGrokRequest(deps, params) {
2260
1917
  }
2261
1918
  const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
2262
1919
  const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
2263
- // Deferred — job still running, return async reference
2264
1920
  if (isDeferredResponse(result)) {
2265
1921
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
2266
1922
  }
@@ -2281,7 +1937,6 @@ export async function handleGrokRequest(deps, params) {
2281
1937
  return createErrorResponse("grok", code, stderr, corrId);
2282
1938
  }
2283
1939
  wasSuccessful = true;
2284
- // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
2285
1940
  let effectiveSessionId = sessionResult.effectiveSessionId;
2286
1941
  if (sessionResult.userProvidedSession && effectiveSessionId) {
2287
1942
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2374,14 +2029,12 @@ export async function handleGrokRequestAsync(deps, params) {
2374
2029
  return prep;
2375
2030
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
2376
2031
  try {
2377
- // Session arg planning (pure, no I/O)
2378
2032
  const sessionResult = resolveGrokSessionArgs({
2379
2033
  sessionId: params.sessionId,
2380
2034
  resumeLatest: params.resumeLatest,
2381
2035
  createNewSession: params.createNewSession,
2382
2036
  });
2383
2037
  args.push(...sessionResult.resumeArgs);
2384
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
2385
2038
  let effectiveSessionId = sessionResult.effectiveSessionId;
2386
2039
  if (sessionResult.userProvidedSession && effectiveSessionId) {
2387
2040
  const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2408,7 +2061,6 @@ export async function handleGrokRequestAsync(deps, params) {
2408
2061
  catch (err) {
2409
2062
  return createErrorResponse("grok_request_async", 1, "", corrId, err);
2410
2063
  }
2411
- // Start job only after all session I/O succeeds
2412
2064
  assertUpstreamCliArgs("grok", args);
2413
2065
  assertUpstreamCliEnv("grok", undefined);
2414
2066
  const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
@@ -2505,8 +2157,6 @@ export async function handleMistralRequest(deps, params) {
2505
2157
  deps.logger.info(`[${corrId}] mistral_request detected stale Vibe model selection; retrying once with ${recoveryModel}`);
2506
2158
  const retryPrep = buildMistralRetryPrep({ ...params, effectivePrompt: prep.effectivePrompt }, recoveryModel);
2507
2159
  const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
2508
- // Reuse the FR handoff built above — the retry preserves corrId,
2509
- // so the manager's logComplete still updates the original row.
2510
2160
  result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
2511
2161
  if (isDeferredResponse(result)) {
2512
2162
  return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -2717,11 +2367,6 @@ export async function handleCodexRequestAsync(deps, params) {
2717
2367
  if (!("args" in prep))
2718
2368
  return prep;
2719
2369
  const { corrId, args, requestedMcpServers, approvalDecision } = prep;
2720
- // U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
2721
- // exactly one place at a time: this scope until startJob succeeds, then
2722
- // AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
2723
- // the job is registered. Any code path that fails to hand it off MUST run
2724
- // it locally.
2725
2370
  const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
2726
2371
  let prepCleanupOwnedHere = prepCleanup !== undefined;
2727
2372
  const runPrepCleanupLocally = () => {
@@ -2736,7 +2381,6 @@ export async function handleCodexRequestAsync(deps, params) {
2736
2381
  }
2737
2382
  };
2738
2383
  try {
2739
- // Pre-start session I/O (async handlers: prevent orphaned jobs)
2740
2384
  let effectiveSessionId = params.sessionId;
2741
2385
  if (!params.createNewSession && !params.sessionId) {
2742
2386
  const activeSession = await deps.sessionManager.getActiveSession("codex");
@@ -2755,9 +2399,6 @@ export async function handleCodexRequestAsync(deps, params) {
2755
2399
  const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
2756
2400
  effectiveSessionId = newSession.id;
2757
2401
  }
2758
- // Slice λ: resolve worktree directive after session I/O so resume reuse
2759
- // can read metadata.worktreePath. A pre-startJob failure here means
2760
- // prepCleanup is still owned locally; run it before returning.
2761
2402
  let worktreeResolution = {};
2762
2403
  try {
2763
2404
  worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
@@ -2766,22 +2407,15 @@ export async function handleCodexRequestAsync(deps, params) {
2766
2407
  runPrepCleanupLocally();
2767
2408
  return createErrorResponse("codex_request_async", 1, "", corrId, err);
2768
2409
  }
2769
- // Start job only after all session I/O succeeds. If startJob throws before
2770
- // registering the record, ownership stays here and we run it in the catch.
2771
2410
  assertUpstreamCliArgs("codex", args);
2772
2411
  assertUpstreamCliEnv("codex", undefined);
2773
2412
  const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
2774
2413
  let job;
2775
2414
  try {
2776
2415
  job = deps.asyncJobManager.startJob("codex", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
2777
- // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
2778
- // status. Release our local ownership claim so the catch path doesn't
2779
- // double-fire.
2780
2416
  prepCleanupOwnedHere = false;
2781
2417
  }
2782
2418
  catch (startErr) {
2783
- // startJob never stored the record → manager won't call onComplete. We
2784
- // still own the cleanup; let the outer catch run it.
2785
2419
  throw startErr;
2786
2420
  }
2787
2421
  deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
@@ -2808,42 +2442,15 @@ export async function handleCodexRequestAsync(deps, params) {
2808
2442
  };
2809
2443
  }
2810
2444
  catch (error) {
2811
- // Pre-start failure: either session I/O threw, or startJob threw before
2812
- // registering the record. In either case the manager will NOT fire
2813
- // prepCleanup, so we must run it here.
2814
2445
  runPrepCleanupLocally();
2815
2446
  return createErrorResponse("codex_request_async", 1, "", corrId, error);
2816
2447
  }
2817
2448
  }
2818
- //──────────────────────────────────────────────────────────────────────────────
2819
- // Claude Code Tool
2820
- //──────────────────────────────────────────────────────────────────────────────
2821
2449
  export function createGatewayServer(deps = {}) {
2822
2450
  const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
2823
2451
  const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, } = runtime;
2824
- // `flightRecorder` is destructured into closure scope so the session_get
2825
- // handler (see ~line 5590) has the FlightRecorderQuery read capability
2826
- // available without re-resolving runtime. Slice 2 will populate the
2827
- // `cacheState` field of session_get's response from this read surface.
2828
- // `cacheAwareness` is the loaded [cache_awareness] block (config.ts).
2829
2452
  void flightRecorder;
2830
2453
  void cacheAwareness;
2831
- // Structural invariant: tools register iff ALL THREE conditions hold:
2832
- // (1) persistence.backend !== "none" — the operator/config has not
2833
- // explicitly disabled durable persistence;
2834
- // (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
2835
- // agrees (loadPersistenceConfig sets this iff backend is one of
2836
- // sqlite/postgres/memory);
2837
- // (3) asyncJobManager.hasStore() === true — the runtime manager
2838
- // actually has a store attached (isolate-mode runtimes use null).
2839
- //
2840
- // Each guard closes a distinct re-entry path for the silent-loss footgun:
2841
- // - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
2842
- // and re-advertise the async tools while reporting backend='none' in
2843
- // llm_process_health — exactly contradicting SPEC CLAIM 4f.
2844
- // - Without (2), config that opts out is ignored.
2845
- // - Without (3), a null-store manager (isolate-mode / HTTP per-session)
2846
- // accepts registrations that have nowhere to persist results.
2847
2454
  const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
2848
2455
  const server = newGatewayMcpServer();
2849
2456
  registerBaseResources(server, runtime);
@@ -2880,7 +2487,6 @@ export function createGatewayServer(deps = {}) {
2880
2487
  .enum(CLAUDE_PERMISSION_MODES)
2881
2488
  .optional()
2882
2489
  .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
2883
- // U25 — Claude high-impact features
2884
2490
  agent: z
2885
2491
  .string()
2886
2492
  .optional()
@@ -2920,7 +2526,6 @@ export function createGatewayServer(deps = {}) {
2920
2526
  .boolean()
2921
2527
  .optional()
2922
2528
  .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
2923
- // Phase 4 slice η — Claude reliability + structured-output parity
2924
2529
  fallbackModel: z
2925
2530
  .string()
2926
2531
  .min(1)
@@ -2930,12 +2535,10 @@ export function createGatewayServer(deps = {}) {
2930
2535
  .union([z.string(), z.record(z.string(), z.unknown())])
2931
2536
  .optional()
2932
2537
  .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
2933
- // Phase 4 slice ζ — Claude additional-workspace-dirs parity
2934
2538
  addDir: z
2935
2539
  .array(z.string())
2936
2540
  .optional()
2937
2541
  .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
2938
- // Claude session / settings / tools surface (2.x)
2939
2542
  noSessionPersistence: z
2940
2543
  .boolean()
2941
2544
  .optional()
@@ -3028,18 +2631,8 @@ export function createGatewayServer(deps = {}) {
3028
2631
  const { corrId, args } = prep;
3029
2632
  let durationMs = 0;
3030
2633
  let wasSuccessful = false;
3031
- // Session resolution happens BEFORE safeFlightStart so that:
3032
- // (1) the TTL warning reads the PRIOR session's lastWriteAt
3033
- // rather than the row about to be inserted (codex-r1/F1).
3034
- // (2) the flight-recorder row is tagged with effectiveSessionId
3035
- // (the session the CLI will actually resume), not the raw
3036
- // user-provided sessionId.
3037
2634
  let effectiveSessionId = sessionId;
3038
2635
  let useContinue = continueSession;
3039
- // Guard the active-session lookup: in some test harnesses the
3040
- // sessionManager is undefined; the original try-catch wrapped this
3041
- // block, so we replicate that tolerance here. Failure leaves
3042
- // effectiveSessionId as the user-provided sessionId.
3043
2636
  let activeSession = null;
3044
2637
  try {
3045
2638
  activeSession = await sessionManager.getActiveSession("claude");
@@ -3054,16 +2647,11 @@ export function createGatewayServer(deps = {}) {
3054
2647
  if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
3055
2648
  useContinue = true;
3056
2649
  }
3057
- // Slice 3: if the resolved session has a near-expiry Anthropic
3058
- // cache breakpoint, attach a structured warning (NOT a hard error)
3059
- // to the response. Computed BEFORE safeFlightStart so the current
3060
- // row does not skew lastRequestAt.
3061
2650
  const ttlWarning = maybeBuildCacheTtlWarning({
3062
2651
  runtime,
3063
2652
  sessionId: effectiveSessionId,
3064
2653
  cli: "claude",
3065
2654
  });
3066
- // Rec #4: include any prep-time warnings (e.g. cacheable_prefix_uncached).
3067
2655
  const warnings = [
3068
2656
  ...(ttlWarning ? [ttlWarning] : []),
3069
2657
  ...(prep.warnings ?? []),
@@ -3087,8 +2675,6 @@ export function createGatewayServer(deps = {}) {
3087
2675
  args.push("--session-id", effectiveSessionId);
3088
2676
  await sessionManager.updateSessionUsage(effectiveSessionId);
3089
2677
  }
3090
- // Slice λ: resolve worktree directive into spawn cwd. Done after
3091
- // session resolution so resume reuse can read metadata.worktreePath.
3092
2678
  let worktreeResolution = {};
3093
2679
  try {
3094
2680
  worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
@@ -3096,11 +2682,9 @@ export function createGatewayServer(deps = {}) {
3096
2682
  catch (err) {
3097
2683
  return createErrorResponse("claude_request", 1, "", corrId, err);
3098
2684
  }
3099
- // Idle timeout only for stream-json (text/json produce no output until done)
3100
2685
  const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
3101
2686
  const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
3102
2687
  const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage, prep.stdinPayload, worktreeResolution.cwd);
3103
- // Deferred — job still running, return async reference
3104
2688
  if (isDeferredResponse(result)) {
3105
2689
  return buildDeferredToolResponse(result, effectiveSessionId);
3106
2690
  }
@@ -3118,9 +2702,6 @@ export function createGatewayServer(deps = {}) {
3118
2702
  errorMessage: stderr || `Exit code ${code}`,
3119
2703
  status: "failed",
3120
2704
  }, runtime);
3121
- // Slice 3: attach any computed warnings to the error response so
3122
- // the caller still sees cache_ttl_expiring_soon when the CLI
3123
- // happens to fail for an unrelated reason.
3124
2705
  const errResp = createErrorResponse("claude", code, stderr, corrId);
3125
2706
  if (warnings.length > 0) {
3126
2707
  errResp.warnings = warnings;
@@ -3128,7 +2709,6 @@ export function createGatewayServer(deps = {}) {
3128
2709
  return errResp;
3129
2710
  }
3130
2711
  wasSuccessful = true;
3131
- // If we used a session ID and it's not tracked yet, create a session record
3132
2712
  if (effectiveSessionId) {
3133
2713
  const existingSession = await sessionManager.getSession(effectiveSessionId);
3134
2714
  if (!existingSession) {
@@ -3136,7 +2716,6 @@ export function createGatewayServer(deps = {}) {
3136
2716
  }
3137
2717
  }
3138
2718
  logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
3139
- // Parse stream-json NDJSON output to extract result text
3140
2719
  if (outputFormat === "stream-json") {
3141
2720
  const parsed = parseStreamJson(stdout);
3142
2721
  if (parsed.costUsd !== null) {
@@ -3203,9 +2782,6 @@ export function createGatewayServer(deps = {}) {
3203
2782
  performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
3204
2783
  }
3205
2784
  });
3206
- //──────────────────────────────────────────────────────────────────────────────
3207
- // Codex Tool
3208
- //──────────────────────────────────────────────────────────────────────────────
3209
2785
  server.tool("codex_request", {
3210
2786
  prompt: z
3211
2787
  .string()
@@ -3270,14 +2846,10 @@ export function createGatewayServer(deps = {}) {
3270
2846
  .boolean()
3271
2847
  .default(false)
3272
2848
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3273
- // U23: emit `--json` so the codex-json-parser surfaces input/output/cache
3274
- // tokens (and any cost) through extractUsageAndCost. Without "json", the
3275
- // parser is unreachable and Codex usage is never reported.
3276
2849
  outputFormat: z
3277
2850
  .enum(["text", "json"])
3278
2851
  .default("text")
3279
2852
  .describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
3280
- // U26: high-impact feature flags. All optional.
3281
2853
  outputSchema: z
3282
2854
  .union([z.string(), z.record(z.string(), z.unknown())])
3283
2855
  .optional()
@@ -3307,7 +2879,6 @@ export function createGatewayServer(deps = {}) {
3307
2879
  .boolean()
3308
2880
  .optional()
3309
2881
  .describe("Codex --ignore-rules: skip project rule files for this run."),
3310
- // Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
3311
2882
  workingDir: z
3312
2883
  .string()
3313
2884
  .min(1)
@@ -3365,15 +2936,7 @@ export function createGatewayServer(deps = {}) {
3365
2936
  stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
3366
2937
  }, runtime);
3367
2938
  logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prep.effectivePrompt.length}`);
3368
- // U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
3369
- // guarantees the cleanup runs exactly once — inline for direct
3370
- // execution, on terminal status for the job-backed path (sync
3371
- // completion or deferred). The outer finally MUST NOT clean again.
3372
2939
  const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
3373
- // Slice λ: resolve worktree directive into spawn cwd. Codex has no
3374
- // in-handler session resolution prior to spawn (session lookup is
3375
- // lazy via `codex exec resume`), so the user-supplied sessionId is
3376
- // the only reuse key.
3377
2940
  let worktreeResolution = {};
3378
2941
  try {
3379
2942
  worktreeResolution = await resolveWorktreeForRequest(worktree, sessionId, runtime);
@@ -3384,8 +2947,6 @@ export function createGatewayServer(deps = {}) {
3384
2947
  try {
3385
2948
  const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
3386
2949
  const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
3387
- // Deferred — job still running, return async reference. Cleanup
3388
- // ownership belongs to AsyncJobManager via onComplete.
3389
2950
  if (isDeferredResponse(result)) {
3390
2951
  return buildDeferredToolResponse(result, sessionId);
3391
2952
  }
@@ -3406,7 +2967,6 @@ export function createGatewayServer(deps = {}) {
3406
2967
  return createErrorResponse("codex", code, stderr, corrId);
3407
2968
  }
3408
2969
  wasSuccessful = true;
3409
- // Track session usage
3410
2970
  let effectiveSessionId = sessionId;
3411
2971
  if (!createNewSession && !sessionId) {
3412
2972
  const activeSession = await sessionManager.getActiveSession("codex");
@@ -3468,12 +3028,8 @@ export function createGatewayServer(deps = {}) {
3468
3028
  finally {
3469
3029
  const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
3470
3030
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3471
- // Cleanup is owned by awaitJobOrDefer's contract; nothing to do here.
3472
3031
  }
3473
3032
  });
3474
- //──────────────────────────────────────────────────────────────────────────────
3475
- // U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
3476
- //──────────────────────────────────────────────────────────────────────────────
3477
3033
  server.tool("codex_fork_session", {
3478
3034
  prompt: z
3479
3035
  .string()
@@ -3510,8 +3066,6 @@ export function createGatewayServer(deps = {}) {
3510
3066
  const startTime = Date.now();
3511
3067
  let durationMs = 0;
3512
3068
  let wasSuccessful = false;
3513
- // Enforce mutual exclusion at tool boundary (Zod records the params but
3514
- // the SDK's `.tool(...)` does not accept top-level refines).
3515
3069
  if (sessionId && forkLast) {
3516
3070
  return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("sessionId and forkLast are mutually exclusive"));
3517
3071
  }
@@ -3527,11 +3081,6 @@ export function createGatewayServer(deps = {}) {
3527
3081
  }
3528
3082
  const cliInfo = getCliInfo();
3529
3083
  const resolvedModel = resolveModelAlias("codex", model, cliInfo);
3530
- // Compose argv: forkArgs already starts with `fork`. Inject model and
3531
- // sandbox/approval flags BEFORE the positional <sessionId|--last> +
3532
- // prompt to keep them as flags rather than positionals. forkArgs layout
3533
- // is either ["fork", "--last", prompt] or ["fork", sessionId, prompt];
3534
- // we splice flags right after "fork".
3535
3084
  const flagSegment = [];
3536
3085
  if (resolvedModel)
3537
3086
  flagSegment.push("--model", resolvedModel);
@@ -3568,9 +3117,6 @@ export function createGatewayServer(deps = {}) {
3568
3117
  performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
3569
3118
  }
3570
3119
  });
3571
- //──────────────────────────────────────────────────────────────────────────────
3572
- // Gemini Tool
3573
- //──────────────────────────────────────────────────────────────────────────────
3574
3120
  server.tool("gemini_request", {
3575
3121
  prompt: z
3576
3122
  .string()
@@ -3621,11 +3167,6 @@ export function createGatewayServer(deps = {}) {
3621
3167
  .boolean()
3622
3168
  .default(false)
3623
3169
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3624
- // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
3625
- // remains text so existing callers see no behavior change. Phase 4 slice
3626
- // ε adds `stream-json` (NDJSON event stream parsed by
3627
- // parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
3628
- // semantics covered by Gemini's existing real-time stdout streaming).
3629
3170
  outputFormat: z
3630
3171
  .enum(["text", "json", "stream-json"])
3631
3172
  .default("text")
@@ -3672,9 +3213,6 @@ export function createGatewayServer(deps = {}) {
3672
3213
  worktree,
3673
3214
  });
3674
3215
  });
3675
- //──────────────────────────────────────────────────────────────────────────────
3676
- // Grok Tool
3677
- //──────────────────────────────────────────────────────────────────────────────
3678
3216
  server.tool("grok_request", {
3679
3217
  prompt: z
3680
3218
  .string()
@@ -3745,13 +3283,11 @@ export function createGatewayServer(deps = {}) {
3745
3283
  .default(false)
3746
3284
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
3747
3285
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
3748
- // Phase 4 slice ζ — Grok working-directory parity.
3749
3286
  workingDir: z
3750
3287
  .string()
3751
3288
  .min(1)
3752
3289
  .optional()
3753
3290
  .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
3754
- // Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
3755
3291
  sandbox: z
3756
3292
  .string()
3757
3293
  .min(1)
@@ -3819,9 +3355,6 @@ export function createGatewayServer(deps = {}) {
3819
3355
  worktree,
3820
3356
  });
3821
3357
  });
3822
- //──────────────────────────────────────────────────────────────────────────────
3823
- // Mistral Vibe Tool
3824
- //──────────────────────────────────────────────────────────────────────────────
3825
3358
  server.tool("mistral_request", {
3826
3359
  prompt: z
3827
3360
  .string()
@@ -3892,7 +3425,6 @@ export function createGatewayServer(deps = {}) {
3892
3425
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
3893
3426
  maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
3894
3427
  maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
3895
- // Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
3896
3428
  workingDir: z
3897
3429
  .string()
3898
3430
  .min(1)
@@ -3932,16 +3464,6 @@ export function createGatewayServer(deps = {}) {
3932
3464
  worktree,
3933
3465
  });
3934
3466
  });
3935
- //──────────────────────────────────────────────────────────────────────────────
3936
- // Async Long-Running Job Tools (No Time-Bound LLM Execution)
3937
- //
3938
- // STRUCTURAL INVARIANT: these tools are only registered when a real job
3939
- // store is attached (`persistence.asyncJobsEnabled === true`). When the
3940
- // operator has configured `[persistence].backend = "none"`, none of the
3941
- // *_request_async / llm_job_* tools exist in the MCP tool list at all —
3942
- // orchestrating agents get a clean "tool not found" signal at connect
3943
- // time instead of silent in-memory loss after the 1-hour TTL.
3944
- //──────────────────────────────────────────────────────────────────────────────
3945
3467
  if (asyncJobsEnabled) {
3946
3468
  server.tool("claude_request_async", {
3947
3469
  prompt: z
@@ -3975,7 +3497,6 @@ export function createGatewayServer(deps = {}) {
3975
3497
  .enum(CLAUDE_PERMISSION_MODES)
3976
3498
  .optional()
3977
3499
  .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
3978
- // U25 — Claude high-impact features
3979
3500
  agent: z
3980
3501
  .string()
3981
3502
  .optional()
@@ -4015,7 +3536,6 @@ export function createGatewayServer(deps = {}) {
4015
3536
  .boolean()
4016
3537
  .optional()
4017
3538
  .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
4018
- // Phase 4 slice η — Claude reliability + structured-output parity
4019
3539
  fallbackModel: z
4020
3540
  .string()
4021
3541
  .min(1)
@@ -4025,12 +3545,10 @@ export function createGatewayServer(deps = {}) {
4025
3545
  .union([z.string(), z.record(z.string(), z.unknown())])
4026
3546
  .optional()
4027
3547
  .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
4028
- // Phase 4 slice ζ — Claude additional-workspace-dirs parity
4029
3548
  addDir: z
4030
3549
  .array(z.string())
4031
3550
  .optional()
4032
3551
  .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
4033
- // Claude session / settings / tools surface (2.x)
4034
3552
  noSessionPersistence: z
4035
3553
  .boolean()
4036
3554
  .optional()
@@ -4120,7 +3638,6 @@ export function createGatewayServer(deps = {}) {
4120
3638
  return prep;
4121
3639
  const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
4122
3640
  try {
4123
- // Session management (before job start for async)
4124
3641
  let effectiveSessionId = sessionId;
4125
3642
  let useContinue = continueSession;
4126
3643
  const activeSession = await sessionManager.getActiveSession("claude");
@@ -4144,14 +3661,11 @@ export function createGatewayServer(deps = {}) {
4144
3661
  await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
4145
3662
  }
4146
3663
  }
4147
- // Slice 3: TTL warning on resume (async path too).
4148
3664
  const ttlWarning = maybeBuildCacheTtlWarning({
4149
3665
  runtime,
4150
3666
  sessionId: effectiveSessionId,
4151
3667
  cli: "claude",
4152
3668
  });
4153
- // Slice λ: resolve worktree directive after session metadata is
4154
- // settled so resume reuse can read metadata.worktreePath.
4155
3669
  let worktreeResolution = {};
4156
3670
  try {
4157
3671
  worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
@@ -4159,7 +3673,6 @@ export function createGatewayServer(deps = {}) {
4159
3673
  catch (err) {
4160
3674
  return createErrorResponse("claude_request_async", 1, "", corrId, err);
4161
3675
  }
4162
- // Idle timeout only for stream-json (text/json produce no output until done)
4163
3676
  const effectiveIdleTimeout = outputFormat === "stream-json"
4164
3677
  ? resolveIdleTimeout("claude", idleTimeoutMs)
4165
3678
  : undefined;
@@ -4185,8 +3698,6 @@ export function createGatewayServer(deps = {}) {
4185
3698
  if (worktreeResolution.worktreePath) {
4186
3699
  asyncResponse.worktreePath = worktreeResolution.worktreePath;
4187
3700
  }
4188
- // Rec #4: include any prep-time warnings (e.g.
4189
- // cacheable_prefix_uncached) alongside ttlWarning.
4190
3701
  const mergedWarnings = [
4191
3702
  ...(ttlWarning ? [ttlWarning] : []),
4192
3703
  ...(prep.warnings ?? []),
@@ -4270,12 +3781,10 @@ export function createGatewayServer(deps = {}) {
4270
3781
  .boolean()
4271
3782
  .default(false)
4272
3783
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4273
- // U23: emit `--json` to enable JSONL event-stream parsing for token usage.
4274
3784
  outputFormat: z
4275
3785
  .enum(["text", "json"])
4276
3786
  .default("text")
4277
3787
  .describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
4278
- // U26: high-impact feature flags. All optional.
4279
3788
  outputSchema: z
4280
3789
  .union([z.string(), z.record(z.string(), z.unknown())])
4281
3790
  .optional()
@@ -4290,7 +3799,6 @@ export function createGatewayServer(deps = {}) {
4290
3799
  images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
4291
3800
  ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
4292
3801
  ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
4293
- // Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
4294
3802
  workingDir: z
4295
3803
  .string()
4296
3804
  .min(1)
@@ -4387,11 +3895,6 @@ export function createGatewayServer(deps = {}) {
4387
3895
  .boolean()
4388
3896
  .default(false)
4389
3897
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4390
- // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
4391
- // remains text so existing callers see no behavior change. Phase 4 slice
4392
- // ε adds `stream-json` (NDJSON event stream parsed by
4393
- // parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
4394
- // semantics covered by Gemini's existing real-time stdout streaming).
4395
3898
  outputFormat: z
4396
3899
  .enum(["text", "json", "stream-json"])
4397
3900
  .default("text")
@@ -4506,13 +4009,11 @@ export function createGatewayServer(deps = {}) {
4506
4009
  .default(false)
4507
4010
  .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
4508
4011
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
4509
- // Phase 4 slice ζ — Grok working-directory parity.
4510
4012
  workingDir: z
4511
4013
  .string()
4512
4014
  .min(1)
4513
4015
  .optional()
4514
4016
  .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
4515
- // Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
4516
4017
  sandbox: z
4517
4018
  .string()
4518
4019
  .min(1)
@@ -4648,7 +4149,6 @@ export function createGatewayServer(deps = {}) {
4648
4149
  maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
4649
4150
  maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
4650
4151
  maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
4651
- // Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
4652
4152
  workingDir: z
4653
4153
  .string()
4654
4154
  .min(1)
@@ -4744,7 +4244,6 @@ export function createGatewayServer(deps = {}) {
4744
4244
  isError: true,
4745
4245
  };
4746
4246
  }
4747
- // Parse stream-json output for Claude async jobs
4748
4247
  const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
4749
4248
  let parsed;
4750
4249
  if (outputFormat === "stream-json" && result.stdout) {
@@ -4804,14 +4303,7 @@ export function createGatewayServer(deps = {}) {
4804
4303
  ],
4805
4304
  };
4806
4305
  });
4807
- } // end if (asyncJobsEnabled)
4808
- // Read back any persisted request (sync OR async) by its correlation id.
4809
- // Registered unconditionally — it reads the flight recorder, which is
4810
- // independent of async-job persistence. Every sync/async response echoes
4811
- // its id in `structuredContent.correlationId`; pass that id here to recover
4812
- // the persisted prompt/response after the inline result is gone. With flight
4813
- // recording disabled (LLM_GATEWAY_LOGS_DB=none → NoopFlightRecorder) the
4814
- // query yields no rows and this returns the "not found" shape.
4306
+ }
4815
4307
  server.tool("llm_request_result", {
4816
4308
  correlationId: z
4817
4309
  .string()
@@ -4882,9 +4374,6 @@ export function createGatewayServer(deps = {}) {
4882
4374
  ],
4883
4375
  };
4884
4376
  });
4885
- //──────────────────────────────────────────────────────────────────────────────
4886
- // Approval Audit Tools
4887
- //──────────────────────────────────────────────────────────────────────────────
4888
4377
  server.tool("approval_list", {
4889
4378
  limit: z
4890
4379
  .number()
@@ -4912,9 +4401,6 @@ export function createGatewayServer(deps = {}) {
4912
4401
  ],
4913
4402
  };
4914
4403
  });
4915
- //──────────────────────────────────────────────────────────────────────────────
4916
- // List Models Tool
4917
- //──────────────────────────────────────────────────────────────────────────────
4918
4404
  server.tool("list_models", {
4919
4405
  cli: z
4920
4406
  .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
@@ -4993,9 +4479,6 @@ export function createGatewayServer(deps = {}) {
4993
4479
  };
4994
4480
  }
4995
4481
  });
4996
- //──────────────────────────────────────────────────────────────────────────────
4997
- // Session Management Tools
4998
- //──────────────────────────────────────────────────────────────────────────────
4999
4482
  server.tool("session_create", {
5000
4483
  cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
5001
4484
  description: z.string().optional().describe("Session description"),
@@ -5171,15 +4654,6 @@ export function createGatewayServer(deps = {}) {
5171
4654
  };
5172
4655
  }
5173
4656
  const activeSession = await sessionManager.getActiveSession(session.cli);
5174
- // Slice 2: project a compact cacheState view from the flight
5175
- // recorder at read time. NOT persisted on the Session interface
5176
- // (sessions.json stays content-free per the project invariant).
5177
- // The field is OMITTED entirely (not null, not empty object) when
5178
- // the session has zero rows in the flight recorder so the response
5179
- // stays compact for fresh sessions.
5180
- //
5181
- // Slice 3: include ttlRemainingMs derived from the gateway's
5182
- // configured TTL policy. Null for non-claude sessions.
5183
4657
  let cacheState;
5184
4658
  try {
5185
4659
  const stats = computeSessionCacheStats(flightRecorder, session.id);
@@ -5248,16 +4722,8 @@ export function createGatewayServer(deps = {}) {
5248
4722
  });
5249
4723
  return server;
5250
4724
  }
5251
- //──────────────────────────────────────────────────────────────────────────────
5252
- // Async Initialization
5253
- //──────────────────────────────────────────────────────────────────────────────
5254
4725
  async function initializeSessionManager() {
5255
4726
  const config = loadConfig();
5256
- // Slice λ: file-backed sessions get a cleanup hook that tears down any
5257
- // git worktrees recorded on session.metadata.worktreePath. PG-backed
5258
- // sessions skip the hook (multi-tenant deployments don't necessarily
5259
- // own a single filesystem); revisit if/when worktree support extends
5260
- // there.
5261
4727
  const worktreeCleanupHook = createWorktreeSessionCleanupHook(logger);
5262
4728
  if (config.database) {
5263
4729
  logger.info("Initializing PostgreSQL session manager");
@@ -5275,9 +4741,6 @@ async function initializeSessionManager() {
5275
4741
  }
5276
4742
  resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));
5277
4743
  }
5278
- //──────────────────────────────────────────────────────────────────────────────
5279
- // Health Check Resource (only if using PostgreSQL)
5280
- //──────────────────────────────────────────────────────────────────────────────
5281
4744
  function registerHealthResource(server) {
5282
4745
  if (db) {
5283
4746
  server.registerResource("health", "health://status", {
@@ -5298,7 +4761,6 @@ function registerHealthResource(server) {
5298
4761
  });
5299
4762
  logger.info("Health check resource registered");
5300
4763
  }
5301
- // Process health resource (always available, not dependent on DB)
5302
4764
  server.registerResource("process-health", "metrics://process-health", {
5303
4765
  title: "Process Health",
5304
4766
  description: "Async job health (CPU, memory, zombie detection)",
@@ -5317,13 +4779,9 @@ function registerHealthResource(server) {
5317
4779
  });
5318
4780
  logger.info("Process health resource registered");
5319
4781
  }
5320
- //──────────────────────────────────────────────────────────────────────────────
5321
- // Graceful Shutdown
5322
- //──────────────────────────────────────────────────────────────────────────────
5323
4782
  async function shutdown(signal) {
5324
4783
  logger.info(`Received ${signal}, shutting down gracefully...`);
5325
4784
  try {
5326
- // Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
5327
4785
  await killAllProcessGroups();
5328
4786
  logger.info("All process groups terminated");
5329
4787
  if (activeHttpGateway) {
@@ -5353,9 +4811,6 @@ async function shutdown(signal) {
5353
4811
  }
5354
4812
  process.on("SIGTERM", () => shutdown("SIGTERM"));
5355
4813
  process.on("SIGINT", () => shutdown("SIGINT"));
5356
- //──────────────────────────────────────────────────────────────────────────────
5357
- // Server Startup
5358
- //──────────────────────────────────────────────────────────────────────────────
5359
4814
  async function main() {
5360
4815
  startWindowsBootstrapperSelfHeal();
5361
4816
  const args = process.argv.slice(2);
@@ -5419,7 +4874,6 @@ async function main() {
5419
4874
  process.env.MCP_TRANSPORT ||
5420
4875
  "stdio";
5421
4876
  logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
5422
- // Initialize session manager first
5423
4877
  await initializeSessionManager();
5424
4878
  const serverDeps = {
5425
4879
  sessionManager,
@@ -5446,14 +4900,11 @@ async function main() {
5446
4900
  activeServer = createGatewayServer({
5447
4901
  ...serverDeps,
5448
4902
  });
5449
- // Register health check resource if using PostgreSQL
5450
4903
  registerHealthResource(activeServer);
5451
4904
  const transport = new StdioServerTransport();
5452
4905
  await activeServer.connect(transport);
5453
4906
  logger.info("llm-cli-gateway MCP server connected and ready");
5454
4907
  }
5455
- // Guard: only auto-start when run directly (not imported for testing)
5456
- // Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
5457
4908
  const __entryUrl = entrypointFileURL(process.argv[1]);
5458
4909
  if (__entryUrl === import.meta.url) {
5459
4910
  main().catch(error => {