npm - llm-cli-gateway - Versions diffs - 1.17.3 → 1.17.5 - Mend

llm-cli-gateway 1.17.3 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

package/CHANGELOG.md +45 -0
package/README.md +1 -1
package/dist/approval-manager.js +0 -8
package/dist/async-job-manager.d.ts +0 -113
package/dist/async-job-manager.js +6 -124
package/dist/cache-stats.d.ts +0 -89
package/dist/cache-stats.js +0 -62
package/dist/claude-mcp-config.js +0 -1
package/dist/cli-updater.d.ts +0 -8
package/dist/cli-updater.js +0 -12
package/dist/codex-json-parser.d.ts +0 -20
package/dist/codex-json-parser.js +0 -21
package/dist/config.d.ts +0 -31
package/dist/config.js +2 -72
package/dist/db.d.ts +0 -18
package/dist/db.js +0 -22
package/dist/doctor.d.ts +0 -49
package/dist/doctor.js +0 -47
package/dist/endpoint-exposure.js +0 -1
package/dist/executor.d.ts +0 -19
package/dist/executor.js +3 -38
package/dist/flight-recorder.d.ts +0 -26
package/dist/flight-recorder.js +1 -70
package/dist/gemini-json-parser.d.ts +0 -25
package/dist/gemini-json-parser.js +0 -28
package/dist/health.d.ts +0 -3
package/dist/health.js +0 -3
package/dist/index.d.ts +12 -208
package/dist/index.js +116 -588
package/dist/job-store.d.ts +0 -74
package/dist/job-store.js +1 -73
package/dist/logger.d.ts +0 -7
package/dist/logger.js +0 -6
package/dist/migrate-sessions.d.ts +0 -3
package/dist/migrate-sessions.js +0 -16
package/dist/migrate.js +1 -18
package/dist/mistral-meta-json-parser.js +0 -67
package/dist/model-registry.js +0 -13
package/dist/pricing.d.ts +0 -46
package/dist/pricing.js +0 -47
package/dist/process-monitor.d.ts +0 -15
package/dist/process-monitor.js +2 -31
package/dist/prompt-parts.d.ts +6 -31
package/dist/prompt-parts.js +0 -11
package/dist/provider-status.d.ts +0 -8
package/dist/provider-status.js +0 -11
package/dist/request-helpers.d.ts +4 -316
package/dist/request-helpers.js +13 -231
package/dist/resources.d.ts +0 -20
package/dist/resources.js +1 -34
package/dist/retry.d.ts +0 -45
package/dist/retry.js +3 -40
package/dist/session-manager-pg.d.ts +0 -32
package/dist/session-manager-pg.js +0 -32
package/dist/session-manager.d.ts +0 -21
package/dist/session-manager.js +1 -15
package/dist/stream-json-parser.d.ts +0 -18
package/dist/stream-json-parser.js +0 -22
package/dist/upstream-contracts.d.ts +0 -55
package/dist/upstream-contracts.js +86 -64
package/dist/validation-orchestrator.js +0 -3
package/dist/worktree-manager.d.ts +0 -9
package/dist/worktree-manager.js +0 -21
package/package.json +1 -1

package/dist/index.js (changed)

@@ -35,7 +35,6 @@ import { printDoctorJson } from "./doctor.js";
 import { registerValidationTools } from "./validation-tools.js";
 import { assertUpstreamCliArgs, assertUpstreamCliEnv, buildUpstreamContractReport, } from "./upstream-contracts.js";
 import { entrypointFileURL } from "./entrypoint-url.js";
-// Simple logger that writes to stderr (stdout is used for MCP protocol)
 const logger = {
     info: (message, ...args) => {
         console.error(`[INFO] ${new Date().toISOString()} - ${message}`, ...args);
@@ -94,10 +93,6 @@ function logOptimizationTokens(kind, correlationId, original, optimized) {
     const reduction = originalTokens === 0 ? 0 : ((originalTokens - optimizedTokens) / originalTokens) * 100;
     logger.info(`[${correlationId}] ${kind} tokens ${originalTokens} → ${optimizedTokens} (${reduction.toFixed(1)}% reduction)`);
 }
-// Sync-to-async deadline: if a sync tool's CLI call hasn't finished within this
-// window, the tool returns a deferred async job reference instead of blocking
-// until the MCP client's tool-call timeout fires (~60s in many runtimes).
-// Configurable via SYNC_DEADLINE_MS env var. Set to 0 to disable (pure sync).
 const SYNC_DEADLINE_MS = (() => {
     const env = process.env.SYNC_DEADLINE_MS;
     if (env !== undefined) {
@@ -105,11 +100,8 @@ const SYNC_DEADLINE_MS = (() => {
         if (Number.isFinite(parsed) && parsed >= 0)
             return parsed;
     }
-    return 45_000; // 45s default — safely under the 60s MCP client cap
+    return 45_000;
 })();
-//──────────────────────────────────────────────────────────────────────────────
-// Skills loader — reads .agents/skills/*/SKILL.md at startup
-//──────────────────────────────────────────────────────────────────────────────
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = dirname(__filename);
 const SKILLS_DIR = join(__dirname, "..", ".agents", "skills");
@@ -124,7 +116,6 @@ function packageVersion() {
             return parsed.version || "unknown";
         }
         catch {
-            // Try next candidate.
         }
     }
     return "unknown";
@@ -137,24 +128,19 @@ function loadSkills() {
             const skillPath = join(SKILLS_DIR, dir.name, "SKILL.md");
             try {
                 const content = readFileSync(skillPath, "utf-8");
-                // Extract description from YAML frontmatter
                 const descMatch = content.match(/^---[\s\S]*?description:\s*(.+?)$/m);
                 const description = descMatch?.[1]?.trim() || dir.name;
                 skills.push({ name: dir.name, content, description });
             }
             catch {
-                // Skill file missing or unreadable — skip silently
             }
         }
     }
     catch {
-        // Skills directory missing — not fatal
     }
     return skills;
 }
 const loadedSkills = loadSkills();
-// L1: Compact server instructions (~200 tokens) — injected into every client's
-// system prompt at connection time. Covers key patterns + pointers to L2 resources.
 const SERVER_INSTRUCTIONS = `llm-cli-gateway: Multi-LLM orchestration via MCP.
 Tools: claude_request, codex_request, gemini_request, grok_request, mistral_request (sync) | *_request_async (async)
@@ -175,17 +161,11 @@ ${loadedSkills.map(s => `- skills://${s.name} — ${s.description}`).join("\n")}
 function newGatewayMcpServer() {
     return new McpServer({ name: "llm-cli-gateway", version: "1.0.0" }, { instructions: SERVER_INSTRUCTIONS });
 }
-// Global state (initialized asynchronously)
 let sessionManager;
 let db = null;
 const performanceMetrics = new PerformanceMetrics();
 let resourceProvider;
 let flightRecorder = null;
-// Resolved persistence config — single source of truth for the async-job backend.
-// Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
-// When backend = "none", the JobStore is null AND *_request_async tools are not
-// registered (see createGatewayServer), making silent in-memory loss
-// structurally impossible.
 let persistenceConfig = null;
 let cacheAwarenessConfig = null;
 let jobStore = null;
@@ -231,47 +211,9 @@ function getApprovalManager(runtimeLogger = logger) {
     return approvalManager;
 }
 const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
-/**
- * Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
- *
- * Both flags reach the upstream CLIs as decimal-formatted argv strings via
- * `String(N)`. `z.number().int().positive()` alone lets values past
- * `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
- * scientific notation that Grok and Vibe both reject. The bounds below
- * (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
- * for price) guarantee a lossless decimal stringification AND a sane
- * upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
- */
 export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
-// Token budgets can legitimately exceed the agent-turn cap by orders of
-// magnitude. Keep a finite operational guardrail while avoiding the 10k turn
-// ceiling that would make large-context Vibe sessions unusable.
 export const MAX_TOKENS_SCHEMA = z.number().int().positive().safe().max(100_000_000);
-// `.min(1e-6)` keeps the value in JS's decimal-stringify range:
-// String(1e-6) === "0.000001" but String(1e-7) === "1e-7", which both
-// upstream CLIs would reject. 1µUSD per request is fine-grained enough
-// for any plausible budget-cap use.
 export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
-/**
- * Slice λ: shared worktree directive for all 10 `*_request` / `*_request_async`
- * tools. `true` creates a fresh worktree under `<repoRoot>/.worktrees/<uuid>`
- * branched from HEAD. `{ name?, ref? }` lets the caller supply a sanitized
- * name and/or git ref (default ref: HEAD).
- *
- * Lifecycle is gateway-owned: the gateway pre-creates the worktree via
- * `git worktree add`, then spawns the child CLI with `cwd: <worktree-path>`.
- * No `-w` / `--worktree` flag is ever emitted to the underlying CLI. When
- * the request carries a sessionId and the session already has a worktree,
- * that worktree is reused. On session_delete or TTL eviction the gateway
- * runs `git worktree remove --force`.
- *
- * Tool response: when a worktree was used, the successful response stdout
- * is prefixed with `[gateway] worktree=<absolute-path>\n` so callers can
- * parse/use the path without a schema change (slice λ §1.d).
- *
- * NOTE: callers should `.gitignore` the `.worktrees/` directory in their
- * repo (the gateway does NOT auto-gitignore — see slice λ spec Q4).
- */
 export const WORKTREE_SCHEMA = z
     .union([
     z.boolean(),
@@ -296,9 +238,6 @@ export const WORKTREE_SCHEMA = z
     "path. NOTE: callers should `.gitignore` the `.worktrees/` " +
     "directory in their repo (the gateway does NOT auto-gitignore — " +
     "see slice λ spec Q4).");
-// U22: Session-provider enum extended to five providers. The storage layer's
-// CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
-// session_create / session_list / session_clear_all accept the fifth provider.
 export const SESSION_PROVIDER_VALUES = ["claude", "codex", "gemini", "grok", "mistral"];
 export const SESSION_PROVIDER_ENUM = z.enum(SESSION_PROVIDER_VALUES);
 let activeServer = null;
@@ -308,13 +247,10 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
     const runtimeSessionManager = deps.sessionManager ?? sessionManager;
     const runtimePerformanceMetrics = deps.performanceMetrics ??
         (options.isolateState ? new PerformanceMetrics() : performanceMetrics);
-    // Resolve flight recorder BEFORE async manager so isolateState managers
-    // can be wired with the same recorder instance the runtime exposes.
     const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
     const runtimeAsyncJobManager = deps.asyncJobManager ??
         (options.isolateState
-            ? // Factory-created test/HTTP session servers must not mark another instance's
-                // durable jobs orphaned. Stdio startup injects the process-global manager.
+            ?
                 newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null, runtimeFlightRecorder)
             : getAsyncJobManager(runtimeLogger));
     const runtimeApprovalManager = deps.approvalManager ??
@@ -337,15 +273,12 @@ export function resolveGatewayServerRuntime(deps = {}, options = {}) {
         cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
     };
 }
-// Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
-// Claude idle timeout only applies in stream-json mode (with --include-partial-messages).
-// In text/json mode, Claude produces no output until done, so idle timeout would false-positive.
 const CLI_IDLE_TIMEOUTS = {
-    claude: 600_000, // 10 minutes — only used when outputFormat=stream-json
-    codex: 600_000, // 10 minutes — Codex streams stderr progress
-    gemini: 600_000, // 10 minutes — Gemini streams stdout in real-time
-    grok: 600_000, // 10 minutes — Grok streams stderr/stdout activity in headless mode
-    mistral: 600_000, // 10 minutes — Vibe streams stdout/stderr in headless mode
+    claude: 600_000,
+    codex: 600_000,
+    gemini: 600_000,
+    grok: 600_000,
+    mistral: 600_000,
 };
 function resolveIdleTimeout(cli, override) {
     if (override !== undefined)
@@ -353,41 +286,7 @@ function resolveIdleTimeout(cli, override) {
     return CLI_IDLE_TIMEOUTS[cli];
 }
 const SYNC_POLL_INTERVAL_MS = 1_000;
-/**
- * Start an async job and poll until completion or deadline.
- * Returns the job result if it finishes in time, or a deferral marker.
- */
-async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete,
-/**
- * Slice 1.5: when the sync handler has already written a logStart row
- * keyed on `corrId`, pass these so the manager can write logComplete
- * (with usage extraction) when the underlying async job terminates —
- * even if the sync handler returned a deferred response.
- * `writeFlightStart` is NEVER true on this path: the sync handler is
- * always the upstream logStart writer.
- */
-flightRecorderEntry, extractUsage,
-/**
- * Slice κ: optional stdin payload piped to the child CLI. Currently
- * only Claude's `--input-format stream-json` path sets this. Threaded
- * through both the direct-execute fallback (SYNC_DEADLINE_MS===0) and
- * the AsyncJobManager spawn path, and participates in the dedup key.
- */
-stdin,
-/**
- * Slice λ: optional working directory for the spawned child process,
- * derived from a gateway-owned git worktree. Threaded to both the
- * direct-execute fallback (`executeCli({ cwd })`) and the
- * AsyncJobManager dedup-aware spawn path
- * (`startJobWithDedup({ cwd })`). `cwd` also participates in the
- * dedup key (see async-job-manager.buildRequestKey) so two requests
- * with identical argv in different worktrees do not collide.
- */
-cwd) {
-    // U26 fix: ownership of onComplete is a contract. Once this function returns
-    // OR throws, the caller MUST consider onComplete consumed — i.e. it has
-    // either been run, or the AsyncJobManager has taken ownership of it. The
-    // caller never needs to reclaim.
+async function awaitJobOrDefer(cli, args, corrId, idleTimeoutMs, outputFormat, forceRefresh, runtime = resolveGatewayServerRuntime(), env, onComplete, flightRecorderEntry, extractUsage, stdin, cwd) {
     let onCompleteOwnedByCaller = onComplete !== undefined;
     const consumeOnComplete = () => {
         if (!onCompleteOwnedByCaller || !onComplete)
@@ -409,8 +308,6 @@ cwd) {
         throw err;
     }
     if (SYNC_DEADLINE_MS === 0) {
-        // Disabled — fall through to direct execution.
-        // Note: direct execution bypasses dedup. forceRefresh is implied.
         const command = cli === "mistral" ? "vibe" : cli;
         try {
             return await executeCli(command, args, {
@@ -422,8 +319,6 @@ cwd) {
             });
         }
         finally {
-            // Direct-execution path completes inline; release per-request resources
-            // (e.g. outputSchema temp files) here.
             consumeOnComplete();
         }
     }
@@ -437,22 +332,12 @@ cwd) {
             env,
             stdin,
             onComplete,
-            // Sync-deferred path: the upstream sync handler already wrote
-            // logStart for this corrId, so writeFlightStart stays false. The
-            // manager still writes logComplete on terminal state (which UPDATEs
-            // the sync handler's row), closing the previously-orphaned
-            // sync-deferred case.
             flightRecorderEntry,
             extractUsage,
         });
-        // Handoff succeeded: AsyncJobManager owns onComplete (it'll fire via
-        // fireOnComplete on terminal status, or run inline immediately for dedup).
         onCompleteOwnedByCaller = false;
     }
     catch (err) {
-        // Spawn or pre-spawn failure inside AsyncJobManager. The record was never
-        // registered, so onComplete will never be called by the manager. Reclaim
-        // here so the temp file is not leaked.
         consumeOnComplete();
         throw err;
     }
@@ -464,7 +349,6 @@ cwd) {
     while (Date.now() < deadline) {
         const snapshot = runtime.asyncJobManager.getJobSnapshot(job.id);
         if (snapshot && snapshot.status !== "running") {
-            // Job finished within deadline — extract result
             const result = runtime.asyncJobManager.getJobResult(job.id);
             if (!result) {
                 return { stdout: "", stderr: "Job result unavailable", code: 1 };
@@ -477,13 +361,6 @@ cwd) {
         }
         await new Promise(resolve => setTimeout(resolve, SYNC_POLL_INTERVAL_MS));
     }
-    // Deadline exceeded — return deferral.
-    // R2 Codex-Unit-B F1: hand FR-complete ownership to the manager. Until
-    // this call, the manager skips writeFlightComplete on terminal so the
-    // sync handler's safeFlightComplete (with rich approvalDecision /
-    // optimizationApplied metadata) wins for sync-inline completions. From
-    // here on the sync handler returns deferred and will NOT write
-    // safeFlightComplete, so the manager must.
     runtime.asyncJobManager.armFlightCompleteForDeferral(job.id);
     runtime.logger.info(`[${corrId}] ${cli} sync deadline exceeded (${SYNC_DEADLINE_MS}ms), deferring to async job ${job.id}`);
     return {
@@ -517,27 +394,6 @@ function buildDeferredToolResponse(deferred, sessionId) {
         ],
     };
 }
-/**
- * Slice λ: resolve a request's worktree directive into a spawn cwd.
- *
- * - `worktreeOpt` is the Zod-validated input value (boolean |
- *   `{ name?, ref? }` | undefined).
- * - When the request has a session AND the session already has a
- *   `metadata.worktreePath`, that path is reused (resume semantics).
- *   The reused path is returned without touching git; if the directory
- *   was externally removed between requests, the next CLI invocation
- *   will surface the error naturally.
- * - When no reusable worktree exists, `createWorktree` runs; on success
- *   the new path is written to `session.metadata` (only when a session
- *   exists — request-scoped worktrees do NOT persist).
- * - Returns `{}` when `worktreeOpt` is undefined/false (preserves
- *   pre-λ behaviour at non-worktree call sites).
- * - Errors propagate as `WorktreeError`/`Error`; the caller wraps them
- *   in a `createErrorResponse` envelope. Do NOT swallow.
- *
- * Spec: docs/plans/slice-lambda.spec.md §"Implementation surface to
- * verify" §5.
- */
 export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime) {
     if (!worktreeOpt)
         return {};
@@ -566,30 +422,13 @@ export async function resolveWorktreeForRequest(worktreeOpt, sessionId, runtime)
     }
     return { cwd: handle.path, worktreePath: handle.path };
 }
-/**
- * Slice λ §1.d: response-envelope shape decision for `worktreePath`.
- *
- * We surface the worktree path inline as a stdout prefix
- * (`[gateway] worktree=<absolute-path>\n`) rather than as a
- * structuredContent field or JSON wrapper. Rationale:
- *   - zero schema change across all 10 tools and their downstream parsers
- *   - matches how other slice features (session warnings, cache_state
- *     aggregates) surface side-channel metadata today
- *   - callers that want the path can split on the first newline; callers
- *     that don't care see a single ignorable header line
- *
- * Use `formatWorktreePrefix(resolution.worktreePath)` once per tool, at
- * the moment a successful response is constructed.
- */
 export function formatWorktreePrefix(worktreePath) {
     return worktreePath ? `[gateway] worktree=${worktreePath}\n` : "";
 }
-// Helper function for standardized error responses
 function createErrorResponse(cli, code, stderr, correlationId, error) {
     let errorMessage = `Error executing ${cli} CLI`;
     const isLaunchExit = code === 127 || code === -4058;
     if (error) {
-        // Command not found or spawn error
         errorMessage += `:\n${error.message}`;
         if (error.message.includes("ENOENT")) {
             errorMessage += `\n\nThe '${cli}' command was not found. Please ensure ${cli} CLI is installed and in your PATH.`;
@@ -597,12 +436,10 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
         logger.error(`[${correlationId || "unknown"}] ${cli} CLI execution failed:`, error.message);
     }
     else if (code === 124) {
-        // Wall-clock timeout
         errorMessage += `: Command timed out\n${stderr}`;
         logger.error(`[${correlationId || "unknown"}] ${cli} CLI timed out`);
     }
     else if (code === 125) {
-        // Idle timeout (stuck process)
         errorMessage += `: Process killed due to inactivity\n${stderr}`;
         logger.error(`[${correlationId || "unknown"}] ${cli} CLI killed due to inactivity`);
     }
@@ -611,7 +448,6 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
         logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed to launch`);
     }
     else if (code !== 0) {
-        // Other non-zero exit code
         errorMessage += ` (exit code ${code}):\n${stderr}`;
         logger.error(`[${correlationId || "unknown"}] ${cli} CLI failed with exit code ${code}`);
     }
@@ -634,14 +470,7 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
         },
     };
 }
-export function extractUsageAndCost(cli, output, outputFormat,
-/**
- * Optional context for off-stdout telemetry sources. Today only Mistral
- * uses this — its meta.json lives on disk keyed by sessionId. Threading
- * this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
- * primitives-only (no `params`/`prep` retention on AsyncJobRecord).
- */
-ctx) {
+export function extractUsageAndCost(cli, output, outputFormat, ctx) {
     if (cli === "claude" && outputFormat === "stream-json") {
         const parsed = parseStreamJson(output);
         if (!parsed.usage) {
@@ -679,29 +508,12 @@ ctx) {
             cacheReadTokens: parsed.usage.cache_read_tokens,
         };
     }
-    // Mistral/Vibe: usage/cost live on disk in `~/.vibe/logs/session/<id>/meta.json`
-    // (Phase 4 slice β). Best-effort: if we don't know the sessionId (fresh
-    // session whose Vibe-assigned UUID we never observed) or the file is
-    // missing/malformed, the parser returns `{}` and the FR row simply lacks
-    // usage data — matching pre-slice behaviour. No stdout fallback exists.
     if (cli === "mistral") {
         return parseVibeMetaJson(ctx?.home ?? homedir(), ctx?.sessionId);
     }
     return {};
 }
-/**
- * Slice 1.5: build the async-job-manager's FR payload from a prep object
- * (which every prepare*Request returns), plus the bound CLI and output
- * format primitives needed by extractUsageAndCost. Returning the closure
- * separately means it captures `cliName` and `fmt` ONLY — never `params`
- * or `prep` — so retention on AsyncJobRecord is O(constant).
- */
 function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat) {
-    // Extract primitives BEFORE building the closure — capturing `prep` or
-    // `params` directly would pin large attachments / promptParts on the
-    // AsyncJobRecord for JOB_TTL_MS. Phase 4 slice β: `sid` and `home` are
-    // primitives too, threaded through so the Mistral branch of
-    // extractUsageAndCost can read `~/.vibe/logs/session/<id>/meta.json`.
     const cli = cliName;
     const fmt = outputFormat;
     const sid = sessionId;
@@ -795,11 +607,7 @@ function resolveClaudeMcpConfig(operation, correlationId, requestedMcpServers, s
     }
     return { config: mcpConfig };
 }
-//──────────────────────────────────────────────────────────────────────────────
-// MCP Resources
-//──────────────────────────────────────────────────────────────────────────────
 function registerBaseResources(server, runtime) {
-    // Register skill resources (L2: full docs, read on demand)
     for (const skill of loadedSkills) {
         server.registerResource(`skill-${skill.name}`, `skills://${skill.name}`, {
             title: skill.name,
@@ -816,7 +624,6 @@ function registerBaseResources(server, runtime) {
         }));
     }
     runtime.logger.info(`Registered ${loadedSkills.length} skill resources`);
-    // Register all sessions resource
     server.registerResource("all-sessions", "sessions://all", {
         title: "📋 All Sessions",
         description: "All conversation sessions across CLIs",
@@ -826,7 +633,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Claude sessions resource
     server.registerResource("claude-sessions", "sessions://claude", {
         title: "🤖 Claude Sessions",
         description: "Claude conversation sessions",
@@ -836,7 +642,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Codex sessions resource
     server.registerResource("codex-sessions", "sessions://codex", {
         title: "💻 Codex Sessions",
         description: "Codex conversation sessions",
@@ -846,7 +651,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Gemini sessions resource
     server.registerResource("gemini-sessions", "sessions://gemini", {
         title: "✨ Gemini Sessions",
         description: "Gemini conversation sessions",
@@ -856,7 +660,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Grok sessions resource
     server.registerResource("grok-sessions", "sessions://grok", {
         title: "⚡ Grok Sessions",
         description: "Grok conversation sessions",
@@ -866,7 +669,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Mistral sessions resource
     server.registerResource("mistral-sessions", "sessions://mistral", {
         title: "🌬 Mistral Sessions",
         description: "Mistral Vibe conversation sessions",
@@ -876,7 +678,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Claude models resource
     server.registerResource("claude-models", "models://claude", {
         title: "🧠 Claude Models",
         description: "Claude models and capabilities",
@@ -886,7 +687,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Codex models resource
     server.registerResource("codex-models", "models://codex", {
         title: "🔧 Codex Models",
         description: "Codex models and capabilities",
@@ -896,7 +696,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Gemini models resource
     server.registerResource("gemini-models", "models://gemini", {
         title: "🌟 Gemini Models",
         description: "Gemini models and capabilities",
@@ -906,7 +705,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Grok models resource
     server.registerResource("grok-models", "models://grok", {
         title: "⚡ Grok Models",
         description: "Grok models and capabilities",
@@ -916,7 +714,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register Mistral models resource
     server.registerResource("mistral-models", "models://mistral", {
         title: "🌬 Mistral Models",
         description: "Mistral Vibe models and capabilities",
@@ -926,7 +723,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Register performance metrics resource
     server.registerResource("performance-metrics", "metrics://performance", {
         title: "📈 Performance Metrics",
         description: "Request counts, latency, success/failure rates",
@@ -936,11 +732,6 @@ function registerBaseResources(server, runtime) {
         const contents = await runtime.resourceProvider.readResource(uri.href);
         return { contents: contents ? [contents] : [] };
     });
-    // Cache-state resources (slice 2). Static URI for global, templated for
-    // session/{id} and prefix/{hash}. All three return tokens/hashes/aggregates
-    // ONLY — never raw prompt or response text. The structural guarantee is in
-    // the SessionCacheStats / PrefixCacheStats / GlobalCacheStats types
-    // themselves: those shapes have no prompt/response/system/task fields.
     server.registerResource("cache-state-global", "cache_state://global", {
         title: "💾 Cache State (Global)",
         description: "Aggregate cache hit/miss/savings across all CLIs in the flight recorder. Tokens/hashes only — no prompt text.",
@@ -999,11 +790,6 @@ function registerBaseResources(server, runtime) {
         };
     });
 }
-/**
- * Slice 1: validate the prompt / promptParts mutex at the prep boundary and
- * return either an error response or the resolved input. The exact error
- * messages are part of the public contract — tests assert them verbatim.
- */
 function resolvePromptOrPartsForPrep(args) {
     const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
     const hasParts = args.promptParts !== undefined;
@@ -1045,7 +831,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
     const assembledPrompt = inputResolution.assembledPrompt;
     const stablePrefixHash = inputResolution.stablePrefixHash;
     const stablePrefixTokens = inputResolution.stablePrefixTokens;
-    // Review integrity check on raw prompt (before optimization)
     const reviewIntegrity = checkReviewIntegrity({
         prompt: assembledPrompt,
         allowedTools: params.allowedTools,
@@ -1058,13 +843,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             score: reviewIntegrity.totalScore,
         });
     }
-    // Rec #5 (slice κ): refuse the optimizePrompt + cacheControl combo
-    // before running optimization. Optimization rewrites the assembled
-    // prompt text the flight-recorder logs, but the κ stdin payload is
-    // built from raw `promptParts` content blocks — letting both run
-    // produces a FR row whose `prompt` no longer matches what Claude
-    // actually received, AND any optimisation-driven text change would
-    // silently break Anthropic prefix-cache reuse on the next call.
     const ccEarly = params.promptParts?.cacheControl;
     const cacheControlRequestedEarly = !!(ccEarly &&
         (ccEarly.system || ccEarly.tools || ccEarly.context));
@@ -1088,7 +866,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         approvalDecision = runtime.approvalManager.decide({
             cli: "claude",
             operation: params.operation,
-            prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
+            prompt: assembledPrompt,
             bypassRequested: params.dangerouslySkipPermissions,
             fullAuto: false,
             requestedMcpServers,
@@ -1102,18 +880,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             return createApprovalDeniedResponse(params.operation, approvalDecision);
         }
     }
-    // Rec #2 (slice κ): auto-emit `cache_control` when the caller passes
-    // `promptParts` whose stable prefix exceeds the per-model minimum,
-    // the caller has NOT explicitly set `cacheControl`, the gateway
-    // config has opted in (`[cache_awareness].emit_anthropic_cache_control`),
-    // and outputFormat is stream-json. Auto-emit marks the LAST non-empty
-    // stable block (context → tools → system priority — the rightmost
-    // stable block covers the widest prefix). Skipped when optimizePrompt
-    // is on (same rec #5 desync risk).
-    //
-    // The 1h ttl is forced regardless of `anthropic_ttl_seconds`: 5m
-    // breakpoints from caller content are rejected by Anthropic once
-    // Claude Code's own 1h-marked session-wrap blocks land ahead of them.
     let autoEmittedCacheControlBlock = null;
     if (!cacheControlRequestedEarly &&
         runtime.cacheAwareness.emitAnthropicCacheControl &&
@@ -1124,9 +890,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         const threshold = minStableTokensForModel(runtime.cacheAwareness, resolvedModel ?? "default");
         if (stablePrefixTokens >= threshold) {
             const pp = params.promptParts;
-            // Rightmost non-empty stable block — its cache_control breakpoint
-            // covers everything above it in the message (the API matches
-            // breakpoints in order).
             if (pp.context && pp.context.length > 0)
                 autoEmittedCacheControlBlock = "context";
             else if (pp.tools && pp.tools.length > 0)
@@ -1141,12 +904,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             }
         }
     }
-    // Rec #4: warn when promptParts has a cacheable stable prefix but no
-    // cache_control breakpoint is being emitted (neither explicit nor
-    // auto). Either the caller forgot to set `cacheControl` or
-    // `[cache_awareness].emit_anthropic_cache_control` is off — both
-    // leave the stable prefix bytes unreused across calls, defeating the
-    // point of using `promptParts`.
     const warnings = [];
     if (!cacheControlRequestedEarly &&
         autoEmittedCacheControlBlock === null &&
@@ -1168,13 +925,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             });
         }
     }
-    // Slice κ: switch from the legacy positional `-p <prompt>` emission
-    // to `claude -p --input-format stream-json` and feed a JSON
-    // content-blocks payload via stdin. Non-κ callers (no cacheControl,
-    // or cacheControl with all flags false) take the existing positional
-    // path bit-for-bit. The κ path activates on EITHER an explicit caller
-    // opt-in (`cacheControlRequestedEarly`) OR a gateway-driven auto-emit
-    // (`autoEmittedCacheControlBlock`).
     const cacheControlRequested = cacheControlRequestedEarly || autoEmittedCacheControlBlock !== null;
     let stdinPayload;
     let cacheControlBlocks;
@@ -1182,9 +932,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         if (params.outputFormat !== "stream-json") {
             return createErrorResponse(params.operation, 1, "", corrId, new Error("promptParts.cacheControl requires outputFormat: 'stream-json' (slice κ pipes the cache_control blocks over --input-format stream-json; text/json output formats cannot carry the required NDJSON usage events)."));
         }
-        // promptParts is non-null whenever cacheControlRequested is true
-        // (explicit opt-in lives in PromptParts; auto-emit guard requires
-        // promptParts to be defined).
         const effectiveParts = autoEmittedCacheControlBlock !== null
             ? {
                 ...params.promptParts,
@@ -1216,11 +963,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             args.push("--output-format", "json");
         }
         else if (params.outputFormat === "stream-json") {
-            // Claude CLI 2.x rejects `--print --output-format stream-json` without
-            // `--verbose`: "When using --print, --output-format=stream-json requires
-            // --verbose". --verbose only affects what claude logs to stderr; the
-            // stream-json stdout payload is unchanged, so the gateway's NDJSON
-            // parser is unaffected.
             args.push("--output-format", "stream-json", "--include-partial-messages", "--verbose");
         }
     }
@@ -1251,7 +993,6 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
             args.push("--strict-mcp-config");
         }
     }
-    // U25: Claude high-impact features (agent, agents, fork, system-prompt, budget, effort, …)
     let validatedAgents;
     if (params.agents && Object.keys(params.agents).length > 0) {
         const result = validateClaudeAgentsMap(params.agents);
@@ -1273,6 +1014,10 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
         fallbackModel: params.fallbackModel,
         jsonSchema: params.jsonSchema,
         addDir: params.addDir,
+        noSessionPersistence: params.noSessionPersistence,
+        settingSources: params.settingSources,
+        settings: params.settings,
+        tools: params.tools,
     }));
     return {
         corrId,
@@ -1305,7 +1050,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
     const assembledPrompt = inputResolution.assembledPrompt;
     const stablePrefixHash = inputResolution.stablePrefixHash;
     const stablePrefixTokens = inputResolution.stablePrefixTokens;
-    // Review integrity check on raw prompt (before optimization)
     const reviewIntegrity = checkReviewIntegrity({ prompt: assembledPrompt });
     if (reviewIntegrity.violations.length > 0) {
         runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
@@ -1326,7 +1070,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
         approvalDecision = runtime.approvalManager.decide({
             cli: "codex",
             operation: params.operation,
-            prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
+            prompt: assembledPrompt,
             bypassRequested: params.dangerouslyBypassApprovalsAndSandbox,
             fullAuto: params.fullAuto,
             requestedMcpServers,
@@ -1338,9 +1082,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
             return createApprovalDeniedResponse(params.operation, approvalDecision);
         }
     }
-    // Resume mode: codex exec resume <SESSION_ID|--last> [flags] PROMPT
-    // Note: `codex exec resume` does NOT accept sandbox policy flags; the original
-    // session's approval policy is inherited. We silently drop fullAuto on resume.
     let sessionPlan;
     try {
         sessionPlan = resolveCodexSessionArgs({
@@ -1361,9 +1102,6 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
     }
     if (resolvedModel)
         args.push("--model", resolvedModel);
-    // Codex sandbox / approval: resolve modern flags + legacy fullAuto shorthand.
-    // `codex exec resume` rejects all of these (the original session's policy is
-    // inherited), so we only emit them when starting a NEW session.
     const sandboxFlags = resolveCodexSandboxFlags({
         sandboxMode: params.sandboxMode,
         askForApproval: params.askForApproval,
@@ -1379,26 +1117,12 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
     if (params.dangerouslyBypassApprovalsAndSandbox) {
         args.push("--dangerously-bypass-approvals-and-sandbox");
     }
-    // U23 fix: emit `--json` when the caller asked for JSON output so the
-    // codex-json-parser actually receives JSONL events. This is what makes
-    // extractUsageAndCost() reachable from the tool surface; without it, the
-    // U23 parser is dead code.
     if (params.outputFormat === "json") {
         args.push("--json");
     }
     args.push("--skip-git-repo-check");
-    // U26: High-impact feature flags. `--search` is retained as a compatibility
-    // input but current `codex exec` no longer accepts it, so the helper warns
-    // and emits no argv. `--profile` is accepted for new sessions only. The other
-    // flags here are accepted on resume per `codex exec resume --help` and are
-    // emitted in both branches.
     let highImpactCleanup;
     if (sessionPlan.mode === "new") {
-        // Phase 4 slice ζ: emit working-dir and add-dir on new sessions only.
-        // Both flags are listed in CODEX_RESUME_FILTERED_FLAGS — resume inherits
-        // the original session's cwd and writable-dir policy, so emitting them
-        // on resume would be silently stripped (wasteful + misleading on argv
-        // logs). Gating here mirrors `--search` / `--sandbox`.
         if (params.workingDir) {
             args.push("-C", params.workingDir);
         }
@@ -1481,7 +1205,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
     const assembledPrompt = inputResolution.assembledPrompt;
     const stablePrefixHash = inputResolution.stablePrefixHash;
     const stablePrefixTokens = inputResolution.stablePrefixTokens;
-    // Review integrity check on raw prompt (before optimization)
     const reviewIntegrity = checkReviewIntegrity({
         prompt: assembledPrompt,
         allowedTools: params.allowedTools,
@@ -1505,8 +1228,8 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
         approvalDecision = runtime.approvalManager.decide({
             cli: "gemini",
             operation: params.operation,
-            prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
-            bypassRequested: params.approvalMode === "yolo",
+            prompt: assembledPrompt,
+            bypassRequested: params.approvalMode === "yolo" || params.yolo === true,
             fullAuto: false,
             requestedMcpServers,
             allowedTools: params.allowedTools,
@@ -1519,8 +1242,6 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
         }
     }
     const effectiveApprovalMode = params.approvalStrategy === "mcp_managed" ? "yolo" : params.approvalMode;
-    // U27: Validate high-impact policy paths and prepend attachment tokens
-    // BEFORE the `-p` pair is emitted, preserving the U21 ordering invariant.
     const highImpact = prepareGeminiHighImpactFlags({
         sandbox: params.sandbox,
         policyFiles: params.policyFiles,
@@ -1537,15 +1258,14 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
             return createErrorResponse(params.operation, 1, "", corrId, err instanceof Error ? err : new Error(String(err)));
         }
     }
-    // U21: Emit the prompt via -p/--prompt rather than as a positional argument.
-    // Positional prompts depend on Gemini's TTY/mode-detection heuristics; -p is
-    // the documented non-interactive flag and is robust against future CLI mode
-    // changes.
     const args = ["-p", effectivePrompt];
     if (resolvedModel)
         args.push("--model", resolvedModel);
     if (effectiveApprovalMode)
         args.push("--approval-mode", effectiveApprovalMode);
+    if (params.yolo && effectiveApprovalMode !== "yolo") {
+        args.push("--yolo");
+    }
     if (params.allowedTools && params.allowedTools.length > 0) {
         sanitizeCliArgValues(params.allowedTools, "allowedTools");
         params.allowedTools.forEach(tool => args.push("--allowed-tools", tool));
@@ -1558,26 +1278,13 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
         sanitizeCliArgValues(params.includeDirs, "includeDirs");
         params.includeDirs.forEach(dir => args.push("--include-directories", dir));
     }
-    // U27 high-impact flags (-s / --policy / --admin-policy) appended after the
-    // existing flag set so positional ordering relative to `-p` is preserved.
     args.push(...highImpact.args);
-    // U23 fix: emit `-o json` when the caller asked for JSON output. The Gemini
-    // JSON parser is otherwise unreachable from the tool surface and the
-    // structured usageMetadata is silently dropped.
-    //
-    // Phase 4 slice ε: same wiring for `-o stream-json` (NDJSON event stream).
-    // Gemini already streams stdout in real-time so the existing 10-minute
-    // idle timeout (CLI_IDLE_TIMEOUTS.gemini) covers both modes without
-    // adjustment — unlike Claude, no `--include-partial-messages` companion
-    // flag is required because Gemini emits assistant `delta` events as part
-    // of the default stream-json shape.
     if (params.outputFormat === "json") {
         args.push("-o", "json");
     }
     else if (params.outputFormat === "stream-json") {
         args.push("-o", "stream-json");
     }
-    // Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
     if (params.skipTrust) {
         args.push("--skip-trust");
     }
@@ -1608,7 +1315,6 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
     const assembledPrompt = inputResolution.assembledPrompt;
     const stablePrefixHash = inputResolution.stablePrefixHash;
     const stablePrefixTokens = inputResolution.stablePrefixTokens;
-    // Review integrity check on raw prompt (before optimization)
     const reviewIntegrity = checkReviewIntegrity({
         prompt: assembledPrompt,
         allowedTools: params.allowedTools,
@@ -1633,7 +1339,7 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
         approvalDecision = runtime.approvalManager.decide({
             cli: "grok",
             operation: params.operation,
-            prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
+            prompt: assembledPrompt,
             bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
             fullAuto: false,
             requestedMcpServers,
@@ -1694,6 +1400,12 @@ export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime
             args.push("--deny", rule);
         }
     }
+    if (params.compactionMode) {
+        args.push("--compaction-mode", params.compactionMode);
+    }
+    if (params.compactionDetail) {
+        args.push("--compaction-detail", params.compactionDetail);
+    }
     return {
         corrId,
         effectivePrompt,
@@ -1762,9 +1474,6 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
             return createApprovalDeniedResponse(params.operation, approvalDecision);
         }
     }
-    // Under mcp_managed, force --agent auto-approve so the approval gate's
-    // verdict carries through to the CLI invocation (mirrors Grok's --always-approve
-    // forcing under mcp_managed).
     const effectivePermissionMode = params.approvalStrategy === "mcp_managed"
         ? "auto-approve"
         : (params.permissionMode ?? "auto-approve");
@@ -1773,8 +1482,6 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
         resolvedModel,
         outputFormat: params.outputFormat,
         permissionMode: effectivePermissionMode,
-        effort: params.effort,
-        reasoningEffort: params.reasoningEffort,
         allowedTools: params.allowedTools,
         disallowedTools: params.disallowedTools,
         trust: params.trust,
@@ -1813,15 +1520,6 @@ function selectMistralRecoveryModel(failedModel) {
     ].filter((model) => Boolean(model && model !== failedModel));
     return candidates.find(model => model !== "local");
 }
-/**
- * Phase 4 slice δ post-review: pure helper extracted from
- * `handleMistralRequest` so the retry-path arg-preservation invariants
- * (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
- * without mocking awaitJobOrDefer. Any param the wrapper threads into
- * the FIRST `buildMistralCliInvocation` call MUST also be threaded
- * through here, or a fresh-workspace / budgeted run can degrade on
- * the second attempt.
- */
 export function buildMistralRetryPrep(params, recoveryModel) {
     return buildMistralCliInvocation({
         prompt: params.effectivePrompt,
@@ -1830,8 +1528,6 @@ export function buildMistralRetryPrep(params, recoveryModel) {
         permissionMode: params.approvalStrategy === "mcp_managed"
             ? "auto-approve"
             : (params.permissionMode ?? "auto-approve"),
-        effort: params.effort,
-        reasoningEffort: params.reasoningEffort,
         allowedTools: params.allowedTools,
         disallowedTools: params.disallowedTools,
         trust: params.trust,
@@ -1844,13 +1540,11 @@ export function buildMistralRetryPrep(params, recoveryModel) {
 }
 function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
     let finalStdout = stdout;
-    // Skip response optimization for JSON output to prevent corrupting structured data
     if (optimizeResponse && outputFormat !== "json") {
         const optimized = optimizeResponseText(finalStdout);
         logOptimizationTokens("response", corrId, finalStdout, optimized);
         finalStdout = optimized;
     }
-    // Append review integrity warnings to response text (skip for JSON output to avoid corruption)
     if (prep.reviewIntegrity &&
         prep.reviewIntegrity.violations.length > 0 &&
         outputFormat !== "json") {
@@ -1867,9 +1561,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
             correlationId: corrId,
             sessionId: sessionId || null,
             durationMs,
-            // Phase 4 slice β: thread sessionId + home so the Mistral branch of
-            // extractUsageAndCost can read `~/.vibe/logs/session/<dir>/meta.json`.
-            // Other CLIs ignore the ctx (their usage source is stdout).
             ...extractUsageAndCost(cli, stdout, outputFormat, { sessionId, home: homedir() }),
             exitCode: 0,
             retryCount: 0,
@@ -1899,12 +1590,6 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
     }
     return response;
 }
-/**
- * Slice 3 helper: compute the cache_ttl_expiring_soon warning for a
- * claude session, if the feature is enabled, the session has prior cache
- * writes, and ttlRemainingMs is below the threshold (30s by default).
- * Returns null when no warning applies.
- */
 function maybeBuildCacheTtlWarning(args) {
     if (args.cli !== "claude")
         return null;
@@ -1933,7 +1618,6 @@ function resolveHandlerRuntime(deps) {
     if (deps.runtime)
         return deps.runtime;
     const asyncDeps = deps;
-    // Older HandlerDeps callers may not provide `warn`; default-route to `info`.
     const depLogger = deps.logger;
     const normalizedLogger = {
         info: depLogger.info,
@@ -1969,6 +1653,7 @@ export async function handleGeminiRequest(deps, params) {
         adminPolicyFiles: params.adminPolicyFiles,
         attachments: params.attachments,
         skipTrust: params.skipTrust,
+        yolo: params.yolo,
     }, runtime);
     if (!("args" in prep))
         return prep;
@@ -1986,8 +1671,6 @@ export async function handleGeminiRequest(deps, params) {
     }, runtime);
     deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${prep.effectivePrompt.length}`);
     try {
-        // Gemini CLI 0.43 supports `--resume`, but not a supported fresh
-        // `--session-id` flag. Fresh sessions emit no session flag.
         const sessionPlan = resolveGeminiSessionPlan({
             sessionId: params.sessionId,
             resumeLatest: params.resumeLatest,
@@ -2005,7 +1688,6 @@ export async function handleGeminiRequest(deps, params) {
         }
         const geminiFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, params.sessionId, params.outputFormat);
         const result = await awaitJobOrDefer("gemini", args, corrId, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, geminiFrHandoff.flightRecorderEntry, geminiFrHandoff.extractUsage, worktreeResolution.cwd);
-        // Deferred — job still running, return async reference
         if (isDeferredResponse(result)) {
             return buildDeferredToolResponse(result, effectiveSessionIdHint);
         }
@@ -2026,9 +1708,6 @@ export async function handleGeminiRequest(deps, params) {
             return createErrorResponse("gemini", code, stderr, corrId);
         }
         wasSuccessful = true;
-        // Post-success session I/O for explicit resume flows. Fresh Gemini sessions
-        // are owned by the CLI because the current CLI has no supported fresh
-        // session-id flag the gateway can inject.
         let effectiveSessionId = effectiveSessionIdHint;
         if (effectiveSessionId) {
             const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2111,19 +1790,18 @@ export async function handleGeminiRequestAsync(deps, params) {
         adminPolicyFiles: params.adminPolicyFiles,
         attachments: params.attachments,
         skipTrust: params.skipTrust,
+        yolo: params.yolo,
     }, runtime);
     if (!("args" in prep))
         return prep;
     const { corrId, args, requestedMcpServers, approvalDecision } = prep;
     try {
-        // Gemini CLI 0.43 supports `--resume`, but fresh sessions emit no session flag.
         const sessionPlan = resolveGeminiSessionPlan({
             sessionId: params.sessionId,
             resumeLatest: params.resumeLatest,
             createNewSession: params.createNewSession,
         });
         args.push(...sessionPlan.args);
-        // Pre-start session I/O (async handlers: prevent orphaned jobs)
         let effectiveSessionId = sessionPlan.resumed ? params.sessionId : undefined;
         if (effectiveSessionId) {
             const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2146,13 +1824,8 @@ export async function handleGeminiRequestAsync(deps, params) {
         catch (err) {
             return createErrorResponse("gemini_request_async", 1, "", corrId, err);
         }
-        // Start job only after all session I/O succeeds. U23: forward outputFormat
-        // so AsyncJobManager records it in the durable store (the manager also
-        // surfaces it in the snapshot).
         assertUpstreamCliArgs("gemini", args);
         assertUpstreamCliEnv("gemini", undefined);
-        // Slice 1.5: pure async path — no upstream safeFlightStart, so the
-        // manager owns both logStart and logComplete for this corrId.
         const geminiAsyncFrHandoff = buildAsyncFlightRecorderHandoff("gemini", prep, effectiveSessionId, params.outputFormat);
         const job = deps.asyncJobManager.startJob("gemini", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("gemini", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, undefined, geminiAsyncFrHandoff.flightRecorderEntry, geminiAsyncFrHandoff.extractUsage, true);
         deps.logger.info(`[${corrId}] gemini_request_async started job ${job.id}`);
@@ -2210,6 +1883,8 @@ export async function handleGrokRequest(deps, params) {
         systemPromptOverride: params.systemPromptOverride,
         allow: params.allow,
         deny: params.deny,
+        compactionMode: params.compactionMode,
+        compactionDetail: params.compactionDetail,
     }, runtime);
     if (!("args" in prep))
         return prep;
@@ -2227,7 +1902,6 @@ export async function handleGrokRequest(deps, params) {
     }, runtime);
     deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${prep.effectivePrompt.length}`);
     try {
-        // Session arg planning (pure, no I/O)
         const sessionResult = resolveGrokSessionArgs({
             sessionId: params.sessionId,
             resumeLatest: params.resumeLatest,
@@ -2243,7 +1917,6 @@ export async function handleGrokRequest(deps, params) {
         }
         const grokFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, params.sessionId, params.outputFormat);
         const result = await awaitJobOrDefer("grok", args, corrId, resolveIdleTimeout("grok", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, runtime, undefined, undefined, grokFrHandoff.flightRecorderEntry, grokFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
-        // Deferred — job still running, return async reference
         if (isDeferredResponse(result)) {
             return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
         }
@@ -2264,7 +1937,6 @@ export async function handleGrokRequest(deps, params) {
             return createErrorResponse("grok", code, stderr, corrId);
         }
         wasSuccessful = true;
-        // Post-success session I/O (sync handlers: no phantom sessions on CLI failure)
         let effectiveSessionId = sessionResult.effectiveSessionId;
         if (sessionResult.userProvidedSession && effectiveSessionId) {
             const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2350,19 +2022,19 @@ export async function handleGrokRequestAsync(deps, params) {
         systemPromptOverride: params.systemPromptOverride,
         allow: params.allow,
         deny: params.deny,
+        compactionMode: params.compactionMode,
+        compactionDetail: params.compactionDetail,
     }, runtime);
     if (!("args" in prep))
         return prep;
     const { corrId, args, requestedMcpServers, approvalDecision } = prep;
     try {
-        // Session arg planning (pure, no I/O)
         const sessionResult = resolveGrokSessionArgs({
             sessionId: params.sessionId,
             resumeLatest: params.resumeLatest,
             createNewSession: params.createNewSession,
         });
         args.push(...sessionResult.resumeArgs);
-        // Pre-start session I/O (async handlers: prevent orphaned jobs)
         let effectiveSessionId = sessionResult.effectiveSessionId;
         if (sessionResult.userProvidedSession && effectiveSessionId) {
             const existing = await deps.sessionManager.getSession(effectiveSessionId);
@@ -2389,7 +2061,6 @@ export async function handleGrokRequestAsync(deps, params) {
         catch (err) {
             return createErrorResponse("grok_request_async", 1, "", corrId, err);
         }
-        // Start job only after all session I/O succeeds
         assertUpstreamCliArgs("grok", args);
         assertUpstreamCliEnv("grok", undefined);
         const grokAsyncFrHandoff = buildAsyncFlightRecorderHandoff("grok", prep, effectiveSessionId, params.outputFormat);
@@ -2431,8 +2102,6 @@ export async function handleMistralRequest(deps, params) {
         model: params.model,
         outputFormat: params.outputFormat,
         permissionMode: params.permissionMode,
-        effort: params.effort,
-        reasoningEffort: params.reasoningEffort,
         allowedTools: params.allowedTools,
         disallowedTools: params.disallowedTools,
         approvalStrategy: params.approvalStrategy,
@@ -2488,8 +2157,6 @@ export async function handleMistralRequest(deps, params) {
                 deps.logger.info(`[${corrId}] mistral_request detected stale Vibe model selection; retrying once with ${recoveryModel}`);
                 const retryPrep = buildMistralRetryPrep({ ...params, effectivePrompt: prep.effectivePrompt }, recoveryModel);
                 const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
-                // Reuse the FR handoff built above — the retry preserves corrId,
-                // so the manager's logComplete still updates the original row.
                 result = await awaitJobOrDefer("mistral", retryArgs, corrId, resolveIdleTimeout("mistral", params.idleTimeoutMs), params.outputFormat, true, runtime, retryPrep.env, undefined, mistralFrHandoff.flightRecorderEntry, mistralFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
                 if (isDeferredResponse(result)) {
                     return buildDeferredToolResponse(result, sessionResult.effectiveSessionId);
@@ -2582,8 +2249,6 @@ export async function handleMistralRequestAsync(deps, params) {
         model: params.model,
         outputFormat: params.outputFormat,
         permissionMode: params.permissionMode,
-        effort: params.effort,
-        reasoningEffort: params.reasoningEffort,
         allowedTools: params.allowedTools,
         disallowedTools: params.disallowedTools,
         approvalStrategy: params.approvalStrategy,
@@ -2702,11 +2367,6 @@ export async function handleCodexRequestAsync(deps, params) {
     if (!("args" in prep))
         return prep;
     const { corrId, args, requestedMcpServers, approvalDecision } = prep;
-    // U26 fix: outputSchema temp-file ownership. The cleanup callable lives in
-    // exactly one place at a time: this scope until startJob succeeds, then
-    // AsyncJobManager (via onComplete → persistComplete → fireOnComplete) once
-    // the job is registered. Any code path that fails to hand it off MUST run
-    // it locally.
     const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
     let prepCleanupOwnedHere = prepCleanup !== undefined;
     const runPrepCleanupLocally = () => {
@@ -2721,7 +2381,6 @@ export async function handleCodexRequestAsync(deps, params) {
         }
     };
     try {
-        // Pre-start session I/O (async handlers: prevent orphaned jobs)
         let effectiveSessionId = params.sessionId;
         if (!params.createNewSession && !params.sessionId) {
             const activeSession = await deps.sessionManager.getActiveSession("codex");
@@ -2740,9 +2399,6 @@ export async function handleCodexRequestAsync(deps, params) {
             const newSession = await deps.sessionManager.createSession("codex", "Codex Session");
             effectiveSessionId = newSession.id;
         }
-        // Slice λ: resolve worktree directive after session I/O so resume reuse
-        // can read metadata.worktreePath. A pre-startJob failure here means
-        // prepCleanup is still owned locally; run it before returning.
         let worktreeResolution = {};
         try {
             worktreeResolution = await resolveWorktreeForRequest(params.worktree, effectiveSessionId, runtime);
@@ -2751,22 +2407,15 @@ export async function handleCodexRequestAsync(deps, params) {
             runPrepCleanupLocally();
             return createErrorResponse("codex_request_async", 1, "", corrId, err);
         }
-        // Start job only after all session I/O succeeds. If startJob throws before
-        // registering the record, ownership stays here and we run it in the catch.
         assertUpstreamCliArgs("codex", args);
         assertUpstreamCliEnv("codex", undefined);
         const codexAsyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, effectiveSessionId, params.outputFormat);
         let job;
         try {
             job = deps.asyncJobManager.startJob("codex", args, corrId, worktreeResolution.cwd, resolveIdleTimeout("codex", params.idleTimeoutMs), params.outputFormat, params.forceRefresh, undefined, prepCleanup, codexAsyncFrHandoff.flightRecorderEntry, codexAsyncFrHandoff.extractUsage, true);
-            // Handoff succeeded: AsyncJobManager will fire prepCleanup on terminal
-            // status. Release our local ownership claim so the catch path doesn't
-            // double-fire.
             prepCleanupOwnedHere = false;
         }
         catch (startErr) {
-            // startJob never stored the record → manager won't call onComplete. We
-            // still own the cleanup; let the outer catch run it.
             throw startErr;
         }
         deps.logger.info(`[${corrId}] codex_request_async started job ${job.id}`);
@@ -2793,42 +2442,15 @@ export async function handleCodexRequestAsync(deps, params) {
         };
     }
     catch (error) {
-        // Pre-start failure: either session I/O threw, or startJob threw before
-        // registering the record. In either case the manager will NOT fire
-        // prepCleanup, so we must run it here.
         runPrepCleanupLocally();
         return createErrorResponse("codex_request_async", 1, "", corrId, error);
     }
 }
-//──────────────────────────────────────────────────────────────────────────────
-// Claude Code Tool
-//──────────────────────────────────────────────────────────────────────────────
 export function createGatewayServer(deps = {}) {
     const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
     const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, } = runtime;
-    // `flightRecorder` is destructured into closure scope so the session_get
-    // handler (see ~line 5590) has the FlightRecorderQuery read capability
-    // available without re-resolving runtime. Slice 2 will populate the
-    // `cacheState` field of session_get's response from this read surface.
-    // `cacheAwareness` is the loaded [cache_awareness] block (config.ts).
     void flightRecorder;
     void cacheAwareness;
-    // Structural invariant: tools register iff ALL THREE conditions hold:
-    //   (1) persistence.backend !== "none"  — the operator/config has not
-    //       explicitly disabled durable persistence;
-    //   (2) persistence.asyncJobsEnabled === true — the derived opt-in flag
-    //       agrees (loadPersistenceConfig sets this iff backend is one of
-    //       sqlite/postgres/memory);
-    //   (3) asyncJobManager.hasStore() === true — the runtime manager
-    //       actually has a store attached (isolate-mode runtimes use null).
-    //
-    // Each guard closes a distinct re-entry path for the silent-loss footgun:
-    //   - Without (1), a caller can inject {backend:'none', asyncJobsEnabled:true}
-    //     and re-advertise the async tools while reporting backend='none' in
-    //     llm_process_health — exactly contradicting SPEC CLAIM 4f.
-    //   - Without (2), config that opts out is ignored.
-    //   - Without (3), a null-store manager (isolate-mode / HTTP per-session)
-    //     accepts registrations that have nowhere to persist results.
     const asyncJobsEnabled = persistence.backend !== "none" && persistence.asyncJobsEnabled && asyncJobManager.hasStore();
     const server = newGatewayMcpServer();
     registerBaseResources(server, runtime);
@@ -2865,7 +2487,6 @@ export function createGatewayServer(deps = {}) {
             .enum(CLAUDE_PERMISSION_MODES)
             .optional()
             .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op (no flag emitted)."),
-        // U25 — Claude high-impact features
         agent: z
             .string()
             .optional()
@@ -2905,7 +2526,6 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .optional()
             .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
-        // Phase 4 slice η — Claude reliability + structured-output parity
         fallbackModel: z
             .string()
             .min(1)
@@ -2915,11 +2535,28 @@ export function createGatewayServer(deps = {}) {
             .union([z.string(), z.record(z.string(), z.unknown())])
             .optional()
             .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
-        // Phase 4 slice ζ — Claude additional-workspace-dirs parity
         addDir: z
             .array(z.string())
             .optional()
             .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
+        noSessionPersistence: z
+            .boolean()
+            .optional()
+            .describe("Claude --no-session-persistence: do not write this session to disk (ephemeral one-shot runs; mirrors codex --ephemeral)."),
+        settingSources: z
+            .string()
+            .min(1)
+            .optional()
+            .describe("Claude --setting-sources: comma-separated setting sources to load (user|project|local) for reproducible/isolated headless runs."),
+        settings: z
+            .string()
+            .min(1)
+            .optional()
+            .describe("Claude --settings: path to a settings JSON file or a JSON literal of additional settings. Powerful: settings can define hooks/permissions/model; passed verbatim."),
+        tools: z
+            .array(z.string())
+            .optional()
+            .describe('Claude --tools: restrict the available built-in tool set (distinct from allowedTools permission gating). Pass [""] to disable all tools.'),
         worktree: WORKTREE_SCHEMA.optional(),
         approvalStrategy: z
             .enum(["legacy", "mcp_managed"])
@@ -2951,7 +2588,7 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .default(false)
             .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-    }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
+    }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
         const startTime = Date.now();
         if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
             return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
@@ -2984,24 +2621,18 @@ export function createGatewayServer(deps = {}) {
             fallbackModel,
             jsonSchema,
             addDir,
+            noSessionPersistence,
+            settingSources,
+            settings,
+            tools,
         }, runtime);
         if (!("args" in prep))
             return prep;
         const { corrId, args } = prep;
         let durationMs = 0;
         let wasSuccessful = false;
-        // Session resolution happens BEFORE safeFlightStart so that:
-        //   (1) the TTL warning reads the PRIOR session's lastWriteAt
-        //       rather than the row about to be inserted (codex-r1/F1).
-        //   (2) the flight-recorder row is tagged with effectiveSessionId
-        //       (the session the CLI will actually resume), not the raw
-        //       user-provided sessionId.
         let effectiveSessionId = sessionId;
         let useContinue = continueSession;
-        // Guard the active-session lookup: in some test harnesses the
-        // sessionManager is undefined; the original try-catch wrapped this
-        // block, so we replicate that tolerance here. Failure leaves
-        // effectiveSessionId as the user-provided sessionId.
         let activeSession = null;
         try {
             activeSession = await sessionManager.getActiveSession("claude");
@@ -3016,16 +2647,11 @@ export function createGatewayServer(deps = {}) {
         if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
             useContinue = true;
         }
-        // Slice 3: if the resolved session has a near-expiry Anthropic
-        // cache breakpoint, attach a structured warning (NOT a hard error)
-        // to the response. Computed BEFORE safeFlightStart so the current
-        // row does not skew lastRequestAt.
         const ttlWarning = maybeBuildCacheTtlWarning({
             runtime,
             sessionId: effectiveSessionId,
             cli: "claude",
         });
-        // Rec #4: include any prep-time warnings (e.g. cacheable_prefix_uncached).
         const warnings = [
             ...(ttlWarning ? [ttlWarning] : []),
             ...(prep.warnings ?? []),
@@ -3049,8 +2675,6 @@ export function createGatewayServer(deps = {}) {
                 args.push("--session-id", effectiveSessionId);
                 await sessionManager.updateSessionUsage(effectiveSessionId);
             }
-            // Slice λ: resolve worktree directive into spawn cwd. Done after
-            // session resolution so resume reuse can read metadata.worktreePath.
             let worktreeResolution = {};
             try {
                 worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
@@ -3058,11 +2682,9 @@ export function createGatewayServer(deps = {}) {
             catch (err) {
                 return createErrorResponse("claude_request", 1, "", corrId, err);
             }
-            // Idle timeout only for stream-json (text/json produce no output until done)
             const effectiveIdleTimeout = outputFormat === "stream-json" ? resolveIdleTimeout("claude", idleTimeoutMs) : undefined;
             const claudeSyncFrHandoff = buildAsyncFlightRecorderHandoff("claude", prep, effectiveSessionId, outputFormat);
             const result = await awaitJobOrDefer("claude", args, corrId, effectiveIdleTimeout, outputFormat, forceRefresh, runtime, undefined, undefined, claudeSyncFrHandoff.flightRecorderEntry, claudeSyncFrHandoff.extractUsage, prep.stdinPayload, worktreeResolution.cwd);
-            // Deferred — job still running, return async reference
             if (isDeferredResponse(result)) {
                 return buildDeferredToolResponse(result, effectiveSessionId);
             }
@@ -3080,9 +2702,6 @@ export function createGatewayServer(deps = {}) {
                     errorMessage: stderr || `Exit code ${code}`,
                     status: "failed",
                 }, runtime);
-                // Slice 3: attach any computed warnings to the error response so
-                // the caller still sees cache_ttl_expiring_soon when the CLI
-                // happens to fail for an unrelated reason.
                 const errResp = createErrorResponse("claude", code, stderr, corrId);
                 if (warnings.length > 0) {
                     errResp.warnings = warnings;
@@ -3090,7 +2709,6 @@ export function createGatewayServer(deps = {}) {
                 return errResp;
             }
             wasSuccessful = true;
-            // If we used a session ID and it's not tracked yet, create a session record
             if (effectiveSessionId) {
                 const existingSession = await sessionManager.getSession(effectiveSessionId);
                 if (!existingSession) {
@@ -3098,7 +2716,6 @@ export function createGatewayServer(deps = {}) {
                 }
             }
             logger.info(`[${corrId}] claude_request completed successfully in ${durationMs}ms`);
-            // Parse stream-json NDJSON output to extract result text
             if (outputFormat === "stream-json") {
                 const parsed = parseStreamJson(stdout);
                 if (parsed.costUsd !== null) {
@@ -3165,9 +2782,6 @@ export function createGatewayServer(deps = {}) {
             performanceMetrics.recordRequest("claude", finalizedDurationMs, wasSuccessful);
         }
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // Codex Tool
-    //──────────────────────────────────────────────────────────────────────────────
     server.tool("codex_request", {
         prompt: z
             .string()
@@ -3232,14 +2846,10 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .default(false)
             .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-        // U23: emit `--json` so the codex-json-parser surfaces input/output/cache
-        // tokens (and any cost) through extractUsageAndCost. Without "json", the
-        // parser is unreachable and Codex usage is never reported.
         outputFormat: z
             .enum(["text", "json"])
             .default("text")
             .describe("Codex output format. `json` emits --json (JSONL events) so token usage and cost are parsed and reported in the flight recorder. `text` is the default."),
-        // U26: high-impact feature flags. All optional.
         outputSchema: z
             .union([z.string(), z.record(z.string(), z.unknown())])
             .optional()
@@ -3269,7 +2879,6 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .optional()
             .describe("Codex --ignore-rules: skip project rule files for this run."),
-        // Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
         workingDir: z
             .string()
             .min(1)
@@ -3327,15 +2936,7 @@ export function createGatewayServer(deps = {}) {
             stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
         }, runtime);
         logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prep.effectivePrompt.length}`);
-        // U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
-        // guarantees the cleanup runs exactly once — inline for direct
-        // execution, on terminal status for the job-backed path (sync
-        // completion or deferred). The outer finally MUST NOT clean again.
         const prepCleanup = "cleanup" in prep && typeof prep.cleanup === "function" ? prep.cleanup : undefined;
-        // Slice λ: resolve worktree directive into spawn cwd. Codex has no
-        // in-handler session resolution prior to spawn (session lookup is
-        // lazy via `codex exec resume`), so the user-supplied sessionId is
-        // the only reuse key.
         let worktreeResolution = {};
         try {
             worktreeResolution = await resolveWorktreeForRequest(worktree, sessionId, runtime);
@@ -3346,8 +2947,6 @@ export function createGatewayServer(deps = {}) {
         try {
             const codexSyncFrHandoff = buildAsyncFlightRecorderHandoff("codex", prep, sessionId, outputFormat);
             const result = await awaitJobOrDefer("codex", args, corrId, resolveIdleTimeout("codex", idleTimeoutMs), outputFormat, forceRefresh, runtime, undefined, prepCleanup, codexSyncFrHandoff.flightRecorderEntry, codexSyncFrHandoff.extractUsage, undefined, worktreeResolution.cwd);
-            // Deferred — job still running, return async reference. Cleanup
-            // ownership belongs to AsyncJobManager via onComplete.
             if (isDeferredResponse(result)) {
                 return buildDeferredToolResponse(result, sessionId);
             }
@@ -3368,7 +2967,6 @@ export function createGatewayServer(deps = {}) {
                 return createErrorResponse("codex", code, stderr, corrId);
             }
             wasSuccessful = true;
-            // Track session usage
             let effectiveSessionId = sessionId;
             if (!createNewSession && !sessionId) {
                 const activeSession = await sessionManager.getActiveSession("codex");
@@ -3430,12 +3028,8 @@ export function createGatewayServer(deps = {}) {
         finally {
             const finalizedDurationMs = Math.max(0, durationMs || Date.now() - startTime);
             performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
-            // Cleanup is owned by awaitJobOrDefer's contract; nothing to do here.
         }
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // U26: codex_fork_session — `codex fork <SESSION_ID|--last> <prompt>`
-    //──────────────────────────────────────────────────────────────────────────────
     server.tool("codex_fork_session", {
         prompt: z
             .string()
@@ -3472,8 +3066,6 @@ export function createGatewayServer(deps = {}) {
         const startTime = Date.now();
         let durationMs = 0;
         let wasSuccessful = false;
-        // Enforce mutual exclusion at tool boundary (Zod records the params but
-        // the SDK's `.tool(...)` does not accept top-level refines).
         if (sessionId && forkLast) {
             return createErrorResponse("codex_fork_session", 1, "", corrId, new Error("sessionId and forkLast are mutually exclusive"));
         }
@@ -3489,11 +3081,6 @@ export function createGatewayServer(deps = {}) {
         }
         const cliInfo = getCliInfo();
         const resolvedModel = resolveModelAlias("codex", model, cliInfo);
-        // Compose argv: forkArgs already starts with `fork`. Inject model and
-        // sandbox/approval flags BEFORE the positional <sessionId|--last> +
-        // prompt to keep them as flags rather than positionals. forkArgs layout
-        // is either ["fork", "--last", prompt] or ["fork", sessionId, prompt];
-        // we splice flags right after "fork".
         const flagSegment = [];
         if (resolvedModel)
             flagSegment.push("--model", resolvedModel);
@@ -3530,9 +3117,6 @@ export function createGatewayServer(deps = {}) {
             performanceMetrics.recordRequest("codex", finalizedDurationMs, wasSuccessful);
         }
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // Gemini Tool
-    //──────────────────────────────────────────────────────────────────────────────
     server.tool("gemini_request", {
         prompt: z
             .string()
@@ -3583,11 +3167,6 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .default(false)
             .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-        // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
-        // remains text so existing callers see no behavior change. Phase 4 slice
-        // ε adds `stream-json` (NDJSON event stream parsed by
-        // parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
-        // semantics covered by Gemini's existing real-time stdout streaming).
         outputFormat: z
             .enum(["text", "json", "stream-json"])
             .default("text")
@@ -3600,8 +3179,12 @@ export function createGatewayServer(deps = {}) {
             .boolean()
             .default(false)
             .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
+        yolo: z
+            .boolean()
+            .optional()
+            .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
         worktree: WORKTREE_SCHEMA.optional(),
-    }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
+    }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
         return handleGeminiRequest({ sessionManager, logger, runtime }, {
             prompt,
             promptParts,
@@ -3626,12 +3209,10 @@ export function createGatewayServer(deps = {}) {
             adminPolicyFiles,
             attachments,
             skipTrust,
+            yolo,
             worktree,
         });
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // Grok Tool
-    //──────────────────────────────────────────────────────────────────────────────
     server.tool("grok_request", {
         prompt: z
             .string()
@@ -3702,13 +3283,11 @@ export function createGatewayServer(deps = {}) {
             .default(false)
             .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
         maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
-        // Phase 4 slice ζ — Grok working-directory parity.
         workingDir: z
             .string()
             .min(1)
             .optional()
             .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
-        // Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
         sandbox: z
             .string()
             .min(1)
@@ -3732,8 +3311,16 @@ export function createGatewayServer(deps = {}) {
             .array(z.string())
             .optional()
             .describe('Grok --deny <RULE>: permission deny rules. Each entry is emitted as its own --deny instance (per `grok --help`: "Repeat to add multiple rules").'),
+        compactionMode: z
+            .enum(["summary", "transcript", "segments"])
+            .optional()
+            .describe("Grok --compaction-mode: summary (default; no pointer) | transcript (points at the raw transcript) | segments (persists per-segment markdown to grep). Sets GROK_COMPACTION_MODE."),
+        compactionDetail: z
+            .enum(["none", "minimal", "balanced", "verbose"])
+            .optional()
+            .describe("Grok --compaction-detail: verbatim segment detail (none|minimal|balanced|verbose, default verbose). Only affects `--compaction-mode segments`. Sets GROK_COMPACTION_DETAIL."),
         worktree: WORKTREE_SCHEMA.optional(),
-    }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
+    }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, worktree, }) => {
         return handleGrokRequest({ sessionManager, logger, runtime }, {
             prompt,
             promptParts,
@@ -3763,12 +3350,11 @@ export function createGatewayServer(deps = {}) {
             systemPromptOverride,
             allow,
             deny,
+            compactionMode,
+            compactionDetail,
             worktree,
         });
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // Mistral Vibe Tool
-    //──────────────────────────────────────────────────────────────────────────────
     server.tool("mistral_request", {
         prompt: z
             .string()
@@ -3798,11 +3384,6 @@ export function createGatewayServer(deps = {}) {
             .enum(MISTRAL_AGENT_MODES)
             .optional()
             .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
-        effort: z
-            .enum(["low", "medium", "high", "xhigh", "max"])
-            .optional()
-            .describe("Vibe effort level"),
-        reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
         approvalStrategy: z
             .enum(["legacy", "mcp_managed"])
             .default("legacy")
@@ -3844,7 +3425,6 @@ export function createGatewayServer(deps = {}) {
         maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
         maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
         maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
-        // Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
         workingDir: z
             .string()
             .min(1)
@@ -3855,7 +3435,7 @@ export function createGatewayServer(deps = {}) {
             .optional()
             .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance (Vibe states this flag may be specified multiple times)."),
         worktree: WORKTREE_SCHEMA.optional(),
-    }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
+    }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
         return handleMistralRequest({ sessionManager, logger, runtime }, {
             prompt,
             promptParts,
@@ -3865,8 +3445,6 @@ export function createGatewayServer(deps = {}) {
             resumeLatest,
             createNewSession,
             permissionMode,
-            effort,
-            reasoningEffort,
             approvalStrategy,
             approvalPolicy,
             mcpServers,
@@ -3886,16 +3464,6 @@ export function createGatewayServer(deps = {}) {
             worktree,
         });
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // Async Long-Running Job Tools (No Time-Bound LLM Execution)
-    //
-    // STRUCTURAL INVARIANT: these tools are only registered when a real job
-    // store is attached (`persistence.asyncJobsEnabled === true`). When the
-    // operator has configured `[persistence].backend = "none"`, none of the
-    // *_request_async / llm_job_* tools exist in the MCP tool list at all —
-    // orchestrating agents get a clean "tool not found" signal at connect
-    // time instead of silent in-memory loss after the 1-hour TTL.
-    //──────────────────────────────────────────────────────────────────────────────
     if (asyncJobsEnabled) {
         server.tool("claude_request_async", {
             prompt: z
@@ -3929,7 +3497,6 @@ export function createGatewayServer(deps = {}) {
                 .enum(CLAUDE_PERMISSION_MODES)
                 .optional()
                 .describe("Claude --permission-mode: default|acceptEdits|plan|auto|dontAsk|bypassPermissions. `default` is a no-op."),
-            // U25 — Claude high-impact features
             agent: z
                 .string()
                 .optional()
@@ -3969,7 +3536,6 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .optional()
                 .describe("Claude --exclude-dynamic-system-prompt-sections: trim dynamic context blocks from the system prompt."),
-            // Phase 4 slice η — Claude reliability + structured-output parity
             fallbackModel: z
                 .string()
                 .min(1)
@@ -3979,11 +3545,28 @@ export function createGatewayServer(deps = {}) {
                 .union([z.string(), z.record(z.string(), z.unknown())])
                 .optional()
                 .describe("Claude --json-schema: JSON Schema literal (NOT a path) constraining structured output. Object values are JSON.stringify-d; string values are passed verbatim. Use with outputFormat='json'."),
-            // Phase 4 slice ζ — Claude additional-workspace-dirs parity
             addDir: z
                 .array(z.string())
                 .optional()
                 .describe("Claude --add-dir: additional directories the CLI is allowed to read/write beyond the process cwd. Each entry is emitted as its own --add-dir instance."),
+            noSessionPersistence: z
+                .boolean()
+                .optional()
+                .describe("Claude --no-session-persistence: do not write this session to disk (ephemeral one-shot runs; mirrors codex --ephemeral)."),
+            settingSources: z
+                .string()
+                .min(1)
+                .optional()
+                .describe("Claude --setting-sources: comma-separated setting sources to load (user|project|local) for reproducible/isolated headless runs."),
+            settings: z
+                .string()
+                .min(1)
+                .optional()
+                .describe("Claude --settings: path to a settings JSON file or a JSON literal of additional settings. Powerful: settings can define hooks/permissions/model; passed verbatim."),
+            tools: z
+                .array(z.string())
+                .optional()
+                .describe('Claude --tools: restrict the available built-in tool set (distinct from allowedTools permission gating). Pass [""] to disable all tools.'),
             worktree: WORKTREE_SCHEMA.optional(),
             approvalStrategy: z
                 .enum(["legacy", "mcp_managed"])
@@ -4014,7 +3597,7 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .default(false)
                 .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-        }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
+        }, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, fallbackModel, jsonSchema, addDir, noSessionPersistence, settingSources, settings, tools, worktree, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
             if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
                 return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
             }
@@ -4046,12 +3629,15 @@ export function createGatewayServer(deps = {}) {
                 fallbackModel,
                 jsonSchema,
                 addDir,
+                noSessionPersistence,
+                settingSources,
+                settings,
+                tools,
             }, runtime);
             if (!("args" in prep))
                 return prep;
             const { corrId, args, requestedMcpServers, mcpConfig, approvalDecision } = prep;
             try {
-                // Session management (before job start for async)
                 let effectiveSessionId = sessionId;
                 let useContinue = continueSession;
                 const activeSession = await sessionManager.getActiveSession("claude");
@@ -4075,14 +3661,11 @@ export function createGatewayServer(deps = {}) {
                         await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
                     }
                 }
-                // Slice 3: TTL warning on resume (async path too).
                 const ttlWarning = maybeBuildCacheTtlWarning({
                     runtime,
                     sessionId: effectiveSessionId,
                     cli: "claude",
                 });
-                // Slice λ: resolve worktree directive after session metadata is
-                // settled so resume reuse can read metadata.worktreePath.
                 let worktreeResolution = {};
                 try {
                     worktreeResolution = await resolveWorktreeForRequest(worktree, effectiveSessionId, runtime);
@@ -4090,7 +3673,6 @@ export function createGatewayServer(deps = {}) {
                 catch (err) {
                     return createErrorResponse("claude_request_async", 1, "", corrId, err);
                 }
-                // Idle timeout only for stream-json (text/json produce no output until done)
                 const effectiveIdleTimeout = outputFormat === "stream-json"
                     ? resolveIdleTimeout("claude", idleTimeoutMs)
                     : undefined;
@@ -4116,8 +3698,6 @@ export function createGatewayServer(deps = {}) {
                 if (worktreeResolution.worktreePath) {
                     asyncResponse.worktreePath = worktreeResolution.worktreePath;
                 }
-                // Rec #4: include any prep-time warnings (e.g.
-                // cacheable_prefix_uncached) alongside ttlWarning.
                 const mergedWarnings = [
                     ...(ttlWarning ? [ttlWarning] : []),
                     ...(prep.warnings ?? []),
@@ -4201,12 +3781,10 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .default(false)
                 .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-            // U23: emit `--json` to enable JSONL event-stream parsing for token usage.
             outputFormat: z
                 .enum(["text", "json"])
                 .default("text")
                 .describe("Codex output format. `json` emits --json (JSONL events) for token usage extraction."),
-            // U26: high-impact feature flags. All optional.
             outputSchema: z
                 .union([z.string(), z.record(z.string(), z.unknown())])
                 .optional()
@@ -4221,7 +3799,6 @@ export function createGatewayServer(deps = {}) {
             images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
             ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
             ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
-            // Phase 4 slice ζ — Codex working-dir + add-dir parity (new sessions only).
             workingDir: z
                 .string()
                 .min(1)
@@ -4318,11 +3895,6 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .default(false)
                 .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
-            // U23: emit `-o json` to extract token usage via parseGeminiJson. Default
-            // remains text so existing callers see no behavior change. Phase 4 slice
-            // ε adds `stream-json` (NDJSON event stream parsed by
-            // parseGeminiStreamJson — `init`/`message`/`result` lines, idle-timeout
-            // semantics covered by Gemini's existing real-time stdout streaming).
             outputFormat: z
                 .enum(["text", "json", "stream-json"])
                 .default("text")
@@ -4335,8 +3907,12 @@ export function createGatewayServer(deps = {}) {
                 .boolean()
                 .default(false)
                 .describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
+            yolo: z
+                .boolean()
+                .optional()
+                .describe("Emit `--yolo` to auto-approve all actions. Equivalent to approvalMode 'yolo'; routed through the same approval gate. Under mcp_managed the gate still decides."),
             worktree: WORKTREE_SCHEMA.optional(),
-        }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, worktree, }) => {
+        }, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, yolo, worktree, }) => {
             return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
                 prompt,
                 promptParts,
@@ -4360,6 +3936,7 @@ export function createGatewayServer(deps = {}) {
                 adminPolicyFiles,
                 attachments,
                 skipTrust,
+                yolo,
                 worktree,
             });
         });
@@ -4432,13 +4009,11 @@ export function createGatewayServer(deps = {}) {
                 .default(false)
                 .describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
             maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
-            // Phase 4 slice ζ — Grok working-directory parity.
             workingDir: z
                 .string()
                 .min(1)
                 .optional()
                 .describe("Grok --cwd <DIR>: working directory for this invocation. Lets headless callers run Grok against a directory other than the gateway process's cwd."),
-            // Phase 4 slice θ — Grok HIGH parity (sandbox, rules, system-prompt-override, allow, deny).
             sandbox: z
                 .string()
                 .min(1)
@@ -4462,8 +4037,16 @@ export function createGatewayServer(deps = {}) {
                 .array(z.string())
                 .optional()
                 .describe("Grok --deny <RULE>: permission deny rules. Each entry → its own --deny instance."),
+            compactionMode: z
+                .enum(["summary", "transcript", "segments"])
+                .optional()
+                .describe("Grok --compaction-mode: summary (default) | transcript | segments. Sets GROK_COMPACTION_MODE."),
+            compactionDetail: z
+                .enum(["none", "minimal", "balanced", "verbose"])
+                .optional()
+                .describe("Grok --compaction-detail: segment verbatim detail (none|minimal|balanced|verbose, default verbose). Only affects segments mode. Sets GROK_COMPACTION_DETAIL."),
             worktree: WORKTREE_SCHEMA.optional(),
-        }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, worktree, }) => {
+        }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, workingDir, sandbox, rules, systemPromptOverride, allow, deny, compactionMode, compactionDetail, worktree, }) => {
             return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
                 prompt,
                 promptParts,
@@ -4492,6 +4075,8 @@ export function createGatewayServer(deps = {}) {
                 systemPromptOverride,
                 allow,
                 deny,
+                compactionMode,
+                compactionDetail,
                 worktree,
             });
         });
@@ -4524,11 +4109,6 @@ export function createGatewayServer(deps = {}) {
                 .enum(MISTRAL_AGENT_MODES)
                 .optional()
                 .describe("Vibe agent mode (default|plan|accept-edits|auto-approve|chat|explore|lean). Defaults to auto-approve for programmatic use."),
-            effort: z
-                .enum(["low", "medium", "high", "xhigh", "max"])
-                .optional()
-                .describe("Vibe effort level"),
-            reasoningEffort: z.string().optional().describe("Reasoning effort for reasoning models"),
             approvalStrategy: z
                 .enum(["legacy", "mcp_managed"])
                 .default("legacy")
@@ -4569,7 +4149,6 @@ export function createGatewayServer(deps = {}) {
             maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
             maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
             maxTokens: MAX_TOKENS_SCHEMA.optional().describe("Vibe `--max-tokens N`: cap cumulative prompt + completion tokens for the session (programmatic mode only). Bounded to safe integers ≤ 100000000."),
-            // Phase 4 slice ζ — Vibe working-directory + additional-dirs parity.
             workingDir: z
                 .string()
                 .min(1)
@@ -4580,7 +4159,7 @@ export function createGatewayServer(deps = {}) {
                 .optional()
                 .describe("Vibe --add-dir <DIR>: additional writable workspace directories. Each entry is emitted as its own --add-dir instance."),
             worktree: WORKTREE_SCHEMA.optional(),
-        }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
+        }, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, maxTokens, workingDir, addDir, worktree, }) => {
             return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
                 prompt,
                 promptParts,
@@ -4590,8 +4169,6 @@ export function createGatewayServer(deps = {}) {
                 resumeLatest,
                 createNewSession,
                 permissionMode,
-                effort,
-                reasoningEffort,
                 approvalStrategy,
                 approvalPolicy,
                 mcpServers,
@@ -4667,7 +4244,6 @@ export function createGatewayServer(deps = {}) {
                     isError: true,
                 };
             }
-            // Parse stream-json output for Claude async jobs
             const outputFormat = asyncJobManager.getJobOutputFormat(jobId);
             let parsed;
             if (outputFormat === "stream-json" && result.stdout) {
@@ -4727,14 +4303,7 @@ export function createGatewayServer(deps = {}) {
                 ],
             };
         });
-    } // end if (asyncJobsEnabled)
-    // Read back any persisted request (sync OR async) by its correlation id.
-    // Registered unconditionally — it reads the flight recorder, which is
-    // independent of async-job persistence. Every sync/async response echoes
-    // its id in `structuredContent.correlationId`; pass that id here to recover
-    // the persisted prompt/response after the inline result is gone. With flight
-    // recording disabled (LLM_GATEWAY_LOGS_DB=none → NoopFlightRecorder) the
-    // query yields no rows and this returns the "not found" shape.
+    }
     server.tool("llm_request_result", {
         correlationId: z
             .string()
@@ -4805,9 +4374,6 @@ export function createGatewayServer(deps = {}) {
             ],
         };
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // Approval Audit Tools
-    //──────────────────────────────────────────────────────────────────────────────
     server.tool("approval_list", {
         limit: z
             .number()
@@ -4835,9 +4401,6 @@ export function createGatewayServer(deps = {}) {
             ],
         };
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // List Models Tool
-    //──────────────────────────────────────────────────────────────────────────────
     server.tool("list_models", {
         cli: z
             .preprocess(value => (value === "" || value === null ? undefined : value), z.enum(["claude", "codex", "gemini", "grok", "mistral"]).optional())
@@ -4916,9 +4479,6 @@ export function createGatewayServer(deps = {}) {
             };
         }
     });
-    //──────────────────────────────────────────────────────────────────────────────
-    // Session Management Tools
-    //──────────────────────────────────────────────────────────────────────────────
     server.tool("session_create", {
         cli: SESSION_PROVIDER_ENUM.describe("CLI type (claude|codex|gemini|grok|mistral)"),
         description: z.string().optional().describe("Session description"),
@@ -5094,15 +4654,6 @@ export function createGatewayServer(deps = {}) {
                 };
             }
             const activeSession = await sessionManager.getActiveSession(session.cli);
-            // Slice 2: project a compact cacheState view from the flight
-            // recorder at read time. NOT persisted on the Session interface
-            // (sessions.json stays content-free per the project invariant).
-            // The field is OMITTED entirely (not null, not empty object) when
-            // the session has zero rows in the flight recorder so the response
-            // stays compact for fresh sessions.
-            //
-            // Slice 3: include ttlRemainingMs derived from the gateway's
-            // configured TTL policy. Null for non-claude sessions.
             let cacheState;
             try {
                 const stats = computeSessionCacheStats(flightRecorder, session.id);
@@ -5171,16 +4722,8 @@ export function createGatewayServer(deps = {}) {
     });
     return server;
 }
-//──────────────────────────────────────────────────────────────────────────────
-// Async Initialization
-//──────────────────────────────────────────────────────────────────────────────
 async function initializeSessionManager() {
     const config = loadConfig();
-    // Slice λ: file-backed sessions get a cleanup hook that tears down any
-    // git worktrees recorded on session.metadata.worktreePath. PG-backed
-    // sessions skip the hook (multi-tenant deployments don't necessarily
-    // own a single filesystem); revisit if/when worktree support extends
-    // there.
     const worktreeCleanupHook = createWorktreeSessionCleanupHook(logger);
     if (config.database) {
         logger.info("Initializing PostgreSQL session manager");
@@ -5198,9 +4741,6 @@ async function initializeSessionManager() {
     }
     resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));
 }
-//──────────────────────────────────────────────────────────────────────────────
-// Health Check Resource (only if using PostgreSQL)
-//──────────────────────────────────────────────────────────────────────────────
 function registerHealthResource(server) {
     if (db) {
         server.registerResource("health", "health://status", {
@@ -5221,7 +4761,6 @@ function registerHealthResource(server) {
         });
         logger.info("Health check resource registered");
     }
-    // Process health resource (always available, not dependent on DB)
     server.registerResource("process-health", "metrics://process-health", {
         title: "Process Health",
         description: "Async job health (CPU, memory, zombie detection)",
@@ -5240,13 +4779,9 @@ function registerHealthResource(server) {
     });
     logger.info("Process health resource registered");
 }
-//──────────────────────────────────────────────────────────────────────────────
-// Graceful Shutdown
-//──────────────────────────────────────────────────────────────────────────────
 async function shutdown(signal) {
     logger.info(`Received ${signal}, shutting down gracefully...`);
     try {
-        // Kill all active process groups (SIGTERM → wait 3s → SIGKILL)
         await killAllProcessGroups();
         logger.info("All process groups terminated");
         if (activeHttpGateway) {
@@ -5276,9 +4811,6 @@ async function shutdown(signal) {
 }
 process.on("SIGTERM", () => shutdown("SIGTERM"));
 process.on("SIGINT", () => shutdown("SIGINT"));
-//──────────────────────────────────────────────────────────────────────────────
-// Server Startup
-//──────────────────────────────────────────────────────────────────────────────
 async function main() {
     startWindowsBootstrapperSelfHeal();
     const args = process.argv.slice(2);
@@ -5342,7 +4874,6 @@ async function main() {
         process.env.MCP_TRANSPORT ||
         "stdio";
     logger.info(`Starting llm-cli-gateway MCP server with ${transportMode} transport`);
-    // Initialize session manager first
     await initializeSessionManager();
     const serverDeps = {
         sessionManager,
@@ -5369,14 +4900,11 @@ async function main() {
     activeServer = createGatewayServer({
         ...serverDeps,
     });
-    // Register health check resource if using PostgreSQL
     registerHealthResource(activeServer);
     const transport = new StdioServerTransport();
     await activeServer.connect(transport);
     logger.info("llm-cli-gateway MCP server connected and ready");
 }
-// Guard: only auto-start when run directly (not imported for testing)
-// Resolve symlinks so `llm-cli-gateway` (npm-linked bin) matches import.meta.url
 const __entryUrl = entrypointFileURL(process.argv[1]);
 if (__entryUrl === import.meta.url) {
     main().catch(error => {